kreuzberg 3.16.0__py3-none-any.whl → 3.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. kreuzberg/__init__.py +2 -0
  2. kreuzberg/_config.py +8 -9
  3. kreuzberg/_extractors/_base.py +0 -46
  4. kreuzberg/_extractors/_html.py +1 -1
  5. kreuzberg/_extractors/_pandoc.py +2 -2
  6. kreuzberg/_extractors/_pdf.py +4 -4
  7. kreuzberg/_gmft.py +2 -2
  8. kreuzberg/_language_detection.py +16 -39
  9. kreuzberg/_mcp/server.py +1 -1
  10. kreuzberg/_mime_types.py +1 -1
  11. kreuzberg/_ocr/_easyocr.py +4 -9
  12. kreuzberg/_ocr/_paddleocr.py +1 -1
  13. kreuzberg/_ocr/_tesseract.py +15 -25
  14. kreuzberg/_token_reduction/__init__.py +11 -0
  15. kreuzberg/_token_reduction/_reducer.py +439 -0
  16. kreuzberg/_token_reduction/_stopwords.py +116 -0
  17. kreuzberg/_token_reduction/stopwords/af_stopwords.json +53 -0
  18. kreuzberg/_token_reduction/stopwords/ar_stopwords.json +482 -0
  19. kreuzberg/_token_reduction/stopwords/bg_stopwords.json +261 -0
  20. kreuzberg/_token_reduction/stopwords/bn_stopwords.json +400 -0
  21. kreuzberg/_token_reduction/stopwords/br_stopwords.json +1205 -0
  22. kreuzberg/_token_reduction/stopwords/ca_stopwords.json +280 -0
  23. kreuzberg/_token_reduction/stopwords/cs_stopwords.json +425 -0
  24. kreuzberg/_token_reduction/stopwords/da_stopwords.json +172 -0
  25. kreuzberg/_token_reduction/stopwords/de_stopwords.json +622 -0
  26. kreuzberg/_token_reduction/stopwords/el_stopwords.json +849 -0
  27. kreuzberg/_token_reduction/stopwords/en_stopwords.json +1300 -0
  28. kreuzberg/_token_reduction/stopwords/eo_stopwords.json +175 -0
  29. kreuzberg/_token_reduction/stopwords/es_stopwords.json +734 -0
  30. kreuzberg/_token_reduction/stopwords/et_stopwords.json +37 -0
  31. kreuzberg/_token_reduction/stopwords/eu_stopwords.json +100 -0
  32. kreuzberg/_token_reduction/stopwords/fa_stopwords.json +801 -0
  33. kreuzberg/_token_reduction/stopwords/fi_stopwords.json +849 -0
  34. kreuzberg/_token_reduction/stopwords/fr_stopwords.json +693 -0
  35. kreuzberg/_token_reduction/stopwords/ga_stopwords.json +111 -0
  36. kreuzberg/_token_reduction/stopwords/gl_stopwords.json +162 -0
  37. kreuzberg/_token_reduction/stopwords/gu_stopwords.json +226 -0
  38. kreuzberg/_token_reduction/stopwords/ha_stopwords.json +41 -0
  39. kreuzberg/_token_reduction/stopwords/he_stopwords.json +196 -0
  40. kreuzberg/_token_reduction/stopwords/hi_stopwords.json +227 -0
  41. kreuzberg/_token_reduction/stopwords/hr_stopwords.json +181 -0
  42. kreuzberg/_token_reduction/stopwords/hu_stopwords.json +791 -0
  43. kreuzberg/_token_reduction/stopwords/hy_stopwords.json +47 -0
  44. kreuzberg/_token_reduction/stopwords/id_stopwords.json +760 -0
  45. kreuzberg/_token_reduction/stopwords/it_stopwords.json +634 -0
  46. kreuzberg/_token_reduction/stopwords/ja_stopwords.json +136 -0
  47. kreuzberg/_token_reduction/stopwords/kn_stopwords.json +84 -0
  48. kreuzberg/_token_reduction/stopwords/ko_stopwords.json +681 -0
  49. kreuzberg/_token_reduction/stopwords/ku_stopwords.json +64 -0
  50. kreuzberg/_token_reduction/stopwords/la_stopwords.json +51 -0
  51. kreuzberg/_token_reduction/stopwords/lt_stopwords.json +476 -0
  52. kreuzberg/_token_reduction/stopwords/lv_stopwords.json +163 -0
  53. kreuzberg/_token_reduction/stopwords/ml_stopwords.json +11 -0
  54. kreuzberg/_token_reduction/stopwords/mr_stopwords.json +101 -0
  55. kreuzberg/_token_reduction/stopwords/ms_stopwords.json +477 -0
  56. kreuzberg/_token_reduction/stopwords/ne_stopwords.json +490 -0
  57. kreuzberg/_token_reduction/stopwords/nl_stopwords.json +415 -0
  58. kreuzberg/_token_reduction/stopwords/no_stopwords.json +223 -0
  59. kreuzberg/_token_reduction/stopwords/pl_stopwords.json +331 -0
  60. kreuzberg/_token_reduction/stopwords/pt_stopwords.json +562 -0
  61. kreuzberg/_token_reduction/stopwords/ro_stopwords.json +436 -0
  62. kreuzberg/_token_reduction/stopwords/ru_stopwords.json +561 -0
  63. kreuzberg/_token_reduction/stopwords/si_stopwords.json +193 -0
  64. kreuzberg/_token_reduction/stopwords/sk_stopwords.json +420 -0
  65. kreuzberg/_token_reduction/stopwords/sl_stopwords.json +448 -0
  66. kreuzberg/_token_reduction/stopwords/so_stopwords.json +32 -0
  67. kreuzberg/_token_reduction/stopwords/st_stopwords.json +33 -0
  68. kreuzberg/_token_reduction/stopwords/sv_stopwords.json +420 -0
  69. kreuzberg/_token_reduction/stopwords/sw_stopwords.json +76 -0
  70. kreuzberg/_token_reduction/stopwords/ta_stopwords.json +129 -0
  71. kreuzberg/_token_reduction/stopwords/te_stopwords.json +54 -0
  72. kreuzberg/_token_reduction/stopwords/th_stopwords.json +118 -0
  73. kreuzberg/_token_reduction/stopwords/tl_stopwords.json +149 -0
  74. kreuzberg/_token_reduction/stopwords/tr_stopwords.json +506 -0
  75. kreuzberg/_token_reduction/stopwords/uk_stopwords.json +75 -0
  76. kreuzberg/_token_reduction/stopwords/ur_stopwords.json +519 -0
  77. kreuzberg/_token_reduction/stopwords/vi_stopwords.json +647 -0
  78. kreuzberg/_token_reduction/stopwords/yo_stopwords.json +62 -0
  79. kreuzberg/_token_reduction/stopwords/zh_stopwords.json +796 -0
  80. kreuzberg/_token_reduction/stopwords/zu_stopwords.json +31 -0
  81. kreuzberg/_types.py +50 -9
  82. kreuzberg/_utils/_image_preprocessing.py +1 -1
  83. kreuzberg/_utils/_ref.py +14 -6
  84. kreuzberg/exceptions.py +0 -1
  85. kreuzberg/extraction.py +33 -10
  86. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.1.dist-info}/METADATA +6 -5
  87. kreuzberg-3.17.1.dist-info/RECORD +128 -0
  88. kreuzberg-3.16.0.dist-info/RECORD +0 -61
  89. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.1.dist-info}/WHEEL +0 -0
  90. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.1.dist-info}/entry_points.txt +0 -0
  91. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,31 @@
1
+ [
2
+ "futhi",
3
+ "kahle",
4
+ "kakhulu",
5
+ "kanye",
6
+ "khona",
7
+ "kodwa",
8
+ "kungani",
9
+ "kusho",
10
+ "la",
11
+ "lakhe",
12
+ "lapho",
13
+ "mina",
14
+ "ngesikhathi",
15
+ "nje",
16
+ "phansi",
17
+ "phezulu",
18
+ "u",
19
+ "ukuba",
20
+ "ukuthi",
21
+ "ukuze",
22
+ "uma",
23
+ "wahamba",
24
+ "wakhe",
25
+ "wami",
26
+ "wase",
27
+ "wathi",
28
+ "yakhe",
29
+ "zakhe",
30
+ "zonke"
31
+ ]
kreuzberg/_types.py CHANGED
@@ -7,6 +7,7 @@ from enum import Enum
7
7
  from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Literal, NamedTuple, TypedDict
9
9
 
10
+ import langcodes
10
11
  import msgspec
11
12
 
12
13
  from kreuzberg._constants import DEFAULT_MAX_CHARACTERS, DEFAULT_MAX_OVERLAP
@@ -401,9 +402,12 @@ class ImageOCRConfig(ConfigDict):
401
402
 
402
403
  @dataclass(unsafe_hash=True, frozen=True, slots=True)
403
404
  class LanguageDetectionConfig(ConfigDict):
404
- low_memory: bool = True
405
- """If True, uses a smaller model (~200MB). If False, uses a larger, more accurate model.
406
- Defaults to True for better memory efficiency."""
405
+ model: Literal["lite", "full", "auto"] = "auto"
406
+ """Language detection model to use:
407
+ - 'lite': Smaller, faster model with good accuracy
408
+ - 'full': Larger model with highest accuracy
409
+ - 'auto': Automatically choose based on memory availability (default)
410
+ """
407
411
  top_k: int = 3
408
412
  """Maximum number of languages to return for multilingual detection."""
409
413
  multilingual: bool = False
@@ -411,8 +415,8 @@ class LanguageDetectionConfig(ConfigDict):
411
415
  If False, uses single language detection."""
412
416
  cache_dir: str | None = None
413
417
  """Custom directory for model cache. If None, uses system default."""
414
- allow_fallback: bool = True
415
- """If True, falls back to small model if large model fails."""
418
+ low_memory: bool = True
419
+ """Deprecated. Use 'model' parameter instead. If True, uses 'lite' model."""
416
420
 
417
421
 
418
422
  @dataclass(unsafe_hash=True, frozen=True, slots=True)
@@ -695,6 +699,8 @@ class Metadata(TypedDict, total=False):
695
699
  """Message or communication content."""
696
700
  attributes: NotRequired[dict[str, Any]]
697
701
  """Additional attributes extracted from structured data (e.g., custom text fields with dotted keys)."""
702
+ token_reduction: NotRequired[dict[str, float]]
703
+ """Token reduction statistics including reduction ratios and counts."""
698
704
 
699
705
 
700
706
  _VALID_METADATA_KEYS = {
@@ -749,6 +755,7 @@ _VALID_METADATA_KEYS = {
749
755
  "text",
750
756
  "message",
751
757
  "attributes",
758
+ "token_reduction",
752
759
  }
753
760
 
754
761
 
@@ -979,8 +986,14 @@ class ExtractionConfig(ConfigDict):
979
986
  """Custom entity patterns as a frozenset of (entity_type, regex_pattern) tuples."""
980
987
  auto_detect_language: bool = False
981
988
  """Whether to automatically detect language and configure OCR accordingly."""
989
+ language_detection_model: Literal["lite", "full", "auto"] = "auto"
990
+ """Language detection model to use when auto_detect_language is True.
991
+ - 'lite': Smaller, faster model with good accuracy
992
+ - 'full': Larger model with highest accuracy
993
+ - 'auto': Automatically choose based on memory availability (default)
994
+ """
982
995
  language_detection_config: LanguageDetectionConfig | None = None
983
- """Configuration for language detection. If None, uses default settings."""
996
+ """Configuration for language detection. If None, uses default settings with language_detection_model."""
984
997
  spacy_entity_extraction_config: SpacyEntityExtractionConfig | None = None
985
998
  """Configuration for spaCy entity extraction. If None, uses default settings."""
986
999
  auto_detect_document_type: bool = False
@@ -1009,6 +1022,8 @@ class ExtractionConfig(ConfigDict):
1009
1022
  """Minimum DPI threshold when auto-adjusting DPI."""
1010
1023
  max_dpi: int = 600
1011
1024
  """Maximum DPI threshold when auto-adjusting DPI."""
1025
+ token_reduction: TokenReductionConfig | None = None
1026
+ """Configuration for token reduction to optimize output size while preserving meaning."""
1012
1027
 
1013
1028
  def __post_init__(self) -> None:
1014
1029
  if self.custom_entity_patterns is not None and isinstance(self.custom_entity_patterns, dict):
@@ -1151,11 +1166,11 @@ class HTMLToMarkdownConfig:
1151
1166
  """Mapping of HTML tag names to custom converter functions."""
1152
1167
  default_title: bool = False
1153
1168
  """Use default titles for elements like links."""
1154
- escape_asterisks: bool = True
1169
+ escape_asterisks: bool = False
1155
1170
  """Escape * characters to prevent unintended formatting."""
1156
- escape_misc: bool = True
1171
+ escape_misc: bool = False
1157
1172
  """Escape miscellaneous characters to prevent Markdown conflicts."""
1158
- escape_underscores: bool = True
1173
+ escape_underscores: bool = False
1159
1174
  """Escape _ characters to prevent unintended formatting."""
1160
1175
  extract_metadata: bool = True
1161
1176
  """Extract document metadata as comment header."""
@@ -1199,3 +1214,29 @@ class HTMLToMarkdownConfig:
1199
1214
  def to_dict(self) -> dict[str, Any]:
1200
1215
  result = msgspec.to_builtins(self, builtin_types=(type(None),), order="deterministic")
1201
1216
  return {k: v for k, v in result.items() if v is not None}
1217
+
1218
+
1219
+ @dataclass(unsafe_hash=True, frozen=True, slots=True)
1220
+ class TokenReductionConfig:
1221
+ mode: Literal["off", "light", "moderate"] = "off"
1222
+ preserve_markdown: bool = True
1223
+ custom_stopwords: dict[str, list[str]] | None = field(default=None, compare=False, hash=False)
1224
+ language_hint: str | None = None
1225
+
1226
+ def __post_init__(self) -> None:
1227
+ if self.language_hint:
1228
+ hint = self.language_hint.strip()
1229
+
1230
+ if not hint or len(hint) > 50 or any(c in hint for c in "\x00\r\n\t"):
1231
+ object.__setattr__(self, "language_hint", None)
1232
+ return
1233
+
1234
+ try:
1235
+ normalized = langcodes.standardize_tag(hint)
1236
+
1237
+ lang = langcodes.Language.get(normalized).language
1238
+
1239
+ if lang and lang != hint:
1240
+ object.__setattr__(self, "language_hint", lang)
1241
+ except (ValueError, AttributeError, TypeError):
1242
+ object.__setattr__(self, "language_hint", None)
@@ -198,7 +198,7 @@ def normalize_image_dpi(
198
198
  calculated_dpi=calculated_dpi,
199
199
  )
200
200
 
201
- except OSError as e:
201
+ except OSError as e: # pragma: no cover
202
202
  return image, ImagePreprocessingMetadata(
203
203
  original_dimensions=(original_width, original_height),
204
204
  original_dpi=original_dpi,
kreuzberg/_utils/_ref.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import threading
3
4
  from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, cast
4
5
 
5
6
  if TYPE_CHECKING:
@@ -10,23 +11,30 @@ T = TypeVar("T")
10
11
 
11
12
  class Ref(Generic[T]):
12
13
  _instances: ClassVar[dict[str, Any]] = {}
14
+ _lock: ClassVar[threading.Lock] = threading.Lock()
13
15
 
14
16
  def __init__(self, name: str, factory: Callable[[], T]) -> None:
15
17
  self.name = name
16
18
  self.factory = factory
17
19
 
18
20
  def get(self) -> T:
19
- if self.name not in self._instances:
20
- self._instances[self.name] = self.factory()
21
- return cast("T", self._instances[self.name])
21
+ if self.name in self._instances:
22
+ return cast("T", self._instances[self.name])
23
+
24
+ with self._lock:
25
+ if self.name not in self._instances:
26
+ self._instances[self.name] = self.factory()
27
+ return cast("T", self._instances[self.name])
22
28
 
23
29
  def clear(self) -> None:
24
- if self.name in self._instances:
25
- del self._instances[self.name]
30
+ with self._lock:
31
+ if self.name in self._instances:
32
+ del self._instances[self.name]
26
33
 
27
34
  def is_initialized(self) -> bool:
28
35
  return self.name in self._instances
29
36
 
30
37
  @classmethod
31
38
  def clear_all(cls) -> None:
32
- cls._instances.clear()
39
+ with cls._lock:
40
+ cls._instances.clear()
kreuzberg/exceptions.py CHANGED
@@ -17,7 +17,6 @@ class KreuzbergError(Exception):
17
17
  super().__init__(message)
18
18
 
19
19
  def _serialize_context(self, obj: Any) -> Any:
20
- """Recursively serialize context objects to ensure JSON compatibility."""
21
20
  if isinstance(obj, bytes):
22
21
  return obj.decode("utf-8", errors="replace")
23
22
  if isinstance(obj, dict):
kreuzberg/extraction.py CHANGED
@@ -15,6 +15,7 @@ from kreuzberg._mime_types import (
15
15
  validate_mime_type,
16
16
  )
17
17
  from kreuzberg._registry import ExtractorRegistry
18
+ from kreuzberg._token_reduction import get_reduction_stats, reduce_tokens
18
19
  from kreuzberg._types import ExtractionConfig, ExtractionResult
19
20
  from kreuzberg._utils._document_cache import get_document_cache
20
21
  from kreuzberg._utils._errors import create_error_context
@@ -31,15 +32,6 @@ DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
31
32
 
32
33
 
33
34
  async def _handle_cache_async(path: Path, config: ExtractionConfig) -> ExtractionResult | None:
34
- """Handle cache lookup and coordination with other processes.
35
-
36
- Args:
37
- path: Path to the file being processed
38
- config: Extraction configuration
39
-
40
- Returns:
41
- Cached result if available, None otherwise
42
- """
43
35
  cache = get_document_cache()
44
36
 
45
37
  cached_result = cache.get(path, config)
@@ -84,14 +76,45 @@ def _validate_and_post_process_helper(
84
76
  result.keywords = None
85
77
 
86
78
  if config.auto_detect_language:
79
+ # Use provided config or create one with the model from ExtractionConfig
80
+ lang_config = config.language_detection_config
81
+ if lang_config is None:
82
+ from kreuzberg._types import LanguageDetectionConfig # noqa: PLC0415
83
+
84
+ lang_config = LanguageDetectionConfig(model=config.language_detection_model)
85
+
87
86
  result.detected_languages = detect_languages(
88
87
  result.content,
89
- config=config.language_detection_config,
88
+ config=lang_config,
90
89
  )
91
90
 
92
91
  if config.auto_detect_document_type:
93
92
  result = auto_detect_document_type(result, config, file_path=file_path)
94
93
 
94
+ if config.token_reduction is not None and config.token_reduction.mode != "off":
95
+ original_content = result.content
96
+
97
+ language_hint = None
98
+ if result.detected_languages and len(result.detected_languages) > 0:
99
+ language_hint = result.detected_languages[0]
100
+
101
+ reduced_content = reduce_tokens(
102
+ original_content,
103
+ config=config.token_reduction,
104
+ language=language_hint,
105
+ )
106
+ reduction_stats = get_reduction_stats(original_content, reduced_content)
107
+
108
+ result.content = reduced_content
109
+ result.metadata["token_reduction"] = {
110
+ "character_reduction_ratio": reduction_stats["character_reduction_ratio"],
111
+ "token_reduction_ratio": reduction_stats["token_reduction_ratio"],
112
+ "original_characters": reduction_stats["original_characters"],
113
+ "reduced_characters": reduction_stats["reduced_characters"],
114
+ "original_tokens": reduction_stats["original_tokens"],
115
+ "reduced_tokens": reduction_stats["reduced_tokens"],
116
+ }
117
+
95
118
  return result
96
119
 
97
120
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.16.0
3
+ Version: 3.17.1
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -32,12 +32,13 @@ Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
34
  Requires-Dist: html-to-markdown[lxml]>=1.13.0
35
- Requires-Dist: mcp>=1.14.0
35
+ Requires-Dist: langcodes>=3.5.0
36
+ Requires-Dist: mcp>=1.14.1
36
37
  Requires-Dist: msgspec>=0.18.0
37
38
  Requires-Dist: numpy>=2.0.0
38
39
  Requires-Dist: playa-pdf>=0.7.0
39
40
  Requires-Dist: polars>=1.33.1
40
- Requires-Dist: psutil>=7.0.0
41
+ Requires-Dist: psutil>=7.1.0
41
42
  Requires-Dist: pypdfium2==4.30.0
42
43
  Requires-Dist: python-calamine>=0.5.3
43
44
  Requires-Dist: python-pptx>=1.0.2
@@ -49,7 +50,7 @@ Provides-Extra: all
49
50
  Requires-Dist: click>=8.2.1; extra == 'all'
50
51
  Requires-Dist: deep-translator>=1.11.4; extra == 'all'
51
52
  Requires-Dist: easyocr>=1.7.2; extra == 'all'
52
- Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
53
+ Requires-Dist: fast-langdetect>=1.0.0; extra == 'all'
53
54
  Requires-Dist: gmft>=0.4.2; extra == 'all'
54
55
  Requires-Dist: keybert>=0.9.0; extra == 'all'
55
56
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
@@ -82,7 +83,7 @@ Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
82
83
  Provides-Extra: gmft
83
84
  Requires-Dist: gmft>=0.4.2; extra == 'gmft'
84
85
  Provides-Extra: langdetect
85
- Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
86
+ Requires-Dist: fast-langdetect>=1.0.0; extra == 'langdetect'
86
87
  Provides-Extra: paddleocr
87
88
  Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
88
89
  Requires-Dist: paddlepaddle>=3.2.0; extra == 'paddleocr'
@@ -0,0 +1,128 @@
1
+ kreuzberg/__init__.py,sha256=niF_YZ7YADL_oXZ8zB5EMov4xnyFzuxTABVlHoRnBJA,1629
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
+ kreuzberg/_config.py,sha256=ZYIcnJAjDnbWW_2WBy7NlOk1Ol6WpoMG5FMNMmHpqSY,13086
5
+ kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
+ kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
+ kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
+ kreuzberg/_gmft.py,sha256=gfRXOsv-K9R7Y0zZ2SUa5wid3FpP2eFIlg5nepWcz1Q,20827
9
+ kreuzberg/_language_detection.py,sha256=OwIWIddERPEz8krU_Aq0_KjRF6MHP-LpugH6Y6miwOc,1204
10
+ kreuzberg/_mime_types.py,sha256=duEMDBg_qIf9A02tXAC_2znD-wgE-2BBMW9ofyYTJjE,8622
11
+ kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
+ kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
+ kreuzberg/_types.py,sha256=ttY61QI8mruCI70Af3owlU-O5LdvQ6gOqIZTGQ9PaVs,49129
14
+ kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
+ kreuzberg/exceptions.py,sha256=KiGAfIX3_TkGYG1h9eTZ_E_pALsAqhZ_A3XfhwxwaS0,2909
16
+ kreuzberg/extraction.py,sha256=jwzWdomwrl-2z1UznLoURLyqD5r0U-rFABXSBV2B2wA,19063
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
+ kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ kreuzberg/_extractors/_base.py,sha256=4MRBXdLsgdtdrTuupWb2IT9YpRSnNPpWWviS2mfeOXg,9961
23
+ kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
+ kreuzberg/_extractors/_html.py,sha256=TXXgwQZuEvnrny5HdBpn8oikGktyxgY9jvgZmnFtnqY,6371
25
+ kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
+ kreuzberg/_extractors/_pandoc.py,sha256=cwthr--IFwbu8r0rCZ_Cx5zRlan94yuqt5e3mjYxesE,24182
27
+ kreuzberg/_extractors/_pdf.py,sha256=GFy7xHUH09i48E5Xixy6nReF_uBu9646UTjywKoH-Rs,23304
28
+ kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
+ kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
+ kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
+ kreuzberg/_mcp/server.py,sha256=71MhjiFDwgFROdGejf0djgO1eG370qudWmZsN59CUeA,16743
33
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
+ kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
+ kreuzberg/_ocr/_easyocr.py,sha256=bHz2S_8nNHaPHPemcJK-U0al9_qP-vUmWE4ECVlf7AA,15485
36
+ kreuzberg/_ocr/_paddleocr.py,sha256=CV9cCjkRe-3cNJ5tRu_sBXd_HNghEwfPIgWwxAZTeRY,15026
37
+ kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
+ kreuzberg/_ocr/_tesseract.py,sha256=1SEfrX_JvU6KIeWt31GsRWnNmjaAh3xgQaRMPvoZLJA,51349
39
+ kreuzberg/_token_reduction/__init__.py,sha256=y_2WgPxJes8_PD-VMfx7vQT0hGjFIixzS8PjaIseAGg,311
40
+ kreuzberg/_token_reduction/_reducer.py,sha256=shAfMPznP69sTSzwX_bE1LpcBmoia9cpd7r6bSc4R5Q,13609
41
+ kreuzberg/_token_reduction/_stopwords.py,sha256=mu-5CapG0RCP7LYzjhdTM6WWLtmt3cjZ08OOsyQkJVg,3608
42
+ kreuzberg/_token_reduction/stopwords/af_stopwords.json,sha256=RlgUHyzPIQBbTA52kLSQpmTfteRbbV_qb_Spa51RI8Q,452
43
+ kreuzberg/_token_reduction/stopwords/ar_stopwords.json,sha256=GKcR9MyDM5zvIQhLqWfq87Jmj3gbAM81ZZi-eBKBjz8,6738
44
+ kreuzberg/_token_reduction/stopwords/bg_stopwords.json,sha256=7KuYBTg7jc8ZLFYI6QwaVatlh_gP8i80EkQHD1suhXs,3707
45
+ kreuzberg/_token_reduction/stopwords/bn_stopwords.json,sha256=qAtZN89LGy0se9i_FrB02vsNLgE2gG1clwMHTi4Qncg,7437
46
+ kreuzberg/_token_reduction/stopwords/br_stopwords.json,sha256=RuErMr4twvsocqw9fvwtgrnbzVKB2WABVi5AfPy2lqo,13601
47
+ kreuzberg/_token_reduction/stopwords/ca_stopwords.json,sha256=HRrh4QKFXDsAfmk5yjXHD28KjdO2vMjqJFIltNwh_F8,2952
48
+ kreuzberg/_token_reduction/stopwords/cs_stopwords.json,sha256=Dlsq3UFIHD9USIuHiwrFur0DvIaRpjftnBb3Qnjio4M,4523
49
+ kreuzberg/_token_reduction/stopwords/da_stopwords.json,sha256=zLk-90hrY5tH4gS4uOcMlviky4mmg7b8WaXnn_NzKfQ,1664
50
+ kreuzberg/_token_reduction/stopwords/de_stopwords.json,sha256=4lB0tUyT9PlB9ubnUbwOObO_RT0irBSdPOuvQIgrr7g,7052
51
+ kreuzberg/_token_reduction/stopwords/el_stopwords.json,sha256=VqO3y_q_ZeSBZAMxD1KjMMkCylYN2uuN620szHmFx-M,13604
52
+ kreuzberg/_token_reduction/stopwords/en_stopwords.json,sha256=VvLb0zoUKjhqQH-RGkSTpPtdRjvgv_G8l4i9ub2fJmU,14171
53
+ kreuzberg/_token_reduction/stopwords/eo_stopwords.json,sha256=xnojHtnik734Mzw4i4bIxPZEgBRXvgK2TRkHnxBCjWw,1722
54
+ kreuzberg/_token_reduction/stopwords/es_stopwords.json,sha256=PcSwxKskYQXc-21vNkpb6IntQYVP50CwuXfx4Gyhhx8,8598
55
+ kreuzberg/_token_reduction/stopwords/et_stopwords.json,sha256=_t6iPfNa1LhqRq4sLNbIB6_B5-472UCNi9IARJTPhzQ,327
56
+ kreuzberg/_token_reduction/stopwords/eu_stopwords.json,sha256=SNa84Zkx5Rcf8JZBdm4rCMxxZ7Z_94fW9cebZC4qgqI,1069
57
+ kreuzberg/_token_reduction/stopwords/fa_stopwords.json,sha256=8R1724IQHkXc1g_jXJjRMVLgq2Zz6YgPeE4DI0iSj9Y,11708
58
+ kreuzberg/_token_reduction/stopwords/fi_stopwords.json,sha256=yOlZLoh3aMJ-YXz3r7kGLAIsDyvxNrhFyvWr7Vu_z5o,10699
59
+ kreuzberg/_token_reduction/stopwords/fr_stopwords.json,sha256=KkiZ8dQYFQzjVJ-YwUoP13zwLwz7zu9Fpw-X-wmxya0,8025
60
+ kreuzberg/_token_reduction/stopwords/ga_stopwords.json,sha256=K8LOrUkqSi82KTLlZ1NnadEU-HMyCd2Ofm13GfxW3J0,1100
61
+ kreuzberg/_token_reduction/stopwords/gl_stopwords.json,sha256=Y0GfhhcOv1GNPJP3zoFYIYkg369GT1yHK5xCPiH6Pn0,1602
62
+ kreuzberg/_token_reduction/stopwords/gu_stopwords.json,sha256=YSldatfgVz_gNWopQ5TMFTHWEbGVYPcJMwO-bThtYAI,3818
63
+ kreuzberg/_token_reduction/stopwords/ha_stopwords.json,sha256=EohjrRkbSuLOn_aiDcsMOUEYPJjVha9wHhCsoxiwNsU,354
64
+ kreuzberg/_token_reduction/stopwords/he_stopwords.json,sha256=STlmHNDJqDEZI7ZCtBcZlEU1ndoEeJIexuOnTaOXJac,2629
65
+ kreuzberg/_token_reduction/stopwords/hi_stopwords.json,sha256=aYojvEA-UlivR_JCJTwZRoK2BJjVUW_m9q8eDRRczpE,3792
66
+ kreuzberg/_token_reduction/stopwords/hr_stopwords.json,sha256=2s5uhGAitVRDLgKdbA0F9sFZWtRWcmyiDZY9adwLGzk,1769
67
+ kreuzberg/_token_reduction/stopwords/hu_stopwords.json,sha256=9o0snSijbEEt9Hpbs4kTW6czhcdiXLTa5sbC68nitDY,9830
68
+ kreuzberg/_token_reduction/stopwords/hy_stopwords.json,sha256=QLsYw_y9ESyou1bHbPwjSrWy_nJq8wjiNihrvikYSKY,525
69
+ kreuzberg/_token_reduction/stopwords/id_stopwords.json,sha256=TZB_e1Txu3oGpQfHCzodoOTcKoKplTC5ZDr1iAbdzVI,10238
70
+ kreuzberg/_token_reduction/stopwords/it_stopwords.json,sha256=BSOpBGf_StyW6tdycNRMSvXGTksvrOowrE--D5914J4,7277
71
+ kreuzberg/_token_reduction/stopwords/ja_stopwords.json,sha256=E7MSvBOnRvTeChRk0Nm5X7xxwP50BHaP5FGOfDbnmRI,1680
72
+ kreuzberg/_token_reduction/stopwords/kn_stopwords.json,sha256=km3Qk1vy3OVdsAoE_YbZ-oXRYapFBi5k59o1mlWnk70,1626
73
+ kreuzberg/_token_reduction/stopwords/ko_stopwords.json,sha256=sHR2SLh_zXVs6SKZlWCS29MGRv6xlKcp3Ckvf0-aXt8,9932
74
+ kreuzberg/_token_reduction/stopwords/ku_stopwords.json,sha256=1Vj0g-fwacVcwaJ66BSPe4GkI7WybXK-EspIE6uvAmY,893
75
+ kreuzberg/_token_reduction/stopwords/la_stopwords.json,sha256=1d6iV2sTgZF6G7EF5yb3G0Sic85awtjN617cWXb-ltw,456
76
+ kreuzberg/_token_reduction/stopwords/lt_stopwords.json,sha256=7WE-NiX-y2IQnnO61-2pDExaR0ZeOq6A7YMn29effAM,5675
77
+ kreuzberg/_token_reduction/stopwords/lv_stopwords.json,sha256=WTp3jWxsX054E53DdpoI0BqujDefICljN4d7KiIIsls,1796
78
+ kreuzberg/_token_reduction/stopwords/ml_stopwords.json,sha256=lDoq0gGSI4zbuKhdNyF8MMTPkSI9wYb1om6pRPY5zkw,192
79
+ kreuzberg/_token_reduction/stopwords/mr_stopwords.json,sha256=6XjzSLaHwwOGWot1QszaUMl12mAVFh840GH9MJoYoes,1764
80
+ kreuzberg/_token_reduction/stopwords/ms_stopwords.json,sha256=eJsXJ0bVOnWUSVG3XwkIClxlR3qd_2k75ZAQlmHpsKQ,5950
81
+ kreuzberg/_token_reduction/stopwords/ne_stopwords.json,sha256=MoAXH6Tncag9Qgr6TR7yp1FguDCGQBXpGdSQ2DIOikE,9447
82
+ kreuzberg/_token_reduction/stopwords/nl_stopwords.json,sha256=W08hz9JP3EdWpXtLPUjWFOSr3AwGnZPkwcjEUBiFWnA,4724
83
+ kreuzberg/_token_reduction/stopwords/no_stopwords.json,sha256=bOjDCti_Loe0ZYSF2mR-LQzMqViZRkur1GEOLh4Mr-A,2210
84
+ kreuzberg/_token_reduction/stopwords/pl_stopwords.json,sha256=TcnvzF5uMVDKxQUt1YBu7Lw1qIpeHftuIDSguz8ZAdA,3487
85
+ kreuzberg/_token_reduction/stopwords/pt_stopwords.json,sha256=h4jmBxUu10PuzQzTjeFm1B5NBl0Owt7uGhwx66mTTYQ,6413
86
+ kreuzberg/_token_reduction/stopwords/ro_stopwords.json,sha256=iuHvFs-iS118RH07v0hO7Oxfdx5rDqJwl3lRPMWINbM,4569
87
+ kreuzberg/_token_reduction/stopwords/ru_stopwords.json,sha256=MZckTBKlL1i4Kv16RSSozUfCM6dcKI5H9PYZD7pS0Ac,9028
88
+ kreuzberg/_token_reduction/stopwords/si_stopwords.json,sha256=jvtaQfO4fc-XPHgaO1hPsbpJQQg40rSeEbCGWm2AO60,3324
89
+ kreuzberg/_token_reduction/stopwords/sk_stopwords.json,sha256=FDaLmQ61_fFg0k3cGthv8flKFs67M1hmSE-6PrfMCAU,4638
90
+ kreuzberg/_token_reduction/stopwords/sl_stopwords.json,sha256=UoQRoLRT9qzmS8ALY_cuDE1uukK0hS6Q6QuUhr7oLHc,4669
91
+ kreuzberg/_token_reduction/stopwords/so_stopwords.json,sha256=Z7ayeNV98MOx_xkGxtcSX3dh8GAhgCRFa0EC1VDG29Q,299
92
+ kreuzberg/_token_reduction/stopwords/st_stopwords.json,sha256=ajvBq5XQCse62nptN_m8Jll5-Ps9j3bK4RODMIzCkD4,268
93
+ kreuzberg/_token_reduction/stopwords/sv_stopwords.json,sha256=kLz5vgx0VfQI0jtOj3Rlp6wuj3tKhqp2oF-f9f2-neQ,4737
94
+ kreuzberg/_token_reduction/stopwords/sw_stopwords.json,sha256=x4eOC7-nRlSS7qv_pwW6yECDrfhm_3zoTWenIPL1aWY,780
95
+ kreuzberg/_token_reduction/stopwords/ta_stopwords.json,sha256=qBbEu6m_HEx2C27ep6UJOyxQ6st74Et1fN8TvRHoTxw,2634
96
+ kreuzberg/_token_reduction/stopwords/te_stopwords.json,sha256=GT0Rj3MsgCJSj9GdzKjpgsQJE3-wCaS5Aa3_ynIZKx0,1263
97
+ kreuzberg/_token_reduction/stopwords/th_stopwords.json,sha256=5DEb-W41TFL4BGS-_CJzgPTkpmuLN20WBfeO1hG0HLc,2010
98
+ kreuzberg/_token_reduction/stopwords/tl_stopwords.json,sha256=pu3wAWQyT0vzGwSO8N2x2mRlaCHzEgEIvECTCrJOLE8,1663
99
+ kreuzberg/_token_reduction/stopwords/tr_stopwords.json,sha256=hSmUsApI7lxVfwJwAInkCLoa3YoGjI85Mwg9DpiHTDo,6159
100
+ kreuzberg/_token_reduction/stopwords/uk_stopwords.json,sha256=_j_lYv_bE5RAEMcW7-u0rYWf39fMrlpIgFEMFQDjqW0,965
101
+ kreuzberg/_token_reduction/stopwords/ur_stopwords.json,sha256=IcrM74VdmSbgM7wlBtFVtkrWsCI0SDFbRCSSAkyvlqo,7370
102
+ kreuzberg/_token_reduction/stopwords/vi_stopwords.json,sha256=UOyAEKBwMcQV65QGpQU-ynmyignNoqFzUSQ8p_1XuoY,9152
103
+ kreuzberg/_token_reduction/stopwords/yo_stopwords.json,sha256=60liY89h7KReEvHEPxe-hCWLPuqr4U89aQDCi7iRCfo,651
104
+ kreuzberg/_token_reduction/stopwords/zh_stopwords.json,sha256=rouSTCkXun90Q1aCvLjHyt4I7pGrtlcruDpNVybpAMI,8934
105
+ kreuzberg/_token_reduction/stopwords/zu_stopwords.json,sha256=hfm4E2EDI_VWyR0GUOVjcMQA7ZDH7FsV4FUMcns1H28,324
106
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
+ kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
108
+ kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
109
+ kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
110
+ kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
111
+ kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
112
+ kreuzberg/_utils/_image_preprocessing.py,sha256=f7ioWQyARnhzj0am0Y1_eteJwWomdPy7AnbXqw2xWBs,10954
113
+ kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
114
+ kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
115
+ kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
116
+ kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
117
+ kreuzberg/_utils/_ref.py,sha256=BDuk9hHYq1KPRgenjC3-6iFEjGsrGfHZKr9tPNhfquU,1109
118
+ kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
119
+ kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
120
+ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
121
+ kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
122
+ kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
123
+ kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
124
+ kreuzberg-3.17.1.dist-info/METADATA,sha256=ttfOl3XA6b-M2BMY7v1cfASGm_Qe91HPzfRcAf_-zU8,12351
125
+ kreuzberg-3.17.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
126
+ kreuzberg-3.17.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
127
+ kreuzberg-3.17.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
128
+ kreuzberg-3.17.1.dist-info/RECORD,,
@@ -1,61 +0,0 @@
1
- kreuzberg/__init__.py,sha256=EE6ENEjyKlt0o6QN1cG3Z_1isCtminVOjQT7ii5eBHA,1575
2
- kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
- kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
- kreuzberg/_config.py,sha256=H4jUAL0fNY-YE61GbGq5UtAUtXHbZA4-9W3YwcT_hu8,12988
5
- kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
- kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
- kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
- kreuzberg/_gmft.py,sha256=XI8vdBG0tdEVwFiabVieCuvxM5esqTSiFtsEwJ0YT5g,20787
9
- kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
- kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
11
- kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
- kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
- kreuzberg/_types.py,sha256=Xht1_TcvsbIpdmLYMy6Pa_HpbQuF9MBOo-BrKkZ7cLA,47358
14
- kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=5TuuRqLRmboLaTS0x9eZ2lrYOHKJBSHuTT_U-5nn6ek,17829
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
- kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
23
- kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
- kreuzberg/_extractors/_html.py,sha256=zZ9WZmmoIG9B5dGF25ulm_GmW9RsYFI1HddDUUp3hOE,6351
25
- kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
- kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
27
- kreuzberg/_extractors/_pdf.py,sha256=78gPO7m8nPFIOskqqRpUfyOhKUk6f5rjJ0cZDnL9Vdk,23224
28
- kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
- kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
- kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
- kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
- kreuzberg/_mcp/server.py,sha256=vJWCXbBiv0ktIPZeLedSWZEwKF46p6642H6lxhTnjek,16723
33
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
- kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
- kreuzberg/_ocr/_easyocr.py,sha256=7bkMM_zN0h7ZiX0-VHxxnwNOhQloI-dlOOibpRc-vNs,15710
36
- kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
37
- kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
- kreuzberg/_ocr/_tesseract.py,sha256=BjTKE6ilUpSEKarHdgP3PbsE6I89JeqgDtpQ-XHniBA,51452
39
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
41
- kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
42
- kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
43
- kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
44
- kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
45
- kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
46
- kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
47
- kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
48
- kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
49
- kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
50
- kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
51
- kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
52
- kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
53
- kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
54
- kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
55
- kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
56
- kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
57
- kreuzberg-3.16.0.dist-info/METADATA,sha256=d1sUA7WBl0VcXHX0jPGzTHeXmj7yyJzTWjzHUmT-Dp4,12319
58
- kreuzberg-3.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
59
- kreuzberg-3.16.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
60
- kreuzberg-3.16.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
61
- kreuzberg-3.16.0.dist-info/RECORD,,