kreuzberg 3.16.0__py3-none-any.whl → 3.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. kreuzberg/__init__.py +2 -0
  2. kreuzberg/_config.py +8 -9
  3. kreuzberg/_extractors/_base.py +0 -46
  4. kreuzberg/_extractors/_html.py +1 -1
  5. kreuzberg/_extractors/_pandoc.py +2 -2
  6. kreuzberg/_extractors/_pdf.py +4 -4
  7. kreuzberg/_gmft.py +2 -2
  8. kreuzberg/_mcp/server.py +1 -1
  9. kreuzberg/_mime_types.py +1 -1
  10. kreuzberg/_ocr/_easyocr.py +4 -9
  11. kreuzberg/_ocr/_paddleocr.py +1 -1
  12. kreuzberg/_ocr/_tesseract.py +15 -25
  13. kreuzberg/_token_reduction/__init__.py +11 -0
  14. kreuzberg/_token_reduction/_reducer.py +439 -0
  15. kreuzberg/_token_reduction/_stopwords.py +116 -0
  16. kreuzberg/_token_reduction/stopwords/af_stopwords.json +53 -0
  17. kreuzberg/_token_reduction/stopwords/ar_stopwords.json +482 -0
  18. kreuzberg/_token_reduction/stopwords/bg_stopwords.json +261 -0
  19. kreuzberg/_token_reduction/stopwords/bn_stopwords.json +400 -0
  20. kreuzberg/_token_reduction/stopwords/br_stopwords.json +1205 -0
  21. kreuzberg/_token_reduction/stopwords/ca_stopwords.json +280 -0
  22. kreuzberg/_token_reduction/stopwords/cs_stopwords.json +425 -0
  23. kreuzberg/_token_reduction/stopwords/da_stopwords.json +172 -0
  24. kreuzberg/_token_reduction/stopwords/de_stopwords.json +622 -0
  25. kreuzberg/_token_reduction/stopwords/el_stopwords.json +849 -0
  26. kreuzberg/_token_reduction/stopwords/en_stopwords.json +1300 -0
  27. kreuzberg/_token_reduction/stopwords/eo_stopwords.json +175 -0
  28. kreuzberg/_token_reduction/stopwords/es_stopwords.json +734 -0
  29. kreuzberg/_token_reduction/stopwords/et_stopwords.json +37 -0
  30. kreuzberg/_token_reduction/stopwords/eu_stopwords.json +100 -0
  31. kreuzberg/_token_reduction/stopwords/fa_stopwords.json +801 -0
  32. kreuzberg/_token_reduction/stopwords/fi_stopwords.json +849 -0
  33. kreuzberg/_token_reduction/stopwords/fr_stopwords.json +693 -0
  34. kreuzberg/_token_reduction/stopwords/ga_stopwords.json +111 -0
  35. kreuzberg/_token_reduction/stopwords/gl_stopwords.json +162 -0
  36. kreuzberg/_token_reduction/stopwords/gu_stopwords.json +226 -0
  37. kreuzberg/_token_reduction/stopwords/ha_stopwords.json +41 -0
  38. kreuzberg/_token_reduction/stopwords/he_stopwords.json +196 -0
  39. kreuzberg/_token_reduction/stopwords/hi_stopwords.json +227 -0
  40. kreuzberg/_token_reduction/stopwords/hr_stopwords.json +181 -0
  41. kreuzberg/_token_reduction/stopwords/hu_stopwords.json +791 -0
  42. kreuzberg/_token_reduction/stopwords/hy_stopwords.json +47 -0
  43. kreuzberg/_token_reduction/stopwords/id_stopwords.json +760 -0
  44. kreuzberg/_token_reduction/stopwords/it_stopwords.json +634 -0
  45. kreuzberg/_token_reduction/stopwords/ja_stopwords.json +136 -0
  46. kreuzberg/_token_reduction/stopwords/kn_stopwords.json +84 -0
  47. kreuzberg/_token_reduction/stopwords/ko_stopwords.json +681 -0
  48. kreuzberg/_token_reduction/stopwords/ku_stopwords.json +64 -0
  49. kreuzberg/_token_reduction/stopwords/la_stopwords.json +51 -0
  50. kreuzberg/_token_reduction/stopwords/lt_stopwords.json +476 -0
  51. kreuzberg/_token_reduction/stopwords/lv_stopwords.json +163 -0
  52. kreuzberg/_token_reduction/stopwords/ml_stopwords.json +11 -0
  53. kreuzberg/_token_reduction/stopwords/mr_stopwords.json +101 -0
  54. kreuzberg/_token_reduction/stopwords/ms_stopwords.json +477 -0
  55. kreuzberg/_token_reduction/stopwords/ne_stopwords.json +490 -0
  56. kreuzberg/_token_reduction/stopwords/nl_stopwords.json +415 -0
  57. kreuzberg/_token_reduction/stopwords/no_stopwords.json +223 -0
  58. kreuzberg/_token_reduction/stopwords/pl_stopwords.json +331 -0
  59. kreuzberg/_token_reduction/stopwords/pt_stopwords.json +562 -0
  60. kreuzberg/_token_reduction/stopwords/ro_stopwords.json +436 -0
  61. kreuzberg/_token_reduction/stopwords/ru_stopwords.json +561 -0
  62. kreuzberg/_token_reduction/stopwords/si_stopwords.json +193 -0
  63. kreuzberg/_token_reduction/stopwords/sk_stopwords.json +420 -0
  64. kreuzberg/_token_reduction/stopwords/sl_stopwords.json +448 -0
  65. kreuzberg/_token_reduction/stopwords/so_stopwords.json +32 -0
  66. kreuzberg/_token_reduction/stopwords/st_stopwords.json +33 -0
  67. kreuzberg/_token_reduction/stopwords/sv_stopwords.json +420 -0
  68. kreuzberg/_token_reduction/stopwords/sw_stopwords.json +76 -0
  69. kreuzberg/_token_reduction/stopwords/ta_stopwords.json +129 -0
  70. kreuzberg/_token_reduction/stopwords/te_stopwords.json +54 -0
  71. kreuzberg/_token_reduction/stopwords/th_stopwords.json +118 -0
  72. kreuzberg/_token_reduction/stopwords/tl_stopwords.json +149 -0
  73. kreuzberg/_token_reduction/stopwords/tr_stopwords.json +506 -0
  74. kreuzberg/_token_reduction/stopwords/uk_stopwords.json +75 -0
  75. kreuzberg/_token_reduction/stopwords/ur_stopwords.json +519 -0
  76. kreuzberg/_token_reduction/stopwords/vi_stopwords.json +647 -0
  77. kreuzberg/_token_reduction/stopwords/yo_stopwords.json +62 -0
  78. kreuzberg/_token_reduction/stopwords/zh_stopwords.json +796 -0
  79. kreuzberg/_token_reduction/stopwords/zu_stopwords.json +31 -0
  80. kreuzberg/_types.py +35 -3
  81. kreuzberg/_utils/_image_preprocessing.py +1 -1
  82. kreuzberg/_utils/_ref.py +14 -6
  83. kreuzberg/exceptions.py +0 -1
  84. kreuzberg/extraction.py +25 -9
  85. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/METADATA +4 -3
  86. kreuzberg-3.17.0.dist-info/RECORD +128 -0
  87. kreuzberg-3.16.0.dist-info/RECORD +0 -61
  88. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/WHEEL +0 -0
  89. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/entry_points.txt +0 -0
  90. {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,31 @@
1
+ [
2
+ "futhi",
3
+ "kahle",
4
+ "kakhulu",
5
+ "kanye",
6
+ "khona",
7
+ "kodwa",
8
+ "kungani",
9
+ "kusho",
10
+ "la",
11
+ "lakhe",
12
+ "lapho",
13
+ "mina",
14
+ "ngesikhathi",
15
+ "nje",
16
+ "phansi",
17
+ "phezulu",
18
+ "u",
19
+ "ukuba",
20
+ "ukuthi",
21
+ "ukuze",
22
+ "uma",
23
+ "wahamba",
24
+ "wakhe",
25
+ "wami",
26
+ "wase",
27
+ "wathi",
28
+ "yakhe",
29
+ "zakhe",
30
+ "zonke"
31
+ ]
kreuzberg/_types.py CHANGED
@@ -7,6 +7,7 @@ from enum import Enum
7
7
  from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Literal, NamedTuple, TypedDict
9
9
 
10
+ import langcodes
10
11
  import msgspec
11
12
 
12
13
  from kreuzberg._constants import DEFAULT_MAX_CHARACTERS, DEFAULT_MAX_OVERLAP
@@ -695,6 +696,8 @@ class Metadata(TypedDict, total=False):
695
696
  """Message or communication content."""
696
697
  attributes: NotRequired[dict[str, Any]]
697
698
  """Additional attributes extracted from structured data (e.g., custom text fields with dotted keys)."""
699
+ token_reduction: NotRequired[dict[str, float]]
700
+ """Token reduction statistics including reduction ratios and counts."""
698
701
 
699
702
 
700
703
  _VALID_METADATA_KEYS = {
@@ -749,6 +752,7 @@ _VALID_METADATA_KEYS = {
749
752
  "text",
750
753
  "message",
751
754
  "attributes",
755
+ "token_reduction",
752
756
  }
753
757
 
754
758
 
@@ -1009,6 +1013,8 @@ class ExtractionConfig(ConfigDict):
1009
1013
  """Minimum DPI threshold when auto-adjusting DPI."""
1010
1014
  max_dpi: int = 600
1011
1015
  """Maximum DPI threshold when auto-adjusting DPI."""
1016
+ token_reduction: TokenReductionConfig | None = None
1017
+ """Configuration for token reduction to optimize output size while preserving meaning."""
1012
1018
 
1013
1019
  def __post_init__(self) -> None:
1014
1020
  if self.custom_entity_patterns is not None and isinstance(self.custom_entity_patterns, dict):
@@ -1151,11 +1157,11 @@ class HTMLToMarkdownConfig:
1151
1157
  """Mapping of HTML tag names to custom converter functions."""
1152
1158
  default_title: bool = False
1153
1159
  """Use default titles for elements like links."""
1154
- escape_asterisks: bool = True
1160
+ escape_asterisks: bool = False
1155
1161
  """Escape * characters to prevent unintended formatting."""
1156
- escape_misc: bool = True
1162
+ escape_misc: bool = False
1157
1163
  """Escape miscellaneous characters to prevent Markdown conflicts."""
1158
- escape_underscores: bool = True
1164
+ escape_underscores: bool = False
1159
1165
  """Escape _ characters to prevent unintended formatting."""
1160
1166
  extract_metadata: bool = True
1161
1167
  """Extract document metadata as comment header."""
@@ -1199,3 +1205,29 @@ class HTMLToMarkdownConfig:
1199
1205
  def to_dict(self) -> dict[str, Any]:
1200
1206
  result = msgspec.to_builtins(self, builtin_types=(type(None),), order="deterministic")
1201
1207
  return {k: v for k, v in result.items() if v is not None}
1208
+
1209
+
1210
+ @dataclass(unsafe_hash=True, frozen=True, slots=True)
1211
+ class TokenReductionConfig:
1212
+ mode: Literal["off", "light", "moderate"] = "off"
1213
+ preserve_markdown: bool = True
1214
+ custom_stopwords: dict[str, list[str]] | None = field(default=None, compare=False, hash=False)
1215
+ language_hint: str | None = None
1216
+
1217
+ def __post_init__(self) -> None:
1218
+ if self.language_hint:
1219
+ hint = self.language_hint.strip()
1220
+
1221
+ if not hint or len(hint) > 50 or any(c in hint for c in "\x00\r\n\t"):
1222
+ object.__setattr__(self, "language_hint", None)
1223
+ return
1224
+
1225
+ try:
1226
+ normalized = langcodes.standardize_tag(hint)
1227
+
1228
+ lang = langcodes.Language.get(normalized).language
1229
+
1230
+ if lang and lang != hint:
1231
+ object.__setattr__(self, "language_hint", lang)
1232
+ except (ValueError, AttributeError, TypeError):
1233
+ object.__setattr__(self, "language_hint", None)
@@ -198,7 +198,7 @@ def normalize_image_dpi(
198
198
  calculated_dpi=calculated_dpi,
199
199
  )
200
200
 
201
- except OSError as e:
201
+ except OSError as e: # pragma: no cover
202
202
  return image, ImagePreprocessingMetadata(
203
203
  original_dimensions=(original_width, original_height),
204
204
  original_dpi=original_dpi,
kreuzberg/_utils/_ref.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import threading
3
4
  from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, cast
4
5
 
5
6
  if TYPE_CHECKING:
@@ -10,23 +11,30 @@ T = TypeVar("T")
10
11
 
11
12
  class Ref(Generic[T]):
12
13
  _instances: ClassVar[dict[str, Any]] = {}
14
+ _lock: ClassVar[threading.Lock] = threading.Lock()
13
15
 
14
16
  def __init__(self, name: str, factory: Callable[[], T]) -> None:
15
17
  self.name = name
16
18
  self.factory = factory
17
19
 
18
20
  def get(self) -> T:
19
- if self.name not in self._instances:
20
- self._instances[self.name] = self.factory()
21
- return cast("T", self._instances[self.name])
21
+ if self.name in self._instances:
22
+ return cast("T", self._instances[self.name])
23
+
24
+ with self._lock:
25
+ if self.name not in self._instances:
26
+ self._instances[self.name] = self.factory()
27
+ return cast("T", self._instances[self.name])
22
28
 
23
29
  def clear(self) -> None:
24
- if self.name in self._instances:
25
- del self._instances[self.name]
30
+ with self._lock:
31
+ if self.name in self._instances:
32
+ del self._instances[self.name]
26
33
 
27
34
  def is_initialized(self) -> bool:
28
35
  return self.name in self._instances
29
36
 
30
37
  @classmethod
31
38
  def clear_all(cls) -> None:
32
- cls._instances.clear()
39
+ with cls._lock:
40
+ cls._instances.clear()
kreuzberg/exceptions.py CHANGED
@@ -17,7 +17,6 @@ class KreuzbergError(Exception):
17
17
  super().__init__(message)
18
18
 
19
19
  def _serialize_context(self, obj: Any) -> Any:
20
- """Recursively serialize context objects to ensure JSON compatibility."""
21
20
  if isinstance(obj, bytes):
22
21
  return obj.decode("utf-8", errors="replace")
23
22
  if isinstance(obj, dict):
kreuzberg/extraction.py CHANGED
@@ -15,6 +15,7 @@ from kreuzberg._mime_types import (
15
15
  validate_mime_type,
16
16
  )
17
17
  from kreuzberg._registry import ExtractorRegistry
18
+ from kreuzberg._token_reduction import get_reduction_stats, reduce_tokens
18
19
  from kreuzberg._types import ExtractionConfig, ExtractionResult
19
20
  from kreuzberg._utils._document_cache import get_document_cache
20
21
  from kreuzberg._utils._errors import create_error_context
@@ -31,15 +32,6 @@ DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
31
32
 
32
33
 
33
34
  async def _handle_cache_async(path: Path, config: ExtractionConfig) -> ExtractionResult | None:
34
- """Handle cache lookup and coordination with other processes.
35
-
36
- Args:
37
- path: Path to the file being processed
38
- config: Extraction configuration
39
-
40
- Returns:
41
- Cached result if available, None otherwise
42
- """
43
35
  cache = get_document_cache()
44
36
 
45
37
  cached_result = cache.get(path, config)
@@ -92,6 +84,30 @@ def _validate_and_post_process_helper(
92
84
  if config.auto_detect_document_type:
93
85
  result = auto_detect_document_type(result, config, file_path=file_path)
94
86
 
87
+ if config.token_reduction is not None and config.token_reduction.mode != "off":
88
+ original_content = result.content
89
+
90
+ language_hint = None
91
+ if result.detected_languages and len(result.detected_languages) > 0:
92
+ language_hint = result.detected_languages[0]
93
+
94
+ reduced_content = reduce_tokens(
95
+ original_content,
96
+ config=config.token_reduction,
97
+ language=language_hint,
98
+ )
99
+ reduction_stats = get_reduction_stats(original_content, reduced_content)
100
+
101
+ result.content = reduced_content
102
+ result.metadata["token_reduction"] = {
103
+ "character_reduction_ratio": reduction_stats["character_reduction_ratio"],
104
+ "token_reduction_ratio": reduction_stats["token_reduction_ratio"],
105
+ "original_characters": reduction_stats["original_characters"],
106
+ "reduced_characters": reduction_stats["reduced_characters"],
107
+ "original_tokens": reduction_stats["original_tokens"],
108
+ "reduced_tokens": reduction_stats["reduced_tokens"],
109
+ }
110
+
95
111
  return result
96
112
 
97
113
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.16.0
3
+ Version: 3.17.0
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -32,6 +32,7 @@ Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
34
  Requires-Dist: html-to-markdown[lxml]>=1.13.0
35
+ Requires-Dist: langcodes>=3.5.0
35
36
  Requires-Dist: mcp>=1.14.0
36
37
  Requires-Dist: msgspec>=0.18.0
37
38
  Requires-Dist: numpy>=2.0.0
@@ -49,7 +50,7 @@ Provides-Extra: all
49
50
  Requires-Dist: click>=8.2.1; extra == 'all'
50
51
  Requires-Dist: deep-translator>=1.11.4; extra == 'all'
51
52
  Requires-Dist: easyocr>=1.7.2; extra == 'all'
52
- Requires-Dist: fast-langdetect>=0.3.2; extra == 'all'
53
+ Requires-Dist: fast-langdetect>=1.0.0; extra == 'all'
53
54
  Requires-Dist: gmft>=0.4.2; extra == 'all'
54
55
  Requires-Dist: keybert>=0.9.0; extra == 'all'
55
56
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
@@ -82,7 +83,7 @@ Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
82
83
  Provides-Extra: gmft
83
84
  Requires-Dist: gmft>=0.4.2; extra == 'gmft'
84
85
  Provides-Extra: langdetect
85
- Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
86
+ Requires-Dist: fast-langdetect>=1.0.0; extra == 'langdetect'
86
87
  Provides-Extra: paddleocr
87
88
  Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
88
89
  Requires-Dist: paddlepaddle>=3.2.0; extra == 'paddleocr'
@@ -0,0 +1,128 @@
1
+ kreuzberg/__init__.py,sha256=niF_YZ7YADL_oXZ8zB5EMov4xnyFzuxTABVlHoRnBJA,1629
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
+ kreuzberg/_config.py,sha256=ZYIcnJAjDnbWW_2WBy7NlOk1Ol6WpoMG5FMNMmHpqSY,13086
5
+ kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
+ kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
+ kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
+ kreuzberg/_gmft.py,sha256=gfRXOsv-K9R7Y0zZ2SUa5wid3FpP2eFIlg5nepWcz1Q,20827
9
+ kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
+ kreuzberg/_mime_types.py,sha256=duEMDBg_qIf9A02tXAC_2znD-wgE-2BBMW9ofyYTJjE,8622
11
+ kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
+ kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
+ kreuzberg/_types.py,sha256=uULpUfQzpt_AAr8epOvIl3cdB9TkNTFrxWQssnZg_IM,48655
14
+ kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
+ kreuzberg/exceptions.py,sha256=KiGAfIX3_TkGYG1h9eTZ_E_pALsAqhZ_A3XfhwxwaS0,2909
16
+ kreuzberg/extraction.py,sha256=Z2rBVGs8oteXU1mynHCd9q1yKz9NNA5tQdWq35jP2EE,18743
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
+ kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ kreuzberg/_extractors/_base.py,sha256=4MRBXdLsgdtdrTuupWb2IT9YpRSnNPpWWviS2mfeOXg,9961
23
+ kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
+ kreuzberg/_extractors/_html.py,sha256=TXXgwQZuEvnrny5HdBpn8oikGktyxgY9jvgZmnFtnqY,6371
25
+ kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
+ kreuzberg/_extractors/_pandoc.py,sha256=cwthr--IFwbu8r0rCZ_Cx5zRlan94yuqt5e3mjYxesE,24182
27
+ kreuzberg/_extractors/_pdf.py,sha256=GFy7xHUH09i48E5Xixy6nReF_uBu9646UTjywKoH-Rs,23304
28
+ kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
+ kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
+ kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
+ kreuzberg/_mcp/server.py,sha256=71MhjiFDwgFROdGejf0djgO1eG370qudWmZsN59CUeA,16743
33
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
+ kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
+ kreuzberg/_ocr/_easyocr.py,sha256=bHz2S_8nNHaPHPemcJK-U0al9_qP-vUmWE4ECVlf7AA,15485
36
+ kreuzberg/_ocr/_paddleocr.py,sha256=CV9cCjkRe-3cNJ5tRu_sBXd_HNghEwfPIgWwxAZTeRY,15026
37
+ kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
+ kreuzberg/_ocr/_tesseract.py,sha256=1SEfrX_JvU6KIeWt31GsRWnNmjaAh3xgQaRMPvoZLJA,51349
39
+ kreuzberg/_token_reduction/__init__.py,sha256=y_2WgPxJes8_PD-VMfx7vQT0hGjFIixzS8PjaIseAGg,311
40
+ kreuzberg/_token_reduction/_reducer.py,sha256=shAfMPznP69sTSzwX_bE1LpcBmoia9cpd7r6bSc4R5Q,13609
41
+ kreuzberg/_token_reduction/_stopwords.py,sha256=mu-5CapG0RCP7LYzjhdTM6WWLtmt3cjZ08OOsyQkJVg,3608
42
+ kreuzberg/_token_reduction/stopwords/af_stopwords.json,sha256=RlgUHyzPIQBbTA52kLSQpmTfteRbbV_qb_Spa51RI8Q,452
43
+ kreuzberg/_token_reduction/stopwords/ar_stopwords.json,sha256=GKcR9MyDM5zvIQhLqWfq87Jmj3gbAM81ZZi-eBKBjz8,6738
44
+ kreuzberg/_token_reduction/stopwords/bg_stopwords.json,sha256=7KuYBTg7jc8ZLFYI6QwaVatlh_gP8i80EkQHD1suhXs,3707
45
+ kreuzberg/_token_reduction/stopwords/bn_stopwords.json,sha256=qAtZN89LGy0se9i_FrB02vsNLgE2gG1clwMHTi4Qncg,7437
46
+ kreuzberg/_token_reduction/stopwords/br_stopwords.json,sha256=RuErMr4twvsocqw9fvwtgrnbzVKB2WABVi5AfPy2lqo,13601
47
+ kreuzberg/_token_reduction/stopwords/ca_stopwords.json,sha256=HRrh4QKFXDsAfmk5yjXHD28KjdO2vMjqJFIltNwh_F8,2952
48
+ kreuzberg/_token_reduction/stopwords/cs_stopwords.json,sha256=Dlsq3UFIHD9USIuHiwrFur0DvIaRpjftnBb3Qnjio4M,4523
49
+ kreuzberg/_token_reduction/stopwords/da_stopwords.json,sha256=zLk-90hrY5tH4gS4uOcMlviky4mmg7b8WaXnn_NzKfQ,1664
50
+ kreuzberg/_token_reduction/stopwords/de_stopwords.json,sha256=4lB0tUyT9PlB9ubnUbwOObO_RT0irBSdPOuvQIgrr7g,7052
51
+ kreuzberg/_token_reduction/stopwords/el_stopwords.json,sha256=VqO3y_q_ZeSBZAMxD1KjMMkCylYN2uuN620szHmFx-M,13604
52
+ kreuzberg/_token_reduction/stopwords/en_stopwords.json,sha256=VvLb0zoUKjhqQH-RGkSTpPtdRjvgv_G8l4i9ub2fJmU,14171
53
+ kreuzberg/_token_reduction/stopwords/eo_stopwords.json,sha256=xnojHtnik734Mzw4i4bIxPZEgBRXvgK2TRkHnxBCjWw,1722
54
+ kreuzberg/_token_reduction/stopwords/es_stopwords.json,sha256=PcSwxKskYQXc-21vNkpb6IntQYVP50CwuXfx4Gyhhx8,8598
55
+ kreuzberg/_token_reduction/stopwords/et_stopwords.json,sha256=_t6iPfNa1LhqRq4sLNbIB6_B5-472UCNi9IARJTPhzQ,327
56
+ kreuzberg/_token_reduction/stopwords/eu_stopwords.json,sha256=SNa84Zkx5Rcf8JZBdm4rCMxxZ7Z_94fW9cebZC4qgqI,1069
57
+ kreuzberg/_token_reduction/stopwords/fa_stopwords.json,sha256=8R1724IQHkXc1g_jXJjRMVLgq2Zz6YgPeE4DI0iSj9Y,11708
58
+ kreuzberg/_token_reduction/stopwords/fi_stopwords.json,sha256=yOlZLoh3aMJ-YXz3r7kGLAIsDyvxNrhFyvWr7Vu_z5o,10699
59
+ kreuzberg/_token_reduction/stopwords/fr_stopwords.json,sha256=KkiZ8dQYFQzjVJ-YwUoP13zwLwz7zu9Fpw-X-wmxya0,8025
60
+ kreuzberg/_token_reduction/stopwords/ga_stopwords.json,sha256=K8LOrUkqSi82KTLlZ1NnadEU-HMyCd2Ofm13GfxW3J0,1100
61
+ kreuzberg/_token_reduction/stopwords/gl_stopwords.json,sha256=Y0GfhhcOv1GNPJP3zoFYIYkg369GT1yHK5xCPiH6Pn0,1602
62
+ kreuzberg/_token_reduction/stopwords/gu_stopwords.json,sha256=YSldatfgVz_gNWopQ5TMFTHWEbGVYPcJMwO-bThtYAI,3818
63
+ kreuzberg/_token_reduction/stopwords/ha_stopwords.json,sha256=EohjrRkbSuLOn_aiDcsMOUEYPJjVha9wHhCsoxiwNsU,354
64
+ kreuzberg/_token_reduction/stopwords/he_stopwords.json,sha256=STlmHNDJqDEZI7ZCtBcZlEU1ndoEeJIexuOnTaOXJac,2629
65
+ kreuzberg/_token_reduction/stopwords/hi_stopwords.json,sha256=aYojvEA-UlivR_JCJTwZRoK2BJjVUW_m9q8eDRRczpE,3792
66
+ kreuzberg/_token_reduction/stopwords/hr_stopwords.json,sha256=2s5uhGAitVRDLgKdbA0F9sFZWtRWcmyiDZY9adwLGzk,1769
67
+ kreuzberg/_token_reduction/stopwords/hu_stopwords.json,sha256=9o0snSijbEEt9Hpbs4kTW6czhcdiXLTa5sbC68nitDY,9830
68
+ kreuzberg/_token_reduction/stopwords/hy_stopwords.json,sha256=QLsYw_y9ESyou1bHbPwjSrWy_nJq8wjiNihrvikYSKY,525
69
+ kreuzberg/_token_reduction/stopwords/id_stopwords.json,sha256=TZB_e1Txu3oGpQfHCzodoOTcKoKplTC5ZDr1iAbdzVI,10238
70
+ kreuzberg/_token_reduction/stopwords/it_stopwords.json,sha256=BSOpBGf_StyW6tdycNRMSvXGTksvrOowrE--D5914J4,7277
71
+ kreuzberg/_token_reduction/stopwords/ja_stopwords.json,sha256=E7MSvBOnRvTeChRk0Nm5X7xxwP50BHaP5FGOfDbnmRI,1680
72
+ kreuzberg/_token_reduction/stopwords/kn_stopwords.json,sha256=km3Qk1vy3OVdsAoE_YbZ-oXRYapFBi5k59o1mlWnk70,1626
73
+ kreuzberg/_token_reduction/stopwords/ko_stopwords.json,sha256=sHR2SLh_zXVs6SKZlWCS29MGRv6xlKcp3Ckvf0-aXt8,9932
74
+ kreuzberg/_token_reduction/stopwords/ku_stopwords.json,sha256=1Vj0g-fwacVcwaJ66BSPe4GkI7WybXK-EspIE6uvAmY,893
75
+ kreuzberg/_token_reduction/stopwords/la_stopwords.json,sha256=1d6iV2sTgZF6G7EF5yb3G0Sic85awtjN617cWXb-ltw,456
76
+ kreuzberg/_token_reduction/stopwords/lt_stopwords.json,sha256=7WE-NiX-y2IQnnO61-2pDExaR0ZeOq6A7YMn29effAM,5675
77
+ kreuzberg/_token_reduction/stopwords/lv_stopwords.json,sha256=WTp3jWxsX054E53DdpoI0BqujDefICljN4d7KiIIsls,1796
78
+ kreuzberg/_token_reduction/stopwords/ml_stopwords.json,sha256=lDoq0gGSI4zbuKhdNyF8MMTPkSI9wYb1om6pRPY5zkw,192
79
+ kreuzberg/_token_reduction/stopwords/mr_stopwords.json,sha256=6XjzSLaHwwOGWot1QszaUMl12mAVFh840GH9MJoYoes,1764
80
+ kreuzberg/_token_reduction/stopwords/ms_stopwords.json,sha256=eJsXJ0bVOnWUSVG3XwkIClxlR3qd_2k75ZAQlmHpsKQ,5950
81
+ kreuzberg/_token_reduction/stopwords/ne_stopwords.json,sha256=MoAXH6Tncag9Qgr6TR7yp1FguDCGQBXpGdSQ2DIOikE,9447
82
+ kreuzberg/_token_reduction/stopwords/nl_stopwords.json,sha256=W08hz9JP3EdWpXtLPUjWFOSr3AwGnZPkwcjEUBiFWnA,4724
83
+ kreuzberg/_token_reduction/stopwords/no_stopwords.json,sha256=bOjDCti_Loe0ZYSF2mR-LQzMqViZRkur1GEOLh4Mr-A,2210
84
+ kreuzberg/_token_reduction/stopwords/pl_stopwords.json,sha256=TcnvzF5uMVDKxQUt1YBu7Lw1qIpeHftuIDSguz8ZAdA,3487
85
+ kreuzberg/_token_reduction/stopwords/pt_stopwords.json,sha256=h4jmBxUu10PuzQzTjeFm1B5NBl0Owt7uGhwx66mTTYQ,6413
86
+ kreuzberg/_token_reduction/stopwords/ro_stopwords.json,sha256=iuHvFs-iS118RH07v0hO7Oxfdx5rDqJwl3lRPMWINbM,4569
87
+ kreuzberg/_token_reduction/stopwords/ru_stopwords.json,sha256=MZckTBKlL1i4Kv16RSSozUfCM6dcKI5H9PYZD7pS0Ac,9028
88
+ kreuzberg/_token_reduction/stopwords/si_stopwords.json,sha256=jvtaQfO4fc-XPHgaO1hPsbpJQQg40rSeEbCGWm2AO60,3324
89
+ kreuzberg/_token_reduction/stopwords/sk_stopwords.json,sha256=FDaLmQ61_fFg0k3cGthv8flKFs67M1hmSE-6PrfMCAU,4638
90
+ kreuzberg/_token_reduction/stopwords/sl_stopwords.json,sha256=UoQRoLRT9qzmS8ALY_cuDE1uukK0hS6Q6QuUhr7oLHc,4669
91
+ kreuzberg/_token_reduction/stopwords/so_stopwords.json,sha256=Z7ayeNV98MOx_xkGxtcSX3dh8GAhgCRFa0EC1VDG29Q,299
92
+ kreuzberg/_token_reduction/stopwords/st_stopwords.json,sha256=ajvBq5XQCse62nptN_m8Jll5-Ps9j3bK4RODMIzCkD4,268
93
+ kreuzberg/_token_reduction/stopwords/sv_stopwords.json,sha256=kLz5vgx0VfQI0jtOj3Rlp6wuj3tKhqp2oF-f9f2-neQ,4737
94
+ kreuzberg/_token_reduction/stopwords/sw_stopwords.json,sha256=x4eOC7-nRlSS7qv_pwW6yECDrfhm_3zoTWenIPL1aWY,780
95
+ kreuzberg/_token_reduction/stopwords/ta_stopwords.json,sha256=qBbEu6m_HEx2C27ep6UJOyxQ6st74Et1fN8TvRHoTxw,2634
96
+ kreuzberg/_token_reduction/stopwords/te_stopwords.json,sha256=GT0Rj3MsgCJSj9GdzKjpgsQJE3-wCaS5Aa3_ynIZKx0,1263
97
+ kreuzberg/_token_reduction/stopwords/th_stopwords.json,sha256=5DEb-W41TFL4BGS-_CJzgPTkpmuLN20WBfeO1hG0HLc,2010
98
+ kreuzberg/_token_reduction/stopwords/tl_stopwords.json,sha256=pu3wAWQyT0vzGwSO8N2x2mRlaCHzEgEIvECTCrJOLE8,1663
99
+ kreuzberg/_token_reduction/stopwords/tr_stopwords.json,sha256=hSmUsApI7lxVfwJwAInkCLoa3YoGjI85Mwg9DpiHTDo,6159
100
+ kreuzberg/_token_reduction/stopwords/uk_stopwords.json,sha256=_j_lYv_bE5RAEMcW7-u0rYWf39fMrlpIgFEMFQDjqW0,965
101
+ kreuzberg/_token_reduction/stopwords/ur_stopwords.json,sha256=IcrM74VdmSbgM7wlBtFVtkrWsCI0SDFbRCSSAkyvlqo,7370
102
+ kreuzberg/_token_reduction/stopwords/vi_stopwords.json,sha256=UOyAEKBwMcQV65QGpQU-ynmyignNoqFzUSQ8p_1XuoY,9152
103
+ kreuzberg/_token_reduction/stopwords/yo_stopwords.json,sha256=60liY89h7KReEvHEPxe-hCWLPuqr4U89aQDCi7iRCfo,651
104
+ kreuzberg/_token_reduction/stopwords/zh_stopwords.json,sha256=rouSTCkXun90Q1aCvLjHyt4I7pGrtlcruDpNVybpAMI,8934
105
+ kreuzberg/_token_reduction/stopwords/zu_stopwords.json,sha256=hfm4E2EDI_VWyR0GUOVjcMQA7ZDH7FsV4FUMcns1H28,324
106
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
+ kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
108
+ kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
109
+ kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
110
+ kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
111
+ kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
112
+ kreuzberg/_utils/_image_preprocessing.py,sha256=f7ioWQyARnhzj0am0Y1_eteJwWomdPy7AnbXqw2xWBs,10954
113
+ kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
114
+ kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
115
+ kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
116
+ kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
117
+ kreuzberg/_utils/_ref.py,sha256=BDuk9hHYq1KPRgenjC3-6iFEjGsrGfHZKr9tPNhfquU,1109
118
+ kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
119
+ kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
120
+ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
121
+ kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
122
+ kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
123
+ kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
124
+ kreuzberg-3.17.0.dist-info/METADATA,sha256=4iVwQUo4FVNSwj8h6oEqNAT5B6zm-J-u5k3Jy3Pv3L0,12351
125
+ kreuzberg-3.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
126
+ kreuzberg-3.17.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
127
+ kreuzberg-3.17.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
128
+ kreuzberg-3.17.0.dist-info/RECORD,,
@@ -1,61 +0,0 @@
1
- kreuzberg/__init__.py,sha256=EE6ENEjyKlt0o6QN1cG3Z_1isCtminVOjQT7ii5eBHA,1575
2
- kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
- kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
- kreuzberg/_config.py,sha256=H4jUAL0fNY-YE61GbGq5UtAUtXHbZA4-9W3YwcT_hu8,12988
5
- kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
- kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
- kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
- kreuzberg/_gmft.py,sha256=XI8vdBG0tdEVwFiabVieCuvxM5esqTSiFtsEwJ0YT5g,20787
9
- kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
- kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
11
- kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
- kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
- kreuzberg/_types.py,sha256=Xht1_TcvsbIpdmLYMy6Pa_HpbQuF9MBOo-BrKkZ7cLA,47358
14
- kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=5TuuRqLRmboLaTS0x9eZ2lrYOHKJBSHuTT_U-5nn6ek,17829
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
- kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
23
- kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
- kreuzberg/_extractors/_html.py,sha256=zZ9WZmmoIG9B5dGF25ulm_GmW9RsYFI1HddDUUp3hOE,6351
25
- kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
- kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
27
- kreuzberg/_extractors/_pdf.py,sha256=78gPO7m8nPFIOskqqRpUfyOhKUk6f5rjJ0cZDnL9Vdk,23224
28
- kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
- kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
- kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
- kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
- kreuzberg/_mcp/server.py,sha256=vJWCXbBiv0ktIPZeLedSWZEwKF46p6642H6lxhTnjek,16723
33
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
- kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
- kreuzberg/_ocr/_easyocr.py,sha256=7bkMM_zN0h7ZiX0-VHxxnwNOhQloI-dlOOibpRc-vNs,15710
36
- kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
37
- kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
- kreuzberg/_ocr/_tesseract.py,sha256=BjTKE6ilUpSEKarHdgP3PbsE6I89JeqgDtpQ-XHniBA,51452
39
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
41
- kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
42
- kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
43
- kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
44
- kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
45
- kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
46
- kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
47
- kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
48
- kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
49
- kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
50
- kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
51
- kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
52
- kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
53
- kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
54
- kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
55
- kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
56
- kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
57
- kreuzberg-3.16.0.dist-info/METADATA,sha256=d1sUA7WBl0VcXHX0jPGzTHeXmj7yyJzTWjzHUmT-Dp4,12319
58
- kreuzberg-3.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
59
- kreuzberg-3.16.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
60
- kreuzberg-3.16.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
61
- kreuzberg-3.16.0.dist-info/RECORD,,