kreuzberg 3.16.0__py3-none-any.whl → 3.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/__init__.py +2 -0
- kreuzberg/_config.py +8 -9
- kreuzberg/_extractors/_base.py +0 -46
- kreuzberg/_extractors/_html.py +1 -1
- kreuzberg/_extractors/_pandoc.py +2 -2
- kreuzberg/_extractors/_pdf.py +4 -4
- kreuzberg/_gmft.py +2 -2
- kreuzberg/_mcp/server.py +1 -1
- kreuzberg/_mime_types.py +1 -1
- kreuzberg/_ocr/_easyocr.py +4 -9
- kreuzberg/_ocr/_paddleocr.py +1 -1
- kreuzberg/_ocr/_tesseract.py +15 -25
- kreuzberg/_token_reduction/__init__.py +11 -0
- kreuzberg/_token_reduction/_reducer.py +439 -0
- kreuzberg/_token_reduction/_stopwords.py +116 -0
- kreuzberg/_token_reduction/stopwords/af_stopwords.json +53 -0
- kreuzberg/_token_reduction/stopwords/ar_stopwords.json +482 -0
- kreuzberg/_token_reduction/stopwords/bg_stopwords.json +261 -0
- kreuzberg/_token_reduction/stopwords/bn_stopwords.json +400 -0
- kreuzberg/_token_reduction/stopwords/br_stopwords.json +1205 -0
- kreuzberg/_token_reduction/stopwords/ca_stopwords.json +280 -0
- kreuzberg/_token_reduction/stopwords/cs_stopwords.json +425 -0
- kreuzberg/_token_reduction/stopwords/da_stopwords.json +172 -0
- kreuzberg/_token_reduction/stopwords/de_stopwords.json +622 -0
- kreuzberg/_token_reduction/stopwords/el_stopwords.json +849 -0
- kreuzberg/_token_reduction/stopwords/en_stopwords.json +1300 -0
- kreuzberg/_token_reduction/stopwords/eo_stopwords.json +175 -0
- kreuzberg/_token_reduction/stopwords/es_stopwords.json +734 -0
- kreuzberg/_token_reduction/stopwords/et_stopwords.json +37 -0
- kreuzberg/_token_reduction/stopwords/eu_stopwords.json +100 -0
- kreuzberg/_token_reduction/stopwords/fa_stopwords.json +801 -0
- kreuzberg/_token_reduction/stopwords/fi_stopwords.json +849 -0
- kreuzberg/_token_reduction/stopwords/fr_stopwords.json +693 -0
- kreuzberg/_token_reduction/stopwords/ga_stopwords.json +111 -0
- kreuzberg/_token_reduction/stopwords/gl_stopwords.json +162 -0
- kreuzberg/_token_reduction/stopwords/gu_stopwords.json +226 -0
- kreuzberg/_token_reduction/stopwords/ha_stopwords.json +41 -0
- kreuzberg/_token_reduction/stopwords/he_stopwords.json +196 -0
- kreuzberg/_token_reduction/stopwords/hi_stopwords.json +227 -0
- kreuzberg/_token_reduction/stopwords/hr_stopwords.json +181 -0
- kreuzberg/_token_reduction/stopwords/hu_stopwords.json +791 -0
- kreuzberg/_token_reduction/stopwords/hy_stopwords.json +47 -0
- kreuzberg/_token_reduction/stopwords/id_stopwords.json +760 -0
- kreuzberg/_token_reduction/stopwords/it_stopwords.json +634 -0
- kreuzberg/_token_reduction/stopwords/ja_stopwords.json +136 -0
- kreuzberg/_token_reduction/stopwords/kn_stopwords.json +84 -0
- kreuzberg/_token_reduction/stopwords/ko_stopwords.json +681 -0
- kreuzberg/_token_reduction/stopwords/ku_stopwords.json +64 -0
- kreuzberg/_token_reduction/stopwords/la_stopwords.json +51 -0
- kreuzberg/_token_reduction/stopwords/lt_stopwords.json +476 -0
- kreuzberg/_token_reduction/stopwords/lv_stopwords.json +163 -0
- kreuzberg/_token_reduction/stopwords/ml_stopwords.json +11 -0
- kreuzberg/_token_reduction/stopwords/mr_stopwords.json +101 -0
- kreuzberg/_token_reduction/stopwords/ms_stopwords.json +477 -0
- kreuzberg/_token_reduction/stopwords/ne_stopwords.json +490 -0
- kreuzberg/_token_reduction/stopwords/nl_stopwords.json +415 -0
- kreuzberg/_token_reduction/stopwords/no_stopwords.json +223 -0
- kreuzberg/_token_reduction/stopwords/pl_stopwords.json +331 -0
- kreuzberg/_token_reduction/stopwords/pt_stopwords.json +562 -0
- kreuzberg/_token_reduction/stopwords/ro_stopwords.json +436 -0
- kreuzberg/_token_reduction/stopwords/ru_stopwords.json +561 -0
- kreuzberg/_token_reduction/stopwords/si_stopwords.json +193 -0
- kreuzberg/_token_reduction/stopwords/sk_stopwords.json +420 -0
- kreuzberg/_token_reduction/stopwords/sl_stopwords.json +448 -0
- kreuzberg/_token_reduction/stopwords/so_stopwords.json +32 -0
- kreuzberg/_token_reduction/stopwords/st_stopwords.json +33 -0
- kreuzberg/_token_reduction/stopwords/sv_stopwords.json +420 -0
- kreuzberg/_token_reduction/stopwords/sw_stopwords.json +76 -0
- kreuzberg/_token_reduction/stopwords/ta_stopwords.json +129 -0
- kreuzberg/_token_reduction/stopwords/te_stopwords.json +54 -0
- kreuzberg/_token_reduction/stopwords/th_stopwords.json +118 -0
- kreuzberg/_token_reduction/stopwords/tl_stopwords.json +149 -0
- kreuzberg/_token_reduction/stopwords/tr_stopwords.json +506 -0
- kreuzberg/_token_reduction/stopwords/uk_stopwords.json +75 -0
- kreuzberg/_token_reduction/stopwords/ur_stopwords.json +519 -0
- kreuzberg/_token_reduction/stopwords/vi_stopwords.json +647 -0
- kreuzberg/_token_reduction/stopwords/yo_stopwords.json +62 -0
- kreuzberg/_token_reduction/stopwords/zh_stopwords.json +796 -0
- kreuzberg/_token_reduction/stopwords/zu_stopwords.json +31 -0
- kreuzberg/_types.py +35 -3
- kreuzberg/_utils/_image_preprocessing.py +1 -1
- kreuzberg/_utils/_ref.py +14 -6
- kreuzberg/exceptions.py +0 -1
- kreuzberg/extraction.py +25 -9
- {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/METADATA +4 -3
- kreuzberg-3.17.0.dist-info/RECORD +128 -0
- kreuzberg-3.16.0.dist-info/RECORD +0 -61
- {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/WHEEL +0 -0
- {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.16.0.dist-info → kreuzberg-3.17.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
[
|
2
|
+
"futhi",
|
3
|
+
"kahle",
|
4
|
+
"kakhulu",
|
5
|
+
"kanye",
|
6
|
+
"khona",
|
7
|
+
"kodwa",
|
8
|
+
"kungani",
|
9
|
+
"kusho",
|
10
|
+
"la",
|
11
|
+
"lakhe",
|
12
|
+
"lapho",
|
13
|
+
"mina",
|
14
|
+
"ngesikhathi",
|
15
|
+
"nje",
|
16
|
+
"phansi",
|
17
|
+
"phezulu",
|
18
|
+
"u",
|
19
|
+
"ukuba",
|
20
|
+
"ukuthi",
|
21
|
+
"ukuze",
|
22
|
+
"uma",
|
23
|
+
"wahamba",
|
24
|
+
"wakhe",
|
25
|
+
"wami",
|
26
|
+
"wase",
|
27
|
+
"wathi",
|
28
|
+
"yakhe",
|
29
|
+
"zakhe",
|
30
|
+
"zonke"
|
31
|
+
]
|
kreuzberg/_types.py
CHANGED
@@ -7,6 +7,7 @@ from enum import Enum
|
|
7
7
|
from pathlib import Path
|
8
8
|
from typing import TYPE_CHECKING, Any, Literal, NamedTuple, TypedDict
|
9
9
|
|
10
|
+
import langcodes
|
10
11
|
import msgspec
|
11
12
|
|
12
13
|
from kreuzberg._constants import DEFAULT_MAX_CHARACTERS, DEFAULT_MAX_OVERLAP
|
@@ -695,6 +696,8 @@ class Metadata(TypedDict, total=False):
|
|
695
696
|
"""Message or communication content."""
|
696
697
|
attributes: NotRequired[dict[str, Any]]
|
697
698
|
"""Additional attributes extracted from structured data (e.g., custom text fields with dotted keys)."""
|
699
|
+
token_reduction: NotRequired[dict[str, float]]
|
700
|
+
"""Token reduction statistics including reduction ratios and counts."""
|
698
701
|
|
699
702
|
|
700
703
|
_VALID_METADATA_KEYS = {
|
@@ -749,6 +752,7 @@ _VALID_METADATA_KEYS = {
|
|
749
752
|
"text",
|
750
753
|
"message",
|
751
754
|
"attributes",
|
755
|
+
"token_reduction",
|
752
756
|
}
|
753
757
|
|
754
758
|
|
@@ -1009,6 +1013,8 @@ class ExtractionConfig(ConfigDict):
|
|
1009
1013
|
"""Minimum DPI threshold when auto-adjusting DPI."""
|
1010
1014
|
max_dpi: int = 600
|
1011
1015
|
"""Maximum DPI threshold when auto-adjusting DPI."""
|
1016
|
+
token_reduction: TokenReductionConfig | None = None
|
1017
|
+
"""Configuration for token reduction to optimize output size while preserving meaning."""
|
1012
1018
|
|
1013
1019
|
def __post_init__(self) -> None:
|
1014
1020
|
if self.custom_entity_patterns is not None and isinstance(self.custom_entity_patterns, dict):
|
@@ -1151,11 +1157,11 @@ class HTMLToMarkdownConfig:
|
|
1151
1157
|
"""Mapping of HTML tag names to custom converter functions."""
|
1152
1158
|
default_title: bool = False
|
1153
1159
|
"""Use default titles for elements like links."""
|
1154
|
-
escape_asterisks: bool =
|
1160
|
+
escape_asterisks: bool = False
|
1155
1161
|
"""Escape * characters to prevent unintended formatting."""
|
1156
|
-
escape_misc: bool =
|
1162
|
+
escape_misc: bool = False
|
1157
1163
|
"""Escape miscellaneous characters to prevent Markdown conflicts."""
|
1158
|
-
escape_underscores: bool =
|
1164
|
+
escape_underscores: bool = False
|
1159
1165
|
"""Escape _ characters to prevent unintended formatting."""
|
1160
1166
|
extract_metadata: bool = True
|
1161
1167
|
"""Extract document metadata as comment header."""
|
@@ -1199,3 +1205,29 @@ class HTMLToMarkdownConfig:
|
|
1199
1205
|
def to_dict(self) -> dict[str, Any]:
|
1200
1206
|
result = msgspec.to_builtins(self, builtin_types=(type(None),), order="deterministic")
|
1201
1207
|
return {k: v for k, v in result.items() if v is not None}
|
1208
|
+
|
1209
|
+
|
1210
|
+
@dataclass(unsafe_hash=True, frozen=True, slots=True)
|
1211
|
+
class TokenReductionConfig:
|
1212
|
+
mode: Literal["off", "light", "moderate"] = "off"
|
1213
|
+
preserve_markdown: bool = True
|
1214
|
+
custom_stopwords: dict[str, list[str]] | None = field(default=None, compare=False, hash=False)
|
1215
|
+
language_hint: str | None = None
|
1216
|
+
|
1217
|
+
def __post_init__(self) -> None:
|
1218
|
+
if self.language_hint:
|
1219
|
+
hint = self.language_hint.strip()
|
1220
|
+
|
1221
|
+
if not hint or len(hint) > 50 or any(c in hint for c in "\x00\r\n\t"):
|
1222
|
+
object.__setattr__(self, "language_hint", None)
|
1223
|
+
return
|
1224
|
+
|
1225
|
+
try:
|
1226
|
+
normalized = langcodes.standardize_tag(hint)
|
1227
|
+
|
1228
|
+
lang = langcodes.Language.get(normalized).language
|
1229
|
+
|
1230
|
+
if lang and lang != hint:
|
1231
|
+
object.__setattr__(self, "language_hint", lang)
|
1232
|
+
except (ValueError, AttributeError, TypeError):
|
1233
|
+
object.__setattr__(self, "language_hint", None)
|
@@ -198,7 +198,7 @@ def normalize_image_dpi(
|
|
198
198
|
calculated_dpi=calculated_dpi,
|
199
199
|
)
|
200
200
|
|
201
|
-
except OSError as e:
|
201
|
+
except OSError as e: # pragma: no cover
|
202
202
|
return image, ImagePreprocessingMetadata(
|
203
203
|
original_dimensions=(original_width, original_height),
|
204
204
|
original_dpi=original_dpi,
|
kreuzberg/_utils/_ref.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import threading
|
3
4
|
from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, cast
|
4
5
|
|
5
6
|
if TYPE_CHECKING:
|
@@ -10,23 +11,30 @@ T = TypeVar("T")
|
|
10
11
|
|
11
12
|
class Ref(Generic[T]):
|
12
13
|
_instances: ClassVar[dict[str, Any]] = {}
|
14
|
+
_lock: ClassVar[threading.Lock] = threading.Lock()
|
13
15
|
|
14
16
|
def __init__(self, name: str, factory: Callable[[], T]) -> None:
|
15
17
|
self.name = name
|
16
18
|
self.factory = factory
|
17
19
|
|
18
20
|
def get(self) -> T:
|
19
|
-
if self.name
|
20
|
-
self._instances[self.name]
|
21
|
-
|
21
|
+
if self.name in self._instances:
|
22
|
+
return cast("T", self._instances[self.name])
|
23
|
+
|
24
|
+
with self._lock:
|
25
|
+
if self.name not in self._instances:
|
26
|
+
self._instances[self.name] = self.factory()
|
27
|
+
return cast("T", self._instances[self.name])
|
22
28
|
|
23
29
|
def clear(self) -> None:
|
24
|
-
|
25
|
-
|
30
|
+
with self._lock:
|
31
|
+
if self.name in self._instances:
|
32
|
+
del self._instances[self.name]
|
26
33
|
|
27
34
|
def is_initialized(self) -> bool:
|
28
35
|
return self.name in self._instances
|
29
36
|
|
30
37
|
@classmethod
|
31
38
|
def clear_all(cls) -> None:
|
32
|
-
cls.
|
39
|
+
with cls._lock:
|
40
|
+
cls._instances.clear()
|
kreuzberg/exceptions.py
CHANGED
@@ -17,7 +17,6 @@ class KreuzbergError(Exception):
|
|
17
17
|
super().__init__(message)
|
18
18
|
|
19
19
|
def _serialize_context(self, obj: Any) -> Any:
|
20
|
-
"""Recursively serialize context objects to ensure JSON compatibility."""
|
21
20
|
if isinstance(obj, bytes):
|
22
21
|
return obj.decode("utf-8", errors="replace")
|
23
22
|
if isinstance(obj, dict):
|
kreuzberg/extraction.py
CHANGED
@@ -15,6 +15,7 @@ from kreuzberg._mime_types import (
|
|
15
15
|
validate_mime_type,
|
16
16
|
)
|
17
17
|
from kreuzberg._registry import ExtractorRegistry
|
18
|
+
from kreuzberg._token_reduction import get_reduction_stats, reduce_tokens
|
18
19
|
from kreuzberg._types import ExtractionConfig, ExtractionResult
|
19
20
|
from kreuzberg._utils._document_cache import get_document_cache
|
20
21
|
from kreuzberg._utils._errors import create_error_context
|
@@ -31,15 +32,6 @@ DEFAULT_CONFIG: Final[ExtractionConfig] = ExtractionConfig()
|
|
31
32
|
|
32
33
|
|
33
34
|
async def _handle_cache_async(path: Path, config: ExtractionConfig) -> ExtractionResult | None:
|
34
|
-
"""Handle cache lookup and coordination with other processes.
|
35
|
-
|
36
|
-
Args:
|
37
|
-
path: Path to the file being processed
|
38
|
-
config: Extraction configuration
|
39
|
-
|
40
|
-
Returns:
|
41
|
-
Cached result if available, None otherwise
|
42
|
-
"""
|
43
35
|
cache = get_document_cache()
|
44
36
|
|
45
37
|
cached_result = cache.get(path, config)
|
@@ -92,6 +84,30 @@ def _validate_and_post_process_helper(
|
|
92
84
|
if config.auto_detect_document_type:
|
93
85
|
result = auto_detect_document_type(result, config, file_path=file_path)
|
94
86
|
|
87
|
+
if config.token_reduction is not None and config.token_reduction.mode != "off":
|
88
|
+
original_content = result.content
|
89
|
+
|
90
|
+
language_hint = None
|
91
|
+
if result.detected_languages and len(result.detected_languages) > 0:
|
92
|
+
language_hint = result.detected_languages[0]
|
93
|
+
|
94
|
+
reduced_content = reduce_tokens(
|
95
|
+
original_content,
|
96
|
+
config=config.token_reduction,
|
97
|
+
language=language_hint,
|
98
|
+
)
|
99
|
+
reduction_stats = get_reduction_stats(original_content, reduced_content)
|
100
|
+
|
101
|
+
result.content = reduced_content
|
102
|
+
result.metadata["token_reduction"] = {
|
103
|
+
"character_reduction_ratio": reduction_stats["character_reduction_ratio"],
|
104
|
+
"token_reduction_ratio": reduction_stats["token_reduction_ratio"],
|
105
|
+
"original_characters": reduction_stats["original_characters"],
|
106
|
+
"reduced_characters": reduction_stats["reduced_characters"],
|
107
|
+
"original_tokens": reduction_stats["original_tokens"],
|
108
|
+
"reduced_tokens": reduction_stats["reduced_tokens"],
|
109
|
+
}
|
110
|
+
|
95
111
|
return result
|
96
112
|
|
97
113
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.
|
3
|
+
Version: 3.17.0
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -32,6 +32,7 @@ Requires-Dist: anyio>=4.10.0
|
|
32
32
|
Requires-Dist: chardetng-py>=0.3.5
|
33
33
|
Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
34
34
|
Requires-Dist: html-to-markdown[lxml]>=1.13.0
|
35
|
+
Requires-Dist: langcodes>=3.5.0
|
35
36
|
Requires-Dist: mcp>=1.14.0
|
36
37
|
Requires-Dist: msgspec>=0.18.0
|
37
38
|
Requires-Dist: numpy>=2.0.0
|
@@ -49,7 +50,7 @@ Provides-Extra: all
|
|
49
50
|
Requires-Dist: click>=8.2.1; extra == 'all'
|
50
51
|
Requires-Dist: deep-translator>=1.11.4; extra == 'all'
|
51
52
|
Requires-Dist: easyocr>=1.7.2; extra == 'all'
|
52
|
-
Requires-Dist: fast-langdetect>=0.
|
53
|
+
Requires-Dist: fast-langdetect>=1.0.0; extra == 'all'
|
53
54
|
Requires-Dist: gmft>=0.4.2; extra == 'all'
|
54
55
|
Requires-Dist: keybert>=0.9.0; extra == 'all'
|
55
56
|
Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
|
@@ -82,7 +83,7 @@ Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
|
|
82
83
|
Provides-Extra: gmft
|
83
84
|
Requires-Dist: gmft>=0.4.2; extra == 'gmft'
|
84
85
|
Provides-Extra: langdetect
|
85
|
-
Requires-Dist: fast-langdetect>=0.
|
86
|
+
Requires-Dist: fast-langdetect>=1.0.0; extra == 'langdetect'
|
86
87
|
Provides-Extra: paddleocr
|
87
88
|
Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
|
88
89
|
Requires-Dist: paddlepaddle>=3.2.0; extra == 'paddleocr'
|
@@ -0,0 +1,128 @@
|
|
1
|
+
kreuzberg/__init__.py,sha256=niF_YZ7YADL_oXZ8zB5EMov4xnyFzuxTABVlHoRnBJA,1629
|
2
|
+
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
+
kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
|
4
|
+
kreuzberg/_config.py,sha256=ZYIcnJAjDnbWW_2WBy7NlOk1Ol6WpoMG5FMNMmHpqSY,13086
|
5
|
+
kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
|
6
|
+
kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
|
7
|
+
kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
|
8
|
+
kreuzberg/_gmft.py,sha256=gfRXOsv-K9R7Y0zZ2SUa5wid3FpP2eFIlg5nepWcz1Q,20827
|
9
|
+
kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
|
10
|
+
kreuzberg/_mime_types.py,sha256=duEMDBg_qIf9A02tXAC_2znD-wgE-2BBMW9ofyYTJjE,8622
|
11
|
+
kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
|
12
|
+
kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
|
13
|
+
kreuzberg/_types.py,sha256=uULpUfQzpt_AAr8epOvIl3cdB9TkNTFrxWQssnZg_IM,48655
|
14
|
+
kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
|
15
|
+
kreuzberg/exceptions.py,sha256=KiGAfIX3_TkGYG1h9eTZ_E_pALsAqhZ_A3XfhwxwaS0,2909
|
16
|
+
kreuzberg/extraction.py,sha256=Z2rBVGs8oteXU1mynHCd9q1yKz9NNA5tQdWq35jP2EE,18743
|
17
|
+
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
|
20
|
+
kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
|
21
|
+
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
+
kreuzberg/_extractors/_base.py,sha256=4MRBXdLsgdtdrTuupWb2IT9YpRSnNPpWWviS2mfeOXg,9961
|
23
|
+
kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
|
24
|
+
kreuzberg/_extractors/_html.py,sha256=TXXgwQZuEvnrny5HdBpn8oikGktyxgY9jvgZmnFtnqY,6371
|
25
|
+
kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
|
26
|
+
kreuzberg/_extractors/_pandoc.py,sha256=cwthr--IFwbu8r0rCZ_Cx5zRlan94yuqt5e3mjYxesE,24182
|
27
|
+
kreuzberg/_extractors/_pdf.py,sha256=GFy7xHUH09i48E5Xixy6nReF_uBu9646UTjywKoH-Rs,23304
|
28
|
+
kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
|
29
|
+
kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
|
30
|
+
kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
|
31
|
+
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
32
|
+
kreuzberg/_mcp/server.py,sha256=71MhjiFDwgFROdGejf0djgO1eG370qudWmZsN59CUeA,16743
|
33
|
+
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
34
|
+
kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
|
35
|
+
kreuzberg/_ocr/_easyocr.py,sha256=bHz2S_8nNHaPHPemcJK-U0al9_qP-vUmWE4ECVlf7AA,15485
|
36
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=CV9cCjkRe-3cNJ5tRu_sBXd_HNghEwfPIgWwxAZTeRY,15026
|
37
|
+
kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
|
38
|
+
kreuzberg/_ocr/_tesseract.py,sha256=1SEfrX_JvU6KIeWt31GsRWnNmjaAh3xgQaRMPvoZLJA,51349
|
39
|
+
kreuzberg/_token_reduction/__init__.py,sha256=y_2WgPxJes8_PD-VMfx7vQT0hGjFIixzS8PjaIseAGg,311
|
40
|
+
kreuzberg/_token_reduction/_reducer.py,sha256=shAfMPznP69sTSzwX_bE1LpcBmoia9cpd7r6bSc4R5Q,13609
|
41
|
+
kreuzberg/_token_reduction/_stopwords.py,sha256=mu-5CapG0RCP7LYzjhdTM6WWLtmt3cjZ08OOsyQkJVg,3608
|
42
|
+
kreuzberg/_token_reduction/stopwords/af_stopwords.json,sha256=RlgUHyzPIQBbTA52kLSQpmTfteRbbV_qb_Spa51RI8Q,452
|
43
|
+
kreuzberg/_token_reduction/stopwords/ar_stopwords.json,sha256=GKcR9MyDM5zvIQhLqWfq87Jmj3gbAM81ZZi-eBKBjz8,6738
|
44
|
+
kreuzberg/_token_reduction/stopwords/bg_stopwords.json,sha256=7KuYBTg7jc8ZLFYI6QwaVatlh_gP8i80EkQHD1suhXs,3707
|
45
|
+
kreuzberg/_token_reduction/stopwords/bn_stopwords.json,sha256=qAtZN89LGy0se9i_FrB02vsNLgE2gG1clwMHTi4Qncg,7437
|
46
|
+
kreuzberg/_token_reduction/stopwords/br_stopwords.json,sha256=RuErMr4twvsocqw9fvwtgrnbzVKB2WABVi5AfPy2lqo,13601
|
47
|
+
kreuzberg/_token_reduction/stopwords/ca_stopwords.json,sha256=HRrh4QKFXDsAfmk5yjXHD28KjdO2vMjqJFIltNwh_F8,2952
|
48
|
+
kreuzberg/_token_reduction/stopwords/cs_stopwords.json,sha256=Dlsq3UFIHD9USIuHiwrFur0DvIaRpjftnBb3Qnjio4M,4523
|
49
|
+
kreuzberg/_token_reduction/stopwords/da_stopwords.json,sha256=zLk-90hrY5tH4gS4uOcMlviky4mmg7b8WaXnn_NzKfQ,1664
|
50
|
+
kreuzberg/_token_reduction/stopwords/de_stopwords.json,sha256=4lB0tUyT9PlB9ubnUbwOObO_RT0irBSdPOuvQIgrr7g,7052
|
51
|
+
kreuzberg/_token_reduction/stopwords/el_stopwords.json,sha256=VqO3y_q_ZeSBZAMxD1KjMMkCylYN2uuN620szHmFx-M,13604
|
52
|
+
kreuzberg/_token_reduction/stopwords/en_stopwords.json,sha256=VvLb0zoUKjhqQH-RGkSTpPtdRjvgv_G8l4i9ub2fJmU,14171
|
53
|
+
kreuzberg/_token_reduction/stopwords/eo_stopwords.json,sha256=xnojHtnik734Mzw4i4bIxPZEgBRXvgK2TRkHnxBCjWw,1722
|
54
|
+
kreuzberg/_token_reduction/stopwords/es_stopwords.json,sha256=PcSwxKskYQXc-21vNkpb6IntQYVP50CwuXfx4Gyhhx8,8598
|
55
|
+
kreuzberg/_token_reduction/stopwords/et_stopwords.json,sha256=_t6iPfNa1LhqRq4sLNbIB6_B5-472UCNi9IARJTPhzQ,327
|
56
|
+
kreuzberg/_token_reduction/stopwords/eu_stopwords.json,sha256=SNa84Zkx5Rcf8JZBdm4rCMxxZ7Z_94fW9cebZC4qgqI,1069
|
57
|
+
kreuzberg/_token_reduction/stopwords/fa_stopwords.json,sha256=8R1724IQHkXc1g_jXJjRMVLgq2Zz6YgPeE4DI0iSj9Y,11708
|
58
|
+
kreuzberg/_token_reduction/stopwords/fi_stopwords.json,sha256=yOlZLoh3aMJ-YXz3r7kGLAIsDyvxNrhFyvWr7Vu_z5o,10699
|
59
|
+
kreuzberg/_token_reduction/stopwords/fr_stopwords.json,sha256=KkiZ8dQYFQzjVJ-YwUoP13zwLwz7zu9Fpw-X-wmxya0,8025
|
60
|
+
kreuzberg/_token_reduction/stopwords/ga_stopwords.json,sha256=K8LOrUkqSi82KTLlZ1NnadEU-HMyCd2Ofm13GfxW3J0,1100
|
61
|
+
kreuzberg/_token_reduction/stopwords/gl_stopwords.json,sha256=Y0GfhhcOv1GNPJP3zoFYIYkg369GT1yHK5xCPiH6Pn0,1602
|
62
|
+
kreuzberg/_token_reduction/stopwords/gu_stopwords.json,sha256=YSldatfgVz_gNWopQ5TMFTHWEbGVYPcJMwO-bThtYAI,3818
|
63
|
+
kreuzberg/_token_reduction/stopwords/ha_stopwords.json,sha256=EohjrRkbSuLOn_aiDcsMOUEYPJjVha9wHhCsoxiwNsU,354
|
64
|
+
kreuzberg/_token_reduction/stopwords/he_stopwords.json,sha256=STlmHNDJqDEZI7ZCtBcZlEU1ndoEeJIexuOnTaOXJac,2629
|
65
|
+
kreuzberg/_token_reduction/stopwords/hi_stopwords.json,sha256=aYojvEA-UlivR_JCJTwZRoK2BJjVUW_m9q8eDRRczpE,3792
|
66
|
+
kreuzberg/_token_reduction/stopwords/hr_stopwords.json,sha256=2s5uhGAitVRDLgKdbA0F9sFZWtRWcmyiDZY9adwLGzk,1769
|
67
|
+
kreuzberg/_token_reduction/stopwords/hu_stopwords.json,sha256=9o0snSijbEEt9Hpbs4kTW6czhcdiXLTa5sbC68nitDY,9830
|
68
|
+
kreuzberg/_token_reduction/stopwords/hy_stopwords.json,sha256=QLsYw_y9ESyou1bHbPwjSrWy_nJq8wjiNihrvikYSKY,525
|
69
|
+
kreuzberg/_token_reduction/stopwords/id_stopwords.json,sha256=TZB_e1Txu3oGpQfHCzodoOTcKoKplTC5ZDr1iAbdzVI,10238
|
70
|
+
kreuzberg/_token_reduction/stopwords/it_stopwords.json,sha256=BSOpBGf_StyW6tdycNRMSvXGTksvrOowrE--D5914J4,7277
|
71
|
+
kreuzberg/_token_reduction/stopwords/ja_stopwords.json,sha256=E7MSvBOnRvTeChRk0Nm5X7xxwP50BHaP5FGOfDbnmRI,1680
|
72
|
+
kreuzberg/_token_reduction/stopwords/kn_stopwords.json,sha256=km3Qk1vy3OVdsAoE_YbZ-oXRYapFBi5k59o1mlWnk70,1626
|
73
|
+
kreuzberg/_token_reduction/stopwords/ko_stopwords.json,sha256=sHR2SLh_zXVs6SKZlWCS29MGRv6xlKcp3Ckvf0-aXt8,9932
|
74
|
+
kreuzberg/_token_reduction/stopwords/ku_stopwords.json,sha256=1Vj0g-fwacVcwaJ66BSPe4GkI7WybXK-EspIE6uvAmY,893
|
75
|
+
kreuzberg/_token_reduction/stopwords/la_stopwords.json,sha256=1d6iV2sTgZF6G7EF5yb3G0Sic85awtjN617cWXb-ltw,456
|
76
|
+
kreuzberg/_token_reduction/stopwords/lt_stopwords.json,sha256=7WE-NiX-y2IQnnO61-2pDExaR0ZeOq6A7YMn29effAM,5675
|
77
|
+
kreuzberg/_token_reduction/stopwords/lv_stopwords.json,sha256=WTp3jWxsX054E53DdpoI0BqujDefICljN4d7KiIIsls,1796
|
78
|
+
kreuzberg/_token_reduction/stopwords/ml_stopwords.json,sha256=lDoq0gGSI4zbuKhdNyF8MMTPkSI9wYb1om6pRPY5zkw,192
|
79
|
+
kreuzberg/_token_reduction/stopwords/mr_stopwords.json,sha256=6XjzSLaHwwOGWot1QszaUMl12mAVFh840GH9MJoYoes,1764
|
80
|
+
kreuzberg/_token_reduction/stopwords/ms_stopwords.json,sha256=eJsXJ0bVOnWUSVG3XwkIClxlR3qd_2k75ZAQlmHpsKQ,5950
|
81
|
+
kreuzberg/_token_reduction/stopwords/ne_stopwords.json,sha256=MoAXH6Tncag9Qgr6TR7yp1FguDCGQBXpGdSQ2DIOikE,9447
|
82
|
+
kreuzberg/_token_reduction/stopwords/nl_stopwords.json,sha256=W08hz9JP3EdWpXtLPUjWFOSr3AwGnZPkwcjEUBiFWnA,4724
|
83
|
+
kreuzberg/_token_reduction/stopwords/no_stopwords.json,sha256=bOjDCti_Loe0ZYSF2mR-LQzMqViZRkur1GEOLh4Mr-A,2210
|
84
|
+
kreuzberg/_token_reduction/stopwords/pl_stopwords.json,sha256=TcnvzF5uMVDKxQUt1YBu7Lw1qIpeHftuIDSguz8ZAdA,3487
|
85
|
+
kreuzberg/_token_reduction/stopwords/pt_stopwords.json,sha256=h4jmBxUu10PuzQzTjeFm1B5NBl0Owt7uGhwx66mTTYQ,6413
|
86
|
+
kreuzberg/_token_reduction/stopwords/ro_stopwords.json,sha256=iuHvFs-iS118RH07v0hO7Oxfdx5rDqJwl3lRPMWINbM,4569
|
87
|
+
kreuzberg/_token_reduction/stopwords/ru_stopwords.json,sha256=MZckTBKlL1i4Kv16RSSozUfCM6dcKI5H9PYZD7pS0Ac,9028
|
88
|
+
kreuzberg/_token_reduction/stopwords/si_stopwords.json,sha256=jvtaQfO4fc-XPHgaO1hPsbpJQQg40rSeEbCGWm2AO60,3324
|
89
|
+
kreuzberg/_token_reduction/stopwords/sk_stopwords.json,sha256=FDaLmQ61_fFg0k3cGthv8flKFs67M1hmSE-6PrfMCAU,4638
|
90
|
+
kreuzberg/_token_reduction/stopwords/sl_stopwords.json,sha256=UoQRoLRT9qzmS8ALY_cuDE1uukK0hS6Q6QuUhr7oLHc,4669
|
91
|
+
kreuzberg/_token_reduction/stopwords/so_stopwords.json,sha256=Z7ayeNV98MOx_xkGxtcSX3dh8GAhgCRFa0EC1VDG29Q,299
|
92
|
+
kreuzberg/_token_reduction/stopwords/st_stopwords.json,sha256=ajvBq5XQCse62nptN_m8Jll5-Ps9j3bK4RODMIzCkD4,268
|
93
|
+
kreuzberg/_token_reduction/stopwords/sv_stopwords.json,sha256=kLz5vgx0VfQI0jtOj3Rlp6wuj3tKhqp2oF-f9f2-neQ,4737
|
94
|
+
kreuzberg/_token_reduction/stopwords/sw_stopwords.json,sha256=x4eOC7-nRlSS7qv_pwW6yECDrfhm_3zoTWenIPL1aWY,780
|
95
|
+
kreuzberg/_token_reduction/stopwords/ta_stopwords.json,sha256=qBbEu6m_HEx2C27ep6UJOyxQ6st74Et1fN8TvRHoTxw,2634
|
96
|
+
kreuzberg/_token_reduction/stopwords/te_stopwords.json,sha256=GT0Rj3MsgCJSj9GdzKjpgsQJE3-wCaS5Aa3_ynIZKx0,1263
|
97
|
+
kreuzberg/_token_reduction/stopwords/th_stopwords.json,sha256=5DEb-W41TFL4BGS-_CJzgPTkpmuLN20WBfeO1hG0HLc,2010
|
98
|
+
kreuzberg/_token_reduction/stopwords/tl_stopwords.json,sha256=pu3wAWQyT0vzGwSO8N2x2mRlaCHzEgEIvECTCrJOLE8,1663
|
99
|
+
kreuzberg/_token_reduction/stopwords/tr_stopwords.json,sha256=hSmUsApI7lxVfwJwAInkCLoa3YoGjI85Mwg9DpiHTDo,6159
|
100
|
+
kreuzberg/_token_reduction/stopwords/uk_stopwords.json,sha256=_j_lYv_bE5RAEMcW7-u0rYWf39fMrlpIgFEMFQDjqW0,965
|
101
|
+
kreuzberg/_token_reduction/stopwords/ur_stopwords.json,sha256=IcrM74VdmSbgM7wlBtFVtkrWsCI0SDFbRCSSAkyvlqo,7370
|
102
|
+
kreuzberg/_token_reduction/stopwords/vi_stopwords.json,sha256=UOyAEKBwMcQV65QGpQU-ynmyignNoqFzUSQ8p_1XuoY,9152
|
103
|
+
kreuzberg/_token_reduction/stopwords/yo_stopwords.json,sha256=60liY89h7KReEvHEPxe-hCWLPuqr4U89aQDCi7iRCfo,651
|
104
|
+
kreuzberg/_token_reduction/stopwords/zh_stopwords.json,sha256=rouSTCkXun90Q1aCvLjHyt4I7pGrtlcruDpNVybpAMI,8934
|
105
|
+
kreuzberg/_token_reduction/stopwords/zu_stopwords.json,sha256=hfm4E2EDI_VWyR0GUOVjcMQA7ZDH7FsV4FUMcns1H28,324
|
106
|
+
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
107
|
+
kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
|
108
|
+
kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
|
109
|
+
kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
|
110
|
+
kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
|
111
|
+
kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
|
112
|
+
kreuzberg/_utils/_image_preprocessing.py,sha256=f7ioWQyARnhzj0am0Y1_eteJwWomdPy7AnbXqw2xWBs,10954
|
113
|
+
kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
|
114
|
+
kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
|
115
|
+
kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
|
116
|
+
kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
|
117
|
+
kreuzberg/_utils/_ref.py,sha256=BDuk9hHYq1KPRgenjC3-6iFEjGsrGfHZKr9tPNhfquU,1109
|
118
|
+
kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
|
119
|
+
kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
|
120
|
+
kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
|
121
|
+
kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
|
122
|
+
kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
|
123
|
+
kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
|
124
|
+
kreuzberg-3.17.0.dist-info/METADATA,sha256=4iVwQUo4FVNSwj8h6oEqNAT5B6zm-J-u5k3Jy3Pv3L0,12351
|
125
|
+
kreuzberg-3.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
126
|
+
kreuzberg-3.17.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
127
|
+
kreuzberg-3.17.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
128
|
+
kreuzberg-3.17.0.dist-info/RECORD,,
|
@@ -1,61 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=EE6ENEjyKlt0o6QN1cG3Z_1isCtminVOjQT7ii5eBHA,1575
|
2
|
-
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
-
kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
|
4
|
-
kreuzberg/_config.py,sha256=H4jUAL0fNY-YE61GbGq5UtAUtXHbZA4-9W3YwcT_hu8,12988
|
5
|
-
kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
|
6
|
-
kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
|
7
|
-
kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
|
8
|
-
kreuzberg/_gmft.py,sha256=XI8vdBG0tdEVwFiabVieCuvxM5esqTSiFtsEwJ0YT5g,20787
|
9
|
-
kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
|
10
|
-
kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
|
11
|
-
kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
|
12
|
-
kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
|
13
|
-
kreuzberg/_types.py,sha256=Xht1_TcvsbIpdmLYMy6Pa_HpbQuF9MBOo-BrKkZ7cLA,47358
|
14
|
-
kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
|
15
|
-
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
-
kreuzberg/extraction.py,sha256=5TuuRqLRmboLaTS0x9eZ2lrYOHKJBSHuTT_U-5nn6ek,17829
|
17
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
|
20
|
-
kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
|
21
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
|
23
|
-
kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
|
24
|
-
kreuzberg/_extractors/_html.py,sha256=zZ9WZmmoIG9B5dGF25ulm_GmW9RsYFI1HddDUUp3hOE,6351
|
25
|
-
kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
|
26
|
-
kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
|
27
|
-
kreuzberg/_extractors/_pdf.py,sha256=78gPO7m8nPFIOskqqRpUfyOhKUk6f5rjJ0cZDnL9Vdk,23224
|
28
|
-
kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
|
29
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
|
30
|
-
kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
|
31
|
-
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
32
|
-
kreuzberg/_mcp/server.py,sha256=vJWCXbBiv0ktIPZeLedSWZEwKF46p6642H6lxhTnjek,16723
|
33
|
-
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
34
|
-
kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
|
35
|
-
kreuzberg/_ocr/_easyocr.py,sha256=7bkMM_zN0h7ZiX0-VHxxnwNOhQloI-dlOOibpRc-vNs,15710
|
36
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
|
37
|
-
kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
|
38
|
-
kreuzberg/_ocr/_tesseract.py,sha256=BjTKE6ilUpSEKarHdgP3PbsE6I89JeqgDtpQ-XHniBA,51452
|
39
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
|
41
|
-
kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
|
42
|
-
kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
|
43
|
-
kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
|
44
|
-
kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
|
45
|
-
kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
|
46
|
-
kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
|
47
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
|
48
|
-
kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
|
49
|
-
kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
|
50
|
-
kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
|
51
|
-
kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
|
52
|
-
kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
|
53
|
-
kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
|
54
|
-
kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
|
55
|
-
kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
|
56
|
-
kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
|
57
|
-
kreuzberg-3.16.0.dist-info/METADATA,sha256=d1sUA7WBl0VcXHX0jPGzTHeXmj7yyJzTWjzHUmT-Dp4,12319
|
58
|
-
kreuzberg-3.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
59
|
-
kreuzberg-3.16.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
60
|
-
kreuzberg-3.16.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
61
|
-
kreuzberg-3.16.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|