kreuzberg 3.13.0__py3-none-any.whl → 3.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/_chunker.py +0 -15
- kreuzberg/_config.py +0 -124
- kreuzberg/_document_classification.py +20 -39
- kreuzberg/_entity_extraction.py +0 -29
- kreuzberg/_extractors/_base.py +4 -66
- kreuzberg/_extractors/_email.py +0 -4
- kreuzberg/_extractors/_image.py +0 -2
- kreuzberg/_extractors/_pandoc.py +0 -58
- kreuzberg/_extractors/_pdf.py +0 -3
- kreuzberg/_extractors/_presentation.py +0 -82
- kreuzberg/_extractors/_spread_sheet.py +0 -2
- kreuzberg/_gmft.py +0 -61
- kreuzberg/_language_detection.py +0 -14
- kreuzberg/_mime_types.py +0 -17
- kreuzberg/_ocr/_base.py +4 -76
- kreuzberg/_ocr/_easyocr.py +110 -85
- kreuzberg/_ocr/_paddleocr.py +146 -138
- kreuzberg/_ocr/_table_extractor.py +0 -76
- kreuzberg/_ocr/_tesseract.py +0 -206
- kreuzberg/_playa.py +0 -27
- kreuzberg/_registry.py +0 -36
- kreuzberg/_types.py +16 -119
- kreuzberg/_utils/_cache.py +0 -52
- kreuzberg/_utils/_device.py +0 -56
- kreuzberg/_utils/_document_cache.py +0 -73
- kreuzberg/_utils/_errors.py +0 -47
- kreuzberg/_utils/_ocr_cache.py +136 -0
- kreuzberg/_utils/_pdf_lock.py +0 -14
- kreuzberg/_utils/_process_pool.py +0 -47
- kreuzberg/_utils/_quality.py +0 -17
- kreuzberg/_utils/_ref.py +0 -16
- kreuzberg/_utils/_serialization.py +0 -25
- kreuzberg/_utils/_string.py +0 -20
- kreuzberg/_utils/_sync.py +0 -76
- kreuzberg/_utils/_table.py +0 -45
- kreuzberg/_utils/_tmp.py +0 -9
- kreuzberg/cli.py +2 -2
- {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/METADATA +3 -2
- kreuzberg-3.13.2.dist-info/RECORD +57 -0
- kreuzberg-3.13.0.dist-info/RECORD +0 -56
- {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/WHEEL +0 -0
- {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/licenses/LICENSE +0 -0
kreuzberg/_utils/_string.py
CHANGED
@@ -21,20 +21,10 @@ _encoding_cache: dict[str, str] = {}
|
|
21
21
|
|
22
22
|
@lru_cache(maxsize=128)
|
23
23
|
def _get_encoding_cache_key(data_hash: str, size: int) -> str:
|
24
|
-
"""Generate cache key for encoding detection."""
|
25
24
|
return f"{data_hash}:{size}"
|
26
25
|
|
27
26
|
|
28
27
|
def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
|
29
|
-
"""Decode a byte string safely with mojibake detection and correction.
|
30
|
-
|
31
|
-
Args:
|
32
|
-
byte_data: The byte string to decode.
|
33
|
-
encoding: The encoding to use when decoding the byte string.
|
34
|
-
|
35
|
-
Returns:
|
36
|
-
The decoded string with mojibake detection and correction.
|
37
|
-
"""
|
38
28
|
if not byte_data:
|
39
29
|
return ""
|
40
30
|
|
@@ -88,7 +78,6 @@ def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
|
|
88
78
|
|
89
79
|
|
90
80
|
def _calculate_text_confidence(text: str) -> float:
|
91
|
-
"""Calculate confidence score for decoded text quality."""
|
92
81
|
if not text:
|
93
82
|
return 0.0
|
94
83
|
|
@@ -114,7 +103,6 @@ def _calculate_text_confidence(text: str) -> float:
|
|
114
103
|
|
115
104
|
|
116
105
|
def _fix_mojibake(text: str) -> str:
|
117
|
-
"""Attempt to fix common mojibake patterns."""
|
118
106
|
if not text:
|
119
107
|
return text
|
120
108
|
|
@@ -131,14 +119,6 @@ def _fix_mojibake(text: str) -> str:
|
|
131
119
|
|
132
120
|
|
133
121
|
def normalize_spaces(text: str) -> str:
|
134
|
-
"""Normalize spaces while preserving line breaks and paragraph structure.
|
135
|
-
|
136
|
-
Args:
|
137
|
-
text: The text to normalize.
|
138
|
-
|
139
|
-
Returns:
|
140
|
-
The normalized text with proper spacing.
|
141
|
-
"""
|
142
122
|
if not text or not text.strip():
|
143
123
|
return ""
|
144
124
|
|
kreuzberg/_utils/_sync.py
CHANGED
@@ -18,16 +18,6 @@ P = ParamSpec("P")
|
|
18
18
|
|
19
19
|
|
20
20
|
async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
|
21
|
-
"""Run a synchronous function in an asynchronous context.
|
22
|
-
|
23
|
-
Args:
|
24
|
-
sync_fn: The synchronous function to run.
|
25
|
-
*args: The positional arguments to pass to the function.
|
26
|
-
**kwargs: The keyword arguments to pass to the function.
|
27
|
-
|
28
|
-
Returns:
|
29
|
-
The result of the synchronous function.
|
30
|
-
"""
|
31
21
|
if kwargs:
|
32
22
|
handler = partial(sync_fn, **kwargs)
|
33
23
|
return cast("T", await any_io_run_sync(handler, *args, abandon_on_cancel=True)) # pyright: ignore [reportCallIssue]
|
@@ -35,14 +25,6 @@ async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -
|
|
35
25
|
|
36
26
|
|
37
27
|
async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
|
38
|
-
"""Run a list of coroutines concurrently.
|
39
|
-
|
40
|
-
Args:
|
41
|
-
*async_tasks: The list of coroutines to run.
|
42
|
-
|
43
|
-
Returns:
|
44
|
-
The results of the coroutines.
|
45
|
-
"""
|
46
28
|
results: list[Any] = [None] * len(async_tasks)
|
47
29
|
|
48
30
|
async def run_task(index: int, task: Awaitable[T]) -> None:
|
@@ -56,15 +38,6 @@ async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
|
|
56
38
|
|
57
39
|
|
58
40
|
async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -> list[Any]:
|
59
|
-
"""Run a list of coroutines concurrently in batches.
|
60
|
-
|
61
|
-
Args:
|
62
|
-
*async_tasks: The list of coroutines to run.
|
63
|
-
batch_size: The size of each batch.
|
64
|
-
|
65
|
-
Returns:
|
66
|
-
The results of the coroutines.
|
67
|
-
"""
|
68
41
|
results: list[Any] = []
|
69
42
|
|
70
43
|
for i in range(0, len(async_tasks), batch_size):
|
@@ -75,25 +48,6 @@ async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -
|
|
75
48
|
|
76
49
|
|
77
50
|
async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
|
78
|
-
"""Executes a callable function and handles both synchronous and asynchronous
|
79
|
-
results.
|
80
|
-
|
81
|
-
This function invokes the provided callable `sync_fn` with the given
|
82
|
-
arguments and keyword arguments. If the result of `sync_fn` is awaitable,
|
83
|
-
it awaits the result before returning it. Otherwise, the result is returned
|
84
|
-
directly.
|
85
|
-
|
86
|
-
Args:
|
87
|
-
fn: The callable to be executed. It can produce either a
|
88
|
-
synchronous or asynchronous result.
|
89
|
-
*args: Positional arguments to pass to `sync_fn`.
|
90
|
-
**kwargs: Keyword arguments to pass to `sync_fn`.
|
91
|
-
|
92
|
-
Returns:
|
93
|
-
The result of `sync_fn` invocation. If the result is awaitable, the
|
94
|
-
awaited value is returned. Otherwise, the synchronous result is
|
95
|
-
returned.
|
96
|
-
"""
|
97
51
|
result = fn(*args, **kwargs)
|
98
52
|
if isawaitable(result):
|
99
53
|
return cast("T", await result)
|
@@ -101,40 +55,10 @@ async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwa
|
|
101
55
|
|
102
56
|
|
103
57
|
def run_maybe_async(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
|
104
|
-
"""Runs a synchronous or asynchronous function, resolving the output.
|
105
|
-
|
106
|
-
Determines if the provided function is synchronous or asynchronous. If synchronous,
|
107
|
-
executes it directly. If asynchronous, it runs the function within the event loop
|
108
|
-
using anyio. The return value is resolved regardless of the function type.
|
109
|
-
|
110
|
-
Args:
|
111
|
-
fn: The function to be executed, which can
|
112
|
-
either be synchronous or asynchronous.
|
113
|
-
*args: Positional arguments to be passed to the function.
|
114
|
-
**kwargs: Keyword arguments to be passed to the function.
|
115
|
-
|
116
|
-
Returns:
|
117
|
-
T: The return value of the executed function, resolved if asynchronous.
|
118
|
-
"""
|
119
58
|
return cast("T", fn(*args, **kwargs) if not iscoroutinefunction(fn) else anyio.run(partial(fn, **kwargs), *args))
|
120
59
|
|
121
60
|
|
122
61
|
def run_sync_only(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
|
123
|
-
"""Runs a function, but only if it's synchronous. Raises error if async.
|
124
|
-
|
125
|
-
This is used for pure sync code paths where we cannot handle async functions.
|
126
|
-
|
127
|
-
Args:
|
128
|
-
fn: The function to be executed, must be synchronous.
|
129
|
-
*args: Positional arguments to be passed to the function.
|
130
|
-
**kwargs: Keyword arguments to be passed to the function.
|
131
|
-
|
132
|
-
Returns:
|
133
|
-
T: The return value of the executed function.
|
134
|
-
|
135
|
-
Raises:
|
136
|
-
RuntimeError: If the function is asynchronous.
|
137
|
-
"""
|
138
62
|
if iscoroutinefunction(fn):
|
139
63
|
raise RuntimeError(f"Cannot run async function {fn.__name__} in sync-only context")
|
140
64
|
return cast("T", fn(*args, **kwargs))
|
kreuzberg/_utils/_table.py
CHANGED
@@ -8,15 +8,6 @@ if TYPE_CHECKING:
|
|
8
8
|
|
9
9
|
|
10
10
|
def export_table_to_csv(table: TableData, separator: str = ",") -> str:
|
11
|
-
r"""Export a TableData object to CSV/TSV format.
|
12
|
-
|
13
|
-
Args:
|
14
|
-
table: TableData object containing DataFrame
|
15
|
-
separator: Field separator ("," for CSV, "\t" for TSV)
|
16
|
-
|
17
|
-
Returns:
|
18
|
-
String representation in CSV/TSV format
|
19
|
-
"""
|
20
11
|
if "df" not in table or table["df"] is None:
|
21
12
|
return ""
|
22
13
|
|
@@ -27,26 +18,10 @@ def export_table_to_csv(table: TableData, separator: str = ",") -> str:
|
|
27
18
|
|
28
19
|
|
29
20
|
def export_table_to_tsv(table: TableData) -> str:
|
30
|
-
"""Export a TableData object to TSV format.
|
31
|
-
|
32
|
-
Args:
|
33
|
-
table: TableData object containing DataFrame
|
34
|
-
|
35
|
-
Returns:
|
36
|
-
String representation in TSV format
|
37
|
-
"""
|
38
21
|
return export_table_to_csv(table, separator="\t")
|
39
22
|
|
40
23
|
|
41
24
|
def enhance_table_markdown(table: TableData) -> str:
|
42
|
-
"""Generate enhanced markdown table with better formatting.
|
43
|
-
|
44
|
-
Args:
|
45
|
-
table: TableData object
|
46
|
-
|
47
|
-
Returns:
|
48
|
-
Enhanced markdown table string
|
49
|
-
"""
|
50
25
|
if "df" not in table or table["df"] is None:
|
51
26
|
return table.get("text", "")
|
52
27
|
|
@@ -72,7 +47,6 @@ def enhance_table_markdown(table: TableData) -> str:
|
|
72
47
|
|
73
48
|
|
74
49
|
def _generate_separator_row(df: Any) -> str:
|
75
|
-
"""Generate separator row with proper alignment hints."""
|
76
50
|
separators = []
|
77
51
|
for col in df.columns:
|
78
52
|
dtype_str = str(df[col].dtype)
|
@@ -84,7 +58,6 @@ def _generate_separator_row(df: Any) -> str:
|
|
84
58
|
|
85
59
|
|
86
60
|
def _analyze_float_columns(df: Any) -> dict[str, str]:
|
87
|
-
"""Analyze float columns to determine formatting strategy."""
|
88
61
|
float_col_formatting = {}
|
89
62
|
for col in df.columns:
|
90
63
|
dtype_str = str(df[col].dtype)
|
@@ -103,7 +76,6 @@ def _analyze_float_columns(df: Any) -> dict[str, str]:
|
|
103
76
|
|
104
77
|
|
105
78
|
def _format_table_row(row: Any, df: Any, float_col_formatting: dict[str, str]) -> list[str]:
|
106
|
-
"""Format a single table row with proper value formatting."""
|
107
79
|
formatted_row = []
|
108
80
|
for col_name, value in row.items():
|
109
81
|
if value is None:
|
@@ -124,7 +96,6 @@ def _format_table_row(row: Any, df: Any, float_col_formatting: dict[str, str]) -
|
|
124
96
|
|
125
97
|
|
126
98
|
def _is_numeric_column(series: Any) -> bool:
|
127
|
-
"""Check if a polars Series contains mostly numeric values."""
|
128
99
|
if len(series) == 0:
|
129
100
|
return False
|
130
101
|
|
@@ -161,14 +132,6 @@ def _is_numeric_column(series: Any) -> bool:
|
|
161
132
|
|
162
133
|
|
163
134
|
def generate_table_summary(tables: list[TableData]) -> dict[str, Any]:
|
164
|
-
"""Generate summary statistics for extracted tables.
|
165
|
-
|
166
|
-
Args:
|
167
|
-
tables: List of TableData objects
|
168
|
-
|
169
|
-
Returns:
|
170
|
-
Dictionary with table statistics
|
171
|
-
"""
|
172
135
|
if not tables:
|
173
136
|
return {
|
174
137
|
"table_count": 0,
|
@@ -208,14 +171,6 @@ def generate_table_summary(tables: list[TableData]) -> dict[str, Any]:
|
|
208
171
|
|
209
172
|
|
210
173
|
def extract_table_structure_info(table: TableData) -> dict[str, Any]:
|
211
|
-
"""Extract structural information from a table.
|
212
|
-
|
213
|
-
Args:
|
214
|
-
table: TableData object
|
215
|
-
|
216
|
-
Returns:
|
217
|
-
Dictionary with structural information
|
218
|
-
"""
|
219
174
|
info = {
|
220
175
|
"has_headers": False,
|
221
176
|
"row_count": 0,
|
kreuzberg/_utils/_tmp.py
CHANGED
@@ -16,15 +16,6 @@ if TYPE_CHECKING: # pragma: no cover
|
|
16
16
|
async def create_temp_file(
|
17
17
|
extension: str, content: bytes | None = None
|
18
18
|
) -> tuple[Path, Callable[[], Coroutine[None, None, None]]]:
|
19
|
-
"""Create a temporary file that is closed.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
extension: The file extension.
|
23
|
-
content: The content to write to the file.
|
24
|
-
|
25
|
-
Returns:
|
26
|
-
The temporary file path.
|
27
|
-
"""
|
28
19
|
file = await run_sync(NamedTemporaryFile, suffix=extension, delete=False)
|
29
20
|
if content:
|
30
21
|
await AsyncPath(file.name).write_bytes(content)
|
kreuzberg/cli.py
CHANGED
@@ -265,7 +265,7 @@ def cli(ctx: click.Context) -> None:
|
|
265
265
|
@click.option("--easyocr-languages", help="EasyOCR language codes (comma-separated, e.g., 'en,de')")
|
266
266
|
@click.option("--paddleocr-languages", help="PaddleOCR language codes (comma-separated, e.g., 'en,german')")
|
267
267
|
@click.pass_context
|
268
|
-
def extract(ctx: click.Context) -> None:
|
268
|
+
def extract(ctx: click.Context, /, **kwargs: Any) -> None:
|
269
269
|
"""Extract text from a document.
|
270
270
|
|
271
271
|
FILE can be a path to a document or '-' to read from stdin.
|
@@ -279,7 +279,7 @@ def extract(ctx: click.Context) -> None:
|
|
279
279
|
|
280
280
|
extraction_config = build_extraction_config(file_config, cli_args)
|
281
281
|
|
282
|
-
result = _perform_extraction(
|
282
|
+
result = _perform_extraction(kwargs.get("file"), extraction_config, params["verbose"])
|
283
283
|
|
284
284
|
_write_output(result, params["output"], params["show_metadata"], params["output_format"], params["verbose"])
|
285
285
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kreuzberg
|
3
|
-
Version: 3.13.
|
3
|
+
Version: 3.13.2
|
4
4
|
Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
|
5
5
|
Project-URL: documentation, https://kreuzberg.dev
|
6
6
|
Project-URL: homepage, https://github.com/Goldziher/kreuzberg
|
@@ -34,11 +34,12 @@ Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
|
|
34
34
|
Requires-Dist: html-to-markdown[lxml]>=1.9.1
|
35
35
|
Requires-Dist: mcp>=1.13.0
|
36
36
|
Requires-Dist: msgspec>=0.18.0
|
37
|
+
Requires-Dist: numpy>=1.24.0
|
37
38
|
Requires-Dist: playa-pdf>=0.7.0
|
38
39
|
Requires-Dist: polars>=1.33.0
|
39
40
|
Requires-Dist: psutil>=7.0.0
|
40
41
|
Requires-Dist: pypdfium2==4.30.0
|
41
|
-
Requires-Dist: python-calamine>=0.
|
42
|
+
Requires-Dist: python-calamine>=0.5.2
|
42
43
|
Requires-Dist: python-pptx>=1.0.2
|
43
44
|
Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
|
44
45
|
Provides-Extra: additional-extensions
|
@@ -0,0 +1,57 @@
|
|
1
|
+
kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
|
2
|
+
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
+
kreuzberg/_chunker.py,sha256=tr9_KUYTSLauFois3MsB-A-0hGcTT8hTQFrqNRTii-I,1373
|
4
|
+
kreuzberg/_config.py,sha256=Q5oiJE1XRf8ITuYcO8LZAOB3G2zNlXz2458rgPSth-U,12257
|
5
|
+
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
+
kreuzberg/_document_classification.py,sha256=Mz_s2GJGsEl7MQ-67BPoGYCZibTy9Sw0PScUZKBjKOA,5736
|
7
|
+
kreuzberg/_entity_extraction.py,sha256=5YpPnqoJ5aiHd_sy4bN4-Ngiq79RhCV6yaUQE8joGXo,3503
|
8
|
+
kreuzberg/_gmft.py,sha256=jKbD7V_KP9XTLjT9SBgSgE3CyDjqbRDm9BAiWV2sAC0,19542
|
9
|
+
kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
|
10
|
+
kreuzberg/_mime_types.py,sha256=kGBDSMO4XPgzUKC7iaBeChCtRQXZ9_zXq6eJydejX_k,7739
|
11
|
+
kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
|
12
|
+
kreuzberg/_registry.py,sha256=8cPpz3oZVnMwWDT2v_Q7wf-GHd5YuHmc-nkLtvPfE1I,2433
|
13
|
+
kreuzberg/_types.py,sha256=D-2d_WG8HyByA163izGhjk7t-e4FL_N-_6UzlVso8Dg,36020
|
14
|
+
kreuzberg/cli.py,sha256=nPH4FDW6WkoF4gtH0s4RWmxjAveJ_-Unb6fev6x0Sko,12752
|
15
|
+
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
+
kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
|
17
|
+
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
|
20
|
+
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
kreuzberg/_extractors/_base.py,sha256=i2FvAhRnamEtBb4a-C7pfcdWIXnkEBw0saMQu7h1_RQ,2069
|
22
|
+
kreuzberg/_extractors/_email.py,sha256=jn_8J4BASKJ7zFHBG0PgxNe3OT4pjmEM2tTKX8y_0AE,5887
|
23
|
+
kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
|
24
|
+
kreuzberg/_extractors/_image.py,sha256=UqPoYfvDRX6Rd1yPhcLHJLDw6d2cUzgkqOGjh2eleJM,3301
|
25
|
+
kreuzberg/_extractors/_pandoc.py,sha256=-Ai4S1cXs7F6yeonb_7Y7_ZoWHn29E2oP1WlPtM-4HM,22505
|
26
|
+
kreuzberg/_extractors/_pdf.py,sha256=Yv_c3xYzrGAjgTbwCGqbiQTDLjIUP_Pu7Z3GmMOqgqg,17865
|
27
|
+
kreuzberg/_extractors/_presentation.py,sha256=ULGkt7dzeA9sYSEhpAucKZmkdv9EubzeZtOjoLP3Z2E,6994
|
28
|
+
kreuzberg/_extractors/_spread_sheet.py,sha256=x25u2M-ufxpDd7_qrjhMEz1yFftIcOISE1qwPW09Zm0,11962
|
29
|
+
kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
|
30
|
+
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
31
|
+
kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
|
32
|
+
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
|
+
kreuzberg/_ocr/_base.py,sha256=5ef2g8JuSaZF2sDiAmoaODHbeG4MT0LtNzbtW0n9BnU,1445
|
34
|
+
kreuzberg/_ocr/_easyocr.py,sha256=XbgpGt5tkE4xHleIGvV1cHlpOQTp43rSXBO1CyIyKTg,14599
|
35
|
+
kreuzberg/_ocr/_paddleocr.py,sha256=58sKOHfKCHGFJNlRLrJwey8G_7xbsAAPBXB4n3hKc7k,14052
|
36
|
+
kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
|
37
|
+
kreuzberg/_ocr/_tesseract.py,sha256=xGML3ygY5xMN5T3YznrKDVAH_DWfaFiteFBo_-GpjCs,48931
|
38
|
+
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
|
+
kreuzberg/_utils/_cache.py,sha256=S6Oc4TJamiuuWeJ2ABxDFbbQh4o8w38AUyZeBEc1NN8,12767
|
40
|
+
kreuzberg/_utils/_device.py,sha256=UxGkSTN3Up-Zn43CSyvf8CozW2xAF05Cm01LWA2FZmg,8263
|
41
|
+
kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
|
42
|
+
kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
|
43
|
+
kreuzberg/_utils/_ocr_cache.py,sha256=8_-qmPlK2adQKsH4OO4Mlk8wmqBMl3XxkcV_NsXVyFs,3501
|
44
|
+
kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
|
45
|
+
kreuzberg/_utils/_process_pool.py,sha256=9dPMD_gBocQ5VaeCIrlSJfPXKyXNuyKaATmqOPExxiE,6723
|
46
|
+
kreuzberg/_utils/_quality.py,sha256=f7NbyZysyJQD8jKCNWhogvluU9A7GdEYhMsDBeMbGAA,5412
|
47
|
+
kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
|
48
|
+
kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
|
49
|
+
kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
|
50
|
+
kreuzberg/_utils/_sync.py,sha256=OWiciXPTGHIxgiGoHI2AglZ1siTNT-nU_JCgHPNzzHk,2196
|
51
|
+
kreuzberg/_utils/_table.py,sha256=R-6owHjvcvHGhem_vDsFH7S2yMHGoUUO2PFcj-Idptk,6361
|
52
|
+
kreuzberg/_utils/_tmp.py,sha256=wnOInBkcuQoxI1vBLvNv9NqbRCEu9Y03qfOjqQuAk3s,841
|
53
|
+
kreuzberg-3.13.2.dist-info/METADATA,sha256=c1w8iB_Frnzr0DHY-X-a9rk5S9vQPICPIniPzwfvHV8,12127
|
54
|
+
kreuzberg-3.13.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
55
|
+
kreuzberg-3.13.2.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
56
|
+
kreuzberg-3.13.2.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
57
|
+
kreuzberg-3.13.2.dist-info/RECORD,,
|
@@ -1,56 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
|
2
|
-
kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
|
3
|
-
kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
|
4
|
-
kreuzberg/_config.py,sha256=dSTumnpleMeUjUabWgAH7WlhTkdNG3eeMv8FSFmUaEI,15776
|
5
|
-
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_document_classification.py,sha256=NZ-6tQtVa1OgigC7xf30hAsnL5_gi9ak9X2XYdsCfTI,6361
|
7
|
-
kreuzberg/_entity_extraction.py,sha256=QFIPQ_fovEnEezpS6W4pwpjTA2PqS7TUCD9AKf8sAyc,4666
|
8
|
-
kreuzberg/_gmft.py,sha256=60WpPTf7jocU-kmkBe-pBytl7l58aQzd-Aw2_Hlioug,21481
|
9
|
-
kreuzberg/_language_detection.py,sha256=yLUliJOUyofVma_q6FwzG9Ck4-XX3AEjxleTHrqi8R4,2445
|
10
|
-
kreuzberg/_mime_types.py,sha256=fwtPKtp2XhCLT686qF26PBMeOqcVJroKPwkp7JgaM0E,8462
|
11
|
-
kreuzberg/_playa.py,sha256=1viLRqgcDWvaPo5ZsDPO2gqHFSBApOYortTV_SPVK9k,12190
|
12
|
-
kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
|
13
|
-
kreuzberg/_types.py,sha256=WFUFY1S7SL7kTfHCX-zGASLYT94FxLD71C9vGUzFOiA,38922
|
14
|
-
kreuzberg/cli.py,sha256=MLeWoMcLoN6WnkbyRbOY-2dqp-vNZf7Nb-K_R5F5CoU,12730
|
15
|
-
kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
|
16
|
-
kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
|
17
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
|
20
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
-
kreuzberg/_extractors/_base.py,sha256=EZTEJzwJxwu_yYFQ5QlZVNQMPCcli7yyUB4T5mFotCY,4209
|
22
|
-
kreuzberg/_extractors/_email.py,sha256=mVi_VDmiFhe6NgiWxJDYt4DQiP6jVs5dP8BsPClm3WQ,6108
|
23
|
-
kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
|
24
|
-
kreuzberg/_extractors/_image.py,sha256=UZEOmKNAS4KjaX38iYq2Ux6Mta3juCF1MzWNeBxpPE8,3414
|
25
|
-
kreuzberg/_extractors/_pandoc.py,sha256=zumwImIXwD3ziPhYxt0EQct5sSMy5lQiY6KnPSDxBTU,24183
|
26
|
-
kreuzberg/_extractors/_pdf.py,sha256=766O7rXAeAJ42vPpWbGpW_WgHXm48eWwX09l3aqjKeM,18064
|
27
|
-
kreuzberg/_extractors/_presentation.py,sha256=BJdEM9jsuAd0vb-PIRwNMcRj4xVjItb5kpOpnjsCBi0,10175
|
28
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=wqAV-Stqfd4hXs5ock-chqBEdzv4voSgT1uFUO1cIU0,12075
|
29
|
-
kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
|
30
|
-
kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
|
31
|
-
kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
|
32
|
-
kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
|
33
|
-
kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
|
34
|
-
kreuzberg/_ocr/_easyocr.py,sha256=CtiHGx_BmuUwZhC7bScYF9mwnAxRrLWJ-X70fuwFTjk,14079
|
35
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=wCuIQ_yxPWE9hukiehYNRdt00Rb2h6pWdfqPS8hI2s0,14297
|
36
|
-
kreuzberg/_ocr/_table_extractor.py,sha256=MeQLQn_bRco5OAcUoy613ZbZLCDBRJY8uHH_bUBSP8I,7613
|
37
|
-
kreuzberg/_ocr/_tesseract.py,sha256=i_UTjOmrFxZbtmXxrQIsE78wtZLTyZph0i0jDQc4EMA,56916
|
38
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
|
-
kreuzberg/_utils/_cache.py,sha256=fDqFp_54-Kyn3_4VkXkhovvNIB2osKqXlS13MlORrU8,14539
|
40
|
-
kreuzberg/_utils/_device.py,sha256=35xQvrLSPISJlWicQGknoBjkwdalwVxiJbzyxwuwOVo,9747
|
41
|
-
kreuzberg/_utils/_document_cache.py,sha256=CpCdJVd8SYLjfwm0ozSM8mx5x8i9vVDet3BlEUpzuZY,6920
|
42
|
-
kreuzberg/_utils/_errors.py,sha256=ctD-s1q7vbEgqHQ3OVJiEOODDLTd2LvrM3z6o37zrGI,6395
|
43
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=mHB1A4Fo_nSfgdqUNEWODH9b5tNFqpEHcNE6rT41dGE,1886
|
44
|
-
kreuzberg/_utils/_process_pool.py,sha256=ebuMPmHXPkWaLWjgAkeaONvAZo974PhfENN8pnPTCco,8415
|
45
|
-
kreuzberg/_utils/_quality.py,sha256=m3SIXGDY9pfRmh3XeKdZWT1vBz7issH0SfKsutEuRxw,5833
|
46
|
-
kreuzberg/_utils/_ref.py,sha256=uP_S3x0AQH2Nyjo1tYEj7N_u9hGzYVewdjch6a8Fv5I,1458
|
47
|
-
kreuzberg/_utils/_serialization.py,sha256=duKP5OuBvi-m6ljQOhoyuJU7sl2WPnov8yJDpYuDArw,2052
|
48
|
-
kreuzberg/_utils/_string.py,sha256=yrcwHHl23FxWrNoFXkmR3icgivfvbLRvkqQek8F3qqI,5020
|
49
|
-
kreuzberg/_utils/_sync.py,sha256=mc-K2y_sc6mG-HOswlHTXAWaEzgisEERvq9PPw2dAw4,4869
|
50
|
-
kreuzberg/_utils/_table.py,sha256=dYM_dWNHRCXcWOhSQBnahOJaBXyuQFyYX9arHrH4TF8,7555
|
51
|
-
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
52
|
-
kreuzberg-3.13.0.dist-info/METADATA,sha256=896BWDLD6ApGiOQFKXMqQezC4qgKRUxjMqbZVWxBoJ0,12098
|
53
|
-
kreuzberg-3.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
54
|
-
kreuzberg-3.13.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
|
55
|
-
kreuzberg-3.13.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
56
|
-
kreuzberg-3.13.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|