kreuzberg 3.13.0__py3-none-any.whl → 3.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. kreuzberg/_chunker.py +0 -15
  2. kreuzberg/_config.py +0 -124
  3. kreuzberg/_document_classification.py +20 -39
  4. kreuzberg/_entity_extraction.py +0 -29
  5. kreuzberg/_extractors/_base.py +4 -66
  6. kreuzberg/_extractors/_email.py +0 -4
  7. kreuzberg/_extractors/_image.py +0 -2
  8. kreuzberg/_extractors/_pandoc.py +0 -58
  9. kreuzberg/_extractors/_pdf.py +0 -3
  10. kreuzberg/_extractors/_presentation.py +0 -82
  11. kreuzberg/_extractors/_spread_sheet.py +0 -2
  12. kreuzberg/_gmft.py +0 -61
  13. kreuzberg/_language_detection.py +0 -14
  14. kreuzberg/_mime_types.py +0 -17
  15. kreuzberg/_ocr/_base.py +4 -76
  16. kreuzberg/_ocr/_easyocr.py +110 -85
  17. kreuzberg/_ocr/_paddleocr.py +146 -138
  18. kreuzberg/_ocr/_table_extractor.py +0 -76
  19. kreuzberg/_ocr/_tesseract.py +0 -206
  20. kreuzberg/_playa.py +0 -27
  21. kreuzberg/_registry.py +0 -36
  22. kreuzberg/_types.py +16 -119
  23. kreuzberg/_utils/_cache.py +0 -52
  24. kreuzberg/_utils/_device.py +0 -56
  25. kreuzberg/_utils/_document_cache.py +0 -73
  26. kreuzberg/_utils/_errors.py +0 -47
  27. kreuzberg/_utils/_ocr_cache.py +136 -0
  28. kreuzberg/_utils/_pdf_lock.py +0 -14
  29. kreuzberg/_utils/_process_pool.py +0 -47
  30. kreuzberg/_utils/_quality.py +0 -17
  31. kreuzberg/_utils/_ref.py +0 -16
  32. kreuzberg/_utils/_serialization.py +0 -25
  33. kreuzberg/_utils/_string.py +0 -20
  34. kreuzberg/_utils/_sync.py +0 -76
  35. kreuzberg/_utils/_table.py +0 -45
  36. kreuzberg/_utils/_tmp.py +0 -9
  37. kreuzberg/cli.py +2 -2
  38. {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/METADATA +3 -2
  39. kreuzberg-3.13.2.dist-info/RECORD +57 -0
  40. kreuzberg-3.13.0.dist-info/RECORD +0 -56
  41. {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/WHEEL +0 -0
  42. {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/entry_points.txt +0 -0
  43. {kreuzberg-3.13.0.dist-info → kreuzberg-3.13.2.dist-info}/licenses/LICENSE +0 -0
@@ -21,20 +21,10 @@ _encoding_cache: dict[str, str] = {}
21
21
 
22
22
  @lru_cache(maxsize=128)
23
23
  def _get_encoding_cache_key(data_hash: str, size: int) -> str:
24
- """Generate cache key for encoding detection."""
25
24
  return f"{data_hash}:{size}"
26
25
 
27
26
 
28
27
  def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
29
- """Decode a byte string safely with mojibake detection and correction.
30
-
31
- Args:
32
- byte_data: The byte string to decode.
33
- encoding: The encoding to use when decoding the byte string.
34
-
35
- Returns:
36
- The decoded string with mojibake detection and correction.
37
- """
38
28
  if not byte_data:
39
29
  return ""
40
30
 
@@ -88,7 +78,6 @@ def safe_decode(byte_data: bytes, encoding: str | None = None) -> str:
88
78
 
89
79
 
90
80
  def _calculate_text_confidence(text: str) -> float:
91
- """Calculate confidence score for decoded text quality."""
92
81
  if not text:
93
82
  return 0.0
94
83
 
@@ -114,7 +103,6 @@ def _calculate_text_confidence(text: str) -> float:
114
103
 
115
104
 
116
105
  def _fix_mojibake(text: str) -> str:
117
- """Attempt to fix common mojibake patterns."""
118
106
  if not text:
119
107
  return text
120
108
 
@@ -131,14 +119,6 @@ def _fix_mojibake(text: str) -> str:
131
119
 
132
120
 
133
121
  def normalize_spaces(text: str) -> str:
134
- """Normalize spaces while preserving line breaks and paragraph structure.
135
-
136
- Args:
137
- text: The text to normalize.
138
-
139
- Returns:
140
- The normalized text with proper spacing.
141
- """
142
122
  if not text or not text.strip():
143
123
  return ""
144
124
 
kreuzberg/_utils/_sync.py CHANGED
@@ -18,16 +18,6 @@ P = ParamSpec("P")
18
18
 
19
19
 
20
20
  async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
21
- """Run a synchronous function in an asynchronous context.
22
-
23
- Args:
24
- sync_fn: The synchronous function to run.
25
- *args: The positional arguments to pass to the function.
26
- **kwargs: The keyword arguments to pass to the function.
27
-
28
- Returns:
29
- The result of the synchronous function.
30
- """
31
21
  if kwargs:
32
22
  handler = partial(sync_fn, **kwargs)
33
23
  return cast("T", await any_io_run_sync(handler, *args, abandon_on_cancel=True)) # pyright: ignore [reportCallIssue]
@@ -35,14 +25,6 @@ async def run_sync(sync_fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -
35
25
 
36
26
 
37
27
  async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
38
- """Run a list of coroutines concurrently.
39
-
40
- Args:
41
- *async_tasks: The list of coroutines to run.
42
-
43
- Returns:
44
- The results of the coroutines.
45
- """
46
28
  results: list[Any] = [None] * len(async_tasks)
47
29
 
48
30
  async def run_task(index: int, task: Awaitable[T]) -> None:
@@ -56,15 +38,6 @@ async def run_taskgroup(*async_tasks: Awaitable[Any]) -> list[Any]:
56
38
 
57
39
 
58
40
  async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -> list[Any]:
59
- """Run a list of coroutines concurrently in batches.
60
-
61
- Args:
62
- *async_tasks: The list of coroutines to run.
63
- batch_size: The size of each batch.
64
-
65
- Returns:
66
- The results of the coroutines.
67
- """
68
41
  results: list[Any] = []
69
42
 
70
43
  for i in range(0, len(async_tasks), batch_size):
@@ -75,25 +48,6 @@ async def run_taskgroup_batched(*async_tasks: Awaitable[Any], batch_size: int) -
75
48
 
76
49
 
77
50
  async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
78
- """Executes a callable function and handles both synchronous and asynchronous
79
- results.
80
-
81
- This function invokes the provided callable `sync_fn` with the given
82
- arguments and keyword arguments. If the result of `sync_fn` is awaitable,
83
- it awaits the result before returning it. Otherwise, the result is returned
84
- directly.
85
-
86
- Args:
87
- fn: The callable to be executed. It can produce either a
88
- synchronous or asynchronous result.
89
- *args: Positional arguments to pass to `sync_fn`.
90
- **kwargs: Keyword arguments to pass to `sync_fn`.
91
-
92
- Returns:
93
- The result of `sync_fn` invocation. If the result is awaitable, the
94
- awaited value is returned. Otherwise, the synchronous result is
95
- returned.
96
- """
97
51
  result = fn(*args, **kwargs)
98
52
  if isawaitable(result):
99
53
  return cast("T", await result)
@@ -101,40 +55,10 @@ async def run_maybe_sync(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwa
101
55
 
102
56
 
103
57
  def run_maybe_async(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
104
- """Runs a synchronous or asynchronous function, resolving the output.
105
-
106
- Determines if the provided function is synchronous or asynchronous. If synchronous,
107
- executes it directly. If asynchronous, it runs the function within the event loop
108
- using anyio. The return value is resolved regardless of the function type.
109
-
110
- Args:
111
- fn: The function to be executed, which can
112
- either be synchronous or asynchronous.
113
- *args: Positional arguments to be passed to the function.
114
- **kwargs: Keyword arguments to be passed to the function.
115
-
116
- Returns:
117
- T: The return value of the executed function, resolved if asynchronous.
118
- """
119
58
  return cast("T", fn(*args, **kwargs) if not iscoroutinefunction(fn) else anyio.run(partial(fn, **kwargs), *args))
120
59
 
121
60
 
122
61
  def run_sync_only(fn: Callable[P, T | Awaitable[T]], *args: P.args, **kwargs: P.kwargs) -> T:
123
- """Runs a function, but only if it's synchronous. Raises error if async.
124
-
125
- This is used for pure sync code paths where we cannot handle async functions.
126
-
127
- Args:
128
- fn: The function to be executed, must be synchronous.
129
- *args: Positional arguments to be passed to the function.
130
- **kwargs: Keyword arguments to be passed to the function.
131
-
132
- Returns:
133
- T: The return value of the executed function.
134
-
135
- Raises:
136
- RuntimeError: If the function is asynchronous.
137
- """
138
62
  if iscoroutinefunction(fn):
139
63
  raise RuntimeError(f"Cannot run async function {fn.__name__} in sync-only context")
140
64
  return cast("T", fn(*args, **kwargs))
@@ -8,15 +8,6 @@ if TYPE_CHECKING:
8
8
 
9
9
 
10
10
  def export_table_to_csv(table: TableData, separator: str = ",") -> str:
11
- r"""Export a TableData object to CSV/TSV format.
12
-
13
- Args:
14
- table: TableData object containing DataFrame
15
- separator: Field separator ("," for CSV, "\t" for TSV)
16
-
17
- Returns:
18
- String representation in CSV/TSV format
19
- """
20
11
  if "df" not in table or table["df"] is None:
21
12
  return ""
22
13
 
@@ -27,26 +18,10 @@ def export_table_to_csv(table: TableData, separator: str = ",") -> str:
27
18
 
28
19
 
29
20
  def export_table_to_tsv(table: TableData) -> str:
30
- """Export a TableData object to TSV format.
31
-
32
- Args:
33
- table: TableData object containing DataFrame
34
-
35
- Returns:
36
- String representation in TSV format
37
- """
38
21
  return export_table_to_csv(table, separator="\t")
39
22
 
40
23
 
41
24
  def enhance_table_markdown(table: TableData) -> str:
42
- """Generate enhanced markdown table with better formatting.
43
-
44
- Args:
45
- table: TableData object
46
-
47
- Returns:
48
- Enhanced markdown table string
49
- """
50
25
  if "df" not in table or table["df"] is None:
51
26
  return table.get("text", "")
52
27
 
@@ -72,7 +47,6 @@ def enhance_table_markdown(table: TableData) -> str:
72
47
 
73
48
 
74
49
  def _generate_separator_row(df: Any) -> str:
75
- """Generate separator row with proper alignment hints."""
76
50
  separators = []
77
51
  for col in df.columns:
78
52
  dtype_str = str(df[col].dtype)
@@ -84,7 +58,6 @@ def _generate_separator_row(df: Any) -> str:
84
58
 
85
59
 
86
60
  def _analyze_float_columns(df: Any) -> dict[str, str]:
87
- """Analyze float columns to determine formatting strategy."""
88
61
  float_col_formatting = {}
89
62
  for col in df.columns:
90
63
  dtype_str = str(df[col].dtype)
@@ -103,7 +76,6 @@ def _analyze_float_columns(df: Any) -> dict[str, str]:
103
76
 
104
77
 
105
78
  def _format_table_row(row: Any, df: Any, float_col_formatting: dict[str, str]) -> list[str]:
106
- """Format a single table row with proper value formatting."""
107
79
  formatted_row = []
108
80
  for col_name, value in row.items():
109
81
  if value is None:
@@ -124,7 +96,6 @@ def _format_table_row(row: Any, df: Any, float_col_formatting: dict[str, str]) -
124
96
 
125
97
 
126
98
  def _is_numeric_column(series: Any) -> bool:
127
- """Check if a polars Series contains mostly numeric values."""
128
99
  if len(series) == 0:
129
100
  return False
130
101
 
@@ -161,14 +132,6 @@ def _is_numeric_column(series: Any) -> bool:
161
132
 
162
133
 
163
134
  def generate_table_summary(tables: list[TableData]) -> dict[str, Any]:
164
- """Generate summary statistics for extracted tables.
165
-
166
- Args:
167
- tables: List of TableData objects
168
-
169
- Returns:
170
- Dictionary with table statistics
171
- """
172
135
  if not tables:
173
136
  return {
174
137
  "table_count": 0,
@@ -208,14 +171,6 @@ def generate_table_summary(tables: list[TableData]) -> dict[str, Any]:
208
171
 
209
172
 
210
173
  def extract_table_structure_info(table: TableData) -> dict[str, Any]:
211
- """Extract structural information from a table.
212
-
213
- Args:
214
- table: TableData object
215
-
216
- Returns:
217
- Dictionary with structural information
218
- """
219
174
  info = {
220
175
  "has_headers": False,
221
176
  "row_count": 0,
kreuzberg/_utils/_tmp.py CHANGED
@@ -16,15 +16,6 @@ if TYPE_CHECKING: # pragma: no cover
16
16
  async def create_temp_file(
17
17
  extension: str, content: bytes | None = None
18
18
  ) -> tuple[Path, Callable[[], Coroutine[None, None, None]]]:
19
- """Create a temporary file that is closed.
20
-
21
- Args:
22
- extension: The file extension.
23
- content: The content to write to the file.
24
-
25
- Returns:
26
- The temporary file path.
27
- """
28
19
  file = await run_sync(NamedTemporaryFile, suffix=extension, delete=False)
29
20
  if content:
30
21
  await AsyncPath(file.name).write_bytes(content)
kreuzberg/cli.py CHANGED
@@ -265,7 +265,7 @@ def cli(ctx: click.Context) -> None:
265
265
  @click.option("--easyocr-languages", help="EasyOCR language codes (comma-separated, e.g., 'en,de')")
266
266
  @click.option("--paddleocr-languages", help="PaddleOCR language codes (comma-separated, e.g., 'en,german')")
267
267
  @click.pass_context
268
- def extract(ctx: click.Context) -> None:
268
+ def extract(ctx: click.Context, /, **kwargs: Any) -> None:
269
269
  """Extract text from a document.
270
270
 
271
271
  FILE can be a path to a document or '-' to read from stdin.
@@ -279,7 +279,7 @@ def extract(ctx: click.Context) -> None:
279
279
 
280
280
  extraction_config = build_extraction_config(file_config, cli_args)
281
281
 
282
- result = _perform_extraction(params["file"], extraction_config, params["verbose"])
282
+ result = _perform_extraction(kwargs.get("file"), extraction_config, params["verbose"])
283
283
 
284
284
  _write_output(result, params["output"], params["show_metadata"], params["output_format"], params["verbose"])
285
285
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.13.0
3
+ Version: 3.13.2
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -34,11 +34,12 @@ Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
34
  Requires-Dist: html-to-markdown[lxml]>=1.9.1
35
35
  Requires-Dist: mcp>=1.13.0
36
36
  Requires-Dist: msgspec>=0.18.0
37
+ Requires-Dist: numpy>=1.24.0
37
38
  Requires-Dist: playa-pdf>=0.7.0
38
39
  Requires-Dist: polars>=1.33.0
39
40
  Requires-Dist: psutil>=7.0.0
40
41
  Requires-Dist: pypdfium2==4.30.0
41
- Requires-Dist: python-calamine>=0.3.2
42
+ Requires-Dist: python-calamine>=0.5.2
42
43
  Requires-Dist: python-pptx>=1.0.2
43
44
  Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
44
45
  Provides-Extra: additional-extensions
@@ -0,0 +1,57 @@
1
+ kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=tr9_KUYTSLauFois3MsB-A-0hGcTT8hTQFrqNRTii-I,1373
4
+ kreuzberg/_config.py,sha256=Q5oiJE1XRf8ITuYcO8LZAOB3G2zNlXz2458rgPSth-U,12257
5
+ kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
+ kreuzberg/_document_classification.py,sha256=Mz_s2GJGsEl7MQ-67BPoGYCZibTy9Sw0PScUZKBjKOA,5736
7
+ kreuzberg/_entity_extraction.py,sha256=5YpPnqoJ5aiHd_sy4bN4-Ngiq79RhCV6yaUQE8joGXo,3503
8
+ kreuzberg/_gmft.py,sha256=jKbD7V_KP9XTLjT9SBgSgE3CyDjqbRDm9BAiWV2sAC0,19542
9
+ kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
+ kreuzberg/_mime_types.py,sha256=kGBDSMO4XPgzUKC7iaBeChCtRQXZ9_zXq6eJydejX_k,7739
11
+ kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
+ kreuzberg/_registry.py,sha256=8cPpz3oZVnMwWDT2v_Q7wf-GHd5YuHmc-nkLtvPfE1I,2433
13
+ kreuzberg/_types.py,sha256=D-2d_WG8HyByA163izGhjk7t-e4FL_N-_6UzlVso8Dg,36020
14
+ kreuzberg/cli.py,sha256=nPH4FDW6WkoF4gtH0s4RWmxjAveJ_-Unb6fev6x0Sko,12752
15
+ kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
+ kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
20
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ kreuzberg/_extractors/_base.py,sha256=i2FvAhRnamEtBb4a-C7pfcdWIXnkEBw0saMQu7h1_RQ,2069
22
+ kreuzberg/_extractors/_email.py,sha256=jn_8J4BASKJ7zFHBG0PgxNe3OT4pjmEM2tTKX8y_0AE,5887
23
+ kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
24
+ kreuzberg/_extractors/_image.py,sha256=UqPoYfvDRX6Rd1yPhcLHJLDw6d2cUzgkqOGjh2eleJM,3301
25
+ kreuzberg/_extractors/_pandoc.py,sha256=-Ai4S1cXs7F6yeonb_7Y7_ZoWHn29E2oP1WlPtM-4HM,22505
26
+ kreuzberg/_extractors/_pdf.py,sha256=Yv_c3xYzrGAjgTbwCGqbiQTDLjIUP_Pu7Z3GmMOqgqg,17865
27
+ kreuzberg/_extractors/_presentation.py,sha256=ULGkt7dzeA9sYSEhpAucKZmkdv9EubzeZtOjoLP3Z2E,6994
28
+ kreuzberg/_extractors/_spread_sheet.py,sha256=x25u2M-ufxpDd7_qrjhMEz1yFftIcOISE1qwPW09Zm0,11962
29
+ kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
30
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
31
+ kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
32
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
33
+ kreuzberg/_ocr/_base.py,sha256=5ef2g8JuSaZF2sDiAmoaODHbeG4MT0LtNzbtW0n9BnU,1445
34
+ kreuzberg/_ocr/_easyocr.py,sha256=XbgpGt5tkE4xHleIGvV1cHlpOQTp43rSXBO1CyIyKTg,14599
35
+ kreuzberg/_ocr/_paddleocr.py,sha256=58sKOHfKCHGFJNlRLrJwey8G_7xbsAAPBXB4n3hKc7k,14052
36
+ kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
37
+ kreuzberg/_ocr/_tesseract.py,sha256=xGML3ygY5xMN5T3YznrKDVAH_DWfaFiteFBo_-GpjCs,48931
38
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ kreuzberg/_utils/_cache.py,sha256=S6Oc4TJamiuuWeJ2ABxDFbbQh4o8w38AUyZeBEc1NN8,12767
40
+ kreuzberg/_utils/_device.py,sha256=UxGkSTN3Up-Zn43CSyvf8CozW2xAF05Cm01LWA2FZmg,8263
41
+ kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
42
+ kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
43
+ kreuzberg/_utils/_ocr_cache.py,sha256=8_-qmPlK2adQKsH4OO4Mlk8wmqBMl3XxkcV_NsXVyFs,3501
44
+ kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
45
+ kreuzberg/_utils/_process_pool.py,sha256=9dPMD_gBocQ5VaeCIrlSJfPXKyXNuyKaATmqOPExxiE,6723
46
+ kreuzberg/_utils/_quality.py,sha256=f7NbyZysyJQD8jKCNWhogvluU9A7GdEYhMsDBeMbGAA,5412
47
+ kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
48
+ kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
49
+ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
50
+ kreuzberg/_utils/_sync.py,sha256=OWiciXPTGHIxgiGoHI2AglZ1siTNT-nU_JCgHPNzzHk,2196
51
+ kreuzberg/_utils/_table.py,sha256=R-6owHjvcvHGhem_vDsFH7S2yMHGoUUO2PFcj-Idptk,6361
52
+ kreuzberg/_utils/_tmp.py,sha256=wnOInBkcuQoxI1vBLvNv9NqbRCEu9Y03qfOjqQuAk3s,841
53
+ kreuzberg-3.13.2.dist-info/METADATA,sha256=c1w8iB_Frnzr0DHY-X-a9rk5S9vQPICPIniPzwfvHV8,12127
54
+ kreuzberg-3.13.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
55
+ kreuzberg-3.13.2.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
56
+ kreuzberg-3.13.2.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
57
+ kreuzberg-3.13.2.dist-info/RECORD,,
@@ -1,56 +0,0 @@
1
- kreuzberg/__init__.py,sha256=Oh_NTp8wf0BlvD8CSBad2A493nEWH4jTE0x8v7v1Y9w,1341
2
- kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
- kreuzberg/_chunker.py,sha256=y4-dX6ILjjBkkC1gkCzXb7v7vbi8844m7vz1gIzbmv4,1952
4
- kreuzberg/_config.py,sha256=dSTumnpleMeUjUabWgAH7WlhTkdNG3eeMv8FSFmUaEI,15776
5
- kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
- kreuzberg/_document_classification.py,sha256=NZ-6tQtVa1OgigC7xf30hAsnL5_gi9ak9X2XYdsCfTI,6361
7
- kreuzberg/_entity_extraction.py,sha256=QFIPQ_fovEnEezpS6W4pwpjTA2PqS7TUCD9AKf8sAyc,4666
8
- kreuzberg/_gmft.py,sha256=60WpPTf7jocU-kmkBe-pBytl7l58aQzd-Aw2_Hlioug,21481
9
- kreuzberg/_language_detection.py,sha256=yLUliJOUyofVma_q6FwzG9Ck4-XX3AEjxleTHrqi8R4,2445
10
- kreuzberg/_mime_types.py,sha256=fwtPKtp2XhCLT686qF26PBMeOqcVJroKPwkp7JgaM0E,8462
11
- kreuzberg/_playa.py,sha256=1viLRqgcDWvaPo5ZsDPO2gqHFSBApOYortTV_SPVK9k,12190
12
- kreuzberg/_registry.py,sha256=wGSlkS0U1zqruWQCLE95vj4a2mw1yyvf0j6rgz80sJg,3473
13
- kreuzberg/_types.py,sha256=WFUFY1S7SL7kTfHCX-zGASLYT94FxLD71C9vGUzFOiA,38922
14
- kreuzberg/cli.py,sha256=MLeWoMcLoN6WnkbyRbOY-2dqp-vNZf7Nb-K_R5F5CoU,12730
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=jiMKiDyTf3sHyk76sMffHR-eH-_yg-DFRMuXEKufRYI,17649
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/main.py,sha256=JALYRD0qwyoZloWk5dNNuslBtG4GlVNc0G2oADm6cAc,7578
20
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- kreuzberg/_extractors/_base.py,sha256=EZTEJzwJxwu_yYFQ5QlZVNQMPCcli7yyUB4T5mFotCY,4209
22
- kreuzberg/_extractors/_email.py,sha256=mVi_VDmiFhe6NgiWxJDYt4DQiP6jVs5dP8BsPClm3WQ,6108
23
- kreuzberg/_extractors/_html.py,sha256=NyQKChNLvaSUC_5x1qTYlIQGwL4lEbgUF7BgH9ejEVY,1583
24
- kreuzberg/_extractors/_image.py,sha256=UZEOmKNAS4KjaX38iYq2Ux6Mta3juCF1MzWNeBxpPE8,3414
25
- kreuzberg/_extractors/_pandoc.py,sha256=zumwImIXwD3ziPhYxt0EQct5sSMy5lQiY6KnPSDxBTU,24183
26
- kreuzberg/_extractors/_pdf.py,sha256=766O7rXAeAJ42vPpWbGpW_WgHXm48eWwX09l3aqjKeM,18064
27
- kreuzberg/_extractors/_presentation.py,sha256=BJdEM9jsuAd0vb-PIRwNMcRj4xVjItb5kpOpnjsCBi0,10175
28
- kreuzberg/_extractors/_spread_sheet.py,sha256=wqAV-Stqfd4hXs5ock-chqBEdzv4voSgT1uFUO1cIU0,12075
29
- kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
30
- kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
31
- kreuzberg/_mcp/server.py,sha256=iYJG6g0u7I6mWtC4R1XlxydBrPpgnp5dGJzpm9QAZig,8438
32
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
33
- kreuzberg/_ocr/_base.py,sha256=IkONqwG6zxZoVMni1JlYugBoyONahlRny7J2_7Dy69c,3953
34
- kreuzberg/_ocr/_easyocr.py,sha256=CtiHGx_BmuUwZhC7bScYF9mwnAxRrLWJ-X70fuwFTjk,14079
35
- kreuzberg/_ocr/_paddleocr.py,sha256=wCuIQ_yxPWE9hukiehYNRdt00Rb2h6pWdfqPS8hI2s0,14297
36
- kreuzberg/_ocr/_table_extractor.py,sha256=MeQLQn_bRco5OAcUoy613ZbZLCDBRJY8uHH_bUBSP8I,7613
37
- kreuzberg/_ocr/_tesseract.py,sha256=i_UTjOmrFxZbtmXxrQIsE78wtZLTyZph0i0jDQc4EMA,56916
38
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- kreuzberg/_utils/_cache.py,sha256=fDqFp_54-Kyn3_4VkXkhovvNIB2osKqXlS13MlORrU8,14539
40
- kreuzberg/_utils/_device.py,sha256=35xQvrLSPISJlWicQGknoBjkwdalwVxiJbzyxwuwOVo,9747
41
- kreuzberg/_utils/_document_cache.py,sha256=CpCdJVd8SYLjfwm0ozSM8mx5x8i9vVDet3BlEUpzuZY,6920
42
- kreuzberg/_utils/_errors.py,sha256=ctD-s1q7vbEgqHQ3OVJiEOODDLTd2LvrM3z6o37zrGI,6395
43
- kreuzberg/_utils/_pdf_lock.py,sha256=mHB1A4Fo_nSfgdqUNEWODH9b5tNFqpEHcNE6rT41dGE,1886
44
- kreuzberg/_utils/_process_pool.py,sha256=ebuMPmHXPkWaLWjgAkeaONvAZo974PhfENN8pnPTCco,8415
45
- kreuzberg/_utils/_quality.py,sha256=m3SIXGDY9pfRmh3XeKdZWT1vBz7issH0SfKsutEuRxw,5833
46
- kreuzberg/_utils/_ref.py,sha256=uP_S3x0AQH2Nyjo1tYEj7N_u9hGzYVewdjch6a8Fv5I,1458
47
- kreuzberg/_utils/_serialization.py,sha256=duKP5OuBvi-m6ljQOhoyuJU7sl2WPnov8yJDpYuDArw,2052
48
- kreuzberg/_utils/_string.py,sha256=yrcwHHl23FxWrNoFXkmR3icgivfvbLRvkqQek8F3qqI,5020
49
- kreuzberg/_utils/_sync.py,sha256=mc-K2y_sc6mG-HOswlHTXAWaEzgisEERvq9PPw2dAw4,4869
50
- kreuzberg/_utils/_table.py,sha256=dYM_dWNHRCXcWOhSQBnahOJaBXyuQFyYX9arHrH4TF8,7555
51
- kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
52
- kreuzberg-3.13.0.dist-info/METADATA,sha256=896BWDLD6ApGiOQFKXMqQezC4qgKRUxjMqbZVWxBoJ0,12098
53
- kreuzberg-3.13.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
54
- kreuzberg-3.13.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
55
- kreuzberg-3.13.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
56
- kreuzberg-3.13.0.dist-info/RECORD,,