kreuzberg 3.11.1__py3-none-any.whl → 3.11.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
kreuzberg/_gmft.py CHANGED
@@ -444,14 +444,26 @@ def _extract_tables_in_process(
444
444
  cropped_image.save(img_bytes, format="PNG")
445
445
  img_bytes.seek(0)
446
446
 
447
- results.append(
448
- {
449
- "cropped_image_bytes": img_bytes.getvalue(),
450
- "page_number": cropped_table.page.page_number,
451
- "text": data_frame.to_markdown(),
452
- "df_csv": data_frame.to_csv(index=False),
453
- }
454
- )
447
+ if data_frame.empty:
448
+ results.append(
449
+ {
450
+ "cropped_image_bytes": img_bytes.getvalue(),
451
+ "page_number": cropped_table.page.page_number,
452
+ "text": data_frame.to_markdown(),
453
+ "df_columns": data_frame.columns.tolist(),
454
+ "df_csv": None,
455
+ }
456
+ )
457
+ else:
458
+ results.append(
459
+ {
460
+ "cropped_image_bytes": img_bytes.getvalue(),
461
+ "page_number": cropped_table.page.page_number,
462
+ "text": data_frame.to_markdown(),
463
+ "df_columns": None,
464
+ "df_csv": data_frame.to_csv(index=False),
465
+ }
466
+ )
455
467
 
456
468
  result_queue.put((True, results))
457
469
 
@@ -532,7 +544,10 @@ def _extract_tables_isolated(
532
544
  img = Image.open(io.BytesIO(table_dict["cropped_image_bytes"]))
533
545
  import pandas as pd # noqa: PLC0415
534
546
 
535
- df = pd.read_csv(StringIO(table_dict["df_csv"]))
547
+ if table_dict["df_csv"] is None:
548
+ df = pd.DataFrame(columns=table_dict["df_columns"])
549
+ else:
550
+ df = pd.read_csv(StringIO(table_dict["df_csv"]))
536
551
 
537
552
  tables.append(
538
553
  TableData(
@@ -638,7 +653,10 @@ async def _extract_tables_isolated_async(
638
653
  img = Image.open(io.BytesIO(table_dict["cropped_image_bytes"]))
639
654
  import pandas as pd # noqa: PLC0415
640
655
 
641
- df = pd.read_csv(StringIO(table_dict["df_csv"]))
656
+ if table_dict["df_csv"] is None:
657
+ df = pd.DataFrame(columns=table_dict["df_columns"])
658
+ else:
659
+ df = pd.read_csv(StringIO(table_dict["df_csv"]))
642
660
 
643
661
  tables.append(
644
662
  TableData(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.11.1
3
+ Version: 3.11.3
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -32,7 +32,7 @@ Requires-Dist: anyio>=4.10.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
34
  Requires-Dist: html-to-markdown[lxml]>=1.9.0
35
- Requires-Dist: mcp>=1.12.4
35
+ Requires-Dist: mcp>=1.13.0
36
36
  Requires-Dist: msgspec>=0.18.0
37
37
  Requires-Dist: playa-pdf>=0.7.0
38
38
  Requires-Dist: psutil>=7.0.0
@@ -52,9 +52,9 @@ Requires-Dist: gmft>=0.4.2; extra == 'all'
52
52
  Requires-Dist: keybert>=0.9.0; extra == 'all'
53
53
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'all'
54
54
  Requires-Dist: mailparse>=1.0.15; extra == 'all'
55
- Requires-Dist: paddleocr>=3.1.0; extra == 'all'
56
- Requires-Dist: paddlepaddle>=3.1.0; extra == 'all'
57
- Requires-Dist: pandas>=2.3.1; extra == 'all'
55
+ Requires-Dist: paddleocr>=3.2.0; extra == 'all'
56
+ Requires-Dist: paddlepaddle>=3.1.1; extra == 'all'
57
+ Requires-Dist: pandas>=2.3.2; extra == 'all'
58
58
  Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'all'
59
59
  Requires-Dist: rich>=14.1.0; extra == 'all'
60
60
  Requires-Dist: semantic-text-splitter>=0.27.0; extra == 'all'
@@ -73,7 +73,7 @@ Provides-Extra: crypto
73
73
  Requires-Dist: playa-pdf[crypto]>=0.7.0; extra == 'crypto'
74
74
  Provides-Extra: document-classification
75
75
  Requires-Dist: deep-translator>=1.11.4; extra == 'document-classification'
76
- Requires-Dist: pandas>=2.3.1; extra == 'document-classification'
76
+ Requires-Dist: pandas>=2.3.2; extra == 'document-classification'
77
77
  Provides-Extra: easyocr
78
78
  Requires-Dist: easyocr>=1.7.2; extra == 'easyocr'
79
79
  Provides-Extra: entity-extraction
@@ -84,8 +84,8 @@ Requires-Dist: gmft>=0.4.2; extra == 'gmft'
84
84
  Provides-Extra: langdetect
85
85
  Requires-Dist: fast-langdetect>=0.3.2; extra == 'langdetect'
86
86
  Provides-Extra: paddleocr
87
- Requires-Dist: paddleocr>=3.1.0; extra == 'paddleocr'
88
- Requires-Dist: paddlepaddle>=3.1.0; extra == 'paddleocr'
87
+ Requires-Dist: paddleocr>=3.2.0; extra == 'paddleocr'
88
+ Requires-Dist: paddlepaddle>=3.1.1; extra == 'paddleocr'
89
89
  Requires-Dist: setuptools>=80.9.0; extra == 'paddleocr'
90
90
  Description-Content-Type: text/markdown
91
91
 
@@ -5,7 +5,7 @@ kreuzberg/_config.py,sha256=Au521UiR7vcQs_8_hhoWIfmDDMJIrDM3XZUB_qHfCmo,14035
5
5
  kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
6
6
  kreuzberg/_document_classification.py,sha256=qFGmwvUMhnNAvNNJO7E-huPx-Ps-_DWxdNxsozIzgaw,6870
7
7
  kreuzberg/_entity_extraction.py,sha256=Oa1T-9mptimpOHtcda-GtrVYH9PFy7DSJj3thJZUD7k,7902
8
- kreuzberg/_gmft.py,sha256=HdQ7Xpuixxl2Y0jY8C3KfyQEU0mN4yQdqErWCv4TnFY,25573
8
+ kreuzberg/_gmft.py,sha256=6P4gSSmU39puaYAKmdGr9ALf0USYTwRDuvvhG1LmI24,26441
9
9
  kreuzberg/_language_detection.py,sha256=_Ng2aHgPxOHFgd507gVNiIGVmnxxbpgYwsO0bD0yTzg,3315
10
10
  kreuzberg/_mime_types.py,sha256=2warRVqfBUNIg8JBg8yP4pRqaMPvwINosHMkJwtH_Fc,8488
11
11
  kreuzberg/_playa.py,sha256=_IPrUSWwSfDQlWXOpKlauV0D9MhGrujGP5kmQ0U3L0g,12188
@@ -47,8 +47,8 @@ kreuzberg/_utils/_string.py,sha256=bCzO3UO6nXupxvtMWvHqfp1Vd9CTzEH9jmpJXQ7upAU,6
47
47
  kreuzberg/_utils/_sync.py,sha256=7LSavBmxVKQUzdjfx9fYRAI9IbJtRw8iGf_Q8B7RX9g,4923
48
48
  kreuzberg/_utils/_table.py,sha256=IomrfQBP85DZI8RmQjOVs2Siq7VP9FUTYPaZR4t3yRw,8199
49
49
  kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
50
- kreuzberg-3.11.1.dist-info/METADATA,sha256=4b51JDwqoS-gjitz5PEpOlDxZ1-lO2C3BR5X2pec4g0,12136
51
- kreuzberg-3.11.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
52
- kreuzberg-3.11.1.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
53
- kreuzberg-3.11.1.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
54
- kreuzberg-3.11.1.dist-info/RECORD,,
50
+ kreuzberg-3.11.3.dist-info/METADATA,sha256=JOaIdR8UamST2qu5b0zJnk8qvyg5ly8LI7DKaFszUEs,12136
51
+ kreuzberg-3.11.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
52
+ kreuzberg-3.11.3.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
53
+ kreuzberg-3.11.3.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
54
+ kreuzberg-3.11.3.dist-info/RECORD,,