kreuzberg 3.17.3__py3-none-any.whl → 3.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kreuzberg
3
- Version: 3.17.3
3
+ Version: 3.19.0
4
4
  Summary: Document intelligence framework for Python - Extract text, metadata, and structured data from diverse file formats
5
5
  Project-URL: documentation, https://kreuzberg.dev
6
6
  Project-URL: homepage, https://github.com/Goldziher/kreuzberg
@@ -28,12 +28,12 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
28
  Classifier: Topic :: Text Processing :: General
29
29
  Classifier: Typing :: Typed
30
30
  Requires-Python: >=3.10
31
- Requires-Dist: anyio>=4.10.0
31
+ Requires-Dist: anyio>=4.11.0
32
32
  Requires-Dist: chardetng-py>=0.3.5
33
33
  Requires-Dist: exceptiongroup>=1.2.2; python_version < '3.11'
34
- Requires-Dist: html-to-markdown[lxml]>=1.14.0
34
+ Requires-Dist: html-to-markdown[lxml]>=1.16.0
35
35
  Requires-Dist: langcodes>=3.5.0
36
- Requires-Dist: mcp>=1.14.1
36
+ Requires-Dist: mcp>=1.15.0
37
37
  Requires-Dist: msgspec>=0.18.0
38
38
  Requires-Dist: numpy>=2.0.0
39
39
  Requires-Dist: playa-pdf>=0.7.0
@@ -42,6 +42,7 @@ Requires-Dist: psutil>=7.1.0
42
42
  Requires-Dist: pypdfium2==4.30.0
43
43
  Requires-Dist: python-calamine>=0.5.3
44
44
  Requires-Dist: python-pptx>=1.0.2
45
+ Requires-Dist: transformers>=4.30.0
45
46
  Requires-Dist: typing-extensions>=4.15.0; python_version < '3.12'
46
47
  Provides-Extra: additional-extensions
47
48
  Requires-Dist: mailparse>=1.0.15; extra == 'additional-extensions'
@@ -63,6 +64,7 @@ Requires-Dist: semantic-text-splitter>=0.28.0; extra == 'all'
63
64
  Requires-Dist: setuptools>=80.9.0; extra == 'all'
64
65
  Requires-Dist: spacy>=3.8.7; extra == 'all'
65
66
  Requires-Dist: tomli>=2.0.0; (python_version < '3.11') and extra == 'all'
67
+ Requires-Dist: transformers>=4.25.0; extra == 'all'
66
68
  Provides-Extra: api
67
69
  Requires-Dist: litestar[opentelemetry,standard,structlog]>=2.17.0; extra == 'api'
68
70
  Provides-Extra: chunking
@@ -82,6 +84,7 @@ Requires-Dist: keybert>=0.9.0; extra == 'entity-extraction'
82
84
  Requires-Dist: spacy>=3.8.7; extra == 'entity-extraction'
83
85
  Provides-Extra: gmft
84
86
  Requires-Dist: gmft>=0.4.2; extra == 'gmft'
87
+ Requires-Dist: transformers>=4.25.0; extra == 'gmft'
85
88
  Provides-Extra: langdetect
86
89
  Requires-Dist: fast-langdetect>=1.0.0; extra == 'langdetect'
87
90
  Provides-Extra: paddleocr
@@ -4,30 +4,31 @@ kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
4
  kreuzberg/_config.py,sha256=ZYIcnJAjDnbWW_2WBy7NlOk1Ol6WpoMG5FMNMmHpqSY,13086
5
5
  kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
6
  kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
- kreuzberg/_entity_extraction.py,sha256=NuGcmIU-gBfzKmrhk6AcO6angCeUbML8REKPp7CE8sc,4710
7
+ kreuzberg/_entity_extraction.py,sha256=Ks-1gZIYDqgg2uJerd0FH_lYhjIwS0f0bMVhR9M59jA,7518
8
+ kreuzberg/_error_handling.py,sha256=Isr9yrY4JRKOmUVaUOky_LZ7tGVZAm8jxRD3qGbkc1g,5604
8
9
  kreuzberg/_gmft.py,sha256=gfRXOsv-K9R7Y0zZ2SUa5wid3FpP2eFIlg5nepWcz1Q,20827
9
- kreuzberg/_language_detection.py,sha256=y48gNaexnC6OIVTh3yBjXDumMeIKMggCDuacoXa7AvU,1080
10
+ kreuzberg/_language_detection.py,sha256=4JzQldcDIVZRWUzRFc9AOFiq6Wfl9858mip1ZnrD2Ks,1143
10
11
  kreuzberg/_mime_types.py,sha256=duEMDBg_qIf9A02tXAC_2znD-wgE-2BBMW9ofyYTJjE,8622
11
12
  kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
13
  kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
- kreuzberg/_types.py,sha256=ttY61QI8mruCI70Af3owlU-O5LdvQ6gOqIZTGQ9PaVs,49129
14
- kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
14
+ kreuzberg/_types.py,sha256=6oBsmUUihVr4hJJrYeuWoUVzCP_-eciCrBVvGQHQTDI,49920
15
+ kreuzberg/cli.py,sha256=P_dqOHbGh-fFYZ4WErjngTKq7wbqaUmTD1Gjw2lIsDI,15242
15
16
  kreuzberg/exceptions.py,sha256=KiGAfIX3_TkGYG1h9eTZ_E_pALsAqhZ_A3XfhwxwaS0,2909
16
- kreuzberg/extraction.py,sha256=ArsmHcJDvjx9Cog3IQ0D52oS9GbaH_Yhs5mfJfGgiaM,18982
17
+ kreuzberg/extraction.py,sha256=jMsomvg7SPnuXLGZKQl0YH64D0AhczSNDM4CKORd9d0,24185
17
18
  kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
19
  kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
- kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
+ kreuzberg/_api/main.py,sha256=tmg1fICU4wshq0XXhGOk22oivfXjELtsEgOumdkZNI4,15257
21
22
  kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- kreuzberg/_extractors/_base.py,sha256=4MRBXdLsgdtdrTuupWb2IT9YpRSnNPpWWviS2mfeOXg,9961
23
+ kreuzberg/_extractors/_base.py,sha256=99r-CUZcAp72c0mqkj-E41lj0SyzNaTb_w2EtKgfGJ8,9934
23
24
  kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
- kreuzberg/_extractors/_html.py,sha256=TXXgwQZuEvnrny5HdBpn8oikGktyxgY9jvgZmnFtnqY,6371
25
+ kreuzberg/_extractors/_html.py,sha256=vNAgBrfok-16SOkhhsy10unqVwAczlTL_2KEn2X6S98,6315
25
26
  kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
27
  kreuzberg/_extractors/_pandoc.py,sha256=cwthr--IFwbu8r0rCZ_Cx5zRlan94yuqt5e3mjYxesE,24182
27
- kreuzberg/_extractors/_pdf.py,sha256=GFy7xHUH09i48E5Xixy6nReF_uBu9646UTjywKoH-Rs,23304
28
+ kreuzberg/_extractors/_pdf.py,sha256=_MPtO_8BCpyAXyIWusmfqOaEsPMDxucjTQKz3cTaj8o,22663
28
29
  kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
30
  kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
- kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
+ kreuzberg/_extractors/_structured.py,sha256=thpXhsBnvaHzGQX4sy6eVHowFv0yaYxLGHwxx4DouCI,8947
31
32
  kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
33
  kreuzberg/_mcp/server.py,sha256=71MhjiFDwgFROdGejf0djgO1eG370qudWmZsN59CUeA,16743
33
34
  kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
@@ -35,7 +36,7 @@ kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
36
  kreuzberg/_ocr/_easyocr.py,sha256=bHz2S_8nNHaPHPemcJK-U0al9_qP-vUmWE4ECVlf7AA,15485
36
37
  kreuzberg/_ocr/_paddleocr.py,sha256=CV9cCjkRe-3cNJ5tRu_sBXd_HNghEwfPIgWwxAZTeRY,15026
37
38
  kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
- kreuzberg/_ocr/_tesseract.py,sha256=1SEfrX_JvU6KIeWt31GsRWnNmjaAh3xgQaRMPvoZLJA,51349
39
+ kreuzberg/_ocr/_tesseract.py,sha256=Uu6H1LMh1WSC1OmKhPx-miG98r9KEfc0GF7b8isS33E,52420
39
40
  kreuzberg/_token_reduction/__init__.py,sha256=y_2WgPxJes8_PD-VMfx7vQT0hGjFIixzS8PjaIseAGg,311
40
41
  kreuzberg/_token_reduction/_reducer.py,sha256=shAfMPznP69sTSzwX_bE1LpcBmoia9cpd7r6bSc4R5Q,13609
41
42
  kreuzberg/_token_reduction/_stopwords.py,sha256=mu-5CapG0RCP7LYzjhdTM6WWLtmt3cjZ08OOsyQkJVg,3608
@@ -121,8 +122,8 @@ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4
121
122
  kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
122
123
  kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
123
124
  kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
124
- kreuzberg-3.17.3.dist-info/METADATA,sha256=XXgXKaiujoGAGsCn-skmPDij6vcQ9XqwbA1LBpX_Pvw,12351
125
- kreuzberg-3.17.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
126
- kreuzberg-3.17.3.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
127
- kreuzberg-3.17.3.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
128
- kreuzberg-3.17.3.dist-info/RECORD,,
125
+ kreuzberg-3.19.0.dist-info/METADATA,sha256=fV1j2iWA2-rcZodFFV3kmSsuBJhoDsW6OuyIu9Myf4A,12492
126
+ kreuzberg-3.19.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
127
+ kreuzberg-3.19.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
128
+ kreuzberg-3.19.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
129
+ kreuzberg-3.19.0.dist-info/RECORD,,