kreuzberg 3.6.2__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/_extractors/_base.py +40 -0
- kreuzberg/_extractors/_email.py +149 -0
- kreuzberg/_extractors/_html.py +15 -3
- kreuzberg/_extractors/_image.py +17 -18
- kreuzberg/_extractors/_pdf.py +68 -14
- kreuzberg/_extractors/_presentation.py +62 -10
- kreuzberg/_extractors/_spread_sheet.py +179 -4
- kreuzberg/_extractors/_structured.py +148 -0
- kreuzberg/_gmft.py +2 -2
- kreuzberg/_mcp/__init__.py +5 -0
- kreuzberg/_mcp/server.py +227 -0
- kreuzberg/_mime_types.py +27 -1
- kreuzberg/_multiprocessing/__init__.py +2 -3
- kreuzberg/_ocr/__init__.py +30 -0
- kreuzberg/{_multiprocessing/tesseract_pool.py → _ocr/_pool.py} +3 -5
- kreuzberg/_ocr/_sync.py +566 -0
- kreuzberg/_ocr/_tesseract.py +6 -2
- kreuzberg/_registry.py +4 -0
- kreuzberg/_types.py +131 -0
- kreuzberg/_utils/_cache.py +17 -2
- kreuzberg/_utils/_process_pool.py +178 -1
- kreuzberg/_utils/_quality.py +237 -0
- kreuzberg/_utils/_serialization.py +4 -2
- kreuzberg/_utils/_string.py +153 -10
- kreuzberg/_utils/_sync.py +5 -2
- kreuzberg/_utils/_table.py +261 -0
- {kreuzberg-3.6.2.dist-info → kreuzberg-3.8.0.dist-info}/METADATA +116 -48
- kreuzberg-3.8.0.dist-info/RECORD +57 -0
- {kreuzberg-3.6.2.dist-info → kreuzberg-3.8.0.dist-info}/entry_points.txt +1 -0
- kreuzberg/_multiprocessing/process_manager.py +0 -189
- kreuzberg/_multiprocessing/sync_easyocr.py +0 -235
- kreuzberg/_multiprocessing/sync_paddleocr.py +0 -199
- kreuzberg/_multiprocessing/sync_tesseract.py +0 -261
- kreuzberg-3.6.2.dist-info/RECORD +0 -54
- {kreuzberg-3.6.2.dist-info → kreuzberg-3.8.0.dist-info}/WHEEL +0 -0
- {kreuzberg-3.6.2.dist-info → kreuzberg-3.8.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg-3.6.2.dist-info/RECORD
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
kreuzberg/__init__.py,sha256=wVxbug-w1cO2xHcP04Bf6QeIKmT2Ep6aeenb8EOYLA0,1534
|
2
|
-
kreuzberg/__main__.py,sha256=s2qM1nPEkRHAQP-G3P7sf5l6qA_KJeIEHS5LpPz04lg,183
|
3
|
-
kreuzberg/_chunker.py,sha256=2eHSRHcZdJ2ZjR3in49y3o9tPl5HMO3vkbnMqaVCbHI,1887
|
4
|
-
kreuzberg/_cli_config.py,sha256=WD_seFjbuay_NJv77vGLBW6BVV9WZNujdzf3zQkhzPc,5691
|
5
|
-
kreuzberg/_constants.py,sha256=Bxc8oiN-wHwnWXT9bEiJhTUcu1ygPpra5qHirAif3b4,191
|
6
|
-
kreuzberg/_entity_extraction.py,sha256=EIasBGpkZ-3FwivjEpisz23LilTwx8os-IbfrDtzNl4,7815
|
7
|
-
kreuzberg/_gmft.py,sha256=e-UpYwizRX_V-dn0a7ja0Z9nShAmDKA1Q7HThJy8cyA,14856
|
8
|
-
kreuzberg/_language_detection.py,sha256=22-uXoOu_ws0K8Hz2M7U_SF9QX3npRYLhntAE1dNLFU,3283
|
9
|
-
kreuzberg/_mime_types.py,sha256=QgX-k8aI4lTKArObDM0TFPt7DUjUVwWrdIaIZDh_XQY,7815
|
10
|
-
kreuzberg/_playa.py,sha256=rU6ii2Qnrj8tkDYlSiab5h-BCYLJnUg4QwSLVDEXF5g,11883
|
11
|
-
kreuzberg/_registry.py,sha256=c2B_PJbaL0q3ab2eNmj_0jldeyMaqgvRwkZqUU4MM5Q,3290
|
12
|
-
kreuzberg/_types.py,sha256=U72a4SXS1e-zV8cXG0tiozMy9mX9wFM1ma6sVz7HpJo,9936
|
13
|
-
kreuzberg/cli.py,sha256=S0w2nGXBWPFn1NhxppW7dpUwB9f_3ymFuWSAB2aRu9g,12465
|
14
|
-
kreuzberg/exceptions.py,sha256=xRaiJh11i8E6Nc-gAQPgNW5xvhiiFBhRS-CBbCEbHQM,2881
|
15
|
-
kreuzberg/extraction.py,sha256=mdH45bMAAUUNXYT7UrNyWJ2oD_gXuLUU-NyuYxQM884,17459
|
16
|
-
kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
-
kreuzberg/_api/main.py,sha256=kZCMPPzP4BGzEege9pdhQTJPKKVjCaC6kZdMMeaqP2M,2599
|
19
|
-
kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
|
-
kreuzberg/_extractors/_base.py,sha256=YUr6A2n34LlFzbYQkiKqhXAphL9RYrvAls5SlkoQqNg,3028
|
21
|
-
kreuzberg/_extractors/_html.py,sha256=L_vcKyv1ObI6irPaD3-WTKqxeRfZA4Rhsl3zUiAe_ws,1312
|
22
|
-
kreuzberg/_extractors/_image.py,sha256=pYfh3x9CkiIxOLvp0jkkZcmLbB_FpdfDo01klSc6OzQ,4819
|
23
|
-
kreuzberg/_extractors/_pandoc.py,sha256=oQ4DgQSPoX1LXjGAKh_A40JHqiKWb91LeRBYSS_6EUA,26750
|
24
|
-
kreuzberg/_extractors/_pdf.py,sha256=R33ggTd0IU6NsEnzgHFTr9ScgcnM8nIIstDq7XMVcvg,14792
|
25
|
-
kreuzberg/_extractors/_presentation.py,sha256=ZX-EKQppHwvKtyKk0-IQVF6QAqJi0SfGgCiiyqMQh0w,8701
|
26
|
-
kreuzberg/_extractors/_spread_sheet.py,sha256=HOzCeYQc6kaMveAHfi80LrsF0yU7Kn74aKQ7lrMAlo8,6480
|
27
|
-
kreuzberg/_multiprocessing/__init__.py,sha256=nwYQpKH7ixHwzkQbTMFCstOCBKktmbNq5dTrwI2Mn94,203
|
28
|
-
kreuzberg/_multiprocessing/gmft_isolated.py,sha256=ZfbhiL5bhBEJnibUSls3WV-FECrnU9VvKfq5O2foHcc,11191
|
29
|
-
kreuzberg/_multiprocessing/process_manager.py,sha256=_qtB8y9td2coJevlIl4z6F__jau320RdI1lqdyuaeD4,6061
|
30
|
-
kreuzberg/_multiprocessing/sync_easyocr.py,sha256=-3_Ol0H8G6RhPxTbTPvoe8fTsTz3e-dg2QbHHnoJL48,7693
|
31
|
-
kreuzberg/_multiprocessing/sync_paddleocr.py,sha256=5558iTjPXCyJWuyhZckmuJLadUwJDb5YVC8Cv-FOaWg,6090
|
32
|
-
kreuzberg/_multiprocessing/sync_tesseract.py,sha256=Ck1PvHGWOMQWUcC7RyVrBt8K9VDFQ0lQcwFkwYzl3rE,8240
|
33
|
-
kreuzberg/_multiprocessing/tesseract_pool.py,sha256=UN7BtS_ib1ux9xuR6d6AB3PY7UEUhd-5Ti1n1H0UnYw,10945
|
34
|
-
kreuzberg/_ocr/__init__.py,sha256=VTqwKDlIRbjve71Y11Ztygyhv5aWG9LWTj8iX66ANxE,533
|
35
|
-
kreuzberg/_ocr/_base.py,sha256=lNT0Tin4hzbmaamqqySxvYEwNtrJB5gGlStrANQQcyc,1637
|
36
|
-
kreuzberg/_ocr/_easyocr.py,sha256=90Dv1xaLXbpG7EtmRQE5ykvnhqZJR3xSFXlxFMCSVSI,13740
|
37
|
-
kreuzberg/_ocr/_paddleocr.py,sha256=UvugDdZd7RojHUiFeBaI8aqz36ecegPLj2v6oT6c42g,13776
|
38
|
-
kreuzberg/_ocr/_tesseract.py,sha256=3s3MkZN9xA_Uedx4s2p5m4IEIMhGjs9gYHxan9Iz-2g,13044
|
39
|
-
kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
kreuzberg/_utils/_cache.py,sha256=JGiwwcNBoD950IbsPUUAD5gAGS7byUuz0BqYSneVakc,13088
|
41
|
-
kreuzberg/_utils/_device.py,sha256=rnaSSB5ibf2wr7EDxrcmOUZ4Ocor0pHkwb3N1pC46EY,10276
|
42
|
-
kreuzberg/_utils/_document_cache.py,sha256=z8irioKsOu8xve1YgHatm__wIFvs9I1gDK3tLNsNyqM,6926
|
43
|
-
kreuzberg/_utils/_errors.py,sha256=AV3oaRQDgJxe1YUZd9pCQUysUv9KW8Ib37MvnyFOZ4o,6386
|
44
|
-
kreuzberg/_utils/_pdf_lock.py,sha256=nqxAYCNlfWDrJtP4ZNu57st1YnkDl-gYXdr0q8nv0kA,1961
|
45
|
-
kreuzberg/_utils/_process_pool.py,sha256=-0SNP01Qz21D7hgJmN0eHoqKusSygwPbi1U7IzJlPio,2895
|
46
|
-
kreuzberg/_utils/_serialization.py,sha256=AhZvyAu4KsjAqyZDh--Kn2kSWGgCuH7udio8lTklO0g,2132
|
47
|
-
kreuzberg/_utils/_string.py,sha256=owIVkUtP0__GiJD9RIJzPdvyIigT5sQho3mOXPbsnW0,958
|
48
|
-
kreuzberg/_utils/_sync.py,sha256=oT4Y_cDBKtE_BFEoLTae3rSisqlYXzW-jlUG_x-dmLM,4725
|
49
|
-
kreuzberg/_utils/_tmp.py,sha256=hVn-VVijIg2FM7EZJ899gc7wZg-TGoJZoeAcxMX-Cxg,1044
|
50
|
-
kreuzberg-3.6.2.dist-info/METADATA,sha256=shguv5yge8FkD9aT0x02dRdLpuLi1PW4SmczFYiILmU,9910
|
51
|
-
kreuzberg-3.6.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
52
|
-
kreuzberg-3.6.2.dist-info/entry_points.txt,sha256=VdoFaTl3QSvVWOZcIlPpDd47o6kn7EvmXSs8FI0ExLc,48
|
53
|
-
kreuzberg-3.6.2.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
|
54
|
-
kreuzberg-3.6.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|