kreuzberg 3.15.0__py3-none-any.whl → 3.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. kreuzberg/__init__.py +6 -0
  2. kreuzberg/_api/main.py +0 -53
  3. kreuzberg/_config.py +17 -8
  4. kreuzberg/_document_classification.py +1 -1
  5. kreuzberg/_extractors/_base.py +0 -46
  6. kreuzberg/_extractors/_email.py +16 -10
  7. kreuzberg/_extractors/_html.py +39 -12
  8. kreuzberg/_extractors/_pandoc.py +2 -2
  9. kreuzberg/_extractors/_pdf.py +6 -7
  10. kreuzberg/_extractors/_presentation.py +4 -0
  11. kreuzberg/_extractors/_spread_sheet.py +0 -1
  12. kreuzberg/_extractors/_structured.py +83 -15
  13. kreuzberg/_gmft.py +7 -2
  14. kreuzberg/_mcp/server.py +1 -22
  15. kreuzberg/_mime_types.py +1 -1
  16. kreuzberg/_ocr/_easyocr.py +47 -20
  17. kreuzberg/_ocr/_paddleocr.py +1 -1
  18. kreuzberg/_ocr/_tesseract.py +27 -26
  19. kreuzberg/_token_reduction/__init__.py +11 -0
  20. kreuzberg/_token_reduction/_reducer.py +439 -0
  21. kreuzberg/_token_reduction/_stopwords.py +116 -0
  22. kreuzberg/_token_reduction/stopwords/af_stopwords.json +53 -0
  23. kreuzberg/_token_reduction/stopwords/ar_stopwords.json +482 -0
  24. kreuzberg/_token_reduction/stopwords/bg_stopwords.json +261 -0
  25. kreuzberg/_token_reduction/stopwords/bn_stopwords.json +400 -0
  26. kreuzberg/_token_reduction/stopwords/br_stopwords.json +1205 -0
  27. kreuzberg/_token_reduction/stopwords/ca_stopwords.json +280 -0
  28. kreuzberg/_token_reduction/stopwords/cs_stopwords.json +425 -0
  29. kreuzberg/_token_reduction/stopwords/da_stopwords.json +172 -0
  30. kreuzberg/_token_reduction/stopwords/de_stopwords.json +622 -0
  31. kreuzberg/_token_reduction/stopwords/el_stopwords.json +849 -0
  32. kreuzberg/_token_reduction/stopwords/en_stopwords.json +1300 -0
  33. kreuzberg/_token_reduction/stopwords/eo_stopwords.json +175 -0
  34. kreuzberg/_token_reduction/stopwords/es_stopwords.json +734 -0
  35. kreuzberg/_token_reduction/stopwords/et_stopwords.json +37 -0
  36. kreuzberg/_token_reduction/stopwords/eu_stopwords.json +100 -0
  37. kreuzberg/_token_reduction/stopwords/fa_stopwords.json +801 -0
  38. kreuzberg/_token_reduction/stopwords/fi_stopwords.json +849 -0
  39. kreuzberg/_token_reduction/stopwords/fr_stopwords.json +693 -0
  40. kreuzberg/_token_reduction/stopwords/ga_stopwords.json +111 -0
  41. kreuzberg/_token_reduction/stopwords/gl_stopwords.json +162 -0
  42. kreuzberg/_token_reduction/stopwords/gu_stopwords.json +226 -0
  43. kreuzberg/_token_reduction/stopwords/ha_stopwords.json +41 -0
  44. kreuzberg/_token_reduction/stopwords/he_stopwords.json +196 -0
  45. kreuzberg/_token_reduction/stopwords/hi_stopwords.json +227 -0
  46. kreuzberg/_token_reduction/stopwords/hr_stopwords.json +181 -0
  47. kreuzberg/_token_reduction/stopwords/hu_stopwords.json +791 -0
  48. kreuzberg/_token_reduction/stopwords/hy_stopwords.json +47 -0
  49. kreuzberg/_token_reduction/stopwords/id_stopwords.json +760 -0
  50. kreuzberg/_token_reduction/stopwords/it_stopwords.json +634 -0
  51. kreuzberg/_token_reduction/stopwords/ja_stopwords.json +136 -0
  52. kreuzberg/_token_reduction/stopwords/kn_stopwords.json +84 -0
  53. kreuzberg/_token_reduction/stopwords/ko_stopwords.json +681 -0
  54. kreuzberg/_token_reduction/stopwords/ku_stopwords.json +64 -0
  55. kreuzberg/_token_reduction/stopwords/la_stopwords.json +51 -0
  56. kreuzberg/_token_reduction/stopwords/lt_stopwords.json +476 -0
  57. kreuzberg/_token_reduction/stopwords/lv_stopwords.json +163 -0
  58. kreuzberg/_token_reduction/stopwords/ml_stopwords.json +11 -0
  59. kreuzberg/_token_reduction/stopwords/mr_stopwords.json +101 -0
  60. kreuzberg/_token_reduction/stopwords/ms_stopwords.json +477 -0
  61. kreuzberg/_token_reduction/stopwords/ne_stopwords.json +490 -0
  62. kreuzberg/_token_reduction/stopwords/nl_stopwords.json +415 -0
  63. kreuzberg/_token_reduction/stopwords/no_stopwords.json +223 -0
  64. kreuzberg/_token_reduction/stopwords/pl_stopwords.json +331 -0
  65. kreuzberg/_token_reduction/stopwords/pt_stopwords.json +562 -0
  66. kreuzberg/_token_reduction/stopwords/ro_stopwords.json +436 -0
  67. kreuzberg/_token_reduction/stopwords/ru_stopwords.json +561 -0
  68. kreuzberg/_token_reduction/stopwords/si_stopwords.json +193 -0
  69. kreuzberg/_token_reduction/stopwords/sk_stopwords.json +420 -0
  70. kreuzberg/_token_reduction/stopwords/sl_stopwords.json +448 -0
  71. kreuzberg/_token_reduction/stopwords/so_stopwords.json +32 -0
  72. kreuzberg/_token_reduction/stopwords/st_stopwords.json +33 -0
  73. kreuzberg/_token_reduction/stopwords/sv_stopwords.json +420 -0
  74. kreuzberg/_token_reduction/stopwords/sw_stopwords.json +76 -0
  75. kreuzberg/_token_reduction/stopwords/ta_stopwords.json +129 -0
  76. kreuzberg/_token_reduction/stopwords/te_stopwords.json +54 -0
  77. kreuzberg/_token_reduction/stopwords/th_stopwords.json +118 -0
  78. kreuzberg/_token_reduction/stopwords/tl_stopwords.json +149 -0
  79. kreuzberg/_token_reduction/stopwords/tr_stopwords.json +506 -0
  80. kreuzberg/_token_reduction/stopwords/uk_stopwords.json +75 -0
  81. kreuzberg/_token_reduction/stopwords/ur_stopwords.json +519 -0
  82. kreuzberg/_token_reduction/stopwords/vi_stopwords.json +647 -0
  83. kreuzberg/_token_reduction/stopwords/yo_stopwords.json +62 -0
  84. kreuzberg/_token_reduction/stopwords/zh_stopwords.json +796 -0
  85. kreuzberg/_token_reduction/stopwords/zu_stopwords.json +31 -0
  86. kreuzberg/_types.py +146 -43
  87. kreuzberg/_utils/_html_streaming.py +20 -0
  88. kreuzberg/_utils/_image_preprocessing.py +1 -1
  89. kreuzberg/_utils/_ref.py +14 -6
  90. kreuzberg/_utils/_serialization.py +13 -6
  91. kreuzberg/_utils/_sync.py +15 -16
  92. kreuzberg/exceptions.py +0 -1
  93. kreuzberg/extraction.py +27 -11
  94. {kreuzberg-3.15.0.dist-info → kreuzberg-3.17.0.dist-info}/METADATA +15 -13
  95. kreuzberg-3.17.0.dist-info/RECORD +128 -0
  96. kreuzberg-3.15.0.dist-info/RECORD +0 -60
  97. {kreuzberg-3.15.0.dist-info → kreuzberg-3.17.0.dist-info}/WHEEL +0 -0
  98. {kreuzberg-3.15.0.dist-info → kreuzberg-3.17.0.dist-info}/entry_points.txt +0 -0
  99. {kreuzberg-3.15.0.dist-info → kreuzberg-3.17.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,128 @@
1
+ kreuzberg/__init__.py,sha256=niF_YZ7YADL_oXZ8zB5EMov4xnyFzuxTABVlHoRnBJA,1629
2
+ kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
+ kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
+ kreuzberg/_config.py,sha256=ZYIcnJAjDnbWW_2WBy7NlOk1Ol6WpoMG5FMNMmHpqSY,13086
5
+ kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
+ kreuzberg/_document_classification.py,sha256=55aDxDIJ65qK6yEXt-fRYTn8LgALvYsWssjWSheVpR0,5697
7
+ kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
+ kreuzberg/_gmft.py,sha256=gfRXOsv-K9R7Y0zZ2SUa5wid3FpP2eFIlg5nepWcz1Q,20827
9
+ kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
+ kreuzberg/_mime_types.py,sha256=duEMDBg_qIf9A02tXAC_2znD-wgE-2BBMW9ofyYTJjE,8622
11
+ kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
+ kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
+ kreuzberg/_types.py,sha256=uULpUfQzpt_AAr8epOvIl3cdB9TkNTFrxWQssnZg_IM,48655
14
+ kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
+ kreuzberg/exceptions.py,sha256=KiGAfIX3_TkGYG1h9eTZ_E_pALsAqhZ_A3XfhwxwaS0,2909
16
+ kreuzberg/extraction.py,sha256=Z2rBVGs8oteXU1mynHCd9q1yKz9NNA5tQdWq35jP2EE,18743
17
+ kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
+ kreuzberg/_api/main.py,sha256=_tBZaRiq7qq7x4nXkVRgU5FBivLFJ_dmadAc7aT0H_k,13901
21
+ kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ kreuzberg/_extractors/_base.py,sha256=4MRBXdLsgdtdrTuupWb2IT9YpRSnNPpWWviS2mfeOXg,9961
23
+ kreuzberg/_extractors/_email.py,sha256=DzNthVbmbdlajDUfs0nNwbHNvG0CAQVqJsRfsatHtf0,8799
24
+ kreuzberg/_extractors/_html.py,sha256=TXXgwQZuEvnrny5HdBpn8oikGktyxgY9jvgZmnFtnqY,6371
25
+ kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
+ kreuzberg/_extractors/_pandoc.py,sha256=cwthr--IFwbu8r0rCZ_Cx5zRlan94yuqt5e3mjYxesE,24182
27
+ kreuzberg/_extractors/_pdf.py,sha256=GFy7xHUH09i48E5Xixy6nReF_uBu9646UTjywKoH-Rs,23304
28
+ kreuzberg/_extractors/_presentation.py,sha256=2g6PJnpgUpUfMjQJh-7_gHywDulE8QE8ypH__BrEUTQ,10692
29
+ kreuzberg/_extractors/_spread_sheet.py,sha256=TJOM70DLN0HzcOkAowZJogAx7QFrouohvU5V0OIliag,12738
30
+ kreuzberg/_extractors/_structured.py,sha256=YkTOfSQJOe127ZURrAYAomNrIkKoAYC4gt0P9ypY3RY,8919
31
+ kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
+ kreuzberg/_mcp/server.py,sha256=71MhjiFDwgFROdGejf0djgO1eG370qudWmZsN59CUeA,16743
33
+ kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
+ kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
+ kreuzberg/_ocr/_easyocr.py,sha256=bHz2S_8nNHaPHPemcJK-U0al9_qP-vUmWE4ECVlf7AA,15485
36
+ kreuzberg/_ocr/_paddleocr.py,sha256=CV9cCjkRe-3cNJ5tRu_sBXd_HNghEwfPIgWwxAZTeRY,15026
37
+ kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
+ kreuzberg/_ocr/_tesseract.py,sha256=1SEfrX_JvU6KIeWt31GsRWnNmjaAh3xgQaRMPvoZLJA,51349
39
+ kreuzberg/_token_reduction/__init__.py,sha256=y_2WgPxJes8_PD-VMfx7vQT0hGjFIixzS8PjaIseAGg,311
40
+ kreuzberg/_token_reduction/_reducer.py,sha256=shAfMPznP69sTSzwX_bE1LpcBmoia9cpd7r6bSc4R5Q,13609
41
+ kreuzberg/_token_reduction/_stopwords.py,sha256=mu-5CapG0RCP7LYzjhdTM6WWLtmt3cjZ08OOsyQkJVg,3608
42
+ kreuzberg/_token_reduction/stopwords/af_stopwords.json,sha256=RlgUHyzPIQBbTA52kLSQpmTfteRbbV_qb_Spa51RI8Q,452
43
+ kreuzberg/_token_reduction/stopwords/ar_stopwords.json,sha256=GKcR9MyDM5zvIQhLqWfq87Jmj3gbAM81ZZi-eBKBjz8,6738
44
+ kreuzberg/_token_reduction/stopwords/bg_stopwords.json,sha256=7KuYBTg7jc8ZLFYI6QwaVatlh_gP8i80EkQHD1suhXs,3707
45
+ kreuzberg/_token_reduction/stopwords/bn_stopwords.json,sha256=qAtZN89LGy0se9i_FrB02vsNLgE2gG1clwMHTi4Qncg,7437
46
+ kreuzberg/_token_reduction/stopwords/br_stopwords.json,sha256=RuErMr4twvsocqw9fvwtgrnbzVKB2WABVi5AfPy2lqo,13601
47
+ kreuzberg/_token_reduction/stopwords/ca_stopwords.json,sha256=HRrh4QKFXDsAfmk5yjXHD28KjdO2vMjqJFIltNwh_F8,2952
48
+ kreuzberg/_token_reduction/stopwords/cs_stopwords.json,sha256=Dlsq3UFIHD9USIuHiwrFur0DvIaRpjftnBb3Qnjio4M,4523
49
+ kreuzberg/_token_reduction/stopwords/da_stopwords.json,sha256=zLk-90hrY5tH4gS4uOcMlviky4mmg7b8WaXnn_NzKfQ,1664
50
+ kreuzberg/_token_reduction/stopwords/de_stopwords.json,sha256=4lB0tUyT9PlB9ubnUbwOObO_RT0irBSdPOuvQIgrr7g,7052
51
+ kreuzberg/_token_reduction/stopwords/el_stopwords.json,sha256=VqO3y_q_ZeSBZAMxD1KjMMkCylYN2uuN620szHmFx-M,13604
52
+ kreuzberg/_token_reduction/stopwords/en_stopwords.json,sha256=VvLb0zoUKjhqQH-RGkSTpPtdRjvgv_G8l4i9ub2fJmU,14171
53
+ kreuzberg/_token_reduction/stopwords/eo_stopwords.json,sha256=xnojHtnik734Mzw4i4bIxPZEgBRXvgK2TRkHnxBCjWw,1722
54
+ kreuzberg/_token_reduction/stopwords/es_stopwords.json,sha256=PcSwxKskYQXc-21vNkpb6IntQYVP50CwuXfx4Gyhhx8,8598
55
+ kreuzberg/_token_reduction/stopwords/et_stopwords.json,sha256=_t6iPfNa1LhqRq4sLNbIB6_B5-472UCNi9IARJTPhzQ,327
56
+ kreuzberg/_token_reduction/stopwords/eu_stopwords.json,sha256=SNa84Zkx5Rcf8JZBdm4rCMxxZ7Z_94fW9cebZC4qgqI,1069
57
+ kreuzberg/_token_reduction/stopwords/fa_stopwords.json,sha256=8R1724IQHkXc1g_jXJjRMVLgq2Zz6YgPeE4DI0iSj9Y,11708
58
+ kreuzberg/_token_reduction/stopwords/fi_stopwords.json,sha256=yOlZLoh3aMJ-YXz3r7kGLAIsDyvxNrhFyvWr7Vu_z5o,10699
59
+ kreuzberg/_token_reduction/stopwords/fr_stopwords.json,sha256=KkiZ8dQYFQzjVJ-YwUoP13zwLwz7zu9Fpw-X-wmxya0,8025
60
+ kreuzberg/_token_reduction/stopwords/ga_stopwords.json,sha256=K8LOrUkqSi82KTLlZ1NnadEU-HMyCd2Ofm13GfxW3J0,1100
61
+ kreuzberg/_token_reduction/stopwords/gl_stopwords.json,sha256=Y0GfhhcOv1GNPJP3zoFYIYkg369GT1yHK5xCPiH6Pn0,1602
62
+ kreuzberg/_token_reduction/stopwords/gu_stopwords.json,sha256=YSldatfgVz_gNWopQ5TMFTHWEbGVYPcJMwO-bThtYAI,3818
63
+ kreuzberg/_token_reduction/stopwords/ha_stopwords.json,sha256=EohjrRkbSuLOn_aiDcsMOUEYPJjVha9wHhCsoxiwNsU,354
64
+ kreuzberg/_token_reduction/stopwords/he_stopwords.json,sha256=STlmHNDJqDEZI7ZCtBcZlEU1ndoEeJIexuOnTaOXJac,2629
65
+ kreuzberg/_token_reduction/stopwords/hi_stopwords.json,sha256=aYojvEA-UlivR_JCJTwZRoK2BJjVUW_m9q8eDRRczpE,3792
66
+ kreuzberg/_token_reduction/stopwords/hr_stopwords.json,sha256=2s5uhGAitVRDLgKdbA0F9sFZWtRWcmyiDZY9adwLGzk,1769
67
+ kreuzberg/_token_reduction/stopwords/hu_stopwords.json,sha256=9o0snSijbEEt9Hpbs4kTW6czhcdiXLTa5sbC68nitDY,9830
68
+ kreuzberg/_token_reduction/stopwords/hy_stopwords.json,sha256=QLsYw_y9ESyou1bHbPwjSrWy_nJq8wjiNihrvikYSKY,525
69
+ kreuzberg/_token_reduction/stopwords/id_stopwords.json,sha256=TZB_e1Txu3oGpQfHCzodoOTcKoKplTC5ZDr1iAbdzVI,10238
70
+ kreuzberg/_token_reduction/stopwords/it_stopwords.json,sha256=BSOpBGf_StyW6tdycNRMSvXGTksvrOowrE--D5914J4,7277
71
+ kreuzberg/_token_reduction/stopwords/ja_stopwords.json,sha256=E7MSvBOnRvTeChRk0Nm5X7xxwP50BHaP5FGOfDbnmRI,1680
72
+ kreuzberg/_token_reduction/stopwords/kn_stopwords.json,sha256=km3Qk1vy3OVdsAoE_YbZ-oXRYapFBi5k59o1mlWnk70,1626
73
+ kreuzberg/_token_reduction/stopwords/ko_stopwords.json,sha256=sHR2SLh_zXVs6SKZlWCS29MGRv6xlKcp3Ckvf0-aXt8,9932
74
+ kreuzberg/_token_reduction/stopwords/ku_stopwords.json,sha256=1Vj0g-fwacVcwaJ66BSPe4GkI7WybXK-EspIE6uvAmY,893
75
+ kreuzberg/_token_reduction/stopwords/la_stopwords.json,sha256=1d6iV2sTgZF6G7EF5yb3G0Sic85awtjN617cWXb-ltw,456
76
+ kreuzberg/_token_reduction/stopwords/lt_stopwords.json,sha256=7WE-NiX-y2IQnnO61-2pDExaR0ZeOq6A7YMn29effAM,5675
77
+ kreuzberg/_token_reduction/stopwords/lv_stopwords.json,sha256=WTp3jWxsX054E53DdpoI0BqujDefICljN4d7KiIIsls,1796
78
+ kreuzberg/_token_reduction/stopwords/ml_stopwords.json,sha256=lDoq0gGSI4zbuKhdNyF8MMTPkSI9wYb1om6pRPY5zkw,192
79
+ kreuzberg/_token_reduction/stopwords/mr_stopwords.json,sha256=6XjzSLaHwwOGWot1QszaUMl12mAVFh840GH9MJoYoes,1764
80
+ kreuzberg/_token_reduction/stopwords/ms_stopwords.json,sha256=eJsXJ0bVOnWUSVG3XwkIClxlR3qd_2k75ZAQlmHpsKQ,5950
81
+ kreuzberg/_token_reduction/stopwords/ne_stopwords.json,sha256=MoAXH6Tncag9Qgr6TR7yp1FguDCGQBXpGdSQ2DIOikE,9447
82
+ kreuzberg/_token_reduction/stopwords/nl_stopwords.json,sha256=W08hz9JP3EdWpXtLPUjWFOSr3AwGnZPkwcjEUBiFWnA,4724
83
+ kreuzberg/_token_reduction/stopwords/no_stopwords.json,sha256=bOjDCti_Loe0ZYSF2mR-LQzMqViZRkur1GEOLh4Mr-A,2210
84
+ kreuzberg/_token_reduction/stopwords/pl_stopwords.json,sha256=TcnvzF5uMVDKxQUt1YBu7Lw1qIpeHftuIDSguz8ZAdA,3487
85
+ kreuzberg/_token_reduction/stopwords/pt_stopwords.json,sha256=h4jmBxUu10PuzQzTjeFm1B5NBl0Owt7uGhwx66mTTYQ,6413
86
+ kreuzberg/_token_reduction/stopwords/ro_stopwords.json,sha256=iuHvFs-iS118RH07v0hO7Oxfdx5rDqJwl3lRPMWINbM,4569
87
+ kreuzberg/_token_reduction/stopwords/ru_stopwords.json,sha256=MZckTBKlL1i4Kv16RSSozUfCM6dcKI5H9PYZD7pS0Ac,9028
88
+ kreuzberg/_token_reduction/stopwords/si_stopwords.json,sha256=jvtaQfO4fc-XPHgaO1hPsbpJQQg40rSeEbCGWm2AO60,3324
89
+ kreuzberg/_token_reduction/stopwords/sk_stopwords.json,sha256=FDaLmQ61_fFg0k3cGthv8flKFs67M1hmSE-6PrfMCAU,4638
90
+ kreuzberg/_token_reduction/stopwords/sl_stopwords.json,sha256=UoQRoLRT9qzmS8ALY_cuDE1uukK0hS6Q6QuUhr7oLHc,4669
91
+ kreuzberg/_token_reduction/stopwords/so_stopwords.json,sha256=Z7ayeNV98MOx_xkGxtcSX3dh8GAhgCRFa0EC1VDG29Q,299
92
+ kreuzberg/_token_reduction/stopwords/st_stopwords.json,sha256=ajvBq5XQCse62nptN_m8Jll5-Ps9j3bK4RODMIzCkD4,268
93
+ kreuzberg/_token_reduction/stopwords/sv_stopwords.json,sha256=kLz5vgx0VfQI0jtOj3Rlp6wuj3tKhqp2oF-f9f2-neQ,4737
94
+ kreuzberg/_token_reduction/stopwords/sw_stopwords.json,sha256=x4eOC7-nRlSS7qv_pwW6yECDrfhm_3zoTWenIPL1aWY,780
95
+ kreuzberg/_token_reduction/stopwords/ta_stopwords.json,sha256=qBbEu6m_HEx2C27ep6UJOyxQ6st74Et1fN8TvRHoTxw,2634
96
+ kreuzberg/_token_reduction/stopwords/te_stopwords.json,sha256=GT0Rj3MsgCJSj9GdzKjpgsQJE3-wCaS5Aa3_ynIZKx0,1263
97
+ kreuzberg/_token_reduction/stopwords/th_stopwords.json,sha256=5DEb-W41TFL4BGS-_CJzgPTkpmuLN20WBfeO1hG0HLc,2010
98
+ kreuzberg/_token_reduction/stopwords/tl_stopwords.json,sha256=pu3wAWQyT0vzGwSO8N2x2mRlaCHzEgEIvECTCrJOLE8,1663
99
+ kreuzberg/_token_reduction/stopwords/tr_stopwords.json,sha256=hSmUsApI7lxVfwJwAInkCLoa3YoGjI85Mwg9DpiHTDo,6159
100
+ kreuzberg/_token_reduction/stopwords/uk_stopwords.json,sha256=_j_lYv_bE5RAEMcW7-u0rYWf39fMrlpIgFEMFQDjqW0,965
101
+ kreuzberg/_token_reduction/stopwords/ur_stopwords.json,sha256=IcrM74VdmSbgM7wlBtFVtkrWsCI0SDFbRCSSAkyvlqo,7370
102
+ kreuzberg/_token_reduction/stopwords/vi_stopwords.json,sha256=UOyAEKBwMcQV65QGpQU-ynmyignNoqFzUSQ8p_1XuoY,9152
103
+ kreuzberg/_token_reduction/stopwords/yo_stopwords.json,sha256=60liY89h7KReEvHEPxe-hCWLPuqr4U89aQDCi7iRCfo,651
104
+ kreuzberg/_token_reduction/stopwords/zh_stopwords.json,sha256=rouSTCkXun90Q1aCvLjHyt4I7pGrtlcruDpNVybpAMI,8934
105
+ kreuzberg/_token_reduction/stopwords/zu_stopwords.json,sha256=hfm4E2EDI_VWyR0GUOVjcMQA7ZDH7FsV4FUMcns1H28,324
106
+ kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
+ kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
108
+ kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
109
+ kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
110
+ kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
111
+ kreuzberg/_utils/_html_streaming.py,sha256=ywQgEQfEGm6MSotS1g_HXgl0e7V59yLmf2wytALuZko,648
112
+ kreuzberg/_utils/_image_preprocessing.py,sha256=f7ioWQyARnhzj0am0Y1_eteJwWomdPy7AnbXqw2xWBs,10954
113
+ kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
114
+ kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
115
+ kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
116
+ kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
117
+ kreuzberg/_utils/_ref.py,sha256=BDuk9hHYq1KPRgenjC3-6iFEjGsrGfHZKr9tPNhfquU,1109
118
+ kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
119
+ kreuzberg/_utils/_serialization.py,sha256=G-kxtCPDPGFqBMyHfzvAPo-bNUmPdaXYdeg1dnBLfN4,1789
120
+ kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
121
+ kreuzberg/_utils/_sync.py,sha256=gb828WYfVtkB4wKslJrPMmrdeI1h3htWceq-gywHtO4,3184
122
+ kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
123
+ kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
124
+ kreuzberg-3.17.0.dist-info/METADATA,sha256=4iVwQUo4FVNSwj8h6oEqNAT5B6zm-J-u5k3Jy3Pv3L0,12351
125
+ kreuzberg-3.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
126
+ kreuzberg-3.17.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
127
+ kreuzberg-3.17.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
128
+ kreuzberg-3.17.0.dist-info/RECORD,,
@@ -1,60 +0,0 @@
1
- kreuzberg/__init__.py,sha256=-IHDHXKE7q43MBr_KklpqvhNPjJRhX3qFpMge8kuViE,1467
2
- kreuzberg/__main__.py,sha256=3cIDdzTggj2kj8uKx4WShWHmCWqdZazdM3BxUGbAuSI,104
3
- kreuzberg/_chunker.py,sha256=lRXvVN60vmWaTxa1b3QzvE-jBmOqYzh5dY-3Kl6pSqI,1427
4
- kreuzberg/_config.py,sha256=2LI5z9gXniqO4afrMmbZfMdhlT2701O5OlGKkrMo-bM,12385
5
- kreuzberg/_constants.py,sha256=gY6SpCi9za59ghRuLX_z7xfSok6qqvPbvEnv4BLczqI,265
6
- kreuzberg/_document_classification.py,sha256=zgBjqiHCqhtz74JLtt_V8kk6HQTkK5egGWdAGk9dOEQ,5672
7
- kreuzberg/_entity_extraction.py,sha256=YvcELIo3kV8A_WbzwNjhKn7rPhkZXjbpNMgm2UK0oJw,3621
8
- kreuzberg/_gmft.py,sha256=a7KDXbZM0PxyFpAIjM0xMRvxzoMo4fTQuGlFNa8uXBU,20502
9
- kreuzberg/_language_detection.py,sha256=T9p6aimB7QFXAQiEntIMZeH_Z62E52E8fBQ43hWuyhs,1960
10
- kreuzberg/_mime_types.py,sha256=-05mBS5AoF4LUmfB_WyLoce0y4peiOyOf2JucF714WQ,8602
11
- kreuzberg/_playa.py,sha256=p4G5ymSSCbQoDeXJjH-yuVzdd4y-wKcolqDthjPtqok,11413
12
- kreuzberg/_registry.py,sha256=8XYT-vPhNYMAbB5RBIUKz-1Zdg48OCnBcdVZzBq6YwY,3307
13
- kreuzberg/_types.py,sha256=7hj2KWohuSKQ9cJd_VCuSeciuyuOC5MdSkS1s5QaPOg,44870
14
- kreuzberg/cli.py,sha256=OoHA5MiIcRBATFJpb-FZYlZfpohxL2AbVgamyhnEMFo,14342
15
- kreuzberg/exceptions.py,sha256=PTiAZgQwcG9hXbgYg2W7sfxksFhq5_wzOFgZGnTJAoc,2991
16
- kreuzberg/extraction.py,sha256=gDkwuj_omQ8OCx4RALD0NjasxMhZLhIju7odK7wMwDM,17789
17
- kreuzberg/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- kreuzberg/_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- kreuzberg/_api/_config_cache.py,sha256=gX_ezGNq6SCpTn02yFkn24zMVrQwfIk8-u5XkKJiHFg,8774
20
- kreuzberg/_api/main.py,sha256=_r2R_-4zBkyJBn0bcPWogVEDICxWWt5_FFiQIF-r4N4,15463
21
- kreuzberg/_extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- kreuzberg/_extractors/_base.py,sha256=39E7R7hV6C2uMJdQKLBVSWK3tN-mtK0LaayU10-8Fqo,11191
23
- kreuzberg/_extractors/_email.py,sha256=8tsHycVBQ2KSSqp2TZ9a0O1Yxjwe0YvE2GVxUajCVz4,8478
24
- kreuzberg/_extractors/_html.py,sha256=7fzNr7-BJ4IND7PWTlEIiqfeKDUb_ZjWO3KDdU3umgI,5151
25
- kreuzberg/_extractors/_image.py,sha256=7rKEGhUAmdzO0YcBKQVhVme4PqyKIi2UCn4esmmFXOY,4300
26
- kreuzberg/_extractors/_pandoc.py,sha256=CPEJxKTZdfyb7jPacZkiAsR2NEGL6KyiHzOr88tprJY,24142
27
- kreuzberg/_extractors/_pdf.py,sha256=MKfihJcveulfkMQc-s5VUCgvK1aw8EyCbUMRwJo_KoM,23225
28
- kreuzberg/_extractors/_presentation.py,sha256=MZd4Ft2g5oIrEZ1h3ZWsQTW_VpHI2yi4g4Tdh5iw_7I,10466
29
- kreuzberg/_extractors/_spread_sheet.py,sha256=Q2uXvotwqvWiYkIPrtnVL2Ci9ZA7fmTgN6tDN_huwdE,12801
30
- kreuzberg/_extractors/_structured.py,sha256=PpefI_GDrdLyUgnElrbdB-MeTMKVWium4Ckxm5Zg100,5536
31
- kreuzberg/_mcp/__init__.py,sha256=h6DgLFO4TMUk7_wCJ2jn2Y6IkFmfzb-Z7jX-G5UCYVc,43
32
- kreuzberg/_mcp/server.py,sha256=n_bfNPSU_SvXVJ5z05oKVj2sFv2uRYoe3ZZzyVOHQOI,17608
33
- kreuzberg/_ocr/__init__.py,sha256=grshVFwVQl2rMvH1hg1JNlYXjy5-Tdb_rusLD1Cselk,706
34
- kreuzberg/_ocr/_base.py,sha256=ZvOJvW8DtylQJZdCPk9vlVNZiBFK-dC4Oj7Kb6-mWkY,1419
35
- kreuzberg/_ocr/_easyocr.py,sha256=6Naqy9JvL96Mm9gw4s-4nRsubd0Z0t8Zn6VC_HInUfc,14577
36
- kreuzberg/_ocr/_paddleocr.py,sha256=XyYc3gtmnvOGfQ0qBQYFphJa1kSv5hZ_LJ0weD2hQ08,15006
37
- kreuzberg/_ocr/_table_extractor.py,sha256=LhBiCX8R_xR-uK1FH3ONA_vqOmqUWANZJ2HMCBLsmNY,5513
38
- kreuzberg/_ocr/_tesseract.py,sha256=fq4qdrzPss9ZaIneUxmwq9x3sFJe8FEi__DLOa1AXN4,50945
39
- kreuzberg/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- kreuzberg/_utils/_cache.py,sha256=AtANbs1MWR4WLB2MhatVGhlh7kM-yjSfFuDnSVSNp50,14110
41
- kreuzberg/_utils/_device.py,sha256=o03rLiHiRX6TKhJ55LO1Vj2Map1Po5YdjuMdA63tGOE,8249
42
- kreuzberg/_utils/_document_cache.py,sha256=tfk9_Yc1cQkT5_uM5R1uaI4w-2SjNn7QyAd6AmWkSz8,4851
43
- kreuzberg/_utils/_errors.py,sha256=aQYEnp8oJ-WJVmCNo7YY-25y1KZZFEwjAmxVRfw4a_M,4920
44
- kreuzberg/_utils/_image_preprocessing.py,sha256=arl4UDDiD_Z6SKM-jTXENaOaaHZBVFTsueb6DcpFXOo,10934
45
- kreuzberg/_utils/_ocr_cache.py,sha256=uCCZfdY7EiqMhCnhNwqirFOr-Wfaobd2Ntc-F07TKec,3425
46
- kreuzberg/_utils/_pdf_lock.py,sha256=Ytvds30aZf3yXeZFo27ZenrhUoU-GZlR2rKEkhJ_wlk,1349
47
- kreuzberg/_utils/_process_pool.py,sha256=fqlxNsxDoqS28BLrZeDBH743HdaUBuGPYFH5hjSajIg,7493
48
- kreuzberg/_utils/_quality.py,sha256=FCVh9KieWUYgT1klLxudbslzKuqbOTBbTsHbvIuru7M,5510
49
- kreuzberg/_utils/_ref.py,sha256=iOflvjTUc_F0XaL28Bd6fpvL6qkeoURGA4B77Nqky7I,840
50
- kreuzberg/_utils/_resource_managers.py,sha256=N3-VeHDj6sKBeg3UL-PqRtKGExUBoVcEB5UuQ8FncY8,2079
51
- kreuzberg/_utils/_serialization.py,sha256=97iIgdcxdbym-BEvy0J6HAduBCUXyCGwhuEHCT_l7I4,1513
52
- kreuzberg/_utils/_string.py,sha256=wVyvEHByHBeu_6evmqJGv9Ml-NAwkyz60n8l-7L5Cw0,4366
53
- kreuzberg/_utils/_sync.py,sha256=O4ukJfo8hIr72kaoRvvJjbkBeorIw0SUfkovv0YXa7k,3170
54
- kreuzberg/_utils/_table.py,sha256=OVg6T2QnerMhVNb1juLTBSIjyjFiE5-OrUWr5NSCgnQ,6493
55
- kreuzberg/_utils/_tmp.py,sha256=mwZ0BFzhGPfYa2tt8qSjUjfcHnSYvbQT4VlPRCRc_q8,2038
56
- kreuzberg-3.15.0.dist-info/METADATA,sha256=-4oGIVQAYBB8BSPbwA_MA1LK-ZROaCxwX6g-re4ZtCQ,12246
57
- kreuzberg-3.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
- kreuzberg-3.15.0.dist-info/entry_points.txt,sha256=GplGhFryCP7kyAG_k-Mdahznvo2fwi73qLFg5yQfH_A,91
59
- kreuzberg-3.15.0.dist-info/licenses/LICENSE,sha256=-8caMvpCK8SgZ5LlRKhGCMtYDEXqTKH9X8pFEhl91_4,1066
60
- kreuzberg-3.15.0.dist-info/RECORD,,