kreuzberg 4.0.8 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +94 -98
  5. data/ext/kreuzberg_rb/native/Cargo.toml +4 -2
  6. data/ext/kreuzberg_rb/native/src/batch.rs +139 -0
  7. data/ext/kreuzberg_rb/native/src/config/mod.rs +10 -0
  8. data/ext/kreuzberg_rb/native/src/config/types.rs +1058 -0
  9. data/ext/kreuzberg_rb/native/src/error_handling.rs +125 -0
  10. data/ext/kreuzberg_rb/native/src/extraction.rs +79 -0
  11. data/ext/kreuzberg_rb/native/src/gc_guarded_value.rs +35 -0
  12. data/ext/kreuzberg_rb/native/src/helpers.rs +176 -0
  13. data/ext/kreuzberg_rb/native/src/lib.rs +342 -3622
  14. data/ext/kreuzberg_rb/native/src/metadata.rs +34 -0
  15. data/ext/kreuzberg_rb/native/src/plugins/mod.rs +92 -0
  16. data/ext/kreuzberg_rb/native/src/plugins/ocr_backend.rs +159 -0
  17. data/ext/kreuzberg_rb/native/src/plugins/post_processor.rs +126 -0
  18. data/ext/kreuzberg_rb/native/src/plugins/validator.rs +99 -0
  19. data/ext/kreuzberg_rb/native/src/result.rs +326 -0
  20. data/ext/kreuzberg_rb/native/src/validation.rs +4 -0
  21. data/lib/kreuzberg/config.rb +99 -2
  22. data/lib/kreuzberg/result.rb +107 -2
  23. data/lib/kreuzberg/types.rb +104 -0
  24. data/lib/kreuzberg/version.rb +1 -1
  25. data/lib/kreuzberg.rb +0 -4
  26. data/sig/kreuzberg.rbs +105 -1
  27. data/spec/fixtures/config.toml +1 -1
  28. data/spec/fixtures/config.yaml +1 -1
  29. data/vendor/Cargo.toml +3 -3
  30. data/vendor/kreuzberg/Cargo.toml +5 -4
  31. data/vendor/kreuzberg/README.md +1 -1
  32. data/vendor/kreuzberg/src/api/config.rs +69 -0
  33. data/vendor/kreuzberg/src/api/handlers.rs +99 -2
  34. data/vendor/kreuzberg/src/api/mod.rs +14 -7
  35. data/vendor/kreuzberg/src/api/router.rs +214 -0
  36. data/vendor/kreuzberg/src/api/startup.rs +243 -0
  37. data/vendor/kreuzberg/src/api/types.rs +78 -0
  38. data/vendor/kreuzberg/src/cache/cleanup.rs +277 -0
  39. data/vendor/kreuzberg/src/cache/core.rs +428 -0
  40. data/vendor/kreuzberg/src/cache/mod.rs +21 -843
  41. data/vendor/kreuzberg/src/cache/utilities.rs +156 -0
  42. data/vendor/kreuzberg/src/chunking/boundaries.rs +301 -0
  43. data/vendor/kreuzberg/src/chunking/builder.rs +294 -0
  44. data/vendor/kreuzberg/src/chunking/config.rs +52 -0
  45. data/vendor/kreuzberg/src/chunking/core.rs +1017 -0
  46. data/vendor/kreuzberg/src/chunking/mod.rs +14 -2211
  47. data/vendor/kreuzberg/src/chunking/processor.rs +10 -0
  48. data/vendor/kreuzberg/src/chunking/validation.rs +686 -0
  49. data/vendor/kreuzberg/src/core/config/extraction/core.rs +169 -0
  50. data/vendor/kreuzberg/src/core/config/extraction/env.rs +179 -0
  51. data/vendor/kreuzberg/src/core/config/extraction/loaders.rs +204 -0
  52. data/vendor/kreuzberg/src/core/config/extraction/mod.rs +42 -0
  53. data/vendor/kreuzberg/src/core/config/extraction/types.rs +93 -0
  54. data/vendor/kreuzberg/src/core/config/formats.rs +135 -0
  55. data/vendor/kreuzberg/src/core/config/mod.rs +20 -0
  56. data/vendor/kreuzberg/src/core/config/ocr.rs +73 -0
  57. data/vendor/kreuzberg/src/core/config/page.rs +57 -0
  58. data/vendor/kreuzberg/src/core/config/pdf.rs +111 -0
  59. data/vendor/kreuzberg/src/core/config/processing.rs +312 -0
  60. data/vendor/kreuzberg/src/core/config_validation/dependencies.rs +187 -0
  61. data/vendor/kreuzberg/src/core/config_validation/mod.rs +386 -0
  62. data/vendor/kreuzberg/src/core/config_validation/sections.rs +401 -0
  63. data/vendor/kreuzberg/src/core/extractor/batch.rs +246 -0
  64. data/vendor/kreuzberg/src/core/extractor/bytes.rs +116 -0
  65. data/vendor/kreuzberg/src/core/extractor/file.rs +240 -0
  66. data/vendor/kreuzberg/src/core/extractor/helpers.rs +71 -0
  67. data/vendor/kreuzberg/src/core/extractor/legacy.rs +62 -0
  68. data/vendor/kreuzberg/src/core/extractor/mod.rs +490 -0
  69. data/vendor/kreuzberg/src/core/extractor/sync.rs +208 -0
  70. data/vendor/kreuzberg/src/core/mime.rs +15 -0
  71. data/vendor/kreuzberg/src/core/mod.rs +4 -1
  72. data/vendor/kreuzberg/src/core/pipeline/cache.rs +60 -0
  73. data/vendor/kreuzberg/src/core/pipeline/execution.rs +89 -0
  74. data/vendor/kreuzberg/src/core/pipeline/features.rs +108 -0
  75. data/vendor/kreuzberg/src/core/pipeline/format.rs +392 -0
  76. data/vendor/kreuzberg/src/core/pipeline/initialization.rs +67 -0
  77. data/vendor/kreuzberg/src/core/pipeline/mod.rs +135 -0
  78. data/vendor/kreuzberg/src/core/pipeline/tests.rs +975 -0
  79. data/vendor/kreuzberg/src/core/server_config/env.rs +90 -0
  80. data/vendor/kreuzberg/src/core/server_config/loader.rs +202 -0
  81. data/vendor/kreuzberg/src/core/server_config/mod.rs +380 -0
  82. data/vendor/kreuzberg/src/core/server_config/tests/basic_tests.rs +124 -0
  83. data/vendor/kreuzberg/src/core/server_config/tests/env_tests.rs +216 -0
  84. data/vendor/kreuzberg/src/core/server_config/tests/file_loading_tests.rs +341 -0
  85. data/vendor/kreuzberg/src/core/server_config/tests/mod.rs +5 -0
  86. data/vendor/kreuzberg/src/core/server_config/validation.rs +17 -0
  87. data/vendor/kreuzberg/src/embeddings.rs +136 -13
  88. data/vendor/kreuzberg/src/extraction/{archive.rs → archive/mod.rs} +45 -239
  89. data/vendor/kreuzberg/src/extraction/archive/sevenz.rs +98 -0
  90. data/vendor/kreuzberg/src/extraction/archive/tar.rs +118 -0
  91. data/vendor/kreuzberg/src/extraction/archive/zip.rs +101 -0
  92. data/vendor/kreuzberg/src/extraction/html/converter.rs +592 -0
  93. data/vendor/kreuzberg/src/extraction/html/image_handling.rs +95 -0
  94. data/vendor/kreuzberg/src/extraction/html/mod.rs +53 -0
  95. data/vendor/kreuzberg/src/extraction/html/processor.rs +659 -0
  96. data/vendor/kreuzberg/src/extraction/html/stack_management.rs +103 -0
  97. data/vendor/kreuzberg/src/extraction/html/types.rs +28 -0
  98. data/vendor/kreuzberg/src/extraction/mod.rs +6 -2
  99. data/vendor/kreuzberg/src/extraction/pptx/container.rs +159 -0
  100. data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +168 -0
  101. data/vendor/kreuzberg/src/extraction/pptx/elements.rs +132 -0
  102. data/vendor/kreuzberg/src/extraction/pptx/image_handling.rs +57 -0
  103. data/vendor/kreuzberg/src/extraction/pptx/metadata.rs +160 -0
  104. data/vendor/kreuzberg/src/extraction/pptx/mod.rs +558 -0
  105. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +388 -0
  106. data/vendor/kreuzberg/src/extraction/transform/content.rs +205 -0
  107. data/vendor/kreuzberg/src/extraction/transform/elements.rs +211 -0
  108. data/vendor/kreuzberg/src/extraction/transform/mod.rs +480 -0
  109. data/vendor/kreuzberg/src/extraction/transform/types.rs +27 -0
  110. data/vendor/kreuzberg/src/extractors/archive.rs +2 -0
  111. data/vendor/kreuzberg/src/extractors/bibtex.rs +2 -0
  112. data/vendor/kreuzberg/src/extractors/djot_format/attributes.rs +134 -0
  113. data/vendor/kreuzberg/src/extractors/djot_format/conversion.rs +223 -0
  114. data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +172 -0
  115. data/vendor/kreuzberg/src/extractors/djot_format/mod.rs +24 -0
  116. data/vendor/kreuzberg/src/extractors/djot_format/parsing/block_handlers.rs +271 -0
  117. data/vendor/kreuzberg/src/extractors/djot_format/parsing/content_extraction.rs +257 -0
  118. data/vendor/kreuzberg/src/extractors/djot_format/parsing/event_handlers.rs +101 -0
  119. data/vendor/kreuzberg/src/extractors/djot_format/parsing/inline_handlers.rs +201 -0
  120. data/vendor/kreuzberg/src/extractors/djot_format/parsing/mod.rs +16 -0
  121. data/vendor/kreuzberg/src/extractors/djot_format/parsing/state.rs +78 -0
  122. data/vendor/kreuzberg/src/extractors/djot_format/parsing/table_extraction.rs +68 -0
  123. data/vendor/kreuzberg/src/extractors/djot_format/parsing/text_extraction.rs +61 -0
  124. data/vendor/kreuzberg/src/extractors/djot_format/rendering.rs +452 -0
  125. data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
  126. data/vendor/kreuzberg/src/extractors/docx.rs +12 -1
  127. data/vendor/kreuzberg/src/extractors/email.rs +2 -0
  128. data/vendor/kreuzberg/src/extractors/epub/content.rs +333 -0
  129. data/vendor/kreuzberg/src/extractors/epub/metadata.rs +137 -0
  130. data/vendor/kreuzberg/src/extractors/epub/mod.rs +186 -0
  131. data/vendor/kreuzberg/src/extractors/epub/parsing.rs +86 -0
  132. data/vendor/kreuzberg/src/extractors/excel.rs +4 -0
  133. data/vendor/kreuzberg/src/extractors/fictionbook.rs +2 -0
  134. data/vendor/kreuzberg/src/extractors/frontmatter_utils.rs +466 -0
  135. data/vendor/kreuzberg/src/extractors/html.rs +80 -8
  136. data/vendor/kreuzberg/src/extractors/image.rs +8 -1
  137. data/vendor/kreuzberg/src/extractors/jats/elements.rs +350 -0
  138. data/vendor/kreuzberg/src/extractors/jats/metadata.rs +21 -0
  139. data/vendor/kreuzberg/src/extractors/{jats.rs → jats/mod.rs} +10 -412
  140. data/vendor/kreuzberg/src/extractors/jats/parser.rs +52 -0
  141. data/vendor/kreuzberg/src/extractors/jupyter.rs +2 -0
  142. data/vendor/kreuzberg/src/extractors/latex/commands.rs +93 -0
  143. data/vendor/kreuzberg/src/extractors/latex/environments.rs +157 -0
  144. data/vendor/kreuzberg/src/extractors/latex/metadata.rs +27 -0
  145. data/vendor/kreuzberg/src/extractors/latex/mod.rs +146 -0
  146. data/vendor/kreuzberg/src/extractors/latex/parser.rs +231 -0
  147. data/vendor/kreuzberg/src/extractors/latex/utilities.rs +126 -0
  148. data/vendor/kreuzberg/src/extractors/markdown.rs +39 -162
  149. data/vendor/kreuzberg/src/extractors/mod.rs +9 -1
  150. data/vendor/kreuzberg/src/extractors/odt.rs +2 -0
  151. data/vendor/kreuzberg/src/extractors/opml/core.rs +165 -0
  152. data/vendor/kreuzberg/src/extractors/opml/mod.rs +31 -0
  153. data/vendor/kreuzberg/src/extractors/opml/parser.rs +479 -0
  154. data/vendor/kreuzberg/src/extractors/orgmode.rs +2 -0
  155. data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +106 -0
  156. data/vendor/kreuzberg/src/extractors/{pdf.rs → pdf/mod.rs} +25 -324
  157. data/vendor/kreuzberg/src/extractors/pdf/ocr.rs +214 -0
  158. data/vendor/kreuzberg/src/extractors/pdf/pages.rs +51 -0
  159. data/vendor/kreuzberg/src/extractors/pptx.rs +9 -2
  160. data/vendor/kreuzberg/src/extractors/rst.rs +2 -0
  161. data/vendor/kreuzberg/src/extractors/rtf/encoding.rs +116 -0
  162. data/vendor/kreuzberg/src/extractors/rtf/formatting.rs +24 -0
  163. data/vendor/kreuzberg/src/extractors/rtf/images.rs +72 -0
  164. data/vendor/kreuzberg/src/extractors/rtf/metadata.rs +216 -0
  165. data/vendor/kreuzberg/src/extractors/rtf/mod.rs +142 -0
  166. data/vendor/kreuzberg/src/extractors/rtf/parser.rs +259 -0
  167. data/vendor/kreuzberg/src/extractors/rtf/tables.rs +83 -0
  168. data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
  169. data/vendor/kreuzberg/src/extractors/text.rs +4 -0
  170. data/vendor/kreuzberg/src/extractors/typst.rs +2 -0
  171. data/vendor/kreuzberg/src/extractors/xml.rs +2 -0
  172. data/vendor/kreuzberg/src/keywords/processor.rs +14 -0
  173. data/vendor/kreuzberg/src/language_detection/processor.rs +10 -0
  174. data/vendor/kreuzberg/src/lib.rs +2 -2
  175. data/vendor/kreuzberg/src/mcp/errors.rs +312 -0
  176. data/vendor/kreuzberg/src/mcp/format.rs +211 -0
  177. data/vendor/kreuzberg/src/mcp/mod.rs +9 -3
  178. data/vendor/kreuzberg/src/mcp/params.rs +196 -0
  179. data/vendor/kreuzberg/src/mcp/server.rs +39 -1438
  180. data/vendor/kreuzberg/src/mcp/tools/cache.rs +179 -0
  181. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +403 -0
  182. data/vendor/kreuzberg/src/mcp/tools/mime.rs +150 -0
  183. data/vendor/kreuzberg/src/mcp/tools/mod.rs +11 -0
  184. data/vendor/kreuzberg/src/ocr/backends/easyocr.rs +96 -0
  185. data/vendor/kreuzberg/src/ocr/backends/mod.rs +7 -0
  186. data/vendor/kreuzberg/src/ocr/backends/paddleocr.rs +27 -0
  187. data/vendor/kreuzberg/src/ocr/backends/tesseract.rs +134 -0
  188. data/vendor/kreuzberg/src/ocr/hocr.rs +60 -16
  189. data/vendor/kreuzberg/src/ocr/language_registry.rs +11 -235
  190. data/vendor/kreuzberg/src/ocr/mod.rs +1 -0
  191. data/vendor/kreuzberg/src/ocr/processor/config.rs +203 -0
  192. data/vendor/kreuzberg/src/ocr/processor/execution.rs +494 -0
  193. data/vendor/kreuzberg/src/ocr/processor/mod.rs +265 -0
  194. data/vendor/kreuzberg/src/ocr/processor/validation.rs +145 -0
  195. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +41 -24
  196. data/vendor/kreuzberg/src/pdf/bindings.rs +21 -8
  197. data/vendor/kreuzberg/src/pdf/hierarchy/bounding_box.rs +289 -0
  198. data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +199 -0
  199. data/vendor/kreuzberg/src/pdf/{hierarchy.rs → hierarchy/extraction.rs} +6 -346
  200. data/vendor/kreuzberg/src/pdf/hierarchy/mod.rs +18 -0
  201. data/vendor/kreuzberg/src/plugins/extractor/mod.rs +319 -0
  202. data/vendor/kreuzberg/src/plugins/extractor/registry.rs +434 -0
  203. data/vendor/kreuzberg/src/plugins/extractor/trait.rs +391 -0
  204. data/vendor/kreuzberg/src/plugins/mod.rs +13 -0
  205. data/vendor/kreuzberg/src/plugins/ocr.rs +11 -0
  206. data/vendor/kreuzberg/src/plugins/processor/mod.rs +365 -0
  207. data/vendor/kreuzberg/src/plugins/processor/registry.rs +37 -0
  208. data/vendor/kreuzberg/src/plugins/processor/trait.rs +284 -0
  209. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +416 -0
  210. data/vendor/kreuzberg/src/plugins/registry/mod.rs +116 -0
  211. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +293 -0
  212. data/vendor/kreuzberg/src/plugins/registry/processor.rs +304 -0
  213. data/vendor/kreuzberg/src/plugins/registry/validator.rs +238 -0
  214. data/vendor/kreuzberg/src/plugins/validator/mod.rs +424 -0
  215. data/vendor/kreuzberg/src/plugins/validator/registry.rs +355 -0
  216. data/vendor/kreuzberg/src/plugins/validator/trait.rs +276 -0
  217. data/vendor/kreuzberg/src/stopwords/languages/asian.rs +40 -0
  218. data/vendor/kreuzberg/src/stopwords/languages/germanic.rs +36 -0
  219. data/vendor/kreuzberg/src/stopwords/languages/mod.rs +10 -0
  220. data/vendor/kreuzberg/src/stopwords/languages/other.rs +44 -0
  221. data/vendor/kreuzberg/src/stopwords/languages/romance.rs +36 -0
  222. data/vendor/kreuzberg/src/stopwords/languages/slavic.rs +36 -0
  223. data/vendor/kreuzberg/src/stopwords/mod.rs +7 -33
  224. data/vendor/kreuzberg/src/text/quality.rs +1 -1
  225. data/vendor/kreuzberg/src/text/quality_processor.rs +10 -0
  226. data/vendor/kreuzberg/src/text/token_reduction/core/analysis.rs +238 -0
  227. data/vendor/kreuzberg/src/text/token_reduction/core/mod.rs +8 -0
  228. data/vendor/kreuzberg/src/text/token_reduction/core/punctuation.rs +54 -0
  229. data/vendor/kreuzberg/src/text/token_reduction/core/reducer.rs +384 -0
  230. data/vendor/kreuzberg/src/text/token_reduction/core/sentence_selection.rs +68 -0
  231. data/vendor/kreuzberg/src/text/token_reduction/core/word_filtering.rs +156 -0
  232. data/vendor/kreuzberg/src/text/token_reduction/filters/general.rs +377 -0
  233. data/vendor/kreuzberg/src/text/token_reduction/filters/html.rs +51 -0
  234. data/vendor/kreuzberg/src/text/token_reduction/filters/markdown.rs +285 -0
  235. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +131 -246
  236. data/vendor/kreuzberg/src/types/djot.rs +209 -0
  237. data/vendor/kreuzberg/src/types/extraction.rs +301 -0
  238. data/vendor/kreuzberg/src/types/formats.rs +443 -0
  239. data/vendor/kreuzberg/src/types/metadata.rs +560 -0
  240. data/vendor/kreuzberg/src/types/mod.rs +281 -0
  241. data/vendor/kreuzberg/src/types/page.rs +182 -0
  242. data/vendor/kreuzberg/src/types/serde_helpers.rs +132 -0
  243. data/vendor/kreuzberg/src/types/tables.rs +39 -0
  244. data/vendor/kreuzberg/src/utils/quality/heuristics.rs +58 -0
  245. data/vendor/kreuzberg/src/utils/{quality.rs → quality/mod.rs} +168 -489
  246. data/vendor/kreuzberg/src/utils/quality/patterns.rs +117 -0
  247. data/vendor/kreuzberg/src/utils/quality/scoring.rs +178 -0
  248. data/vendor/kreuzberg/src/utils/string_pool/buffer_pool.rs +325 -0
  249. data/vendor/kreuzberg/src/utils/string_pool/interned.rs +102 -0
  250. data/vendor/kreuzberg/src/utils/string_pool/language_pool.rs +119 -0
  251. data/vendor/kreuzberg/src/utils/string_pool/mime_pool.rs +235 -0
  252. data/vendor/kreuzberg/src/utils/string_pool/mod.rs +41 -0
  253. data/vendor/kreuzberg/tests/api_chunk.rs +313 -0
  254. data/vendor/kreuzberg/tests/api_embed.rs +6 -9
  255. data/vendor/kreuzberg/tests/batch_orchestration.rs +1 -0
  256. data/vendor/kreuzberg/tests/concurrency_stress.rs +7 -0
  257. data/vendor/kreuzberg/tests/core_integration.rs +1 -0
  258. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +130 -0
  259. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +5 -14
  260. data/vendor/kreuzberg/tests/format_integration.rs +2 -0
  261. data/vendor/kreuzberg/tests/helpers/mod.rs +1 -0
  262. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  263. data/vendor/kreuzberg/tests/ocr_configuration.rs +16 -0
  264. data/vendor/kreuzberg/tests/ocr_errors.rs +18 -0
  265. data/vendor/kreuzberg/tests/ocr_quality.rs +9 -0
  266. data/vendor/kreuzberg/tests/ocr_stress.rs +1 -0
  267. data/vendor/kreuzberg/tests/pipeline_integration.rs +50 -0
  268. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +13 -0
  269. data/vendor/kreuzberg/tests/plugin_system.rs +12 -0
  270. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +504 -0
  271. data/vendor/kreuzberg/tests/registry_integration_tests.rs +2 -0
  272. data/vendor/kreuzberg-ffi/Cargo.toml +2 -1
  273. data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +2 -0
  274. data/vendor/kreuzberg-ffi/kreuzberg.h +347 -178
  275. data/vendor/kreuzberg-ffi/src/config/html.rs +318 -0
  276. data/vendor/kreuzberg-ffi/src/config/loader.rs +154 -0
  277. data/vendor/kreuzberg-ffi/src/config/merge.rs +104 -0
  278. data/vendor/kreuzberg-ffi/src/config/mod.rs +385 -0
  279. data/vendor/kreuzberg-ffi/src/config/parse.rs +91 -0
  280. data/vendor/kreuzberg-ffi/src/config/serialize.rs +118 -0
  281. data/vendor/kreuzberg-ffi/src/config_builder.rs +598 -0
  282. data/vendor/kreuzberg-ffi/src/error.rs +46 -14
  283. data/vendor/kreuzberg-ffi/src/helpers.rs +10 -0
  284. data/vendor/kreuzberg-ffi/src/html_options.rs +421 -0
  285. data/vendor/kreuzberg-ffi/src/lib.rs +16 -0
  286. data/vendor/kreuzberg-ffi/src/panic_shield.rs +11 -0
  287. data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +2 -0
  288. data/vendor/kreuzberg-ffi/src/result.rs +148 -122
  289. data/vendor/kreuzberg-ffi/src/result_view.rs +4 -0
  290. data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
  291. metadata +201 -28
  292. data/vendor/kreuzberg/src/api/server.rs +0 -518
  293. data/vendor/kreuzberg/src/core/config.rs +0 -1914
  294. data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
  295. data/vendor/kreuzberg/src/core/extractor.rs +0 -1200
  296. data/vendor/kreuzberg/src/core/pipeline.rs +0 -1223
  297. data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
  298. data/vendor/kreuzberg/src/extraction/html.rs +0 -1830
  299. data/vendor/kreuzberg/src/extraction/pptx.rs +0 -3102
  300. data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
  301. data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
  302. data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
  303. data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
  304. data/vendor/kreuzberg/src/ocr/processor.rs +0 -858
  305. data/vendor/kreuzberg/src/plugins/extractor.rs +0 -1042
  306. data/vendor/kreuzberg/src/plugins/processor.rs +0 -650
  307. data/vendor/kreuzberg/src/plugins/registry.rs +0 -1339
  308. data/vendor/kreuzberg/src/plugins/validator.rs +0 -967
  309. data/vendor/kreuzberg/src/text/token_reduction/core.rs +0 -832
  310. data/vendor/kreuzberg/src/types.rs +0 -1713
  311. data/vendor/kreuzberg/src/utils/string_pool.rs +0 -762
  312. data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5742beaf511e059ff6d34f536e90a5636097b48cd1262a34eaa7692261de7aa
4
- data.tar.gz: b913234c3ecda47e6e4944181309c2e60c09a8e708eb651cd705258048d3f7bb
3
+ metadata.gz: 0c1c0519fb3a58c45ec553994bd982b4f284835bd35ea0758461f6f381accfd6
4
+ data.tar.gz: 161c18cfabdd20bdaa520abda521cb16072dcc00f5fd2e41152d9da4acdb9d08
5
5
  SHA512:
6
- metadata.gz: 1a459be58b389e806f6c46ffadaf0f01ea095154c3f9265b54bcd1c722493d8a094df41cc5a008bea6ec3cadc2622ecd0379f36a0ae7404f93e2a6c39f44ebfd
7
- data.tar.gz: 31c2d8cb14a9bd72e7fc4fff84f598ec6ecb24c5d982394f642fcc04e09cc74d4280550e10ca1b5d703a0a824fc3c63330d5608c42785a825ea4f46ee3ad8c0a
6
+ metadata.gz: 7c6e1768022dcfdef5eaaaa3557a8388e8ad45158a69ed022d852b07f7658cbb885ca7860fa32eda8f29b5f7ea4216f93033aad77614afb82578b9157ed92710
7
+ data.tar.gz: 96ca3f1f3c6d6f9ea6dc826f7704c1d741ffaab24f524f099c4eb294652211ac6b9f324874974bb24d417193e76b1bce951315330d6f1ccd8eb5b014fa7fc71f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.8)
4
+ kreuzberg (4.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -207,7 +207,7 @@ CHECKSUMS
207
207
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
208
208
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
209
209
  json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
210
- kreuzberg (4.0.8)
210
+ kreuzberg (4.1.1)
211
211
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
212
212
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
213
213
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.0.0" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.1.1" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -307,7 +307,7 @@ dependencies = [
307
307
  "num-traits",
308
308
  "pastey 0.1.1",
309
309
  "rayon",
310
- "thiserror 2.0.17",
310
+ "thiserror 2.0.18",
311
311
  "v_frame",
312
312
  "y4m",
313
313
  ]
@@ -711,14 +711,14 @@ dependencies = [
711
711
  "serde_json",
712
712
  "syn",
713
713
  "tempfile",
714
- "toml 0.9.10+spec-1.1.0",
714
+ "toml 0.9.11+spec-1.1.0",
715
715
  ]
716
716
 
717
717
  [[package]]
718
718
  name = "cc"
719
- version = "1.2.51"
719
+ version = "1.2.53"
720
720
  source = "registry+https://github.com/rust-lang/crates.io-index"
721
- checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
721
+ checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
722
722
  dependencies = [
723
723
  "find-msvc-tools",
724
724
  "jobserver",
@@ -1515,7 +1515,7 @@ dependencies = [
1515
1515
  "lebe",
1516
1516
  "miniz_oxide",
1517
1517
  "rayon-core",
1518
- "smallvec 1.15.1",
1518
+ "smallvec",
1519
1519
  "zune-inflate",
1520
1520
  ]
1521
1521
 
@@ -1544,27 +1544,28 @@ checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"
1544
1544
 
1545
1545
  [[package]]
1546
1546
  name = "fast_image_resize"
1547
- version = "5.6.0"
1547
+ version = "6.0.0"
1548
1548
  source = "registry+https://github.com/rust-lang/crates.io-index"
1549
- checksum = "6b6e793dfd0ee192d1999c655797ecc956c82d1f6d367be20bf6b81d6a1c87ac"
1549
+ checksum = "12dd43e5011e8d8411a3215a0d57a2ec5c68282fb90eb5d7221fab0113442174"
1550
1550
  dependencies = [
1551
1551
  "cfg-if",
1552
1552
  "document-features",
1553
1553
  "num-traits",
1554
- "thiserror 2.0.17",
1554
+ "thiserror 2.0.18",
1555
1555
  ]
1556
1556
 
1557
1557
  [[package]]
1558
1558
  name = "fastembed"
1559
- version = "5.7.0"
1559
+ version = "5.8.1"
1560
1560
  source = "registry+https://github.com/rust-lang/crates.io-index"
1561
- checksum = "158bd4a909fb7edd96013796d6b6c57660615a90da81811f2a97bf4ea832d6e4"
1561
+ checksum = "59a3f841f27a44bcc32214f8df75cc9b6cea55dbbebbfe546735690eab5bb2d2"
1562
1562
  dependencies = [
1563
1563
  "anyhow",
1564
1564
  "hf-hub",
1565
- "ndarray 0.16.1",
1565
+ "ndarray",
1566
1566
  "ort",
1567
1567
  "safetensors",
1568
+ "serde",
1568
1569
  "serde_json",
1569
1570
  "tokenizers",
1570
1571
  ]
@@ -1630,9 +1631,9 @@ dependencies = [
1630
1631
 
1631
1632
  [[package]]
1632
1633
  name = "find-msvc-tools"
1633
- version = "0.1.6"
1634
+ version = "0.1.8"
1634
1635
  source = "registry+https://github.com/rust-lang/crates.io-index"
1635
- checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
1636
+ checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
1636
1637
 
1637
1638
  [[package]]
1638
1639
  name = "flate2"
@@ -1992,7 +1993,7 @@ dependencies = [
1992
1993
  "reqwest 0.12.28",
1993
1994
  "serde",
1994
1995
  "serde_json",
1995
- "thiserror 2.0.17",
1996
+ "thiserror 2.0.18",
1996
1997
  "ureq 2.12.1",
1997
1998
  "windows-sys 0.60.2",
1998
1999
  ]
@@ -2026,9 +2027,9 @@ dependencies = [
2026
2027
 
2027
2028
  [[package]]
2028
2029
  name = "html-to-markdown-rs"
2029
- version = "2.22.2"
2030
+ version = "2.23.1"
2030
2031
  source = "registry+https://github.com/rust-lang/crates.io-index"
2031
- checksum = "bc035a13874f15114115e664e096596b9ac54c9938befb5bfc2f2e35a5492e7d"
2032
+ checksum = "e1f4d6781ac8dd203853803d27054ca4153c7fd0f3956cb7fc95dc06f42a1c46"
2032
2033
  dependencies = [
2033
2034
  "astral-tl",
2034
2035
  "base64 0.22.1",
@@ -2041,7 +2042,7 @@ dependencies = [
2041
2042
  "regex",
2042
2043
  "serde",
2043
2044
  "serde_json",
2044
- "thiserror 2.0.17",
2045
+ "thiserror 2.0.18",
2045
2046
  ]
2046
2047
 
2047
2048
  [[package]]
@@ -2135,7 +2136,7 @@ dependencies = [
2135
2136
  "itoa",
2136
2137
  "pin-project-lite",
2137
2138
  "pin-utils",
2138
- "smallvec 1.15.1",
2139
+ "smallvec",
2139
2140
  "tokio",
2140
2141
  "want",
2141
2142
  ]
@@ -2282,7 +2283,7 @@ dependencies = [
2282
2283
  "icu_normalizer_data",
2283
2284
  "icu_properties",
2284
2285
  "icu_provider",
2285
- "smallvec 1.15.1",
2286
+ "smallvec",
2286
2287
  "zerovec",
2287
2288
  ]
2288
2289
 
@@ -2364,7 +2365,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
2364
2365
  checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
2365
2366
  dependencies = [
2366
2367
  "idna_adapter",
2367
- "smallvec 1.15.1",
2368
+ "smallvec",
2368
2369
  "utf8_iter",
2369
2370
  ]
2370
2371
 
@@ -2599,6 +2600,12 @@ dependencies = [
2599
2600
  "libc",
2600
2601
  ]
2601
2602
 
2603
+ [[package]]
2604
+ name = "jotdown"
2605
+ version = "0.9.1"
2606
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2607
+ checksum = "086b08ec7a274cd60cd575ed3651ba081ee72dec0d39a6210e8adcff9efe3880"
2608
+
2602
2609
  [[package]]
2603
2610
  name = "js-sys"
2604
2611
  version = "0.3.83"
@@ -2620,7 +2627,7 @@ dependencies = [
2620
2627
 
2621
2628
  [[package]]
2622
2629
  name = "kreuzberg"
2623
- version = "4.0.0-rc.28"
2630
+ version = "4.1.0"
2624
2631
  dependencies = [
2625
2632
  "ahash",
2626
2633
  "async-trait",
@@ -2644,6 +2651,7 @@ dependencies = [
2644
2651
  "image",
2645
2652
  "indexmap",
2646
2653
  "infer",
2654
+ "jotdown",
2647
2655
  "kamadak-exif",
2648
2656
  "kreuzberg-pdfium-render",
2649
2657
  "kreuzberg-tesseract",
@@ -2655,7 +2663,7 @@ dependencies = [
2655
2663
  "memchr",
2656
2664
  "mime_guess",
2657
2665
  "msg_parser",
2658
- "ndarray 0.17.1",
2666
+ "ndarray",
2659
2667
  "num_cpus",
2660
2668
  "once_cell",
2661
2669
  "opentelemetry",
@@ -2666,7 +2674,7 @@ dependencies = [
2666
2674
  "pkg-config",
2667
2675
  "polars",
2668
2676
  "pulldown-cmark",
2669
- "quick-xml 0.38.4",
2677
+ "quick-xml 0.39.0",
2670
2678
  "rake",
2671
2679
  "rayon",
2672
2680
  "regex",
@@ -2684,10 +2692,10 @@ dependencies = [
2684
2692
  "smartcore",
2685
2693
  "tar",
2686
2694
  "text-splitter",
2687
- "thiserror 2.0.17",
2695
+ "thiserror 2.0.18",
2688
2696
  "tiff 0.11.0",
2689
2697
  "tokio",
2690
- "toml 0.9.10+spec-1.1.0",
2698
+ "toml 0.9.11+spec-1.1.0",
2691
2699
  "tower",
2692
2700
  "tower-http",
2693
2701
  "tracing",
@@ -2698,12 +2706,12 @@ dependencies = [
2698
2706
  "uuid",
2699
2707
  "whatlang",
2700
2708
  "yake-rust",
2701
- "zip 7.0.0",
2709
+ "zip 7.1.0",
2702
2710
  ]
2703
2711
 
2704
2712
  [[package]]
2705
2713
  name = "kreuzberg-ffi"
2706
- version = "4.0.0-rc.28"
2714
+ version = "4.1.0"
2707
2715
  dependencies = [
2708
2716
  "async-trait",
2709
2717
  "cbindgen",
@@ -2752,19 +2760,21 @@ dependencies = [
2752
2760
  "pretty_assertions",
2753
2761
  "rb-sys",
2754
2762
  "serde_json",
2763
+ "serde_yaml_ng",
2755
2764
  "tokio",
2765
+ "toml 0.8.23",
2756
2766
  ]
2757
2767
 
2758
2768
  [[package]]
2759
2769
  name = "kreuzberg-tesseract"
2760
- version = "4.0.0-rc.28"
2770
+ version = "4.1.0"
2761
2771
  dependencies = [
2762
2772
  "cc",
2763
2773
  "cmake",
2764
2774
  "libc",
2765
2775
  "reqwest 0.13.1",
2766
- "thiserror 2.0.17",
2767
- "zip 7.0.0",
2776
+ "thiserror 2.0.18",
2777
+ "zip 7.1.0",
2768
2778
  ]
2769
2779
 
2770
2780
  [[package]]
@@ -2912,9 +2922,9 @@ dependencies = [
2912
2922
 
2913
2923
  [[package]]
2914
2924
  name = "lopdf"
2915
- version = "0.38.0"
2925
+ version = "0.39.0"
2916
2926
  source = "registry+https://github.com/rust-lang/crates.io-index"
2917
- checksum = "c7184fdea2bc3cd272a1acec4030c321a8f9875e877b3f92a53f2f6033fdc289"
2927
+ checksum = "f560f57dfb9142a02d673e137622fd515d4231e51feb8b4af28d92647d83f35b"
2918
2928
  dependencies = [
2919
2929
  "aes",
2920
2930
  "bitflags",
@@ -2936,7 +2946,7 @@ dependencies = [
2936
2946
  "rayon",
2937
2947
  "sha2",
2938
2948
  "stringprep",
2939
- "thiserror 2.0.17",
2949
+ "thiserror 2.0.18",
2940
2950
  "time",
2941
2951
  "ttf-parser",
2942
2952
  "weezl",
@@ -2978,9 +2988,9 @@ dependencies = [
2978
2988
 
2979
2989
  [[package]]
2980
2990
  name = "lzma-rust2"
2981
- version = "0.15.6"
2991
+ version = "0.15.7"
2982
2992
  source = "registry+https://github.com/rust-lang/crates.io-index"
2983
- checksum = "17f7337d278fec032975dc884152491580dd23750ee957047856735fe0e61ede"
2993
+ checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69"
2984
2994
  dependencies = [
2985
2995
  "crc",
2986
2996
  "sha2",
@@ -3255,24 +3265,9 @@ dependencies = [
3255
3265
 
3256
3266
  [[package]]
3257
3267
  name = "ndarray"
3258
- version = "0.16.1"
3268
+ version = "0.17.2"
3259
3269
  source = "registry+https://github.com/rust-lang/crates.io-index"
3260
- checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
3261
- dependencies = [
3262
- "matrixmultiply",
3263
- "num-complex",
3264
- "num-integer",
3265
- "num-traits",
3266
- "portable-atomic",
3267
- "portable-atomic-util",
3268
- "rawpointer",
3269
- ]
3270
-
3271
- [[package]]
3272
- name = "ndarray"
3273
- version = "0.17.1"
3274
- source = "registry+https://github.com/rust-lang/crates.io-index"
3275
- checksum = "0c7c9125e8f6f10c9da3aad044cc918cf8784fa34de857b1aa68038eb05a50a9"
3270
+ checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
3276
3271
  dependencies = [
3277
3272
  "matrixmultiply",
3278
3273
  "num-complex",
@@ -3476,7 +3471,7 @@ dependencies = [
3476
3471
  "serde",
3477
3472
  "serde_json",
3478
3473
  "serde_urlencoded",
3479
- "thiserror 2.0.17",
3474
+ "thiserror 2.0.18",
3480
3475
  "tokio",
3481
3476
  "tracing",
3482
3477
  "url",
@@ -3579,7 +3574,7 @@ dependencies = [
3579
3574
  "futures-sink",
3580
3575
  "js-sys",
3581
3576
  "pin-project-lite",
3582
- "thiserror 2.0.17",
3577
+ "thiserror 2.0.18",
3583
3578
  "tracing",
3584
3579
  ]
3585
3580
 
@@ -3595,7 +3590,7 @@ dependencies = [
3595
3590
  "opentelemetry",
3596
3591
  "percent-encoding",
3597
3592
  "rand 0.9.2",
3598
- "thiserror 2.0.17",
3593
+ "thiserror 2.0.18",
3599
3594
  "tokio",
3600
3595
  "tokio-stream",
3601
3596
  ]
@@ -3623,25 +3618,22 @@ checksum = "cfdf547b633735ad9d67353aba48b3e685ab5ffb3195aaa9a1b1d8613e11b98c"
3623
3618
 
3624
3619
  [[package]]
3625
3620
  name = "ort"
3626
- version = "2.0.0-rc.10"
3621
+ version = "2.0.0-rc.11"
3627
3622
  source = "registry+https://github.com/rust-lang/crates.io-index"
3628
- checksum = "1fa7e49bd669d32d7bc2a15ec540a527e7764aec722a45467814005725bcd721"
3623
+ checksum = "4a5df903c0d2c07b56950f1058104ab0c8557159f2741782223704de9be73c3c"
3629
3624
  dependencies = [
3630
- "libloading 0.8.9",
3631
- "ndarray 0.16.1",
3625
+ "libloading 0.9.0",
3626
+ "ndarray",
3632
3627
  "ort-sys",
3633
- "smallvec 2.0.0-alpha.10",
3628
+ "smallvec",
3634
3629
  "tracing",
3635
3630
  ]
3636
3631
 
3637
3632
  [[package]]
3638
3633
  name = "ort-sys"
3639
- version = "2.0.0-rc.10"
3634
+ version = "2.0.0-rc.11"
3640
3635
  source = "registry+https://github.com/rust-lang/crates.io-index"
3641
- checksum = "e2aba9f5c7c479925205799216e7e5d07cc1d4fa76ea8058c60a9a30f6a4e890"
3642
- dependencies = [
3643
- "pkg-config",
3644
- ]
3636
+ checksum = "06503bb33f294c5f1ba484011e053bfa6ae227074bdb841e9863492dc5960d4b"
3645
3637
 
3646
3638
  [[package]]
3647
3639
  name = "outref"
@@ -3674,7 +3666,7 @@ dependencies = [
3674
3666
  "cfg-if",
3675
3667
  "libc",
3676
3668
  "redox_syscall 0.5.18",
3677
- "smallvec 1.15.1",
3669
+ "smallvec",
3678
3670
  "windows-link",
3679
3671
  ]
3680
3672
 
@@ -4536,6 +4528,16 @@ dependencies = [
4536
4528
  "serde",
4537
4529
  ]
4538
4530
 
4531
+ [[package]]
4532
+ name = "quick-xml"
4533
+ version = "0.39.0"
4534
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4535
+ checksum = "f2e3bf4aa9d243beeb01a7b3bc30b77cfe2c44e24ec02d751a7104a53c2c49a1"
4536
+ dependencies = [
4537
+ "memchr",
4538
+ "serde",
4539
+ ]
4540
+
4539
4541
  [[package]]
4540
4542
  name = "quinn"
4541
4543
  version = "0.11.9"
@@ -4550,7 +4552,7 @@ dependencies = [
4550
4552
  "rustc-hash 2.1.1",
4551
4553
  "rustls",
4552
4554
  "socket2",
4553
- "thiserror 2.0.17",
4555
+ "thiserror 2.0.18",
4554
4556
  "tokio",
4555
4557
  "tracing",
4556
4558
  "web-time",
@@ -4572,7 +4574,7 @@ dependencies = [
4572
4574
  "rustls",
4573
4575
  "rustls-pki-types",
4574
4576
  "slab",
4575
- "thiserror 2.0.17",
4577
+ "thiserror 2.0.18",
4576
4578
  "tinyvec",
4577
4579
  "tracing",
4578
4580
  "web-time",
@@ -4714,7 +4716,7 @@ dependencies = [
4714
4716
  "rand 0.9.2",
4715
4717
  "rand_chacha",
4716
4718
  "simd_helpers",
4717
- "thiserror 2.0.17",
4719
+ "thiserror 2.0.18",
4718
4720
  "v_frame",
4719
4721
  "wasm-bindgen",
4720
4722
  ]
@@ -4856,7 +4858,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
4856
4858
  dependencies = [
4857
4859
  "getrandom 0.2.16",
4858
4860
  "libredox",
4859
- "thiserror 2.0.17",
4861
+ "thiserror 2.0.18",
4860
4862
  ]
4861
4863
 
4862
4864
  [[package]]
@@ -5020,9 +5022,9 @@ dependencies = [
5020
5022
 
5021
5023
  [[package]]
5022
5024
  name = "rmcp"
5023
- version = "0.12.0"
5025
+ version = "0.13.0"
5024
5026
  source = "registry+https://github.com/rust-lang/crates.io-index"
5025
- checksum = "528d42f8176e6e5e71ea69182b17d1d0a19a6b3b894b564678b74cd7cab13cfa"
5027
+ checksum = "d1815dbc06c414d720f8bc1951eccd66bc99efc6376331f1e7093a119b3eb508"
5026
5028
  dependencies = [
5027
5029
  "async-trait",
5028
5030
  "axum",
@@ -5041,7 +5043,7 @@ dependencies = [
5041
5043
  "serde",
5042
5044
  "serde_json",
5043
5045
  "sse-stream",
5044
- "thiserror 2.0.17",
5046
+ "thiserror 2.0.18",
5045
5047
  "tokio",
5046
5048
  "tokio-stream",
5047
5049
  "tokio-util",
@@ -5052,9 +5054,9 @@ dependencies = [
5052
5054
 
5053
5055
  [[package]]
5054
5056
  name = "rmcp-macros"
5055
- version = "0.12.0"
5057
+ version = "0.13.0"
5056
5058
  source = "registry+https://github.com/rust-lang/crates.io-index"
5057
- checksum = "e3f81daaa494eb8e985c9462f7d6ce1ab05e5299f48aafd76cdd3d8b060e6f59"
5059
+ checksum = "11f0bc7008fa102e771a76c6d2c9b253be3f2baa5964e060464d038ae1cbc573"
5058
5060
  dependencies = [
5059
5061
  "darling 0.23.0",
5060
5062
  "proc-macro2",
@@ -5236,10 +5238,11 @@ checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
5236
5238
 
5237
5239
  [[package]]
5238
5240
  name = "safetensors"
5239
- version = "0.4.5"
5241
+ version = "0.7.0"
5240
5242
  source = "registry+https://github.com/rust-lang/crates.io-index"
5241
- checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6"
5243
+ checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5"
5242
5244
  dependencies = [
5245
+ "hashbrown 0.16.1",
5243
5246
  "serde",
5244
5247
  "serde_json",
5245
5248
  ]
@@ -5607,12 +5610,6 @@ version = "1.15.1"
5607
5610
  source = "registry+https://github.com/rust-lang/crates.io-index"
5608
5611
  checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
5609
5612
 
5610
- [[package]]
5611
- name = "smallvec"
5612
- version = "2.0.0-alpha.10"
5613
- source = "registry+https://github.com/rust-lang/crates.io-index"
5614
- checksum = "51d44cfb396c3caf6fbfd0ab422af02631b69ddd96d2eff0b0f0724f9024051b"
5615
-
5616
5613
  [[package]]
5617
5614
  name = "smartcore"
5618
5615
  version = "0.4.8"
@@ -5922,7 +5919,7 @@ dependencies = [
5922
5919
  "memchr",
5923
5920
  "pulldown-cmark",
5924
5921
  "strum",
5925
- "thiserror 2.0.17",
5922
+ "thiserror 2.0.18",
5926
5923
  ]
5927
5924
 
5928
5925
  [[package]]
@@ -5942,11 +5939,11 @@ dependencies = [
5942
5939
 
5943
5940
  [[package]]
5944
5941
  name = "thiserror"
5945
- version = "2.0.17"
5942
+ version = "2.0.18"
5946
5943
  source = "registry+https://github.com/rust-lang/crates.io-index"
5947
- checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
5944
+ checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
5948
5945
  dependencies = [
5949
- "thiserror-impl 2.0.17",
5946
+ "thiserror-impl 2.0.18",
5950
5947
  ]
5951
5948
 
5952
5949
  [[package]]
@@ -5962,9 +5959,9 @@ dependencies = [
5962
5959
 
5963
5960
  [[package]]
5964
5961
  name = "thiserror-impl"
5965
- version = "2.0.17"
5962
+ version = "2.0.18"
5966
5963
  source = "registry+https://github.com/rust-lang/crates.io-index"
5967
- checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
5964
+ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
5968
5965
  dependencies = [
5969
5966
  "proc-macro2",
5970
5967
  "quote",
@@ -6092,7 +6089,7 @@ dependencies = [
6092
6089
  "serde",
6093
6090
  "serde_json",
6094
6091
  "spm_precompiled",
6095
- "thiserror 2.0.17",
6092
+ "thiserror 2.0.18",
6096
6093
  "unicode-normalization-alignments",
6097
6094
  "unicode-segmentation",
6098
6095
  "unicode_categories",
@@ -6183,9 +6180,9 @@ dependencies = [
6183
6180
 
6184
6181
  [[package]]
6185
6182
  name = "toml"
6186
- version = "0.9.10+spec-1.1.0"
6183
+ version = "0.9.11+spec-1.1.0"
6187
6184
  source = "registry+https://github.com/rust-lang/crates.io-index"
6188
- checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48"
6185
+ checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
6189
6186
  dependencies = [
6190
6187
  "indexmap",
6191
6188
  "serde_core",
@@ -6351,8 +6348,8 @@ dependencies = [
6351
6348
  "opentelemetry",
6352
6349
  "opentelemetry_sdk",
6353
6350
  "rustversion",
6354
- "smallvec 1.15.1",
6355
- "thiserror 2.0.17",
6351
+ "smallvec",
6352
+ "thiserror 2.0.18",
6356
6353
  "tracing",
6357
6354
  "tracing-core",
6358
6355
  "tracing-log",
@@ -6534,7 +6531,7 @@ version = "0.1.12"
6534
6531
  source = "registry+https://github.com/rust-lang/crates.io-index"
6535
6532
  checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
6536
6533
  dependencies = [
6537
- "smallvec 1.15.1",
6534
+ "smallvec",
6538
6535
  ]
6539
6536
 
6540
6537
  [[package]]
@@ -7501,12 +7498,11 @@ dependencies = [
7501
7498
 
7502
7499
  [[package]]
7503
7500
  name = "zip"
7504
- version = "7.0.0"
7501
+ version = "7.1.0"
7505
7502
  source = "registry+https://github.com/rust-lang/crates.io-index"
7506
- checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037"
7503
+ checksum = "9013f1222db8a6d680f13a7ccdc60a781199cd09c2fa4eff58e728bb181757fc"
7507
7504
  dependencies = [
7508
7505
  "aes",
7509
- "arbitrary",
7510
7506
  "bzip2",
7511
7507
  "constant_time_eq",
7512
7508
  "crc32fast",
@@ -31,7 +31,7 @@ embeddings = ["kreuzberg/embeddings"]
31
31
 
32
32
  [dependencies]
33
33
  async-trait = "0.1.89"
34
- kreuzberg = { path = "../../../vendor/kreuzberg", default-features = false, features = [
34
+ kreuzberg = { path = "../../../../../crates/kreuzberg", default-features = false, features = [
35
35
  "pdf",
36
36
  "excel",
37
37
  "office",
@@ -51,7 +51,7 @@ kreuzberg = { path = "../../../vendor/kreuzberg", default-features = false, feat
51
51
  "bundled-pdfium",
52
52
  "tokio-runtime",
53
53
  ] }
54
- kreuzberg-ffi = { path = "../../../vendor/kreuzberg-ffi" }
54
+ kreuzberg-ffi = { path = "../../../../../crates/kreuzberg-ffi" }
55
55
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = [
56
56
  "rb-sys",
57
57
  ] }
@@ -59,6 +59,8 @@ rb-sys = { version = "0.9.119", default-features = false, features = [
59
59
  "stable-api-compiled-fallback",
60
60
  ] }
61
61
  serde_json = "1.0.145"
62
+ toml = "0.8"
63
+ serde_yaml_ng = "0.10"
62
64
  tokio = { version = "1.48.0", features = [
63
65
  "rt",
64
66
  "rt-multi-thread",