kreuzberg 4.0.8 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +2 -2
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +94 -98
  5. data/ext/kreuzberg_rb/native/Cargo.toml +4 -2
  6. data/ext/kreuzberg_rb/native/src/batch.rs +139 -0
  7. data/ext/kreuzberg_rb/native/src/config/mod.rs +10 -0
  8. data/ext/kreuzberg_rb/native/src/config/types.rs +1058 -0
  9. data/ext/kreuzberg_rb/native/src/error_handling.rs +125 -0
  10. data/ext/kreuzberg_rb/native/src/extraction.rs +79 -0
  11. data/ext/kreuzberg_rb/native/src/gc_guarded_value.rs +35 -0
  12. data/ext/kreuzberg_rb/native/src/helpers.rs +176 -0
  13. data/ext/kreuzberg_rb/native/src/lib.rs +342 -3622
  14. data/ext/kreuzberg_rb/native/src/metadata.rs +34 -0
  15. data/ext/kreuzberg_rb/native/src/plugins/mod.rs +92 -0
  16. data/ext/kreuzberg_rb/native/src/plugins/ocr_backend.rs +159 -0
  17. data/ext/kreuzberg_rb/native/src/plugins/post_processor.rs +126 -0
  18. data/ext/kreuzberg_rb/native/src/plugins/validator.rs +99 -0
  19. data/ext/kreuzberg_rb/native/src/result.rs +326 -0
  20. data/ext/kreuzberg_rb/native/src/validation.rs +4 -0
  21. data/lib/kreuzberg/config.rb +99 -2
  22. data/lib/kreuzberg/result.rb +107 -2
  23. data/lib/kreuzberg/types.rb +104 -0
  24. data/lib/kreuzberg/version.rb +1 -1
  25. data/lib/kreuzberg.rb +0 -4
  26. data/sig/kreuzberg.rbs +105 -1
  27. data/spec/fixtures/config.toml +1 -1
  28. data/spec/fixtures/config.yaml +1 -1
  29. data/vendor/Cargo.toml +3 -3
  30. data/vendor/kreuzberg/Cargo.toml +5 -4
  31. data/vendor/kreuzberg/README.md +1 -1
  32. data/vendor/kreuzberg/src/api/config.rs +69 -0
  33. data/vendor/kreuzberg/src/api/handlers.rs +99 -2
  34. data/vendor/kreuzberg/src/api/mod.rs +14 -7
  35. data/vendor/kreuzberg/src/api/router.rs +214 -0
  36. data/vendor/kreuzberg/src/api/startup.rs +243 -0
  37. data/vendor/kreuzberg/src/api/types.rs +78 -0
  38. data/vendor/kreuzberg/src/cache/cleanup.rs +277 -0
  39. data/vendor/kreuzberg/src/cache/core.rs +428 -0
  40. data/vendor/kreuzberg/src/cache/mod.rs +21 -843
  41. data/vendor/kreuzberg/src/cache/utilities.rs +156 -0
  42. data/vendor/kreuzberg/src/chunking/boundaries.rs +301 -0
  43. data/vendor/kreuzberg/src/chunking/builder.rs +294 -0
  44. data/vendor/kreuzberg/src/chunking/config.rs +52 -0
  45. data/vendor/kreuzberg/src/chunking/core.rs +1017 -0
  46. data/vendor/kreuzberg/src/chunking/mod.rs +14 -2211
  47. data/vendor/kreuzberg/src/chunking/processor.rs +10 -0
  48. data/vendor/kreuzberg/src/chunking/validation.rs +686 -0
  49. data/vendor/kreuzberg/src/core/config/extraction/core.rs +169 -0
  50. data/vendor/kreuzberg/src/core/config/extraction/env.rs +179 -0
  51. data/vendor/kreuzberg/src/core/config/extraction/loaders.rs +204 -0
  52. data/vendor/kreuzberg/src/core/config/extraction/mod.rs +42 -0
  53. data/vendor/kreuzberg/src/core/config/extraction/types.rs +93 -0
  54. data/vendor/kreuzberg/src/core/config/formats.rs +135 -0
  55. data/vendor/kreuzberg/src/core/config/mod.rs +20 -0
  56. data/vendor/kreuzberg/src/core/config/ocr.rs +73 -0
  57. data/vendor/kreuzberg/src/core/config/page.rs +57 -0
  58. data/vendor/kreuzberg/src/core/config/pdf.rs +111 -0
  59. data/vendor/kreuzberg/src/core/config/processing.rs +312 -0
  60. data/vendor/kreuzberg/src/core/config_validation/dependencies.rs +187 -0
  61. data/vendor/kreuzberg/src/core/config_validation/mod.rs +386 -0
  62. data/vendor/kreuzberg/src/core/config_validation/sections.rs +401 -0
  63. data/vendor/kreuzberg/src/core/extractor/batch.rs +246 -0
  64. data/vendor/kreuzberg/src/core/extractor/bytes.rs +116 -0
  65. data/vendor/kreuzberg/src/core/extractor/file.rs +240 -0
  66. data/vendor/kreuzberg/src/core/extractor/helpers.rs +71 -0
  67. data/vendor/kreuzberg/src/core/extractor/legacy.rs +62 -0
  68. data/vendor/kreuzberg/src/core/extractor/mod.rs +490 -0
  69. data/vendor/kreuzberg/src/core/extractor/sync.rs +208 -0
  70. data/vendor/kreuzberg/src/core/mime.rs +15 -0
  71. data/vendor/kreuzberg/src/core/mod.rs +4 -1
  72. data/vendor/kreuzberg/src/core/pipeline/cache.rs +60 -0
  73. data/vendor/kreuzberg/src/core/pipeline/execution.rs +89 -0
  74. data/vendor/kreuzberg/src/core/pipeline/features.rs +108 -0
  75. data/vendor/kreuzberg/src/core/pipeline/format.rs +392 -0
  76. data/vendor/kreuzberg/src/core/pipeline/initialization.rs +67 -0
  77. data/vendor/kreuzberg/src/core/pipeline/mod.rs +135 -0
  78. data/vendor/kreuzberg/src/core/pipeline/tests.rs +975 -0
  79. data/vendor/kreuzberg/src/core/server_config/env.rs +90 -0
  80. data/vendor/kreuzberg/src/core/server_config/loader.rs +202 -0
  81. data/vendor/kreuzberg/src/core/server_config/mod.rs +380 -0
  82. data/vendor/kreuzberg/src/core/server_config/tests/basic_tests.rs +124 -0
  83. data/vendor/kreuzberg/src/core/server_config/tests/env_tests.rs +216 -0
  84. data/vendor/kreuzberg/src/core/server_config/tests/file_loading_tests.rs +341 -0
  85. data/vendor/kreuzberg/src/core/server_config/tests/mod.rs +5 -0
  86. data/vendor/kreuzberg/src/core/server_config/validation.rs +17 -0
  87. data/vendor/kreuzberg/src/embeddings.rs +136 -13
  88. data/vendor/kreuzberg/src/extraction/{archive.rs → archive/mod.rs} +45 -239
  89. data/vendor/kreuzberg/src/extraction/archive/sevenz.rs +98 -0
  90. data/vendor/kreuzberg/src/extraction/archive/tar.rs +118 -0
  91. data/vendor/kreuzberg/src/extraction/archive/zip.rs +101 -0
  92. data/vendor/kreuzberg/src/extraction/html/converter.rs +592 -0
  93. data/vendor/kreuzberg/src/extraction/html/image_handling.rs +95 -0
  94. data/vendor/kreuzberg/src/extraction/html/mod.rs +53 -0
  95. data/vendor/kreuzberg/src/extraction/html/processor.rs +659 -0
  96. data/vendor/kreuzberg/src/extraction/html/stack_management.rs +103 -0
  97. data/vendor/kreuzberg/src/extraction/html/types.rs +28 -0
  98. data/vendor/kreuzberg/src/extraction/mod.rs +6 -2
  99. data/vendor/kreuzberg/src/extraction/pptx/container.rs +159 -0
  100. data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +168 -0
  101. data/vendor/kreuzberg/src/extraction/pptx/elements.rs +132 -0
  102. data/vendor/kreuzberg/src/extraction/pptx/image_handling.rs +57 -0
  103. data/vendor/kreuzberg/src/extraction/pptx/metadata.rs +160 -0
  104. data/vendor/kreuzberg/src/extraction/pptx/mod.rs +558 -0
  105. data/vendor/kreuzberg/src/extraction/pptx/parser.rs +388 -0
  106. data/vendor/kreuzberg/src/extraction/transform/content.rs +205 -0
  107. data/vendor/kreuzberg/src/extraction/transform/elements.rs +211 -0
  108. data/vendor/kreuzberg/src/extraction/transform/mod.rs +480 -0
  109. data/vendor/kreuzberg/src/extraction/transform/types.rs +27 -0
  110. data/vendor/kreuzberg/src/extractors/archive.rs +2 -0
  111. data/vendor/kreuzberg/src/extractors/bibtex.rs +2 -0
  112. data/vendor/kreuzberg/src/extractors/djot_format/attributes.rs +134 -0
  113. data/vendor/kreuzberg/src/extractors/djot_format/conversion.rs +223 -0
  114. data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +172 -0
  115. data/vendor/kreuzberg/src/extractors/djot_format/mod.rs +24 -0
  116. data/vendor/kreuzberg/src/extractors/djot_format/parsing/block_handlers.rs +271 -0
  117. data/vendor/kreuzberg/src/extractors/djot_format/parsing/content_extraction.rs +257 -0
  118. data/vendor/kreuzberg/src/extractors/djot_format/parsing/event_handlers.rs +101 -0
  119. data/vendor/kreuzberg/src/extractors/djot_format/parsing/inline_handlers.rs +201 -0
  120. data/vendor/kreuzberg/src/extractors/djot_format/parsing/mod.rs +16 -0
  121. data/vendor/kreuzberg/src/extractors/djot_format/parsing/state.rs +78 -0
  122. data/vendor/kreuzberg/src/extractors/djot_format/parsing/table_extraction.rs +68 -0
  123. data/vendor/kreuzberg/src/extractors/djot_format/parsing/text_extraction.rs +61 -0
  124. data/vendor/kreuzberg/src/extractors/djot_format/rendering.rs +452 -0
  125. data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
  126. data/vendor/kreuzberg/src/extractors/docx.rs +12 -1
  127. data/vendor/kreuzberg/src/extractors/email.rs +2 -0
  128. data/vendor/kreuzberg/src/extractors/epub/content.rs +333 -0
  129. data/vendor/kreuzberg/src/extractors/epub/metadata.rs +137 -0
  130. data/vendor/kreuzberg/src/extractors/epub/mod.rs +186 -0
  131. data/vendor/kreuzberg/src/extractors/epub/parsing.rs +86 -0
  132. data/vendor/kreuzberg/src/extractors/excel.rs +4 -0
  133. data/vendor/kreuzberg/src/extractors/fictionbook.rs +2 -0
  134. data/vendor/kreuzberg/src/extractors/frontmatter_utils.rs +466 -0
  135. data/vendor/kreuzberg/src/extractors/html.rs +80 -8
  136. data/vendor/kreuzberg/src/extractors/image.rs +8 -1
  137. data/vendor/kreuzberg/src/extractors/jats/elements.rs +350 -0
  138. data/vendor/kreuzberg/src/extractors/jats/metadata.rs +21 -0
  139. data/vendor/kreuzberg/src/extractors/{jats.rs → jats/mod.rs} +10 -412
  140. data/vendor/kreuzberg/src/extractors/jats/parser.rs +52 -0
  141. data/vendor/kreuzberg/src/extractors/jupyter.rs +2 -0
  142. data/vendor/kreuzberg/src/extractors/latex/commands.rs +93 -0
  143. data/vendor/kreuzberg/src/extractors/latex/environments.rs +157 -0
  144. data/vendor/kreuzberg/src/extractors/latex/metadata.rs +27 -0
  145. data/vendor/kreuzberg/src/extractors/latex/mod.rs +146 -0
  146. data/vendor/kreuzberg/src/extractors/latex/parser.rs +231 -0
  147. data/vendor/kreuzberg/src/extractors/latex/utilities.rs +126 -0
  148. data/vendor/kreuzberg/src/extractors/markdown.rs +39 -162
  149. data/vendor/kreuzberg/src/extractors/mod.rs +9 -1
  150. data/vendor/kreuzberg/src/extractors/odt.rs +2 -0
  151. data/vendor/kreuzberg/src/extractors/opml/core.rs +165 -0
  152. data/vendor/kreuzberg/src/extractors/opml/mod.rs +31 -0
  153. data/vendor/kreuzberg/src/extractors/opml/parser.rs +479 -0
  154. data/vendor/kreuzberg/src/extractors/orgmode.rs +2 -0
  155. data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +106 -0
  156. data/vendor/kreuzberg/src/extractors/{pdf.rs → pdf/mod.rs} +25 -324
  157. data/vendor/kreuzberg/src/extractors/pdf/ocr.rs +214 -0
  158. data/vendor/kreuzberg/src/extractors/pdf/pages.rs +51 -0
  159. data/vendor/kreuzberg/src/extractors/pptx.rs +9 -2
  160. data/vendor/kreuzberg/src/extractors/rst.rs +2 -0
  161. data/vendor/kreuzberg/src/extractors/rtf/encoding.rs +116 -0
  162. data/vendor/kreuzberg/src/extractors/rtf/formatting.rs +24 -0
  163. data/vendor/kreuzberg/src/extractors/rtf/images.rs +72 -0
  164. data/vendor/kreuzberg/src/extractors/rtf/metadata.rs +216 -0
  165. data/vendor/kreuzberg/src/extractors/rtf/mod.rs +142 -0
  166. data/vendor/kreuzberg/src/extractors/rtf/parser.rs +259 -0
  167. data/vendor/kreuzberg/src/extractors/rtf/tables.rs +83 -0
  168. data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
  169. data/vendor/kreuzberg/src/extractors/text.rs +4 -0
  170. data/vendor/kreuzberg/src/extractors/typst.rs +2 -0
  171. data/vendor/kreuzberg/src/extractors/xml.rs +2 -0
  172. data/vendor/kreuzberg/src/keywords/processor.rs +14 -0
  173. data/vendor/kreuzberg/src/language_detection/processor.rs +10 -0
  174. data/vendor/kreuzberg/src/lib.rs +2 -2
  175. data/vendor/kreuzberg/src/mcp/errors.rs +312 -0
  176. data/vendor/kreuzberg/src/mcp/format.rs +211 -0
  177. data/vendor/kreuzberg/src/mcp/mod.rs +9 -3
  178. data/vendor/kreuzberg/src/mcp/params.rs +196 -0
  179. data/vendor/kreuzberg/src/mcp/server.rs +39 -1438
  180. data/vendor/kreuzberg/src/mcp/tools/cache.rs +179 -0
  181. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +403 -0
  182. data/vendor/kreuzberg/src/mcp/tools/mime.rs +150 -0
  183. data/vendor/kreuzberg/src/mcp/tools/mod.rs +11 -0
  184. data/vendor/kreuzberg/src/ocr/backends/easyocr.rs +96 -0
  185. data/vendor/kreuzberg/src/ocr/backends/mod.rs +7 -0
  186. data/vendor/kreuzberg/src/ocr/backends/paddleocr.rs +27 -0
  187. data/vendor/kreuzberg/src/ocr/backends/tesseract.rs +134 -0
  188. data/vendor/kreuzberg/src/ocr/hocr.rs +60 -16
  189. data/vendor/kreuzberg/src/ocr/language_registry.rs +11 -235
  190. data/vendor/kreuzberg/src/ocr/mod.rs +1 -0
  191. data/vendor/kreuzberg/src/ocr/processor/config.rs +203 -0
  192. data/vendor/kreuzberg/src/ocr/processor/execution.rs +494 -0
  193. data/vendor/kreuzberg/src/ocr/processor/mod.rs +265 -0
  194. data/vendor/kreuzberg/src/ocr/processor/validation.rs +145 -0
  195. data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +41 -24
  196. data/vendor/kreuzberg/src/pdf/bindings.rs +21 -8
  197. data/vendor/kreuzberg/src/pdf/hierarchy/bounding_box.rs +289 -0
  198. data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +199 -0
  199. data/vendor/kreuzberg/src/pdf/{hierarchy.rs → hierarchy/extraction.rs} +6 -346
  200. data/vendor/kreuzberg/src/pdf/hierarchy/mod.rs +18 -0
  201. data/vendor/kreuzberg/src/plugins/extractor/mod.rs +319 -0
  202. data/vendor/kreuzberg/src/plugins/extractor/registry.rs +434 -0
  203. data/vendor/kreuzberg/src/plugins/extractor/trait.rs +391 -0
  204. data/vendor/kreuzberg/src/plugins/mod.rs +13 -0
  205. data/vendor/kreuzberg/src/plugins/ocr.rs +11 -0
  206. data/vendor/kreuzberg/src/plugins/processor/mod.rs +365 -0
  207. data/vendor/kreuzberg/src/plugins/processor/registry.rs +37 -0
  208. data/vendor/kreuzberg/src/plugins/processor/trait.rs +284 -0
  209. data/vendor/kreuzberg/src/plugins/registry/extractor.rs +416 -0
  210. data/vendor/kreuzberg/src/plugins/registry/mod.rs +116 -0
  211. data/vendor/kreuzberg/src/plugins/registry/ocr.rs +293 -0
  212. data/vendor/kreuzberg/src/plugins/registry/processor.rs +304 -0
  213. data/vendor/kreuzberg/src/plugins/registry/validator.rs +238 -0
  214. data/vendor/kreuzberg/src/plugins/validator/mod.rs +424 -0
  215. data/vendor/kreuzberg/src/plugins/validator/registry.rs +355 -0
  216. data/vendor/kreuzberg/src/plugins/validator/trait.rs +276 -0
  217. data/vendor/kreuzberg/src/stopwords/languages/asian.rs +40 -0
  218. data/vendor/kreuzberg/src/stopwords/languages/germanic.rs +36 -0
  219. data/vendor/kreuzberg/src/stopwords/languages/mod.rs +10 -0
  220. data/vendor/kreuzberg/src/stopwords/languages/other.rs +44 -0
  221. data/vendor/kreuzberg/src/stopwords/languages/romance.rs +36 -0
  222. data/vendor/kreuzberg/src/stopwords/languages/slavic.rs +36 -0
  223. data/vendor/kreuzberg/src/stopwords/mod.rs +7 -33
  224. data/vendor/kreuzberg/src/text/quality.rs +1 -1
  225. data/vendor/kreuzberg/src/text/quality_processor.rs +10 -0
  226. data/vendor/kreuzberg/src/text/token_reduction/core/analysis.rs +238 -0
  227. data/vendor/kreuzberg/src/text/token_reduction/core/mod.rs +8 -0
  228. data/vendor/kreuzberg/src/text/token_reduction/core/punctuation.rs +54 -0
  229. data/vendor/kreuzberg/src/text/token_reduction/core/reducer.rs +384 -0
  230. data/vendor/kreuzberg/src/text/token_reduction/core/sentence_selection.rs +68 -0
  231. data/vendor/kreuzberg/src/text/token_reduction/core/word_filtering.rs +156 -0
  232. data/vendor/kreuzberg/src/text/token_reduction/filters/general.rs +377 -0
  233. data/vendor/kreuzberg/src/text/token_reduction/filters/html.rs +51 -0
  234. data/vendor/kreuzberg/src/text/token_reduction/filters/markdown.rs +285 -0
  235. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +131 -246
  236. data/vendor/kreuzberg/src/types/djot.rs +209 -0
  237. data/vendor/kreuzberg/src/types/extraction.rs +301 -0
  238. data/vendor/kreuzberg/src/types/formats.rs +443 -0
  239. data/vendor/kreuzberg/src/types/metadata.rs +560 -0
  240. data/vendor/kreuzberg/src/types/mod.rs +281 -0
  241. data/vendor/kreuzberg/src/types/page.rs +182 -0
  242. data/vendor/kreuzberg/src/types/serde_helpers.rs +132 -0
  243. data/vendor/kreuzberg/src/types/tables.rs +39 -0
  244. data/vendor/kreuzberg/src/utils/quality/heuristics.rs +58 -0
  245. data/vendor/kreuzberg/src/utils/{quality.rs → quality/mod.rs} +168 -489
  246. data/vendor/kreuzberg/src/utils/quality/patterns.rs +117 -0
  247. data/vendor/kreuzberg/src/utils/quality/scoring.rs +178 -0
  248. data/vendor/kreuzberg/src/utils/string_pool/buffer_pool.rs +325 -0
  249. data/vendor/kreuzberg/src/utils/string_pool/interned.rs +102 -0
  250. data/vendor/kreuzberg/src/utils/string_pool/language_pool.rs +119 -0
  251. data/vendor/kreuzberg/src/utils/string_pool/mime_pool.rs +235 -0
  252. data/vendor/kreuzberg/src/utils/string_pool/mod.rs +41 -0
  253. data/vendor/kreuzberg/tests/api_chunk.rs +313 -0
  254. data/vendor/kreuzberg/tests/api_embed.rs +6 -9
  255. data/vendor/kreuzberg/tests/batch_orchestration.rs +1 -0
  256. data/vendor/kreuzberg/tests/concurrency_stress.rs +7 -0
  257. data/vendor/kreuzberg/tests/core_integration.rs +1 -0
  258. data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +130 -0
  259. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +5 -14
  260. data/vendor/kreuzberg/tests/format_integration.rs +2 -0
  261. data/vendor/kreuzberg/tests/helpers/mod.rs +1 -0
  262. data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
  263. data/vendor/kreuzberg/tests/ocr_configuration.rs +16 -0
  264. data/vendor/kreuzberg/tests/ocr_errors.rs +18 -0
  265. data/vendor/kreuzberg/tests/ocr_quality.rs +9 -0
  266. data/vendor/kreuzberg/tests/ocr_stress.rs +1 -0
  267. data/vendor/kreuzberg/tests/pipeline_integration.rs +50 -0
  268. data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +13 -0
  269. data/vendor/kreuzberg/tests/plugin_system.rs +12 -0
  270. data/vendor/kreuzberg/tests/pptx_regression_tests.rs +504 -0
  271. data/vendor/kreuzberg/tests/registry_integration_tests.rs +2 -0
  272. data/vendor/kreuzberg-ffi/Cargo.toml +2 -1
  273. data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +2 -0
  274. data/vendor/kreuzberg-ffi/kreuzberg.h +347 -178
  275. data/vendor/kreuzberg-ffi/src/config/html.rs +318 -0
  276. data/vendor/kreuzberg-ffi/src/config/loader.rs +154 -0
  277. data/vendor/kreuzberg-ffi/src/config/merge.rs +104 -0
  278. data/vendor/kreuzberg-ffi/src/config/mod.rs +385 -0
  279. data/vendor/kreuzberg-ffi/src/config/parse.rs +91 -0
  280. data/vendor/kreuzberg-ffi/src/config/serialize.rs +118 -0
  281. data/vendor/kreuzberg-ffi/src/config_builder.rs +598 -0
  282. data/vendor/kreuzberg-ffi/src/error.rs +46 -14
  283. data/vendor/kreuzberg-ffi/src/helpers.rs +10 -0
  284. data/vendor/kreuzberg-ffi/src/html_options.rs +421 -0
  285. data/vendor/kreuzberg-ffi/src/lib.rs +16 -0
  286. data/vendor/kreuzberg-ffi/src/panic_shield.rs +11 -0
  287. data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +2 -0
  288. data/vendor/kreuzberg-ffi/src/result.rs +148 -122
  289. data/vendor/kreuzberg-ffi/src/result_view.rs +4 -0
  290. data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
  291. metadata +201 -28
  292. data/vendor/kreuzberg/src/api/server.rs +0 -518
  293. data/vendor/kreuzberg/src/core/config.rs +0 -1914
  294. data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
  295. data/vendor/kreuzberg/src/core/extractor.rs +0 -1200
  296. data/vendor/kreuzberg/src/core/pipeline.rs +0 -1223
  297. data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
  298. data/vendor/kreuzberg/src/extraction/html.rs +0 -1830
  299. data/vendor/kreuzberg/src/extraction/pptx.rs +0 -3102
  300. data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
  301. data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
  302. data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
  303. data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
  304. data/vendor/kreuzberg/src/ocr/processor.rs +0 -858
  305. data/vendor/kreuzberg/src/plugins/extractor.rs +0 -1042
  306. data/vendor/kreuzberg/src/plugins/processor.rs +0 -650
  307. data/vendor/kreuzberg/src/plugins/registry.rs +0 -1339
  308. data/vendor/kreuzberg/src/plugins/validator.rs +0 -967
  309. data/vendor/kreuzberg/src/text/token_reduction/core.rs +0 -832
  310. data/vendor/kreuzberg/src/types.rs +0 -1713
  311. data/vendor/kreuzberg/src/utils/string_pool.rs +0 -762
  312. data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
@@ -3,6 +3,110 @@
3
3
  require 'sorbet-runtime'
4
4
 
5
5
  module Kreuzberg
6
+ # Semantic element type classification.
7
+ #
8
+ # Categorizes text content into semantic units for downstream processing.
9
+ # Supports the element types commonly found in Unstructured documents.
10
+ #
11
+ # @example
12
+ # type = 'title' # one of the string values listed in Kreuzberg::ElementType
13
+ #
14
+ ElementType = T.type_alias do
15
+ T.any(
16
+ 'title',
17
+ 'narrative_text',
18
+ 'heading',
19
+ 'list_item',
20
+ 'table',
21
+ 'image',
22
+ 'page_break',
23
+ 'code_block',
24
+ 'block_quote',
25
+ 'footer',
26
+ 'header'
27
+ )
28
+ end
29
+
30
+ # Bounding box coordinates for element positioning.
31
+ #
32
+ # Represents rectangular coordinates for an element within a page.
33
+ #
34
+ # @example
35
+ # bbox = Kreuzberg::BoundingBox.new(
36
+ # x0: 10.0,
37
+ # y0: 20.0,
38
+ # x1: 100.0,
39
+ # y1: 50.0
40
+ # )
41
+ # puts "Width: #{bbox.x1 - bbox.x0}"
42
+ #
43
+ class BoundingBox < T::Struct
44
+ extend T::Sig
45
+
46
+ const :x0, Float
47
+
48
+ const :y0, Float
49
+
50
+ const :x1, Float
51
+
52
+ const :y1, Float
53
+ end
54
+
55
+ # Metadata for a semantic element.
56
+ #
57
+ # Provides contextual information about an extracted element including
58
+ # its position within the document and custom metadata fields.
59
+ #
60
+ # @example
61
+ # metadata = Kreuzberg::ElementMetadata.new(
62
+ # page_number: 1,
63
+ # filename: "document.pdf",
64
+ # coordinates: bbox,
65
+ # element_index: 5,
66
+ # additional: { "style" => "bold" }
67
+ # )
68
+ #
69
+ class ElementMetadata < T::Struct
70
+ extend T::Sig
71
+
72
+ const :page_number, T.nilable(Integer)
73
+
74
+ const :filename, T.nilable(String)
75
+
76
+ const :coordinates, T.nilable(BoundingBox)
77
+
78
+ const :element_index, T.nilable(Integer)
79
+
80
+ const :additional, T::Hash[String, String]
81
+ end
82
+
83
+ # Semantic element extracted from document.
84
+ #
85
+ # Represents a logical unit of content with semantic classification,
86
+ # unique identifier, and metadata for tracking origin and position.
87
+ # Compatible with Unstructured.io element format when output_format='element_based'.
88
+ #
89
+ # @example
90
+ # element = Kreuzberg::Element.new(
91
+ # element_id: "elem-abc123",
92
+ # element_type: "narrative_text",
93
+ # text: "This is the main content.",
94
+ # metadata: metadata
95
+ # )
96
+ # puts "#{element.element_type}: #{element.text}"
97
+ #
98
+ class Element < T::Struct
99
+ extend T::Sig
100
+
101
+ const :element_id, String
102
+
103
+ const :element_type, String
104
+
105
+ const :text, String
106
+
107
+ const :metadata, ElementMetadata
108
+ end
109
+
6
110
  # Header/Heading metadata
7
111
  #
8
112
  # Represents a heading element found in the HTML document
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.0.8'
4
+ VERSION = '4.1.1'
5
5
  end
data/lib/kreuzberg.rb CHANGED
@@ -83,10 +83,6 @@ module Kreuzberg
83
83
  module_function :validate_mime_type
84
84
 
85
85
  module_function :get_extensions_for_mime
86
-
87
- module_function :list_embedding_presets
88
-
89
- module_function :get_embedding_preset
90
86
  end
91
87
 
92
88
  require_relative 'kreuzberg/cache_api'
data/sig/kreuzberg.rbs CHANGED
@@ -250,7 +250,56 @@ module Kreuzberg
250
250
  tables: Array[table_hash]?,
251
251
  detected_languages: Array[String]?,
252
252
  chunks: Array[chunk_hash]?,
253
- images: Array[image_hash]?
253
+ images: Array[image_hash]?,
254
+ elements: Array[element_hash]?,
255
+ djot_content: djot_content_hash?
256
+ }
257
+
258
+ type djot_content_hash = {
259
+ plain_text: String,
260
+ blocks: Array[formatted_block_hash],
261
+ metadata_json: String,
262
+ tables: Array[table_hash],
263
+ images: Array[djot_image_hash],
264
+ links: Array[djot_link_hash],
265
+ footnotes: Array[footnote_hash],
266
+ attributes: Hash[String, attributes_hash]?
267
+ }
268
+
269
+ type formatted_block_hash = {
270
+ block_type: String,
271
+ level: Integer?,
272
+ content: String?,
273
+ children: Array[formatted_block_hash]?,
274
+ attributes: attributes_hash?
275
+ }
276
+
277
+ type djot_image_hash = {
278
+ url: String,
279
+ alt: String?,
280
+ title: String?,
281
+ attributes: attributes_hash?
282
+ }
283
+
284
+ type djot_link_hash = {
285
+ url: String,
286
+ text: String,
287
+ title: String?,
288
+ link_type: String?
289
+ }
290
+
291
+ type footnote_hash = {
292
+ label: String,
293
+ content: String
294
+ }
295
+
296
+ type attributes_hash = Hash[String, String | Integer | bool | Array[String] | nil]
297
+
298
+ type element_hash = {
299
+ element_id: String,
300
+ element_type: String,
301
+ text: String,
302
+ metadata: Hash[String, untyped]?
254
303
  }
255
304
 
256
305
  type table_hash = {
@@ -359,6 +408,60 @@ module Kreuzberg
359
408
  def to_h: () -> image_hash
360
409
  end
361
410
 
411
+ # Structured Djot document representation
412
+ class DjotContent
413
+ attr_reader plain_text: String
414
+ attr_reader blocks: Array[DjotContent::FormattedBlock]
415
+ attr_reader metadata: Hash[untyped, untyped]
416
+ attr_reader tables: Array[Table]
417
+ attr_reader images: Array[DjotContent::DjotImage]
418
+ attr_reader links: Array[DjotContent::DjotLink]
419
+ attr_reader footnotes: Array[DjotContent::Footnote]
420
+ attr_reader attributes: Hash[String, untyped]?
421
+
422
+ def initialize: (djot_content_hash hash) -> void
423
+ def to_h: () -> djot_content_hash
424
+
425
+ class FormattedBlock
426
+ attr_reader block_type: String
427
+ attr_reader level: Integer?
428
+ attr_reader content: String?
429
+ attr_reader children: Array[FormattedBlock]?
430
+ attr_reader attributes: Hash[String, untyped]?
431
+
432
+ def initialize: (formatted_block_hash hash) -> void
433
+ def to_h: () -> formatted_block_hash
434
+ end
435
+
436
+ class DjotImage
437
+ attr_reader url: String
438
+ attr_reader alt: String?
439
+ attr_reader title: String?
440
+ attr_reader attributes: Hash[String, untyped]?
441
+
442
+ def initialize: (djot_image_hash hash) -> void
443
+ def to_h: () -> djot_image_hash
444
+ end
445
+
446
+ class DjotLink
447
+ attr_reader url: String
448
+ attr_reader text: String
449
+ attr_reader title: String?
450
+ attr_reader link_type: String?
451
+
452
+ def initialize: (djot_link_hash hash) -> void
453
+ def to_h: () -> djot_link_hash
454
+ end
455
+
456
+ class Footnote
457
+ attr_reader label: String
458
+ attr_reader content: String
459
+
460
+ def initialize: (label: String, content: String) -> void
461
+ def to_h: () -> footnote_hash
462
+ end
463
+ end
464
+
362
465
  attr_reader content: String
363
466
  attr_reader mime_type: String
364
467
  attr_reader metadata: Hash[untyped, untyped]
@@ -367,6 +470,7 @@ module Kreuzberg
367
470
  attr_reader detected_languages: Array[String]?
368
471
  attr_reader chunks: Array[Chunk]?
369
472
  attr_reader images: Array[Image]?
473
+ attr_reader djot_content: DjotContent?
370
474
 
371
475
  def initialize: (extraction_result_hash hash) -> void
372
476
  def to_h: () -> Hash[Symbol, untyped]
@@ -21,7 +21,7 @@ extract_images = true
21
21
  passwords = ["secret", "backup"]
22
22
  extract_metadata = true
23
23
 
24
- [images]
24
+ [image_extraction]
25
25
  extract_images = true
26
26
  target_dpi = 600
27
27
  max_image_dimension = 2000
@@ -23,7 +23,7 @@ pdf_options:
23
23
  - password2
24
24
  extract_metadata: true
25
25
 
26
- images:
26
+ image_extraction:
27
27
  extract_images: true
28
28
  target_dpi: 300
29
29
  max_image_dimension: 4096
data/vendor/Cargo.toml CHANGED
@@ -3,7 +3,7 @@ members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
3
3
  resolver = "2"
4
4
 
5
5
  [workspace.package]
6
- version = "4.0.8"
6
+ version = "4.1.1"
7
7
  edition = "2024"
8
8
  rust-version = "1.91"
9
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -29,7 +29,7 @@ serde = { version = "1.0.228", features = ["derive"] }
29
29
  serde_json = "1.0.149"
30
30
 
31
31
  # Error handling
32
- thiserror = "2.0.17"
32
+ thiserror = "2.0.18"
33
33
  anyhow = "1.0"
34
34
 
35
35
  # Async utilities
@@ -47,7 +47,7 @@ hex = "0.4.3"
47
47
  toml = "0.9.11"
48
48
  num_cpus = "1.17.0"
49
49
  once_cell = "1.21.3"
50
- html-to-markdown-rs = { version = "2.22.5", default-features = false }
50
+ html-to-markdown-rs = { version = "2.23.4", default-features = false }
51
51
  reqwest = { version = "0.13.1", default-features = false, features = ["json", "rustls"] }
52
52
  image = { version = "0.25.9", default-features = false }
53
53
  lzma-rust2 = { version = "0.15.7" }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.0.8"
3
+ version = "4.1.1"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -136,6 +136,7 @@ regex = "1.12.2"
136
136
  serde = { workspace = true }
137
137
  serde_json = { workspace = true }
138
138
  serde_yaml_ng = "0.10.0"
139
+ jotdown = "0.9"
139
140
  toml = { workspace = true }
140
141
  mime_guess = "2.0"
141
142
  rmp-serde = "1.3"
@@ -152,7 +153,7 @@ lopdf = { version = "0.39.0", optional = true }
152
153
  calamine = { version = "0.32.0", features = ["dates"], optional = true }
153
154
  polars = { version = "0.52.0", default-features = false, features = ["ipc"], optional = true }
154
155
  roxmltree = { version = "0.21.1", optional = true }
155
- zip = { version = "7.1.0", optional = true }
156
+ zip = { version = "7.2.0", optional = true }
156
157
  mail-parser = { version = "0.11.1", optional = true }
157
158
  msg_parser = { version = "0.1.1", optional = true }
158
159
  html-to-markdown-rs = { workspace = true, features = [
@@ -173,7 +174,7 @@ rst_parser = { version = "0.4", optional = true }
173
174
  fb2 = { version = "0.4", optional = true }
174
175
  typst-syntax = { version = "0.14", optional = true }
175
176
 
176
- kreuzberg-tesseract = { path = "../kreuzberg-tesseract", optional = true }
177
+ kreuzberg-tesseract = { path = "../kreuzberg-tesseract", version = "4.1", optional = true }
177
178
  image = { workspace = true, default-features = false, features = [
178
179
  "png",
179
180
  "jpeg",
@@ -215,7 +216,7 @@ smartcore = { version = "0.4", default-features = false, features = ["serde"] }
215
216
  tempfile = { workspace = true }
216
217
  filetime = "0.2"
217
218
  tar = "0.4.44"
218
- zip = "7.1.0"
219
+ zip = "7.2.0"
219
220
  serial_test = "3.3.1"
220
221
  anyhow = { workspace = true }
221
222
  tokio-test = "0.4"
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
17
17
 
18
18
  This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
19
19
 
20
- > **🚀 Version 4.0.0 Release Candidate**
20
+ > **🚀 Version 4.1.1 Release**
21
21
  > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
22
22
  >
23
23
  > **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
@@ -0,0 +1,69 @@
1
+ //! API server configuration loading.
2
+
3
+ use crate::{Result, core::ServerConfig};
4
+
5
+ /// Load ServerConfig with proper precedence order.
6
+ ///
7
+ /// This function implements the configuration hierarchy:
8
+ /// 1. File (if provided)
9
+ /// 2. Environment variables (via apply_env_overrides)
10
+ /// 3. Defaults
11
+ ///
12
+ /// The config file can be in flat format (server settings at root) or nested format
13
+ /// (server settings under [server] section alongside other configs like [ocr]).
14
+ ///
15
+ /// # Arguments
16
+ ///
17
+ /// * `config_path` - Optional path to a ServerConfig file (TOML, YAML, or JSON)
18
+ ///
19
+ /// # Returns
20
+ ///
21
+ /// A configured ServerConfig with proper precedence applied.
22
+ ///
23
+ /// # Errors
24
+ ///
25
+ /// Returns an error if:
26
+ /// - The config file path is provided but cannot be read
27
+ /// - The config file contains invalid server configuration
28
+ /// - Environment variable overrides contain invalid values
29
+ ///
30
+ /// # Examples
31
+ ///
32
+ /// ```no_run
33
+ /// use kreuzberg::api::load_server_config;
34
+ /// use std::path::Path;
35
+ ///
36
+ /// # fn example() -> kreuzberg::Result<()> {
37
+ /// // Load from file with env overrides
38
+ /// let config = load_server_config(Some(Path::new("server.toml")))?;
39
+ ///
40
+ /// // Or use defaults with env overrides
41
+ /// let config = load_server_config(None)?;
42
+ /// # Ok(())
43
+ /// # }
44
+ /// ```
45
+ pub fn load_server_config(config_path: Option<&std::path::Path>) -> Result<ServerConfig> {
46
+ let mut config = if let Some(path) = config_path {
47
+ ServerConfig::from_file(path)?
48
+ } else {
49
+ ServerConfig::default()
50
+ };
51
+
52
+ // Apply environment variable overrides with proper logging
53
+ config.apply_env_overrides()?;
54
+
55
+ tracing::info!(
56
+ "Server configuration loaded: host={}, port={}, request_body_limit={} MB, multipart_field_limit={} MB, CORS={}",
57
+ config.host,
58
+ config.port,
59
+ config.max_request_body_mb(),
60
+ config.max_multipart_field_mb(),
61
+ if config.cors_allows_all() {
62
+ "allow all origins".to_string()
63
+ } else {
64
+ format!("{} specific origins", config.cors_origins.len())
65
+ }
66
+ );
67
+
68
+ Ok(config)
69
+ }
@@ -10,8 +10,8 @@ use crate::{batch_extract_bytes, cache, extract_bytes};
10
10
  use super::{
11
11
  error::ApiError,
12
12
  types::{
13
- ApiState, CacheClearResponse, CacheStatsResponse, EmbedRequest, EmbedResponse, ExtractResponse, HealthResponse,
14
- InfoResponse,
13
+ ApiState, CacheClearResponse, CacheStatsResponse, ChunkRequest, ChunkResponse, EmbedRequest, EmbedResponse,
14
+ ExtractResponse, HealthResponse, InfoResponse,
15
15
  },
16
16
  };
17
17
 
@@ -85,6 +85,25 @@ pub async fn extract_handler(
85
85
  )))
86
86
  })?;
87
87
  }
88
+ "output_format" => {
89
+ let format_str = field
90
+ .text()
91
+ .await
92
+ .map_err(|e| ApiError::validation(crate::error::KreuzbergError::validation(e.to_string())))?;
93
+
94
+ config.output_format = match format_str.to_lowercase().as_str() {
95
+ "plain" => crate::core::config::OutputFormat::Plain,
96
+ "markdown" => crate::core::config::OutputFormat::Markdown,
97
+ "djot" => crate::core::config::OutputFormat::Djot,
98
+ "html" => crate::core::config::OutputFormat::Html,
99
+ _ => {
100
+ return Err(ApiError::validation(crate::error::KreuzbergError::validation(format!(
101
+ "Invalid output_format: '{}'. Valid values: 'plain', 'markdown', 'djot', 'html'",
102
+ format_str
103
+ ))));
104
+ }
105
+ };
106
+ }
88
107
  _ => {}
89
108
  }
90
109
  }
@@ -318,3 +337,81 @@ pub async fn embed_handler(Json(_request): Json<EmbedRequest>) -> Result<Json<Em
318
337
  "Embeddings feature is not enabled. Rebuild with --features embeddings".to_string(),
319
338
  )))
320
339
  }
340
+
341
+ /// Chunk text endpoint handler.
342
+ ///
343
+ /// POST /chunk
344
+ ///
345
+ /// Accepts JSON body with text and optional configuration.
346
+ /// Returns chunks with metadata.
347
+ #[cfg_attr(
348
+ feature = "otel",
349
+ tracing::instrument(
350
+ name = "api.chunk",
351
+ skip(request),
352
+ fields(text_length = request.text.len(), chunker_type = request.chunker_type.as_str())
353
+ )
354
+ )]
355
+ pub async fn chunk_handler(Json(request): Json<ChunkRequest>) -> Result<Json<ChunkResponse>, ApiError> {
356
+ use super::types::{ChunkItem, ChunkingConfigResponse};
357
+ use crate::chunking::{ChunkerType, ChunkingConfig, chunk_text};
358
+
359
+ // Validate input
360
+ if request.text.is_empty() {
361
+ return Err(ApiError::validation(crate::error::KreuzbergError::validation(
362
+ "Text cannot be empty",
363
+ )));
364
+ }
365
+
366
+ // Parse chunker_type
367
+ let chunker_type = match request.chunker_type.to_lowercase().as_str() {
368
+ "text" | "" => ChunkerType::Text,
369
+ "markdown" => ChunkerType::Markdown,
370
+ other => {
371
+ return Err(ApiError::validation(crate::error::KreuzbergError::validation(format!(
372
+ "Invalid chunker_type: '{}'. Valid values: 'text', 'markdown'",
373
+ other
374
+ ))));
375
+ }
376
+ };
377
+
378
+ // Build config with defaults
379
+ let cfg = request.config.unwrap_or_default();
380
+ let config = ChunkingConfig {
381
+ max_characters: cfg.max_characters.unwrap_or(2000),
382
+ overlap: cfg.overlap.unwrap_or(100),
383
+ trim: cfg.trim.unwrap_or(true),
384
+ chunker_type,
385
+ };
386
+
387
+ // Perform chunking
388
+ let result = chunk_text(&request.text, &config, None).map_err(ApiError::internal)?;
389
+
390
+ // Transform to response
391
+ let chunks = result
392
+ .chunks
393
+ .into_iter()
394
+ .map(|chunk| ChunkItem {
395
+ content: chunk.content,
396
+ byte_start: chunk.metadata.byte_start,
397
+ byte_end: chunk.metadata.byte_end,
398
+ chunk_index: chunk.metadata.chunk_index,
399
+ total_chunks: chunk.metadata.total_chunks,
400
+ first_page: chunk.metadata.first_page,
401
+ last_page: chunk.metadata.last_page,
402
+ })
403
+ .collect();
404
+
405
+ Ok(Json(ChunkResponse {
406
+ chunks,
407
+ chunk_count: result.chunk_count,
408
+ config: ChunkingConfigResponse {
409
+ max_characters: config.max_characters,
410
+ overlap: config.overlap,
411
+ trim: config.trim,
412
+ chunker_type: format!("{:?}", config.chunker_type).to_lowercase(),
413
+ },
414
+ input_size_bytes: request.text.len(),
415
+ chunker_type: request.chunker_type.to_lowercase(),
416
+ }))
417
+ }
@@ -7,6 +7,7 @@
7
7
  //!
8
8
  //! - `POST /extract` - Extract text from uploaded files (multipart form data)
9
9
  //! - `POST /embed` - Generate embeddings for text (JSON body with texts array)
10
+ //! - `POST /chunk` - Chunk text into smaller pieces (JSON body with text and config)
10
11
  //! - `GET /health` - Health check endpoint
11
12
  //! - `GET /info` - Server information
12
13
  //! - `GET /cache/stats` - Get cache statistics
@@ -76,19 +77,25 @@
76
77
  //! curl -X POST http://localhost:8000/embed \
77
78
  //! -H "Content-Type: application/json" \
78
79
  //! -d '{"texts":["Hello world","Second text"]}'
80
+ //!
81
+ //! # Chunk text
82
+ //! curl -X POST http://localhost:8000/chunk \
83
+ //! -H "Content-Type: application/json" \
84
+ //! -d '{"text":"Long text to chunk...","chunker_type":"text"}'
79
85
  //! ```
80
86
 
87
+ mod config;
81
88
  mod error;
82
89
  mod handlers;
83
- mod server;
90
+ mod router;
91
+ mod startup;
84
92
  mod types;
85
93
 
94
+ pub use config::load_server_config;
86
95
  pub use error::ApiError;
87
- pub use server::{
88
- create_router, create_router_with_limits, create_router_with_limits_and_server_config, load_server_config, serve,
89
- serve_default, serve_with_config, serve_with_config_and_limits, serve_with_server_config,
90
- };
96
+ pub use router::{create_router, create_router_with_limits, create_router_with_limits_and_server_config};
97
+ pub use startup::{serve, serve_default, serve_with_config, serve_with_config_and_limits, serve_with_server_config};
91
98
  pub use types::{
92
- ApiSizeLimits, ApiState, CacheClearResponse, CacheStatsResponse, EmbedRequest, EmbedResponse, ErrorResponse,
93
- ExtractResponse, HealthResponse, InfoResponse,
99
+ ApiSizeLimits, ApiState, CacheClearResponse, CacheStatsResponse, ChunkRequest, ChunkResponse, EmbedRequest,
100
+ EmbedResponse, ErrorResponse, ExtractResponse, HealthResponse, InfoResponse,
94
101
  };