kreuzberg 4.0.8 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +94 -98
- data/ext/kreuzberg_rb/native/Cargo.toml +4 -2
- data/ext/kreuzberg_rb/native/src/batch.rs +139 -0
- data/ext/kreuzberg_rb/native/src/config/mod.rs +10 -0
- data/ext/kreuzberg_rb/native/src/config/types.rs +1058 -0
- data/ext/kreuzberg_rb/native/src/error_handling.rs +125 -0
- data/ext/kreuzberg_rb/native/src/extraction.rs +79 -0
- data/ext/kreuzberg_rb/native/src/gc_guarded_value.rs +35 -0
- data/ext/kreuzberg_rb/native/src/helpers.rs +176 -0
- data/ext/kreuzberg_rb/native/src/lib.rs +342 -3622
- data/ext/kreuzberg_rb/native/src/metadata.rs +34 -0
- data/ext/kreuzberg_rb/native/src/plugins/mod.rs +92 -0
- data/ext/kreuzberg_rb/native/src/plugins/ocr_backend.rs +159 -0
- data/ext/kreuzberg_rb/native/src/plugins/post_processor.rs +126 -0
- data/ext/kreuzberg_rb/native/src/plugins/validator.rs +99 -0
- data/ext/kreuzberg_rb/native/src/result.rs +326 -0
- data/ext/kreuzberg_rb/native/src/validation.rs +4 -0
- data/lib/kreuzberg/config.rb +66 -0
- data/lib/kreuzberg/result.rb +107 -2
- data/lib/kreuzberg/types.rb +104 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +0 -4
- data/sig/kreuzberg.rbs +105 -1
- data/vendor/Cargo.toml +3 -3
- data/vendor/kreuzberg/Cargo.toml +4 -3
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/config.rs +69 -0
- data/vendor/kreuzberg/src/api/handlers.rs +99 -2
- data/vendor/kreuzberg/src/api/mod.rs +14 -7
- data/vendor/kreuzberg/src/api/router.rs +214 -0
- data/vendor/kreuzberg/src/api/startup.rs +243 -0
- data/vendor/kreuzberg/src/api/types.rs +78 -0
- data/vendor/kreuzberg/src/cache/cleanup.rs +277 -0
- data/vendor/kreuzberg/src/cache/core.rs +428 -0
- data/vendor/kreuzberg/src/cache/mod.rs +21 -843
- data/vendor/kreuzberg/src/cache/utilities.rs +156 -0
- data/vendor/kreuzberg/src/chunking/boundaries.rs +301 -0
- data/vendor/kreuzberg/src/chunking/builder.rs +294 -0
- data/vendor/kreuzberg/src/chunking/config.rs +52 -0
- data/vendor/kreuzberg/src/chunking/core.rs +1017 -0
- data/vendor/kreuzberg/src/chunking/mod.rs +14 -2211
- data/vendor/kreuzberg/src/chunking/processor.rs +10 -0
- data/vendor/kreuzberg/src/chunking/validation.rs +686 -0
- data/vendor/kreuzberg/src/core/config/extraction/core.rs +169 -0
- data/vendor/kreuzberg/src/core/config/extraction/env.rs +179 -0
- data/vendor/kreuzberg/src/core/config/extraction/loaders.rs +204 -0
- data/vendor/kreuzberg/src/core/config/extraction/mod.rs +42 -0
- data/vendor/kreuzberg/src/core/config/extraction/types.rs +93 -0
- data/vendor/kreuzberg/src/core/config/formats.rs +135 -0
- data/vendor/kreuzberg/src/core/config/mod.rs +20 -0
- data/vendor/kreuzberg/src/core/config/ocr.rs +73 -0
- data/vendor/kreuzberg/src/core/config/page.rs +57 -0
- data/vendor/kreuzberg/src/core/config/pdf.rs +111 -0
- data/vendor/kreuzberg/src/core/config/processing.rs +312 -0
- data/vendor/kreuzberg/src/core/config_validation/dependencies.rs +187 -0
- data/vendor/kreuzberg/src/core/config_validation/mod.rs +386 -0
- data/vendor/kreuzberg/src/core/config_validation/sections.rs +401 -0
- data/vendor/kreuzberg/src/core/extractor/batch.rs +246 -0
- data/vendor/kreuzberg/src/core/extractor/bytes.rs +116 -0
- data/vendor/kreuzberg/src/core/extractor/file.rs +240 -0
- data/vendor/kreuzberg/src/core/extractor/helpers.rs +71 -0
- data/vendor/kreuzberg/src/core/extractor/legacy.rs +62 -0
- data/vendor/kreuzberg/src/core/extractor/mod.rs +490 -0
- data/vendor/kreuzberg/src/core/extractor/sync.rs +208 -0
- data/vendor/kreuzberg/src/core/mod.rs +4 -1
- data/vendor/kreuzberg/src/core/pipeline/cache.rs +60 -0
- data/vendor/kreuzberg/src/core/pipeline/execution.rs +89 -0
- data/vendor/kreuzberg/src/core/pipeline/features.rs +108 -0
- data/vendor/kreuzberg/src/core/pipeline/format.rs +392 -0
- data/vendor/kreuzberg/src/core/pipeline/initialization.rs +67 -0
- data/vendor/kreuzberg/src/core/pipeline/mod.rs +135 -0
- data/vendor/kreuzberg/src/core/pipeline/tests.rs +975 -0
- data/vendor/kreuzberg/src/core/server_config/env.rs +90 -0
- data/vendor/kreuzberg/src/core/server_config/loader.rs +202 -0
- data/vendor/kreuzberg/src/core/server_config/mod.rs +380 -0
- data/vendor/kreuzberg/src/core/server_config/tests/basic_tests.rs +124 -0
- data/vendor/kreuzberg/src/core/server_config/tests/env_tests.rs +216 -0
- data/vendor/kreuzberg/src/core/server_config/tests/file_loading_tests.rs +341 -0
- data/vendor/kreuzberg/src/core/server_config/tests/mod.rs +5 -0
- data/vendor/kreuzberg/src/core/server_config/validation.rs +17 -0
- data/vendor/kreuzberg/src/embeddings.rs +136 -13
- data/vendor/kreuzberg/src/extraction/{archive.rs → archive/mod.rs} +45 -239
- data/vendor/kreuzberg/src/extraction/archive/sevenz.rs +98 -0
- data/vendor/kreuzberg/src/extraction/archive/tar.rs +118 -0
- data/vendor/kreuzberg/src/extraction/archive/zip.rs +101 -0
- data/vendor/kreuzberg/src/extraction/html/converter.rs +592 -0
- data/vendor/kreuzberg/src/extraction/html/image_handling.rs +95 -0
- data/vendor/kreuzberg/src/extraction/html/mod.rs +53 -0
- data/vendor/kreuzberg/src/extraction/html/processor.rs +659 -0
- data/vendor/kreuzberg/src/extraction/html/stack_management.rs +103 -0
- data/vendor/kreuzberg/src/extraction/html/types.rs +28 -0
- data/vendor/kreuzberg/src/extraction/mod.rs +6 -2
- data/vendor/kreuzberg/src/extraction/pptx/container.rs +159 -0
- data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +168 -0
- data/vendor/kreuzberg/src/extraction/pptx/elements.rs +132 -0
- data/vendor/kreuzberg/src/extraction/pptx/image_handling.rs +57 -0
- data/vendor/kreuzberg/src/extraction/pptx/metadata.rs +160 -0
- data/vendor/kreuzberg/src/extraction/pptx/mod.rs +558 -0
- data/vendor/kreuzberg/src/extraction/pptx/parser.rs +379 -0
- data/vendor/kreuzberg/src/extraction/transform/content.rs +205 -0
- data/vendor/kreuzberg/src/extraction/transform/elements.rs +211 -0
- data/vendor/kreuzberg/src/extraction/transform/mod.rs +480 -0
- data/vendor/kreuzberg/src/extraction/transform/types.rs +27 -0
- data/vendor/kreuzberg/src/extractors/archive.rs +2 -0
- data/vendor/kreuzberg/src/extractors/bibtex.rs +2 -0
- data/vendor/kreuzberg/src/extractors/djot_format/attributes.rs +134 -0
- data/vendor/kreuzberg/src/extractors/djot_format/conversion.rs +223 -0
- data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +172 -0
- data/vendor/kreuzberg/src/extractors/djot_format/mod.rs +24 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/block_handlers.rs +271 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/content_extraction.rs +257 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/event_handlers.rs +101 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/inline_handlers.rs +201 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/mod.rs +16 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/state.rs +78 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/table_extraction.rs +68 -0
- data/vendor/kreuzberg/src/extractors/djot_format/parsing/text_extraction.rs +61 -0
- data/vendor/kreuzberg/src/extractors/djot_format/rendering.rs +452 -0
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +12 -1
- data/vendor/kreuzberg/src/extractors/email.rs +2 -0
- data/vendor/kreuzberg/src/extractors/epub/content.rs +333 -0
- data/vendor/kreuzberg/src/extractors/epub/metadata.rs +137 -0
- data/vendor/kreuzberg/src/extractors/epub/mod.rs +186 -0
- data/vendor/kreuzberg/src/extractors/epub/parsing.rs +86 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +4 -0
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/frontmatter_utils.rs +466 -0
- data/vendor/kreuzberg/src/extractors/html.rs +80 -8
- data/vendor/kreuzberg/src/extractors/image.rs +8 -1
- data/vendor/kreuzberg/src/extractors/jats/elements.rs +350 -0
- data/vendor/kreuzberg/src/extractors/jats/metadata.rs +21 -0
- data/vendor/kreuzberg/src/extractors/{jats.rs → jats/mod.rs} +10 -412
- data/vendor/kreuzberg/src/extractors/jats/parser.rs +52 -0
- data/vendor/kreuzberg/src/extractors/jupyter.rs +2 -0
- data/vendor/kreuzberg/src/extractors/latex/commands.rs +93 -0
- data/vendor/kreuzberg/src/extractors/latex/environments.rs +157 -0
- data/vendor/kreuzberg/src/extractors/latex/metadata.rs +27 -0
- data/vendor/kreuzberg/src/extractors/latex/mod.rs +146 -0
- data/vendor/kreuzberg/src/extractors/latex/parser.rs +231 -0
- data/vendor/kreuzberg/src/extractors/latex/utilities.rs +126 -0
- data/vendor/kreuzberg/src/extractors/markdown.rs +39 -162
- data/vendor/kreuzberg/src/extractors/mod.rs +9 -1
- data/vendor/kreuzberg/src/extractors/odt.rs +2 -0
- data/vendor/kreuzberg/src/extractors/opml/core.rs +165 -0
- data/vendor/kreuzberg/src/extractors/opml/mod.rs +31 -0
- data/vendor/kreuzberg/src/extractors/opml/parser.rs +479 -0
- data/vendor/kreuzberg/src/extractors/orgmode.rs +2 -0
- data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +106 -0
- data/vendor/kreuzberg/src/extractors/{pdf.rs → pdf/mod.rs} +25 -324
- data/vendor/kreuzberg/src/extractors/pdf/ocr.rs +214 -0
- data/vendor/kreuzberg/src/extractors/pdf/pages.rs +51 -0
- data/vendor/kreuzberg/src/extractors/pptx.rs +9 -2
- data/vendor/kreuzberg/src/extractors/rst.rs +2 -0
- data/vendor/kreuzberg/src/extractors/rtf/encoding.rs +116 -0
- data/vendor/kreuzberg/src/extractors/rtf/formatting.rs +24 -0
- data/vendor/kreuzberg/src/extractors/rtf/images.rs +72 -0
- data/vendor/kreuzberg/src/extractors/rtf/metadata.rs +216 -0
- data/vendor/kreuzberg/src/extractors/rtf/mod.rs +142 -0
- data/vendor/kreuzberg/src/extractors/rtf/parser.rs +259 -0
- data/vendor/kreuzberg/src/extractors/rtf/tables.rs +83 -0
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
- data/vendor/kreuzberg/src/extractors/text.rs +4 -0
- data/vendor/kreuzberg/src/extractors/typst.rs +2 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +2 -0
- data/vendor/kreuzberg/src/keywords/processor.rs +14 -0
- data/vendor/kreuzberg/src/language_detection/processor.rs +10 -0
- data/vendor/kreuzberg/src/lib.rs +2 -2
- data/vendor/kreuzberg/src/mcp/errors.rs +312 -0
- data/vendor/kreuzberg/src/mcp/format.rs +211 -0
- data/vendor/kreuzberg/src/mcp/mod.rs +9 -3
- data/vendor/kreuzberg/src/mcp/params.rs +196 -0
- data/vendor/kreuzberg/src/mcp/server.rs +39 -1438
- data/vendor/kreuzberg/src/mcp/tools/cache.rs +179 -0
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +403 -0
- data/vendor/kreuzberg/src/mcp/tools/mime.rs +150 -0
- data/vendor/kreuzberg/src/mcp/tools/mod.rs +11 -0
- data/vendor/kreuzberg/src/ocr/backends/easyocr.rs +96 -0
- data/vendor/kreuzberg/src/ocr/backends/mod.rs +7 -0
- data/vendor/kreuzberg/src/ocr/backends/paddleocr.rs +27 -0
- data/vendor/kreuzberg/src/ocr/backends/tesseract.rs +134 -0
- data/vendor/kreuzberg/src/ocr/hocr.rs +60 -16
- data/vendor/kreuzberg/src/ocr/language_registry.rs +11 -235
- data/vendor/kreuzberg/src/ocr/mod.rs +1 -0
- data/vendor/kreuzberg/src/ocr/processor/config.rs +203 -0
- data/vendor/kreuzberg/src/ocr/processor/execution.rs +494 -0
- data/vendor/kreuzberg/src/ocr/processor/mod.rs +265 -0
- data/vendor/kreuzberg/src/ocr/processor/validation.rs +145 -0
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +41 -24
- data/vendor/kreuzberg/src/pdf/bindings.rs +21 -8
- data/vendor/kreuzberg/src/pdf/hierarchy/bounding_box.rs +289 -0
- data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +199 -0
- data/vendor/kreuzberg/src/pdf/{hierarchy.rs → hierarchy/extraction.rs} +6 -346
- data/vendor/kreuzberg/src/pdf/hierarchy/mod.rs +18 -0
- data/vendor/kreuzberg/src/plugins/extractor/mod.rs +319 -0
- data/vendor/kreuzberg/src/plugins/extractor/registry.rs +434 -0
- data/vendor/kreuzberg/src/plugins/extractor/trait.rs +391 -0
- data/vendor/kreuzberg/src/plugins/mod.rs +13 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +11 -0
- data/vendor/kreuzberg/src/plugins/processor/mod.rs +365 -0
- data/vendor/kreuzberg/src/plugins/processor/registry.rs +37 -0
- data/vendor/kreuzberg/src/plugins/processor/trait.rs +284 -0
- data/vendor/kreuzberg/src/plugins/registry/extractor.rs +416 -0
- data/vendor/kreuzberg/src/plugins/registry/mod.rs +116 -0
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +293 -0
- data/vendor/kreuzberg/src/plugins/registry/processor.rs +304 -0
- data/vendor/kreuzberg/src/plugins/registry/validator.rs +238 -0
- data/vendor/kreuzberg/src/plugins/validator/mod.rs +424 -0
- data/vendor/kreuzberg/src/plugins/validator/registry.rs +355 -0
- data/vendor/kreuzberg/src/plugins/validator/trait.rs +276 -0
- data/vendor/kreuzberg/src/stopwords/languages/asian.rs +40 -0
- data/vendor/kreuzberg/src/stopwords/languages/germanic.rs +36 -0
- data/vendor/kreuzberg/src/stopwords/languages/mod.rs +10 -0
- data/vendor/kreuzberg/src/stopwords/languages/other.rs +44 -0
- data/vendor/kreuzberg/src/stopwords/languages/romance.rs +36 -0
- data/vendor/kreuzberg/src/stopwords/languages/slavic.rs +36 -0
- data/vendor/kreuzberg/src/stopwords/mod.rs +7 -33
- data/vendor/kreuzberg/src/text/quality.rs +1 -1
- data/vendor/kreuzberg/src/text/quality_processor.rs +10 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/analysis.rs +238 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/mod.rs +8 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/punctuation.rs +54 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/reducer.rs +384 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/sentence_selection.rs +68 -0
- data/vendor/kreuzberg/src/text/token_reduction/core/word_filtering.rs +156 -0
- data/vendor/kreuzberg/src/text/token_reduction/filters/general.rs +377 -0
- data/vendor/kreuzberg/src/text/token_reduction/filters/html.rs +51 -0
- data/vendor/kreuzberg/src/text/token_reduction/filters/markdown.rs +285 -0
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +131 -246
- data/vendor/kreuzberg/src/types/djot.rs +209 -0
- data/vendor/kreuzberg/src/types/extraction.rs +301 -0
- data/vendor/kreuzberg/src/types/formats.rs +443 -0
- data/vendor/kreuzberg/src/types/metadata.rs +560 -0
- data/vendor/kreuzberg/src/types/mod.rs +281 -0
- data/vendor/kreuzberg/src/types/page.rs +182 -0
- data/vendor/kreuzberg/src/types/serde_helpers.rs +132 -0
- data/vendor/kreuzberg/src/types/tables.rs +39 -0
- data/vendor/kreuzberg/src/utils/quality/heuristics.rs +58 -0
- data/vendor/kreuzberg/src/utils/{quality.rs → quality/mod.rs} +168 -489
- data/vendor/kreuzberg/src/utils/quality/patterns.rs +117 -0
- data/vendor/kreuzberg/src/utils/quality/scoring.rs +178 -0
- data/vendor/kreuzberg/src/utils/string_pool/buffer_pool.rs +325 -0
- data/vendor/kreuzberg/src/utils/string_pool/interned.rs +102 -0
- data/vendor/kreuzberg/src/utils/string_pool/language_pool.rs +119 -0
- data/vendor/kreuzberg/src/utils/string_pool/mime_pool.rs +235 -0
- data/vendor/kreuzberg/src/utils/string_pool/mod.rs +41 -0
- data/vendor/kreuzberg/tests/api_chunk.rs +313 -0
- data/vendor/kreuzberg/tests/api_embed.rs +6 -9
- data/vendor/kreuzberg/tests/batch_orchestration.rs +1 -0
- data/vendor/kreuzberg/tests/concurrency_stress.rs +7 -0
- data/vendor/kreuzberg/tests/core_integration.rs +1 -0
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +130 -0
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +5 -14
- data/vendor/kreuzberg/tests/format_integration.rs +2 -0
- data/vendor/kreuzberg/tests/helpers/mod.rs +1 -0
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/ocr_configuration.rs +16 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +18 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +9 -0
- data/vendor/kreuzberg/tests/ocr_stress.rs +1 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +50 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +13 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +12 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +2 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +2 -1
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +2 -0
- data/vendor/kreuzberg-ffi/kreuzberg.h +347 -178
- data/vendor/kreuzberg-ffi/src/config/html.rs +318 -0
- data/vendor/kreuzberg-ffi/src/config/loader.rs +154 -0
- data/vendor/kreuzberg-ffi/src/config/merge.rs +104 -0
- data/vendor/kreuzberg-ffi/src/config/mod.rs +385 -0
- data/vendor/kreuzberg-ffi/src/config/parse.rs +91 -0
- data/vendor/kreuzberg-ffi/src/config/serialize.rs +118 -0
- data/vendor/kreuzberg-ffi/src/config_builder.rs +598 -0
- data/vendor/kreuzberg-ffi/src/error.rs +46 -14
- data/vendor/kreuzberg-ffi/src/helpers.rs +10 -0
- data/vendor/kreuzberg-ffi/src/html_options.rs +421 -0
- data/vendor/kreuzberg-ffi/src/lib.rs +16 -0
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +11 -0
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +2 -0
- data/vendor/kreuzberg-ffi/src/result.rs +148 -122
- data/vendor/kreuzberg-ffi/src/result_view.rs +4 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- metadata +200 -28
- data/vendor/kreuzberg/src/api/server.rs +0 -518
- data/vendor/kreuzberg/src/core/config.rs +0 -1914
- data/vendor/kreuzberg/src/core/config_validation.rs +0 -949
- data/vendor/kreuzberg/src/core/extractor.rs +0 -1200
- data/vendor/kreuzberg/src/core/pipeline.rs +0 -1223
- data/vendor/kreuzberg/src/core/server_config.rs +0 -1220
- data/vendor/kreuzberg/src/extraction/html.rs +0 -1830
- data/vendor/kreuzberg/src/extraction/pptx.rs +0 -3102
- data/vendor/kreuzberg/src/extractors/epub.rs +0 -696
- data/vendor/kreuzberg/src/extractors/latex.rs +0 -653
- data/vendor/kreuzberg/src/extractors/opml.rs +0 -635
- data/vendor/kreuzberg/src/extractors/rtf.rs +0 -809
- data/vendor/kreuzberg/src/ocr/processor.rs +0 -858
- data/vendor/kreuzberg/src/plugins/extractor.rs +0 -1042
- data/vendor/kreuzberg/src/plugins/processor.rs +0 -650
- data/vendor/kreuzberg/src/plugins/registry.rs +0 -1339
- data/vendor/kreuzberg/src/plugins/validator.rs +0 -967
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +0 -832
- data/vendor/kreuzberg/src/types.rs +0 -1713
- data/vendor/kreuzberg/src/utils/string_pool.rs +0 -762
- data/vendor/kreuzberg-ffi/src/config.rs +0 -1341
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: df75a873a0c3547b2e6e44fa1c04e939372f5c01839c1d81ea671414e5c98016
|
|
4
|
+
data.tar.gz: e4f60f88486c2807c6bf45959e6e6160d509ed096236a5d5bf0c70c26cf8f2f8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fac3f1dc6ca132f71f8034536f3ca7b7df53542a760061932e447e5cde3906d61903bb9ade16e96b782de1b879be68c50ee59218c3b0ba908d26d515d90d4966
|
|
7
|
+
data.tar.gz: 79b7ab92c373b7a06d06fe324b3d06ad2a588c9e993a2c8cb23c474e3452d82914d2ac48eb9edc1b9475cbf337b4e09a160c242d3510b33364d7f511d264e36f
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.0
|
|
4
|
+
kreuzberg (4.1.0)
|
|
5
5
|
|
|
6
6
|
GEM
|
|
7
7
|
remote: https://rubygems.org/
|
|
@@ -207,7 +207,7 @@ CHECKSUMS
|
|
|
207
207
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
208
208
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
209
209
|
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
210
|
-
kreuzberg (4.0
|
|
210
|
+
kreuzberg (4.1.0)
|
|
211
211
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
212
212
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
213
213
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.1.0" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -307,7 +307,7 @@ dependencies = [
|
|
|
307
307
|
"num-traits",
|
|
308
308
|
"pastey 0.1.1",
|
|
309
309
|
"rayon",
|
|
310
|
-
"thiserror 2.0.
|
|
310
|
+
"thiserror 2.0.18",
|
|
311
311
|
"v_frame",
|
|
312
312
|
"y4m",
|
|
313
313
|
]
|
|
@@ -711,14 +711,14 @@ dependencies = [
|
|
|
711
711
|
"serde_json",
|
|
712
712
|
"syn",
|
|
713
713
|
"tempfile",
|
|
714
|
-
"toml 0.9.
|
|
714
|
+
"toml 0.9.11+spec-1.1.0",
|
|
715
715
|
]
|
|
716
716
|
|
|
717
717
|
[[package]]
|
|
718
718
|
name = "cc"
|
|
719
|
-
version = "1.2.
|
|
719
|
+
version = "1.2.53"
|
|
720
720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
721
|
-
checksum = "
|
|
721
|
+
checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
|
|
722
722
|
dependencies = [
|
|
723
723
|
"find-msvc-tools",
|
|
724
724
|
"jobserver",
|
|
@@ -1515,7 +1515,7 @@ dependencies = [
|
|
|
1515
1515
|
"lebe",
|
|
1516
1516
|
"miniz_oxide",
|
|
1517
1517
|
"rayon-core",
|
|
1518
|
-
"smallvec
|
|
1518
|
+
"smallvec",
|
|
1519
1519
|
"zune-inflate",
|
|
1520
1520
|
]
|
|
1521
1521
|
|
|
@@ -1544,27 +1544,28 @@ checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"
|
|
|
1544
1544
|
|
|
1545
1545
|
[[package]]
|
|
1546
1546
|
name = "fast_image_resize"
|
|
1547
|
-
version = "
|
|
1547
|
+
version = "6.0.0"
|
|
1548
1548
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1549
|
-
checksum = "
|
|
1549
|
+
checksum = "12dd43e5011e8d8411a3215a0d57a2ec5c68282fb90eb5d7221fab0113442174"
|
|
1550
1550
|
dependencies = [
|
|
1551
1551
|
"cfg-if",
|
|
1552
1552
|
"document-features",
|
|
1553
1553
|
"num-traits",
|
|
1554
|
-
"thiserror 2.0.
|
|
1554
|
+
"thiserror 2.0.18",
|
|
1555
1555
|
]
|
|
1556
1556
|
|
|
1557
1557
|
[[package]]
|
|
1558
1558
|
name = "fastembed"
|
|
1559
|
-
version = "5.
|
|
1559
|
+
version = "5.8.1"
|
|
1560
1560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1561
|
-
checksum = "
|
|
1561
|
+
checksum = "59a3f841f27a44bcc32214f8df75cc9b6cea55dbbebbfe546735690eab5bb2d2"
|
|
1562
1562
|
dependencies = [
|
|
1563
1563
|
"anyhow",
|
|
1564
1564
|
"hf-hub",
|
|
1565
|
-
"ndarray
|
|
1565
|
+
"ndarray",
|
|
1566
1566
|
"ort",
|
|
1567
1567
|
"safetensors",
|
|
1568
|
+
"serde",
|
|
1568
1569
|
"serde_json",
|
|
1569
1570
|
"tokenizers",
|
|
1570
1571
|
]
|
|
@@ -1630,9 +1631,9 @@ dependencies = [
|
|
|
1630
1631
|
|
|
1631
1632
|
[[package]]
|
|
1632
1633
|
name = "find-msvc-tools"
|
|
1633
|
-
version = "0.1.
|
|
1634
|
+
version = "0.1.8"
|
|
1634
1635
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1635
|
-
checksum = "
|
|
1636
|
+
checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
|
|
1636
1637
|
|
|
1637
1638
|
[[package]]
|
|
1638
1639
|
name = "flate2"
|
|
@@ -1992,7 +1993,7 @@ dependencies = [
|
|
|
1992
1993
|
"reqwest 0.12.28",
|
|
1993
1994
|
"serde",
|
|
1994
1995
|
"serde_json",
|
|
1995
|
-
"thiserror 2.0.
|
|
1996
|
+
"thiserror 2.0.18",
|
|
1996
1997
|
"ureq 2.12.1",
|
|
1997
1998
|
"windows-sys 0.60.2",
|
|
1998
1999
|
]
|
|
@@ -2026,9 +2027,9 @@ dependencies = [
|
|
|
2026
2027
|
|
|
2027
2028
|
[[package]]
|
|
2028
2029
|
name = "html-to-markdown-rs"
|
|
2029
|
-
version = "2.
|
|
2030
|
+
version = "2.23.1"
|
|
2030
2031
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2031
|
-
checksum = "
|
|
2032
|
+
checksum = "e1f4d6781ac8dd203853803d27054ca4153c7fd0f3956cb7fc95dc06f42a1c46"
|
|
2032
2033
|
dependencies = [
|
|
2033
2034
|
"astral-tl",
|
|
2034
2035
|
"base64 0.22.1",
|
|
@@ -2041,7 +2042,7 @@ dependencies = [
|
|
|
2041
2042
|
"regex",
|
|
2042
2043
|
"serde",
|
|
2043
2044
|
"serde_json",
|
|
2044
|
-
"thiserror 2.0.
|
|
2045
|
+
"thiserror 2.0.18",
|
|
2045
2046
|
]
|
|
2046
2047
|
|
|
2047
2048
|
[[package]]
|
|
@@ -2135,7 +2136,7 @@ dependencies = [
|
|
|
2135
2136
|
"itoa",
|
|
2136
2137
|
"pin-project-lite",
|
|
2137
2138
|
"pin-utils",
|
|
2138
|
-
"smallvec
|
|
2139
|
+
"smallvec",
|
|
2139
2140
|
"tokio",
|
|
2140
2141
|
"want",
|
|
2141
2142
|
]
|
|
@@ -2282,7 +2283,7 @@ dependencies = [
|
|
|
2282
2283
|
"icu_normalizer_data",
|
|
2283
2284
|
"icu_properties",
|
|
2284
2285
|
"icu_provider",
|
|
2285
|
-
"smallvec
|
|
2286
|
+
"smallvec",
|
|
2286
2287
|
"zerovec",
|
|
2287
2288
|
]
|
|
2288
2289
|
|
|
@@ -2364,7 +2365,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
2364
2365
|
checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
|
|
2365
2366
|
dependencies = [
|
|
2366
2367
|
"idna_adapter",
|
|
2367
|
-
"smallvec
|
|
2368
|
+
"smallvec",
|
|
2368
2369
|
"utf8_iter",
|
|
2369
2370
|
]
|
|
2370
2371
|
|
|
@@ -2599,6 +2600,12 @@ dependencies = [
|
|
|
2599
2600
|
"libc",
|
|
2600
2601
|
]
|
|
2601
2602
|
|
|
2603
|
+
[[package]]
|
|
2604
|
+
name = "jotdown"
|
|
2605
|
+
version = "0.9.1"
|
|
2606
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2607
|
+
checksum = "086b08ec7a274cd60cd575ed3651ba081ee72dec0d39a6210e8adcff9efe3880"
|
|
2608
|
+
|
|
2602
2609
|
[[package]]
|
|
2603
2610
|
name = "js-sys"
|
|
2604
2611
|
version = "0.3.83"
|
|
@@ -2620,7 +2627,7 @@ dependencies = [
|
|
|
2620
2627
|
|
|
2621
2628
|
[[package]]
|
|
2622
2629
|
name = "kreuzberg"
|
|
2623
|
-
version = "4.
|
|
2630
|
+
version = "4.1.0"
|
|
2624
2631
|
dependencies = [
|
|
2625
2632
|
"ahash",
|
|
2626
2633
|
"async-trait",
|
|
@@ -2644,6 +2651,7 @@ dependencies = [
|
|
|
2644
2651
|
"image",
|
|
2645
2652
|
"indexmap",
|
|
2646
2653
|
"infer",
|
|
2654
|
+
"jotdown",
|
|
2647
2655
|
"kamadak-exif",
|
|
2648
2656
|
"kreuzberg-pdfium-render",
|
|
2649
2657
|
"kreuzberg-tesseract",
|
|
@@ -2655,7 +2663,7 @@ dependencies = [
|
|
|
2655
2663
|
"memchr",
|
|
2656
2664
|
"mime_guess",
|
|
2657
2665
|
"msg_parser",
|
|
2658
|
-
"ndarray
|
|
2666
|
+
"ndarray",
|
|
2659
2667
|
"num_cpus",
|
|
2660
2668
|
"once_cell",
|
|
2661
2669
|
"opentelemetry",
|
|
@@ -2666,7 +2674,7 @@ dependencies = [
|
|
|
2666
2674
|
"pkg-config",
|
|
2667
2675
|
"polars",
|
|
2668
2676
|
"pulldown-cmark",
|
|
2669
|
-
"quick-xml 0.
|
|
2677
|
+
"quick-xml 0.39.0",
|
|
2670
2678
|
"rake",
|
|
2671
2679
|
"rayon",
|
|
2672
2680
|
"regex",
|
|
@@ -2684,10 +2692,10 @@ dependencies = [
|
|
|
2684
2692
|
"smartcore",
|
|
2685
2693
|
"tar",
|
|
2686
2694
|
"text-splitter",
|
|
2687
|
-
"thiserror 2.0.
|
|
2695
|
+
"thiserror 2.0.18",
|
|
2688
2696
|
"tiff 0.11.0",
|
|
2689
2697
|
"tokio",
|
|
2690
|
-
"toml 0.9.
|
|
2698
|
+
"toml 0.9.11+spec-1.1.0",
|
|
2691
2699
|
"tower",
|
|
2692
2700
|
"tower-http",
|
|
2693
2701
|
"tracing",
|
|
@@ -2698,12 +2706,12 @@ dependencies = [
|
|
|
2698
2706
|
"uuid",
|
|
2699
2707
|
"whatlang",
|
|
2700
2708
|
"yake-rust",
|
|
2701
|
-
"zip 7.
|
|
2709
|
+
"zip 7.1.0",
|
|
2702
2710
|
]
|
|
2703
2711
|
|
|
2704
2712
|
[[package]]
|
|
2705
2713
|
name = "kreuzberg-ffi"
|
|
2706
|
-
version = "4.
|
|
2714
|
+
version = "4.1.0"
|
|
2707
2715
|
dependencies = [
|
|
2708
2716
|
"async-trait",
|
|
2709
2717
|
"cbindgen",
|
|
@@ -2752,19 +2760,21 @@ dependencies = [
|
|
|
2752
2760
|
"pretty_assertions",
|
|
2753
2761
|
"rb-sys",
|
|
2754
2762
|
"serde_json",
|
|
2763
|
+
"serde_yaml_ng",
|
|
2755
2764
|
"tokio",
|
|
2765
|
+
"toml 0.8.23",
|
|
2756
2766
|
]
|
|
2757
2767
|
|
|
2758
2768
|
[[package]]
|
|
2759
2769
|
name = "kreuzberg-tesseract"
|
|
2760
|
-
version = "4.
|
|
2770
|
+
version = "4.1.0"
|
|
2761
2771
|
dependencies = [
|
|
2762
2772
|
"cc",
|
|
2763
2773
|
"cmake",
|
|
2764
2774
|
"libc",
|
|
2765
2775
|
"reqwest 0.13.1",
|
|
2766
|
-
"thiserror 2.0.
|
|
2767
|
-
"zip 7.
|
|
2776
|
+
"thiserror 2.0.18",
|
|
2777
|
+
"zip 7.1.0",
|
|
2768
2778
|
]
|
|
2769
2779
|
|
|
2770
2780
|
[[package]]
|
|
@@ -2912,9 +2922,9 @@ dependencies = [
|
|
|
2912
2922
|
|
|
2913
2923
|
[[package]]
|
|
2914
2924
|
name = "lopdf"
|
|
2915
|
-
version = "0.
|
|
2925
|
+
version = "0.39.0"
|
|
2916
2926
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2917
|
-
checksum = "
|
|
2927
|
+
checksum = "f560f57dfb9142a02d673e137622fd515d4231e51feb8b4af28d92647d83f35b"
|
|
2918
2928
|
dependencies = [
|
|
2919
2929
|
"aes",
|
|
2920
2930
|
"bitflags",
|
|
@@ -2936,7 +2946,7 @@ dependencies = [
|
|
|
2936
2946
|
"rayon",
|
|
2937
2947
|
"sha2",
|
|
2938
2948
|
"stringprep",
|
|
2939
|
-
"thiserror 2.0.
|
|
2949
|
+
"thiserror 2.0.18",
|
|
2940
2950
|
"time",
|
|
2941
2951
|
"ttf-parser",
|
|
2942
2952
|
"weezl",
|
|
@@ -2978,9 +2988,9 @@ dependencies = [
|
|
|
2978
2988
|
|
|
2979
2989
|
[[package]]
|
|
2980
2990
|
name = "lzma-rust2"
|
|
2981
|
-
version = "0.15.
|
|
2991
|
+
version = "0.15.7"
|
|
2982
2992
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2983
|
-
checksum = "
|
|
2993
|
+
checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69"
|
|
2984
2994
|
dependencies = [
|
|
2985
2995
|
"crc",
|
|
2986
2996
|
"sha2",
|
|
@@ -3255,24 +3265,9 @@ dependencies = [
|
|
|
3255
3265
|
|
|
3256
3266
|
[[package]]
|
|
3257
3267
|
name = "ndarray"
|
|
3258
|
-
version = "0.
|
|
3268
|
+
version = "0.17.2"
|
|
3259
3269
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3260
|
-
checksum = "
|
|
3261
|
-
dependencies = [
|
|
3262
|
-
"matrixmultiply",
|
|
3263
|
-
"num-complex",
|
|
3264
|
-
"num-integer",
|
|
3265
|
-
"num-traits",
|
|
3266
|
-
"portable-atomic",
|
|
3267
|
-
"portable-atomic-util",
|
|
3268
|
-
"rawpointer",
|
|
3269
|
-
]
|
|
3270
|
-
|
|
3271
|
-
[[package]]
|
|
3272
|
-
name = "ndarray"
|
|
3273
|
-
version = "0.17.1"
|
|
3274
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3275
|
-
checksum = "0c7c9125e8f6f10c9da3aad044cc918cf8784fa34de857b1aa68038eb05a50a9"
|
|
3270
|
+
checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d"
|
|
3276
3271
|
dependencies = [
|
|
3277
3272
|
"matrixmultiply",
|
|
3278
3273
|
"num-complex",
|
|
@@ -3476,7 +3471,7 @@ dependencies = [
|
|
|
3476
3471
|
"serde",
|
|
3477
3472
|
"serde_json",
|
|
3478
3473
|
"serde_urlencoded",
|
|
3479
|
-
"thiserror 2.0.
|
|
3474
|
+
"thiserror 2.0.18",
|
|
3480
3475
|
"tokio",
|
|
3481
3476
|
"tracing",
|
|
3482
3477
|
"url",
|
|
@@ -3579,7 +3574,7 @@ dependencies = [
|
|
|
3579
3574
|
"futures-sink",
|
|
3580
3575
|
"js-sys",
|
|
3581
3576
|
"pin-project-lite",
|
|
3582
|
-
"thiserror 2.0.
|
|
3577
|
+
"thiserror 2.0.18",
|
|
3583
3578
|
"tracing",
|
|
3584
3579
|
]
|
|
3585
3580
|
|
|
@@ -3595,7 +3590,7 @@ dependencies = [
|
|
|
3595
3590
|
"opentelemetry",
|
|
3596
3591
|
"percent-encoding",
|
|
3597
3592
|
"rand 0.9.2",
|
|
3598
|
-
"thiserror 2.0.
|
|
3593
|
+
"thiserror 2.0.18",
|
|
3599
3594
|
"tokio",
|
|
3600
3595
|
"tokio-stream",
|
|
3601
3596
|
]
|
|
@@ -3623,25 +3618,22 @@ checksum = "cfdf547b633735ad9d67353aba48b3e685ab5ffb3195aaa9a1b1d8613e11b98c"
|
|
|
3623
3618
|
|
|
3624
3619
|
[[package]]
|
|
3625
3620
|
name = "ort"
|
|
3626
|
-
version = "2.0.0-rc.
|
|
3621
|
+
version = "2.0.0-rc.11"
|
|
3627
3622
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3628
|
-
checksum = "
|
|
3623
|
+
checksum = "4a5df903c0d2c07b56950f1058104ab0c8557159f2741782223704de9be73c3c"
|
|
3629
3624
|
dependencies = [
|
|
3630
|
-
"libloading 0.
|
|
3631
|
-
"ndarray
|
|
3625
|
+
"libloading 0.9.0",
|
|
3626
|
+
"ndarray",
|
|
3632
3627
|
"ort-sys",
|
|
3633
|
-
"smallvec
|
|
3628
|
+
"smallvec",
|
|
3634
3629
|
"tracing",
|
|
3635
3630
|
]
|
|
3636
3631
|
|
|
3637
3632
|
[[package]]
|
|
3638
3633
|
name = "ort-sys"
|
|
3639
|
-
version = "2.0.0-rc.
|
|
3634
|
+
version = "2.0.0-rc.11"
|
|
3640
3635
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3641
|
-
checksum = "
|
|
3642
|
-
dependencies = [
|
|
3643
|
-
"pkg-config",
|
|
3644
|
-
]
|
|
3636
|
+
checksum = "06503bb33f294c5f1ba484011e053bfa6ae227074bdb841e9863492dc5960d4b"
|
|
3645
3637
|
|
|
3646
3638
|
[[package]]
|
|
3647
3639
|
name = "outref"
|
|
@@ -3674,7 +3666,7 @@ dependencies = [
|
|
|
3674
3666
|
"cfg-if",
|
|
3675
3667
|
"libc",
|
|
3676
3668
|
"redox_syscall 0.5.18",
|
|
3677
|
-
"smallvec
|
|
3669
|
+
"smallvec",
|
|
3678
3670
|
"windows-link",
|
|
3679
3671
|
]
|
|
3680
3672
|
|
|
@@ -4536,6 +4528,16 @@ dependencies = [
|
|
|
4536
4528
|
"serde",
|
|
4537
4529
|
]
|
|
4538
4530
|
|
|
4531
|
+
[[package]]
|
|
4532
|
+
name = "quick-xml"
|
|
4533
|
+
version = "0.39.0"
|
|
4534
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4535
|
+
checksum = "f2e3bf4aa9d243beeb01a7b3bc30b77cfe2c44e24ec02d751a7104a53c2c49a1"
|
|
4536
|
+
dependencies = [
|
|
4537
|
+
"memchr",
|
|
4538
|
+
"serde",
|
|
4539
|
+
]
|
|
4540
|
+
|
|
4539
4541
|
[[package]]
|
|
4540
4542
|
name = "quinn"
|
|
4541
4543
|
version = "0.11.9"
|
|
@@ -4550,7 +4552,7 @@ dependencies = [
|
|
|
4550
4552
|
"rustc-hash 2.1.1",
|
|
4551
4553
|
"rustls",
|
|
4552
4554
|
"socket2",
|
|
4553
|
-
"thiserror 2.0.
|
|
4555
|
+
"thiserror 2.0.18",
|
|
4554
4556
|
"tokio",
|
|
4555
4557
|
"tracing",
|
|
4556
4558
|
"web-time",
|
|
@@ -4572,7 +4574,7 @@ dependencies = [
|
|
|
4572
4574
|
"rustls",
|
|
4573
4575
|
"rustls-pki-types",
|
|
4574
4576
|
"slab",
|
|
4575
|
-
"thiserror 2.0.
|
|
4577
|
+
"thiserror 2.0.18",
|
|
4576
4578
|
"tinyvec",
|
|
4577
4579
|
"tracing",
|
|
4578
4580
|
"web-time",
|
|
@@ -4714,7 +4716,7 @@ dependencies = [
|
|
|
4714
4716
|
"rand 0.9.2",
|
|
4715
4717
|
"rand_chacha",
|
|
4716
4718
|
"simd_helpers",
|
|
4717
|
-
"thiserror 2.0.
|
|
4719
|
+
"thiserror 2.0.18",
|
|
4718
4720
|
"v_frame",
|
|
4719
4721
|
"wasm-bindgen",
|
|
4720
4722
|
]
|
|
@@ -4856,7 +4858,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
|
|
|
4856
4858
|
dependencies = [
|
|
4857
4859
|
"getrandom 0.2.16",
|
|
4858
4860
|
"libredox",
|
|
4859
|
-
"thiserror 2.0.
|
|
4861
|
+
"thiserror 2.0.18",
|
|
4860
4862
|
]
|
|
4861
4863
|
|
|
4862
4864
|
[[package]]
|
|
@@ -5020,9 +5022,9 @@ dependencies = [
|
|
|
5020
5022
|
|
|
5021
5023
|
[[package]]
|
|
5022
5024
|
name = "rmcp"
|
|
5023
|
-
version = "0.
|
|
5025
|
+
version = "0.13.0"
|
|
5024
5026
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5025
|
-
checksum = "
|
|
5027
|
+
checksum = "d1815dbc06c414d720f8bc1951eccd66bc99efc6376331f1e7093a119b3eb508"
|
|
5026
5028
|
dependencies = [
|
|
5027
5029
|
"async-trait",
|
|
5028
5030
|
"axum",
|
|
@@ -5041,7 +5043,7 @@ dependencies = [
|
|
|
5041
5043
|
"serde",
|
|
5042
5044
|
"serde_json",
|
|
5043
5045
|
"sse-stream",
|
|
5044
|
-
"thiserror 2.0.
|
|
5046
|
+
"thiserror 2.0.18",
|
|
5045
5047
|
"tokio",
|
|
5046
5048
|
"tokio-stream",
|
|
5047
5049
|
"tokio-util",
|
|
@@ -5052,9 +5054,9 @@ dependencies = [
|
|
|
5052
5054
|
|
|
5053
5055
|
[[package]]
|
|
5054
5056
|
name = "rmcp-macros"
|
|
5055
|
-
version = "0.
|
|
5057
|
+
version = "0.13.0"
|
|
5056
5058
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5057
|
-
checksum = "
|
|
5059
|
+
checksum = "11f0bc7008fa102e771a76c6d2c9b253be3f2baa5964e060464d038ae1cbc573"
|
|
5058
5060
|
dependencies = [
|
|
5059
5061
|
"darling 0.23.0",
|
|
5060
5062
|
"proc-macro2",
|
|
@@ -5236,10 +5238,11 @@ checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984"
|
|
|
5236
5238
|
|
|
5237
5239
|
[[package]]
|
|
5238
5240
|
name = "safetensors"
|
|
5239
|
-
version = "0.
|
|
5241
|
+
version = "0.7.0"
|
|
5240
5242
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5241
|
-
checksum = "
|
|
5243
|
+
checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5"
|
|
5242
5244
|
dependencies = [
|
|
5245
|
+
"hashbrown 0.16.1",
|
|
5243
5246
|
"serde",
|
|
5244
5247
|
"serde_json",
|
|
5245
5248
|
]
|
|
@@ -5607,12 +5610,6 @@ version = "1.15.1"
|
|
|
5607
5610
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5608
5611
|
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
|
5609
5612
|
|
|
5610
|
-
[[package]]
|
|
5611
|
-
name = "smallvec"
|
|
5612
|
-
version = "2.0.0-alpha.10"
|
|
5613
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5614
|
-
checksum = "51d44cfb396c3caf6fbfd0ab422af02631b69ddd96d2eff0b0f0724f9024051b"
|
|
5615
|
-
|
|
5616
5613
|
[[package]]
|
|
5617
5614
|
name = "smartcore"
|
|
5618
5615
|
version = "0.4.8"
|
|
@@ -5922,7 +5919,7 @@ dependencies = [
|
|
|
5922
5919
|
"memchr",
|
|
5923
5920
|
"pulldown-cmark",
|
|
5924
5921
|
"strum",
|
|
5925
|
-
"thiserror 2.0.
|
|
5922
|
+
"thiserror 2.0.18",
|
|
5926
5923
|
]
|
|
5927
5924
|
|
|
5928
5925
|
[[package]]
|
|
@@ -5942,11 +5939,11 @@ dependencies = [
|
|
|
5942
5939
|
|
|
5943
5940
|
[[package]]
|
|
5944
5941
|
name = "thiserror"
|
|
5945
|
-
version = "2.0.
|
|
5942
|
+
version = "2.0.18"
|
|
5946
5943
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5947
|
-
checksum = "
|
|
5944
|
+
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
|
|
5948
5945
|
dependencies = [
|
|
5949
|
-
"thiserror-impl 2.0.
|
|
5946
|
+
"thiserror-impl 2.0.18",
|
|
5950
5947
|
]
|
|
5951
5948
|
|
|
5952
5949
|
[[package]]
|
|
@@ -5962,9 +5959,9 @@ dependencies = [
|
|
|
5962
5959
|
|
|
5963
5960
|
[[package]]
|
|
5964
5961
|
name = "thiserror-impl"
|
|
5965
|
-
version = "2.0.
|
|
5962
|
+
version = "2.0.18"
|
|
5966
5963
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5967
|
-
checksum = "
|
|
5964
|
+
checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
|
|
5968
5965
|
dependencies = [
|
|
5969
5966
|
"proc-macro2",
|
|
5970
5967
|
"quote",
|
|
@@ -6092,7 +6089,7 @@ dependencies = [
|
|
|
6092
6089
|
"serde",
|
|
6093
6090
|
"serde_json",
|
|
6094
6091
|
"spm_precompiled",
|
|
6095
|
-
"thiserror 2.0.
|
|
6092
|
+
"thiserror 2.0.18",
|
|
6096
6093
|
"unicode-normalization-alignments",
|
|
6097
6094
|
"unicode-segmentation",
|
|
6098
6095
|
"unicode_categories",
|
|
@@ -6183,9 +6180,9 @@ dependencies = [
|
|
|
6183
6180
|
|
|
6184
6181
|
[[package]]
|
|
6185
6182
|
name = "toml"
|
|
6186
|
-
version = "0.9.
|
|
6183
|
+
version = "0.9.11+spec-1.1.0"
|
|
6187
6184
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6188
|
-
checksum = "
|
|
6185
|
+
checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
|
|
6189
6186
|
dependencies = [
|
|
6190
6187
|
"indexmap",
|
|
6191
6188
|
"serde_core",
|
|
@@ -6351,8 +6348,8 @@ dependencies = [
|
|
|
6351
6348
|
"opentelemetry",
|
|
6352
6349
|
"opentelemetry_sdk",
|
|
6353
6350
|
"rustversion",
|
|
6354
|
-
"smallvec
|
|
6355
|
-
"thiserror 2.0.
|
|
6351
|
+
"smallvec",
|
|
6352
|
+
"thiserror 2.0.18",
|
|
6356
6353
|
"tracing",
|
|
6357
6354
|
"tracing-core",
|
|
6358
6355
|
"tracing-log",
|
|
@@ -6534,7 +6531,7 @@ version = "0.1.12"
|
|
|
6534
6531
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6535
6532
|
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
|
|
6536
6533
|
dependencies = [
|
|
6537
|
-
"smallvec
|
|
6534
|
+
"smallvec",
|
|
6538
6535
|
]
|
|
6539
6536
|
|
|
6540
6537
|
[[package]]
|
|
@@ -7501,12 +7498,11 @@ dependencies = [
|
|
|
7501
7498
|
|
|
7502
7499
|
[[package]]
|
|
7503
7500
|
name = "zip"
|
|
7504
|
-
version = "7.
|
|
7501
|
+
version = "7.1.0"
|
|
7505
7502
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7506
|
-
checksum = "
|
|
7503
|
+
checksum = "9013f1222db8a6d680f13a7ccdc60a781199cd09c2fa4eff58e728bb181757fc"
|
|
7507
7504
|
dependencies = [
|
|
7508
7505
|
"aes",
|
|
7509
|
-
"arbitrary",
|
|
7510
7506
|
"bzip2",
|
|
7511
7507
|
"constant_time_eq",
|
|
7512
7508
|
"crc32fast",
|
|
@@ -31,7 +31,7 @@ embeddings = ["kreuzberg/embeddings"]
|
|
|
31
31
|
|
|
32
32
|
[dependencies]
|
|
33
33
|
async-trait = "0.1.89"
|
|
34
|
-
kreuzberg = { path = "
|
|
34
|
+
kreuzberg = { path = "../../../../../crates/kreuzberg", default-features = false, features = [
|
|
35
35
|
"pdf",
|
|
36
36
|
"excel",
|
|
37
37
|
"office",
|
|
@@ -51,7 +51,7 @@ kreuzberg = { path = "../../../vendor/kreuzberg", default-features = false, feat
|
|
|
51
51
|
"bundled-pdfium",
|
|
52
52
|
"tokio-runtime",
|
|
53
53
|
] }
|
|
54
|
-
kreuzberg-ffi = { path = "
|
|
54
|
+
kreuzberg-ffi = { path = "../../../../../crates/kreuzberg-ffi" }
|
|
55
55
|
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = [
|
|
56
56
|
"rb-sys",
|
|
57
57
|
] }
|
|
@@ -59,6 +59,8 @@ rb-sys = { version = "0.9.119", default-features = false, features = [
|
|
|
59
59
|
"stable-api-compiled-fallback",
|
|
60
60
|
] }
|
|
61
61
|
serde_json = "1.0.145"
|
|
62
|
+
toml = "0.8"
|
|
63
|
+
serde_yaml_ng = "0.10"
|
|
62
64
|
tokio = { version = "1.48.0", features = [
|
|
63
65
|
"rt",
|
|
64
66
|
"rt-multi-thread",
|