kreuzberg 4.1.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/libpdfium.so +0 -0
- data/kreuzberg.gemspec +13 -1
- data/lib/kreuzberg/config.rb +70 -35
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +5 -1
- data/spec/binding/batch_operations_spec.rb +80 -0
- data/spec/binding/metadata_types_spec.rb +77 -57
- data/spec/serialization_spec.rb +134 -0
- data/spec/unit/config/output_format_spec.rb +380 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/embeddings.rs +4 -4
- data/vendor/kreuzberg/src/mcp/format.rs +237 -39
- data/vendor/kreuzberg/src/mcp/params.rs +26 -33
- data/vendor/kreuzberg/src/mcp/server.rs +6 -3
- data/vendor/kreuzberg/src/mcp/tools/extraction.rs +16 -23
- data/vendor/kreuzberg/tests/api_chunk.rs +40 -30
- data/vendor/kreuzberg/tests/api_consistency.rs +349 -0
- data/vendor/kreuzberg/tests/api_embed.rs +84 -50
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +8 -2
- data/vendor/kreuzberg/tests/api_tests.rs +298 -139
- data/vendor/kreuzberg/tests/archive_integration.rs +63 -56
- data/vendor/kreuzberg/tests/batch_orchestration.rs +22 -14
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +13 -13
- data/vendor/kreuzberg/tests/batch_processing.rs +13 -9
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +10 -10
- data/vendor/kreuzberg/tests/concurrency_stress.rs +10 -6
- data/vendor/kreuzberg/tests/config_behavioral.rs +414 -0
- data/vendor/kreuzberg/tests/config_features.rs +19 -15
- data/vendor/kreuzberg/tests/config_integration_test.rs +68 -68
- data/vendor/kreuzberg/tests/config_loading_tests.rs +71 -62
- data/vendor/kreuzberg/tests/contract_mcp.rs +314 -0
- data/vendor/kreuzberg/tests/core_integration.rs +55 -53
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +23 -23
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +15 -14
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +4 -4
- data/vendor/kreuzberg/tests/email_integration.rs +7 -7
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/error_handling.rs +13 -11
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/html_table_test.rs +11 -11
- data/vendor/kreuzberg/tests/instrumentation_test.rs +18 -13
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +17 -17
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/keywords_integration.rs +25 -25
- data/vendor/kreuzberg/tests/keywords_quality.rs +9 -9
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/mcp_integration.rs +849 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +72 -41
- data/vendor/kreuzberg/tests/ocr_errors.rs +10 -4
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -1
- data/vendor/kreuzberg/tests/ocr_stress.rs +3 -3
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +6 -6
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +2 -2
- data/vendor/kreuzberg/tests/page_markers.rs +1 -1
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +6 -6
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +2 -2
- data/vendor/kreuzberg/tests/pipeline_integration.rs +77 -61
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +97 -77
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +78 -61
- data/vendor/kreuzberg/tests/plugin_system.rs +49 -46
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +109 -97
- data/vendor/kreuzberg/tests/pptx_regression_tests.rs +40 -30
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +26 -23
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +1 -1
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +18 -18
- data/vendor/kreuzberg/tests/security_validation.rs +20 -19
- data/vendor/kreuzberg/tests/serialization_integration.rs +112 -0
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +36 -36
- data/vendor/kreuzberg/tests/test_fastembed.rs +8 -8
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +9 -9
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +12 -9
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +10 -2
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.1.2
|
|
4
|
+
version: 4.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-01-
|
|
11
|
+
date: 2026-01-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -209,6 +209,7 @@ files:
|
|
|
209
209
|
- ext/kreuzberg_rb/native/include/msvc_compat/strings.h
|
|
210
210
|
- ext/kreuzberg_rb/native/include/strings.h
|
|
211
211
|
- ext/kreuzberg_rb/native/include/unistd.h
|
|
212
|
+
- ext/kreuzberg_rb/native/libpdfium.so
|
|
212
213
|
- ext/kreuzberg_rb/native/src/batch.rs
|
|
213
214
|
- ext/kreuzberg_rb/native/src/config/mod.rs
|
|
214
215
|
- ext/kreuzberg_rb/native/src/config/types.rs
|
|
@@ -271,6 +272,7 @@ files:
|
|
|
271
272
|
- spec/fixtures/config.toml
|
|
272
273
|
- spec/fixtures/config.yaml
|
|
273
274
|
- spec/fixtures/invalid_config.toml
|
|
275
|
+
- spec/serialization_spec.rb
|
|
274
276
|
- spec/smoke/package_spec.rb
|
|
275
277
|
- spec/spec_helper.rb
|
|
276
278
|
- spec/unit/config/chunking_config_spec.rb
|
|
@@ -283,6 +285,7 @@ files:
|
|
|
283
285
|
- spec/unit/config/keyword_config_spec.rb
|
|
284
286
|
- spec/unit/config/language_detection_config_spec.rb
|
|
285
287
|
- spec/unit/config/ocr_config_spec.rb
|
|
288
|
+
- spec/unit/config/output_format_spec.rb
|
|
286
289
|
- spec/unit/config/page_config_spec.rb
|
|
287
290
|
- spec/unit/config/pdf_config_spec.rb
|
|
288
291
|
- spec/unit/config/postprocessor_config_spec.rb
|
|
@@ -705,6 +708,7 @@ files:
|
|
|
705
708
|
- vendor/kreuzberg/stopwords/zh_stopwords.json
|
|
706
709
|
- vendor/kreuzberg/stopwords/zu_stopwords.json
|
|
707
710
|
- vendor/kreuzberg/tests/api_chunk.rs
|
|
711
|
+
- vendor/kreuzberg/tests/api_consistency.rs
|
|
708
712
|
- vendor/kreuzberg/tests/api_embed.rs
|
|
709
713
|
- vendor/kreuzberg/tests/api_extract_multipart.rs
|
|
710
714
|
- vendor/kreuzberg/tests/api_large_pdf_extraction.rs
|
|
@@ -716,9 +720,11 @@ files:
|
|
|
716
720
|
- vendor/kreuzberg/tests/batch_processing.rs
|
|
717
721
|
- vendor/kreuzberg/tests/bibtex_parity_test.rs
|
|
718
722
|
- vendor/kreuzberg/tests/concurrency_stress.rs
|
|
723
|
+
- vendor/kreuzberg/tests/config_behavioral.rs
|
|
719
724
|
- vendor/kreuzberg/tests/config_features.rs
|
|
720
725
|
- vendor/kreuzberg/tests/config_integration_test.rs
|
|
721
726
|
- vendor/kreuzberg/tests/config_loading_tests.rs
|
|
727
|
+
- vendor/kreuzberg/tests/contract_mcp.rs
|
|
722
728
|
- vendor/kreuzberg/tests/core_integration.rs
|
|
723
729
|
- vendor/kreuzberg/tests/csv_integration.rs
|
|
724
730
|
- vendor/kreuzberg/tests/data/hierarchy_ground_truth.json
|
|
@@ -740,6 +746,7 @@ files:
|
|
|
740
746
|
- vendor/kreuzberg/tests/keywords_quality.rs
|
|
741
747
|
- vendor/kreuzberg/tests/latex_extractor_tests.rs
|
|
742
748
|
- vendor/kreuzberg/tests/markdown_extractor_tests.rs
|
|
749
|
+
- vendor/kreuzberg/tests/mcp_integration.rs
|
|
743
750
|
- vendor/kreuzberg/tests/mime_detection.rs
|
|
744
751
|
- vendor/kreuzberg/tests/ocr_configuration.rs
|
|
745
752
|
- vendor/kreuzberg/tests/ocr_errors.rs
|
|
@@ -766,6 +773,7 @@ files:
|
|
|
766
773
|
- vendor/kreuzberg/tests/rst_extractor_tests.rs
|
|
767
774
|
- vendor/kreuzberg/tests/rtf_extractor_tests.rs
|
|
768
775
|
- vendor/kreuzberg/tests/security_validation.rs
|
|
776
|
+
- vendor/kreuzberg/tests/serialization_integration.rs
|
|
769
777
|
- vendor/kreuzberg/tests/stopwords_integration_test.rs
|
|
770
778
|
- vendor/kreuzberg/tests/test_fastembed.rs
|
|
771
779
|
- vendor/kreuzberg/tests/typst_behavioral_tests.rs
|