kreuzberg 4.0.0.rc2 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -14
- data/.rspec +3 -3
- data/.rubocop.yaml +1 -1
- data/.rubocop.yml +543 -538
- data/Gemfile +8 -8
- data/Gemfile.lock +194 -6
- data/README.md +391 -426
- data/Rakefile +34 -25
- data/Steepfile +51 -47
- data/examples/async_patterns.rb +283 -341
- data/ext/kreuzberg_rb/extconf.rb +65 -45
- data/ext/kreuzberg_rb/native/.cargo/config.toml +23 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +7619 -6535
- data/ext/kreuzberg_rb/native/Cargo.toml +75 -44
- data/ext/kreuzberg_rb/native/README.md +425 -425
- data/ext/kreuzberg_rb/native/build.rs +15 -15
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
- data/ext/kreuzberg_rb/native/include/strings.h +20 -20
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
- data/ext/kreuzberg_rb/native/src/lib.rs +3802 -2998
- data/extconf.rb +60 -28
- data/kreuzberg.gemspec +199 -148
- data/lib/kreuzberg/api_proxy.rb +126 -142
- data/lib/kreuzberg/cache_api.rb +67 -46
- data/lib/kreuzberg/cli.rb +47 -55
- data/lib/kreuzberg/cli_proxy.rb +117 -127
- data/lib/kreuzberg/config.rb +936 -691
- data/lib/kreuzberg/error_context.rb +136 -32
- data/lib/kreuzberg/errors.rb +116 -118
- data/lib/kreuzberg/extraction_api.rb +313 -85
- data/lib/kreuzberg/mcp_proxy.rb +177 -186
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -113
- data/lib/kreuzberg/post_processor_protocol.rb +15 -86
- data/lib/kreuzberg/result.rb +334 -216
- data/lib/kreuzberg/setup_lib_path.rb +99 -80
- data/lib/kreuzberg/types.rb +170 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -89
- data/lib/kreuzberg/version.rb +5 -5
- data/lib/kreuzberg.rb +96 -103
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -184
- data/sig/kreuzberg.rbs +561 -520
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +595 -0
- data/spec/binding/batch_spec.rb +359 -0
- data/spec/binding/cache_spec.rb +227 -227
- data/spec/binding/cli_proxy_spec.rb +85 -85
- data/spec/binding/cli_spec.rb +55 -55
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -345
- data/spec/binding/config_validation_spec.rb +377 -283
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -213
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -66
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +738 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1228 -0
- data/spec/binding/pages_extraction_spec.rb +471 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
- data/spec/binding/plugins/postprocessor_spec.rb +269 -269
- data/spec/binding/plugins/validator_spec.rb +273 -274
- data/spec/binding/tables_spec.rb +641 -0
- data/spec/fixtures/config.toml +38 -39
- data/spec/fixtures/config.yaml +41 -41
- data/spec/fixtures/invalid_config.toml +3 -4
- data/spec/smoke/package_spec.rb +177 -178
- data/spec/spec_helper.rb +40 -42
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +438 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +249 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- data/vendor/Cargo.toml +61 -0
- data/vendor/kreuzberg/Cargo.toml +259 -204
- data/vendor/kreuzberg/README.md +263 -175
- data/vendor/kreuzberg/build.rs +782 -474
- data/vendor/kreuzberg/examples/bench_fixes.rs +71 -0
- data/vendor/kreuzberg/examples/test_pdfium_fork.rs +62 -0
- data/vendor/kreuzberg/src/api/error.rs +81 -81
- data/vendor/kreuzberg/src/api/handlers.rs +320 -199
- data/vendor/kreuzberg/src/api/mod.rs +94 -79
- data/vendor/kreuzberg/src/api/server.rs +518 -353
- data/vendor/kreuzberg/src/api/types.rs +206 -170
- data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
- data/vendor/kreuzberg/src/chunking/mod.rs +2303 -677
- data/vendor/kreuzberg/src/chunking/processor.rs +219 -0
- data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
- data/vendor/kreuzberg/src/core/batch_optimizations.rs +385 -0
- data/vendor/kreuzberg/src/core/config.rs +1914 -1032
- data/vendor/kreuzberg/src/core/config_validation.rs +949 -0
- data/vendor/kreuzberg/src/core/extractor.rs +1200 -1024
- data/vendor/kreuzberg/src/core/formats.rs +235 -0
- data/vendor/kreuzberg/src/core/io.rs +329 -329
- data/vendor/kreuzberg/src/core/mime.rs +605 -605
- data/vendor/kreuzberg/src/core/mod.rs +61 -45
- data/vendor/kreuzberg/src/core/pipeline.rs +1223 -984
- data/vendor/kreuzberg/src/core/server_config.rs +1220 -0
- data/vendor/kreuzberg/src/embeddings.rs +471 -432
- data/vendor/kreuzberg/src/error.rs +431 -431
- data/vendor/kreuzberg/src/extraction/archive.rs +959 -954
- data/vendor/kreuzberg/src/extraction/capacity.rs +263 -0
- data/vendor/kreuzberg/src/extraction/docx.rs +404 -40
- data/vendor/kreuzberg/src/extraction/email.rs +855 -854
- data/vendor/kreuzberg/src/extraction/excel.rs +697 -688
- data/vendor/kreuzberg/src/extraction/html.rs +1830 -553
- data/vendor/kreuzberg/src/extraction/image.rs +492 -368
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -563
- data/vendor/kreuzberg/src/extraction/markdown.rs +216 -213
- data/vendor/kreuzberg/src/extraction/mod.rs +93 -81
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -287
- data/vendor/kreuzberg/src/extraction/pptx.rs +3102 -3000
- data/vendor/kreuzberg/src/extraction/structured.rs +491 -490
- data/vendor/kreuzberg/src/extraction/table.rs +329 -328
- data/vendor/kreuzberg/src/extraction/text.rs +277 -269
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
- data/vendor/kreuzberg/src/extractors/archive.rs +447 -446
- data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -469
- data/vendor/kreuzberg/src/extractors/docbook.rs +504 -502
- data/vendor/kreuzberg/src/extractors/docx.rs +400 -367
- data/vendor/kreuzberg/src/extractors/email.rs +157 -143
- data/vendor/kreuzberg/src/extractors/epub.rs +696 -707
- data/vendor/kreuzberg/src/extractors/excel.rs +385 -343
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -491
- data/vendor/kreuzberg/src/extractors/html.rs +419 -393
- data/vendor/kreuzberg/src/extractors/image.rs +219 -198
- data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1051
- data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -367
- data/vendor/kreuzberg/src/extractors/latex.rs +653 -652
- data/vendor/kreuzberg/src/extractors/markdown.rs +701 -700
- data/vendor/kreuzberg/src/extractors/mod.rs +429 -365
- data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +635 -634
- data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -528
- data/vendor/kreuzberg/src/extractors/pdf.rs +761 -493
- data/vendor/kreuzberg/src/extractors/pptx.rs +279 -248
- data/vendor/kreuzberg/src/extractors/rst.rs +577 -576
- data/vendor/kreuzberg/src/extractors/rtf.rs +809 -810
- data/vendor/kreuzberg/src/extractors/security.rs +484 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
- data/vendor/kreuzberg/src/extractors/structured.rs +142 -140
- data/vendor/kreuzberg/src/extractors/text.rs +265 -260
- data/vendor/kreuzberg/src/extractors/typst.rs +651 -650
- data/vendor/kreuzberg/src/extractors/xml.rs +147 -135
- data/vendor/kreuzberg/src/image/dpi.rs +164 -164
- data/vendor/kreuzberg/src/image/mod.rs +6 -6
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
- data/vendor/kreuzberg/src/image/resize.rs +89 -89
- data/vendor/kreuzberg/src/keywords/config.rs +154 -154
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
- data/vendor/kreuzberg/src/keywords/processor.rs +275 -267
- data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
- data/vendor/kreuzberg/src/keywords/types.rs +68 -68
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
- data/vendor/kreuzberg/src/language_detection/mod.rs +985 -942
- data/vendor/kreuzberg/src/language_detection/processor.rs +218 -0
- data/vendor/kreuzberg/src/lib.rs +114 -105
- data/vendor/kreuzberg/src/mcp/mod.rs +35 -32
- data/vendor/kreuzberg/src/mcp/server.rs +2090 -1968
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
- data/vendor/kreuzberg/src/ocr/error.rs +37 -37
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
- data/vendor/kreuzberg/src/ocr/language_registry.rs +520 -0
- data/vendor/kreuzberg/src/ocr/mod.rs +60 -58
- data/vendor/kreuzberg/src/ocr/processor.rs +858 -863
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +456 -450
- data/vendor/kreuzberg/src/ocr/types.rs +393 -393
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
- data/vendor/kreuzberg/src/panic_context.rs +154 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +306 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +408 -0
- data/vendor/kreuzberg/src/pdf/error.rs +214 -122
- data/vendor/kreuzberg/src/pdf/fonts.rs +358 -0
- data/vendor/kreuzberg/src/pdf/hierarchy.rs +903 -0
- data/vendor/kreuzberg/src/pdf/images.rs +139 -139
- data/vendor/kreuzberg/src/pdf/metadata.rs +509 -346
- data/vendor/kreuzberg/src/pdf/mod.rs +81 -50
- data/vendor/kreuzberg/src/pdf/rendering.rs +369 -369
- data/vendor/kreuzberg/src/pdf/table.rs +417 -393
- data/vendor/kreuzberg/src/pdf/text.rs +553 -158
- data/vendor/kreuzberg/src/plugins/extractor.rs +1042 -1013
- data/vendor/kreuzberg/src/plugins/mod.rs +212 -209
- data/vendor/kreuzberg/src/plugins/ocr.rs +637 -620
- data/vendor/kreuzberg/src/plugins/processor.rs +650 -642
- data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1337
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
- data/vendor/kreuzberg/src/plugins/validator.rs +967 -956
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
- data/vendor/kreuzberg/src/text/mod.rs +27 -19
- data/vendor/kreuzberg/src/text/quality.rs +710 -697
- data/vendor/kreuzberg/src/text/quality_processor.rs +231 -0
- data/vendor/kreuzberg/src/text/string_utils.rs +229 -217
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +832 -796
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +923 -902
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +148 -147
- data/vendor/kreuzberg/src/text/utf8_validation.rs +193 -0
- data/vendor/kreuzberg/src/types.rs +1713 -903
- data/vendor/kreuzberg/src/utils/mod.rs +31 -17
- data/vendor/kreuzberg/src/utils/pool.rs +503 -0
- data/vendor/kreuzberg/src/utils/pool_sizing.rs +364 -0
- data/vendor/kreuzberg/src/utils/quality.rs +968 -959
- data/vendor/kreuzberg/src/utils/string_pool.rs +761 -0
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
- data/vendor/kreuzberg/tests/api_embed.rs +360 -0
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
- data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +471 -0
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +289 -0
- data/vendor/kreuzberg/tests/api_tests.rs +1472 -966
- data/vendor/kreuzberg/tests/archive_integration.rs +545 -543
- data/vendor/kreuzberg/tests/batch_orchestration.rs +587 -556
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +154 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +328 -316
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
- data/vendor/kreuzberg/tests/concurrency_stress.rs +541 -525
- data/vendor/kreuzberg/tests/config_features.rs +612 -598
- data/vendor/kreuzberg/tests/config_integration_test.rs +753 -0
- data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -415
- data/vendor/kreuzberg/tests/core_integration.rs +519 -510
- data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
- data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +294 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -498
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
- data/vendor/kreuzberg/tests/email_integration.rs +327 -325
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
- data/vendor/kreuzberg/tests/error_handling.rs +402 -393
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
- data/vendor/kreuzberg/tests/format_integration.rs +165 -159
- data/vendor/kreuzberg/tests/helpers/mod.rs +202 -142
- data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
- data/vendor/kreuzberg/tests/image_integration.rs +255 -253
- data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
- data/vendor/kreuzberg/tests/mime_detection.rs +429 -428
- data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -510
- data/vendor/kreuzberg/tests/ocr_errors.rs +698 -676
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +191 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +629 -627
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -695
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
- data/vendor/kreuzberg/tests/page_markers.rs +297 -0
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +301 -0
- data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +589 -0
- data/vendor/kreuzberg/tests/pdf_integration.rs +45 -43
- data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +301 -0
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +475 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +340 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1446 -1411
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -771
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +577 -560
- data/vendor/kreuzberg/tests/plugin_system.rs +927 -921
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -586
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -692
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -776
- data/vendor/kreuzberg/tests/security_validation.rs +416 -415
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
- data/vendor/kreuzberg/tests/test_fastembed.rs +631 -609
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1259
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -647
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
- data/vendor/kreuzberg-ffi/Cargo.toml +67 -0
- data/vendor/kreuzberg-ffi/README.md +851 -0
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +227 -0
- data/vendor/kreuzberg-ffi/build.rs +168 -0
- data/vendor/kreuzberg-ffi/cbindgen.toml +37 -0
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
- data/vendor/kreuzberg-ffi/kreuzberg.h +3012 -0
- data/vendor/kreuzberg-ffi/src/batch_streaming.rs +588 -0
- data/vendor/kreuzberg-ffi/src/config.rs +1341 -0
- data/vendor/kreuzberg-ffi/src/error.rs +901 -0
- data/vendor/kreuzberg-ffi/src/extraction.rs +555 -0
- data/vendor/kreuzberg-ffi/src/helpers.rs +879 -0
- data/vendor/kreuzberg-ffi/src/lib.rs +977 -0
- data/vendor/kreuzberg-ffi/src/memory.rs +493 -0
- data/vendor/kreuzberg-ffi/src/mime.rs +329 -0
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +265 -0
- data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +442 -0
- data/vendor/kreuzberg-ffi/src/plugins/mod.rs +14 -0
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +628 -0
- data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +438 -0
- data/vendor/kreuzberg-ffi/src/plugins/validator.rs +329 -0
- data/vendor/kreuzberg-ffi/src/result.rs +510 -0
- data/vendor/kreuzberg-ffi/src/result_pool.rs +639 -0
- data/vendor/kreuzberg-ffi/src/result_view.rs +773 -0
- data/vendor/kreuzberg-ffi/src/string_intern.rs +568 -0
- data/vendor/kreuzberg-ffi/src/types.rs +363 -0
- data/vendor/kreuzberg-ffi/src/util.rs +210 -0
- data/vendor/kreuzberg-ffi/src/validation.rs +848 -0
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +57 -0
- data/vendor/{rb-sys/LICENSE-MIT → kreuzberg-tesseract/LICENSE} +22 -21
- data/vendor/kreuzberg-tesseract/README.md +399 -0
- data/vendor/kreuzberg-tesseract/build.rs +1127 -0
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
- metadata +196 -45
- data/vendor/kreuzberg/benches/otel_overhead.rs +0 -48
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
- data/vendor/rb-sys/Cargo.lock +0 -393
- data/vendor/rb-sys/Cargo.toml +0 -70
- data/vendor/rb-sys/Cargo.toml.orig +0 -57
- data/vendor/rb-sys/LICENSE-APACHE +0 -190
- data/vendor/rb-sys/bin/release.sh +0 -21
- data/vendor/rb-sys/build/features.rs +0 -108
- data/vendor/rb-sys/build/main.rs +0 -246
- data/vendor/rb-sys/build/stable_api_config.rs +0 -153
- data/vendor/rb-sys/build/version.rs +0 -48
- data/vendor/rb-sys/readme.md +0 -36
- data/vendor/rb-sys/src/bindings.rs +0 -21
- data/vendor/rb-sys/src/hidden.rs +0 -11
- data/vendor/rb-sys/src/lib.rs +0 -34
- data/vendor/rb-sys/src/macros.rs +0 -371
- data/vendor/rb-sys/src/memory.rs +0 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
- data/vendor/rb-sys/src/special_consts.rs +0 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -317
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -315
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -326
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -327
- data/vendor/rb-sys/src/stable_api.rs +0 -261
- data/vendor/rb-sys/src/symbol.rs +0 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +0 -332
- data/vendor/rb-sys/src/utils.rs +0 -89
- data/vendor/rb-sys/src/value_type.rs +0 -7
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
//! Validator plugin FFI bindings
|
|
2
|
+
//!
|
|
3
|
+
//! Provides FFI bindings for registering and managing custom validators.
|
|
4
|
+
|
|
5
|
+
use std::ffi::{CStr, CString};
|
|
6
|
+
use std::os::raw::c_char;
|
|
7
|
+
use std::ptr;
|
|
8
|
+
use std::sync::Arc;
|
|
9
|
+
|
|
10
|
+
use async_trait::async_trait;
|
|
11
|
+
use kreuzberg::core::config::ExtractionConfig;
|
|
12
|
+
use kreuzberg::plugins::Plugin;
|
|
13
|
+
use kreuzberg::types::ExtractionResult;
|
|
14
|
+
use kreuzberg::{KreuzbergError, Result};
|
|
15
|
+
|
|
16
|
+
use crate::helpers::{clear_last_error, set_last_error};
|
|
17
|
+
use crate::{ffi_panic_guard, ffi_panic_guard_bool};
|
|
18
|
+
|
|
19
|
+
/// Validator callback function type for FFI.
|
|
20
|
+
///
|
|
21
|
+
/// This is a C function pointer that validates extraction results.
|
|
22
|
+
///
|
|
23
|
+
/// # Safety
|
|
24
|
+
///
|
|
25
|
+
/// The callback must:
|
|
26
|
+
/// - Not store the result_json pointer (it's only valid for the duration of the call)
|
|
27
|
+
/// - Return a valid null-terminated UTF-8 string (error message) if validation fails
|
|
28
|
+
/// - Return NULL if validation passes
|
|
29
|
+
/// - The returned string must be freeable by kreuzberg_free_string
|
|
30
|
+
type ValidatorCallback = unsafe extern "C" fn(result_json: *const c_char) -> *mut c_char;
|
|
31
|
+
|
|
32
|
+
/// FFI wrapper for custom Validators registered from Java/C.
|
|
33
|
+
///
|
|
34
|
+
/// This struct wraps a C function pointer and implements the Validator trait,
|
|
35
|
+
/// allowing custom validation implementations from FFI languages to be registered
|
|
36
|
+
/// and used within the Rust extraction pipeline.
|
|
37
|
+
struct FfiValidator {
|
|
38
|
+
name: String,
|
|
39
|
+
callback: ValidatorCallback,
|
|
40
|
+
priority: i32,
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
impl FfiValidator {
|
|
44
|
+
fn new(name: String, callback: ValidatorCallback, priority: i32) -> Self {
|
|
45
|
+
Self {
|
|
46
|
+
name,
|
|
47
|
+
callback,
|
|
48
|
+
priority,
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
impl Plugin for FfiValidator {
|
|
54
|
+
fn name(&self) -> &str {
|
|
55
|
+
&self.name
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
fn version(&self) -> String {
|
|
59
|
+
"ffi-1.0.0".to_string()
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
fn initialize(&self) -> Result<()> {
|
|
63
|
+
Ok(())
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
fn shutdown(&self) -> Result<()> {
|
|
67
|
+
Ok(())
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
#[async_trait]
|
|
72
|
+
impl kreuzberg::plugins::Validator for FfiValidator {
|
|
73
|
+
fn priority(&self) -> i32 {
|
|
74
|
+
self.priority
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
async fn validate(&self, result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
|
78
|
+
let result_json = serde_json::to_string(result).map_err(|e| KreuzbergError::Validation {
|
|
79
|
+
message: format!("Failed to serialize ExtractionResult: {}", e),
|
|
80
|
+
source: Some(Box::new(e)),
|
|
81
|
+
})?;
|
|
82
|
+
|
|
83
|
+
let callback = self.callback;
|
|
84
|
+
let validator_name = self.name.clone();
|
|
85
|
+
let result_json_owned = result_json.clone();
|
|
86
|
+
|
|
87
|
+
let error_msg = tokio::task::spawn_blocking(move || {
|
|
88
|
+
let result_cstring = CString::new(result_json_owned).map_err(|e| KreuzbergError::Validation {
|
|
89
|
+
message: format!("Failed to create C string from result JSON: {}", e),
|
|
90
|
+
source: Some(Box::new(e)),
|
|
91
|
+
})?;
|
|
92
|
+
|
|
93
|
+
let error_ptr = unsafe { callback(result_cstring.as_ptr()) };
|
|
94
|
+
|
|
95
|
+
if error_ptr.is_null() {
|
|
96
|
+
return Ok::<Option<String>, KreuzbergError>(None);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
let error_cstr = unsafe { CStr::from_ptr(error_ptr) };
|
|
100
|
+
let error_msg = error_cstr
|
|
101
|
+
.to_str()
|
|
102
|
+
.map_err(|e| KreuzbergError::Plugin {
|
|
103
|
+
message: format!("Validator returned invalid UTF-8: {}", e),
|
|
104
|
+
plugin_name: validator_name.clone(),
|
|
105
|
+
})?
|
|
106
|
+
.to_string();
|
|
107
|
+
|
|
108
|
+
unsafe { crate::memory::kreuzberg_free_string(error_ptr) };
|
|
109
|
+
|
|
110
|
+
Ok(Some(error_msg))
|
|
111
|
+
})
|
|
112
|
+
.await
|
|
113
|
+
.map_err(|e| KreuzbergError::Plugin {
|
|
114
|
+
message: format!("Validator task panicked: {}", e),
|
|
115
|
+
plugin_name: self.name.clone(),
|
|
116
|
+
})??;
|
|
117
|
+
|
|
118
|
+
if let Some(msg) = error_msg {
|
|
119
|
+
return Err(KreuzbergError::Validation {
|
|
120
|
+
message: msg,
|
|
121
|
+
source: None,
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
Ok(())
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
/// Register a custom Validator via FFI callback.
|
|
130
|
+
///
|
|
131
|
+
/// # Safety
|
|
132
|
+
///
|
|
133
|
+
/// - `name` must be a valid null-terminated C string
|
|
134
|
+
/// - `callback` must be a valid function pointer that:
|
|
135
|
+
/// - Does not store the result_json pointer
|
|
136
|
+
/// - Returns a null-terminated UTF-8 string (error message) if validation fails
|
|
137
|
+
/// - Returns NULL if validation passes
|
|
138
|
+
/// - The returned string must be freeable by kreuzberg_free_string
|
|
139
|
+
/// - `priority` determines the order of validation (higher priority runs first)
|
|
140
|
+
/// - Returns true on success, false on error (check kreuzberg_last_error)
|
|
141
|
+
///
|
|
142
|
+
/// # Example (C)
|
|
143
|
+
///
|
|
144
|
+
/// ```c
|
|
145
|
+
/// char* my_validator(const char* result_json) {
|
|
146
|
+
/// // Parse result_json, validate it
|
|
147
|
+
/// // Return error message if validation fails, NULL if passes
|
|
148
|
+
/// if (invalid) {
|
|
149
|
+
/// return strdup("Validation failed: content too short");
|
|
150
|
+
/// }
|
|
151
|
+
/// return NULL;
|
|
152
|
+
/// }
|
|
153
|
+
///
|
|
154
|
+
/// bool success = kreuzberg_register_validator("my-validator", my_validator, 100);
|
|
155
|
+
/// if (!success) {
|
|
156
|
+
/// const char* error = kreuzberg_last_error();
|
|
157
|
+
/// printf("Failed to register: %s\n", error);
|
|
158
|
+
/// }
|
|
159
|
+
/// ```
|
|
160
|
+
#[unsafe(no_mangle)]
|
|
161
|
+
pub unsafe extern "C" fn kreuzberg_register_validator(
|
|
162
|
+
name: *const c_char,
|
|
163
|
+
callback: ValidatorCallback,
|
|
164
|
+
priority: i32,
|
|
165
|
+
) -> bool {
|
|
166
|
+
ffi_panic_guard_bool!("kreuzberg_register_validator", {
|
|
167
|
+
clear_last_error();
|
|
168
|
+
|
|
169
|
+
if name.is_null() {
|
|
170
|
+
set_last_error("Validator name cannot be NULL".to_string());
|
|
171
|
+
return false;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
let name_str = match unsafe { CStr::from_ptr(name) }.to_str() {
|
|
175
|
+
Ok(s) => s,
|
|
176
|
+
Err(e) => {
|
|
177
|
+
set_last_error(format!("Invalid UTF-8 in Validator name: {}", e));
|
|
178
|
+
return false;
|
|
179
|
+
}
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
if name_str.is_empty() {
|
|
183
|
+
set_last_error("Plugin name cannot be empty".to_string());
|
|
184
|
+
return false;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
if name_str.chars().any(|c| c.is_whitespace()) {
|
|
188
|
+
set_last_error("Plugin name cannot contain whitespace".to_string());
|
|
189
|
+
return false;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
let validator = Arc::new(FfiValidator::new(name_str.to_string(), callback, priority));
|
|
193
|
+
|
|
194
|
+
let registry = kreuzberg::plugins::registry::get_validator_registry();
|
|
195
|
+
let mut registry_guard = match registry.write() {
|
|
196
|
+
Ok(guard) => guard,
|
|
197
|
+
Err(e) => {
|
|
198
|
+
// ~keep: Lock poisoning indicates a panic in another thread holding the lock.
|
|
199
|
+
set_last_error(format!("Failed to acquire registry write lock: {}", e));
|
|
200
|
+
return false;
|
|
201
|
+
}
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
match registry_guard.register(validator) {
|
|
205
|
+
Ok(()) => true,
|
|
206
|
+
Err(e) => {
|
|
207
|
+
set_last_error(format!("Failed to register Validator: {}", e));
|
|
208
|
+
false
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
})
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
/// Unregister a Validator by name.
|
|
215
|
+
///
|
|
216
|
+
/// # Safety
|
|
217
|
+
///
|
|
218
|
+
/// - `name` must be a valid null-terminated C string
|
|
219
|
+
/// - Returns true on success, false on error (check kreuzberg_last_error)
|
|
220
|
+
///
|
|
221
|
+
/// # Example (C)
|
|
222
|
+
///
|
|
223
|
+
/// ```c
|
|
224
|
+
/// bool success = kreuzberg_unregister_validator("my-validator");
|
|
225
|
+
/// if (!success) {
|
|
226
|
+
/// const char* error = kreuzberg_last_error();
|
|
227
|
+
/// printf("Failed to unregister: %s\n", error);
|
|
228
|
+
/// }
|
|
229
|
+
/// ```
|
|
230
|
+
#[unsafe(no_mangle)]
|
|
231
|
+
pub unsafe extern "C" fn kreuzberg_unregister_validator(name: *const c_char) -> bool {
|
|
232
|
+
ffi_panic_guard_bool!("kreuzberg_unregister_validator", {
|
|
233
|
+
clear_last_error();
|
|
234
|
+
|
|
235
|
+
if name.is_null() {
|
|
236
|
+
set_last_error("Validator name cannot be NULL".to_string());
|
|
237
|
+
return false;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
let name_str = match unsafe { CStr::from_ptr(name) }.to_str() {
|
|
241
|
+
Ok(s) => s,
|
|
242
|
+
Err(e) => {
|
|
243
|
+
set_last_error(format!("Invalid UTF-8 in Validator name: {}", e));
|
|
244
|
+
return false;
|
|
245
|
+
}
|
|
246
|
+
};
|
|
247
|
+
|
|
248
|
+
let registry = kreuzberg::plugins::registry::get_validator_registry();
|
|
249
|
+
let mut registry_guard = match registry.write() {
|
|
250
|
+
Ok(guard) => guard,
|
|
251
|
+
Err(e) => {
|
|
252
|
+
// ~keep: Lock poisoning indicates a panic in another thread holding the lock.
|
|
253
|
+
set_last_error(format!("Failed to acquire registry write lock: {}", e));
|
|
254
|
+
return false;
|
|
255
|
+
}
|
|
256
|
+
};
|
|
257
|
+
|
|
258
|
+
match registry_guard.remove(name_str) {
|
|
259
|
+
Ok(()) => true,
|
|
260
|
+
Err(e) => {
|
|
261
|
+
set_last_error(format!("Failed to remove Validator: {}", e));
|
|
262
|
+
false
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
})
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
/// Clear all registered Validators.
|
|
269
|
+
///
|
|
270
|
+
/// # Safety
|
|
271
|
+
///
|
|
272
|
+
/// - Removes all validators. Subsequent extractions will skip custom validation.
|
|
273
|
+
/// - Returns true on success, false on error.
|
|
274
|
+
#[unsafe(no_mangle)]
|
|
275
|
+
pub unsafe extern "C" fn kreuzberg_clear_validators() -> bool {
|
|
276
|
+
ffi_panic_guard_bool!("kreuzberg_clear_validators", {
|
|
277
|
+
clear_last_error();
|
|
278
|
+
|
|
279
|
+
let registry = kreuzberg::plugins::registry::get_validator_registry();
|
|
280
|
+
let mut registry_guard = match registry.write() {
|
|
281
|
+
Ok(guard) => guard,
|
|
282
|
+
Err(e) => {
|
|
283
|
+
// ~keep: Lock poisoning indicates a panic in another thread holding the lock.
|
|
284
|
+
set_last_error(format!("Failed to acquire registry write lock: {}", e));
|
|
285
|
+
return false;
|
|
286
|
+
}
|
|
287
|
+
};
|
|
288
|
+
|
|
289
|
+
*registry_guard = Default::default();
|
|
290
|
+
true
|
|
291
|
+
})
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/// List all registered Validators as a JSON array of names.
|
|
295
|
+
///
|
|
296
|
+
/// # Safety
|
|
297
|
+
///
|
|
298
|
+
/// - Returned string must be freed with `kreuzberg_free_string`.
|
|
299
|
+
/// - Returns NULL on error (check `kreuzberg_last_error`).
|
|
300
|
+
#[unsafe(no_mangle)]
|
|
301
|
+
pub unsafe extern "C" fn kreuzberg_list_validators() -> *mut c_char {
|
|
302
|
+
ffi_panic_guard!("kreuzberg_list_validators", {
|
|
303
|
+
clear_last_error();
|
|
304
|
+
|
|
305
|
+
let registry = kreuzberg::plugins::registry::get_validator_registry();
|
|
306
|
+
let registry_guard = match registry.read() {
|
|
307
|
+
Ok(guard) => guard,
|
|
308
|
+
Err(e) => {
|
|
309
|
+
// ~keep: Lock poisoning indicates a panic in another thread holding the lock.
|
|
310
|
+
set_last_error(format!("Failed to acquire registry read lock: {}", e));
|
|
311
|
+
return ptr::null_mut();
|
|
312
|
+
}
|
|
313
|
+
};
|
|
314
|
+
|
|
315
|
+
match serde_json::to_string(®istry_guard.list()) {
|
|
316
|
+
Ok(json) => match CString::new(json) {
|
|
317
|
+
Ok(cstr) => cstr.into_raw(),
|
|
318
|
+
Err(e) => {
|
|
319
|
+
set_last_error(format!("Failed to create C string: {}", e));
|
|
320
|
+
ptr::null_mut()
|
|
321
|
+
}
|
|
322
|
+
},
|
|
323
|
+
Err(e) => {
|
|
324
|
+
set_last_error(format!("Failed to serialize Validator list: {}", e));
|
|
325
|
+
ptr::null_mut()
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
})
|
|
329
|
+
}
|