kreuzberg 4.0.0.pre.rc.13 → 4.0.0.pre.rc.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -14
- data/.rspec +3 -3
- data/.rubocop.yaml +1 -1
- data/.rubocop.yml +538 -538
- data/Gemfile +8 -8
- data/Gemfile.lock +104 -2
- data/README.md +454 -454
- data/Rakefile +33 -25
- data/Steepfile +47 -47
- data/examples/async_patterns.rb +341 -341
- data/ext/kreuzberg_rb/extconf.rb +45 -45
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -2
- data/ext/kreuzberg_rb/native/Cargo.lock +6750 -6941
- data/ext/kreuzberg_rb/native/Cargo.toml +53 -54
- data/ext/kreuzberg_rb/native/README.md +425 -425
- data/ext/kreuzberg_rb/native/build.rs +52 -15
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
- data/ext/kreuzberg_rb/native/include/strings.h +20 -20
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
- data/ext/kreuzberg_rb/native/src/lib.rs +3158 -3158
- data/extconf.rb +28 -28
- data/kreuzberg.gemspec +214 -214
- data/lib/kreuzberg/api_proxy.rb +142 -142
- data/lib/kreuzberg/cache_api.rb +81 -81
- data/lib/kreuzberg/cli.rb +55 -55
- data/lib/kreuzberg/cli_proxy.rb +127 -127
- data/lib/kreuzberg/config.rb +724 -724
- data/lib/kreuzberg/error_context.rb +80 -80
- data/lib/kreuzberg/errors.rb +118 -118
- data/lib/kreuzberg/extraction_api.rb +340 -340
- data/lib/kreuzberg/mcp_proxy.rb +186 -186
- data/lib/kreuzberg/ocr_backend_protocol.rb +113 -113
- data/lib/kreuzberg/post_processor_protocol.rb +86 -86
- data/lib/kreuzberg/result.rb +279 -279
- data/lib/kreuzberg/setup_lib_path.rb +80 -80
- data/lib/kreuzberg/validator_protocol.rb +89 -89
- data/lib/kreuzberg/version.rb +5 -5
- data/lib/kreuzberg.rb +109 -109
- data/lib/{pdfium.dll → libpdfium.so} +0 -0
- data/sig/kreuzberg/internal.rbs +184 -184
- data/sig/kreuzberg.rbs +546 -546
- data/spec/binding/cache_spec.rb +227 -227
- data/spec/binding/cli_proxy_spec.rb +85 -85
- data/spec/binding/cli_spec.rb +55 -55
- data/spec/binding/config_spec.rb +345 -345
- data/spec/binding/config_validation_spec.rb +283 -283
- data/spec/binding/error_handling_spec.rb +213 -213
- data/spec/binding/errors_spec.rb +66 -66
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
- data/spec/binding/plugins/postprocessor_spec.rb +269 -269
- data/spec/binding/plugins/validator_spec.rb +274 -274
- data/spec/fixtures/config.toml +39 -39
- data/spec/fixtures/config.yaml +41 -41
- data/spec/fixtures/invalid_config.toml +4 -4
- data/spec/smoke/package_spec.rb +178 -178
- data/spec/spec_helper.rb +42 -42
- data/vendor/Cargo.toml +2 -2
- data/vendor/kreuzberg/Cargo.toml +5 -5
- data/vendor/kreuzberg/README.md +230 -230
- data/vendor/kreuzberg/benches/otel_overhead.rs +48 -48
- data/vendor/kreuzberg/build.rs +887 -843
- data/vendor/kreuzberg/src/api/error.rs +81 -81
- data/vendor/kreuzberg/src/api/handlers.rs +199 -199
- data/vendor/kreuzberg/src/api/mod.rs +87 -79
- data/vendor/kreuzberg/src/api/server.rs +353 -353
- data/vendor/kreuzberg/src/api/types.rs +170 -170
- data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
- data/vendor/kreuzberg/src/chunking/mod.rs +1877 -1877
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -220
- data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
- data/vendor/kreuzberg/src/core/config.rs +1080 -1080
- data/vendor/kreuzberg/src/core/extractor.rs +1156 -1156
- data/vendor/kreuzberg/src/core/io.rs +329 -329
- data/vendor/kreuzberg/src/core/mime.rs +605 -605
- data/vendor/kreuzberg/src/core/mod.rs +47 -47
- data/vendor/kreuzberg/src/core/pipeline.rs +1184 -1184
- data/vendor/kreuzberg/src/embeddings.rs +500 -500
- data/vendor/kreuzberg/src/error.rs +431 -431
- data/vendor/kreuzberg/src/extraction/archive.rs +954 -954
- data/vendor/kreuzberg/src/extraction/docx.rs +398 -398
- data/vendor/kreuzberg/src/extraction/email.rs +854 -854
- data/vendor/kreuzberg/src/extraction/excel.rs +688 -688
- data/vendor/kreuzberg/src/extraction/html.rs +634 -601
- data/vendor/kreuzberg/src/extraction/image.rs +491 -491
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -574
- data/vendor/kreuzberg/src/extraction/markdown.rs +213 -213
- data/vendor/kreuzberg/src/extraction/mod.rs +81 -81
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -284
- data/vendor/kreuzberg/src/extraction/pptx.rs +3100 -3100
- data/vendor/kreuzberg/src/extraction/structured.rs +490 -490
- data/vendor/kreuzberg/src/extraction/table.rs +328 -328
- data/vendor/kreuzberg/src/extraction/text.rs +269 -269
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
- data/vendor/kreuzberg/src/extractors/archive.rs +447 -447
- data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -470
- data/vendor/kreuzberg/src/extractors/docbook.rs +504 -504
- data/vendor/kreuzberg/src/extractors/docx.rs +400 -400
- data/vendor/kreuzberg/src/extractors/email.rs +157 -157
- data/vendor/kreuzberg/src/extractors/epub.rs +708 -708
- data/vendor/kreuzberg/src/extractors/excel.rs +345 -345
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -492
- data/vendor/kreuzberg/src/extractors/html.rs +407 -407
- data/vendor/kreuzberg/src/extractors/image.rs +219 -219
- data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1054
- data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -368
- data/vendor/kreuzberg/src/extractors/latex.rs +653 -653
- data/vendor/kreuzberg/src/extractors/markdown.rs +701 -701
- data/vendor/kreuzberg/src/extractors/mod.rs +429 -429
- data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +635 -635
- data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -529
- data/vendor/kreuzberg/src/extractors/pdf.rs +749 -749
- data/vendor/kreuzberg/src/extractors/pptx.rs +267 -267
- data/vendor/kreuzberg/src/extractors/rst.rs +577 -577
- data/vendor/kreuzberg/src/extractors/rtf.rs +809 -809
- data/vendor/kreuzberg/src/extractors/security.rs +484 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
- data/vendor/kreuzberg/src/extractors/structured.rs +142 -142
- data/vendor/kreuzberg/src/extractors/text.rs +265 -265
- data/vendor/kreuzberg/src/extractors/typst.rs +651 -651
- data/vendor/kreuzberg/src/extractors/xml.rs +147 -147
- data/vendor/kreuzberg/src/image/dpi.rs +164 -164
- data/vendor/kreuzberg/src/image/mod.rs +6 -6
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
- data/vendor/kreuzberg/src/image/resize.rs +89 -89
- data/vendor/kreuzberg/src/keywords/config.rs +154 -154
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
- data/vendor/kreuzberg/src/keywords/processor.rs +275 -275
- data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
- data/vendor/kreuzberg/src/keywords/types.rs +68 -68
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
- data/vendor/kreuzberg/src/language_detection/mod.rs +985 -985
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -219
- data/vendor/kreuzberg/src/lib.rs +113 -113
- data/vendor/kreuzberg/src/mcp/mod.rs +35 -35
- data/vendor/kreuzberg/src/mcp/server.rs +2076 -2076
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
- data/vendor/kreuzberg/src/ocr/error.rs +37 -37
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
- data/vendor/kreuzberg/src/ocr/mod.rs +58 -58
- data/vendor/kreuzberg/src/ocr/processor.rs +863 -863
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +452 -452
- data/vendor/kreuzberg/src/ocr/types.rs +393 -393
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
- data/vendor/kreuzberg/src/panic_context.rs +154 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +44 -44
- data/vendor/kreuzberg/src/pdf/bundled.rs +452 -346
- data/vendor/kreuzberg/src/pdf/error.rs +130 -130
- data/vendor/kreuzberg/src/pdf/images.rs +139 -139
- data/vendor/kreuzberg/src/pdf/metadata.rs +489 -489
- data/vendor/kreuzberg/src/pdf/mod.rs +68 -68
- data/vendor/kreuzberg/src/pdf/rendering.rs +368 -368
- data/vendor/kreuzberg/src/pdf/table.rs +420 -420
- data/vendor/kreuzberg/src/pdf/text.rs +240 -240
- data/vendor/kreuzberg/src/plugins/extractor.rs +1044 -1044
- data/vendor/kreuzberg/src/plugins/mod.rs +212 -212
- data/vendor/kreuzberg/src/plugins/ocr.rs +639 -639
- data/vendor/kreuzberg/src/plugins/processor.rs +650 -650
- data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1339
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
- data/vendor/kreuzberg/src/plugins/validator.rs +967 -967
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
- data/vendor/kreuzberg/src/text/mod.rs +25 -25
- data/vendor/kreuzberg/src/text/quality.rs +697 -697
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -219
- data/vendor/kreuzberg/src/text/string_utils.rs +217 -217
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -796
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -902
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -147
- data/vendor/kreuzberg/src/types.rs +1055 -1055
- data/vendor/kreuzberg/src/utils/mod.rs +17 -17
- data/vendor/kreuzberg/src/utils/quality.rs +959 -959
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
- data/vendor/kreuzberg/tests/api_tests.rs +966 -966
- data/vendor/kreuzberg/tests/archive_integration.rs +545 -545
- data/vendor/kreuzberg/tests/batch_orchestration.rs +556 -556
- data/vendor/kreuzberg/tests/batch_processing.rs +318 -318
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
- data/vendor/kreuzberg/tests/concurrency_stress.rs +533 -533
- data/vendor/kreuzberg/tests/config_features.rs +612 -612
- data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -416
- data/vendor/kreuzberg/tests/core_integration.rs +510 -510
- data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -500
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
- data/vendor/kreuzberg/tests/email_integration.rs +327 -327
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
- data/vendor/kreuzberg/tests/error_handling.rs +402 -402
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
- data/vendor/kreuzberg/tests/format_integration.rs +165 -164
- data/vendor/kreuzberg/tests/helpers/mod.rs +142 -142
- data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
- data/vendor/kreuzberg/tests/image_integration.rs +255 -255
- data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
- data/vendor/kreuzberg/tests/mime_detection.rs +429 -429
- data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -514
- data/vendor/kreuzberg/tests/ocr_errors.rs +698 -698
- data/vendor/kreuzberg/tests/ocr_quality.rs +629 -629
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -674
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
- data/vendor/kreuzberg/tests/pdf_integration.rs +45 -45
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -374
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1436 -1436
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -776
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +560 -560
- data/vendor/kreuzberg/tests/plugin_system.rs +927 -927
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -587
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -694
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -775
- data/vendor/kreuzberg/tests/security_validation.rs +416 -416
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
- data/vendor/kreuzberg/tests/test_fastembed.rs +631 -631
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1260
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -648
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -13
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -2
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -2933
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- data/vendor/kreuzberg-tesseract/LICENSE +22 -22
- data/vendor/kreuzberg-tesseract/README.md +399 -399
- data/vendor/kreuzberg-tesseract/build.rs +1354 -1354
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -71
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -199
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -1371
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -77
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -297
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -81
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -145
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -57
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -197
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -253
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -286
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -183
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -211
- data/vendor/rb-sys/.cargo_vcs_info.json +5 -5
- data/vendor/rb-sys/Cargo.lock +393 -393
- data/vendor/rb-sys/Cargo.toml +70 -70
- data/vendor/rb-sys/Cargo.toml.orig +57 -57
- data/vendor/rb-sys/LICENSE-APACHE +190 -190
- data/vendor/rb-sys/LICENSE-MIT +21 -21
- data/vendor/rb-sys/build/features.rs +111 -111
- data/vendor/rb-sys/build/main.rs +286 -286
- data/vendor/rb-sys/build/stable_api_config.rs +155 -155
- data/vendor/rb-sys/build/version.rs +50 -50
- data/vendor/rb-sys/readme.md +36 -36
- data/vendor/rb-sys/src/bindings.rs +21 -21
- data/vendor/rb-sys/src/hidden.rs +11 -11
- data/vendor/rb-sys/src/lib.rs +35 -35
- data/vendor/rb-sys/src/macros.rs +371 -371
- data/vendor/rb-sys/src/memory.rs +53 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +38 -38
- data/vendor/rb-sys/src/special_consts.rs +31 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +179 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +257 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +324 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +332 -332
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +325 -325
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +323 -323
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +339 -339
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +339 -339
- data/vendor/rb-sys/src/stable_api.rs +260 -260
- data/vendor/rb-sys/src/symbol.rs +31 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +330 -330
- data/vendor/rb-sys/src/utils.rs +89 -89
- data/vendor/rb-sys/src/value_type.rs +7 -7
- metadata +81 -22
- data/vendor/kreuzberg-ffi/Cargo.toml +0 -63
- data/vendor/kreuzberg-ffi/README.md +0 -851
- data/vendor/kreuzberg-ffi/build.rs +0 -176
- data/vendor/kreuzberg-ffi/cbindgen.toml +0 -27
- data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +0 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +0 -1087
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -3616
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +0 -247
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +0 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +0 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +0 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +0 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +0 -470
data/sig/kreuzberg/internal.rbs
CHANGED
|
@@ -1,184 +1,184 @@
|
|
|
1
|
-
# Internal modules - not part of public API but fully typed
|
|
2
|
-
|
|
3
|
-
module Kreuzberg
|
|
4
|
-
module SetupLibPath
|
|
5
|
-
# Public method
|
|
6
|
-
def configure: () -> void
|
|
7
|
-
def self.configure: () -> void
|
|
8
|
-
|
|
9
|
-
# Private methods (module_function makes them both instance and class methods)
|
|
10
|
-
def prepend_env: (String key, String value, ?separator: String) -> void
|
|
11
|
-
def self.prepend_env: (String key, String value, ?separator: String) -> void
|
|
12
|
-
def fix_macos_install_name: (String lib_dir) -> void
|
|
13
|
-
def self.fix_macos_install_name: (String lib_dir) -> void
|
|
14
|
-
def macos_bundle: (String lib_dir) -> String?
|
|
15
|
-
def self.macos_bundle: (String lib_dir) -> String?
|
|
16
|
-
def ensure_install_name: (String bundle) -> void
|
|
17
|
-
def self.ensure_install_name: (String bundle) -> void
|
|
18
|
-
def ensure_loader_rpath: (String bundle) -> void
|
|
19
|
-
def self.ensure_loader_rpath: (String bundle) -> void
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
module CLI
|
|
23
|
-
# All methods are both instance and class methods due to module_function
|
|
24
|
-
def extract: (String path, ?output: String, ?ocr: bool) -> String
|
|
25
|
-
def self.extract: (String path, ?output: String, ?ocr: bool) -> String
|
|
26
|
-
def detect: (String path) -> String
|
|
27
|
-
def self.detect: (String path) -> String
|
|
28
|
-
def version: () -> String
|
|
29
|
-
def self.version: () -> String
|
|
30
|
-
def help: () -> String
|
|
31
|
-
def self.help: () -> String
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
module CLIProxy
|
|
35
|
-
class Error < Kreuzberg::Errors::Error
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
class MissingBinaryError < Error
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
class CLIExecutionError < Error
|
|
42
|
-
attr_reader stderr: String
|
|
43
|
-
attr_reader status: Integer
|
|
44
|
-
|
|
45
|
-
def initialize: (String message, stderr: String, status: Integer) -> void
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# All methods are both instance and class methods due to module_function
|
|
49
|
-
def call: (Array[String] argv) -> String
|
|
50
|
-
def self.call: (Array[String] argv) -> String
|
|
51
|
-
def find_cli_binary: () -> Pathname
|
|
52
|
-
def self.find_cli_binary: () -> Pathname
|
|
53
|
-
def root_path: () -> Pathname
|
|
54
|
-
def self.root_path: () -> Pathname
|
|
55
|
-
def lib_path: () -> Pathname
|
|
56
|
-
def self.lib_path: () -> Pathname
|
|
57
|
-
def search_paths: (String binary_name) -> Array[Pathname]
|
|
58
|
-
def self.search_paths: (String binary_name) -> Array[Pathname]
|
|
59
|
-
def missing_binary_message: () -> String
|
|
60
|
-
def self.missing_binary_message: () -> String
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
module APIProxy
|
|
64
|
-
class Error < Kreuzberg::Errors::Error
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
class MissingBinaryError < Error
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
class ServerError < Error
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
class Server
|
|
74
|
-
attr_reader port: Integer
|
|
75
|
-
attr_reader host: String
|
|
76
|
-
attr_reader process: Process::Status?
|
|
77
|
-
|
|
78
|
-
def initialize: (?port: Integer, ?host: String) -> void
|
|
79
|
-
def start: () -> void
|
|
80
|
-
def stop: () -> void
|
|
81
|
-
def running?: () -> bool
|
|
82
|
-
def health_check: () -> bool
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Module function methods are both instance and class methods
|
|
86
|
-
def run: (?port: Integer, ?host: String) { (Server) -> untyped } -> untyped
|
|
87
|
-
def self.run: (?port: Integer, ?host: String) { (Server) -> untyped } -> untyped
|
|
88
|
-
def start_server: (?port: Integer?) -> Server
|
|
89
|
-
def self.start_server: (?port: Integer?) -> Server
|
|
90
|
-
def extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Kreuzberg::Result
|
|
91
|
-
def self.extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Kreuzberg::Result
|
|
92
|
-
def batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Array[Kreuzberg::Result]
|
|
93
|
-
def self.batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Array[Kreuzberg::Result]
|
|
94
|
-
def find_api_binary: () -> Pathname
|
|
95
|
-
def self.find_api_binary: () -> Pathname
|
|
96
|
-
def missing_binary_message: () -> String
|
|
97
|
-
def self.missing_binary_message: () -> String
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
module MCPProxy
|
|
101
|
-
class Error < Kreuzberg::Errors::Error
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
class MissingBinaryError < Error
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
class ServerError < Error
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
class Server
|
|
111
|
-
attr_reader pid: Integer?
|
|
112
|
-
attr_reader transport: String
|
|
113
|
-
|
|
114
|
-
def initialize: (?transport: String) -> void
|
|
115
|
-
def start: () -> (Integer | nil)
|
|
116
|
-
def stop: () -> void
|
|
117
|
-
def running?: () -> bool
|
|
118
|
-
def send_message: (Hash[untyped, untyped] message) -> void
|
|
119
|
-
def read_message: () -> (Hash[untyped, untyped] | nil)
|
|
120
|
-
|
|
121
|
-
private
|
|
122
|
-
|
|
123
|
-
def start_stdio: (Pathname binary) -> nil
|
|
124
|
-
def start_sse: (Pathname binary) -> (Integer | nil)
|
|
125
|
-
def close_pipes: () -> void
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
# Module function methods are both instance and class methods
|
|
129
|
-
def run: (?transport: String) { (Server) -> untyped } -> untyped
|
|
130
|
-
def self.run: (?transport: String) { (Server) -> untyped } -> untyped
|
|
131
|
-
def extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?) -> Kreuzberg::Result
|
|
132
|
-
def self.extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?) -> Kreuzberg::Result
|
|
133
|
-
def batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?) -> Array[Kreuzberg::Result]
|
|
134
|
-
def self.batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?) -> Array[Kreuzberg::Result]
|
|
135
|
-
def find_mcp_binary: () -> Pathname
|
|
136
|
-
def self.find_mcp_binary: () -> Pathname
|
|
137
|
-
def missing_binary_message: () -> String
|
|
138
|
-
def self.missing_binary_message: () -> String
|
|
139
|
-
end
|
|
140
|
-
|
|
141
|
-
# Cache API module (prepended to Kreuzberg singleton class)
|
|
142
|
-
module CacheAPI
|
|
143
|
-
@__cache_tracker: Hash[Symbol, Integer]
|
|
144
|
-
|
|
145
|
-
def clear_cache: () -> void
|
|
146
|
-
def cache_stats: () -> Hash[Symbol | String, Integer]
|
|
147
|
-
|
|
148
|
-
private
|
|
149
|
-
|
|
150
|
-
def record_cache_entry!: (Kreuzberg::Result | Array[Kreuzberg::Result] results, Hash[Symbol, untyped] opts) -> void
|
|
151
|
-
def reset_cache_tracker!: () -> nil
|
|
152
|
-
|
|
153
|
-
# Native methods (defined in Rust)
|
|
154
|
-
def native_clear_cache: () -> void
|
|
155
|
-
def native_cache_stats: () -> Hash[Symbol | String, Integer]
|
|
156
|
-
end
|
|
157
|
-
|
|
158
|
-
# Extraction API module (prepended to Kreuzberg singleton class)
|
|
159
|
-
module ExtractionAPI
|
|
160
|
-
def extract_file_sync: (String | Pathname path, ?mime_type: String?, ?config: config_input?) -> Result
|
|
161
|
-
def extract_bytes_sync: (String data, String mime_type, ?config: config_input?) -> Result
|
|
162
|
-
def batch_extract_files_sync: (Array[String | Pathname] paths, ?config: config_input?) -> Array[Result]
|
|
163
|
-
def extract_file: (String | Pathname path, ?mime_type: String?, ?config: config_input?) -> Result
|
|
164
|
-
def extract_bytes: (String data, String mime_type, ?config: config_input?) -> Result
|
|
165
|
-
def batch_extract_files: (Array[String | Pathname] paths, ?config: config_input?) -> Array[Result]
|
|
166
|
-
def batch_extract_bytes_sync: (Array[String] data_array, Array[String] mime_types, ?config: config_input?) -> Array[Result]
|
|
167
|
-
def batch_extract_bytes: (Array[String] data_array, Array[String] mime_types, ?config: config_input?) -> Array[Result]
|
|
168
|
-
|
|
169
|
-
def normalize_config: (config_input? config) -> Hash[Symbol, untyped]
|
|
170
|
-
|
|
171
|
-
# Native methods (defined in Rust)
|
|
172
|
-
def native_extract_file_sync: (String path, ?String? mime_type, **untyped opts) -> extraction_result_hash
|
|
173
|
-
def native_extract_bytes_sync: (String data, String mime_type, **untyped opts) -> extraction_result_hash
|
|
174
|
-
def native_batch_extract_files_sync: (Array[String] paths, **untyped opts) -> Array[extraction_result_hash]
|
|
175
|
-
def native_extract_file: (String path, ?String? mime_type, **untyped opts) -> extraction_result_hash
|
|
176
|
-
def native_extract_bytes: (String data, String mime_type, **untyped opts) -> extraction_result_hash
|
|
177
|
-
def native_batch_extract_files: (Array[String] paths, **untyped opts) -> Array[extraction_result_hash]
|
|
178
|
-
def native_batch_extract_bytes_sync: (Array[String] data_array, Array[String] mime_types, **untyped opts) -> Array[extraction_result_hash]
|
|
179
|
-
def native_batch_extract_bytes: (Array[String] data_array, Array[String] mime_types, **untyped opts) -> Array[extraction_result_hash]
|
|
180
|
-
|
|
181
|
-
# Cache API methods (from prepended CacheAPI module)
|
|
182
|
-
def record_cache_entry!: (Kreuzberg::Result | Array[Kreuzberg::Result] results, Hash[Symbol, untyped] opts) -> void
|
|
183
|
-
end
|
|
184
|
-
end
|
|
1
|
+
# Internal modules - not part of public API but fully typed
|
|
2
|
+
|
|
3
|
+
module Kreuzberg
|
|
4
|
+
module SetupLibPath
|
|
5
|
+
# Public method
|
|
6
|
+
def configure: () -> void
|
|
7
|
+
def self.configure: () -> void
|
|
8
|
+
|
|
9
|
+
# Private methods (module_function makes them both instance and class methods)
|
|
10
|
+
def prepend_env: (String key, String value, ?separator: String) -> void
|
|
11
|
+
def self.prepend_env: (String key, String value, ?separator: String) -> void
|
|
12
|
+
def fix_macos_install_name: (String lib_dir) -> void
|
|
13
|
+
def self.fix_macos_install_name: (String lib_dir) -> void
|
|
14
|
+
def macos_bundle: (String lib_dir) -> String?
|
|
15
|
+
def self.macos_bundle: (String lib_dir) -> String?
|
|
16
|
+
def ensure_install_name: (String bundle) -> void
|
|
17
|
+
def self.ensure_install_name: (String bundle) -> void
|
|
18
|
+
def ensure_loader_rpath: (String bundle) -> void
|
|
19
|
+
def self.ensure_loader_rpath: (String bundle) -> void
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
module CLI
|
|
23
|
+
# All methods are both instance and class methods due to module_function
|
|
24
|
+
def extract: (String path, ?output: String, ?ocr: bool) -> String
|
|
25
|
+
def self.extract: (String path, ?output: String, ?ocr: bool) -> String
|
|
26
|
+
def detect: (String path) -> String
|
|
27
|
+
def self.detect: (String path) -> String
|
|
28
|
+
def version: () -> String
|
|
29
|
+
def self.version: () -> String
|
|
30
|
+
def help: () -> String
|
|
31
|
+
def self.help: () -> String
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
module CLIProxy
|
|
35
|
+
class Error < Kreuzberg::Errors::Error
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
class MissingBinaryError < Error
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
class CLIExecutionError < Error
|
|
42
|
+
attr_reader stderr: String
|
|
43
|
+
attr_reader status: Integer
|
|
44
|
+
|
|
45
|
+
def initialize: (String message, stderr: String, status: Integer) -> void
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# All methods are both instance and class methods due to module_function
|
|
49
|
+
def call: (Array[String] argv) -> String
|
|
50
|
+
def self.call: (Array[String] argv) -> String
|
|
51
|
+
def find_cli_binary: () -> Pathname
|
|
52
|
+
def self.find_cli_binary: () -> Pathname
|
|
53
|
+
def root_path: () -> Pathname
|
|
54
|
+
def self.root_path: () -> Pathname
|
|
55
|
+
def lib_path: () -> Pathname
|
|
56
|
+
def self.lib_path: () -> Pathname
|
|
57
|
+
def search_paths: (String binary_name) -> Array[Pathname]
|
|
58
|
+
def self.search_paths: (String binary_name) -> Array[Pathname]
|
|
59
|
+
def missing_binary_message: () -> String
|
|
60
|
+
def self.missing_binary_message: () -> String
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
module APIProxy
|
|
64
|
+
class Error < Kreuzberg::Errors::Error
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
class MissingBinaryError < Error
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
class ServerError < Error
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
class Server
|
|
74
|
+
attr_reader port: Integer
|
|
75
|
+
attr_reader host: String
|
|
76
|
+
attr_reader process: Process::Status?
|
|
77
|
+
|
|
78
|
+
def initialize: (?port: Integer, ?host: String) -> void
|
|
79
|
+
def start: () -> void
|
|
80
|
+
def stop: () -> void
|
|
81
|
+
def running?: () -> bool
|
|
82
|
+
def health_check: () -> bool
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
# Module function methods are both instance and class methods
|
|
86
|
+
def run: (?port: Integer, ?host: String) { (Server) -> untyped } -> untyped
|
|
87
|
+
def self.run: (?port: Integer, ?host: String) { (Server) -> untyped } -> untyped
|
|
88
|
+
def start_server: (?port: Integer?) -> Server
|
|
89
|
+
def self.start_server: (?port: Integer?) -> Server
|
|
90
|
+
def extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Kreuzberg::Result
|
|
91
|
+
def self.extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Kreuzberg::Result
|
|
92
|
+
def batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Array[Kreuzberg::Result]
|
|
93
|
+
def self.batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?, ?server: Server?) -> Array[Kreuzberg::Result]
|
|
94
|
+
def find_api_binary: () -> Pathname
|
|
95
|
+
def self.find_api_binary: () -> Pathname
|
|
96
|
+
def missing_binary_message: () -> String
|
|
97
|
+
def self.missing_binary_message: () -> String
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
module MCPProxy
|
|
101
|
+
class Error < Kreuzberg::Errors::Error
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
class MissingBinaryError < Error
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
class ServerError < Error
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
class Server
|
|
111
|
+
attr_reader pid: Integer?
|
|
112
|
+
attr_reader transport: String
|
|
113
|
+
|
|
114
|
+
def initialize: (?transport: String) -> void
|
|
115
|
+
def start: () -> (Integer | nil)
|
|
116
|
+
def stop: () -> void
|
|
117
|
+
def running?: () -> bool
|
|
118
|
+
def send_message: (Hash[untyped, untyped] message) -> void
|
|
119
|
+
def read_message: () -> (Hash[untyped, untyped] | nil)
|
|
120
|
+
|
|
121
|
+
private
|
|
122
|
+
|
|
123
|
+
def start_stdio: (Pathname binary) -> nil
|
|
124
|
+
def start_sse: (Pathname binary) -> (Integer | nil)
|
|
125
|
+
def close_pipes: () -> void
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Module function methods are both instance and class methods
|
|
129
|
+
def run: (?transport: String) { (Server) -> untyped } -> untyped
|
|
130
|
+
def self.run: (?transport: String) { (Server) -> untyped } -> untyped
|
|
131
|
+
def extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?) -> Kreuzberg::Result
|
|
132
|
+
def self.extract_file: (String path, ?mime_type: String?, ?config: Hash[Symbol, untyped]?) -> Kreuzberg::Result
|
|
133
|
+
def batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?) -> Array[Kreuzberg::Result]
|
|
134
|
+
def self.batch_extract: (Array[String] paths, ?config: Hash[Symbol, untyped]?) -> Array[Kreuzberg::Result]
|
|
135
|
+
def find_mcp_binary: () -> Pathname
|
|
136
|
+
def self.find_mcp_binary: () -> Pathname
|
|
137
|
+
def missing_binary_message: () -> String
|
|
138
|
+
def self.missing_binary_message: () -> String
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Cache API module (prepended to Kreuzberg singleton class)
|
|
142
|
+
module CacheAPI
|
|
143
|
+
@__cache_tracker: Hash[Symbol, Integer]
|
|
144
|
+
|
|
145
|
+
def clear_cache: () -> void
|
|
146
|
+
def cache_stats: () -> Hash[Symbol | String, Integer]
|
|
147
|
+
|
|
148
|
+
private
|
|
149
|
+
|
|
150
|
+
def record_cache_entry!: (Kreuzberg::Result | Array[Kreuzberg::Result] results, Hash[Symbol, untyped] opts) -> void
|
|
151
|
+
def reset_cache_tracker!: () -> nil
|
|
152
|
+
|
|
153
|
+
# Native methods (defined in Rust)
|
|
154
|
+
def native_clear_cache: () -> void
|
|
155
|
+
def native_cache_stats: () -> Hash[Symbol | String, Integer]
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Extraction API module (prepended to Kreuzberg singleton class)
|
|
159
|
+
module ExtractionAPI
|
|
160
|
+
def extract_file_sync: (String | Pathname path, ?mime_type: String?, ?config: config_input?) -> Result
|
|
161
|
+
def extract_bytes_sync: (String data, String mime_type, ?config: config_input?) -> Result
|
|
162
|
+
def batch_extract_files_sync: (Array[String | Pathname] paths, ?config: config_input?) -> Array[Result]
|
|
163
|
+
def extract_file: (String | Pathname path, ?mime_type: String?, ?config: config_input?) -> Result
|
|
164
|
+
def extract_bytes: (String data, String mime_type, ?config: config_input?) -> Result
|
|
165
|
+
def batch_extract_files: (Array[String | Pathname] paths, ?config: config_input?) -> Array[Result]
|
|
166
|
+
def batch_extract_bytes_sync: (Array[String] data_array, Array[String] mime_types, ?config: config_input?) -> Array[Result]
|
|
167
|
+
def batch_extract_bytes: (Array[String] data_array, Array[String] mime_types, ?config: config_input?) -> Array[Result]
|
|
168
|
+
|
|
169
|
+
def normalize_config: (config_input? config) -> Hash[Symbol, untyped]
|
|
170
|
+
|
|
171
|
+
# Native methods (defined in Rust)
|
|
172
|
+
def native_extract_file_sync: (String path, ?String? mime_type, **untyped opts) -> extraction_result_hash
|
|
173
|
+
def native_extract_bytes_sync: (String data, String mime_type, **untyped opts) -> extraction_result_hash
|
|
174
|
+
def native_batch_extract_files_sync: (Array[String] paths, **untyped opts) -> Array[extraction_result_hash]
|
|
175
|
+
def native_extract_file: (String path, ?String? mime_type, **untyped opts) -> extraction_result_hash
|
|
176
|
+
def native_extract_bytes: (String data, String mime_type, **untyped opts) -> extraction_result_hash
|
|
177
|
+
def native_batch_extract_files: (Array[String] paths, **untyped opts) -> Array[extraction_result_hash]
|
|
178
|
+
def native_batch_extract_bytes_sync: (Array[String] data_array, Array[String] mime_types, **untyped opts) -> Array[extraction_result_hash]
|
|
179
|
+
def native_batch_extract_bytes: (Array[String] data_array, Array[String] mime_types, **untyped opts) -> Array[extraction_result_hash]
|
|
180
|
+
|
|
181
|
+
# Cache API methods (from prepended CacheAPI module)
|
|
182
|
+
def record_cache_entry!: (Kreuzberg::Result | Array[Kreuzberg::Result] results, Hash[Symbol, untyped] opts) -> void
|
|
183
|
+
end
|
|
184
|
+
end
|