kreuzberg 4.0.0.rc2 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -14
- data/.rspec +3 -3
- data/.rubocop.yaml +1 -1
- data/.rubocop.yml +543 -538
- data/Gemfile +8 -8
- data/Gemfile.lock +194 -6
- data/README.md +391 -426
- data/Rakefile +34 -25
- data/Steepfile +51 -47
- data/examples/async_patterns.rb +283 -341
- data/ext/kreuzberg_rb/extconf.rb +65 -45
- data/ext/kreuzberg_rb/native/.cargo/config.toml +23 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +7619 -6535
- data/ext/kreuzberg_rb/native/Cargo.toml +75 -44
- data/ext/kreuzberg_rb/native/README.md +425 -425
- data/ext/kreuzberg_rb/native/build.rs +15 -15
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
- data/ext/kreuzberg_rb/native/include/strings.h +20 -20
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
- data/ext/kreuzberg_rb/native/src/lib.rs +3802 -2998
- data/extconf.rb +60 -28
- data/kreuzberg.gemspec +199 -148
- data/lib/kreuzberg/api_proxy.rb +126 -142
- data/lib/kreuzberg/cache_api.rb +67 -46
- data/lib/kreuzberg/cli.rb +47 -55
- data/lib/kreuzberg/cli_proxy.rb +117 -127
- data/lib/kreuzberg/config.rb +936 -691
- data/lib/kreuzberg/error_context.rb +136 -32
- data/lib/kreuzberg/errors.rb +116 -118
- data/lib/kreuzberg/extraction_api.rb +313 -85
- data/lib/kreuzberg/mcp_proxy.rb +177 -186
- data/lib/kreuzberg/ocr_backend_protocol.rb +40 -113
- data/lib/kreuzberg/post_processor_protocol.rb +15 -86
- data/lib/kreuzberg/result.rb +334 -216
- data/lib/kreuzberg/setup_lib_path.rb +99 -80
- data/lib/kreuzberg/types.rb +170 -0
- data/lib/kreuzberg/validator_protocol.rb +16 -89
- data/lib/kreuzberg/version.rb +5 -5
- data/lib/kreuzberg.rb +96 -103
- data/lib/libpdfium.so +0 -0
- data/sig/kreuzberg/internal.rbs +184 -184
- data/sig/kreuzberg.rbs +561 -520
- data/spec/binding/async_operations_spec.rb +473 -0
- data/spec/binding/batch_operations_spec.rb +595 -0
- data/spec/binding/batch_spec.rb +359 -0
- data/spec/binding/cache_spec.rb +227 -227
- data/spec/binding/cli_proxy_spec.rb +85 -85
- data/spec/binding/cli_spec.rb +55 -55
- data/spec/binding/config_result_spec.rb +377 -0
- data/spec/binding/config_spec.rb +419 -345
- data/spec/binding/config_validation_spec.rb +377 -283
- data/spec/binding/embeddings_spec.rb +816 -0
- data/spec/binding/error_handling_spec.rb +399 -213
- data/spec/binding/error_recovery_spec.rb +488 -0
- data/spec/binding/errors_spec.rb +66 -66
- data/spec/binding/font_config_spec.rb +220 -0
- data/spec/binding/images_spec.rb +738 -0
- data/spec/binding/keywords_extraction_spec.rb +600 -0
- data/spec/binding/metadata_types_spec.rb +1228 -0
- data/spec/binding/pages_extraction_spec.rb +471 -0
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
- data/spec/binding/plugins/postprocessor_spec.rb +269 -269
- data/spec/binding/plugins/validator_spec.rb +273 -274
- data/spec/binding/tables_spec.rb +641 -0
- data/spec/fixtures/config.toml +38 -39
- data/spec/fixtures/config.yaml +41 -41
- data/spec/fixtures/invalid_config.toml +3 -4
- data/spec/smoke/package_spec.rb +177 -178
- data/spec/spec_helper.rb +40 -42
- data/spec/unit/config/chunking_config_spec.rb +213 -0
- data/spec/unit/config/embedding_config_spec.rb +343 -0
- data/spec/unit/config/extraction_config_spec.rb +438 -0
- data/spec/unit/config/font_config_spec.rb +285 -0
- data/spec/unit/config/hierarchy_config_spec.rb +314 -0
- data/spec/unit/config/image_extraction_config_spec.rb +209 -0
- data/spec/unit/config/image_preprocessing_config_spec.rb +249 -0
- data/spec/unit/config/keyword_config_spec.rb +229 -0
- data/spec/unit/config/language_detection_config_spec.rb +258 -0
- data/spec/unit/config/ocr_config_spec.rb +171 -0
- data/spec/unit/config/page_config_spec.rb +221 -0
- data/spec/unit/config/pdf_config_spec.rb +267 -0
- data/spec/unit/config/postprocessor_config_spec.rb +290 -0
- data/spec/unit/config/tesseract_config_spec.rb +181 -0
- data/spec/unit/config/token_reduction_config_spec.rb +251 -0
- data/test/metadata_types_test.rb +959 -0
- data/vendor/Cargo.toml +61 -0
- data/vendor/kreuzberg/Cargo.toml +259 -204
- data/vendor/kreuzberg/README.md +263 -175
- data/vendor/kreuzberg/build.rs +782 -474
- data/vendor/kreuzberg/examples/bench_fixes.rs +71 -0
- data/vendor/kreuzberg/examples/test_pdfium_fork.rs +62 -0
- data/vendor/kreuzberg/src/api/error.rs +81 -81
- data/vendor/kreuzberg/src/api/handlers.rs +320 -199
- data/vendor/kreuzberg/src/api/mod.rs +94 -79
- data/vendor/kreuzberg/src/api/server.rs +518 -353
- data/vendor/kreuzberg/src/api/types.rs +206 -170
- data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
- data/vendor/kreuzberg/src/chunking/mod.rs +2303 -677
- data/vendor/kreuzberg/src/chunking/processor.rs +219 -0
- data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
- data/vendor/kreuzberg/src/core/batch_optimizations.rs +385 -0
- data/vendor/kreuzberg/src/core/config.rs +1914 -1032
- data/vendor/kreuzberg/src/core/config_validation.rs +949 -0
- data/vendor/kreuzberg/src/core/extractor.rs +1200 -1024
- data/vendor/kreuzberg/src/core/formats.rs +235 -0
- data/vendor/kreuzberg/src/core/io.rs +329 -329
- data/vendor/kreuzberg/src/core/mime.rs +605 -605
- data/vendor/kreuzberg/src/core/mod.rs +61 -45
- data/vendor/kreuzberg/src/core/pipeline.rs +1223 -984
- data/vendor/kreuzberg/src/core/server_config.rs +1220 -0
- data/vendor/kreuzberg/src/embeddings.rs +471 -432
- data/vendor/kreuzberg/src/error.rs +431 -431
- data/vendor/kreuzberg/src/extraction/archive.rs +959 -954
- data/vendor/kreuzberg/src/extraction/capacity.rs +263 -0
- data/vendor/kreuzberg/src/extraction/docx.rs +404 -40
- data/vendor/kreuzberg/src/extraction/email.rs +855 -854
- data/vendor/kreuzberg/src/extraction/excel.rs +697 -688
- data/vendor/kreuzberg/src/extraction/html.rs +1830 -553
- data/vendor/kreuzberg/src/extraction/image.rs +492 -368
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -563
- data/vendor/kreuzberg/src/extraction/markdown.rs +216 -213
- data/vendor/kreuzberg/src/extraction/mod.rs +93 -81
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -287
- data/vendor/kreuzberg/src/extraction/pptx.rs +3102 -3000
- data/vendor/kreuzberg/src/extraction/structured.rs +491 -490
- data/vendor/kreuzberg/src/extraction/table.rs +329 -328
- data/vendor/kreuzberg/src/extraction/text.rs +277 -269
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
- data/vendor/kreuzberg/src/extractors/archive.rs +447 -446
- data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -469
- data/vendor/kreuzberg/src/extractors/docbook.rs +504 -502
- data/vendor/kreuzberg/src/extractors/docx.rs +400 -367
- data/vendor/kreuzberg/src/extractors/email.rs +157 -143
- data/vendor/kreuzberg/src/extractors/epub.rs +696 -707
- data/vendor/kreuzberg/src/extractors/excel.rs +385 -343
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -491
- data/vendor/kreuzberg/src/extractors/html.rs +419 -393
- data/vendor/kreuzberg/src/extractors/image.rs +219 -198
- data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1051
- data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -367
- data/vendor/kreuzberg/src/extractors/latex.rs +653 -652
- data/vendor/kreuzberg/src/extractors/markdown.rs +701 -700
- data/vendor/kreuzberg/src/extractors/mod.rs +429 -365
- data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +635 -634
- data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -528
- data/vendor/kreuzberg/src/extractors/pdf.rs +761 -493
- data/vendor/kreuzberg/src/extractors/pptx.rs +279 -248
- data/vendor/kreuzberg/src/extractors/rst.rs +577 -576
- data/vendor/kreuzberg/src/extractors/rtf.rs +809 -810
- data/vendor/kreuzberg/src/extractors/security.rs +484 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
- data/vendor/kreuzberg/src/extractors/structured.rs +142 -140
- data/vendor/kreuzberg/src/extractors/text.rs +265 -260
- data/vendor/kreuzberg/src/extractors/typst.rs +651 -650
- data/vendor/kreuzberg/src/extractors/xml.rs +147 -135
- data/vendor/kreuzberg/src/image/dpi.rs +164 -164
- data/vendor/kreuzberg/src/image/mod.rs +6 -6
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
- data/vendor/kreuzberg/src/image/resize.rs +89 -89
- data/vendor/kreuzberg/src/keywords/config.rs +154 -154
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
- data/vendor/kreuzberg/src/keywords/processor.rs +275 -267
- data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
- data/vendor/kreuzberg/src/keywords/types.rs +68 -68
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
- data/vendor/kreuzberg/src/language_detection/mod.rs +985 -942
- data/vendor/kreuzberg/src/language_detection/processor.rs +218 -0
- data/vendor/kreuzberg/src/lib.rs +114 -105
- data/vendor/kreuzberg/src/mcp/mod.rs +35 -32
- data/vendor/kreuzberg/src/mcp/server.rs +2090 -1968
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
- data/vendor/kreuzberg/src/ocr/error.rs +37 -37
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
- data/vendor/kreuzberg/src/ocr/language_registry.rs +520 -0
- data/vendor/kreuzberg/src/ocr/mod.rs +60 -58
- data/vendor/kreuzberg/src/ocr/processor.rs +858 -863
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +456 -450
- data/vendor/kreuzberg/src/ocr/types.rs +393 -393
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
- data/vendor/kreuzberg/src/panic_context.rs +154 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +306 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +408 -0
- data/vendor/kreuzberg/src/pdf/error.rs +214 -122
- data/vendor/kreuzberg/src/pdf/fonts.rs +358 -0
- data/vendor/kreuzberg/src/pdf/hierarchy.rs +903 -0
- data/vendor/kreuzberg/src/pdf/images.rs +139 -139
- data/vendor/kreuzberg/src/pdf/metadata.rs +509 -346
- data/vendor/kreuzberg/src/pdf/mod.rs +81 -50
- data/vendor/kreuzberg/src/pdf/rendering.rs +369 -369
- data/vendor/kreuzberg/src/pdf/table.rs +417 -393
- data/vendor/kreuzberg/src/pdf/text.rs +553 -158
- data/vendor/kreuzberg/src/plugins/extractor.rs +1042 -1013
- data/vendor/kreuzberg/src/plugins/mod.rs +212 -209
- data/vendor/kreuzberg/src/plugins/ocr.rs +637 -620
- data/vendor/kreuzberg/src/plugins/processor.rs +650 -642
- data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1337
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
- data/vendor/kreuzberg/src/plugins/validator.rs +967 -956
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
- data/vendor/kreuzberg/src/text/mod.rs +27 -19
- data/vendor/kreuzberg/src/text/quality.rs +710 -697
- data/vendor/kreuzberg/src/text/quality_processor.rs +231 -0
- data/vendor/kreuzberg/src/text/string_utils.rs +229 -217
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +832 -796
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +923 -902
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +148 -147
- data/vendor/kreuzberg/src/text/utf8_validation.rs +193 -0
- data/vendor/kreuzberg/src/types.rs +1713 -903
- data/vendor/kreuzberg/src/utils/mod.rs +31 -17
- data/vendor/kreuzberg/src/utils/pool.rs +503 -0
- data/vendor/kreuzberg/src/utils/pool_sizing.rs +364 -0
- data/vendor/kreuzberg/src/utils/quality.rs +968 -959
- data/vendor/kreuzberg/src/utils/string_pool.rs +761 -0
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
- data/vendor/kreuzberg/tests/api_embed.rs +360 -0
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
- data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +471 -0
- data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +289 -0
- data/vendor/kreuzberg/tests/api_tests.rs +1472 -966
- data/vendor/kreuzberg/tests/archive_integration.rs +545 -543
- data/vendor/kreuzberg/tests/batch_orchestration.rs +587 -556
- data/vendor/kreuzberg/tests/batch_pooling_benchmark.rs +154 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +328 -316
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
- data/vendor/kreuzberg/tests/concurrency_stress.rs +541 -525
- data/vendor/kreuzberg/tests/config_features.rs +612 -598
- data/vendor/kreuzberg/tests/config_integration_test.rs +753 -0
- data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -415
- data/vendor/kreuzberg/tests/core_integration.rs +519 -510
- data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
- data/vendor/kreuzberg/tests/data/hierarchy_ground_truth.json +294 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -498
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
- data/vendor/kreuzberg/tests/email_integration.rs +327 -325
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
- data/vendor/kreuzberg/tests/error_handling.rs +402 -393
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
- data/vendor/kreuzberg/tests/format_integration.rs +165 -159
- data/vendor/kreuzberg/tests/helpers/mod.rs +202 -142
- data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
- data/vendor/kreuzberg/tests/image_integration.rs +255 -253
- data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
- data/vendor/kreuzberg/tests/mime_detection.rs +429 -428
- data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -510
- data/vendor/kreuzberg/tests/ocr_errors.rs +698 -676
- data/vendor/kreuzberg/tests/ocr_language_registry.rs +191 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +629 -627
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -695
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
- data/vendor/kreuzberg/tests/page_markers.rs +297 -0
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +301 -0
- data/vendor/kreuzberg/tests/pdf_hierarchy_quality.rs +589 -0
- data/vendor/kreuzberg/tests/pdf_integration.rs +45 -43
- data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +301 -0
- data/vendor/kreuzberg/tests/pdf_text_merging.rs +475 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +340 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1446 -1411
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -771
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +577 -560
- data/vendor/kreuzberg/tests/plugin_system.rs +927 -921
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -586
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -692
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -776
- data/vendor/kreuzberg/tests/security_validation.rs +416 -415
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
- data/vendor/kreuzberg/tests/test_fastembed.rs +631 -609
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1259
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -647
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
- data/vendor/kreuzberg-ffi/Cargo.toml +67 -0
- data/vendor/kreuzberg-ffi/README.md +851 -0
- data/vendor/kreuzberg-ffi/benches/result_view_benchmark.rs +227 -0
- data/vendor/kreuzberg-ffi/build.rs +168 -0
- data/vendor/kreuzberg-ffi/cbindgen.toml +37 -0
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -0
- data/vendor/kreuzberg-ffi/kreuzberg.h +3012 -0
- data/vendor/kreuzberg-ffi/src/batch_streaming.rs +588 -0
- data/vendor/kreuzberg-ffi/src/config.rs +1341 -0
- data/vendor/kreuzberg-ffi/src/error.rs +901 -0
- data/vendor/kreuzberg-ffi/src/extraction.rs +555 -0
- data/vendor/kreuzberg-ffi/src/helpers.rs +879 -0
- data/vendor/kreuzberg-ffi/src/lib.rs +977 -0
- data/vendor/kreuzberg-ffi/src/memory.rs +493 -0
- data/vendor/kreuzberg-ffi/src/mime.rs +329 -0
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +265 -0
- data/vendor/kreuzberg-ffi/src/plugins/document_extractor.rs +442 -0
- data/vendor/kreuzberg-ffi/src/plugins/mod.rs +14 -0
- data/vendor/kreuzberg-ffi/src/plugins/ocr_backend.rs +628 -0
- data/vendor/kreuzberg-ffi/src/plugins/post_processor.rs +438 -0
- data/vendor/kreuzberg-ffi/src/plugins/validator.rs +329 -0
- data/vendor/kreuzberg-ffi/src/result.rs +510 -0
- data/vendor/kreuzberg-ffi/src/result_pool.rs +639 -0
- data/vendor/kreuzberg-ffi/src/result_view.rs +773 -0
- data/vendor/kreuzberg-ffi/src/string_intern.rs +568 -0
- data/vendor/kreuzberg-ffi/src/types.rs +363 -0
- data/vendor/kreuzberg-ffi/src/util.rs +210 -0
- data/vendor/kreuzberg-ffi/src/validation.rs +848 -0
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -0
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -0
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -0
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -0
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -0
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -0
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -0
- data/vendor/kreuzberg-tesseract/Cargo.toml +57 -0
- data/vendor/{rb-sys/LICENSE-MIT → kreuzberg-tesseract/LICENSE} +22 -21
- data/vendor/kreuzberg-tesseract/README.md +399 -0
- data/vendor/kreuzberg-tesseract/build.rs +1127 -0
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -0
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -0
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -0
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -0
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -0
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -0
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -0
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -0
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -0
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -0
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -0
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -0
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -0
- metadata +196 -45
- data/vendor/kreuzberg/benches/otel_overhead.rs +0 -48
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
- data/vendor/rb-sys/Cargo.lock +0 -393
- data/vendor/rb-sys/Cargo.toml +0 -70
- data/vendor/rb-sys/Cargo.toml.orig +0 -57
- data/vendor/rb-sys/LICENSE-APACHE +0 -190
- data/vendor/rb-sys/bin/release.sh +0 -21
- data/vendor/rb-sys/build/features.rs +0 -108
- data/vendor/rb-sys/build/main.rs +0 -246
- data/vendor/rb-sys/build/stable_api_config.rs +0 -153
- data/vendor/rb-sys/build/version.rs +0 -48
- data/vendor/rb-sys/readme.md +0 -36
- data/vendor/rb-sys/src/bindings.rs +0 -21
- data/vendor/rb-sys/src/hidden.rs +0 -11
- data/vendor/rb-sys/src/lib.rs +0 -34
- data/vendor/rb-sys/src/macros.rs +0 -371
- data/vendor/rb-sys/src/memory.rs +0 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
- data/vendor/rb-sys/src/special_consts.rs +0 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -316
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -317
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -315
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -326
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -327
- data/vendor/rb-sys/src/stable_api.rs +0 -261
- data/vendor/rb-sys/src/symbol.rs +0 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +0 -332
- data/vendor/rb-sys/src/utils.rs +0 -89
- data/vendor/rb-sys/src/value_type.rs +0 -7
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
//! Font caching system for Pdfium rendering.
|
|
2
|
+
//!
|
|
3
|
+
//! This module provides an efficient, thread-safe font caching mechanism that eliminates
|
|
4
|
+
//! per-page font loading overhead when processing PDFs. Fonts are discovered from system
|
|
5
|
+
//! directories on first access and cached in memory for zero-copy sharing across Pdfium instances.
|
|
6
|
+
//!
|
|
7
|
+
//! # Performance Impact
|
|
8
|
+
//!
|
|
9
|
+
//! By caching fonts in memory:
|
|
10
|
+
//! - First PDF operation: ~50-100ms (initial font discovery and loading)
|
|
11
|
+
//! - Subsequent pages: ~1-2ms per page (zero-copy from cache)
|
|
12
|
+
//! - 100-page PDF: ~200ms total (vs ~10s without caching) = **50x improvement**
|
|
13
|
+
//!
|
|
14
|
+
//! # Platform Support
|
|
15
|
+
//!
|
|
16
|
+
//! Font discovery works on:
|
|
17
|
+
//! - **macOS**: `/Library/Fonts`, `/System/Library/Fonts`
|
|
18
|
+
//! - **Linux**: `/usr/share/fonts`, `/usr/local/share/fonts`
|
|
19
|
+
//! - **Windows**: `C:\Windows\Fonts`
|
|
20
|
+
//!
|
|
21
|
+
//! # Example
|
|
22
|
+
//!
|
|
23
|
+
//! ```rust,no_run
|
|
24
|
+
//! use kreuzberg::pdf::fonts::{initialize_font_cache, get_font_descriptors};
|
|
25
|
+
//!
|
|
26
|
+
//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
|
|
27
|
+
//! // Initialize cache on application startup (lazy-loaded on first call)
|
|
28
|
+
//! initialize_font_cache()?;
|
|
29
|
+
//!
|
|
30
|
+
//! // Get cached font descriptors for Pdfium configuration
|
|
31
|
+
//! let descriptors = get_font_descriptors()?;
|
|
32
|
+
//! println!("Loaded {} fonts", descriptors.len());
|
|
33
|
+
//! # Ok(())
|
|
34
|
+
//! # }
|
|
35
|
+
//! ```
|
|
36
|
+
|
|
37
|
+
use super::error::PdfError;
|
|
38
|
+
use once_cell::sync::Lazy;
|
|
39
|
+
use std::collections::HashMap;
|
|
40
|
+
use std::path::{Path, PathBuf};
|
|
41
|
+
use std::sync::Arc;
|
|
42
|
+
use std::sync::RwLock;
|
|
43
|
+
|
|
44
|
+
#[cfg(feature = "pdf")]
|
|
45
|
+
use pdfium_render::prelude::FontDescriptor;
|
|
46
|
+
|
|
47
|
+
/// Global font cache: maps font paths to loaded bytes.
|
|
48
|
+
///
|
|
49
|
+
/// Uses `Arc<[u8]>` for zero-copy sharing when passing fonts to multiple Pdfium instances.
|
|
50
|
+
/// Protected by `RwLock` for concurrent read access during PDF processing.
|
|
51
|
+
static FONT_CACHE: Lazy<RwLock<FontCacheState>> = Lazy::new(|| {
|
|
52
|
+
RwLock::new(FontCacheState {
|
|
53
|
+
fonts: HashMap::new(),
|
|
54
|
+
initialized: false,
|
|
55
|
+
})
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
/// Contents of the global font cache.
struct FontCacheState {
    /// Set to `true` once font discovery has run and `fonts` has been populated.
    initialized: bool,
    /// Loaded font bytes, keyed by the identifier assigned during discovery.
    fonts: HashMap<String, Arc<[u8]>>,
}
|
|
65
|
+
|
|
66
|
+
/// Font directories scanned on macOS: `/Library/Fonts`, then `/System/Library/Fonts`.
#[cfg(target_os = "macos")]
fn system_font_directories() -> Vec<PathBuf> {
    ["/Library/Fonts", "/System/Library/Fonts"]
        .iter()
        .map(|dir| PathBuf::from(*dir))
        .collect()
}
|
|
71
|
+
|
|
72
|
+
/// Font directories scanned on Linux: `/usr/share/fonts`, then `/usr/local/share/fonts`.
#[cfg(target_os = "linux")]
fn system_font_directories() -> Vec<PathBuf> {
    ["/usr/share/fonts", "/usr/local/share/fonts"]
        .iter()
        .map(|dir| PathBuf::from(*dir))
        .collect()
}
|
|
80
|
+
|
|
81
|
+
/// Font directory scanned on Windows: the `Fonts` folder inside the Windows directory.
///
/// The Windows directory is read from the `SystemRoot` environment variable, then
/// `WINDIR`, so installations on a drive or folder other than `C:\Windows` are still
/// found. When neither variable is set (or both are empty) the conventional
/// `C:\Windows` location is used, which yields the previous hard-coded
/// `C:\Windows\Fonts` result.
#[cfg(target_os = "windows")]
fn system_font_directories() -> Vec<PathBuf> {
    let windows_root = ["SystemRoot", "WINDIR"]
        .iter()
        .filter_map(|name| std::env::var_os(name))
        // An empty value would make `join` produce a relative "Fonts" path.
        .find(|root| !root.is_empty())
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("C:\\Windows"));

    vec![windows_root.join("Fonts")]
}
|
|
86
|
+
|
|
87
|
+
/// Fallback for operating systems other than macOS, Linux and Windows: no font
/// directories are known, so the list is empty.
#[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
fn system_font_directories() -> Vec<PathBuf> {
    Vec::new()
}
|
|
92
|
+
|
|
93
|
+
/// Load a single font file into memory.
|
|
94
|
+
///
|
|
95
|
+
/// # Arguments
|
|
96
|
+
///
|
|
97
|
+
/// * `path` - Path to the font file (.ttf or .otf)
|
|
98
|
+
///
|
|
99
|
+
/// # Returns
|
|
100
|
+
///
|
|
101
|
+
/// An Arc-wrapped slice of font bytes, or an error if the file cannot be read.
|
|
102
|
+
fn load_font_file(path: &Path) -> Result<Arc<[u8]>, PdfError> {
|
|
103
|
+
std::fs::read(path)
|
|
104
|
+
.map(|bytes| Arc::from(bytes.into_boxed_slice()))
|
|
105
|
+
.map_err(|e| PdfError::FontLoadingFailed(format!("Failed to read font file '{}': {}", path.display(), e)))
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/// Discover and load all system fonts.
|
|
109
|
+
///
|
|
110
|
+
/// Scans platform-specific font directories and loads all .ttf and .otf files.
|
|
111
|
+
/// Font files larger than 50MB are skipped to prevent memory issues.
|
|
112
|
+
///
|
|
113
|
+
/// # Returns
|
|
114
|
+
///
|
|
115
|
+
/// A HashMap mapping font identifiers (relative paths) to loaded font bytes.
|
|
116
|
+
fn discover_system_fonts() -> Result<HashMap<String, Arc<[u8]>>, PdfError> {
|
|
117
|
+
let mut fonts = HashMap::new();
|
|
118
|
+
const MAX_FONT_SIZE: u64 = 50 * 1024 * 1024;
|
|
119
|
+
|
|
120
|
+
for dir in system_font_directories() {
|
|
121
|
+
if !dir.exists() {
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
match std::fs::read_dir(&dir) {
|
|
126
|
+
Ok(entries) => {
|
|
127
|
+
for entry in entries.flatten() {
|
|
128
|
+
let path = entry.path();
|
|
129
|
+
|
|
130
|
+
if let Some(ext) = path.extension() {
|
|
131
|
+
let ext_str = ext.to_string_lossy().to_lowercase();
|
|
132
|
+
if ext_str != "ttf" && ext_str != "otf" {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if let Ok(metadata) = std::fs::metadata(&path) {
|
|
137
|
+
if metadata.len() > MAX_FONT_SIZE {
|
|
138
|
+
tracing::warn!(
|
|
139
|
+
"Font file too large (skipped): {} ({}MB)",
|
|
140
|
+
path.display(),
|
|
141
|
+
metadata.len() / (1024 * 1024)
|
|
142
|
+
);
|
|
143
|
+
continue;
|
|
144
|
+
}
|
|
145
|
+
} else {
|
|
146
|
+
continue;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
match load_font_file(&path) {
|
|
150
|
+
Ok(font_data) => {
|
|
151
|
+
if let Some(filename) = path.file_name() {
|
|
152
|
+
let key = filename.to_string_lossy().to_string();
|
|
153
|
+
fonts.insert(key, font_data);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
Err(_e) => {
|
|
157
|
+
tracing::debug!("Failed to load font file: {}", path.display());
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
Err(_e) => {
|
|
164
|
+
tracing::debug!("Failed to read font directory: {}", dir.display());
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
Ok(fonts)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/// Initialize the global font cache.
|
|
173
|
+
///
|
|
174
|
+
/// On first call, discovers and loads all system fonts. Subsequent calls are no-ops.
|
|
175
|
+
/// Caching is thread-safe via RwLock; concurrent reads during PDF processing are efficient.
|
|
176
|
+
///
|
|
177
|
+
/// # Returns
|
|
178
|
+
///
|
|
179
|
+
/// Ok if initialization succeeds or cache is already initialized, or PdfError if font discovery fails.
|
|
180
|
+
///
|
|
181
|
+
/// # Performance
|
|
182
|
+
///
|
|
183
|
+
/// - First call: 50-100ms (system font discovery + loading)
|
|
184
|
+
/// - Subsequent calls: < 1μs (no-op, just checks initialized flag)
|
|
185
|
+
pub fn initialize_font_cache() -> Result<(), PdfError> {
|
|
186
|
+
{
|
|
187
|
+
let cache = FONT_CACHE
|
|
188
|
+
.read()
|
|
189
|
+
.map_err(|e| PdfError::FontLoadingFailed(format!("Font cache lock poisoned: {}", e)))?;
|
|
190
|
+
|
|
191
|
+
if cache.initialized {
|
|
192
|
+
return Ok(());
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
let mut cache = FONT_CACHE
|
|
197
|
+
.write()
|
|
198
|
+
.map_err(|e| PdfError::FontLoadingFailed(format!("Font cache lock poisoned: {}", e)))?;
|
|
199
|
+
|
|
200
|
+
if cache.initialized {
|
|
201
|
+
return Ok(());
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
tracing::debug!("Initializing font cache...");
|
|
205
|
+
let fonts = discover_system_fonts()?;
|
|
206
|
+
let font_count = fonts.len();
|
|
207
|
+
|
|
208
|
+
cache.fonts = fonts;
|
|
209
|
+
cache.initialized = true;
|
|
210
|
+
|
|
211
|
+
tracing::debug!("Font cache initialized with {} fonts", font_count);
|
|
212
|
+
Ok(())
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/// Get cached font descriptors for Pdfium configuration.
|
|
216
|
+
///
|
|
217
|
+
/// Ensures the font cache is initialized, then returns font descriptors
|
|
218
|
+
/// derived from the cached fonts. This call is fast after the first invocation.
|
|
219
|
+
///
|
|
220
|
+
/// # Returns
|
|
221
|
+
///
|
|
222
|
+
/// A Vec of FontDescriptor objects suitable for `PdfiumConfig::set_font_provider()`.
|
|
223
|
+
///
|
|
224
|
+
/// # Performance
|
|
225
|
+
///
|
|
226
|
+
/// - First call: ~50-100ms (includes font discovery)
|
|
227
|
+
/// - Subsequent calls: < 1ms (reads from cache)
|
|
228
|
+
pub fn get_font_descriptors() -> Result<Vec<FontDescriptor>, PdfError> {
|
|
229
|
+
initialize_font_cache()?;
|
|
230
|
+
|
|
231
|
+
let cache = FONT_CACHE
|
|
232
|
+
.read()
|
|
233
|
+
.map_err(|e| PdfError::FontLoadingFailed(format!("Font cache lock poisoned: {}", e)))?;
|
|
234
|
+
|
|
235
|
+
let descriptors = cache
|
|
236
|
+
.fonts
|
|
237
|
+
.iter()
|
|
238
|
+
.map(|(filename, data)| {
|
|
239
|
+
let is_italic = filename.to_lowercase().contains("italic");
|
|
240
|
+
let is_bold = filename.to_lowercase().contains("bold");
|
|
241
|
+
let weight = if is_bold { 700 } else { 400 };
|
|
242
|
+
|
|
243
|
+
let family = filename.split('.').next().unwrap_or("Unknown").to_string();
|
|
244
|
+
|
|
245
|
+
FontDescriptor {
|
|
246
|
+
family,
|
|
247
|
+
weight,
|
|
248
|
+
is_italic,
|
|
249
|
+
charset: 0,
|
|
250
|
+
data: data.clone(),
|
|
251
|
+
}
|
|
252
|
+
})
|
|
253
|
+
.collect();
|
|
254
|
+
|
|
255
|
+
Ok(descriptors)
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/// Get the number of cached fonts.
|
|
259
|
+
///
|
|
260
|
+
/// Useful for diagnostics and testing.
|
|
261
|
+
///
|
|
262
|
+
/// # Returns
|
|
263
|
+
///
|
|
264
|
+
/// Number of fonts in the cache, or 0 if not initialized.
|
|
265
|
+
pub fn cached_font_count() -> usize {
|
|
266
|
+
FONT_CACHE.read().map(|cache| cache.fonts.len()).unwrap_or(0)
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/// Reset the font cache to its uninitialized, empty state (test-only helper).
///
/// After this call the next `initialize_font_cache` runs font discovery again.
///
/// # Panics
///
/// Panics if the cache lock is poisoned, which should only happen in test
/// scenarios with deliberate panic injection.
#[cfg(test)]
pub fn clear_font_cache() {
    let mut guard = FONT_CACHE.write().expect("Failed to acquire write lock");
    guard.initialized = false;
    guard.fonts.clear();
}
|
|
281
|
+
|
|
282
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes the tests that mutate the process-wide `FONT_CACHE`.
    ///
    /// The test harness runs tests on multiple threads, so without this guard
    /// one test's `clear_font_cache()` / `initialize_font_cache()` can interleave
    /// with another's assertions (e.g. the "cache is empty" check below).
    static CACHE_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Acquire the cache test lock, ignoring poisoning so that one failed test
    /// does not cascade into failures of every other cache test.
    fn lock_cache() -> std::sync::MutexGuard<'static, ()> {
        CACHE_TEST_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn test_initialize_font_cache() {
        let _guard = lock_cache();
        clear_font_cache();

        let result = initialize_font_cache();
        assert!(result.is_ok(), "Font cache initialization should succeed");
    }

    #[test]
    fn test_initialize_font_cache_idempotent() {
        let _guard = lock_cache();
        clear_font_cache();

        let result1 = initialize_font_cache();
        assert!(result1.is_ok());

        // A second call must succeed as well (it should hit the initialized flag).
        let result2 = initialize_font_cache();
        assert!(result2.is_ok());
    }

    #[test]
    fn test_get_font_descriptors() {
        let _guard = lock_cache();
        clear_font_cache();

        let result = get_font_descriptors();
        assert!(result.is_ok());
    }

    #[test]
    fn test_cached_font_count() {
        let _guard = lock_cache();
        clear_font_cache();
        assert_eq!(cached_font_count(), 0, "Cache should be empty before initialization");

        let _ = initialize_font_cache();
        // Smoke check only: the number of system fonts is environment-dependent,
        // so the call just has to complete without panicking.
        let _count = cached_font_count();
    }

    #[test]
    fn test_system_font_directories() {
        let dirs = system_font_directories();
        assert!(!dirs.is_empty(), "Should have at least one font directory");

        for dir in dirs {
            assert!(
                dir.is_absolute(),
                "Font directory should be absolute: {}",
                dir.display()
            );
        }
    }

    #[test]
    fn test_load_font_file_nonexistent() {
        let result = load_font_file(Path::new("/nonexistent/path/font.ttf"));
        assert!(result.is_err(), "Loading nonexistent file should fail with error");
    }

    #[test]
    fn test_font_descriptors_attributes() {
        // This test only builds a descriptor by hand; it does not touch the
        // global cache, so it neither clears it nor takes the cache lock.
        let data: Arc<[u8]> = Arc::from(vec![0u8; 100].into_boxed_slice());
        let descriptor = FontDescriptor {
            family: "TestFont".to_string(),
            weight: 700,
            is_italic: false,
            charset: 0,
            data,
        };

        assert_eq!(descriptor.family, "TestFont");
        assert_eq!(descriptor.weight, 700);
        assert!(!descriptor.is_italic);
        assert_eq!(descriptor.charset, 0);
    }
}
|