kreuzberg 4.0.0.pre.rc.11 → 4.0.0.pre.rc.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +14 -14
- data/.rspec +3 -3
- data/.rubocop.yaml +1 -1
- data/.rubocop.yml +538 -538
- data/Gemfile +8 -8
- data/Gemfile.lock +2 -105
- data/README.md +454 -454
- data/Rakefile +25 -25
- data/Steepfile +47 -47
- data/examples/async_patterns.rb +341 -341
- data/ext/kreuzberg_rb/extconf.rb +45 -45
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -2
- data/ext/kreuzberg_rb/native/Cargo.lock +6941 -6941
- data/ext/kreuzberg_rb/native/Cargo.toml +54 -54
- data/ext/kreuzberg_rb/native/README.md +425 -425
- data/ext/kreuzberg_rb/native/build.rs +15 -15
- data/ext/kreuzberg_rb/native/include/ieeefp.h +11 -11
- data/ext/kreuzberg_rb/native/include/msvc_compat/strings.h +14 -14
- data/ext/kreuzberg_rb/native/include/strings.h +20 -20
- data/ext/kreuzberg_rb/native/include/unistd.h +47 -47
- data/ext/kreuzberg_rb/native/src/lib.rs +3158 -3158
- data/extconf.rb +28 -28
- data/kreuzberg.gemspec +214 -214
- data/lib/kreuzberg/api_proxy.rb +142 -142
- data/lib/kreuzberg/cache_api.rb +81 -81
- data/lib/kreuzberg/cli.rb +55 -55
- data/lib/kreuzberg/cli_proxy.rb +127 -127
- data/lib/kreuzberg/config.rb +724 -724
- data/lib/kreuzberg/error_context.rb +80 -80
- data/lib/kreuzberg/errors.rb +118 -118
- data/lib/kreuzberg/extraction_api.rb +340 -340
- data/lib/kreuzberg/mcp_proxy.rb +186 -186
- data/lib/kreuzberg/ocr_backend_protocol.rb +113 -113
- data/lib/kreuzberg/post_processor_protocol.rb +86 -86
- data/lib/kreuzberg/result.rb +279 -279
- data/lib/kreuzberg/setup_lib_path.rb +80 -80
- data/lib/kreuzberg/validator_protocol.rb +89 -89
- data/lib/kreuzberg/version.rb +5 -5
- data/lib/kreuzberg.rb +109 -109
- data/lib/{libpdfium.dylib → pdfium.dll} +0 -0
- data/sig/kreuzberg/internal.rbs +184 -184
- data/sig/kreuzberg.rbs +546 -546
- data/spec/binding/cache_spec.rb +227 -227
- data/spec/binding/cli_proxy_spec.rb +85 -85
- data/spec/binding/cli_spec.rb +55 -55
- data/spec/binding/config_spec.rb +345 -345
- data/spec/binding/config_validation_spec.rb +283 -283
- data/spec/binding/error_handling_spec.rb +213 -213
- data/spec/binding/errors_spec.rb +66 -66
- data/spec/binding/plugins/ocr_backend_spec.rb +307 -307
- data/spec/binding/plugins/postprocessor_spec.rb +269 -269
- data/spec/binding/plugins/validator_spec.rb +274 -274
- data/spec/fixtures/config.toml +39 -39
- data/spec/fixtures/config.yaml +41 -41
- data/spec/fixtures/invalid_config.toml +4 -4
- data/spec/smoke/package_spec.rb +178 -178
- data/spec/spec_helper.rb +42 -42
- data/vendor/Cargo.toml +2 -1
- data/vendor/kreuzberg/Cargo.toml +2 -2
- data/vendor/kreuzberg/README.md +230 -230
- data/vendor/kreuzberg/benches/otel_overhead.rs +48 -48
- data/vendor/kreuzberg/build.rs +843 -843
- data/vendor/kreuzberg/src/api/error.rs +81 -81
- data/vendor/kreuzberg/src/api/handlers.rs +199 -199
- data/vendor/kreuzberg/src/api/mod.rs +79 -79
- data/vendor/kreuzberg/src/api/server.rs +353 -353
- data/vendor/kreuzberg/src/api/types.rs +170 -170
- data/vendor/kreuzberg/src/cache/mod.rs +1167 -1167
- data/vendor/kreuzberg/src/chunking/mod.rs +1877 -1877
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -220
- data/vendor/kreuzberg/src/core/batch_mode.rs +95 -95
- data/vendor/kreuzberg/src/core/config.rs +1080 -1080
- data/vendor/kreuzberg/src/core/extractor.rs +1156 -1156
- data/vendor/kreuzberg/src/core/io.rs +329 -329
- data/vendor/kreuzberg/src/core/mime.rs +605 -605
- data/vendor/kreuzberg/src/core/mod.rs +47 -47
- data/vendor/kreuzberg/src/core/pipeline.rs +1184 -1184
- data/vendor/kreuzberg/src/embeddings.rs +500 -500
- data/vendor/kreuzberg/src/error.rs +431 -431
- data/vendor/kreuzberg/src/extraction/archive.rs +954 -954
- data/vendor/kreuzberg/src/extraction/docx.rs +398 -398
- data/vendor/kreuzberg/src/extraction/email.rs +854 -854
- data/vendor/kreuzberg/src/extraction/excel.rs +688 -688
- data/vendor/kreuzberg/src/extraction/html.rs +601 -601
- data/vendor/kreuzberg/src/extraction/image.rs +491 -491
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +574 -562
- data/vendor/kreuzberg/src/extraction/markdown.rs +213 -213
- data/vendor/kreuzberg/src/extraction/mod.rs +81 -81
- data/vendor/kreuzberg/src/extraction/office_metadata/app_properties.rs +398 -398
- data/vendor/kreuzberg/src/extraction/office_metadata/core_properties.rs +247 -247
- data/vendor/kreuzberg/src/extraction/office_metadata/custom_properties.rs +240 -240
- data/vendor/kreuzberg/src/extraction/office_metadata/mod.rs +130 -130
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +284 -284
- data/vendor/kreuzberg/src/extraction/pptx.rs +3100 -3100
- data/vendor/kreuzberg/src/extraction/structured.rs +490 -490
- data/vendor/kreuzberg/src/extraction/table.rs +328 -328
- data/vendor/kreuzberg/src/extraction/text.rs +269 -269
- data/vendor/kreuzberg/src/extraction/xml.rs +333 -333
- data/vendor/kreuzberg/src/extractors/archive.rs +447 -447
- data/vendor/kreuzberg/src/extractors/bibtex.rs +470 -470
- data/vendor/kreuzberg/src/extractors/docbook.rs +504 -504
- data/vendor/kreuzberg/src/extractors/docx.rs +400 -400
- data/vendor/kreuzberg/src/extractors/email.rs +157 -157
- data/vendor/kreuzberg/src/extractors/epub.rs +708 -708
- data/vendor/kreuzberg/src/extractors/excel.rs +345 -345
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +492 -492
- data/vendor/kreuzberg/src/extractors/html.rs +407 -407
- data/vendor/kreuzberg/src/extractors/image.rs +219 -219
- data/vendor/kreuzberg/src/extractors/jats.rs +1054 -1054
- data/vendor/kreuzberg/src/extractors/jupyter.rs +368 -368
- data/vendor/kreuzberg/src/extractors/latex.rs +653 -653
- data/vendor/kreuzberg/src/extractors/markdown.rs +701 -701
- data/vendor/kreuzberg/src/extractors/mod.rs +429 -429
- data/vendor/kreuzberg/src/extractors/odt.rs +628 -628
- data/vendor/kreuzberg/src/extractors/opml.rs +635 -635
- data/vendor/kreuzberg/src/extractors/orgmode.rs +529 -529
- data/vendor/kreuzberg/src/extractors/pdf.rs +749 -722
- data/vendor/kreuzberg/src/extractors/pptx.rs +267 -267
- data/vendor/kreuzberg/src/extractors/rst.rs +577 -577
- data/vendor/kreuzberg/src/extractors/rtf.rs +809 -809
- data/vendor/kreuzberg/src/extractors/security.rs +484 -484
- data/vendor/kreuzberg/src/extractors/security_tests.rs +367 -367
- data/vendor/kreuzberg/src/extractors/structured.rs +142 -142
- data/vendor/kreuzberg/src/extractors/text.rs +265 -265
- data/vendor/kreuzberg/src/extractors/typst.rs +651 -651
- data/vendor/kreuzberg/src/extractors/xml.rs +147 -147
- data/vendor/kreuzberg/src/image/dpi.rs +164 -164
- data/vendor/kreuzberg/src/image/mod.rs +6 -6
- data/vendor/kreuzberg/src/image/preprocessing.rs +417 -417
- data/vendor/kreuzberg/src/image/resize.rs +89 -89
- data/vendor/kreuzberg/src/keywords/config.rs +154 -154
- data/vendor/kreuzberg/src/keywords/mod.rs +237 -237
- data/vendor/kreuzberg/src/keywords/processor.rs +275 -275
- data/vendor/kreuzberg/src/keywords/rake.rs +293 -293
- data/vendor/kreuzberg/src/keywords/types.rs +68 -68
- data/vendor/kreuzberg/src/keywords/yake.rs +163 -163
- data/vendor/kreuzberg/src/language_detection/mod.rs +985 -985
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -219
- data/vendor/kreuzberg/src/lib.rs +113 -113
- data/vendor/kreuzberg/src/mcp/mod.rs +35 -35
- data/vendor/kreuzberg/src/mcp/server.rs +2076 -2076
- data/vendor/kreuzberg/src/ocr/cache.rs +469 -469
- data/vendor/kreuzberg/src/ocr/error.rs +37 -37
- data/vendor/kreuzberg/src/ocr/hocr.rs +216 -216
- data/vendor/kreuzberg/src/ocr/mod.rs +58 -58
- data/vendor/kreuzberg/src/ocr/processor.rs +863 -863
- data/vendor/kreuzberg/src/ocr/table/mod.rs +4 -4
- data/vendor/kreuzberg/src/ocr/table/tsv_parser.rs +144 -144
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +452 -452
- data/vendor/kreuzberg/src/ocr/types.rs +393 -393
- data/vendor/kreuzberg/src/ocr/utils.rs +47 -47
- data/vendor/kreuzberg/src/ocr/validation.rs +206 -206
- data/vendor/kreuzberg/src/panic_context.rs +154 -154
- data/vendor/kreuzberg/src/pdf/bindings.rs +44 -44
- data/vendor/kreuzberg/src/pdf/bundled.rs +346 -346
- data/vendor/kreuzberg/src/pdf/error.rs +130 -130
- data/vendor/kreuzberg/src/pdf/images.rs +139 -139
- data/vendor/kreuzberg/src/pdf/metadata.rs +489 -489
- data/vendor/kreuzberg/src/pdf/mod.rs +68 -68
- data/vendor/kreuzberg/src/pdf/rendering.rs +368 -368
- data/vendor/kreuzberg/src/pdf/table.rs +420 -420
- data/vendor/kreuzberg/src/pdf/text.rs +240 -240
- data/vendor/kreuzberg/src/plugins/extractor.rs +1044 -1044
- data/vendor/kreuzberg/src/plugins/mod.rs +212 -212
- data/vendor/kreuzberg/src/plugins/ocr.rs +639 -639
- data/vendor/kreuzberg/src/plugins/processor.rs +650 -650
- data/vendor/kreuzberg/src/plugins/registry.rs +1339 -1339
- data/vendor/kreuzberg/src/plugins/traits.rs +258 -258
- data/vendor/kreuzberg/src/plugins/validator.rs +967 -967
- data/vendor/kreuzberg/src/stopwords/mod.rs +1470 -1470
- data/vendor/kreuzberg/src/text/mod.rs +25 -25
- data/vendor/kreuzberg/src/text/quality.rs +697 -697
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -219
- data/vendor/kreuzberg/src/text/string_utils.rs +217 -217
- data/vendor/kreuzberg/src/text/token_reduction/cjk_utils.rs +164 -164
- data/vendor/kreuzberg/src/text/token_reduction/config.rs +100 -100
- data/vendor/kreuzberg/src/text/token_reduction/core.rs +796 -796
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +902 -902
- data/vendor/kreuzberg/src/text/token_reduction/mod.rs +160 -160
- data/vendor/kreuzberg/src/text/token_reduction/semantic.rs +619 -619
- data/vendor/kreuzberg/src/text/token_reduction/simd_text.rs +147 -147
- data/vendor/kreuzberg/src/types.rs +1055 -1055
- data/vendor/kreuzberg/src/utils/mod.rs +17 -17
- data/vendor/kreuzberg/src/utils/quality.rs +959 -959
- data/vendor/kreuzberg/src/utils/string_utils.rs +381 -381
- data/vendor/kreuzberg/stopwords/af_stopwords.json +53 -53
- data/vendor/kreuzberg/stopwords/ar_stopwords.json +482 -482
- data/vendor/kreuzberg/stopwords/bg_stopwords.json +261 -261
- data/vendor/kreuzberg/stopwords/bn_stopwords.json +400 -400
- data/vendor/kreuzberg/stopwords/br_stopwords.json +1205 -1205
- data/vendor/kreuzberg/stopwords/ca_stopwords.json +280 -280
- data/vendor/kreuzberg/stopwords/cs_stopwords.json +425 -425
- data/vendor/kreuzberg/stopwords/da_stopwords.json +172 -172
- data/vendor/kreuzberg/stopwords/de_stopwords.json +622 -622
- data/vendor/kreuzberg/stopwords/el_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/en_stopwords.json +1300 -1300
- data/vendor/kreuzberg/stopwords/eo_stopwords.json +175 -175
- data/vendor/kreuzberg/stopwords/es_stopwords.json +734 -734
- data/vendor/kreuzberg/stopwords/et_stopwords.json +37 -37
- data/vendor/kreuzberg/stopwords/eu_stopwords.json +100 -100
- data/vendor/kreuzberg/stopwords/fa_stopwords.json +801 -801
- data/vendor/kreuzberg/stopwords/fi_stopwords.json +849 -849
- data/vendor/kreuzberg/stopwords/fr_stopwords.json +693 -693
- data/vendor/kreuzberg/stopwords/ga_stopwords.json +111 -111
- data/vendor/kreuzberg/stopwords/gl_stopwords.json +162 -162
- data/vendor/kreuzberg/stopwords/gu_stopwords.json +226 -226
- data/vendor/kreuzberg/stopwords/ha_stopwords.json +41 -41
- data/vendor/kreuzberg/stopwords/he_stopwords.json +196 -196
- data/vendor/kreuzberg/stopwords/hi_stopwords.json +227 -227
- data/vendor/kreuzberg/stopwords/hr_stopwords.json +181 -181
- data/vendor/kreuzberg/stopwords/hu_stopwords.json +791 -791
- data/vendor/kreuzberg/stopwords/hy_stopwords.json +47 -47
- data/vendor/kreuzberg/stopwords/id_stopwords.json +760 -760
- data/vendor/kreuzberg/stopwords/it_stopwords.json +634 -634
- data/vendor/kreuzberg/stopwords/ja_stopwords.json +136 -136
- data/vendor/kreuzberg/stopwords/kn_stopwords.json +84 -84
- data/vendor/kreuzberg/stopwords/ko_stopwords.json +681 -681
- data/vendor/kreuzberg/stopwords/ku_stopwords.json +64 -64
- data/vendor/kreuzberg/stopwords/la_stopwords.json +51 -51
- data/vendor/kreuzberg/stopwords/lt_stopwords.json +476 -476
- data/vendor/kreuzberg/stopwords/lv_stopwords.json +163 -163
- data/vendor/kreuzberg/stopwords/ml_stopwords.json +1 -1
- data/vendor/kreuzberg/stopwords/mr_stopwords.json +101 -101
- data/vendor/kreuzberg/stopwords/ms_stopwords.json +477 -477
- data/vendor/kreuzberg/stopwords/ne_stopwords.json +490 -490
- data/vendor/kreuzberg/stopwords/nl_stopwords.json +415 -415
- data/vendor/kreuzberg/stopwords/no_stopwords.json +223 -223
- data/vendor/kreuzberg/stopwords/pl_stopwords.json +331 -331
- data/vendor/kreuzberg/stopwords/pt_stopwords.json +562 -562
- data/vendor/kreuzberg/stopwords/ro_stopwords.json +436 -436
- data/vendor/kreuzberg/stopwords/ru_stopwords.json +561 -561
- data/vendor/kreuzberg/stopwords/si_stopwords.json +193 -193
- data/vendor/kreuzberg/stopwords/sk_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sl_stopwords.json +448 -448
- data/vendor/kreuzberg/stopwords/so_stopwords.json +32 -32
- data/vendor/kreuzberg/stopwords/st_stopwords.json +33 -33
- data/vendor/kreuzberg/stopwords/sv_stopwords.json +420 -420
- data/vendor/kreuzberg/stopwords/sw_stopwords.json +76 -76
- data/vendor/kreuzberg/stopwords/ta_stopwords.json +129 -129
- data/vendor/kreuzberg/stopwords/te_stopwords.json +54 -54
- data/vendor/kreuzberg/stopwords/th_stopwords.json +118 -118
- data/vendor/kreuzberg/stopwords/tl_stopwords.json +149 -149
- data/vendor/kreuzberg/stopwords/tr_stopwords.json +506 -506
- data/vendor/kreuzberg/stopwords/uk_stopwords.json +75 -75
- data/vendor/kreuzberg/stopwords/ur_stopwords.json +519 -519
- data/vendor/kreuzberg/stopwords/vi_stopwords.json +647 -647
- data/vendor/kreuzberg/stopwords/yo_stopwords.json +62 -62
- data/vendor/kreuzberg/stopwords/zh_stopwords.json +796 -796
- data/vendor/kreuzberg/stopwords/zu_stopwords.json +31 -31
- data/vendor/kreuzberg/tests/api_extract_multipart.rs +52 -52
- data/vendor/kreuzberg/tests/api_tests.rs +966 -966
- data/vendor/kreuzberg/tests/archive_integration.rs +545 -545
- data/vendor/kreuzberg/tests/batch_orchestration.rs +556 -556
- data/vendor/kreuzberg/tests/batch_processing.rs +318 -318
- data/vendor/kreuzberg/tests/bibtex_parity_test.rs +421 -421
- data/vendor/kreuzberg/tests/concurrency_stress.rs +533 -533
- data/vendor/kreuzberg/tests/config_features.rs +612 -612
- data/vendor/kreuzberg/tests/config_loading_tests.rs +416 -416
- data/vendor/kreuzberg/tests/core_integration.rs +510 -510
- data/vendor/kreuzberg/tests/csv_integration.rs +414 -414
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +500 -500
- data/vendor/kreuzberg/tests/docx_metadata_extraction_test.rs +122 -122
- data/vendor/kreuzberg/tests/docx_vs_pandoc_comparison.rs +370 -370
- data/vendor/kreuzberg/tests/email_integration.rs +327 -327
- data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +275 -275
- data/vendor/kreuzberg/tests/error_handling.rs +402 -402
- data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +228 -228
- data/vendor/kreuzberg/tests/format_integration.rs +164 -164
- data/vendor/kreuzberg/tests/helpers/mod.rs +142 -142
- data/vendor/kreuzberg/tests/html_table_test.rs +551 -551
- data/vendor/kreuzberg/tests/image_integration.rs +255 -255
- data/vendor/kreuzberg/tests/instrumentation_test.rs +139 -139
- data/vendor/kreuzberg/tests/jats_extractor_tests.rs +639 -639
- data/vendor/kreuzberg/tests/jupyter_extractor_tests.rs +704 -704
- data/vendor/kreuzberg/tests/keywords_integration.rs +479 -479
- data/vendor/kreuzberg/tests/keywords_quality.rs +509 -509
- data/vendor/kreuzberg/tests/latex_extractor_tests.rs +496 -496
- data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +490 -490
- data/vendor/kreuzberg/tests/mime_detection.rs +429 -429
- data/vendor/kreuzberg/tests/ocr_configuration.rs +514 -514
- data/vendor/kreuzberg/tests/ocr_errors.rs +698 -698
- data/vendor/kreuzberg/tests/ocr_quality.rs +629 -629
- data/vendor/kreuzberg/tests/ocr_stress.rs +469 -469
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +674 -674
- data/vendor/kreuzberg/tests/opml_extractor_tests.rs +616 -616
- data/vendor/kreuzberg/tests/orgmode_extractor_tests.rs +822 -822
- data/vendor/kreuzberg/tests/pdf_integration.rs +45 -45
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -374
- data/vendor/kreuzberg/tests/pipeline_integration.rs +1436 -1436
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +776 -776
- data/vendor/kreuzberg/tests/plugin_postprocessor_test.rs +560 -560
- data/vendor/kreuzberg/tests/plugin_system.rs +927 -927
- data/vendor/kreuzberg/tests/plugin_validator_test.rs +783 -783
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +587 -587
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +694 -694
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +775 -775
- data/vendor/kreuzberg/tests/security_validation.rs +416 -416
- data/vendor/kreuzberg/tests/stopwords_integration_test.rs +888 -888
- data/vendor/kreuzberg/tests/test_fastembed.rs +631 -631
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1260 -1260
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +648 -648
- data/vendor/kreuzberg/tests/xlsx_metadata_extraction_test.rs +87 -87
- data/vendor/kreuzberg-ffi/Cargo.toml +3 -3
- data/vendor/kreuzberg-ffi/README.md +851 -851
- data/vendor/kreuzberg-ffi/build.rs +176 -176
- data/vendor/kreuzberg-ffi/cbindgen.toml +27 -27
- data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +12 -12
- data/vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in +12 -12
- data/vendor/kreuzberg-ffi/kreuzberg.h +1087 -1087
- data/vendor/kreuzberg-ffi/src/lib.rs +3616 -3616
- data/vendor/kreuzberg-ffi/src/panic_shield.rs +247 -247
- data/vendor/kreuzberg-ffi/tests.disabled/README.md +48 -48
- data/vendor/kreuzberg-ffi/tests.disabled/config_loading_tests.rs +299 -299
- data/vendor/kreuzberg-ffi/tests.disabled/config_tests.rs +346 -346
- data/vendor/kreuzberg-ffi/tests.disabled/extractor_tests.rs +232 -232
- data/vendor/kreuzberg-ffi/tests.disabled/plugin_registration_tests.rs +470 -470
- data/vendor/kreuzberg-tesseract/.commitlintrc.json +13 -13
- data/vendor/kreuzberg-tesseract/.crate-ignore +2 -2
- data/vendor/kreuzberg-tesseract/Cargo.lock +2933 -2933
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- data/vendor/kreuzberg-tesseract/LICENSE +22 -22
- data/vendor/kreuzberg-tesseract/README.md +399 -399
- data/vendor/kreuzberg-tesseract/build.rs +1354 -1354
- data/vendor/kreuzberg-tesseract/patches/README.md +71 -71
- data/vendor/kreuzberg-tesseract/patches/tesseract.diff +199 -199
- data/vendor/kreuzberg-tesseract/src/api.rs +1371 -1371
- data/vendor/kreuzberg-tesseract/src/choice_iterator.rs +77 -77
- data/vendor/kreuzberg-tesseract/src/enums.rs +297 -297
- data/vendor/kreuzberg-tesseract/src/error.rs +81 -81
- data/vendor/kreuzberg-tesseract/src/lib.rs +145 -145
- data/vendor/kreuzberg-tesseract/src/monitor.rs +57 -57
- data/vendor/kreuzberg-tesseract/src/mutable_iterator.rs +197 -197
- data/vendor/kreuzberg-tesseract/src/page_iterator.rs +253 -253
- data/vendor/kreuzberg-tesseract/src/result_iterator.rs +286 -286
- data/vendor/kreuzberg-tesseract/src/result_renderer.rs +183 -183
- data/vendor/kreuzberg-tesseract/tests/integration_test.rs +211 -211
- data/vendor/rb-sys/.cargo_vcs_info.json +5 -5
- data/vendor/rb-sys/Cargo.lock +393 -393
- data/vendor/rb-sys/Cargo.toml +70 -70
- data/vendor/rb-sys/Cargo.toml.orig +57 -57
- data/vendor/rb-sys/LICENSE-APACHE +190 -190
- data/vendor/rb-sys/LICENSE-MIT +21 -21
- data/vendor/rb-sys/build/features.rs +111 -111
- data/vendor/rb-sys/build/main.rs +286 -286
- data/vendor/rb-sys/build/stable_api_config.rs +155 -155
- data/vendor/rb-sys/build/version.rs +50 -50
- data/vendor/rb-sys/readme.md +36 -36
- data/vendor/rb-sys/src/bindings.rs +21 -21
- data/vendor/rb-sys/src/hidden.rs +11 -11
- data/vendor/rb-sys/src/lib.rs +35 -35
- data/vendor/rb-sys/src/macros.rs +371 -371
- data/vendor/rb-sys/src/memory.rs +53 -53
- data/vendor/rb-sys/src/ruby_abi_version.rs +38 -38
- data/vendor/rb-sys/src/special_consts.rs +31 -31
- data/vendor/rb-sys/src/stable_api/compiled.c +179 -179
- data/vendor/rb-sys/src/stable_api/compiled.rs +257 -257
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +324 -324
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +332 -332
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +325 -325
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +323 -323
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +339 -339
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +339 -339
- data/vendor/rb-sys/src/stable_api.rs +260 -260
- data/vendor/rb-sys/src/symbol.rs +31 -31
- data/vendor/rb-sys/src/tracking_allocator.rs +330 -330
- data/vendor/rb-sys/src/utils.rs +89 -89
- data/vendor/rb-sys/src/value_type.rs +7 -7
- metadata +7 -80
data/vendor/kreuzberg/build.rs
CHANGED
|
@@ -1,843 +1,843 @@
|
|
|
1
|
-
// Kreuzberg Build Script - PDFium Linking Configuration
|
|
2
|
-
//
|
|
3
|
-
// This build script handles PDFium library downloading and linking for the kreuzberg crate.
|
|
4
|
-
// It supports multiple linking strategies via Cargo features:
|
|
5
|
-
//
|
|
6
|
-
// 1. Default (pdf, bundled-pdfium): Download dynamic library and embed in binary
|
|
7
|
-
// - Self-contained binary that extracts library at runtime
|
|
8
|
-
// - Larger binary size but no external .so dependency
|
|
9
|
-
// - No PDFIUM_*_PATH environment variables needed
|
|
10
|
-
//
|
|
11
|
-
// 2. static-pdfium: Static linking (no runtime dependency)
|
|
12
|
-
// - REQUIRES: PDFIUM_STATIC_LIB_PATH environment variable pointing to libpdfium.a directory
|
|
13
|
-
// - Reason: bblanchon/pdfium-binaries only provides dynamic libraries
|
|
14
|
-
// - Use case: Docker with musl, fully static binaries
|
|
15
|
-
// - Note: libpdfium.a must be obtained separately (e.g., paulocoutinhox/pdfium-lib)
|
|
16
|
-
//
|
|
17
|
-
// 3. system-pdfium: Use system-installed pdfium
|
|
18
|
-
// - Detected via pkg-config or KREUZBERG_PDFIUM_SYSTEM_PATH
|
|
19
|
-
//
|
|
20
|
-
// Environment Variables:
|
|
21
|
-
// - PDFIUM_STATIC_LIB_PATH: Path to directory containing libpdfium.a (for static-pdfium)
|
|
22
|
-
// - KREUZBERG_PDFIUM_PREBUILT: Path to prebuilt pdfium directory (skip download)
|
|
23
|
-
// - KREUZBERG_PDFIUM_SYSTEM_PATH: System pdfium library path (for system-pdfium)
|
|
24
|
-
// - PDFIUM_VERSION: Override version for bblanchon/pdfium-binaries
|
|
25
|
-
// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: Number of download retries (default: 5)
|
|
26
|
-
|
|
27
|
-
use std::env;
|
|
28
|
-
use std::fs;
|
|
29
|
-
use std::io;
|
|
30
|
-
use std::path::{Path, PathBuf};
|
|
31
|
-
use std::process::Command;
|
|
32
|
-
use std::thread;
|
|
33
|
-
use std::time::Duration;
|
|
34
|
-
|
|
35
|
-
/// PDFium linking strategy
|
|
36
|
-
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
37
|
-
enum PdfiumLinkStrategy {
|
|
38
|
-
/// Download and link statically (static-pdfium feature)
|
|
39
|
-
DownloadStatic,
|
|
40
|
-
/// Download, link dynamically, and embed in binary (bundled-pdfium feature)
|
|
41
|
-
Bundled,
|
|
42
|
-
/// Use system-installed pdfium via pkg-config (system-pdfium feature)
|
|
43
|
-
System,
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
// ============================================================================
|
|
47
|
-
// MAIN BUILD ORCHESTRATION
|
|
48
|
-
// ============================================================================
|
|
49
|
-
|
|
50
|
-
fn main() {
|
|
51
|
-
let target = env::var("TARGET").unwrap();
|
|
52
|
-
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
|
53
|
-
|
|
54
|
-
println!("cargo::rustc-check-cfg=cfg(coverage)");
|
|
55
|
-
|
|
56
|
-
// Skip pdfium linking if the pdf feature is not enabled
|
|
57
|
-
if !cfg!(feature = "pdf") {
|
|
58
|
-
tracing::debug!("PDF feature not enabled, skipping pdfium linking");
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
let strategy = determine_link_strategy(&target);
|
|
63
|
-
|
|
64
|
-
tracing::debug!("Using PDFium linking strategy: {:?}", strategy);
|
|
65
|
-
|
|
66
|
-
match strategy {
|
|
67
|
-
PdfiumLinkStrategy::DownloadStatic => {
|
|
68
|
-
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
69
|
-
link_statically(&pdfium_dir, &target);
|
|
70
|
-
// Skip copy_lib_to_package - library embedded in binary
|
|
71
|
-
}
|
|
72
|
-
PdfiumLinkStrategy::Bundled => {
|
|
73
|
-
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
74
|
-
link_bundled(&pdfium_dir, &target, &out_dir);
|
|
75
|
-
// Skip copy_lib_to_package - each binary extracts its own
|
|
76
|
-
}
|
|
77
|
-
PdfiumLinkStrategy::System => {
|
|
78
|
-
link_system(&target);
|
|
79
|
-
// No download or copy needed
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
link_system_frameworks(&target);
|
|
84
|
-
println!("cargo:rerun-if-changed=build.rs");
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// ============================================================================
|
|
88
|
-
// FEATURE & STRATEGY VALIDATION
|
|
89
|
-
// ============================================================================
|
|
90
|
-
|
|
91
|
-
/// Determine which linking strategy to use based on features and target
|
|
92
|
-
fn determine_link_strategy(target: &str) -> PdfiumLinkStrategy {
|
|
93
|
-
// WASM handling: check for PDFIUM_WASM_LIB environment variable
|
|
94
|
-
if target.contains("wasm") {
|
|
95
|
-
if let Ok(wasm_lib) = env::var("PDFIUM_WASM_LIB") {
|
|
96
|
-
println!("cargo:rustc-link-search=native={}", wasm_lib);
|
|
97
|
-
println!("cargo:rustc-link-lib=static=pdfium");
|
|
98
|
-
return PdfiumLinkStrategy::DownloadStatic;
|
|
99
|
-
}
|
|
100
|
-
// For WASM without explicit PDFIUM_WASM_LIB, use bundled strategy
|
|
101
|
-
// This downloads pdfium-lib which provides WASM-compatible builds
|
|
102
|
-
println!("cargo:warning=WASM build using bundled PDFium (set PDFIUM_WASM_LIB to link custom WASM PDFium)");
|
|
103
|
-
return PdfiumLinkStrategy::Bundled;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
let system_pdfium = cfg!(feature = "system-pdfium");
|
|
107
|
-
let bundled_pdfium = cfg!(feature = "bundled-pdfium");
|
|
108
|
-
let static_pdfium = cfg!(feature = "static-pdfium");
|
|
109
|
-
|
|
110
|
-
let enabled_count = usize::from(system_pdfium) + usize::from(bundled_pdfium) + usize::from(static_pdfium);
|
|
111
|
-
if enabled_count > 1 {
|
|
112
|
-
println!(
|
|
113
|
-
"cargo:warning=Multiple PDFium linking strategies enabled (static-pdfium={}, bundled-pdfium={}, system-pdfium={}); using bundled-pdfium for this build",
|
|
114
|
-
static_pdfium, bundled_pdfium, system_pdfium
|
|
115
|
-
);
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
// Feature-based strategy selection.
|
|
119
|
-
// Prefer bundled-pdfium when multiple strategies are enabled (e.g. `--all-features`) because it
|
|
120
|
-
// does not require external PDFIUM_STATIC_LIB_PATH and does not depend on a system install.
|
|
121
|
-
if bundled_pdfium {
|
|
122
|
-
return PdfiumLinkStrategy::Bundled;
|
|
123
|
-
}
|
|
124
|
-
if system_pdfium {
|
|
125
|
-
return PdfiumLinkStrategy::System;
|
|
126
|
-
}
|
|
127
|
-
if static_pdfium {
|
|
128
|
-
return PdfiumLinkStrategy::DownloadStatic;
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
// Default: download and link dynamically (bundled-pdfium preferred if pdf not already selected)
|
|
132
|
-
// When only 'pdf' feature is enabled (no linking strategy), default to bundled-pdfium
|
|
133
|
-
PdfiumLinkStrategy::Bundled
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
// ============================================================================
|
|
137
|
-
// DOWNLOAD & PREBUILT ORCHESTRATION
|
|
138
|
-
// ============================================================================
|
|
139
|
-
|
|
140
|
-
/// Download PDFium or use prebuilt directory
|
|
141
|
-
///
|
|
142
|
-
/// This is the main orchestrator function that:
|
|
143
|
-
/// 1. Checks for `KREUZBERG_PDFIUM_PREBUILT` environment variable
|
|
144
|
-
/// 2. If set and valid, uses prebuilt pdfium directory
|
|
145
|
-
/// 3. If not set, downloads pdfium to out_dir (with caching)
|
|
146
|
-
/// 4. Returns PathBuf to pdfium directory
|
|
147
|
-
///
|
|
148
|
-
/// Reuses all existing helper functions:
|
|
149
|
-
/// - `get_pdfium_url_and_lib()` - determines download URL for target
|
|
150
|
-
/// - `download_and_extract_pdfium()` - downloads with retry logic
|
|
151
|
-
/// - `runtime_library_info()` - platform-specific library names
|
|
152
|
-
/// - `prepare_prebuilt_pdfium()` - handles prebuilt copy
|
|
153
|
-
fn download_or_use_prebuilt(target: &str, out_dir: &Path) -> PathBuf {
|
|
154
|
-
let (download_url, _lib_name) = get_pdfium_url_and_lib(target);
|
|
155
|
-
let pdfium_dir = out_dir.join("pdfium");
|
|
156
|
-
|
|
157
|
-
// Check for prebuilt pdfium directory
|
|
158
|
-
if let Some(prebuilt) = env::var_os("KREUZBERG_PDFIUM_PREBUILT") {
|
|
159
|
-
let prebuilt_path = PathBuf::from(prebuilt);
|
|
160
|
-
if prebuilt_path.exists() {
|
|
161
|
-
prepare_prebuilt_pdfium(&prebuilt_path, &pdfium_dir)
|
|
162
|
-
.unwrap_or_else(|err| panic!("Failed to copy Pdfium from {}: {}", prebuilt_path.display(), err));
|
|
163
|
-
if target.contains("windows") {
|
|
164
|
-
ensure_windows_import_library(&pdfium_dir);
|
|
165
|
-
}
|
|
166
|
-
return pdfium_dir;
|
|
167
|
-
} else {
|
|
168
|
-
panic!(
|
|
169
|
-
"Environment variable KREUZBERG_PDFIUM_PREBUILT points to '{}' but the directory does not exist",
|
|
170
|
-
prebuilt_path.display()
|
|
171
|
-
);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// Check if library already exists (cache validation) using flexible detection
|
|
176
|
-
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
177
|
-
let lib_found = find_pdfium_library(&pdfium_dir, &runtime_lib_name, runtime_subdir).is_ok();
|
|
178
|
-
|
|
179
|
-
let import_lib_exists = if target.contains("windows") {
|
|
180
|
-
let lib_dir = pdfium_dir.join("lib");
|
|
181
|
-
lib_dir.join("pdfium.lib").exists() || lib_dir.join("pdfium.dll.lib").exists()
|
|
182
|
-
} else {
|
|
183
|
-
true
|
|
184
|
-
};
|
|
185
|
-
|
|
186
|
-
if !lib_found || !import_lib_exists {
|
|
187
|
-
tracing::debug!("Pdfium library not found, downloading for target: {}", target);
|
|
188
|
-
tracing::debug!("Download URL: {}", download_url);
|
|
189
|
-
download_and_extract_pdfium(&download_url, &pdfium_dir);
|
|
190
|
-
} else {
|
|
191
|
-
tracing::debug!("Pdfium library already cached at {}", pdfium_dir.display());
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// Windows-specific: ensure pdfium.lib exists
|
|
195
|
-
if target.contains("windows") {
|
|
196
|
-
ensure_windows_import_library(&pdfium_dir);
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
pdfium_dir
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
fn ensure_windows_import_library(pdfium_dir: &Path) {
|
|
203
|
-
let lib_dir = pdfium_dir.join("lib");
|
|
204
|
-
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
205
|
-
let expected_lib = lib_dir.join("pdfium.lib");
|
|
206
|
-
|
|
207
|
-
if dll_lib.exists() && !expected_lib.exists() {
|
|
208
|
-
tracing::debug!(
|
|
209
|
-
"Ensuring Windows import library at {} (source: {})",
|
|
210
|
-
expected_lib.display(),
|
|
211
|
-
dll_lib.display()
|
|
212
|
-
);
|
|
213
|
-
fs::copy(&dll_lib, &expected_lib).unwrap_or_else(|err| {
|
|
214
|
-
panic!(
|
|
215
|
-
"Failed to copy Windows import library from {} to {}: {}",
|
|
216
|
-
dll_lib.display(),
|
|
217
|
-
expected_lib.display(),
|
|
218
|
-
err
|
|
219
|
-
)
|
|
220
|
-
});
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// ============================================================================
|
|
225
|
-
// DOWNLOAD UTILITIES
|
|
226
|
-
// ============================================================================
|
|
227
|
-
|
|
228
|
-
/// Fetch the latest release version from a GitHub repository
///
/// Runs `curl -s` against the GitHub "latest release" API endpoint for `repo`
/// and extracts the `tag_name` field from the JSON response. Only the part of
/// the tag after the last `/` is returned (e.g. `chromium/7529` -> `7529`).
///
/// Falls back to "7529" if curl cannot be spawned, exits unsuccessfully, or
/// the response does not contain a non-empty string `tag_name`.
fn get_latest_version(repo: &str) -> String {
    let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);

    Command::new("curl")
        .args(["-s", api_url.as_str()])
        .output()
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| parse_release_tag(&String::from_utf8_lossy(&output.stdout)))
        .unwrap_or_else(|| "7529".to_string())
}

/// Extract the version from the `tag_name` field of a release JSON document.
///
/// Returns `None` when the field is missing, is not a JSON string (for
/// example `null`), or yields an empty version after stripping the tag prefix.
fn parse_release_tag(json: &str) -> Option<String> {
    const KEY: &str = "\"tag_name\":";

    let after_key = &json[json.find(KEY)? + KEY.len()..];
    // The value must be a string literal; otherwise we would pick up the
    // quotes of some unrelated, later field.
    let value = after_key.trim_start().strip_prefix('"')?;
    let tag = &value[..value.find('"')?];

    let version = tag.rsplit('/').next().unwrap_or(tag);
    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
}
|
|
255
|
-
|
|
256
|
-
/// Get the download URL and library name for the target platform
|
|
257
|
-
///
|
|
258
|
-
/// Determines platform/architecture from target triple and constructs
|
|
259
|
-
/// the appropriate GitHub release download URL. Supports:
|
|
260
|
-
/// - WASM: paulocoutinhox/pdfium-lib
|
|
261
|
-
/// - Other platforms: bblanchon/pdfium-binaries
|
|
262
|
-
fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
263
|
-
if target.contains("wasm") {
|
|
264
|
-
let version = env::var("PDFIUM_WASM_VERSION")
|
|
265
|
-
.ok()
|
|
266
|
-
.filter(|v| !v.is_empty())
|
|
267
|
-
.unwrap_or_else(|| get_latest_version("paulocoutinhox/pdfium-lib"));
|
|
268
|
-
tracing::debug!("Using pdfium-lib version: {}", version);
|
|
269
|
-
|
|
270
|
-
// WASM builds use a single 'wasm.tgz' asset regardless of architecture
|
|
271
|
-
// The archive contains both wasm32 and wasm64 if available
|
|
272
|
-
return (
|
|
273
|
-
format!(
|
|
274
|
-
"https://github.com/paulocoutinhox/pdfium-lib/releases/download/{}/wasm.tgz",
|
|
275
|
-
version
|
|
276
|
-
),
|
|
277
|
-
"pdfium".to_string(),
|
|
278
|
-
);
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
let (platform, arch) = if target.contains("darwin") {
|
|
282
|
-
let arch = if target.contains("aarch64") { "arm64" } else { "x64" };
|
|
283
|
-
("mac", arch)
|
|
284
|
-
} else if target.contains("linux") {
|
|
285
|
-
let arch = if target.contains("aarch64") {
|
|
286
|
-
"arm64"
|
|
287
|
-
} else if target.contains("arm") {
|
|
288
|
-
"arm"
|
|
289
|
-
} else {
|
|
290
|
-
"x64"
|
|
291
|
-
};
|
|
292
|
-
("linux", arch)
|
|
293
|
-
} else if target.contains("windows") {
|
|
294
|
-
let arch = if target.contains("aarch64") {
|
|
295
|
-
"arm64"
|
|
296
|
-
} else if target.contains("i686") {
|
|
297
|
-
"x86"
|
|
298
|
-
} else {
|
|
299
|
-
"x64"
|
|
300
|
-
};
|
|
301
|
-
("win", arch)
|
|
302
|
-
} else {
|
|
303
|
-
panic!("Unsupported target platform: {}", target);
|
|
304
|
-
};
|
|
305
|
-
|
|
306
|
-
let version = env::var("PDFIUM_VERSION")
|
|
307
|
-
.ok()
|
|
308
|
-
.filter(|v| !v.is_empty())
|
|
309
|
-
.unwrap_or_else(|| get_latest_version("bblanchon/pdfium-binaries"));
|
|
310
|
-
tracing::debug!("Using pdfium-binaries version: {}", version);
|
|
311
|
-
|
|
312
|
-
let url = format!(
|
|
313
|
-
"https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/{}/pdfium-{}-{}.tgz",
|
|
314
|
-
version, platform, arch
|
|
315
|
-
);
|
|
316
|
-
|
|
317
|
-
(url, "pdfium".to_string())
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
/// Download and extract PDFium archive with retry logic
|
|
321
|
-
///
|
|
322
|
-
/// Features:
|
|
323
|
-
/// - Exponential backoff retry (configurable via env vars)
|
|
324
|
-
/// - File type validation (gzip check)
|
|
325
|
-
/// - Windows-specific import library handling (pdfium.dll.lib -> pdfium.lib)
|
|
326
|
-
/// - Environment variables:
|
|
327
|
-
/// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: number of retries (default: 5)
|
|
328
|
-
/// - KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS: initial backoff in seconds (default: 2)
|
|
329
|
-
fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
330
|
-
fs::create_dir_all(dest_dir).expect("Failed to create pdfium directory");
|
|
331
|
-
|
|
332
|
-
let archive_path = dest_dir.join("pdfium.tar.gz");
|
|
333
|
-
let retries = env::var("KREUZBERG_PDFIUM_DOWNLOAD_RETRIES")
|
|
334
|
-
.ok()
|
|
335
|
-
.and_then(|value| value.parse::<u32>().ok())
|
|
336
|
-
.filter(|value| *value > 0)
|
|
337
|
-
.unwrap_or(5);
|
|
338
|
-
let base_delay = env::var("KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS")
|
|
339
|
-
.ok()
|
|
340
|
-
.and_then(|value| value.parse::<u64>().ok())
|
|
341
|
-
.filter(|value| *value > 0)
|
|
342
|
-
.unwrap_or(2);
|
|
343
|
-
|
|
344
|
-
let archive_path_str = archive_path
|
|
345
|
-
.to_str()
|
|
346
|
-
.unwrap_or_else(|| panic!("Non-UTF8 path for archive: {}", archive_path.display()));
|
|
347
|
-
let mut last_error = String::new();
|
|
348
|
-
|
|
349
|
-
for attempt in 1..=retries {
|
|
350
|
-
let _ = fs::remove_file(&archive_path);
|
|
351
|
-
tracing::debug!(
|
|
352
|
-
"Downloading Pdfium archive from: {} (attempt {}/{})",
|
|
353
|
-
url,
|
|
354
|
-
attempt,
|
|
355
|
-
retries
|
|
356
|
-
);
|
|
357
|
-
|
|
358
|
-
let status = Command::new("curl")
|
|
359
|
-
.args(["-f", "-L", "-o", archive_path_str, url])
|
|
360
|
-
.status();
|
|
361
|
-
|
|
362
|
-
match status {
|
|
363
|
-
Ok(code) if code.success() => {
|
|
364
|
-
last_error.clear();
|
|
365
|
-
break;
|
|
366
|
-
}
|
|
367
|
-
Ok(code) => {
|
|
368
|
-
last_error = format!("curl exited with {:?}", code.code());
|
|
369
|
-
}
|
|
370
|
-
Err(err) => {
|
|
371
|
-
last_error = format!("failed to spawn curl: {err}");
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
|
|
375
|
-
if attempt == retries {
|
|
376
|
-
panic!(
|
|
377
|
-
"Failed to download Pdfium from {} after {} attempts. Last error: {}",
|
|
378
|
-
url, retries, last_error
|
|
379
|
-
);
|
|
380
|
-
}
|
|
381
|
-
|
|
382
|
-
let exponent = u32::min(attempt, 5);
|
|
383
|
-
let multiplier = 1u64 << exponent;
|
|
384
|
-
let delay_secs = base_delay.saturating_mul(multiplier).min(30);
|
|
385
|
-
println!(
|
|
386
|
-
"cargo:warning=Pdfium download failed (attempt {}/{}) - {}. Retrying in {}s",
|
|
387
|
-
attempt, retries, last_error, delay_secs
|
|
388
|
-
);
|
|
389
|
-
thread::sleep(Duration::from_secs(delay_secs));
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
let file_type = Command::new("file")
|
|
393
|
-
.arg(archive_path.to_str().unwrap())
|
|
394
|
-
.output()
|
|
395
|
-
.expect("Failed to check file type");
|
|
396
|
-
|
|
397
|
-
let file_type_output = String::from_utf8_lossy(&file_type.stdout);
|
|
398
|
-
tracing::debug!("Downloaded file type: {}", file_type_output.trim());
|
|
399
|
-
|
|
400
|
-
if !file_type_output.to_lowercase().contains("gzip") && !file_type_output.to_lowercase().contains("compressed") {
|
|
401
|
-
fs::remove_file(&archive_path).ok();
|
|
402
|
-
panic!(
|
|
403
|
-
"Downloaded file is not a valid gzip archive. URL may be incorrect or version unavailable: {}",
|
|
404
|
-
url
|
|
405
|
-
);
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
tracing::debug!("Extracting Pdfium archive...");
|
|
409
|
-
let status = Command::new("tar")
|
|
410
|
-
.args(["-xzf", archive_path.to_str().unwrap(), "-C", dest_dir.to_str().unwrap()])
|
|
411
|
-
.status()
|
|
412
|
-
.expect("Failed to execute tar");
|
|
413
|
-
|
|
414
|
-
if !status.success() {
|
|
415
|
-
fs::remove_file(&archive_path).ok();
|
|
416
|
-
panic!("Failed to extract Pdfium archive from {}", url);
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
fs::remove_file(&archive_path).ok();
|
|
420
|
-
|
|
421
|
-
let target = env::var("TARGET").unwrap();
|
|
422
|
-
if target.contains("windows") {
|
|
423
|
-
let lib_dir = dest_dir.join("lib");
|
|
424
|
-
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
425
|
-
let expected_lib = lib_dir.join("pdfium.lib");
|
|
426
|
-
|
|
427
|
-
if dll_lib.exists() {
|
|
428
|
-
tracing::debug!("Ensuring Windows import library at {}", expected_lib.display());
|
|
429
|
-
if let Err(err) = fs::copy(&dll_lib, &expected_lib) {
|
|
430
|
-
panic!("Failed to copy pdfium.dll.lib to pdfium.lib: {err}");
|
|
431
|
-
}
|
|
432
|
-
} else {
|
|
433
|
-
tracing::debug!("Warning: Expected {} not found after extraction", dll_lib.display());
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
tracing::debug!("Pdfium downloaded and extracted successfully");
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
// ============================================================================
|
|
441
|
-
// PREBUILT HANDLING
|
|
442
|
-
// ============================================================================
|
|
443
|
-
|
|
444
|
-
/// Prepare prebuilt PDFium by copying to destination directory
|
|
445
|
-
///
|
|
446
|
-
/// Removes existing destination if present, then recursively copies
|
|
447
|
-
/// all files from prebuilt source to destination.
|
|
448
|
-
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
449
|
-
if dest_dir.exists() {
|
|
450
|
-
fs::remove_dir_all(dest_dir)?;
|
|
451
|
-
}
|
|
452
|
-
copy_dir_all(prebuilt_src, dest_dir)
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
/// Copy the directory tree rooted at `src` into `dst`
///
/// `dst` (and any missing parents) is created first. Entries reported as
/// directories are descended into recursively; every other entry is copied
/// with `fs::copy` under the same file name.
///
/// # Errors
///
/// Stops at and returns the first I/O error encountered.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;

    for child in fs::read_dir(src)? {
        let child = child?;
        let is_directory = child.file_type()?.is_dir();
        let from = child.path();
        let to = dst.join(child.file_name());

        if !is_directory {
            fs::copy(&from, &to)?;
            continue;
        }
        copy_dir_all(&from, &to)?;
    }

    Ok(())
}
|
|
473
|
-
|
|
474
|
-
// ============================================================================
|
|
475
|
-
// PLATFORM UTILITIES
|
|
476
|
-
// ============================================================================
|
|
477
|
-
|
|
478
|
-
/// Look up the runtime library file name and its archive subdirectory
///
/// The target triple is matched by substring, first match wins:
/// - contains "wasm":    ("libpdfium.a", "release/lib")
/// - contains "windows": ("pdfium.dll", "bin")
/// - contains "darwin":  ("libpdfium.dylib", "lib")
/// - anything else:      ("libpdfium.so", "lib")
fn runtime_library_info(target: &str) -> (String, &'static str) {
    // (target marker, library file name, subdirectory), in priority order.
    const KNOWN_PLATFORMS: [(&str, &str, &str); 3] = [
        // pdfium-lib `wasm.tgz` extracts into `release/lib/libpdfium.a`
        ("wasm", "libpdfium.a", "release/lib"),
        ("windows", "pdfium.dll", "bin"),
        ("darwin", "libpdfium.dylib", "lib"),
    ];

    KNOWN_PLATFORMS
        .iter()
        .find(|&&(marker, _, _)| target.contains(marker))
        .map(|&(_, file_name, subdir)| (file_name.to_string(), subdir))
        .unwrap_or_else(|| ("libpdfium.so".to_string(), "lib"))
}
|
|
497
|
-
|
|
498
|
-
/// Find PDFium library in archive with flexible directory detection
|
|
499
|
-
///
|
|
500
|
-
/// Attempts to locate the library at multiple possible locations:
|
|
501
|
-
/// - {subdir}/{lib_name} (standard location)
|
|
502
|
-
/// - {lib_name} (root of archive)
|
|
503
|
-
/// - bin/{lib_name} (alternative location)
|
|
504
|
-
/// - lib/{lib_name} (explicit lib directory)
|
|
505
|
-
///
|
|
506
|
-
/// This handles variations in archive structure across different platform builds,
|
|
507
|
-
/// particularly macOS ARM64 where the archive structure may differ.
|
|
508
|
-
///
|
|
509
|
-
/// Returns the full path to the library if found, or an error with available files.
|
|
510
|
-
fn find_pdfium_library(pdfium_dir: &Path, lib_name: &str, expected_subdir: &str) -> Result<PathBuf, String> {
|
|
511
|
-
// Candidates in priority order
|
|
512
|
-
let candidates = [
|
|
513
|
-
pdfium_dir.join(expected_subdir).join(lib_name), // Standard: lib/libpdfium.dylib
|
|
514
|
-
pdfium_dir.join(lib_name), // Root: libpdfium.dylib
|
|
515
|
-
pdfium_dir.join("bin").join(lib_name), // Alternative: bin/libpdfium.dylib
|
|
516
|
-
pdfium_dir.join("lib").join(lib_name), // Explicit lib: lib/libpdfium.dylib
|
|
517
|
-
];
|
|
518
|
-
|
|
519
|
-
// Try each candidate
|
|
520
|
-
for candidate in &candidates {
|
|
521
|
-
if candidate.exists() {
|
|
522
|
-
tracing::debug!("Found PDFium library at: {}", candidate.display());
|
|
523
|
-
return Ok(candidate.clone());
|
|
524
|
-
}
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
// Library not found - provide detailed error with directory listing
|
|
528
|
-
let mut error_msg = format!(
|
|
529
|
-
"PDFium library not found at expected location: {}/{}\n\n",
|
|
530
|
-
pdfium_dir.display(),
|
|
531
|
-
expected_subdir
|
|
532
|
-
);
|
|
533
|
-
error_msg.push_str("Attempted locations:\n");
|
|
534
|
-
for candidate in &candidates {
|
|
535
|
-
error_msg.push_str(&format!(" - {}\n", candidate.display()));
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
// List actual contents of pdfium directory for debugging
|
|
539
|
-
error_msg.push_str("\nActual archive contents:\n");
|
|
540
|
-
if let Ok(entries) = fs::read_dir(pdfium_dir) {
|
|
541
|
-
for entry in entries.flatten() {
|
|
542
|
-
let path = entry.path();
|
|
543
|
-
let file_type = if path.is_dir() { "dir" } else { "file" };
|
|
544
|
-
error_msg.push_str(&format!(" {} ({})\n", path.display(), file_type));
|
|
545
|
-
|
|
546
|
-
// Show contents of subdirectories
|
|
547
|
-
if path.is_dir()
|
|
548
|
-
&& let Ok(sub_entries) = fs::read_dir(&path)
|
|
549
|
-
{
|
|
550
|
-
for sub_entry in sub_entries.flatten() {
|
|
551
|
-
let sub_path = sub_entry.path();
|
|
552
|
-
let sub_type = if sub_path.is_dir() { "dir" } else { "file" };
|
|
553
|
-
error_msg.push_str(&format!(" {} ({})\n", sub_path.display(), sub_type));
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
|
-
}
|
|
558
|
-
|
|
559
|
-
Err(error_msg)
|
|
560
|
-
}
|
|
561
|
-
|
|
562
|
-
// ============================================================================
|
|
563
|
-
// LINKING STRATEGIES
|
|
564
|
-
// ============================================================================
|
|
565
|
-
|
|
566
|
-
/// Link PDFium dynamically (default)
|
|
567
|
-
///
|
|
568
|
-
/// Sets up linker to use PDFium as a dynamic library (.dylib/.so/.dll)
|
|
569
|
-
/// with platform-specific rpath configuration for runtime library discovery.
|
|
570
|
-
/// Supports flexible archive structures by adding multiple possible lib directories.
|
|
571
|
-
fn link_dynamically(pdfium_dir: &Path, target: &str) {
|
|
572
|
-
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
573
|
-
|
|
574
|
-
// Find the actual library location (handles multiple possible archive structures)
|
|
575
|
-
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
576
|
-
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
577
|
-
Err(err) => panic!("{}", err),
|
|
578
|
-
};
|
|
579
|
-
|
|
580
|
-
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
581
|
-
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
582
|
-
|
|
583
|
-
// Also add standard lib directory for compatibility
|
|
584
|
-
let std_lib_dir = pdfium_dir.join("lib");
|
|
585
|
-
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
586
|
-
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
// Add bin directory for platforms where it might be needed
|
|
590
|
-
let bin_dir = pdfium_dir.join("bin");
|
|
591
|
-
if bin_dir.exists() && bin_dir != lib_path {
|
|
592
|
-
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
593
|
-
}
|
|
594
|
-
|
|
595
|
-
// Set rpath for dynamic linking
|
|
596
|
-
if target.contains("darwin") {
|
|
597
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
598
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
599
|
-
} else if target.contains("linux") {
|
|
600
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
601
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
602
|
-
}
|
|
603
|
-
}
|
|
604
|
-
|
|
605
|
-
/// Link PDFium statically (static-pdfium feature)
|
|
606
|
-
///
|
|
607
|
-
/// Embeds PDFium into the binary as a static library. Adds system
|
|
608
|
-
/// dependencies required for static linking on Linux.
|
|
609
|
-
/// Supports flexible archive structures by finding library in multiple locations.
|
|
610
|
-
///
|
|
611
|
-
/// Environment Variables:
|
|
612
|
-
/// - `PDFIUM_STATIC_LIB_PATH`: Path to directory containing libpdfium.a (for Docker/musl builds)
|
|
613
|
-
///
|
|
614
|
-
/// Note: bblanchon/pdfium-binaries only provides dynamic libraries.
|
|
615
|
-
/// On macOS, this will fallback to dynamic linking with a warning.
|
|
616
|
-
/// On Linux, you must provide PDFIUM_STATIC_LIB_PATH pointing to a static build.
|
|
617
|
-
fn link_statically(pdfium_dir: &Path, target: &str) {
|
|
618
|
-
// For static linking, we need libpdfium.a (not .dylib or .so)
|
|
619
|
-
let static_lib_name = "libpdfium.a";
|
|
620
|
-
let lib_subdir = if target.contains("wasm") { "release/lib" } else { "lib" };
|
|
621
|
-
|
|
622
|
-
// First, check if user provided a static library path via environment variable
|
|
623
|
-
if let Ok(custom_path) = env::var("PDFIUM_STATIC_LIB_PATH") {
|
|
624
|
-
let custom_lib_dir = PathBuf::from(&custom_path);
|
|
625
|
-
|
|
626
|
-
if !custom_lib_dir.exists() {
|
|
627
|
-
panic!(
|
|
628
|
-
"PDFIUM_STATIC_LIB_PATH points to '{}' but the directory does not exist",
|
|
629
|
-
custom_path
|
|
630
|
-
);
|
|
631
|
-
}
|
|
632
|
-
|
|
633
|
-
let custom_lib = custom_lib_dir.join(static_lib_name);
|
|
634
|
-
if !custom_lib.exists() {
|
|
635
|
-
panic!(
|
|
636
|
-
"PDFIUM_STATIC_LIB_PATH points to '{}' but {} not found.\n\
|
|
637
|
-
Expected to find: {}",
|
|
638
|
-
custom_path,
|
|
639
|
-
static_lib_name,
|
|
640
|
-
custom_lib.display()
|
|
641
|
-
);
|
|
642
|
-
}
|
|
643
|
-
|
|
644
|
-
tracing::debug!("Using custom static PDFium from: {}", custom_lib.display());
|
|
645
|
-
println!("cargo:rustc-link-search=native={}", custom_lib_dir.display());
|
|
646
|
-
println!("cargo:rustc-link-lib=static=pdfium");
|
|
647
|
-
|
|
648
|
-
// Static linking requires additional system dependencies
|
|
649
|
-
if target.contains("linux") {
|
|
650
|
-
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
651
|
-
println!("cargo:rustc-link-lib=dylib=dl");
|
|
652
|
-
} else if target.contains("windows") {
|
|
653
|
-
println!("cargo:rustc-link-lib=dylib=ws2_32");
|
|
654
|
-
println!("cargo:rustc-link-lib=dylib=userenv");
|
|
655
|
-
}
|
|
656
|
-
|
|
657
|
-
return;
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
// Find the actual library location (handles multiple possible archive structures)
|
|
661
|
-
let lib_path = match find_pdfium_library(pdfium_dir, static_lib_name, lib_subdir) {
|
|
662
|
-
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
663
|
-
Err(_err) => {
|
|
664
|
-
// Static library not found - check if we're on macOS and can fallback
|
|
665
|
-
if target.contains("darwin") {
|
|
666
|
-
eprintln!("cargo:warning=Static PDFium library (libpdfium.a) not found for macOS.");
|
|
667
|
-
eprintln!("cargo:warning=bblanchon/pdfium-binaries only provides dynamic libraries.");
|
|
668
|
-
eprintln!("cargo:warning=Falling back to dynamic linking for local development.");
|
|
669
|
-
eprintln!("cargo:warning=Production Linux builds require PDFIUM_STATIC_LIB_PATH.");
|
|
670
|
-
|
|
671
|
-
// Fallback to dynamic linking on macOS
|
|
672
|
-
link_dynamically(pdfium_dir, target);
|
|
673
|
-
return;
|
|
674
|
-
} else {
|
|
675
|
-
// On Linux/Windows, provide helpful error with actionable steps
|
|
676
|
-
panic!(
|
|
677
|
-
"Static PDFium library (libpdfium.a) not found.\n\n\
|
|
678
|
-
bblanchon/pdfium-binaries only provides dynamic libraries.\n\n\
|
|
679
|
-
For static linking (required for Docker with musl), you must:\n\n\
|
|
680
|
-
1. Build static PDFium or obtain from a source that provides it\n\
|
|
681
|
-
- See: https://github.com/ajrcarey/pdfium-render/issues/53\n\
|
|
682
|
-
- Or use: https://github.com/paulocoutinhox/pdfium-lib (provides static builds)\n\n\
|
|
683
|
-
2. Set environment variable pointing to the directory containing libpdfium.a:\n\
|
|
684
|
-
export PDFIUM_STATIC_LIB_PATH=/path/to/pdfium/lib\n\n\
|
|
685
|
-
3. Or use alternative features:\n\
|
|
686
|
-
- 'pdf' (dynamic linking, requires .so at runtime)\n\
|
|
687
|
-
- 'bundled-pdfium' (embeds dynamic library in binary)\n\
|
|
688
|
-
- 'system-pdfium' (use system-installed pdfium)\n\n\
|
|
689
|
-
Example Dockerfile pattern:\n\
|
|
690
|
-
FROM alpine:latest as pdfium-builder\n\
|
|
691
|
-
# Download/build static libpdfium.a\n\
|
|
692
|
-
\n\
|
|
693
|
-
FROM rust:alpine as builder\n\
|
|
694
|
-
ENV PDFIUM_STATIC_LIB_PATH=/pdfium/lib\n\
|
|
695
|
-
COPY --from=pdfium-builder /path/to/libpdfium.a /pdfium/lib/"
|
|
696
|
-
);
|
|
697
|
-
}
|
|
698
|
-
}
|
|
699
|
-
};
|
|
700
|
-
|
|
701
|
-
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
702
|
-
println!("cargo:rustc-link-lib=static=pdfium");
|
|
703
|
-
|
|
704
|
-
// Also add standard lib directory for compatibility
|
|
705
|
-
let std_lib_dir = pdfium_dir.join("lib");
|
|
706
|
-
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
707
|
-
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
708
|
-
}
|
|
709
|
-
|
|
710
|
-
// Add bin directory for platforms where it might be needed
|
|
711
|
-
let bin_dir = pdfium_dir.join("bin");
|
|
712
|
-
if bin_dir.exists() && bin_dir != lib_path {
|
|
713
|
-
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
714
|
-
}
|
|
715
|
-
|
|
716
|
-
// Static linking requires additional system dependencies
|
|
717
|
-
if target.contains("linux") {
|
|
718
|
-
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
719
|
-
println!("cargo:rustc-link-lib=dylib=dl");
|
|
720
|
-
} else if target.contains("windows") {
|
|
721
|
-
println!("cargo:rustc-link-lib=dylib=ws2_32");
|
|
722
|
-
println!("cargo:rustc-link-lib=dylib=userenv");
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
/// Link PDFium bundled (bundled-pdfium feature)
|
|
727
|
-
///
|
|
728
|
-
/// Links dynamically but copies library to OUT_DIR for embedding in binary.
|
|
729
|
-
/// Each binary extracts and uses its own copy of the PDFium library.
|
|
730
|
-
/// Supports flexible archive structures by finding library in multiple locations.
|
|
731
|
-
///
|
|
732
|
-
/// For WASM targets, links statically using the bundled static library.
|
|
733
|
-
fn link_bundled(pdfium_dir: &Path, target: &str, out_dir: &Path) {
|
|
734
|
-
// Copy library to OUT_DIR for bundling using flexible detection
|
|
735
|
-
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
736
|
-
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
737
|
-
Ok(path) => path,
|
|
738
|
-
Err(err) => panic!("{}", err),
|
|
739
|
-
};
|
|
740
|
-
let bundled_lib = out_dir.join(&runtime_lib_name);
|
|
741
|
-
|
|
742
|
-
fs::copy(&src_lib, &bundled_lib)
|
|
743
|
-
.unwrap_or_else(|err| panic!("Failed to copy library to OUT_DIR for bundling: {}", err));
|
|
744
|
-
|
|
745
|
-
// Emit environment variable with bundled library path
|
|
746
|
-
let bundled_path = bundled_lib
|
|
747
|
-
.to_str()
|
|
748
|
-
.unwrap_or_else(|| panic!("Non-UTF8 path for bundled library: {}", bundled_lib.display()));
|
|
749
|
-
println!("cargo:rustc-env=KREUZBERG_PDFIUM_BUNDLED_PATH={}", bundled_path);
|
|
750
|
-
|
|
751
|
-
// For WASM, link statically using the bundled library
|
|
752
|
-
if target.contains("wasm") {
|
|
753
|
-
let lib_dir = bundled_lib
|
|
754
|
-
.parent()
|
|
755
|
-
.unwrap_or_else(|| panic!("Invalid bundled library path: {}", bundled_lib.display()));
|
|
756
|
-
println!("cargo:rustc-link-search=native={}", lib_dir.display());
|
|
757
|
-
println!("cargo:rustc-link-lib=static=pdfium");
|
|
758
|
-
tracing::debug!("Bundled PDFium static library linked for WASM at: {}", bundled_path);
|
|
759
|
-
} else {
|
|
760
|
-
tracing::debug!("Bundled PDFium library at: {}", bundled_path);
|
|
761
|
-
}
|
|
762
|
-
}
|
|
763
|
-
|
|
764
|
-
/// Link system-installed PDFium (system-pdfium feature)
|
|
765
|
-
///
|
|
766
|
-
/// Attempts to find PDFium via pkg-config first, then falls back to
|
|
767
|
-
/// environment variables (KREUZBERG_PDFIUM_SYSTEM_PATH, KREUZBERG_PDFIUM_SYSTEM_INCLUDE).
|
|
768
|
-
fn link_system(_target: &str) {
|
|
769
|
-
// Try pkg-config first
|
|
770
|
-
match pkg_config::Config::new().atleast_version("5.0").probe("pdfium") {
|
|
771
|
-
Ok(library) => {
|
|
772
|
-
tracing::debug!("Found system pdfium via pkg-config");
|
|
773
|
-
for include_path in &library.include_paths {
|
|
774
|
-
println!("cargo:include={}", include_path.display());
|
|
775
|
-
}
|
|
776
|
-
return;
|
|
777
|
-
}
|
|
778
|
-
Err(err) => {
|
|
779
|
-
tracing::debug!("pkg-config probe failed: {}", err);
|
|
780
|
-
}
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
// Fallback to environment variables
|
|
784
|
-
let lib_path = env::var("KREUZBERG_PDFIUM_SYSTEM_PATH").ok();
|
|
785
|
-
let include_path = env::var("KREUZBERG_PDFIUM_SYSTEM_INCLUDE").ok();
|
|
786
|
-
|
|
787
|
-
if let Some(lib_dir) = lib_path {
|
|
788
|
-
let lib_dir_path = PathBuf::from(&lib_dir);
|
|
789
|
-
if !lib_dir_path.exists() {
|
|
790
|
-
panic!(
|
|
791
|
-
"KREUZBERG_PDFIUM_SYSTEM_PATH points to '{}' but the directory does not exist",
|
|
792
|
-
lib_dir
|
|
793
|
-
);
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
println!("cargo:rustc-link-search=native={}", lib_dir);
|
|
797
|
-
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
798
|
-
|
|
799
|
-
if let Some(inc_dir) = include_path {
|
|
800
|
-
println!("cargo:include={}", inc_dir);
|
|
801
|
-
}
|
|
802
|
-
|
|
803
|
-
tracing::debug!("Using system pdfium from: {}", lib_dir);
|
|
804
|
-
return;
|
|
805
|
-
}
|
|
806
|
-
|
|
807
|
-
// No system pdfium found
|
|
808
|
-
panic!(
|
|
809
|
-
"system-pdfium feature enabled but pdfium not found.\n\
|
|
810
|
-
\n\
|
|
811
|
-
Please install pdfium system-wide or provide:\n\
|
|
812
|
-
- KREUZBERG_PDFIUM_SYSTEM_PATH: path to directory containing libpdfium\n\
|
|
813
|
-
- KREUZBERG_PDFIUM_SYSTEM_INCLUDE: path to pdfium headers (optional)\n\
|
|
814
|
-
\n\
|
|
815
|
-
Alternatively, use a different linking strategy:\n\
|
|
816
|
-
- Default (dynamic): cargo build --features pdf\n\
|
|
817
|
-
- Static linking: cargo build --features pdf,static-pdfium\n\
|
|
818
|
-
- Bundled: cargo build --features pdf,bundled-pdfium"
|
|
819
|
-
);
|
|
820
|
-
}
|
|
821
|
-
|
|
822
|
-
/// Emit link directives for the platform libraries PDFium depends on
///
/// One `cargo:rustc-link-lib` line is printed per library:
/// - darwin:  CoreFoundation, CoreGraphics, CoreText, AppKit frameworks and libc++
/// - linux:   stdc++, libm
/// - windows: gdi32, user32, advapi32
///
/// Targets matching none of these get no directives.
fn link_system_frameworks(target: &str) {
    let link_specs: &[&str] = if target.contains("darwin") {
        &[
            "framework=CoreFoundation",
            "framework=CoreGraphics",
            "framework=CoreText",
            "framework=AppKit",
            "dylib=c++",
        ]
    } else if target.contains("linux") {
        &["dylib=stdc++", "dylib=m"]
    } else if target.contains("windows") {
        &["dylib=gdi32", "dylib=user32", "dylib=advapi32"]
    } else {
        &[]
    };

    for spec in link_specs {
        println!("cargo:rustc-link-lib={}", spec);
    }
}
|
|
1
|
+
// Kreuzberg Build Script - PDFium Linking Configuration
|
|
2
|
+
//
|
|
3
|
+
// This build script handles PDFium library downloading and linking for the kreuzberg crate.
|
|
4
|
+
// It supports multiple linking strategies via Cargo features:
|
|
5
|
+
//
|
|
6
|
+
// 1. Default (pdf, bundled-pdfium): Download dynamic library and embed in binary
|
|
7
|
+
// - Self-contained binary that extracts library at runtime
|
|
8
|
+
// - Larger binary size but no external .so dependency
|
|
9
|
+
// - No PDFIUM_*_PATH environment variables needed
|
|
10
|
+
//
|
|
11
|
+
// 2. static-pdfium: Static linking (no runtime dependency)
|
|
12
|
+
// - REQUIRES: PDFIUM_STATIC_LIB_PATH environment variable pointing to libpdfium.a directory
|
|
13
|
+
// - Reason: bblanchon/pdfium-binaries only provides dynamic libraries
|
|
14
|
+
// - Use case: Docker with musl, fully static binaries
|
|
15
|
+
// - Note: libpdfium.a must be obtained separately (e.g., paulocoutinhox/pdfium-lib)
|
|
16
|
+
//
|
|
17
|
+
// 3. system-pdfium: Use system-installed pdfium
|
|
18
|
+
// - Detected via pkg-config or KREUZBERG_PDFIUM_SYSTEM_PATH
|
|
19
|
+
//
|
|
20
|
+
// Environment Variables:
|
|
21
|
+
// - PDFIUM_STATIC_LIB_PATH: Path to directory containing libpdfium.a (for static-pdfium)
|
|
22
|
+
// - KREUZBERG_PDFIUM_PREBUILT: Path to prebuilt pdfium directory (skip download)
|
|
23
|
+
// - KREUZBERG_PDFIUM_SYSTEM_PATH: System pdfium library path (for system-pdfium)
|
|
24
|
+
// - PDFIUM_VERSION: Override version for bblanchon/pdfium-binaries
|
|
25
|
+
// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: Number of download retries (default: 5)
|
|
26
|
+
|
|
27
|
+
use std::env;
|
|
28
|
+
use std::fs;
|
|
29
|
+
use std::io;
|
|
30
|
+
use std::path::{Path, PathBuf};
|
|
31
|
+
use std::process::Command;
|
|
32
|
+
use std::thread;
|
|
33
|
+
use std::time::Duration;
|
|
34
|
+
|
|
35
|
+
/// The way the build script obtains and links PDFium
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PdfiumLinkStrategy {
    /// static-pdfium feature: download (or reuse a prebuilt copy) and link statically
    DownloadStatic,
    /// bundled-pdfium feature: download (or reuse a prebuilt copy) and stage
    /// the library for embedding in the binary
    Bundled,
    /// system-pdfium feature: use a system-installed pdfium (pkg-config)
    System,
}
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// MAIN BUILD ORCHESTRATION
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
fn main() {
|
|
51
|
+
let target = env::var("TARGET").unwrap();
|
|
52
|
+
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
|
53
|
+
|
|
54
|
+
println!("cargo::rustc-check-cfg=cfg(coverage)");
|
|
55
|
+
|
|
56
|
+
// Skip pdfium linking if the pdf feature is not enabled
|
|
57
|
+
if !cfg!(feature = "pdf") {
|
|
58
|
+
tracing::debug!("PDF feature not enabled, skipping pdfium linking");
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
let strategy = determine_link_strategy(&target);
|
|
63
|
+
|
|
64
|
+
tracing::debug!("Using PDFium linking strategy: {:?}", strategy);
|
|
65
|
+
|
|
66
|
+
match strategy {
|
|
67
|
+
PdfiumLinkStrategy::DownloadStatic => {
|
|
68
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
69
|
+
link_statically(&pdfium_dir, &target);
|
|
70
|
+
// Skip copy_lib_to_package - library embedded in binary
|
|
71
|
+
}
|
|
72
|
+
PdfiumLinkStrategy::Bundled => {
|
|
73
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
74
|
+
link_bundled(&pdfium_dir, &target, &out_dir);
|
|
75
|
+
// Skip copy_lib_to_package - each binary extracts its own
|
|
76
|
+
}
|
|
77
|
+
PdfiumLinkStrategy::System => {
|
|
78
|
+
link_system(&target);
|
|
79
|
+
// No download or copy needed
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
link_system_frameworks(&target);
|
|
84
|
+
println!("cargo:rerun-if-changed=build.rs");
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// ============================================================================
|
|
88
|
+
// FEATURE & STRATEGY VALIDATION
|
|
89
|
+
// ============================================================================
|
|
90
|
+
|
|
91
|
+
/// Determine which linking strategy to use based on features and target
|
|
92
|
+
fn determine_link_strategy(target: &str) -> PdfiumLinkStrategy {
|
|
93
|
+
// WASM handling: check for PDFIUM_WASM_LIB environment variable
|
|
94
|
+
if target.contains("wasm") {
|
|
95
|
+
if let Ok(wasm_lib) = env::var("PDFIUM_WASM_LIB") {
|
|
96
|
+
println!("cargo:rustc-link-search=native={}", wasm_lib);
|
|
97
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
98
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
99
|
+
}
|
|
100
|
+
// For WASM without explicit PDFIUM_WASM_LIB, use bundled strategy
|
|
101
|
+
// This downloads pdfium-lib which provides WASM-compatible builds
|
|
102
|
+
println!("cargo:warning=WASM build using bundled PDFium (set PDFIUM_WASM_LIB to link custom WASM PDFium)");
|
|
103
|
+
return PdfiumLinkStrategy::Bundled;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
let system_pdfium = cfg!(feature = "system-pdfium");
|
|
107
|
+
let bundled_pdfium = cfg!(feature = "bundled-pdfium");
|
|
108
|
+
let static_pdfium = cfg!(feature = "static-pdfium");
|
|
109
|
+
|
|
110
|
+
let enabled_count = usize::from(system_pdfium) + usize::from(bundled_pdfium) + usize::from(static_pdfium);
|
|
111
|
+
if enabled_count > 1 {
|
|
112
|
+
println!(
|
|
113
|
+
"cargo:warning=Multiple PDFium linking strategies enabled (static-pdfium={}, bundled-pdfium={}, system-pdfium={}); using bundled-pdfium for this build",
|
|
114
|
+
static_pdfium, bundled_pdfium, system_pdfium
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Feature-based strategy selection.
|
|
119
|
+
// Prefer bundled-pdfium when multiple strategies are enabled (e.g. `--all-features`) because it
|
|
120
|
+
// does not require external PDFIUM_STATIC_LIB_PATH and does not depend on a system install.
|
|
121
|
+
if bundled_pdfium {
|
|
122
|
+
return PdfiumLinkStrategy::Bundled;
|
|
123
|
+
}
|
|
124
|
+
if system_pdfium {
|
|
125
|
+
return PdfiumLinkStrategy::System;
|
|
126
|
+
}
|
|
127
|
+
if static_pdfium {
|
|
128
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Default: download and link dynamically (bundled-pdfium preferred if pdf not already selected)
|
|
132
|
+
// When only 'pdf' feature is enabled (no linking strategy), default to bundled-pdfium
|
|
133
|
+
PdfiumLinkStrategy::Bundled
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// ============================================================================
|
|
137
|
+
// DOWNLOAD & PREBUILT ORCHESTRATION
|
|
138
|
+
// ============================================================================
|
|
139
|
+
|
|
140
|
+
/// Download PDFium or use prebuilt directory
|
|
141
|
+
///
|
|
142
|
+
/// This is the main orchestrator function that:
|
|
143
|
+
/// 1. Checks for `KREUZBERG_PDFIUM_PREBUILT` environment variable
|
|
144
|
+
/// 2. If set and valid, uses prebuilt pdfium directory
|
|
145
|
+
/// 3. If not set, downloads pdfium to out_dir (with caching)
|
|
146
|
+
/// 4. Returns PathBuf to pdfium directory
|
|
147
|
+
///
|
|
148
|
+
/// Reuses all existing helper functions:
|
|
149
|
+
/// - `get_pdfium_url_and_lib()` - determines download URL for target
|
|
150
|
+
/// - `download_and_extract_pdfium()` - downloads with retry logic
|
|
151
|
+
/// - `runtime_library_info()` - platform-specific library names
|
|
152
|
+
/// - `prepare_prebuilt_pdfium()` - handles prebuilt copy
|
|
153
|
+
fn download_or_use_prebuilt(target: &str, out_dir: &Path) -> PathBuf {
|
|
154
|
+
let (download_url, _lib_name) = get_pdfium_url_and_lib(target);
|
|
155
|
+
let pdfium_dir = out_dir.join("pdfium");
|
|
156
|
+
|
|
157
|
+
// Check for prebuilt pdfium directory
|
|
158
|
+
if let Some(prebuilt) = env::var_os("KREUZBERG_PDFIUM_PREBUILT") {
|
|
159
|
+
let prebuilt_path = PathBuf::from(prebuilt);
|
|
160
|
+
if prebuilt_path.exists() {
|
|
161
|
+
prepare_prebuilt_pdfium(&prebuilt_path, &pdfium_dir)
|
|
162
|
+
.unwrap_or_else(|err| panic!("Failed to copy Pdfium from {}: {}", prebuilt_path.display(), err));
|
|
163
|
+
if target.contains("windows") {
|
|
164
|
+
ensure_windows_import_library(&pdfium_dir);
|
|
165
|
+
}
|
|
166
|
+
return pdfium_dir;
|
|
167
|
+
} else {
|
|
168
|
+
panic!(
|
|
169
|
+
"Environment variable KREUZBERG_PDFIUM_PREBUILT points to '{}' but the directory does not exist",
|
|
170
|
+
prebuilt_path.display()
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// Check if library already exists (cache validation) using flexible detection
|
|
176
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
177
|
+
let lib_found = find_pdfium_library(&pdfium_dir, &runtime_lib_name, runtime_subdir).is_ok();
|
|
178
|
+
|
|
179
|
+
let import_lib_exists = if target.contains("windows") {
|
|
180
|
+
let lib_dir = pdfium_dir.join("lib");
|
|
181
|
+
lib_dir.join("pdfium.lib").exists() || lib_dir.join("pdfium.dll.lib").exists()
|
|
182
|
+
} else {
|
|
183
|
+
true
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
if !lib_found || !import_lib_exists {
|
|
187
|
+
tracing::debug!("Pdfium library not found, downloading for target: {}", target);
|
|
188
|
+
tracing::debug!("Download URL: {}", download_url);
|
|
189
|
+
download_and_extract_pdfium(&download_url, &pdfium_dir);
|
|
190
|
+
} else {
|
|
191
|
+
tracing::debug!("Pdfium library already cached at {}", pdfium_dir.display());
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// Windows-specific: ensure pdfium.lib exists
|
|
195
|
+
if target.contains("windows") {
|
|
196
|
+
ensure_windows_import_library(&pdfium_dir);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
pdfium_dir
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
fn ensure_windows_import_library(pdfium_dir: &Path) {
|
|
203
|
+
let lib_dir = pdfium_dir.join("lib");
|
|
204
|
+
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
205
|
+
let expected_lib = lib_dir.join("pdfium.lib");
|
|
206
|
+
|
|
207
|
+
if dll_lib.exists() && !expected_lib.exists() {
|
|
208
|
+
tracing::debug!(
|
|
209
|
+
"Ensuring Windows import library at {} (source: {})",
|
|
210
|
+
expected_lib.display(),
|
|
211
|
+
dll_lib.display()
|
|
212
|
+
);
|
|
213
|
+
fs::copy(&dll_lib, &expected_lib).unwrap_or_else(|err| {
|
|
214
|
+
panic!(
|
|
215
|
+
"Failed to copy Windows import library from {} to {}: {}",
|
|
216
|
+
dll_lib.display(),
|
|
217
|
+
expected_lib.display(),
|
|
218
|
+
err
|
|
219
|
+
)
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// ============================================================================
|
|
225
|
+
// DOWNLOAD UTILITIES
|
|
226
|
+
// ============================================================================
|
|
227
|
+
|
|
228
|
+
/// Look up the latest release tag of a GitHub repository.
///
/// Runs `curl -s` against the GitHub "latest release" API endpoint for `repo`
/// (given as `owner/name`) and pulls the `tag_name` string out of the response by
/// plain text search. Only the part of the tag after its last `/` is returned.
///
/// Returns `"7529"` when curl cannot be run, exits unsuccessfully, or the response
/// contains no quoted `tag_name` value.
fn get_latest_version(repo: &str) -> String {
    const TAG_KEY: &str = "\"tag_name\":";

    let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);

    let response = match Command::new("curl").args(["-s", &api_url]).output() {
        Ok(response) if response.status.success() => response,
        _ => return "7529".to_string(),
    };

    let body = String::from_utf8_lossy(&response.stdout);
    let tag = body.find(TAG_KEY).and_then(|key_pos| {
        let rest = &body[key_pos + TAG_KEY.len()..];
        // The value is whatever sits between the next pair of double quotes.
        let value_start = rest.find('"')? + 1;
        let value_len = rest[value_start..].find('"')?;
        Some(&rest[value_start..value_start + value_len])
    });

    match tag {
        Some(tag) => tag.split('/').next_back().unwrap_or(tag).to_string(),
        None => "7529".to_string(),
    }
}
|
|
255
|
+
|
|
256
|
+
/// Get the download URL and library name for the target platform
|
|
257
|
+
///
|
|
258
|
+
/// Determines platform/architecture from target triple and constructs
|
|
259
|
+
/// the appropriate GitHub release download URL. Supports:
|
|
260
|
+
/// - WASM: paulocoutinhox/pdfium-lib
|
|
261
|
+
/// - Other platforms: bblanchon/pdfium-binaries
|
|
262
|
+
fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
263
|
+
if target.contains("wasm") {
|
|
264
|
+
let version = env::var("PDFIUM_WASM_VERSION")
|
|
265
|
+
.ok()
|
|
266
|
+
.filter(|v| !v.is_empty())
|
|
267
|
+
.unwrap_or_else(|| get_latest_version("paulocoutinhox/pdfium-lib"));
|
|
268
|
+
tracing::debug!("Using pdfium-lib version: {}", version);
|
|
269
|
+
|
|
270
|
+
// WASM builds use a single 'wasm.tgz' asset regardless of architecture
|
|
271
|
+
// The archive contains both wasm32 and wasm64 if available
|
|
272
|
+
return (
|
|
273
|
+
format!(
|
|
274
|
+
"https://github.com/paulocoutinhox/pdfium-lib/releases/download/{}/wasm.tgz",
|
|
275
|
+
version
|
|
276
|
+
),
|
|
277
|
+
"pdfium".to_string(),
|
|
278
|
+
);
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
let (platform, arch) = if target.contains("darwin") {
|
|
282
|
+
let arch = if target.contains("aarch64") { "arm64" } else { "x64" };
|
|
283
|
+
("mac", arch)
|
|
284
|
+
} else if target.contains("linux") {
|
|
285
|
+
let arch = if target.contains("aarch64") {
|
|
286
|
+
"arm64"
|
|
287
|
+
} else if target.contains("arm") {
|
|
288
|
+
"arm"
|
|
289
|
+
} else {
|
|
290
|
+
"x64"
|
|
291
|
+
};
|
|
292
|
+
("linux", arch)
|
|
293
|
+
} else if target.contains("windows") {
|
|
294
|
+
let arch = if target.contains("aarch64") {
|
|
295
|
+
"arm64"
|
|
296
|
+
} else if target.contains("i686") {
|
|
297
|
+
"x86"
|
|
298
|
+
} else {
|
|
299
|
+
"x64"
|
|
300
|
+
};
|
|
301
|
+
("win", arch)
|
|
302
|
+
} else {
|
|
303
|
+
panic!("Unsupported target platform: {}", target);
|
|
304
|
+
};
|
|
305
|
+
|
|
306
|
+
let version = env::var("PDFIUM_VERSION")
|
|
307
|
+
.ok()
|
|
308
|
+
.filter(|v| !v.is_empty())
|
|
309
|
+
.unwrap_or_else(|| get_latest_version("bblanchon/pdfium-binaries"));
|
|
310
|
+
tracing::debug!("Using pdfium-binaries version: {}", version);
|
|
311
|
+
|
|
312
|
+
let url = format!(
|
|
313
|
+
"https://github.com/bblanchon/pdfium-binaries/releases/download/chromium/{}/pdfium-{}-{}.tgz",
|
|
314
|
+
version, platform, arch
|
|
315
|
+
);
|
|
316
|
+
|
|
317
|
+
(url, "pdfium".to_string())
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/// Download and extract PDFium archive with retry logic
|
|
321
|
+
///
|
|
322
|
+
/// Features:
|
|
323
|
+
/// - Exponential backoff retry (configurable via env vars)
|
|
324
|
+
/// - File type validation (gzip check)
|
|
325
|
+
/// - Windows-specific import library handling (pdfium.dll.lib -> pdfium.lib)
|
|
326
|
+
/// - Environment variables:
|
|
327
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: number of retries (default: 5)
|
|
328
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS: initial backoff in seconds (default: 2)
|
|
329
|
+
fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
330
|
+
fs::create_dir_all(dest_dir).expect("Failed to create pdfium directory");
|
|
331
|
+
|
|
332
|
+
let archive_path = dest_dir.join("pdfium.tar.gz");
|
|
333
|
+
let retries = env::var("KREUZBERG_PDFIUM_DOWNLOAD_RETRIES")
|
|
334
|
+
.ok()
|
|
335
|
+
.and_then(|value| value.parse::<u32>().ok())
|
|
336
|
+
.filter(|value| *value > 0)
|
|
337
|
+
.unwrap_or(5);
|
|
338
|
+
let base_delay = env::var("KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS")
|
|
339
|
+
.ok()
|
|
340
|
+
.and_then(|value| value.parse::<u64>().ok())
|
|
341
|
+
.filter(|value| *value > 0)
|
|
342
|
+
.unwrap_or(2);
|
|
343
|
+
|
|
344
|
+
let archive_path_str = archive_path
|
|
345
|
+
.to_str()
|
|
346
|
+
.unwrap_or_else(|| panic!("Non-UTF8 path for archive: {}", archive_path.display()));
|
|
347
|
+
let mut last_error = String::new();
|
|
348
|
+
|
|
349
|
+
for attempt in 1..=retries {
|
|
350
|
+
let _ = fs::remove_file(&archive_path);
|
|
351
|
+
tracing::debug!(
|
|
352
|
+
"Downloading Pdfium archive from: {} (attempt {}/{})",
|
|
353
|
+
url,
|
|
354
|
+
attempt,
|
|
355
|
+
retries
|
|
356
|
+
);
|
|
357
|
+
|
|
358
|
+
let status = Command::new("curl")
|
|
359
|
+
.args(["-f", "-L", "-o", archive_path_str, url])
|
|
360
|
+
.status();
|
|
361
|
+
|
|
362
|
+
match status {
|
|
363
|
+
Ok(code) if code.success() => {
|
|
364
|
+
last_error.clear();
|
|
365
|
+
break;
|
|
366
|
+
}
|
|
367
|
+
Ok(code) => {
|
|
368
|
+
last_error = format!("curl exited with {:?}", code.code());
|
|
369
|
+
}
|
|
370
|
+
Err(err) => {
|
|
371
|
+
last_error = format!("failed to spawn curl: {err}");
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
if attempt == retries {
|
|
376
|
+
panic!(
|
|
377
|
+
"Failed to download Pdfium from {} after {} attempts. Last error: {}",
|
|
378
|
+
url, retries, last_error
|
|
379
|
+
);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
let exponent = u32::min(attempt, 5);
|
|
383
|
+
let multiplier = 1u64 << exponent;
|
|
384
|
+
let delay_secs = base_delay.saturating_mul(multiplier).min(30);
|
|
385
|
+
println!(
|
|
386
|
+
"cargo:warning=Pdfium download failed (attempt {}/{}) - {}. Retrying in {}s",
|
|
387
|
+
attempt, retries, last_error, delay_secs
|
|
388
|
+
);
|
|
389
|
+
thread::sleep(Duration::from_secs(delay_secs));
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
let file_type = Command::new("file")
|
|
393
|
+
.arg(archive_path.to_str().unwrap())
|
|
394
|
+
.output()
|
|
395
|
+
.expect("Failed to check file type");
|
|
396
|
+
|
|
397
|
+
let file_type_output = String::from_utf8_lossy(&file_type.stdout);
|
|
398
|
+
tracing::debug!("Downloaded file type: {}", file_type_output.trim());
|
|
399
|
+
|
|
400
|
+
if !file_type_output.to_lowercase().contains("gzip") && !file_type_output.to_lowercase().contains("compressed") {
|
|
401
|
+
fs::remove_file(&archive_path).ok();
|
|
402
|
+
panic!(
|
|
403
|
+
"Downloaded file is not a valid gzip archive. URL may be incorrect or version unavailable: {}",
|
|
404
|
+
url
|
|
405
|
+
);
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
tracing::debug!("Extracting Pdfium archive...");
|
|
409
|
+
let status = Command::new("tar")
|
|
410
|
+
.args(["-xzf", archive_path.to_str().unwrap(), "-C", dest_dir.to_str().unwrap()])
|
|
411
|
+
.status()
|
|
412
|
+
.expect("Failed to execute tar");
|
|
413
|
+
|
|
414
|
+
if !status.success() {
|
|
415
|
+
fs::remove_file(&archive_path).ok();
|
|
416
|
+
panic!("Failed to extract Pdfium archive from {}", url);
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
fs::remove_file(&archive_path).ok();
|
|
420
|
+
|
|
421
|
+
let target = env::var("TARGET").unwrap();
|
|
422
|
+
if target.contains("windows") {
|
|
423
|
+
let lib_dir = dest_dir.join("lib");
|
|
424
|
+
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
425
|
+
let expected_lib = lib_dir.join("pdfium.lib");
|
|
426
|
+
|
|
427
|
+
if dll_lib.exists() {
|
|
428
|
+
tracing::debug!("Ensuring Windows import library at {}", expected_lib.display());
|
|
429
|
+
if let Err(err) = fs::copy(&dll_lib, &expected_lib) {
|
|
430
|
+
panic!("Failed to copy pdfium.dll.lib to pdfium.lib: {err}");
|
|
431
|
+
}
|
|
432
|
+
} else {
|
|
433
|
+
tracing::debug!("Warning: Expected {} not found after extraction", dll_lib.display());
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
tracing::debug!("Pdfium downloaded and extracted successfully");
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// ============================================================================
|
|
441
|
+
// PREBUILT HANDLING
|
|
442
|
+
// ============================================================================
|
|
443
|
+
|
|
444
|
+
/// Prepare prebuilt PDFium by copying to destination directory
|
|
445
|
+
///
|
|
446
|
+
/// Removes existing destination if present, then recursively copies
|
|
447
|
+
/// all files from prebuilt source to destination.
|
|
448
|
+
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
449
|
+
if dest_dir.exists() {
|
|
450
|
+
fs::remove_dir_all(dest_dir)?;
|
|
451
|
+
}
|
|
452
|
+
copy_dir_all(prebuilt_src, dest_dir)
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
/// Copy the directory tree rooted at `src` into `dst`, recursing into subdirectories.
///
/// `dst` (and any missing parents) is created before `src` is read. Entries whose
/// file type reports a directory are copied recursively. Everything else is copied
/// with `fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered. Entries copied before it are left in place.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;

    fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let kind = item.file_type()?;
        let from = item.path();
        let to = dst.join(item.file_name());

        if kind.is_dir() {
            copy_dir_all(&from, &to)
        } else {
            fs::copy(from, &to).map(|_| ())
        }
    })
}
|
|
473
|
+
|
|
474
|
+
// ============================================================================
|
|
475
|
+
// PLATFORM UTILITIES
|
|
476
|
+
// ============================================================================
|
|
477
|
+
|
|
478
|
+
/// Map a target triple to the PDFium library file name and the archive subdirectory holding it.
///
/// The first matching substring of `target` decides the result:
/// - `wasm`: `("libpdfium.a", "release/lib")`
/// - `windows`: `("pdfium.dll", "bin")`
/// - `darwin`: `("libpdfium.dylib", "lib")`
/// - anything else: `("libpdfium.so", "lib")`
fn runtime_library_info(target: &str) -> (String, &'static str) {
    let (file_name, subdir) = match target {
        // pdfium-lib `wasm.tgz` extracts into `release/lib/libpdfium.a`
        t if t.contains("wasm") => ("libpdfium.a", "release/lib"),
        t if t.contains("windows") => ("pdfium.dll", "bin"),
        t if t.contains("darwin") => ("libpdfium.dylib", "lib"),
        _ => ("libpdfium.so", "lib"),
    };

    (file_name.to_string(), subdir)
}
|
|
497
|
+
|
|
498
|
+
/// Find PDFium library in archive with flexible directory detection
|
|
499
|
+
///
|
|
500
|
+
/// Attempts to locate the library at multiple possible locations:
|
|
501
|
+
/// - {subdir}/{lib_name} (standard location)
|
|
502
|
+
/// - {lib_name} (root of archive)
|
|
503
|
+
/// - bin/{lib_name} (alternative location)
|
|
504
|
+
/// - lib/{lib_name} (explicit lib directory)
|
|
505
|
+
///
|
|
506
|
+
/// This handles variations in archive structure across different platform builds,
|
|
507
|
+
/// particularly macOS ARM64 where the archive structure may differ.
|
|
508
|
+
///
|
|
509
|
+
/// Returns the full path to the library if found, or an error with available files.
|
|
510
|
+
fn find_pdfium_library(pdfium_dir: &Path, lib_name: &str, expected_subdir: &str) -> Result<PathBuf, String> {
|
|
511
|
+
// Candidates in priority order
|
|
512
|
+
let candidates = [
|
|
513
|
+
pdfium_dir.join(expected_subdir).join(lib_name), // Standard: lib/libpdfium.dylib
|
|
514
|
+
pdfium_dir.join(lib_name), // Root: libpdfium.dylib
|
|
515
|
+
pdfium_dir.join("bin").join(lib_name), // Alternative: bin/libpdfium.dylib
|
|
516
|
+
pdfium_dir.join("lib").join(lib_name), // Explicit lib: lib/libpdfium.dylib
|
|
517
|
+
];
|
|
518
|
+
|
|
519
|
+
// Try each candidate
|
|
520
|
+
for candidate in &candidates {
|
|
521
|
+
if candidate.exists() {
|
|
522
|
+
tracing::debug!("Found PDFium library at: {}", candidate.display());
|
|
523
|
+
return Ok(candidate.clone());
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
// Library not found - provide detailed error with directory listing
|
|
528
|
+
let mut error_msg = format!(
|
|
529
|
+
"PDFium library not found at expected location: {}/{}\n\n",
|
|
530
|
+
pdfium_dir.display(),
|
|
531
|
+
expected_subdir
|
|
532
|
+
);
|
|
533
|
+
error_msg.push_str("Attempted locations:\n");
|
|
534
|
+
for candidate in &candidates {
|
|
535
|
+
error_msg.push_str(&format!(" - {}\n", candidate.display()));
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
// List actual contents of pdfium directory for debugging
|
|
539
|
+
error_msg.push_str("\nActual archive contents:\n");
|
|
540
|
+
if let Ok(entries) = fs::read_dir(pdfium_dir) {
|
|
541
|
+
for entry in entries.flatten() {
|
|
542
|
+
let path = entry.path();
|
|
543
|
+
let file_type = if path.is_dir() { "dir" } else { "file" };
|
|
544
|
+
error_msg.push_str(&format!(" {} ({})\n", path.display(), file_type));
|
|
545
|
+
|
|
546
|
+
// Show contents of subdirectories
|
|
547
|
+
if path.is_dir()
|
|
548
|
+
&& let Ok(sub_entries) = fs::read_dir(&path)
|
|
549
|
+
{
|
|
550
|
+
for sub_entry in sub_entries.flatten() {
|
|
551
|
+
let sub_path = sub_entry.path();
|
|
552
|
+
let sub_type = if sub_path.is_dir() { "dir" } else { "file" };
|
|
553
|
+
error_msg.push_str(&format!(" {} ({})\n", sub_path.display(), sub_type));
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
Err(error_msg)
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
// ============================================================================
|
|
563
|
+
// LINKING STRATEGIES
|
|
564
|
+
// ============================================================================
|
|
565
|
+
|
|
566
|
+
/// Link PDFium dynamically (default)
|
|
567
|
+
///
|
|
568
|
+
/// Sets up linker to use PDFium as a dynamic library (.dylib/.so/.dll)
|
|
569
|
+
/// with platform-specific rpath configuration for runtime library discovery.
|
|
570
|
+
/// Supports flexible archive structures by adding multiple possible lib directories.
|
|
571
|
+
fn link_dynamically(pdfium_dir: &Path, target: &str) {
|
|
572
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
573
|
+
|
|
574
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
575
|
+
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
576
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
577
|
+
Err(err) => panic!("{}", err),
|
|
578
|
+
};
|
|
579
|
+
|
|
580
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
581
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
582
|
+
|
|
583
|
+
// Also add standard lib directory for compatibility
|
|
584
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
585
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
586
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
// Add bin directory for platforms where it might be needed
|
|
590
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
591
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
592
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
// Set rpath for dynamic linking
|
|
596
|
+
if target.contains("darwin") {
|
|
597
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
598
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
599
|
+
} else if target.contains("linux") {
|
|
600
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
601
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
/// Link PDFium statically (static-pdfium feature)
|
|
606
|
+
///
|
|
607
|
+
/// Embeds PDFium into the binary as a static library. Adds system
|
|
608
|
+
/// dependencies required for static linking on Linux.
|
|
609
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
610
|
+
///
|
|
611
|
+
/// Environment Variables:
|
|
612
|
+
/// - `PDFIUM_STATIC_LIB_PATH`: Path to directory containing libpdfium.a (for Docker/musl builds)
|
|
613
|
+
///
|
|
614
|
+
/// Note: bblanchon/pdfium-binaries only provides dynamic libraries.
|
|
615
|
+
/// On macOS, this will fallback to dynamic linking with a warning.
|
|
616
|
+
/// On Linux, you must provide PDFIUM_STATIC_LIB_PATH pointing to a static build.
|
|
617
|
+
fn link_statically(pdfium_dir: &Path, target: &str) {
|
|
618
|
+
// For static linking, we need libpdfium.a (not .dylib or .so)
|
|
619
|
+
let static_lib_name = "libpdfium.a";
|
|
620
|
+
let lib_subdir = if target.contains("wasm") { "release/lib" } else { "lib" };
|
|
621
|
+
|
|
622
|
+
// First, check if user provided a static library path via environment variable
|
|
623
|
+
if let Ok(custom_path) = env::var("PDFIUM_STATIC_LIB_PATH") {
|
|
624
|
+
let custom_lib_dir = PathBuf::from(&custom_path);
|
|
625
|
+
|
|
626
|
+
if !custom_lib_dir.exists() {
|
|
627
|
+
panic!(
|
|
628
|
+
"PDFIUM_STATIC_LIB_PATH points to '{}' but the directory does not exist",
|
|
629
|
+
custom_path
|
|
630
|
+
);
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
let custom_lib = custom_lib_dir.join(static_lib_name);
|
|
634
|
+
if !custom_lib.exists() {
|
|
635
|
+
panic!(
|
|
636
|
+
"PDFIUM_STATIC_LIB_PATH points to '{}' but {} not found.\n\
|
|
637
|
+
Expected to find: {}",
|
|
638
|
+
custom_path,
|
|
639
|
+
static_lib_name,
|
|
640
|
+
custom_lib.display()
|
|
641
|
+
);
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
tracing::debug!("Using custom static PDFium from: {}", custom_lib.display());
|
|
645
|
+
println!("cargo:rustc-link-search=native={}", custom_lib_dir.display());
|
|
646
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
647
|
+
|
|
648
|
+
// Static linking requires additional system dependencies
|
|
649
|
+
if target.contains("linux") {
|
|
650
|
+
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
651
|
+
println!("cargo:rustc-link-lib=dylib=dl");
|
|
652
|
+
} else if target.contains("windows") {
|
|
653
|
+
println!("cargo:rustc-link-lib=dylib=ws2_32");
|
|
654
|
+
println!("cargo:rustc-link-lib=dylib=userenv");
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
return;
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
661
|
+
let lib_path = match find_pdfium_library(pdfium_dir, static_lib_name, lib_subdir) {
|
|
662
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
663
|
+
Err(_err) => {
|
|
664
|
+
// Static library not found - check if we're on macOS and can fallback
|
|
665
|
+
if target.contains("darwin") {
|
|
666
|
+
eprintln!("cargo:warning=Static PDFium library (libpdfium.a) not found for macOS.");
|
|
667
|
+
eprintln!("cargo:warning=bblanchon/pdfium-binaries only provides dynamic libraries.");
|
|
668
|
+
eprintln!("cargo:warning=Falling back to dynamic linking for local development.");
|
|
669
|
+
eprintln!("cargo:warning=Production Linux builds require PDFIUM_STATIC_LIB_PATH.");
|
|
670
|
+
|
|
671
|
+
// Fallback to dynamic linking on macOS
|
|
672
|
+
link_dynamically(pdfium_dir, target);
|
|
673
|
+
return;
|
|
674
|
+
} else {
|
|
675
|
+
// On Linux/Windows, provide helpful error with actionable steps
|
|
676
|
+
panic!(
|
|
677
|
+
"Static PDFium library (libpdfium.a) not found.\n\n\
|
|
678
|
+
bblanchon/pdfium-binaries only provides dynamic libraries.\n\n\
|
|
679
|
+
For static linking (required for Docker with musl), you must:\n\n\
|
|
680
|
+
1. Build static PDFium or obtain from a source that provides it\n\
|
|
681
|
+
- See: https://github.com/ajrcarey/pdfium-render/issues/53\n\
|
|
682
|
+
- Or use: https://github.com/paulocoutinhox/pdfium-lib (provides static builds)\n\n\
|
|
683
|
+
2. Set environment variable pointing to the directory containing libpdfium.a:\n\
|
|
684
|
+
export PDFIUM_STATIC_LIB_PATH=/path/to/pdfium/lib\n\n\
|
|
685
|
+
3. Or use alternative features:\n\
|
|
686
|
+
- 'pdf' (dynamic linking, requires .so at runtime)\n\
|
|
687
|
+
- 'bundled-pdfium' (embeds dynamic library in binary)\n\
|
|
688
|
+
- 'system-pdfium' (use system-installed pdfium)\n\n\
|
|
689
|
+
Example Dockerfile pattern:\n\
|
|
690
|
+
FROM alpine:latest as pdfium-builder\n\
|
|
691
|
+
# Download/build static libpdfium.a\n\
|
|
692
|
+
\n\
|
|
693
|
+
FROM rust:alpine as builder\n\
|
|
694
|
+
ENV PDFIUM_STATIC_LIB_PATH=/pdfium/lib\n\
|
|
695
|
+
COPY --from=pdfium-builder /path/to/libpdfium.a /pdfium/lib/"
|
|
696
|
+
);
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
};
|
|
700
|
+
|
|
701
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
702
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
703
|
+
|
|
704
|
+
// Also add standard lib directory for compatibility
|
|
705
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
706
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
707
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// Add bin directory for platforms where it might be needed
|
|
711
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
712
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
713
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
// Static linking requires additional system dependencies
|
|
717
|
+
if target.contains("linux") {
|
|
718
|
+
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
719
|
+
println!("cargo:rustc-link-lib=dylib=dl");
|
|
720
|
+
} else if target.contains("windows") {
|
|
721
|
+
println!("cargo:rustc-link-lib=dylib=ws2_32");
|
|
722
|
+
println!("cargo:rustc-link-lib=dylib=userenv");
|
|
723
|
+
}
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
/// Link PDFium bundled (bundled-pdfium feature)
|
|
727
|
+
///
|
|
728
|
+
/// Links dynamically but copies library to OUT_DIR for embedding in binary.
|
|
729
|
+
/// Each binary extracts and uses its own copy of the PDFium library.
|
|
730
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
731
|
+
///
|
|
732
|
+
/// For WASM targets, links statically using the bundled static library.
|
|
733
|
+
fn link_bundled(pdfium_dir: &Path, target: &str, out_dir: &Path) {
|
|
734
|
+
// Copy library to OUT_DIR for bundling using flexible detection
|
|
735
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
736
|
+
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
737
|
+
Ok(path) => path,
|
|
738
|
+
Err(err) => panic!("{}", err),
|
|
739
|
+
};
|
|
740
|
+
let bundled_lib = out_dir.join(&runtime_lib_name);
|
|
741
|
+
|
|
742
|
+
fs::copy(&src_lib, &bundled_lib)
|
|
743
|
+
.unwrap_or_else(|err| panic!("Failed to copy library to OUT_DIR for bundling: {}", err));
|
|
744
|
+
|
|
745
|
+
// Emit environment variable with bundled library path
|
|
746
|
+
let bundled_path = bundled_lib
|
|
747
|
+
.to_str()
|
|
748
|
+
.unwrap_or_else(|| panic!("Non-UTF8 path for bundled library: {}", bundled_lib.display()));
|
|
749
|
+
println!("cargo:rustc-env=KREUZBERG_PDFIUM_BUNDLED_PATH={}", bundled_path);
|
|
750
|
+
|
|
751
|
+
// For WASM, link statically using the bundled library
|
|
752
|
+
if target.contains("wasm") {
|
|
753
|
+
let lib_dir = bundled_lib
|
|
754
|
+
.parent()
|
|
755
|
+
.unwrap_or_else(|| panic!("Invalid bundled library path: {}", bundled_lib.display()));
|
|
756
|
+
println!("cargo:rustc-link-search=native={}", lib_dir.display());
|
|
757
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
758
|
+
tracing::debug!("Bundled PDFium static library linked for WASM at: {}", bundled_path);
|
|
759
|
+
} else {
|
|
760
|
+
tracing::debug!("Bundled PDFium library at: {}", bundled_path);
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
/// Link system-installed PDFium (system-pdfium feature)
|
|
765
|
+
///
|
|
766
|
+
/// Attempts to find PDFium via pkg-config first, then falls back to
|
|
767
|
+
/// environment variables (KREUZBERG_PDFIUM_SYSTEM_PATH, KREUZBERG_PDFIUM_SYSTEM_INCLUDE).
|
|
768
|
+
fn link_system(_target: &str) {
|
|
769
|
+
// Try pkg-config first
|
|
770
|
+
match pkg_config::Config::new().atleast_version("5.0").probe("pdfium") {
|
|
771
|
+
Ok(library) => {
|
|
772
|
+
tracing::debug!("Found system pdfium via pkg-config");
|
|
773
|
+
for include_path in &library.include_paths {
|
|
774
|
+
println!("cargo:include={}", include_path.display());
|
|
775
|
+
}
|
|
776
|
+
return;
|
|
777
|
+
}
|
|
778
|
+
Err(err) => {
|
|
779
|
+
tracing::debug!("pkg-config probe failed: {}", err);
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
// Fallback to environment variables
|
|
784
|
+
let lib_path = env::var("KREUZBERG_PDFIUM_SYSTEM_PATH").ok();
|
|
785
|
+
let include_path = env::var("KREUZBERG_PDFIUM_SYSTEM_INCLUDE").ok();
|
|
786
|
+
|
|
787
|
+
if let Some(lib_dir) = lib_path {
|
|
788
|
+
let lib_dir_path = PathBuf::from(&lib_dir);
|
|
789
|
+
if !lib_dir_path.exists() {
|
|
790
|
+
panic!(
|
|
791
|
+
"KREUZBERG_PDFIUM_SYSTEM_PATH points to '{}' but the directory does not exist",
|
|
792
|
+
lib_dir
|
|
793
|
+
);
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
println!("cargo:rustc-link-search=native={}", lib_dir);
|
|
797
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
798
|
+
|
|
799
|
+
if let Some(inc_dir) = include_path {
|
|
800
|
+
println!("cargo:include={}", inc_dir);
|
|
801
|
+
}
|
|
802
|
+
|
|
803
|
+
tracing::debug!("Using system pdfium from: {}", lib_dir);
|
|
804
|
+
return;
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
// No system pdfium found
|
|
808
|
+
panic!(
|
|
809
|
+
"system-pdfium feature enabled but pdfium not found.\n\
|
|
810
|
+
\n\
|
|
811
|
+
Please install pdfium system-wide or provide:\n\
|
|
812
|
+
- KREUZBERG_PDFIUM_SYSTEM_PATH: path to directory containing libpdfium\n\
|
|
813
|
+
- KREUZBERG_PDFIUM_SYSTEM_INCLUDE: path to pdfium headers (optional)\n\
|
|
814
|
+
\n\
|
|
815
|
+
Alternatively, use a different linking strategy:\n\
|
|
816
|
+
- Default (dynamic): cargo build --features pdf\n\
|
|
817
|
+
- Static linking: cargo build --features pdf,static-pdfium\n\
|
|
818
|
+
- Bundled: cargo build --features pdf,bundled-pdfium"
|
|
819
|
+
);
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
/// Emit link directives for the platform libraries PDFium depends on
///
/// The platform is chosen by substring match on the target triple, checked in
/// this order:
/// - `darwin`: CoreFoundation, CoreGraphics, CoreText and AppKit frameworks, plus libc++
/// - `linux`: stdc++, libm
/// - `windows`: gdi32, user32, advapi32
///
/// Any other target emits nothing.
fn link_system_frameworks(target: &str) {
    // Each entry is the `KIND=NAME` portion of a `cargo:rustc-link-lib` directive.
    let link_specs: &[&str] = if target.contains("darwin") {
        &[
            "framework=CoreFoundation",
            "framework=CoreGraphics",
            "framework=CoreText",
            "framework=AppKit",
            "dylib=c++",
        ]
    } else if target.contains("linux") {
        &["dylib=stdc++", "dylib=m"]
    } else if target.contains("windows") {
        &["dylib=gdi32", "dylib=user32", "dylib=advapi32"]
    } else {
        &[]
    };

    for spec in link_specs {
        println!("cargo:rustc-link-lib={}", spec);
    }
}
|