kreuzberg 4.0.0.pre.rc.6 → 4.0.0.pre.rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/README.md +15 -9
- data/ext/kreuzberg_rb/native/.cargo/config.toml +2 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +511 -325
- data/ext/kreuzberg_rb/native/Cargo.toml +13 -3
- data/ext/kreuzberg_rb/native/src/lib.rs +139 -2
- data/kreuzberg.gemspec +38 -4
- data/lib/kreuzberg/config.rb +34 -1
- data/lib/kreuzberg/result.rb +77 -14
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +23 -6
- data/vendor/kreuzberg/Cargo.toml +32 -11
- data/vendor/kreuzberg/README.md +54 -8
- data/vendor/kreuzberg/build.rs +549 -132
- data/vendor/kreuzberg/src/chunking/mod.rs +1279 -79
- data/vendor/kreuzberg/src/chunking/processor.rs +220 -0
- data/vendor/kreuzberg/src/core/config.rs +49 -1
- data/vendor/kreuzberg/src/core/extractor.rs +134 -2
- data/vendor/kreuzberg/src/core/mod.rs +4 -2
- data/vendor/kreuzberg/src/core/pipeline.rs +188 -1
- data/vendor/kreuzberg/src/extraction/docx.rs +358 -0
- data/vendor/kreuzberg/src/extraction/html.rs +24 -8
- data/vendor/kreuzberg/src/extraction/image.rs +124 -1
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +1 -2
- data/vendor/kreuzberg/src/extraction/office_metadata/odt_properties.rs +0 -3
- data/vendor/kreuzberg/src/extraction/pptx.rs +187 -87
- data/vendor/kreuzberg/src/extractors/archive.rs +1 -0
- data/vendor/kreuzberg/src/extractors/bibtex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/docbook.rs +2 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +50 -17
- data/vendor/kreuzberg/src/extractors/email.rs +29 -15
- data/vendor/kreuzberg/src/extractors/epub.rs +1 -0
- data/vendor/kreuzberg/src/extractors/excel.rs +2 -0
- data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -0
- data/vendor/kreuzberg/src/extractors/html.rs +29 -15
- data/vendor/kreuzberg/src/extractors/image.rs +25 -4
- data/vendor/kreuzberg/src/extractors/jats.rs +3 -0
- data/vendor/kreuzberg/src/extractors/jupyter.rs +1 -0
- data/vendor/kreuzberg/src/extractors/latex.rs +1 -0
- data/vendor/kreuzberg/src/extractors/markdown.rs +1 -0
- data/vendor/kreuzberg/src/extractors/mod.rs +78 -14
- data/vendor/kreuzberg/src/extractors/odt.rs +3 -3
- data/vendor/kreuzberg/src/extractors/opml.rs +1 -0
- data/vendor/kreuzberg/src/extractors/orgmode.rs +1 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +197 -17
- data/vendor/kreuzberg/src/extractors/pptx.rs +32 -13
- data/vendor/kreuzberg/src/extractors/rst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/rtf.rs +3 -4
- data/vendor/kreuzberg/src/extractors/structured.rs +2 -0
- data/vendor/kreuzberg/src/extractors/text.rs +7 -2
- data/vendor/kreuzberg/src/extractors/typst.rs +1 -0
- data/vendor/kreuzberg/src/extractors/xml.rs +27 -15
- data/vendor/kreuzberg/src/keywords/processor.rs +9 -1
- data/vendor/kreuzberg/src/language_detection/mod.rs +43 -0
- data/vendor/kreuzberg/src/language_detection/processor.rs +219 -0
- data/vendor/kreuzberg/src/lib.rs +10 -2
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -0
- data/vendor/kreuzberg/src/mcp/server.rs +120 -12
- data/vendor/kreuzberg/src/ocr/tesseract_backend.rs +2 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +328 -0
- data/vendor/kreuzberg/src/pdf/error.rs +8 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +238 -95
- data/vendor/kreuzberg/src/pdf/mod.rs +18 -2
- data/vendor/kreuzberg/src/pdf/rendering.rs +1 -2
- data/vendor/kreuzberg/src/pdf/table.rs +26 -2
- data/vendor/kreuzberg/src/pdf/text.rs +89 -7
- data/vendor/kreuzberg/src/plugins/extractor.rs +34 -3
- data/vendor/kreuzberg/src/plugins/mod.rs +3 -0
- data/vendor/kreuzberg/src/plugins/ocr.rs +22 -3
- data/vendor/kreuzberg/src/plugins/processor.rs +8 -0
- data/vendor/kreuzberg/src/plugins/registry.rs +2 -0
- data/vendor/kreuzberg/src/plugins/validator.rs +11 -0
- data/vendor/kreuzberg/src/text/mod.rs +6 -0
- data/vendor/kreuzberg/src/text/quality_processor.rs +219 -0
- data/vendor/kreuzberg/src/types.rs +173 -21
- data/vendor/kreuzberg/tests/archive_integration.rs +2 -0
- data/vendor/kreuzberg/tests/batch_processing.rs +5 -3
- data/vendor/kreuzberg/tests/concurrency_stress.rs +14 -6
- data/vendor/kreuzberg/tests/config_features.rs +15 -1
- data/vendor/kreuzberg/tests/config_loading_tests.rs +1 -0
- data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/email_integration.rs +2 -0
- data/vendor/kreuzberg/tests/error_handling.rs +43 -34
- data/vendor/kreuzberg/tests/format_integration.rs +2 -0
- data/vendor/kreuzberg/tests/image_integration.rs +2 -0
- data/vendor/kreuzberg/tests/mime_detection.rs +17 -16
- data/vendor/kreuzberg/tests/ocr_configuration.rs +4 -0
- data/vendor/kreuzberg/tests/ocr_errors.rs +22 -0
- data/vendor/kreuzberg/tests/ocr_quality.rs +2 -0
- data/vendor/kreuzberg/tests/odt_extractor_tests.rs +0 -21
- data/vendor/kreuzberg/tests/pdf_integration.rs +2 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/kreuzberg/tests/pipeline_integration.rs +25 -0
- data/vendor/kreuzberg/tests/plugin_ocr_backend_test.rs +5 -0
- data/vendor/kreuzberg/tests/plugin_system.rs +6 -0
- data/vendor/kreuzberg/tests/registry_integration_tests.rs +1 -0
- data/vendor/kreuzberg/tests/rst_extractor_tests.rs +2 -0
- data/vendor/kreuzberg/tests/rtf_extractor_tests.rs +0 -1
- data/vendor/kreuzberg/tests/security_validation.rs +1 -0
- data/vendor/kreuzberg/tests/test_fastembed.rs +45 -23
- data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -0
- data/vendor/kreuzberg/tests/typst_extractor_tests.rs +3 -2
- data/vendor/rb-sys/.cargo_vcs_info.json +2 -2
- data/vendor/rb-sys/Cargo.lock +15 -15
- data/vendor/rb-sys/Cargo.toml +4 -4
- data/vendor/rb-sys/Cargo.toml.orig +4 -4
- data/vendor/rb-sys/build/features.rs +5 -2
- data/vendor/rb-sys/build/main.rs +55 -15
- data/vendor/rb-sys/build/stable_api_config.rs +4 -2
- data/vendor/rb-sys/build/version.rs +3 -1
- data/vendor/rb-sys/src/lib.rs +1 -0
- data/vendor/rb-sys/src/macros.rs +2 -2
- data/vendor/rb-sys/src/special_consts.rs +1 -1
- data/vendor/rb-sys/src/stable_api/compiled.rs +1 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +12 -4
- data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +19 -6
- data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +17 -5
- data/vendor/rb-sys/src/stable_api.rs +0 -1
- data/vendor/rb-sys/src/tracking_allocator.rs +1 -3
- metadata +13 -10
- data/vendor/kreuzberg/src/extractors/fictionbook.rs.backup2 +0 -738
- data/vendor/rb-sys/.cargo-ok +0 -1
- data/vendor/rb-sys/src/stable_api/ruby_2_6.rs +0 -316
data/vendor/kreuzberg/build.rs
CHANGED
|
@@ -6,21 +6,137 @@ use std::process::Command;
|
|
|
6
6
|
use std::thread;
|
|
7
7
|
use std::time::Duration;
|
|
8
8
|
|
|
9
|
+
/// The way the build script obtains PDFium and hands it to the linker.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PdfiumLinkStrategy {
    /// Fetch PDFium and link it as a dynamic library (the default).
    DownloadDynamic,
    /// Fetch PDFium and link it statically (`pdf-static` feature).
    DownloadStatic,
    /// Fetch PDFium, link it dynamically, and embed the library in the
    /// binary (`pdf-bundled` feature).
    Bundled,
    /// Link against a system-installed PDFium discovered through pkg-config
    /// (`pdf-system` feature).
    System,
}
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// MAIN BUILD ORCHESTRATION
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
9
26
|
fn main() {
|
|
10
27
|
let target = env::var("TARGET").unwrap();
|
|
11
28
|
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
|
12
29
|
|
|
13
30
|
println!("cargo::rustc-check-cfg=cfg(coverage)");
|
|
14
31
|
|
|
15
|
-
|
|
32
|
+
// Skip pdfium linking if the pdf feature is not enabled
|
|
33
|
+
if !cfg!(feature = "pdf") {
|
|
34
|
+
tracing::debug!("PDF feature not enabled, skipping pdfium linking");
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
validate_feature_exclusivity();
|
|
39
|
+
let strategy = determine_link_strategy(&target);
|
|
40
|
+
|
|
41
|
+
tracing::debug!("Using PDFium linking strategy: {:?}", strategy);
|
|
42
|
+
|
|
43
|
+
match strategy {
|
|
44
|
+
PdfiumLinkStrategy::DownloadDynamic => {
|
|
45
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
46
|
+
link_dynamically(&pdfium_dir, &target);
|
|
47
|
+
copy_lib_to_package(&pdfium_dir, &target);
|
|
48
|
+
}
|
|
49
|
+
PdfiumLinkStrategy::DownloadStatic => {
|
|
50
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
51
|
+
link_statically(&pdfium_dir, &target);
|
|
52
|
+
// Skip copy_lib_to_package - library embedded in binary
|
|
53
|
+
}
|
|
54
|
+
PdfiumLinkStrategy::Bundled => {
|
|
55
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
56
|
+
link_bundled(&pdfium_dir, &target, &out_dir);
|
|
57
|
+
// Skip copy_lib_to_package - each binary extracts its own
|
|
58
|
+
}
|
|
59
|
+
PdfiumLinkStrategy::System => {
|
|
60
|
+
link_system(&target);
|
|
61
|
+
// No download or copy needed
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
link_system_frameworks(&target);
|
|
66
|
+
println!("cargo:rerun-if-changed=build.rs");
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// ============================================================================
|
|
70
|
+
// FEATURE & STRATEGY VALIDATION
|
|
71
|
+
// ============================================================================
|
|
72
|
+
|
|
73
|
+
/// Ensure at most one PDFium linking-strategy feature is enabled.
///
/// # Panics
///
/// Panics when more than one of `pdf-static`, `pdf-bundled`, and
/// `pdf-system` is enabled for this build.
fn validate_feature_exclusivity() {
    let enabled: usize = [
        cfg!(feature = "pdf-static"),
        cfg!(feature = "pdf-bundled"),
        cfg!(feature = "pdf-system"),
    ]
    .iter()
    .map(|&on| usize::from(on))
    .sum();

    // Zero (default dynamic download) or exactly one strategy is fine.
    if enabled <= 1 {
        return;
    }

    panic!(
        "Only one of pdf-static, pdf-bundled, pdf-system can be enabled at once.\n\
         Please choose a single PDFium linking strategy."
    );
}
|
|
89
|
+
|
|
90
|
+
/// Determine which linking strategy to use based on features and target
|
|
91
|
+
fn determine_link_strategy(target: &str) -> PdfiumLinkStrategy {
|
|
92
|
+
// WASM always uses static linking
|
|
93
|
+
if target.contains("wasm") {
|
|
94
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Feature-based strategy selection (priority order)
|
|
98
|
+
if cfg!(feature = "pdf-system") {
|
|
99
|
+
return PdfiumLinkStrategy::System;
|
|
100
|
+
}
|
|
101
|
+
if cfg!(feature = "pdf-bundled") {
|
|
102
|
+
return PdfiumLinkStrategy::Bundled;
|
|
103
|
+
}
|
|
104
|
+
if cfg!(feature = "pdf-static") {
|
|
105
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Default: download and link dynamically
|
|
109
|
+
PdfiumLinkStrategy::DownloadDynamic
|
|
110
|
+
}
|
|
16
111
|
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// DOWNLOAD & PREBUILT ORCHESTRATION
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
/// Download PDFium or use prebuilt directory
|
|
117
|
+
///
|
|
118
|
+
/// This is the main orchestrator function that:
|
|
119
|
+
/// 1. Checks for `KREUZBERG_PDFIUM_PREBUILT` environment variable
|
|
120
|
+
/// 2. If set and valid, uses prebuilt pdfium directory
|
|
121
|
+
/// 3. If not set, downloads pdfium to out_dir (with caching)
|
|
122
|
+
/// 4. Returns PathBuf to pdfium directory
|
|
123
|
+
///
|
|
124
|
+
/// Reuses all existing helper functions:
|
|
125
|
+
/// - `get_pdfium_url_and_lib()` - determines download URL for target
|
|
126
|
+
/// - `download_and_extract_pdfium()` - downloads with retry logic
|
|
127
|
+
/// - `runtime_library_info()` - platform-specific library names
|
|
128
|
+
/// - `prepare_prebuilt_pdfium()` - handles prebuilt copy
|
|
129
|
+
fn download_or_use_prebuilt(target: &str, out_dir: &Path) -> PathBuf {
|
|
130
|
+
let (download_url, _lib_name) = get_pdfium_url_and_lib(target);
|
|
17
131
|
let pdfium_dir = out_dir.join("pdfium");
|
|
18
132
|
|
|
133
|
+
// Check for prebuilt pdfium directory
|
|
19
134
|
if let Some(prebuilt) = env::var_os("KREUZBERG_PDFIUM_PREBUILT") {
|
|
20
135
|
let prebuilt_path = PathBuf::from(prebuilt);
|
|
21
136
|
if prebuilt_path.exists() {
|
|
22
137
|
prepare_prebuilt_pdfium(&prebuilt_path, &pdfium_dir)
|
|
23
138
|
.unwrap_or_else(|err| panic!("Failed to copy Pdfium from {}: {}", prebuilt_path.display(), err));
|
|
139
|
+
return pdfium_dir;
|
|
24
140
|
} else {
|
|
25
141
|
panic!(
|
|
26
142
|
"Environment variable KREUZBERG_PDFIUM_PREBUILT points to '{}' but the directory does not exist",
|
|
@@ -29,8 +145,10 @@ fn main() {
|
|
|
29
145
|
}
|
|
30
146
|
}
|
|
31
147
|
|
|
32
|
-
|
|
33
|
-
let
|
|
148
|
+
// Check if library already exists (cache validation) using flexible detection
|
|
149
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
150
|
+
let lib_found = find_pdfium_library(&pdfium_dir, &runtime_lib_name, runtime_subdir).is_ok();
|
|
151
|
+
|
|
34
152
|
let import_lib_exists = if target.contains("windows") {
|
|
35
153
|
let lib_dir = pdfium_dir.join("lib");
|
|
36
154
|
lib_dir.join("pdfium.lib").exists() || lib_dir.join("pdfium.dll.lib").exists()
|
|
@@ -38,14 +156,15 @@ fn main() {
|
|
|
38
156
|
true
|
|
39
157
|
};
|
|
40
158
|
|
|
41
|
-
if !
|
|
159
|
+
if !lib_found || !import_lib_exists {
|
|
42
160
|
tracing::debug!("Pdfium library not found, downloading for target: {}", target);
|
|
43
161
|
tracing::debug!("Download URL: {}", download_url);
|
|
44
162
|
download_and_extract_pdfium(&download_url, &pdfium_dir);
|
|
45
163
|
} else {
|
|
46
|
-
tracing::debug!("Pdfium library already
|
|
164
|
+
tracing::debug!("Pdfium library already cached at {}", pdfium_dir.display());
|
|
47
165
|
}
|
|
48
166
|
|
|
167
|
+
// Windows-specific: ensure pdfium.lib exists
|
|
49
168
|
if target.contains("windows") {
|
|
50
169
|
let lib_dir = pdfium_dir.join("lib");
|
|
51
170
|
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
@@ -57,38 +176,17 @@ fn main() {
|
|
|
57
176
|
}
|
|
58
177
|
}
|
|
59
178
|
|
|
60
|
-
|
|
61
|
-
println!("cargo:rustc-link-search=native={}", lib_dir.display());
|
|
62
|
-
println!("cargo:rustc-link-lib=dylib={}", lib_name);
|
|
63
|
-
|
|
64
|
-
if target.contains("darwin") {
|
|
65
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
66
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
67
|
-
} else if target.contains("linux") {
|
|
68
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
69
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
copy_lib_to_package(&pdfium_dir, &target);
|
|
73
|
-
|
|
74
|
-
if target.contains("darwin") {
|
|
75
|
-
println!("cargo:rustc-link-lib=framework=CoreFoundation");
|
|
76
|
-
println!("cargo:rustc-link-lib=framework=CoreGraphics");
|
|
77
|
-
println!("cargo:rustc-link-lib=framework=CoreText");
|
|
78
|
-
println!("cargo:rustc-link-lib=framework=AppKit");
|
|
79
|
-
println!("cargo:rustc-link-lib=dylib=c++");
|
|
80
|
-
} else if target.contains("linux") {
|
|
81
|
-
println!("cargo:rustc-link-lib=dylib=stdc++");
|
|
82
|
-
println!("cargo:rustc-link-lib=dylib=m");
|
|
83
|
-
} else if target.contains("windows") {
|
|
84
|
-
println!("cargo:rustc-link-lib=dylib=gdi32");
|
|
85
|
-
println!("cargo:rustc-link-lib=dylib=user32");
|
|
86
|
-
println!("cargo:rustc-link-lib=dylib=advapi32");
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
println!("cargo:rerun-if-changed=build.rs");
|
|
179
|
+
pdfium_dir
|
|
90
180
|
}
|
|
91
181
|
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// DOWNLOAD UTILITIES
|
|
184
|
+
// ============================================================================
|
|
185
|
+
|
|
186
|
+
/// Fetch the latest release version from a GitHub repository
|
|
187
|
+
///
|
|
188
|
+
/// Uses curl to query the GitHub API and extract the tag_name from the
|
|
189
|
+
/// latest release JSON response. Falls back to "7529" if API call fails.
|
|
92
190
|
fn get_latest_version(repo: &str) -> String {
|
|
93
191
|
let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);
|
|
94
192
|
|
|
@@ -113,6 +211,12 @@ fn get_latest_version(repo: &str) -> String {
|
|
|
113
211
|
"7529".to_string()
|
|
114
212
|
}
|
|
115
213
|
|
|
214
|
+
/// Get the download URL and library name for the target platform
|
|
215
|
+
///
|
|
216
|
+
/// Determines platform/architecture from target triple and constructs
|
|
217
|
+
/// the appropriate GitHub release download URL. Supports:
|
|
218
|
+
/// - WASM: paulocoutinhox/pdfium-lib
|
|
219
|
+
/// - Other platforms: bblanchon/pdfium-binaries
|
|
116
220
|
fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
117
221
|
if target.contains("wasm") {
|
|
118
222
|
let version = env::var("PDFIUM_WASM_VERSION")
|
|
@@ -121,11 +225,12 @@ fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
|
121
225
|
.unwrap_or_else(|| get_latest_version("paulocoutinhox/pdfium-lib"));
|
|
122
226
|
tracing::debug!("Using pdfium-lib version: {}", version);
|
|
123
227
|
|
|
124
|
-
|
|
228
|
+
// WASM builds use a single 'wasm.tgz' asset regardless of architecture
|
|
229
|
+
// The archive contains both wasm32 and wasm64 if available
|
|
125
230
|
return (
|
|
126
231
|
format!(
|
|
127
|
-
"https://github.com/paulocoutinhox/pdfium-lib/releases/download/{}/
|
|
128
|
-
version
|
|
232
|
+
"https://github.com/paulocoutinhox/pdfium-lib/releases/download/{}/wasm.tgz",
|
|
233
|
+
version
|
|
129
234
|
),
|
|
130
235
|
"pdfium".to_string(),
|
|
131
236
|
);
|
|
@@ -170,6 +275,15 @@ fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
|
170
275
|
(url, "pdfium".to_string())
|
|
171
276
|
}
|
|
172
277
|
|
|
278
|
+
/// Download and extract PDFium archive with retry logic
|
|
279
|
+
///
|
|
280
|
+
/// Features:
|
|
281
|
+
/// - Exponential backoff retry (configurable via env vars)
|
|
282
|
+
/// - File type validation (gzip check)
|
|
283
|
+
/// - Windows-specific import library handling (pdfium.dll.lib -> pdfium.lib)
|
|
284
|
+
/// - Environment variables:
|
|
285
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: number of retries (default: 5)
|
|
286
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS: initial backoff in seconds (default: 2)
|
|
173
287
|
fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
174
288
|
fs::create_dir_all(dest_dir).expect("Failed to create pdfium directory");
|
|
175
289
|
|
|
@@ -281,15 +395,404 @@ fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
|
281
395
|
tracing::debug!("Pdfium downloaded and extracted successfully");
|
|
282
396
|
}
|
|
283
397
|
|
|
284
|
-
|
|
398
|
+
// ============================================================================
|
|
399
|
+
// PREBUILT HANDLING
|
|
400
|
+
// ============================================================================
|
|
401
|
+
|
|
402
|
+
/// Prepare prebuilt PDFium by copying to destination directory
|
|
403
|
+
///
|
|
404
|
+
/// Removes existing destination if present, then recursively copies
|
|
405
|
+
/// all files from prebuilt source to destination.
|
|
406
|
+
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
407
|
+
if dest_dir.exists() {
|
|
408
|
+
fs::remove_dir_all(dest_dir)?;
|
|
409
|
+
}
|
|
410
|
+
copy_dir_all(prebuilt_src, dest_dir)
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
/// Copy the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Sub-directories are
/// copied recursively; every other entry is copied with `fs::copy`.
///
/// # Errors
///
/// Returns the first I/O error encountered while creating directories,
/// reading `src`, or copying an entry.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;

    fs::read_dir(src)?.try_for_each(|item| -> io::Result<()> {
        let item = item?;
        let kind = item.file_type()?;
        let source = item.path();
        let destination = dst.join(item.file_name());

        if kind.is_dir() {
            copy_dir_all(&source, &destination)
        } else {
            fs::copy(&source, &destination).map(|_| ())
        }
    })
}
|
|
431
|
+
|
|
432
|
+
// ============================================================================
|
|
433
|
+
// PLATFORM UTILITIES
|
|
434
|
+
// ============================================================================
|
|
435
|
+
|
|
436
|
+
/// Runtime library file name and the archive subdirectory expected to hold it.
///
/// The target triple is matched by substring, first match wins:
/// - `wasm`    -> `("libpdfium.a", "lib")`
/// - `windows` -> `("pdfium.dll", "bin")`
/// - `darwin`  -> `("libpdfium.dylib", "lib")`
/// - anything else -> `("libpdfium.so", "lib")`
fn runtime_library_info(target: &str) -> (String, &'static str) {
    // (triple marker, library file name, subdirectory), in priority order.
    const KNOWN_PLATFORMS: [(&str, &str, &str); 3] = [
        ("wasm", "libpdfium.a", "lib"),
        ("windows", "pdfium.dll", "bin"),
        ("darwin", "libpdfium.dylib", "lib"),
    ];

    let (file_name, subdir) = KNOWN_PLATFORMS
        .iter()
        .find(|&&(marker, _, _)| target.contains(marker))
        .map(|&(_, file_name, subdir)| (file_name, subdir))
        .unwrap_or(("libpdfium.so", "lib"));

    (file_name.to_string(), subdir)
}
|
|
454
|
+
|
|
455
|
+
/// Find PDFium library in archive with flexible directory detection
|
|
456
|
+
///
|
|
457
|
+
/// Attempts to locate the library at multiple possible locations:
|
|
458
|
+
/// - {subdir}/{lib_name} (standard location)
|
|
459
|
+
/// - {lib_name} (root of archive)
|
|
460
|
+
/// - bin/{lib_name} (alternative location)
|
|
461
|
+
/// - lib/{lib_name} (explicit lib directory)
|
|
462
|
+
///
|
|
463
|
+
/// This handles variations in archive structure across different platform builds,
|
|
464
|
+
/// particularly macOS ARM64 where the archive structure may differ.
|
|
465
|
+
///
|
|
466
|
+
/// Returns the full path to the library if found, or an error with available files.
|
|
467
|
+
fn find_pdfium_library(pdfium_dir: &Path, lib_name: &str, expected_subdir: &str) -> Result<PathBuf, String> {
|
|
468
|
+
// Candidates in priority order
|
|
469
|
+
let candidates = [
|
|
470
|
+
pdfium_dir.join(expected_subdir).join(lib_name), // Standard: lib/libpdfium.dylib
|
|
471
|
+
pdfium_dir.join(lib_name), // Root: libpdfium.dylib
|
|
472
|
+
pdfium_dir.join("bin").join(lib_name), // Alternative: bin/libpdfium.dylib
|
|
473
|
+
pdfium_dir.join("lib").join(lib_name), // Explicit lib: lib/libpdfium.dylib
|
|
474
|
+
];
|
|
475
|
+
|
|
476
|
+
// Try each candidate
|
|
477
|
+
for candidate in &candidates {
|
|
478
|
+
if candidate.exists() {
|
|
479
|
+
tracing::debug!("Found PDFium library at: {}", candidate.display());
|
|
480
|
+
return Ok(candidate.clone());
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
// Library not found - provide detailed error with directory listing
|
|
485
|
+
let mut error_msg = format!(
|
|
486
|
+
"PDFium library not found at expected location: {}/{}\n\n",
|
|
487
|
+
pdfium_dir.display(),
|
|
488
|
+
expected_subdir
|
|
489
|
+
);
|
|
490
|
+
error_msg.push_str("Attempted locations:\n");
|
|
491
|
+
for candidate in &candidates {
|
|
492
|
+
error_msg.push_str(&format!(" - {}\n", candidate.display()));
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
// List actual contents of pdfium directory for debugging
|
|
496
|
+
error_msg.push_str("\nActual archive contents:\n");
|
|
497
|
+
if let Ok(entries) = fs::read_dir(pdfium_dir) {
|
|
498
|
+
for entry in entries.flatten() {
|
|
499
|
+
let path = entry.path();
|
|
500
|
+
let file_type = if path.is_dir() { "dir" } else { "file" };
|
|
501
|
+
error_msg.push_str(&format!(" {} ({})\n", path.display(), file_type));
|
|
502
|
+
|
|
503
|
+
// Show contents of subdirectories
|
|
504
|
+
if path.is_dir()
|
|
505
|
+
&& let Ok(sub_entries) = fs::read_dir(&path)
|
|
506
|
+
{
|
|
507
|
+
for sub_entry in sub_entries.flatten() {
|
|
508
|
+
let sub_path = sub_entry.path();
|
|
509
|
+
let sub_type = if sub_path.is_dir() { "dir" } else { "file" };
|
|
510
|
+
error_msg.push_str(&format!(" {} ({})\n", sub_path.display(), sub_type));
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
Err(error_msg)
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/// Fix macOS install name (rpath) for dynamic library
|
|
520
|
+
///
|
|
521
|
+
/// Uses install_name_tool to set the install name to @rpath/{lib_name}
|
|
522
|
+
/// to enable relative path loading on macOS.
|
|
523
|
+
fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
|
|
524
|
+
let new_install_name = format!("@rpath/{}", lib_name);
|
|
525
|
+
|
|
526
|
+
tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
|
|
527
|
+
|
|
528
|
+
let status = Command::new("install_name_tool")
|
|
529
|
+
.arg("-id")
|
|
530
|
+
.arg(&new_install_name)
|
|
531
|
+
.arg(lib_path)
|
|
532
|
+
.status();
|
|
533
|
+
|
|
534
|
+
match status {
|
|
535
|
+
Ok(s) if s.success() => {
|
|
536
|
+
tracing::debug!("Successfully updated install_name");
|
|
537
|
+
}
|
|
538
|
+
Ok(s) => {
|
|
539
|
+
tracing::debug!("install_name_tool failed with status: {}", s);
|
|
540
|
+
}
|
|
541
|
+
Err(e) => {
|
|
542
|
+
tracing::debug!("Failed to run install_name_tool: {}", e);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
/// Code sign binary on macOS if needed
|
|
548
|
+
///
|
|
549
|
+
/// Uses codesign to sign the binary. Identity from KREUZBERG_CODESIGN_IDENTITY
|
|
550
|
+
/// env var (default: "-" for adhoc signing). Only runs on apple-darwin targets.
|
|
551
|
+
fn codesign_if_needed(target: &str, binary: &Path) {
|
|
552
|
+
if !target.contains("apple-darwin") || !binary.exists() {
|
|
553
|
+
return;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
|
|
557
|
+
let status = Command::new("codesign")
|
|
558
|
+
.arg("--force")
|
|
559
|
+
.arg("--timestamp=none")
|
|
560
|
+
.arg("--sign")
|
|
561
|
+
.arg(identity)
|
|
562
|
+
.arg(binary)
|
|
563
|
+
.status();
|
|
564
|
+
|
|
565
|
+
match status {
|
|
566
|
+
Ok(result) if result.success() => {
|
|
567
|
+
tracing::debug!("Codesigned {}", binary.display());
|
|
568
|
+
}
|
|
569
|
+
Ok(result) => {
|
|
570
|
+
tracing::debug!(
|
|
571
|
+
"codesign exited with status {} while signing {}",
|
|
572
|
+
result,
|
|
573
|
+
binary.display()
|
|
574
|
+
);
|
|
575
|
+
}
|
|
576
|
+
Err(err) => {
|
|
577
|
+
tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
// ============================================================================
|
|
583
|
+
// LINKING STRATEGIES
|
|
584
|
+
// ============================================================================
|
|
585
|
+
|
|
586
|
+
/// Link PDFium dynamically (default)
|
|
587
|
+
///
|
|
588
|
+
/// Sets up linker to use PDFium as a dynamic library (.dylib/.so/.dll)
|
|
589
|
+
/// with platform-specific rpath configuration for runtime library discovery.
|
|
590
|
+
/// Supports flexible archive structures by adding multiple possible lib directories.
|
|
591
|
+
fn link_dynamically(pdfium_dir: &Path, target: &str) {
|
|
285
592
|
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
286
|
-
let src_lib = pdfium_dir.join(runtime_subdir).join(&runtime_lib_name);
|
|
287
593
|
|
|
288
|
-
|
|
289
|
-
|
|
594
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
595
|
+
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
596
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
597
|
+
Err(err) => panic!("{}", err),
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
601
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
602
|
+
|
|
603
|
+
// Also add standard lib directory for compatibility
|
|
604
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
605
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
606
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// Add bin directory for platforms where it might be needed
|
|
610
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
611
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
612
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// Set rpath for dynamic linking
|
|
616
|
+
if target.contains("darwin") {
|
|
617
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
618
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
619
|
+
} else if target.contains("linux") {
|
|
620
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
621
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
/// Link PDFium statically (pdf-static feature)
|
|
626
|
+
///
|
|
627
|
+
/// Embeds PDFium into the binary as a static library. Adds system
|
|
628
|
+
/// dependencies required for static linking on Linux.
|
|
629
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
630
|
+
fn link_statically(pdfium_dir: &Path, target: &str) {
|
|
631
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
632
|
+
|
|
633
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
634
|
+
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
635
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
636
|
+
Err(err) => panic!("{}", err),
|
|
637
|
+
};
|
|
638
|
+
|
|
639
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
640
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
641
|
+
|
|
642
|
+
// Also add standard lib directory for compatibility
|
|
643
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
644
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
645
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
// Add bin directory for platforms where it might be needed
|
|
649
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
650
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
651
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
// Static linking requires additional system dependencies
|
|
655
|
+
if target.contains("linux") {
|
|
656
|
+
// Linux requires additional libraries for static linking
|
|
657
|
+
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
658
|
+
println!("cargo:rustc-link-lib=dylib=dl");
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
/// Link PDFium bundled (pdf-bundled feature)
|
|
663
|
+
///
|
|
664
|
+
/// Links dynamically but copies library to OUT_DIR for embedding in binary.
|
|
665
|
+
/// Each binary extracts and uses its own copy of the PDFium library.
|
|
666
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
667
|
+
fn link_bundled(pdfium_dir: &Path, target: &str, out_dir: &Path) {
|
|
668
|
+
// Link dynamically for build
|
|
669
|
+
link_dynamically(pdfium_dir, target);
|
|
670
|
+
|
|
671
|
+
// Copy library to OUT_DIR for bundling using flexible detection
|
|
672
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
673
|
+
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
674
|
+
Ok(path) => path,
|
|
675
|
+
Err(err) => panic!("{}", err),
|
|
676
|
+
};
|
|
677
|
+
let bundled_lib = out_dir.join(&runtime_lib_name);
|
|
678
|
+
|
|
679
|
+
fs::copy(&src_lib, &bundled_lib)
|
|
680
|
+
.unwrap_or_else(|err| panic!("Failed to copy library to OUT_DIR for bundling: {}", err));
|
|
681
|
+
|
|
682
|
+
// Emit environment variable with bundled library path
|
|
683
|
+
let bundled_path = bundled_lib
|
|
684
|
+
.to_str()
|
|
685
|
+
.unwrap_or_else(|| panic!("Non-UTF8 path for bundled library: {}", bundled_lib.display()));
|
|
686
|
+
println!("cargo:rustc-env=KREUZBERG_PDFIUM_BUNDLED_PATH={}", bundled_path);
|
|
687
|
+
|
|
688
|
+
tracing::debug!("Bundled PDFium library at: {}", bundled_path);
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
/// Link system-installed PDFium (pdf-system feature)
|
|
692
|
+
///
|
|
693
|
+
/// Attempts to find PDFium via pkg-config first, then falls back to
|
|
694
|
+
/// environment variables (KREUZBERG_PDFIUM_SYSTEM_PATH, KREUZBERG_PDFIUM_SYSTEM_INCLUDE).
|
|
695
|
+
fn link_system(_target: &str) {
|
|
696
|
+
// Try pkg-config first
|
|
697
|
+
match pkg_config::Config::new().atleast_version("5.0").probe("pdfium") {
|
|
698
|
+
Ok(library) => {
|
|
699
|
+
tracing::debug!("Found system pdfium via pkg-config");
|
|
700
|
+
for include_path in &library.include_paths {
|
|
701
|
+
println!("cargo:include={}", include_path.display());
|
|
702
|
+
}
|
|
703
|
+
return;
|
|
704
|
+
}
|
|
705
|
+
Err(err) => {
|
|
706
|
+
tracing::debug!("pkg-config probe failed: {}", err);
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// Fallback to environment variables
|
|
711
|
+
let lib_path = env::var("KREUZBERG_PDFIUM_SYSTEM_PATH").ok();
|
|
712
|
+
let include_path = env::var("KREUZBERG_PDFIUM_SYSTEM_INCLUDE").ok();
|
|
713
|
+
|
|
714
|
+
if let Some(lib_dir) = lib_path {
|
|
715
|
+
let lib_dir_path = PathBuf::from(&lib_dir);
|
|
716
|
+
if !lib_dir_path.exists() {
|
|
717
|
+
panic!(
|
|
718
|
+
"KREUZBERG_PDFIUM_SYSTEM_PATH points to '{}' but the directory does not exist",
|
|
719
|
+
lib_dir
|
|
720
|
+
);
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
println!("cargo:rustc-link-search=native={}", lib_dir);
|
|
724
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
725
|
+
|
|
726
|
+
if let Some(inc_dir) = include_path {
|
|
727
|
+
println!("cargo:include={}", inc_dir);
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
tracing::debug!("Using system pdfium from: {}", lib_dir);
|
|
290
731
|
return;
|
|
291
732
|
}
|
|
292
733
|
|
|
734
|
+
// No system pdfium found
|
|
735
|
+
panic!(
|
|
736
|
+
"pdf-system feature enabled but pdfium not found.\n\
|
|
737
|
+
\n\
|
|
738
|
+
Please install pdfium system-wide or provide:\n\
|
|
739
|
+
- KREUZBERG_PDFIUM_SYSTEM_PATH: path to directory containing libpdfium\n\
|
|
740
|
+
- KREUZBERG_PDFIUM_SYSTEM_INCLUDE: path to pdfium headers (optional)\n\
|
|
741
|
+
\n\
|
|
742
|
+
Alternatively, use a different linking strategy:\n\
|
|
743
|
+
- Default (dynamic): cargo build --features pdf\n\
|
|
744
|
+
- Static linking: cargo build --features pdf,pdf-static\n\
|
|
745
|
+
- Bundled: cargo build --features pdf,pdf-bundled"
|
|
746
|
+
);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
/// Link system frameworks and standard libraries
|
|
750
|
+
///
|
|
751
|
+
/// Adds platform-specific system libraries required for PDFium linking:
|
|
752
|
+
/// - macOS: CoreFoundation, CoreGraphics, CoreText, AppKit, libc++
|
|
753
|
+
/// - Linux: stdc++, libm
|
|
754
|
+
/// - Windows: gdi32, user32, advapi32
|
|
755
|
+
fn link_system_frameworks(target: &str) {
|
|
756
|
+
if target.contains("darwin") {
|
|
757
|
+
println!("cargo:rustc-link-lib=framework=CoreFoundation");
|
|
758
|
+
println!("cargo:rustc-link-lib=framework=CoreGraphics");
|
|
759
|
+
println!("cargo:rustc-link-lib=framework=CoreText");
|
|
760
|
+
println!("cargo:rustc-link-lib=framework=AppKit");
|
|
761
|
+
println!("cargo:rustc-link-lib=dylib=c++");
|
|
762
|
+
} else if target.contains("linux") {
|
|
763
|
+
println!("cargo:rustc-link-lib=dylib=stdc++");
|
|
764
|
+
println!("cargo:rustc-link-lib=dylib=m");
|
|
765
|
+
} else if target.contains("windows") {
|
|
766
|
+
println!("cargo:rustc-link-lib=dylib=gdi32");
|
|
767
|
+
println!("cargo:rustc-link-lib=dylib=user32");
|
|
768
|
+
println!("cargo:rustc-link-lib=dylib=advapi32");
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
// ============================================================================
|
|
773
|
+
// LIBRARY DISTRIBUTION
|
|
774
|
+
// ============================================================================
|
|
775
|
+
|
|
776
|
+
/// Copy PDFium library to various package directories
|
|
777
|
+
///
|
|
778
|
+
/// Distributes the compiled/downloaded PDFium library to:
|
|
779
|
+
/// - CLI target directories (debug/release)
|
|
780
|
+
/// - Python package directory
|
|
781
|
+
/// - Node.js package directory
|
|
782
|
+
/// - Ruby gem directory
|
|
783
|
+
///
|
|
784
|
+
/// On macOS, also fixes install_name and applies code signing.
|
|
785
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
786
|
+
fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
|
|
787
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
788
|
+
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
789
|
+
Ok(path) => path,
|
|
790
|
+
Err(err) => {
|
|
791
|
+
tracing::debug!("Failed to locate PDFium library: {}", err);
|
|
792
|
+
return;
|
|
793
|
+
}
|
|
794
|
+
};
|
|
795
|
+
|
|
293
796
|
if target.contains("darwin") {
|
|
294
797
|
fix_macos_install_name(&src_lib, &runtime_lib_name);
|
|
295
798
|
codesign_if_needed(target, &src_lib);
|
|
@@ -314,7 +817,6 @@ fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
|
|
|
314
817
|
);
|
|
315
818
|
}
|
|
316
819
|
|
|
317
|
-
// Also copy to target/{profile} for Java FFI (Maven expects it here)
|
|
318
820
|
let simple_target_dir = workspace_root.join("target").join(&profile);
|
|
319
821
|
if simple_target_dir != target_dir {
|
|
320
822
|
fs::create_dir_all(&simple_target_dir).ok();
|
|
@@ -359,6 +861,10 @@ fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
|
|
|
359
861
|
}
|
|
360
862
|
}
|
|
361
863
|
|
|
864
|
+
/// Copy library to destination if needed (based on modification time)
|
|
865
|
+
///
|
|
866
|
+
/// Only copies if destination doesn't exist or source is newer than destination.
|
|
867
|
+
/// Applies platform-specific post-processing (code signing on macOS).
|
|
362
868
|
fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str) {
|
|
363
869
|
use std::fs;
|
|
364
870
|
|
|
@@ -383,92 +889,3 @@ fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str)
|
|
|
383
889
|
}
|
|
384
890
|
}
|
|
385
891
|
}
|
|
386
|
-
|
|
387
|
-
fn codesign_if_needed(target: &str, binary: &Path) {
|
|
388
|
-
if !target.contains("apple-darwin") || !binary.exists() {
|
|
389
|
-
return;
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
|
|
393
|
-
let status = Command::new("codesign")
|
|
394
|
-
.arg("--force")
|
|
395
|
-
.arg("--timestamp=none")
|
|
396
|
-
.arg("--sign")
|
|
397
|
-
.arg(identity)
|
|
398
|
-
.arg(binary)
|
|
399
|
-
.status();
|
|
400
|
-
|
|
401
|
-
match status {
|
|
402
|
-
Ok(result) if result.success() => {
|
|
403
|
-
tracing::debug!("Codesigned {}", binary.display());
|
|
404
|
-
}
|
|
405
|
-
Ok(result) => {
|
|
406
|
-
tracing::debug!(
|
|
407
|
-
"codesign exited with status {} while signing {}",
|
|
408
|
-
result,
|
|
409
|
-
binary.display()
|
|
410
|
-
);
|
|
411
|
-
}
|
|
412
|
-
Err(err) => {
|
|
413
|
-
tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
fn runtime_library_info(target: &str) -> (String, &'static str) {
|
|
419
|
-
if target.contains("windows") {
|
|
420
|
-
("pdfium.dll".to_string(), "bin")
|
|
421
|
-
} else if target.contains("darwin") {
|
|
422
|
-
("libpdfium.dylib".to_string(), "lib")
|
|
423
|
-
} else {
|
|
424
|
-
("libpdfium.so".to_string(), "lib")
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
429
|
-
if dest_dir.exists() {
|
|
430
|
-
fs::remove_dir_all(dest_dir)?;
|
|
431
|
-
}
|
|
432
|
-
copy_dir_all(prebuilt_src, dest_dir)
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
|
|
436
|
-
fs::create_dir_all(dst)?;
|
|
437
|
-
for entry in fs::read_dir(src)? {
|
|
438
|
-
let entry = entry?;
|
|
439
|
-
let file_type = entry.file_type()?;
|
|
440
|
-
let target_path = dst.join(entry.file_name());
|
|
441
|
-
if file_type.is_dir() {
|
|
442
|
-
copy_dir_all(&entry.path(), &target_path)?;
|
|
443
|
-
} else {
|
|
444
|
-
fs::copy(entry.path(), &target_path)?;
|
|
445
|
-
}
|
|
446
|
-
}
|
|
447
|
-
Ok(())
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
|
|
451
|
-
use std::process::Command;
|
|
452
|
-
|
|
453
|
-
let new_install_name = format!("@rpath/{}", lib_name);
|
|
454
|
-
|
|
455
|
-
tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
|
|
456
|
-
|
|
457
|
-
let status = Command::new("install_name_tool")
|
|
458
|
-
.arg("-id")
|
|
459
|
-
.arg(&new_install_name)
|
|
460
|
-
.arg(lib_path)
|
|
461
|
-
.status();
|
|
462
|
-
|
|
463
|
-
match status {
|
|
464
|
-
Ok(s) if s.success() => {
|
|
465
|
-
tracing::debug!("Successfully updated install_name");
|
|
466
|
-
}
|
|
467
|
-
Ok(s) => {
|
|
468
|
-
tracing::debug!("install_name_tool failed with status: {}", s);
|
|
469
|
-
}
|
|
470
|
-
Err(e) => {
|
|
471
|
-
tracing::debug!("Failed to run install_name_tool: {}", e);
|
|
472
|
-
}
|
|
473
|
-
}
|
|
474
|
-
}
|