kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -5
- data/ext/kreuzberg_rb/native/.cargo/config.toml +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +0 -6
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/kreuzberg/Cargo.toml +9 -2
- data/vendor/kreuzberg/README.md +41 -0
- data/vendor/kreuzberg/build.rs +539 -133
- data/vendor/kreuzberg/src/api/mod.rs +0 -2
- data/vendor/kreuzberg/src/extraction/docx.rs +1 -1
- data/vendor/kreuzberg/src/extractors/pdf.rs +6 -3
- data/vendor/kreuzberg/src/mcp/mod.rs +3 -2
- data/vendor/kreuzberg/src/mcp/server.rs +106 -0
- data/vendor/kreuzberg/src/pdf/bundled.rs +328 -0
- data/vendor/kreuzberg/src/pdf/mod.rs +4 -0
- data/vendor/kreuzberg/tests/pdfium_linking.rs +374 -0
- data/vendor/rb-sys/bin/release.sh +8 -9
- data/vendor/rb-sys/src/lib.rs +1 -0
- metadata +4 -2
data/vendor/kreuzberg/build.rs
CHANGED
|
@@ -6,6 +6,23 @@ use std::process::Command;
|
|
|
6
6
|
use std::thread;
|
|
7
7
|
use std::time::Duration;
|
|
8
8
|
|
|
9
|
+
/// The way this build script obtains PDFium and links it into the crate.
///
/// `determine_link_strategy` picks exactly one variant per build, and `main`
/// dispatches on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PdfiumLinkStrategy {
    /// Download PDFium (or reuse a prebuilt copy), link it as a dynamic
    /// library, and copy the library into the package directories.
    /// Chosen when no strategy feature is enabled.
    DownloadDynamic,
    /// Download PDFium (or reuse a prebuilt copy) and link it statically.
    /// Chosen by the `pdf-static` feature and for every WASM target.
    DownloadStatic,
    /// Download PDFium, link it dynamically, and stage a copy in `OUT_DIR`
    /// for embedding in the binary (`pdf-bundled` feature).
    Bundled,
    /// Link against a PDFium that is already installed on the system
    /// (`pdf-system` feature); nothing is downloaded or copied.
    System,
}
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// MAIN BUILD ORCHESTRATION
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
9
26
|
fn main() {
|
|
10
27
|
let target = env::var("TARGET").unwrap();
|
|
11
28
|
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
|
@@ -18,15 +35,108 @@ fn main() {
|
|
|
18
35
|
return;
|
|
19
36
|
}
|
|
20
37
|
|
|
21
|
-
|
|
38
|
+
validate_feature_exclusivity();
|
|
39
|
+
let strategy = determine_link_strategy(&target);
|
|
40
|
+
|
|
41
|
+
tracing::debug!("Using PDFium linking strategy: {:?}", strategy);
|
|
42
|
+
|
|
43
|
+
match strategy {
|
|
44
|
+
PdfiumLinkStrategy::DownloadDynamic => {
|
|
45
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
46
|
+
link_dynamically(&pdfium_dir, &target);
|
|
47
|
+
copy_lib_to_package(&pdfium_dir, &target);
|
|
48
|
+
}
|
|
49
|
+
PdfiumLinkStrategy::DownloadStatic => {
|
|
50
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
51
|
+
link_statically(&pdfium_dir, &target);
|
|
52
|
+
// Skip copy_lib_to_package - library embedded in binary
|
|
53
|
+
}
|
|
54
|
+
PdfiumLinkStrategy::Bundled => {
|
|
55
|
+
let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
|
|
56
|
+
link_bundled(&pdfium_dir, &target, &out_dir);
|
|
57
|
+
// Skip copy_lib_to_package - each binary extracts its own
|
|
58
|
+
}
|
|
59
|
+
PdfiumLinkStrategy::System => {
|
|
60
|
+
link_system(&target);
|
|
61
|
+
// No download or copy needed
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
link_system_frameworks(&target);
|
|
66
|
+
println!("cargo:rerun-if-changed=build.rs");
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// ============================================================================
|
|
70
|
+
// FEATURE & STRATEGY VALIDATION
|
|
71
|
+
// ============================================================================
|
|
72
|
+
|
|
73
|
+
/// Abort the build when more than one PDFium linking feature is enabled.
///
/// `pdf-static`, `pdf-bundled` and `pdf-system` are mutually exclusive.
/// Enabling none of them is fine (the default strategy is used).
///
/// # Panics
///
/// Panics if two or more of the three features are active.
fn validate_feature_exclusivity() {
    // Each `cfg!` is a compile-time boolean; summing them counts active features.
    let enabled = usize::from(cfg!(feature = "pdf-static"))
        + usize::from(cfg!(feature = "pdf-bundled"))
        + usize::from(cfg!(feature = "pdf-system"));

    if enabled <= 1 {
        return;
    }

    panic!(
        "Only one of pdf-static, pdf-bundled, pdf-system can be enabled at once.\n\
         Please choose a single PDFium linking strategy."
    );
}
|
|
89
|
+
|
|
90
|
+
/// Determine which linking strategy to use based on features and target
|
|
91
|
+
fn determine_link_strategy(target: &str) -> PdfiumLinkStrategy {
|
|
92
|
+
// WASM always uses static linking
|
|
93
|
+
if target.contains("wasm") {
|
|
94
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Feature-based strategy selection (priority order)
|
|
98
|
+
if cfg!(feature = "pdf-system") {
|
|
99
|
+
return PdfiumLinkStrategy::System;
|
|
100
|
+
}
|
|
101
|
+
if cfg!(feature = "pdf-bundled") {
|
|
102
|
+
return PdfiumLinkStrategy::Bundled;
|
|
103
|
+
}
|
|
104
|
+
if cfg!(feature = "pdf-static") {
|
|
105
|
+
return PdfiumLinkStrategy::DownloadStatic;
|
|
106
|
+
}
|
|
22
107
|
|
|
108
|
+
// Default: download and link dynamically
|
|
109
|
+
PdfiumLinkStrategy::DownloadDynamic
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// DOWNLOAD & PREBUILT ORCHESTRATION
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
/// Download PDFium or use prebuilt directory
|
|
117
|
+
///
|
|
118
|
+
/// This is the main orchestrator function that:
|
|
119
|
+
/// 1. Checks for `KREUZBERG_PDFIUM_PREBUILT` environment variable
|
|
120
|
+
/// 2. If set and valid, uses prebuilt pdfium directory
|
|
121
|
+
/// 3. If not set, downloads pdfium to out_dir (with caching)
|
|
122
|
+
/// 4. Returns PathBuf to pdfium directory
|
|
123
|
+
///
|
|
124
|
+
/// Reuses all existing helper functions:
|
|
125
|
+
/// - `get_pdfium_url_and_lib()` - determines download URL for target
|
|
126
|
+
/// - `download_and_extract_pdfium()` - downloads with retry logic
|
|
127
|
+
/// - `runtime_library_info()` - platform-specific library names
|
|
128
|
+
/// - `prepare_prebuilt_pdfium()` - handles prebuilt copy
|
|
129
|
+
fn download_or_use_prebuilt(target: &str, out_dir: &Path) -> PathBuf {
|
|
130
|
+
let (download_url, _lib_name) = get_pdfium_url_and_lib(target);
|
|
23
131
|
let pdfium_dir = out_dir.join("pdfium");
|
|
24
132
|
|
|
133
|
+
// Check for prebuilt pdfium directory
|
|
25
134
|
if let Some(prebuilt) = env::var_os("KREUZBERG_PDFIUM_PREBUILT") {
|
|
26
135
|
let prebuilt_path = PathBuf::from(prebuilt);
|
|
27
136
|
if prebuilt_path.exists() {
|
|
28
137
|
prepare_prebuilt_pdfium(&prebuilt_path, &pdfium_dir)
|
|
29
138
|
.unwrap_or_else(|err| panic!("Failed to copy Pdfium from {}: {}", prebuilt_path.display(), err));
|
|
139
|
+
return pdfium_dir;
|
|
30
140
|
} else {
|
|
31
141
|
panic!(
|
|
32
142
|
"Environment variable KREUZBERG_PDFIUM_PREBUILT points to '{}' but the directory does not exist",
|
|
@@ -35,8 +145,10 @@ fn main() {
|
|
|
35
145
|
}
|
|
36
146
|
}
|
|
37
147
|
|
|
38
|
-
|
|
39
|
-
let
|
|
148
|
+
// Check if library already exists (cache validation) using flexible detection
|
|
149
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
150
|
+
let lib_found = find_pdfium_library(&pdfium_dir, &runtime_lib_name, runtime_subdir).is_ok();
|
|
151
|
+
|
|
40
152
|
let import_lib_exists = if target.contains("windows") {
|
|
41
153
|
let lib_dir = pdfium_dir.join("lib");
|
|
42
154
|
lib_dir.join("pdfium.lib").exists() || lib_dir.join("pdfium.dll.lib").exists()
|
|
@@ -44,14 +156,15 @@ fn main() {
|
|
|
44
156
|
true
|
|
45
157
|
};
|
|
46
158
|
|
|
47
|
-
if !
|
|
159
|
+
if !lib_found || !import_lib_exists {
|
|
48
160
|
tracing::debug!("Pdfium library not found, downloading for target: {}", target);
|
|
49
161
|
tracing::debug!("Download URL: {}", download_url);
|
|
50
162
|
download_and_extract_pdfium(&download_url, &pdfium_dir);
|
|
51
163
|
} else {
|
|
52
|
-
tracing::debug!("Pdfium library already
|
|
164
|
+
tracing::debug!("Pdfium library already cached at {}", pdfium_dir.display());
|
|
53
165
|
}
|
|
54
166
|
|
|
167
|
+
// Windows-specific: ensure pdfium.lib exists
|
|
55
168
|
if target.contains("windows") {
|
|
56
169
|
let lib_dir = pdfium_dir.join("lib");
|
|
57
170
|
let dll_lib = lib_dir.join("pdfium.dll.lib");
|
|
@@ -63,41 +176,17 @@ fn main() {
|
|
|
63
176
|
}
|
|
64
177
|
}
|
|
65
178
|
|
|
66
|
-
|
|
67
|
-
println!("cargo:rustc-link-search=native={}", lib_dir.display());
|
|
68
|
-
|
|
69
|
-
// WASM requires static linking
|
|
70
|
-
let link_type = if target.contains("wasm") { "static" } else { "dylib" };
|
|
71
|
-
println!("cargo:rustc-link-lib={}={}", link_type, lib_name);
|
|
72
|
-
|
|
73
|
-
if target.contains("darwin") {
|
|
74
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
75
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
76
|
-
} else if target.contains("linux") {
|
|
77
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
78
|
-
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
copy_lib_to_package(&pdfium_dir, &target);
|
|
82
|
-
|
|
83
|
-
if target.contains("darwin") {
|
|
84
|
-
println!("cargo:rustc-link-lib=framework=CoreFoundation");
|
|
85
|
-
println!("cargo:rustc-link-lib=framework=CoreGraphics");
|
|
86
|
-
println!("cargo:rustc-link-lib=framework=CoreText");
|
|
87
|
-
println!("cargo:rustc-link-lib=framework=AppKit");
|
|
88
|
-
println!("cargo:rustc-link-lib=dylib=c++");
|
|
89
|
-
} else if target.contains("linux") {
|
|
90
|
-
println!("cargo:rustc-link-lib=dylib=stdc++");
|
|
91
|
-
println!("cargo:rustc-link-lib=dylib=m");
|
|
92
|
-
} else if target.contains("windows") {
|
|
93
|
-
println!("cargo:rustc-link-lib=dylib=gdi32");
|
|
94
|
-
println!("cargo:rustc-link-lib=dylib=user32");
|
|
95
|
-
println!("cargo:rustc-link-lib=dylib=advapi32");
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
println!("cargo:rerun-if-changed=build.rs");
|
|
179
|
+
pdfium_dir
|
|
99
180
|
}
|
|
100
181
|
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// DOWNLOAD UTILITIES
|
|
184
|
+
// ============================================================================
|
|
185
|
+
|
|
186
|
+
/// Fetch the latest release version from a GitHub repository
|
|
187
|
+
///
|
|
188
|
+
/// Uses curl to query the GitHub API and extract the tag_name from the
|
|
189
|
+
/// latest release JSON response. Falls back to "7529" if API call fails.
|
|
101
190
|
fn get_latest_version(repo: &str) -> String {
|
|
102
191
|
let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);
|
|
103
192
|
|
|
@@ -122,6 +211,12 @@ fn get_latest_version(repo: &str) -> String {
|
|
|
122
211
|
"7529".to_string()
|
|
123
212
|
}
|
|
124
213
|
|
|
214
|
+
/// Get the download URL and library name for the target platform
|
|
215
|
+
///
|
|
216
|
+
/// Determines platform/architecture from target triple and constructs
|
|
217
|
+
/// the appropriate GitHub release download URL. Supports:
|
|
218
|
+
/// - WASM: paulocoutinhox/pdfium-lib
|
|
219
|
+
/// - Other platforms: bblanchon/pdfium-binaries
|
|
125
220
|
fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
126
221
|
if target.contains("wasm") {
|
|
127
222
|
let version = env::var("PDFIUM_WASM_VERSION")
|
|
@@ -180,6 +275,15 @@ fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
|
|
|
180
275
|
(url, "pdfium".to_string())
|
|
181
276
|
}
|
|
182
277
|
|
|
278
|
+
/// Download and extract PDFium archive with retry logic
|
|
279
|
+
///
|
|
280
|
+
/// Features:
|
|
281
|
+
/// - Exponential backoff retry (configurable via env vars)
|
|
282
|
+
/// - File type validation (gzip check)
|
|
283
|
+
/// - Windows-specific import library handling (pdfium.dll.lib -> pdfium.lib)
|
|
284
|
+
/// - Environment variables:
|
|
285
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: number of retries (default: 5)
|
|
286
|
+
/// - KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS: initial backoff in seconds (default: 2)
|
|
183
287
|
fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
184
288
|
fs::create_dir_all(dest_dir).expect("Failed to create pdfium directory");
|
|
185
289
|
|
|
@@ -291,15 +395,404 @@ fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
|
|
|
291
395
|
tracing::debug!("Pdfium downloaded and extracted successfully");
|
|
292
396
|
}
|
|
293
397
|
|
|
294
|
-
|
|
398
|
+
// ============================================================================
|
|
399
|
+
// PREBUILT HANDLING
|
|
400
|
+
// ============================================================================
|
|
401
|
+
|
|
402
|
+
/// Prepare prebuilt PDFium by copying to destination directory
|
|
403
|
+
///
|
|
404
|
+
/// Removes existing destination if present, then recursively copies
|
|
405
|
+
/// all files from prebuilt source to destination.
|
|
406
|
+
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
407
|
+
if dest_dir.exists() {
|
|
408
|
+
fs::remove_dir_all(dest_dir)?;
|
|
409
|
+
}
|
|
410
|
+
copy_dir_all(prebuilt_src, dest_dir)
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
/// Copy the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created before `src` is read.
/// Sub-directories are copied recursively; every other entry is copied with
/// `fs::copy`.
///
/// # Errors
///
/// Stops at, and returns, the first I/O error encountered.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;
    fs::read_dir(src)?.try_for_each(|item| {
        let item = item?;
        let kind = item.file_type()?;
        let from = item.path();
        let to = dst.join(item.file_name());
        if kind.is_dir() {
            copy_dir_all(&from, &to)
        } else {
            // `fs::copy` returns the byte count, which callers do not need.
            fs::copy(&from, &to).map(|_| ())
        }
    })
}
|
|
431
|
+
|
|
432
|
+
// ============================================================================
|
|
433
|
+
// PLATFORM UTILITIES
|
|
434
|
+
// ============================================================================
|
|
435
|
+
|
|
436
|
+
/// Return the PDFium library file name and its conventional sub-directory
/// for a target triple.
///
/// The triple is matched by substring, first match wins:
/// - `wasm`    -> (`libpdfium.a`, `lib`)
/// - `windows` -> (`pdfium.dll`, `bin`)
/// - `darwin`  -> (`libpdfium.dylib`, `lib`)
/// - anything else -> (`libpdfium.so`, `lib`)
fn runtime_library_info(target: &str) -> (String, &'static str) {
    let (file_name, subdir) = match target {
        t if t.contains("wasm") => ("libpdfium.a", "lib"),
        t if t.contains("windows") => ("pdfium.dll", "bin"),
        t if t.contains("darwin") => ("libpdfium.dylib", "lib"),
        _ => ("libpdfium.so", "lib"),
    };
    (file_name.to_owned(), subdir)
}
|
|
454
|
+
|
|
455
|
+
/// Find PDFium library in archive with flexible directory detection
|
|
456
|
+
///
|
|
457
|
+
/// Attempts to locate the library at multiple possible locations:
|
|
458
|
+
/// - {subdir}/{lib_name} (standard location)
|
|
459
|
+
/// - {lib_name} (root of archive)
|
|
460
|
+
/// - bin/{lib_name} (alternative location)
|
|
461
|
+
/// - lib/{lib_name} (explicit lib directory)
|
|
462
|
+
///
|
|
463
|
+
/// This handles variations in archive structure across different platform builds,
|
|
464
|
+
/// particularly macOS ARM64 where the archive structure may differ.
|
|
465
|
+
///
|
|
466
|
+
/// Returns the full path to the library if found, or an error with available files.
|
|
467
|
+
fn find_pdfium_library(pdfium_dir: &Path, lib_name: &str, expected_subdir: &str) -> Result<PathBuf, String> {
|
|
468
|
+
// Candidates in priority order
|
|
469
|
+
let candidates = [
|
|
470
|
+
pdfium_dir.join(expected_subdir).join(lib_name), // Standard: lib/libpdfium.dylib
|
|
471
|
+
pdfium_dir.join(lib_name), // Root: libpdfium.dylib
|
|
472
|
+
pdfium_dir.join("bin").join(lib_name), // Alternative: bin/libpdfium.dylib
|
|
473
|
+
pdfium_dir.join("lib").join(lib_name), // Explicit lib: lib/libpdfium.dylib
|
|
474
|
+
];
|
|
475
|
+
|
|
476
|
+
// Try each candidate
|
|
477
|
+
for candidate in &candidates {
|
|
478
|
+
if candidate.exists() {
|
|
479
|
+
tracing::debug!("Found PDFium library at: {}", candidate.display());
|
|
480
|
+
return Ok(candidate.clone());
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
// Library not found - provide detailed error with directory listing
|
|
485
|
+
let mut error_msg = format!(
|
|
486
|
+
"PDFium library not found at expected location: {}/{}\n\n",
|
|
487
|
+
pdfium_dir.display(),
|
|
488
|
+
expected_subdir
|
|
489
|
+
);
|
|
490
|
+
error_msg.push_str("Attempted locations:\n");
|
|
491
|
+
for candidate in &candidates {
|
|
492
|
+
error_msg.push_str(&format!(" - {}\n", candidate.display()));
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
// List actual contents of pdfium directory for debugging
|
|
496
|
+
error_msg.push_str("\nActual archive contents:\n");
|
|
497
|
+
if let Ok(entries) = fs::read_dir(pdfium_dir) {
|
|
498
|
+
for entry in entries.flatten() {
|
|
499
|
+
let path = entry.path();
|
|
500
|
+
let file_type = if path.is_dir() { "dir" } else { "file" };
|
|
501
|
+
error_msg.push_str(&format!(" {} ({})\n", path.display(), file_type));
|
|
502
|
+
|
|
503
|
+
// Show contents of subdirectories
|
|
504
|
+
if path.is_dir()
|
|
505
|
+
&& let Ok(sub_entries) = fs::read_dir(&path)
|
|
506
|
+
{
|
|
507
|
+
for sub_entry in sub_entries.flatten() {
|
|
508
|
+
let sub_path = sub_entry.path();
|
|
509
|
+
let sub_type = if sub_path.is_dir() { "dir" } else { "file" };
|
|
510
|
+
error_msg.push_str(&format!(" {} ({})\n", sub_path.display(), sub_type));
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
Err(error_msg)
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
/// Fix macOS install name (rpath) for dynamic library
|
|
520
|
+
///
|
|
521
|
+
/// Uses install_name_tool to set the install name to @rpath/{lib_name}
|
|
522
|
+
/// to enable relative path loading on macOS.
|
|
523
|
+
fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
|
|
524
|
+
let new_install_name = format!("@rpath/{}", lib_name);
|
|
525
|
+
|
|
526
|
+
tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
|
|
527
|
+
|
|
528
|
+
let status = Command::new("install_name_tool")
|
|
529
|
+
.arg("-id")
|
|
530
|
+
.arg(&new_install_name)
|
|
531
|
+
.arg(lib_path)
|
|
532
|
+
.status();
|
|
533
|
+
|
|
534
|
+
match status {
|
|
535
|
+
Ok(s) if s.success() => {
|
|
536
|
+
tracing::debug!("Successfully updated install_name");
|
|
537
|
+
}
|
|
538
|
+
Ok(s) => {
|
|
539
|
+
tracing::debug!("install_name_tool failed with status: {}", s);
|
|
540
|
+
}
|
|
541
|
+
Err(e) => {
|
|
542
|
+
tracing::debug!("Failed to run install_name_tool: {}", e);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
/// Code sign binary on macOS if needed
|
|
548
|
+
///
|
|
549
|
+
/// Uses codesign to sign the binary. Identity from KREUZBERG_CODESIGN_IDENTITY
|
|
550
|
+
/// env var (default: "-" for adhoc signing). Only runs on apple-darwin targets.
|
|
551
|
+
fn codesign_if_needed(target: &str, binary: &Path) {
|
|
552
|
+
if !target.contains("apple-darwin") || !binary.exists() {
|
|
553
|
+
return;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
|
|
557
|
+
let status = Command::new("codesign")
|
|
558
|
+
.arg("--force")
|
|
559
|
+
.arg("--timestamp=none")
|
|
560
|
+
.arg("--sign")
|
|
561
|
+
.arg(identity)
|
|
562
|
+
.arg(binary)
|
|
563
|
+
.status();
|
|
564
|
+
|
|
565
|
+
match status {
|
|
566
|
+
Ok(result) if result.success() => {
|
|
567
|
+
tracing::debug!("Codesigned {}", binary.display());
|
|
568
|
+
}
|
|
569
|
+
Ok(result) => {
|
|
570
|
+
tracing::debug!(
|
|
571
|
+
"codesign exited with status {} while signing {}",
|
|
572
|
+
result,
|
|
573
|
+
binary.display()
|
|
574
|
+
);
|
|
575
|
+
}
|
|
576
|
+
Err(err) => {
|
|
577
|
+
tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
// ============================================================================
|
|
583
|
+
// LINKING STRATEGIES
|
|
584
|
+
// ============================================================================
|
|
585
|
+
|
|
586
|
+
/// Link PDFium dynamically (default)
|
|
587
|
+
///
|
|
588
|
+
/// Sets up linker to use PDFium as a dynamic library (.dylib/.so/.dll)
|
|
589
|
+
/// with platform-specific rpath configuration for runtime library discovery.
|
|
590
|
+
/// Supports flexible archive structures by adding multiple possible lib directories.
|
|
591
|
+
fn link_dynamically(pdfium_dir: &Path, target: &str) {
|
|
592
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
593
|
+
|
|
594
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
595
|
+
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
596
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
597
|
+
Err(err) => panic!("{}", err),
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
601
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
602
|
+
|
|
603
|
+
// Also add standard lib directory for compatibility
|
|
604
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
605
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
606
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// Add bin directory for platforms where it might be needed
|
|
610
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
611
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
612
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// Set rpath for dynamic linking
|
|
616
|
+
if target.contains("darwin") {
|
|
617
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
|
618
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
|
|
619
|
+
} else if target.contains("linux") {
|
|
620
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
|
|
621
|
+
println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
/// Link PDFium statically (pdf-static feature)
|
|
626
|
+
///
|
|
627
|
+
/// Embeds PDFium into the binary as a static library. Adds system
|
|
628
|
+
/// dependencies required for static linking on Linux.
|
|
629
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
630
|
+
fn link_statically(pdfium_dir: &Path, target: &str) {
|
|
631
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
632
|
+
|
|
633
|
+
// Find the actual library location (handles multiple possible archive structures)
|
|
634
|
+
let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
635
|
+
Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
|
|
636
|
+
Err(err) => panic!("{}", err),
|
|
637
|
+
};
|
|
638
|
+
|
|
639
|
+
println!("cargo:rustc-link-search=native={}", lib_path.display());
|
|
640
|
+
println!("cargo:rustc-link-lib=static=pdfium");
|
|
641
|
+
|
|
642
|
+
// Also add standard lib directory for compatibility
|
|
643
|
+
let std_lib_dir = pdfium_dir.join("lib");
|
|
644
|
+
if std_lib_dir.exists() && std_lib_dir != lib_path {
|
|
645
|
+
println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
// Add bin directory for platforms where it might be needed
|
|
649
|
+
let bin_dir = pdfium_dir.join("bin");
|
|
650
|
+
if bin_dir.exists() && bin_dir != lib_path {
|
|
651
|
+
println!("cargo:rustc-link-search=native={}", bin_dir.display());
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
// Static linking requires additional system dependencies
|
|
655
|
+
if target.contains("linux") {
|
|
656
|
+
// Linux requires additional libraries for static linking
|
|
657
|
+
println!("cargo:rustc-link-lib=dylib=pthread");
|
|
658
|
+
println!("cargo:rustc-link-lib=dylib=dl");
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
/// Link PDFium bundled (pdf-bundled feature)
|
|
663
|
+
///
|
|
664
|
+
/// Links dynamically but copies library to OUT_DIR for embedding in binary.
|
|
665
|
+
/// Each binary extracts and uses its own copy of the PDFium library.
|
|
666
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
667
|
+
fn link_bundled(pdfium_dir: &Path, target: &str, out_dir: &Path) {
|
|
668
|
+
// Link dynamically for build
|
|
669
|
+
link_dynamically(pdfium_dir, target);
|
|
670
|
+
|
|
671
|
+
// Copy library to OUT_DIR for bundling using flexible detection
|
|
295
672
|
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
296
|
-
let src_lib = pdfium_dir
|
|
673
|
+
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
674
|
+
Ok(path) => path,
|
|
675
|
+
Err(err) => panic!("{}", err),
|
|
676
|
+
};
|
|
677
|
+
let bundled_lib = out_dir.join(&runtime_lib_name);
|
|
678
|
+
|
|
679
|
+
fs::copy(&src_lib, &bundled_lib)
|
|
680
|
+
.unwrap_or_else(|err| panic!("Failed to copy library to OUT_DIR for bundling: {}", err));
|
|
681
|
+
|
|
682
|
+
// Emit environment variable with bundled library path
|
|
683
|
+
let bundled_path = bundled_lib
|
|
684
|
+
.to_str()
|
|
685
|
+
.unwrap_or_else(|| panic!("Non-UTF8 path for bundled library: {}", bundled_lib.display()));
|
|
686
|
+
println!("cargo:rustc-env=KREUZBERG_PDFIUM_BUNDLED_PATH={}", bundled_path);
|
|
687
|
+
|
|
688
|
+
tracing::debug!("Bundled PDFium library at: {}", bundled_path);
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
/// Link system-installed PDFium (pdf-system feature)
|
|
692
|
+
///
|
|
693
|
+
/// Attempts to find PDFium via pkg-config first, then falls back to
|
|
694
|
+
/// environment variables (KREUZBERG_PDFIUM_SYSTEM_PATH, KREUZBERG_PDFIUM_SYSTEM_INCLUDE).
|
|
695
|
+
fn link_system(_target: &str) {
|
|
696
|
+
// Try pkg-config first
|
|
697
|
+
match pkg_config::Config::new().atleast_version("5.0").probe("pdfium") {
|
|
698
|
+
Ok(library) => {
|
|
699
|
+
tracing::debug!("Found system pdfium via pkg-config");
|
|
700
|
+
for include_path in &library.include_paths {
|
|
701
|
+
println!("cargo:include={}", include_path.display());
|
|
702
|
+
}
|
|
703
|
+
return;
|
|
704
|
+
}
|
|
705
|
+
Err(err) => {
|
|
706
|
+
tracing::debug!("pkg-config probe failed: {}", err);
|
|
707
|
+
}
|
|
708
|
+
}
|
|
297
709
|
|
|
298
|
-
|
|
299
|
-
|
|
710
|
+
// Fallback to environment variables
|
|
711
|
+
let lib_path = env::var("KREUZBERG_PDFIUM_SYSTEM_PATH").ok();
|
|
712
|
+
let include_path = env::var("KREUZBERG_PDFIUM_SYSTEM_INCLUDE").ok();
|
|
713
|
+
|
|
714
|
+
if let Some(lib_dir) = lib_path {
|
|
715
|
+
let lib_dir_path = PathBuf::from(&lib_dir);
|
|
716
|
+
if !lib_dir_path.exists() {
|
|
717
|
+
panic!(
|
|
718
|
+
"KREUZBERG_PDFIUM_SYSTEM_PATH points to '{}' but the directory does not exist",
|
|
719
|
+
lib_dir
|
|
720
|
+
);
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
println!("cargo:rustc-link-search=native={}", lib_dir);
|
|
724
|
+
println!("cargo:rustc-link-lib=dylib=pdfium");
|
|
725
|
+
|
|
726
|
+
if let Some(inc_dir) = include_path {
|
|
727
|
+
println!("cargo:include={}", inc_dir);
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
tracing::debug!("Using system pdfium from: {}", lib_dir);
|
|
300
731
|
return;
|
|
301
732
|
}
|
|
302
733
|
|
|
734
|
+
// No system pdfium found
|
|
735
|
+
panic!(
|
|
736
|
+
"pdf-system feature enabled but pdfium not found.\n\
|
|
737
|
+
\n\
|
|
738
|
+
Please install pdfium system-wide or provide:\n\
|
|
739
|
+
- KREUZBERG_PDFIUM_SYSTEM_PATH: path to directory containing libpdfium\n\
|
|
740
|
+
- KREUZBERG_PDFIUM_SYSTEM_INCLUDE: path to pdfium headers (optional)\n\
|
|
741
|
+
\n\
|
|
742
|
+
Alternatively, use a different linking strategy:\n\
|
|
743
|
+
- Default (dynamic): cargo build --features pdf\n\
|
|
744
|
+
- Static linking: cargo build --features pdf,pdf-static\n\
|
|
745
|
+
- Bundled: cargo build --features pdf,pdf-bundled"
|
|
746
|
+
);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
/// Emit `cargo:rustc-link-lib` directives for the platform libraries linked
/// alongside PDFium.
///
/// - `darwin`: CoreFoundation, CoreGraphics, CoreText and AppKit frameworks, plus `c++`
/// - `linux`: `stdc++`, `m`
/// - `windows`: `gdi32`, `user32`, `advapi32`
///
/// Any other target gets no extra libraries.
fn link_system_frameworks(target: &str) {
    // Each entry is the `KIND=NAME` part of a `rustc-link-lib` directive.
    let libs: &[&str] = if target.contains("darwin") {
        &[
            "framework=CoreFoundation",
            "framework=CoreGraphics",
            "framework=CoreText",
            "framework=AppKit",
            "dylib=c++",
        ]
    } else if target.contains("linux") {
        &["dylib=stdc++", "dylib=m"]
    } else if target.contains("windows") {
        &["dylib=gdi32", "dylib=user32", "dylib=advapi32"]
    } else {
        &[]
    };

    for lib in libs {
        println!("cargo:rustc-link-lib={}", lib);
    }
}
|
|
771
|
+
|
|
772
|
+
// ============================================================================
|
|
773
|
+
// LIBRARY DISTRIBUTION
|
|
774
|
+
// ============================================================================
|
|
775
|
+
|
|
776
|
+
/// Copy PDFium library to various package directories
|
|
777
|
+
///
|
|
778
|
+
/// Distributes the compiled/downloaded PDFium library to:
|
|
779
|
+
/// - CLI target directories (debug/release)
|
|
780
|
+
/// - Python package directory
|
|
781
|
+
/// - Node.js package directory
|
|
782
|
+
/// - Ruby gem directory
|
|
783
|
+
///
|
|
784
|
+
/// On macOS, also fixes install_name and applies code signing.
|
|
785
|
+
/// Supports flexible archive structures by finding library in multiple locations.
|
|
786
|
+
fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
|
|
787
|
+
let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
|
|
788
|
+
let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
|
|
789
|
+
Ok(path) => path,
|
|
790
|
+
Err(err) => {
|
|
791
|
+
tracing::debug!("Failed to locate PDFium library: {}", err);
|
|
792
|
+
return;
|
|
793
|
+
}
|
|
794
|
+
};
|
|
795
|
+
|
|
303
796
|
if target.contains("darwin") {
|
|
304
797
|
fix_macos_install_name(&src_lib, &runtime_lib_name);
|
|
305
798
|
codesign_if_needed(target, &src_lib);
|
|
@@ -368,6 +861,10 @@ fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
|
|
|
368
861
|
}
|
|
369
862
|
}
|
|
370
863
|
|
|
864
|
+
/// Copy library to destination if needed (based on modification time)
|
|
865
|
+
///
|
|
866
|
+
/// Only copies if destination doesn't exist or source is newer than destination.
|
|
867
|
+
/// Applies platform-specific post-processing (code signing on macOS).
|
|
371
868
|
fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str) {
|
|
372
869
|
use std::fs;
|
|
373
870
|
|
|
@@ -392,94 +889,3 @@ fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str)
|
|
|
392
889
|
}
|
|
393
890
|
}
|
|
394
891
|
}
|
|
395
|
-
|
|
396
|
-
fn codesign_if_needed(target: &str, binary: &Path) {
|
|
397
|
-
if !target.contains("apple-darwin") || !binary.exists() {
|
|
398
|
-
return;
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
|
|
402
|
-
let status = Command::new("codesign")
|
|
403
|
-
.arg("--force")
|
|
404
|
-
.arg("--timestamp=none")
|
|
405
|
-
.arg("--sign")
|
|
406
|
-
.arg(identity)
|
|
407
|
-
.arg(binary)
|
|
408
|
-
.status();
|
|
409
|
-
|
|
410
|
-
match status {
|
|
411
|
-
Ok(result) if result.success() => {
|
|
412
|
-
tracing::debug!("Codesigned {}", binary.display());
|
|
413
|
-
}
|
|
414
|
-
Ok(result) => {
|
|
415
|
-
tracing::debug!(
|
|
416
|
-
"codesign exited with status {} while signing {}",
|
|
417
|
-
result,
|
|
418
|
-
binary.display()
|
|
419
|
-
);
|
|
420
|
-
}
|
|
421
|
-
Err(err) => {
|
|
422
|
-
tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
fn runtime_library_info(target: &str) -> (String, &'static str) {
|
|
428
|
-
if target.contains("wasm") {
|
|
429
|
-
("libpdfium.a".to_string(), "lib")
|
|
430
|
-
} else if target.contains("windows") {
|
|
431
|
-
("pdfium.dll".to_string(), "bin")
|
|
432
|
-
} else if target.contains("darwin") {
|
|
433
|
-
("libpdfium.dylib".to_string(), "lib")
|
|
434
|
-
} else {
|
|
435
|
-
("libpdfium.so".to_string(), "lib")
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
|
|
440
|
-
if dest_dir.exists() {
|
|
441
|
-
fs::remove_dir_all(dest_dir)?;
|
|
442
|
-
}
|
|
443
|
-
copy_dir_all(prebuilt_src, dest_dir)
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
|
|
447
|
-
fs::create_dir_all(dst)?;
|
|
448
|
-
for entry in fs::read_dir(src)? {
|
|
449
|
-
let entry = entry?;
|
|
450
|
-
let file_type = entry.file_type()?;
|
|
451
|
-
let target_path = dst.join(entry.file_name());
|
|
452
|
-
if file_type.is_dir() {
|
|
453
|
-
copy_dir_all(&entry.path(), &target_path)?;
|
|
454
|
-
} else {
|
|
455
|
-
fs::copy(entry.path(), &target_path)?;
|
|
456
|
-
}
|
|
457
|
-
}
|
|
458
|
-
Ok(())
|
|
459
|
-
}
|
|
460
|
-
|
|
461
|
-
fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
|
|
462
|
-
use std::process::Command;
|
|
463
|
-
|
|
464
|
-
let new_install_name = format!("@rpath/{}", lib_name);
|
|
465
|
-
|
|
466
|
-
tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
|
|
467
|
-
|
|
468
|
-
let status = Command::new("install_name_tool")
|
|
469
|
-
.arg("-id")
|
|
470
|
-
.arg(&new_install_name)
|
|
471
|
-
.arg(lib_path)
|
|
472
|
-
.status();
|
|
473
|
-
|
|
474
|
-
match status {
|
|
475
|
-
Ok(s) if s.success() => {
|
|
476
|
-
tracing::debug!("Successfully updated install_name");
|
|
477
|
-
}
|
|
478
|
-
Ok(s) => {
|
|
479
|
-
tracing::debug!("install_name_tool failed with status: {}", s);
|
|
480
|
-
}
|
|
481
|
-
Err(e) => {
|
|
482
|
-
tracing::debug!("Failed to run install_name_tool: {}", e);
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
|
-
}
|