kreuzberg 4.0.0.pre.rc.7 → 4.0.0.pre.rc.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,23 @@ use std::process::Command;
6
6
  use std::thread;
7
7
  use std::time::Duration;
8
8
 
9
/// How the build script obtains PDFium and links it into the crate.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PdfiumLinkStrategy {
    /// Default behavior: download PDFium and link it as a dynamic library.
    DownloadDynamic,
    /// `pdf-static` feature: download PDFium and link it statically.
    DownloadStatic,
    /// `pdf-bundled` feature: download PDFium, link it dynamically, and embed it in the binary.
    Bundled,
    /// `pdf-system` feature: use a system-installed PDFium located via pkg-config.
    System,
}
21
+
22
+ // ============================================================================
23
+ // MAIN BUILD ORCHESTRATION
24
+ // ============================================================================
25
+
9
26
  fn main() {
10
27
  let target = env::var("TARGET").unwrap();
11
28
  let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
@@ -18,15 +35,108 @@ fn main() {
18
35
  return;
19
36
  }
20
37
 
21
- let (download_url, lib_name) = get_pdfium_url_and_lib(&target);
38
+ validate_feature_exclusivity();
39
+ let strategy = determine_link_strategy(&target);
40
+
41
+ tracing::debug!("Using PDFium linking strategy: {:?}", strategy);
42
+
43
+ match strategy {
44
+ PdfiumLinkStrategy::DownloadDynamic => {
45
+ let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
46
+ link_dynamically(&pdfium_dir, &target);
47
+ copy_lib_to_package(&pdfium_dir, &target);
48
+ }
49
+ PdfiumLinkStrategy::DownloadStatic => {
50
+ let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
51
+ link_statically(&pdfium_dir, &target);
52
+ // Skip copy_lib_to_package - library embedded in binary
53
+ }
54
+ PdfiumLinkStrategy::Bundled => {
55
+ let pdfium_dir = download_or_use_prebuilt(&target, &out_dir);
56
+ link_bundled(&pdfium_dir, &target, &out_dir);
57
+ // Skip copy_lib_to_package - each binary extracts its own
58
+ }
59
+ PdfiumLinkStrategy::System => {
60
+ link_system(&target);
61
+ // No download or copy needed
62
+ }
63
+ }
64
+
65
+ link_system_frameworks(&target);
66
+ println!("cargo:rerun-if-changed=build.rs");
67
+ }
68
+
69
+ // ============================================================================
70
+ // FEATURE & STRATEGY VALIDATION
71
+ // ============================================================================
72
+
73
/// Reject builds that enable more than one PDFium linking-strategy feature.
///
/// # Panics
///
/// Panics when two or more of the `pdf-static`, `pdf-bundled` and `pdf-system`
/// features are enabled at the same time.
fn validate_feature_exclusivity() {
    let enabled = usize::from(cfg!(feature = "pdf-static"))
        + usize::from(cfg!(feature = "pdf-bundled"))
        + usize::from(cfg!(feature = "pdf-system"));

    if enabled <= 1 {
        return;
    }

    panic!(
        "Only one of pdf-static, pdf-bundled, pdf-system can be enabled at once.\n\
         Please choose a single PDFium linking strategy."
    );
}
89
+
90
+ /// Determine which linking strategy to use based on features and target
91
+ fn determine_link_strategy(target: &str) -> PdfiumLinkStrategy {
92
+ // WASM always uses static linking
93
+ if target.contains("wasm") {
94
+ return PdfiumLinkStrategy::DownloadStatic;
95
+ }
96
+
97
+ // Feature-based strategy selection (priority order)
98
+ if cfg!(feature = "pdf-system") {
99
+ return PdfiumLinkStrategy::System;
100
+ }
101
+ if cfg!(feature = "pdf-bundled") {
102
+ return PdfiumLinkStrategy::Bundled;
103
+ }
104
+ if cfg!(feature = "pdf-static") {
105
+ return PdfiumLinkStrategy::DownloadStatic;
106
+ }
22
107
 
108
+ // Default: download and link dynamically
109
+ PdfiumLinkStrategy::DownloadDynamic
110
+ }
111
+
112
+ // ============================================================================
113
+ // DOWNLOAD & PREBUILT ORCHESTRATION
114
+ // ============================================================================
115
+
116
+ /// Download PDFium or use prebuilt directory
117
+ ///
118
+ /// This is the main orchestrator function that:
119
+ /// 1. Checks for `KREUZBERG_PDFIUM_PREBUILT` environment variable
120
+ /// 2. If set and valid, uses prebuilt pdfium directory
121
+ /// 3. If not set, downloads pdfium to out_dir (with caching)
122
+ /// 4. Returns PathBuf to pdfium directory
123
+ ///
124
+ /// Reuses all existing helper functions:
125
+ /// - `get_pdfium_url_and_lib()` - determines download URL for target
126
+ /// - `download_and_extract_pdfium()` - downloads with retry logic
127
+ /// - `runtime_library_info()` - platform-specific library names
128
+ /// - `prepare_prebuilt_pdfium()` - handles prebuilt copy
129
+ fn download_or_use_prebuilt(target: &str, out_dir: &Path) -> PathBuf {
130
+ let (download_url, _lib_name) = get_pdfium_url_and_lib(target);
23
131
  let pdfium_dir = out_dir.join("pdfium");
24
132
 
133
+ // Check for prebuilt pdfium directory
25
134
  if let Some(prebuilt) = env::var_os("KREUZBERG_PDFIUM_PREBUILT") {
26
135
  let prebuilt_path = PathBuf::from(prebuilt);
27
136
  if prebuilt_path.exists() {
28
137
  prepare_prebuilt_pdfium(&prebuilt_path, &pdfium_dir)
29
138
  .unwrap_or_else(|err| panic!("Failed to copy Pdfium from {}: {}", prebuilt_path.display(), err));
139
+ return pdfium_dir;
30
140
  } else {
31
141
  panic!(
32
142
  "Environment variable KREUZBERG_PDFIUM_PREBUILT points to '{}' but the directory does not exist",
@@ -35,8 +145,10 @@ fn main() {
35
145
  }
36
146
  }
37
147
 
38
- let (runtime_lib_name, runtime_subdir) = runtime_library_info(&target);
39
- let runtime_lib_path = pdfium_dir.join(runtime_subdir).join(&runtime_lib_name);
148
+ // Check if library already exists (cache validation) using flexible detection
149
+ let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
150
+ let lib_found = find_pdfium_library(&pdfium_dir, &runtime_lib_name, runtime_subdir).is_ok();
151
+
40
152
  let import_lib_exists = if target.contains("windows") {
41
153
  let lib_dir = pdfium_dir.join("lib");
42
154
  lib_dir.join("pdfium.lib").exists() || lib_dir.join("pdfium.dll.lib").exists()
@@ -44,14 +156,15 @@ fn main() {
44
156
  true
45
157
  };
46
158
 
47
- if !runtime_lib_path.exists() || !import_lib_exists {
159
+ if !lib_found || !import_lib_exists {
48
160
  tracing::debug!("Pdfium library not found, downloading for target: {}", target);
49
161
  tracing::debug!("Download URL: {}", download_url);
50
162
  download_and_extract_pdfium(&download_url, &pdfium_dir);
51
163
  } else {
52
- tracing::debug!("Pdfium library already present at {}", runtime_lib_path.display());
164
+ tracing::debug!("Pdfium library already cached at {}", pdfium_dir.display());
53
165
  }
54
166
 
167
+ // Windows-specific: ensure pdfium.lib exists
55
168
  if target.contains("windows") {
56
169
  let lib_dir = pdfium_dir.join("lib");
57
170
  let dll_lib = lib_dir.join("pdfium.dll.lib");
@@ -63,41 +176,17 @@ fn main() {
63
176
  }
64
177
  }
65
178
 
66
- let lib_dir = pdfium_dir.join("lib");
67
- println!("cargo:rustc-link-search=native={}", lib_dir.display());
68
-
69
- // WASM requires static linking
70
- let link_type = if target.contains("wasm") { "static" } else { "dylib" };
71
- println!("cargo:rustc-link-lib={}={}", link_type, lib_name);
72
-
73
- if target.contains("darwin") {
74
- println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
75
- println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
76
- } else if target.contains("linux") {
77
- println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
78
- println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
79
- }
80
-
81
- copy_lib_to_package(&pdfium_dir, &target);
82
-
83
- if target.contains("darwin") {
84
- println!("cargo:rustc-link-lib=framework=CoreFoundation");
85
- println!("cargo:rustc-link-lib=framework=CoreGraphics");
86
- println!("cargo:rustc-link-lib=framework=CoreText");
87
- println!("cargo:rustc-link-lib=framework=AppKit");
88
- println!("cargo:rustc-link-lib=dylib=c++");
89
- } else if target.contains("linux") {
90
- println!("cargo:rustc-link-lib=dylib=stdc++");
91
- println!("cargo:rustc-link-lib=dylib=m");
92
- } else if target.contains("windows") {
93
- println!("cargo:rustc-link-lib=dylib=gdi32");
94
- println!("cargo:rustc-link-lib=dylib=user32");
95
- println!("cargo:rustc-link-lib=dylib=advapi32");
96
- }
97
-
98
- println!("cargo:rerun-if-changed=build.rs");
179
+ pdfium_dir
99
180
  }
100
181
 
182
+ // ============================================================================
183
+ // DOWNLOAD UTILITIES
184
+ // ============================================================================
185
+
186
+ /// Fetch the latest release version from a GitHub repository
187
+ ///
188
+ /// Uses curl to query the GitHub API and extract the tag_name from the
189
+ /// latest release JSON response. Falls back to "7529" if API call fails.
101
190
  fn get_latest_version(repo: &str) -> String {
102
191
  let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);
103
192
 
@@ -122,6 +211,12 @@ fn get_latest_version(repo: &str) -> String {
122
211
  "7529".to_string()
123
212
  }
124
213
 
214
+ /// Get the download URL and library name for the target platform
215
+ ///
216
+ /// Determines platform/architecture from target triple and constructs
217
+ /// the appropriate GitHub release download URL. Supports:
218
+ /// - WASM: paulocoutinhox/pdfium-lib
219
+ /// - Other platforms: bblanchon/pdfium-binaries
125
220
  fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
126
221
  if target.contains("wasm") {
127
222
  let version = env::var("PDFIUM_WASM_VERSION")
@@ -180,6 +275,15 @@ fn get_pdfium_url_and_lib(target: &str) -> (String, String) {
180
275
  (url, "pdfium".to_string())
181
276
  }
182
277
 
278
+ /// Download and extract PDFium archive with retry logic
279
+ ///
280
+ /// Features:
281
+ /// - Exponential backoff retry (configurable via env vars)
282
+ /// - File type validation (gzip check)
283
+ /// - Windows-specific import library handling (pdfium.dll.lib -> pdfium.lib)
284
+ /// - Environment variables:
285
+ /// - KREUZBERG_PDFIUM_DOWNLOAD_RETRIES: number of retries (default: 5)
286
+ /// - KREUZBERG_PDFIUM_DOWNLOAD_BACKOFF_SECS: initial backoff in seconds (default: 2)
183
287
  fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
184
288
  fs::create_dir_all(dest_dir).expect("Failed to create pdfium directory");
185
289
 
@@ -291,15 +395,404 @@ fn download_and_extract_pdfium(url: &str, dest_dir: &Path) {
291
395
  tracing::debug!("Pdfium downloaded and extracted successfully");
292
396
  }
293
397
 
294
- fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
398
+ // ============================================================================
399
+ // PREBUILT HANDLING
400
+ // ============================================================================
401
+
402
+ /// Prepare prebuilt PDFium by copying to destination directory
403
+ ///
404
+ /// Removes existing destination if present, then recursively copies
405
+ /// all files from prebuilt source to destination.
406
+ fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
407
+ if dest_dir.exists() {
408
+ fs::remove_dir_all(dest_dir)?;
409
+ }
410
+ copy_dir_all(prebuilt_src, dest_dir)
411
+ }
412
+
413
/// Copy the directory tree rooted at `src` into `dst`.
///
/// `dst` (and any missing parents) is created first; subdirectories are copied
/// by recursion and every other entry is copied with `fs::copy`.
///
/// # Errors
///
/// Stops at, and returns, the first I/O error encountered.
fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
    fs::create_dir_all(dst)?;

    fs::read_dir(src)?.try_for_each(|item| -> io::Result<()> {
        let item = item?;
        let kind = item.file_type()?;
        let destination = dst.join(item.file_name());

        if kind.is_dir() {
            copy_dir_all(&item.path(), &destination)
        } else {
            fs::copy(item.path(), &destination).map(|_| ())
        }
    })
}
431
+
432
+ // ============================================================================
433
+ // PLATFORM UTILITIES
434
+ // ============================================================================
435
+
436
/// Return the PDFium library file name and the subdirectory it is expected in.
///
/// The first matching substring of `target` decides:
/// - `wasm`: `("libpdfium.a", "lib")`
/// - `windows`: `("pdfium.dll", "bin")`
/// - `darwin`: `("libpdfium.dylib", "lib")`
/// - anything else: `("libpdfium.so", "lib")`
fn runtime_library_info(target: &str) -> (String, &'static str) {
    // (target substring, library file name, subdirectory), checked in order.
    const KNOWN_PLATFORMS: [(&str, &str, &str); 3] = [
        ("wasm", "libpdfium.a", "lib"),
        ("windows", "pdfium.dll", "bin"),
        ("darwin", "libpdfium.dylib", "lib"),
    ];

    let (file_name, subdir) = KNOWN_PLATFORMS
        .iter()
        .find(|platform| target.contains(platform.0))
        .map(|platform| (platform.1, platform.2))
        .unwrap_or(("libpdfium.so", "lib"));

    (file_name.to_string(), subdir)
}
454
+
455
+ /// Find PDFium library in archive with flexible directory detection
456
+ ///
457
+ /// Attempts to locate the library at multiple possible locations:
458
+ /// - {subdir}/{lib_name} (standard location)
459
+ /// - {lib_name} (root of archive)
460
+ /// - bin/{lib_name} (alternative location)
461
+ /// - lib/{lib_name} (explicit lib directory)
462
+ ///
463
+ /// This handles variations in archive structure across different platform builds,
464
+ /// particularly macOS ARM64 where the archive structure may differ.
465
+ ///
466
+ /// Returns the full path to the library if found, or an error with available files.
467
+ fn find_pdfium_library(pdfium_dir: &Path, lib_name: &str, expected_subdir: &str) -> Result<PathBuf, String> {
468
+ // Candidates in priority order
469
+ let candidates = [
470
+ pdfium_dir.join(expected_subdir).join(lib_name), // Standard: lib/libpdfium.dylib
471
+ pdfium_dir.join(lib_name), // Root: libpdfium.dylib
472
+ pdfium_dir.join("bin").join(lib_name), // Alternative: bin/libpdfium.dylib
473
+ pdfium_dir.join("lib").join(lib_name), // Explicit lib: lib/libpdfium.dylib
474
+ ];
475
+
476
+ // Try each candidate
477
+ for candidate in &candidates {
478
+ if candidate.exists() {
479
+ tracing::debug!("Found PDFium library at: {}", candidate.display());
480
+ return Ok(candidate.clone());
481
+ }
482
+ }
483
+
484
+ // Library not found - provide detailed error with directory listing
485
+ let mut error_msg = format!(
486
+ "PDFium library not found at expected location: {}/{}\n\n",
487
+ pdfium_dir.display(),
488
+ expected_subdir
489
+ );
490
+ error_msg.push_str("Attempted locations:\n");
491
+ for candidate in &candidates {
492
+ error_msg.push_str(&format!(" - {}\n", candidate.display()));
493
+ }
494
+
495
+ // List actual contents of pdfium directory for debugging
496
+ error_msg.push_str("\nActual archive contents:\n");
497
+ if let Ok(entries) = fs::read_dir(pdfium_dir) {
498
+ for entry in entries.flatten() {
499
+ let path = entry.path();
500
+ let file_type = if path.is_dir() { "dir" } else { "file" };
501
+ error_msg.push_str(&format!(" {} ({})\n", path.display(), file_type));
502
+
503
+ // Show contents of subdirectories
504
+ if path.is_dir()
505
+ && let Ok(sub_entries) = fs::read_dir(&path)
506
+ {
507
+ for sub_entry in sub_entries.flatten() {
508
+ let sub_path = sub_entry.path();
509
+ let sub_type = if sub_path.is_dir() { "dir" } else { "file" };
510
+ error_msg.push_str(&format!(" {} ({})\n", sub_path.display(), sub_type));
511
+ }
512
+ }
513
+ }
514
+ }
515
+
516
+ Err(error_msg)
517
+ }
518
+
519
+ /// Fix macOS install name (rpath) for dynamic library
520
+ ///
521
+ /// Uses install_name_tool to set the install name to @rpath/{lib_name}
522
+ /// to enable relative path loading on macOS.
523
+ fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
524
+ let new_install_name = format!("@rpath/{}", lib_name);
525
+
526
+ tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
527
+
528
+ let status = Command::new("install_name_tool")
529
+ .arg("-id")
530
+ .arg(&new_install_name)
531
+ .arg(lib_path)
532
+ .status();
533
+
534
+ match status {
535
+ Ok(s) if s.success() => {
536
+ tracing::debug!("Successfully updated install_name");
537
+ }
538
+ Ok(s) => {
539
+ tracing::debug!("install_name_tool failed with status: {}", s);
540
+ }
541
+ Err(e) => {
542
+ tracing::debug!("Failed to run install_name_tool: {}", e);
543
+ }
544
+ }
545
+ }
546
+
547
+ /// Code sign binary on macOS if needed
548
+ ///
549
+ /// Uses codesign to sign the binary. Identity from KREUZBERG_CODESIGN_IDENTITY
550
+ /// env var (default: "-" for adhoc signing). Only runs on apple-darwin targets.
551
+ fn codesign_if_needed(target: &str, binary: &Path) {
552
+ if !target.contains("apple-darwin") || !binary.exists() {
553
+ return;
554
+ }
555
+
556
+ let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
557
+ let status = Command::new("codesign")
558
+ .arg("--force")
559
+ .arg("--timestamp=none")
560
+ .arg("--sign")
561
+ .arg(identity)
562
+ .arg(binary)
563
+ .status();
564
+
565
+ match status {
566
+ Ok(result) if result.success() => {
567
+ tracing::debug!("Codesigned {}", binary.display());
568
+ }
569
+ Ok(result) => {
570
+ tracing::debug!(
571
+ "codesign exited with status {} while signing {}",
572
+ result,
573
+ binary.display()
574
+ );
575
+ }
576
+ Err(err) => {
577
+ tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
578
+ }
579
+ }
580
+ }
581
+
582
+ // ============================================================================
583
+ // LINKING STRATEGIES
584
+ // ============================================================================
585
+
586
+ /// Link PDFium dynamically (default)
587
+ ///
588
+ /// Sets up linker to use PDFium as a dynamic library (.dylib/.so/.dll)
589
+ /// with platform-specific rpath configuration for runtime library discovery.
590
+ /// Supports flexible archive structures by adding multiple possible lib directories.
591
+ fn link_dynamically(pdfium_dir: &Path, target: &str) {
592
+ let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
593
+
594
+ // Find the actual library location (handles multiple possible archive structures)
595
+ let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
596
+ Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
597
+ Err(err) => panic!("{}", err),
598
+ };
599
+
600
+ println!("cargo:rustc-link-search=native={}", lib_path.display());
601
+ println!("cargo:rustc-link-lib=dylib=pdfium");
602
+
603
+ // Also add standard lib directory for compatibility
604
+ let std_lib_dir = pdfium_dir.join("lib");
605
+ if std_lib_dir.exists() && std_lib_dir != lib_path {
606
+ println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
607
+ }
608
+
609
+ // Add bin directory for platforms where it might be needed
610
+ let bin_dir = pdfium_dir.join("bin");
611
+ if bin_dir.exists() && bin_dir != lib_path {
612
+ println!("cargo:rustc-link-search=native={}", bin_dir.display());
613
+ }
614
+
615
+ // Set rpath for dynamic linking
616
+ if target.contains("darwin") {
617
+ println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
618
+ println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
619
+ } else if target.contains("linux") {
620
+ println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
621
+ println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
622
+ }
623
+ }
624
+
625
+ /// Link PDFium statically (pdf-static feature)
626
+ ///
627
+ /// Embeds PDFium into the binary as a static library. Adds system
628
+ /// dependencies required for static linking on Linux.
629
+ /// Supports flexible archive structures by finding library in multiple locations.
630
+ fn link_statically(pdfium_dir: &Path, target: &str) {
631
+ let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
632
+
633
+ // Find the actual library location (handles multiple possible archive structures)
634
+ let lib_path = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
635
+ Ok(path) => path.parent().unwrap_or(pdfium_dir).to_path_buf(),
636
+ Err(err) => panic!("{}", err),
637
+ };
638
+
639
+ println!("cargo:rustc-link-search=native={}", lib_path.display());
640
+ println!("cargo:rustc-link-lib=static=pdfium");
641
+
642
+ // Also add standard lib directory for compatibility
643
+ let std_lib_dir = pdfium_dir.join("lib");
644
+ if std_lib_dir.exists() && std_lib_dir != lib_path {
645
+ println!("cargo:rustc-link-search=native={}", std_lib_dir.display());
646
+ }
647
+
648
+ // Add bin directory for platforms where it might be needed
649
+ let bin_dir = pdfium_dir.join("bin");
650
+ if bin_dir.exists() && bin_dir != lib_path {
651
+ println!("cargo:rustc-link-search=native={}", bin_dir.display());
652
+ }
653
+
654
+ // Static linking requires additional system dependencies
655
+ if target.contains("linux") {
656
+ // Linux requires additional libraries for static linking
657
+ println!("cargo:rustc-link-lib=dylib=pthread");
658
+ println!("cargo:rustc-link-lib=dylib=dl");
659
+ }
660
+ }
661
+
662
+ /// Link PDFium bundled (pdf-bundled feature)
663
+ ///
664
+ /// Links dynamically but copies library to OUT_DIR for embedding in binary.
665
+ /// Each binary extracts and uses its own copy of the PDFium library.
666
+ /// Supports flexible archive structures by finding library in multiple locations.
667
+ fn link_bundled(pdfium_dir: &Path, target: &str, out_dir: &Path) {
668
+ // Link dynamically for build
669
+ link_dynamically(pdfium_dir, target);
670
+
671
+ // Copy library to OUT_DIR for bundling using flexible detection
295
672
  let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
296
- let src_lib = pdfium_dir.join(runtime_subdir).join(&runtime_lib_name);
673
+ let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
674
+ Ok(path) => path,
675
+ Err(err) => panic!("{}", err),
676
+ };
677
+ let bundled_lib = out_dir.join(&runtime_lib_name);
678
+
679
+ fs::copy(&src_lib, &bundled_lib)
680
+ .unwrap_or_else(|err| panic!("Failed to copy library to OUT_DIR for bundling: {}", err));
681
+
682
+ // Emit environment variable with bundled library path
683
+ let bundled_path = bundled_lib
684
+ .to_str()
685
+ .unwrap_or_else(|| panic!("Non-UTF8 path for bundled library: {}", bundled_lib.display()));
686
+ println!("cargo:rustc-env=KREUZBERG_PDFIUM_BUNDLED_PATH={}", bundled_path);
687
+
688
+ tracing::debug!("Bundled PDFium library at: {}", bundled_path);
689
+ }
690
+
691
+ /// Link system-installed PDFium (pdf-system feature)
692
+ ///
693
+ /// Attempts to find PDFium via pkg-config first, then falls back to
694
+ /// environment variables (KREUZBERG_PDFIUM_SYSTEM_PATH, KREUZBERG_PDFIUM_SYSTEM_INCLUDE).
695
+ fn link_system(_target: &str) {
696
+ // Try pkg-config first
697
+ match pkg_config::Config::new().atleast_version("5.0").probe("pdfium") {
698
+ Ok(library) => {
699
+ tracing::debug!("Found system pdfium via pkg-config");
700
+ for include_path in &library.include_paths {
701
+ println!("cargo:include={}", include_path.display());
702
+ }
703
+ return;
704
+ }
705
+ Err(err) => {
706
+ tracing::debug!("pkg-config probe failed: {}", err);
707
+ }
708
+ }
297
709
 
298
- if !src_lib.exists() {
299
- tracing::debug!("Source library not found: {}", src_lib.display());
710
+ // Fallback to environment variables
711
+ let lib_path = env::var("KREUZBERG_PDFIUM_SYSTEM_PATH").ok();
712
+ let include_path = env::var("KREUZBERG_PDFIUM_SYSTEM_INCLUDE").ok();
713
+
714
+ if let Some(lib_dir) = lib_path {
715
+ let lib_dir_path = PathBuf::from(&lib_dir);
716
+ if !lib_dir_path.exists() {
717
+ panic!(
718
+ "KREUZBERG_PDFIUM_SYSTEM_PATH points to '{}' but the directory does not exist",
719
+ lib_dir
720
+ );
721
+ }
722
+
723
+ println!("cargo:rustc-link-search=native={}", lib_dir);
724
+ println!("cargo:rustc-link-lib=dylib=pdfium");
725
+
726
+ if let Some(inc_dir) = include_path {
727
+ println!("cargo:include={}", inc_dir);
728
+ }
729
+
730
+ tracing::debug!("Using system pdfium from: {}", lib_dir);
300
731
  return;
301
732
  }
302
733
 
734
+ // No system pdfium found
735
+ panic!(
736
+ "pdf-system feature enabled but pdfium not found.\n\
737
+ \n\
738
+ Please install pdfium system-wide or provide:\n\
739
+ - KREUZBERG_PDFIUM_SYSTEM_PATH: path to directory containing libpdfium\n\
740
+ - KREUZBERG_PDFIUM_SYSTEM_INCLUDE: path to pdfium headers (optional)\n\
741
+ \n\
742
+ Alternatively, use a different linking strategy:\n\
743
+ - Default (dynamic): cargo build --features pdf\n\
744
+ - Static linking: cargo build --features pdf,pdf-static\n\
745
+ - Bundled: cargo build --features pdf,pdf-bundled"
746
+ );
747
+ }
748
+
749
/// Emit link directives for the platform libraries used alongside PDFium.
///
/// - `darwin`: the CoreFoundation, CoreGraphics, CoreText and AppKit frameworks plus `c++`
/// - `linux`: `stdc++` and `m`
/// - `windows`: `gdi32`, `user32` and `advapi32`
///
/// Targets matching none of these emit nothing.
fn link_system_frameworks(target: &str) {
    let directives: &[&str] = if target.contains("darwin") {
        &[
            "cargo:rustc-link-lib=framework=CoreFoundation",
            "cargo:rustc-link-lib=framework=CoreGraphics",
            "cargo:rustc-link-lib=framework=CoreText",
            "cargo:rustc-link-lib=framework=AppKit",
            "cargo:rustc-link-lib=dylib=c++",
        ]
    } else if target.contains("linux") {
        &["cargo:rustc-link-lib=dylib=stdc++", "cargo:rustc-link-lib=dylib=m"]
    } else if target.contains("windows") {
        &[
            "cargo:rustc-link-lib=dylib=gdi32",
            "cargo:rustc-link-lib=dylib=user32",
            "cargo:rustc-link-lib=dylib=advapi32",
        ]
    } else {
        &[]
    };

    for directive in directives {
        println!("{}", directive);
    }
}
771
+
772
+ // ============================================================================
773
+ // LIBRARY DISTRIBUTION
774
+ // ============================================================================
775
+
776
+ /// Copy PDFium library to various package directories
777
+ ///
778
+ /// Distributes the compiled/downloaded PDFium library to:
779
+ /// - CLI target directories (debug/release)
780
+ /// - Python package directory
781
+ /// - Node.js package directory
782
+ /// - Ruby gem directory
783
+ ///
784
+ /// On macOS, also fixes install_name and applies code signing.
785
+ /// Supports flexible archive structures by finding library in multiple locations.
786
+ fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
787
+ let (runtime_lib_name, runtime_subdir) = runtime_library_info(target);
788
+ let src_lib = match find_pdfium_library(pdfium_dir, &runtime_lib_name, runtime_subdir) {
789
+ Ok(path) => path,
790
+ Err(err) => {
791
+ tracing::debug!("Failed to locate PDFium library: {}", err);
792
+ return;
793
+ }
794
+ };
795
+
303
796
  if target.contains("darwin") {
304
797
  fix_macos_install_name(&src_lib, &runtime_lib_name);
305
798
  codesign_if_needed(target, &src_lib);
@@ -368,6 +861,10 @@ fn copy_lib_to_package(pdfium_dir: &Path, target: &str) {
368
861
  }
369
862
  }
370
863
 
864
+ /// Copy library to destination if needed (based on modification time)
865
+ ///
866
+ /// Only copies if destination doesn't exist or source is newer than destination.
867
+ /// Applies platform-specific post-processing (code signing on macOS).
371
868
  fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str) {
372
869
  use std::fs;
373
870
 
@@ -392,94 +889,3 @@ fn copy_lib_if_needed(src: &Path, dest: &Path, package_name: &str, target: &str)
392
889
  }
393
890
  }
394
891
  }
395
-
396
- fn codesign_if_needed(target: &str, binary: &Path) {
397
- if !target.contains("apple-darwin") || !binary.exists() {
398
- return;
399
- }
400
-
401
- let identity = env::var("KREUZBERG_CODESIGN_IDENTITY").unwrap_or_else(|_| "-".to_string());
402
- let status = Command::new("codesign")
403
- .arg("--force")
404
- .arg("--timestamp=none")
405
- .arg("--sign")
406
- .arg(identity)
407
- .arg(binary)
408
- .status();
409
-
410
- match status {
411
- Ok(result) if result.success() => {
412
- tracing::debug!("Codesigned {}", binary.display());
413
- }
414
- Ok(result) => {
415
- tracing::debug!(
416
- "codesign exited with status {} while signing {}",
417
- result,
418
- binary.display()
419
- );
420
- }
421
- Err(err) => {
422
- tracing::debug!("Failed to run codesign for {}: {}", binary.display(), err);
423
- }
424
- }
425
- }
426
-
427
- fn runtime_library_info(target: &str) -> (String, &'static str) {
428
- if target.contains("wasm") {
429
- ("libpdfium.a".to_string(), "lib")
430
- } else if target.contains("windows") {
431
- ("pdfium.dll".to_string(), "bin")
432
- } else if target.contains("darwin") {
433
- ("libpdfium.dylib".to_string(), "lib")
434
- } else {
435
- ("libpdfium.so".to_string(), "lib")
436
- }
437
- }
438
-
439
- fn prepare_prebuilt_pdfium(prebuilt_src: &Path, dest_dir: &Path) -> io::Result<()> {
440
- if dest_dir.exists() {
441
- fs::remove_dir_all(dest_dir)?;
442
- }
443
- copy_dir_all(prebuilt_src, dest_dir)
444
- }
445
-
446
- fn copy_dir_all(src: &Path, dst: &Path) -> io::Result<()> {
447
- fs::create_dir_all(dst)?;
448
- for entry in fs::read_dir(src)? {
449
- let entry = entry?;
450
- let file_type = entry.file_type()?;
451
- let target_path = dst.join(entry.file_name());
452
- if file_type.is_dir() {
453
- copy_dir_all(&entry.path(), &target_path)?;
454
- } else {
455
- fs::copy(entry.path(), &target_path)?;
456
- }
457
- }
458
- Ok(())
459
- }
460
-
461
- fn fix_macos_install_name(lib_path: &Path, lib_name: &str) {
462
- use std::process::Command;
463
-
464
- let new_install_name = format!("@rpath/{}", lib_name);
465
-
466
- tracing::debug!("Fixing install_name for {} to {}", lib_path.display(), new_install_name);
467
-
468
- let status = Command::new("install_name_tool")
469
- .arg("-id")
470
- .arg(&new_install_name)
471
- .arg(lib_path)
472
- .status();
473
-
474
- match status {
475
- Ok(s) if s.success() => {
476
- tracing::debug!("Successfully updated install_name");
477
- }
478
- Ok(s) => {
479
- tracing::debug!("install_name_tool failed with status: {}", s);
480
- }
481
- Err(e) => {
482
- tracing::debug!("Failed to run install_name_tool: {}", e);
483
- }
484
- }
485
- }