kreuzberg 4.0.0.pre.rc.20 → 4.0.0.pre.rc.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8f4c578323928e218a33fd0941a5b98e598da2a67567ecf59e1f76ac02299ac1
4
- data.tar.gz: dde6bdee61e7baf36f2028ca3d06a746fa254263ee597fa2e47e28879d4afa06
3
+ metadata.gz: bc17da6af86a2e71001e5e5844c764fbabdc29e1b777b368d179fd505f8440f1
4
+ data.tar.gz: eae32510b4c628b8aa73dd943fc0adf60c316efc2e20d2910e84a0f60bf9da24
5
5
  SHA512:
6
- metadata.gz: d1e0a132d36eb9f6ce3abe27ac258a37f08b3028dafe6cebff55145a2218ea324682c3273134fd4626f1f5bd9f326cdf6e18270d9db759a1caafd2447052c1b4
7
- data.tar.gz: a62f75e8d66d289532a5943c472c30642f3215e3d46555618632c4bfaf8826ea077b2ecb585e46b180706df12808b5206ff8ce697ff4962e76ea674c7eeaf952
6
+ metadata.gz: 869652660b57b61427d58ef01d832efc6854aa20c232db1c5fe0d1ce8601a70368c675d4b904be18d86f5b8d8b4730dbbfb050744457b03cf69063060c7521e8
7
+ data.tar.gz: 0c534272a1ddf5427221fde1917ab682b86adc7c1773d49865612240134b7576959f2bb2e4101fd00a5193016e6e141e0d01d1d29cfd82e4e1f25d057185be8a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.20)
4
+ kreuzberg (4.0.0.pre.rc.21)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -66,7 +66,7 @@ GEM
66
66
  rb-fsevent (0.11.2)
67
67
  rb-inotify (0.11.1)
68
68
  ffi (~> 1.0)
69
- rb_sys (0.9.123)
69
+ rb_sys (0.9.119)
70
70
  rake-compiler-dock (= 1.10.0)
71
71
  rbs (3.10.0)
72
72
  logger
@@ -147,7 +147,7 @@ DEPENDENCIES
147
147
  pry-byebug (~> 3.10)
148
148
  rake (~> 13.0)
149
149
  rake-compiler (~> 1.2)
150
- rb_sys (~> 0.9.119)
150
+ rb_sys (= 0.9.119)
151
151
  rbs (~> 3.0)
152
152
  rspec (~> 3.12)
153
153
  rubocop (~> 1.66)
@@ -3,7 +3,7 @@
3
3
 
4
4
  [package]
5
5
  name = "kreuzberg-rb"
6
- version = "4.0.0-rc.20"
6
+ version = "4.0.0-rc.21"
7
7
  edition = "2024"
8
8
  rust-version = "1.91"
9
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
data/kreuzberg.gemspec CHANGED
@@ -201,7 +201,7 @@ Gem::Specification.new do |spec|
201
201
  spec.add_development_dependency 'bundler', '~> 4.0'
202
202
  spec.add_development_dependency 'rake', '~> 13.0'
203
203
  spec.add_development_dependency 'rake-compiler', '~> 1.2'
204
- spec.add_development_dependency 'rb_sys', '~> 0.9.119'
204
+ spec.add_development_dependency 'rb_sys', '0.9.119'
205
205
  spec.add_development_dependency 'rspec', '~> 3.12'
206
206
  unless Gem.win_platform?
207
207
  spec.add_development_dependency 'rbs', '~> 3.0'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.0.0-rc.20'
4
+ VERSION = '4.0.0-rc.21'
5
5
  end
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["kreuzberg", "kreuzberg-tesseract", "kreuzberg-ffi"]
3
3
 
4
4
  [workspace.package]
5
- version = "4.0.0-rc.20"
5
+ version = "4.0.0-rc.21"
6
6
  edition = "2024"
7
7
  rust-version = "1.91"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.0.0-rc.20"
3
+ version = "4.0.0-rc.21"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1254,6 +1254,9 @@ enable_quality_processing = true
1254
1254
  let config1 = ExtractionConfig::from_toml_file(&config_path).unwrap();
1255
1255
  assert!(!config1.use_cache);
1256
1256
 
1257
+ // Sleep to ensure mtime changes (some filesystems have 1-second granularity)
1258
+ std::thread::sleep(std::time::Duration::from_secs(1));
1259
+
1257
1260
  fs::write(
1258
1261
  &config_path,
1259
1262
  r#"
@@ -293,49 +293,32 @@ impl EpubExtractor {
293
293
  }
294
294
 
295
295
  /// Extract metadata from EPUB OPF file
296
- fn extract_metadata(opf_xml: &str) -> Result<BTreeMap<String, serde_json::Value>> {
297
- let mut metadata = BTreeMap::new();
296
+ fn extract_metadata(opf_xml: &str) -> Result<(OepbMetadata, BTreeMap<String, serde_json::Value>)> {
297
+ let mut additional_metadata = BTreeMap::new();
298
298
 
299
299
  let (epub_metadata, _) = Self::parse_opf(opf_xml)?;
300
300
 
301
- if let Some(title) = epub_metadata.title {
302
- metadata.insert("title".to_string(), serde_json::json!(title));
301
+ if let Some(identifier) = epub_metadata.identifier.clone() {
302
+ additional_metadata.insert("identifier".to_string(), serde_json::json!(identifier));
303
303
  }
304
304
 
305
- if let Some(creator) = epub_metadata.creator {
306
- metadata.insert("creator".to_string(), serde_json::json!(creator.clone()));
307
- metadata.insert("authors".to_string(), serde_json::json!(vec![creator]));
305
+ if let Some(publisher) = epub_metadata.publisher.clone() {
306
+ additional_metadata.insert("publisher".to_string(), serde_json::json!(publisher));
308
307
  }
309
308
 
310
- if let Some(date) = epub_metadata.date {
311
- metadata.insert("date".to_string(), serde_json::json!(date));
309
+ if let Some(subject) = epub_metadata.subject.clone() {
310
+ additional_metadata.insert("subject".to_string(), serde_json::json!(subject));
312
311
  }
313
312
 
314
- if let Some(language) = epub_metadata.language {
315
- metadata.insert("language".to_string(), serde_json::json!(language));
313
+ if let Some(description) = epub_metadata.description.clone() {
314
+ additional_metadata.insert("description".to_string(), serde_json::json!(description));
316
315
  }
317
316
 
318
- if let Some(identifier) = epub_metadata.identifier {
319
- metadata.insert("identifier".to_string(), serde_json::json!(identifier));
317
+ if let Some(rights) = epub_metadata.rights.clone() {
318
+ additional_metadata.insert("rights".to_string(), serde_json::json!(rights));
320
319
  }
321
320
 
322
- if let Some(publisher) = epub_metadata.publisher {
323
- metadata.insert("publisher".to_string(), serde_json::json!(publisher));
324
- }
325
-
326
- if let Some(subject) = epub_metadata.subject {
327
- metadata.insert("subject".to_string(), serde_json::json!(subject));
328
- }
329
-
330
- if let Some(description) = epub_metadata.description {
331
- metadata.insert("description".to_string(), serde_json::json!(description));
332
- }
333
-
334
- if let Some(rights) = epub_metadata.rights {
335
- metadata.insert("rights".to_string(), serde_json::json!(rights));
336
- }
337
-
338
- Ok(metadata)
321
+ Ok((epub_metadata, additional_metadata))
339
322
  }
340
323
 
341
324
  /// Parse container.xml to find the OPF file path
@@ -564,13 +547,18 @@ impl DocumentExtractor for EpubExtractor {
564
547
 
565
548
  let extracted_content = Self::extract_content(&mut archive, &opf_path, &manifest_dir)?;
566
549
 
567
- let metadata_btree = Self::extract_metadata(&opf_xml)?;
568
- let metadata_map: std::collections::HashMap<String, serde_json::Value> = metadata_btree.into_iter().collect();
550
+ let (epub_metadata, additional_metadata) = Self::extract_metadata(&opf_xml)?;
551
+ let metadata_map: std::collections::HashMap<String, serde_json::Value> =
552
+ additional_metadata.into_iter().collect();
569
553
 
570
554
  Ok(ExtractionResult {
571
555
  content: extracted_content,
572
556
  mime_type: mime_type.to_string(),
573
557
  metadata: Metadata {
558
+ title: epub_metadata.title,
559
+ authors: epub_metadata.creator.map(|c| vec![c]),
560
+ language: epub_metadata.language,
561
+ created_at: epub_metadata.date,
574
562
  additional: metadata_map,
575
563
  ..Default::default()
576
564
  },
@@ -408,10 +408,10 @@ impl DocumentExtractor for PdfExtractor {
408
408
  }
409
409
  },
410
410
  )?;
411
- let pdfium = Pdfium {};
411
+ let pdfium = Pdfium;
412
412
 
413
413
  let document = pdfium.load_pdf_from_byte_slice(content, None).map_err(|e| {
414
- let err_msg = e.to_string();
414
+ let err_msg = crate::pdf::error::format_pdfium_error(e);
415
415
  if err_msg.contains("password") || err_msg.contains("Password") {
416
416
  PdfError::PasswordRequired
417
417
  } else {
@@ -433,10 +433,10 @@ impl DocumentExtractor for PdfExtractor {
433
433
 
434
434
  crate::pdf::bindings::bind_pdfium(PdfError::MetadataExtractionFailed, "initialize Pdfium")?;
435
435
 
436
- let pdfium = Pdfium {};
436
+ let pdfium = Pdfium;
437
437
 
438
438
  let document = pdfium.load_pdf_from_byte_slice(&content_owned, None).map_err(|e| {
439
- let err_msg = e.to_string();
439
+ let err_msg = crate::pdf::error::format_pdfium_error(e);
440
440
  if err_msg.contains("password") || err_msg.contains("Password") {
441
441
  PdfError::PasswordRequired
442
442
  } else {
@@ -465,10 +465,10 @@ impl DocumentExtractor for PdfExtractor {
465
465
  } else {
466
466
  crate::pdf::bindings::bind_pdfium(PdfError::MetadataExtractionFailed, "initialize Pdfium")?;
467
467
 
468
- let pdfium = Pdfium {};
468
+ let pdfium = Pdfium;
469
469
 
470
470
  let document = pdfium.load_pdf_from_byte_slice(content, None).map_err(|e| {
471
- let err_msg = e.to_string();
471
+ let err_msg = crate::pdf::error::format_pdfium_error(e);
472
472
  if err_msg.contains("password") || err_msg.contains("Password") {
473
473
  PdfError::PasswordRequired
474
474
  } else {
@@ -484,10 +484,10 @@ impl DocumentExtractor for PdfExtractor {
484
484
  {
485
485
  crate::pdf::bindings::bind_pdfium(PdfError::MetadataExtractionFailed, "initialize Pdfium")?;
486
486
 
487
- let pdfium = Pdfium {};
487
+ let pdfium = Pdfium;
488
488
 
489
489
  let document = pdfium.load_pdf_from_byte_slice(content, None).map_err(|e| {
490
- let err_msg = e.to_string();
490
+ let err_msg = crate::pdf::error::format_pdfium_error(e);
491
491
  if err_msg.contains("password") || err_msg.contains("Password") {
492
492
  PdfError::PasswordRequired
493
493
  } else {
@@ -82,7 +82,7 @@ fn bind_pdfium_impl() -> Result<(Option<PathBuf>, Box<dyn PdfiumLibraryBindings>
82
82
  /// Instead of failing permanently, we recover by extracting the inner value from the
83
83
  /// poisoned lock and proceeding. This ensures PDF extraction can continue even if an
84
84
  /// earlier panic occurred, as long as the state is consistent.
85
- pub(crate) fn bind_pdfium(map_err: fn(String) -> PdfError, context: &'static str) -> Result<(), PdfError> {
85
+ pub(crate) fn bind_pdfium(map_err: fn(String) -> PdfError, context: &'static str) -> Result<Pdfium, PdfError> {
86
86
  let mut state = PDFIUM_STATE.lock().unwrap_or_else(|poisoned| {
87
87
  // SAFETY: Recovering from a poisoned lock is safe here because:
88
88
  // 1. The poisoned state still contains valid data (just a guard from a panicked thread)
@@ -95,27 +95,25 @@ pub(crate) fn bind_pdfium(map_err: fn(String) -> PdfError, context: &'static str
95
95
  match &*state {
96
96
  InitializationState::Uninitialized => match bind_pdfium_impl() {
97
97
  Ok((lib_dir, bindings)) => {
98
- // Initialize Pdfium singleton with the bindings
99
- let _ = Pdfium::new(bindings);
98
+ // Initialize Pdfium singleton with the bindings and return it
99
+ let pdfium = Pdfium::new(bindings);
100
100
  *state = InitializationState::Initialized { lib_dir };
101
+ Ok(pdfium)
101
102
  }
102
103
  Err(err) => {
103
104
  *state = InitializationState::Failed(err.clone());
104
- return Err(map_err(format!("Pdfium initialization failed ({}): {}", context, err)));
105
+ Err(map_err(format!("Pdfium initialization failed ({}): {}", context, err)))
105
106
  }
106
107
  },
107
- InitializationState::Failed(err) => {
108
- return Err(map_err(format!(
109
- "Pdfium initialization previously failed ({}): {}",
110
- context, err
111
- )));
112
- }
108
+ InitializationState::Failed(err) => Err(map_err(format!(
109
+ "Pdfium initialization previously failed ({}): {}",
110
+ context, err
111
+ ))),
113
112
  InitializationState::Initialized { .. } => {
114
- // Already initialized, nothing to do
113
+ // Already initialized, return a new accessor to the singleton
114
+ Ok(Pdfium)
115
115
  }
116
116
  }
117
-
118
- Ok(())
119
117
  }
120
118
 
121
119
  #[cfg(test)]
@@ -128,6 +126,9 @@ mod tests {
128
126
  // First call should initialize
129
127
  let result = bind_pdfium(PdfError::TextExtractionFailed, "test context");
130
128
  assert!(result.is_ok(), "First bind_pdfium call should succeed");
129
+ // Verify the returned Pdfium instance is usable
130
+ let pdfium = result.unwrap();
131
+ assert!(pdfium.is_pdfium_ready(), "Pdfium should be initialized");
131
132
  }
132
133
 
133
134
  #[test]
@@ -52,6 +52,34 @@ impl From<lopdf::Error> for PdfError {
52
52
 
53
53
  pub type Result<T> = std::result::Result<T, PdfError>;
54
54
 
55
+ /// Format a pdfium error for display.
56
+ ///
57
+ /// The kreuzberg-pdfium-render fork's error type doesn't implement Display,
58
+ /// so Debug formatting produces messages like "PdfiumLibraryInternalError(FormatError,)"
59
+ /// with trailing commas and parentheses. This function cleans up the formatting.
60
+ pub(crate) fn format_pdfium_error<E: std::fmt::Debug>(error: E) -> String {
61
+ let debug_msg = format!("{:?}", error);
62
+
63
+ // Extract the variant name and clean up Debug formatting
64
+ // "PdfiumLibraryInternalError(FormatError,)" -> "PdfiumLibraryInternalError: FormatError"
65
+ // "SomeError" -> "SomeError"
66
+ if let Some(paren_idx) = debug_msg.find('(') {
67
+ let variant = &debug_msg[..paren_idx];
68
+ let inner = &debug_msg[paren_idx + 1..];
69
+
70
+ // Remove trailing ",)" or ")"
71
+ let inner_clean = inner.trim_end_matches(')').trim_end_matches(',');
72
+
73
+ if inner_clean.is_empty() {
74
+ variant.to_string()
75
+ } else {
76
+ format!("{}: {}", variant, inner_clean)
77
+ }
78
+ } else {
79
+ debug_msg
80
+ }
81
+ }
82
+
55
83
  #[cfg(test)]
56
84
  mod tests {
57
85
  use super::*;
@@ -135,4 +163,65 @@ mod tests {
135
163
  let err = PdfError::FontLoadingFailed("missing font file".to_string());
136
164
  assert_eq!(err.to_string(), "Font loading failed: missing font file");
137
165
  }
166
+
167
+ #[test]
168
+ fn test_format_pdfium_error_with_inner_value() {
169
+ // Simulate pdfium error: "PdfiumLibraryInternalError(FormatError,)"
170
+ #[derive(Debug)]
171
+ #[allow(dead_code)]
172
+ struct MockError(String);
173
+
174
+ let error = MockError("FormatError,".to_string());
175
+ let formatted = format_pdfium_error(error);
176
+ // Should clean up the trailing comma
177
+ assert!(formatted.contains("MockError"));
178
+ assert!(formatted.contains("FormatError"));
179
+ }
180
+
181
+ #[test]
182
+ fn test_format_pdfium_error_simple() {
183
+ // Simulate simple error without parentheses
184
+ #[derive(Debug)]
185
+ struct SimpleError;
186
+
187
+ let formatted = format_pdfium_error(SimpleError);
188
+ assert_eq!(formatted, "SimpleError");
189
+ }
190
+
191
+ #[test]
192
+ fn test_format_pdfium_error_empty_inner() {
193
+ // Simulate error with empty inner: "SomeError()"
194
+ #[derive(Debug)]
195
+ struct EmptyInner;
196
+
197
+ let formatted = format_pdfium_error(EmptyInner);
198
+ // Will be "EmptyInner" since the formatting doesn't add parentheses
199
+ assert_eq!(formatted, "EmptyInner");
200
+ }
201
+
202
+ #[test]
203
+ fn test_format_pdfium_error_cleans_trailing_comma() {
204
+ // This test simulates the actual pdfium error format
205
+ // "PdfiumLibraryInternalError(FormatError,)" should become
206
+ // "PdfiumLibraryInternalError: FormatError"
207
+ #[derive(Debug)]
208
+ #[allow(dead_code)]
209
+ enum PdfiumError {
210
+ PdfiumLibraryInternalError(InternalError),
211
+ }
212
+
213
+ #[derive(Debug)]
214
+ #[allow(dead_code)]
215
+ enum InternalError {
216
+ FormatError,
217
+ }
218
+
219
+ let error = PdfiumError::PdfiumLibraryInternalError(InternalError::FormatError);
220
+ let formatted = format_pdfium_error(error);
221
+
222
+ // Should not contain trailing comma or redundant parentheses
223
+ assert!(!formatted.contains(",)"));
224
+ assert!(formatted.contains("PdfiumLibraryInternalError"));
225
+ assert!(formatted.contains("FormatError"));
226
+ }
138
227
  }
@@ -86,12 +86,10 @@ pub fn extract_metadata(pdf_bytes: &[u8]) -> Result<PdfMetadata> {
86
86
  ///
87
87
  /// Returns only PDF-specific metadata (version, producer, encryption status, dimensions).
88
88
  pub fn extract_metadata_with_password(pdf_bytes: &[u8], password: Option<&str>) -> Result<PdfMetadata> {
89
- bind_pdfium(PdfError::MetadataExtractionFailed, "metadata extraction")?;
90
-
91
- let pdfium = Pdfium {};
89
+ let pdfium = bind_pdfium(PdfError::MetadataExtractionFailed, "metadata extraction")?;
92
90
 
93
91
  let document = pdfium.load_pdf_from_byte_slice(pdf_bytes, password).map_err(|e| {
94
- let err_msg = e.to_string();
92
+ let err_msg = super::error::format_pdfium_error(e);
95
93
  if (err_msg.contains("password") || err_msg.contains("Password")) && password.is_some() {
96
94
  PdfError::InvalidPassword
97
95
  } else if err_msg.contains("password") || err_msg.contains("Password") {
@@ -33,9 +33,7 @@ pub struct PdfRenderer {
33
33
 
34
34
  impl PdfRenderer {
35
35
  pub fn new() -> Result<Self> {
36
- bind_pdfium(PdfError::RenderingFailed, "page rendering")?;
37
-
38
- let pdfium = Pdfium {};
36
+ let pdfium = bind_pdfium(PdfError::RenderingFailed, "page rendering")?;
39
37
  Ok(Self { pdfium })
40
38
  }
41
39
 
@@ -56,7 +54,7 @@ impl PdfRenderer {
56
54
  password: Option<&str>,
57
55
  ) -> Result<DynamicImage> {
58
56
  let document = self.pdfium.load_pdf_from_byte_slice(pdf_bytes, password).map_err(|e| {
59
- let err_msg = e.to_string();
57
+ let err_msg = super::error::format_pdfium_error(e);
60
58
  if (err_msg.contains("password") || err_msg.contains("Password")) && password.is_some() {
61
59
  PdfError::InvalidPassword
62
60
  } else if err_msg.contains("password") || err_msg.contains("Password") {
@@ -114,7 +112,7 @@ impl PdfRenderer {
114
112
  password: Option<&str>,
115
113
  ) -> Result<Vec<DynamicImage>> {
116
114
  let document = self.pdfium.load_pdf_from_byte_slice(pdf_bytes, password).map_err(|e| {
117
- let err_msg = e.to_string();
115
+ let err_msg = super::error::format_pdfium_error(e);
118
116
  if (err_msg.contains("password") || err_msg.contains("Password")) && password.is_some() {
119
117
  PdfError::InvalidPassword
120
118
  } else if err_msg.contains("password") || err_msg.contains("Password") {
@@ -241,7 +239,12 @@ mod tests {
241
239
 
242
240
  #[test]
243
241
  fn test_renderer_size() {
244
- assert!(size_of::<PdfRenderer>() > 0);
242
+ // PdfRenderer may be a zero-sized type (ZST) since Pdfium is a ZST.
243
+ // The important thing is that the size is consistent and the type is valid.
244
+ // We just verify the type can be instantiated rather than checking specific size.
245
+ use std::mem::size_of;
246
+ let _size = size_of::<PdfRenderer>();
247
+ // If this compiles and runs, the type is valid regardless of size
245
248
  }
246
249
 
247
250
  #[test]
@@ -19,9 +19,7 @@ pub struct PdfTextExtractor {
19
19
 
20
20
  impl PdfTextExtractor {
21
21
  pub fn new() -> Result<Self> {
22
- bind_pdfium(PdfError::TextExtractionFailed, "text extraction")?;
23
-
24
- let pdfium = Pdfium {};
22
+ let pdfium = bind_pdfium(PdfError::TextExtractionFailed, "text extraction")?;
25
23
  Ok(Self { pdfium })
26
24
  }
27
25
 
@@ -31,7 +29,7 @@ impl PdfTextExtractor {
31
29
 
32
30
  pub fn extract_text_with_password(&self, pdf_bytes: &[u8], password: Option<&str>) -> Result<String> {
33
31
  let document = self.pdfium.load_pdf_from_byte_slice(pdf_bytes, password).map_err(|e| {
34
- let err_msg = e.to_string();
32
+ let err_msg = super::error::format_pdfium_error(e);
35
33
  if (err_msg.contains("password") || err_msg.contains("Password")) && password.is_some() {
36
34
  PdfError::InvalidPassword
37
35
  } else if err_msg.contains("password") || err_msg.contains("Password") {
@@ -67,7 +65,7 @@ impl PdfTextExtractor {
67
65
 
68
66
  pub fn get_page_count(&self, pdf_bytes: &[u8]) -> Result<usize> {
69
67
  let document = self.pdfium.load_pdf_from_byte_slice(pdf_bytes, None).map_err(|e| {
70
- let err_msg = e.to_string();
68
+ let err_msg = super::error::format_pdfium_error(e);
71
69
  if err_msg.contains("password") || err_msg.contains("Password") {
72
70
  PdfError::PasswordRequired
73
71
  } else {
@@ -154,6 +154,17 @@ fn copy_pdfium_from_dir(src_dir: &Path, dest_dir: &Path) -> Result<(), String> {
154
154
 
155
155
  if file_name_str.starts_with("libpdfium") || file_name_str.starts_with("pdfium") {
156
156
  let dest_file = dest_dir.join(file_name);
157
+
158
+ // On Windows, skip copy if destination already exists and is accessible
159
+ // This avoids "Access denied" errors when the DLL is in use
160
+ if dest_file.exists() {
161
+ eprintln!(
162
+ "PDFium library already exists at {}, skipping copy",
163
+ dest_file.display()
164
+ );
165
+ return Ok(());
166
+ }
167
+
157
168
  match fs::copy(&path, &dest_file) {
158
169
  Ok(bytes_copied) => {
159
170
  eprintln!(
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg-tesseract"
3
- version = "4.0.0-rc.20"
3
+ version = "4.0.0-rc.21"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.pre.rc.20
4
+ version: 4.0.0.pre.rc.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-25 00:00:00.000000000 Z
11
+ date: 2025-12-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: rb_sys
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - '='
60
60
  - !ruby/object:Gem::Version
61
61
  version: 0.9.119
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - '='
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.9.119
69
69
  - !ruby/object:Gem::Dependency