kreuzberg 4.0.0.pre.rc.16 → 4.0.0.pre.rc.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe3add89c26722e26baf090f7b9a0c32671c449be6a34ea4285a5f6d15548b72
4
- data.tar.gz: 49147ceab3fddc3161ff0df55f7c535134d63da7ce2577aad905c91179e875f3
3
+ metadata.gz: f91977b1472bb6211f3ac2efad274e8cbc77dc5ed9832529eccbebeae1f74b4f
4
+ data.tar.gz: b8a32377a80cfec656e8ddd65576dc220f497fc65b7b45b307db54a0b3b4a274
5
5
  SHA512:
6
- metadata.gz: 5f2e0ab3d3dd4c975a99dcbf4a2e81347673eb74687034f8ef72cc3ece6561fbbed70811edc7363c911385c2d7c2eb0be2d8fa990872845458a3d3f5f019422c
7
- data.tar.gz: 530bf825eb92e9a3df838ab14ec68277b17e575833ecdf0af11e32d8749e101e4fe68195841524c5e6b41c31a2330076e9da4507f5edf047d8670ad26c9dd928
6
+ metadata.gz: cb391d9f82848e0b19b0c8df2cce7db455d1b73ba5e5c6dd63a2cc87732d8dd0cd6596ca7f9b305061d9400db95c5890292efbd16af7e55a9434f3f29a337642
7
+ data.tar.gz: 9e41afcc217e00d9feb3f8c4adecb7152743227f2c77f4bcfd9fd5e3d4b64b01171d3bdbb2b1290e10d2efdf13cdb53feb8fee01a95b6ba4d87ea76425b56692
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.16)
4
+ kreuzberg (4.0.0.pre.rc.17)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -33,7 +33,7 @@ GEM
33
33
  ffi (1.17.2-arm64-darwin)
34
34
  ffi (1.17.2-x86_64-linux-gnu)
35
35
  fileutils (1.8.0)
36
- i18n (1.14.7)
36
+ i18n (1.14.8)
37
37
  concurrent-ruby (~> 1.0)
38
38
  json (2.18.0)
39
39
  language_server-protocol (3.17.0.5)
@@ -2354,7 +2354,7 @@ dependencies = [
2354
2354
 
2355
2355
  [[package]]
2356
2356
  name = "kreuzberg-rb"
2357
- version = "4.0.0-rc.16"
2357
+ version = "4.0.0-rc.17"
2358
2358
  dependencies = [
2359
2359
  "async-trait",
2360
2360
  "html-to-markdown-rs",
@@ -7,7 +7,7 @@ rb-sys = { path = "../../../vendor/rb-sys" }
7
7
 
8
8
  [package]
9
9
  name = "kreuzberg-rb"
10
- version = "4.0.0-rc.16"
10
+ version = "4.0.0-rc.17"
11
11
  edition = "2024"
12
12
  rust-version = "1.91"
13
13
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,52 +1,75 @@
1
- #[cfg(target_os = "macos")]
2
- fn main() {
3
- if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
4
- let lib_path = std::path::Path::new(&cargo_manifest_dir)
5
- .parent()
6
- .and_then(|p| p.parent())
7
- .and_then(|p| p.parent())
8
- .and_then(|p| p.parent())
9
- .and_then(|p| p.parent())
10
- .map(|p| p.join("target/release"))
11
- .expect("Failed to construct lib path");
12
- println!("cargo:rustc-link-search={}", lib_path.display());
13
- }
14
- println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
15
- println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
16
- println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
17
- }
1
+ use std::env;
2
+ use std::path::PathBuf;
18
3
 
19
- #[cfg(target_os = "linux")]
20
4
  fn main() {
21
- if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
22
- let lib_path = std::path::Path::new(&cargo_manifest_dir)
23
- .parent()
24
- .and_then(|p| p.parent())
25
- .and_then(|p| p.parent())
26
- .and_then(|p| p.parent())
27
- .and_then(|p| p.parent())
28
- .map(|p| p.join("target/release"))
29
- .expect("Failed to construct lib path");
30
- println!("cargo:rustc-link-search={}", lib_path.display());
5
+ let target = env::var("TARGET").unwrap();
6
+ let profile = env::var("PROFILE").unwrap_or_else(|_| "release".to_string());
7
+
8
+ // Try to locate kreuzberg-ffi library built alongside this crate
9
+ let cargo_manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
10
+ let manifest_path = PathBuf::from(&cargo_manifest_dir);
11
+
12
+ // Prefer host target layout, but include target-triple layout for cross builds.
13
+ // IMPORTANT: Only search lib directories, NOT deps directories.
14
+ // The deps/ directories may contain dylibs with hardcoded install_name paths,
15
+ // which causes load errors on macOS when users install the gem.
16
+ if let Some(packages_root) = manifest_path
17
+ .parent()
18
+ .and_then(|p| p.parent())
19
+ .and_then(|p| p.parent())
20
+ .and_then(|p| p.parent())
21
+ .and_then(|p| p.parent())
22
+ {
23
+ let host_lib_dir = packages_root.join("target").join(&profile);
24
+ let target_lib_dir = packages_root.join("target").join(&target).join(&profile);
25
+
26
+ // Try to find the static library and link it directly on Unix-like systems
27
+ // to avoid the linker preferring dylib over static lib.
28
+ if !target.contains("windows") {
29
+ let static_lib_name = if target.contains("windows") {
30
+ "kreuzberg_ffi.lib"
31
+ } else {
32
+ "libkreuzberg_ffi.a"
33
+ };
34
+
35
+ // Check both host and target lib directories for the static library
36
+ for lib_dir in [&host_lib_dir, &target_lib_dir] {
37
+ let static_lib = lib_dir.join(static_lib_name);
38
+ if static_lib.exists() {
39
+ // Found static library, link it directly by passing the full path
40
+ println!("cargo:rustc-link-arg={}", static_lib.display());
41
+ // Don't add the library search path or -l flag
42
+ // Jump to platform-specific configuration
43
+ if target.contains("darwin") {
44
+ println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
45
+ println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
46
+ } else if target.contains("linux") {
47
+ println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
48
+ }
49
+ println!("cargo:rerun-if-changed=build.rs");
50
+ return;
51
+ }
52
+ }
53
+ }
54
+
55
+ // Fallback: Add search paths and use standard linking
56
+ for dir in [host_lib_dir, target_lib_dir] {
57
+ if dir.exists() {
58
+ println!("cargo:rustc-link-search=native={}", dir.display());
59
+ }
60
+ }
31
61
  }
32
- println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
33
- println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
34
- }
35
62
 
36
- #[cfg(target_os = "windows")]
37
- fn main() {
38
- if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
39
- let lib_path = std::path::Path::new(&cargo_manifest_dir)
40
- .parent()
41
- .and_then(|p| p.parent())
42
- .and_then(|p| p.parent())
43
- .and_then(|p| p.parent())
44
- .and_then(|p| p.parent())
45
- .map(|p| p.join("target/release"))
46
- .expect("Failed to construct lib path");
47
- println!("cargo:rustc-link-search={}", lib_path.display());
63
+ // Link the kreuzberg-ffi library
64
+ // When kreuzberg-ffi is built, its symbols become available for linking
65
+ println!("cargo:rustc-link-lib=static=kreuzberg_ffi");
66
+
67
+ if target.contains("darwin") {
68
+ println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
69
+ println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
70
+ } else if target.contains("linux") {
71
+ println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
48
72
  }
49
- }
50
73
 
51
- #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
52
- fn main() {}
74
+ println!("cargo:rerun-if-changed=build.rs");
75
+ }
@@ -78,8 +78,7 @@ pub struct CMetadataField {
78
78
  }
79
79
 
80
80
  // These C ABI functions are provided by the kreuzberg-ffi crate
81
- // We declare them here to ensure proper linking on all platforms
82
- #[link(name = "kreuzberg_ffi", kind = "static")]
81
+ // Linking is handled by build.rs to ensure static linking
83
82
  unsafe extern "C" {
84
83
  pub fn kreuzberg_last_error_code() -> i32;
85
84
  pub fn kreuzberg_last_panic_context() -> *mut c_char;
@@ -3071,7 +3070,7 @@ fn validate_chunking_params(max_chars: usize, max_overlap: usize) -> Result<i32,
3071
3070
  /// Gets valid binarization methods as a JSON string
3072
3071
  ///
3073
3072
  /// @return [String] JSON array of valid binarization methods
3074
- fn get_valid_binarization_methods(ruby: &Ruby) -> Result<String, Error> {
3073
+ fn get_valid_binarization_methods(_ruby: &Ruby) -> Result<String, Error> {
3075
3074
  let ptr = unsafe { kreuzberg_get_valid_binarization_methods() };
3076
3075
  if ptr.is_null() {
3077
3076
  return Err(runtime_error("Failed to get valid binarization methods"));
@@ -3095,7 +3094,7 @@ fn get_valid_binarization_methods(ruby: &Ruby) -> Result<String, Error> {
3095
3094
  /// Gets valid language codes as a JSON string
3096
3095
  ///
3097
3096
  /// @return [String] JSON array of valid language codes
3098
- fn get_valid_language_codes(ruby: &Ruby) -> Result<String, Error> {
3097
+ fn get_valid_language_codes(_ruby: &Ruby) -> Result<String, Error> {
3099
3098
  let ptr = unsafe { kreuzberg_get_valid_language_codes() };
3100
3099
  if ptr.is_null() {
3101
3100
  return Err(runtime_error("Failed to get valid language codes"));
@@ -3119,7 +3118,7 @@ fn get_valid_language_codes(ruby: &Ruby) -> Result<String, Error> {
3119
3118
  /// Gets valid OCR backends as a JSON string
3120
3119
  ///
3121
3120
  /// @return [String] JSON array of valid OCR backends
3122
- fn get_valid_ocr_backends(ruby: &Ruby) -> Result<String, Error> {
3121
+ fn get_valid_ocr_backends(_ruby: &Ruby) -> Result<String, Error> {
3123
3122
  let ptr = unsafe { kreuzberg_get_valid_ocr_backends() };
3124
3123
  if ptr.is_null() {
3125
3124
  return Err(runtime_error("Failed to get valid OCR backends"));
@@ -3143,7 +3142,7 @@ fn get_valid_ocr_backends(ruby: &Ruby) -> Result<String, Error> {
3143
3142
  /// Gets valid token reduction levels as a JSON string
3144
3143
  ///
3145
3144
  /// @return [String] JSON array of valid token reduction levels
3146
- fn get_valid_token_reduction_levels(ruby: &Ruby) -> Result<String, Error> {
3145
+ fn get_valid_token_reduction_levels(_ruby: &Ruby) -> Result<String, Error> {
3147
3146
  let ptr = unsafe { kreuzberg_get_valid_token_reduction_levels() };
3148
3147
  if ptr.is_null() {
3149
3148
  return Err(runtime_error("Failed to get valid token reduction levels"));
@@ -3395,7 +3394,7 @@ fn get_error_details_native(ruby: &Ruby) -> Result<Value, Error> {
3395
3394
  // SAFETY: FFI function is thread-safe and returns a struct with allocated C strings
3396
3395
  let details = unsafe { kreuzberg_get_error_details() };
3397
3396
 
3398
- let hash = RHash::new();
3397
+ let hash = ruby.hash_new();
3399
3398
 
3400
3399
  // Convert C strings to Ruby strings, handling nulls safely
3401
3400
  // SAFETY: All non-null pointers from FFI must be valid C strings
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.0.0-rc.16'
4
+ VERSION = '4.0.0-rc.17'
5
5
  end
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["kreuzberg", "kreuzberg-tesseract"]
3
3
 
4
4
  [workspace.package]
5
- version = "4.0.0-rc.16"
5
+ version = "4.0.0-rc.17"
6
6
  edition = "2024"
7
7
  rust-version = "1.91"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.0.0-rc.16"
3
+ version = "4.0.0-rc.17"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -163,6 +163,7 @@ fn convert_html_with_options_large_stack(html: String, options: ConversionOption
163
163
  }
164
164
 
165
165
  #[cfg(not(target_arch = "wasm32"))]
166
+ #[allow(dead_code)]
166
167
  fn convert_inline_images_with_large_stack(
167
168
  html: String,
168
169
  options: ConversionOptions,
@@ -204,20 +205,6 @@ fn extract_panic_reason(panic: &Box<dyn Any + Send + 'static>) -> String {
204
205
  }
205
206
 
206
207
  // WASM implementations skip dedicated stack (not supported) and process inline
207
- #[cfg(target_arch = "wasm32")]
208
- fn convert_html_with_options_large_stack(html: String, options: ConversionOptions) -> Result<String> {
209
- convert_html_with_options(&html, options)
210
- }
211
-
212
- #[cfg(target_arch = "wasm32")]
213
- fn convert_inline_images_with_large_stack(
214
- html: String,
215
- options: ConversionOptions,
216
- image_config: LibInlineImageConfig,
217
- ) -> Result<HtmlExtraction> {
218
- convert_inline_images_with_options(&html, options, image_config)
219
- }
220
-
221
208
  /// Convert HTML to markdown with optional configuration.
222
209
  ///
223
210
  /// Uses sensible defaults if no configuration is provided:
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg-tesseract"
3
- version = "4.0.0-rc.16"
3
+ version = "4.0.0-rc.17"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.pre.rc.16
4
+ version: 4.0.0.pre.rc.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-21 00:00:00.000000000 Z
11
+ date: 2025-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler