kreuzberg 4.0.0.pre.rc.11 → 4.0.0.pre.rc.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/Rakefile +8 -0
- data/ext/kreuzberg_rb/native/Cargo.lock +41 -42
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/ext/kreuzberg_rb/native/src/lib.rs +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/vendor/Cargo.toml +2 -1
- data/vendor/kreuzberg/Cargo.toml +6 -6
- data/vendor/kreuzberg/src/extraction/archive.rs +25 -25
- data/vendor/kreuzberg/src/extraction/libreoffice.rs +12 -0
- data/vendor/kreuzberg/src/extractors/pdf.rs +28 -1
- data/vendor/kreuzberg/src/pdf/bindings.rs +2 -2
- data/vendor/kreuzberg/src/pdf/bundled.rs +5 -5
- data/vendor/kreuzberg/src/pdf/mod.rs +2 -2
- data/vendor/kreuzberg-ffi/Cargo.toml +3 -3
- data/vendor/kreuzberg-tesseract/Cargo.toml +3 -3
- metadata +7 -11
- data/vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1b0f873b7da0609856d3f396a8d43cc30bcd62f7919fa0ec7572f32f990f99f6
|
|
4
|
+
data.tar.gz: ea902df98f54a593113a999dc08c7497d38008d0df3881a677249453b4ca3886
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 197bb0ad826ab4362efcff8dc5fded982360d9cb252d5150c786a762d28d3bf98c7d72b6e9334dd40ae1e65712a54c0261dbd71a037f098cafb599642269dae3
|
|
7
|
+
data.tar.gz: e206c9553e656a00ee7722e7f570475037689e874540b37fe5d9c6353e64ba0de05c82893b5d2ae057d68d3320d3e59c624553b8fff08f2ae3c896506c6c275b
|
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
|
@@ -6,6 +6,13 @@ require 'rspec/core/rake_task'
|
|
|
6
6
|
|
|
7
7
|
GEMSPEC = Gem::Specification.load(File.expand_path('kreuzberg.gemspec', __dir__))
|
|
8
8
|
|
|
9
|
+
# Vendor kreuzberg core crates before compilation
|
|
10
|
+
task :vendor do
|
|
11
|
+
vendor_script = File.expand_path('../../scripts/ci/ruby/vendor-kreuzberg-core.sh', __dir__)
|
|
12
|
+
puts 'Vendoring kreuzberg core crates...'
|
|
13
|
+
sh "bash #{vendor_script}"
|
|
14
|
+
end
|
|
15
|
+
|
|
9
16
|
Rake::ExtensionTask.new('kreuzberg_rb', GEMSPEC) do |ext|
|
|
10
17
|
ext.lib_dir = 'lib'
|
|
11
18
|
ext.ext_dir = 'ext/kreuzberg_rb'
|
|
@@ -21,5 +28,6 @@ end
|
|
|
21
28
|
|
|
22
29
|
RSpec::Core::RakeTask.new(:spec)
|
|
23
30
|
|
|
31
|
+
task compile: :vendor
|
|
24
32
|
task spec: :compile
|
|
25
33
|
task default: :spec
|
|
@@ -677,14 +677,14 @@ dependencies = [
|
|
|
677
677
|
"serde_json",
|
|
678
678
|
"syn",
|
|
679
679
|
"tempfile",
|
|
680
|
-
"toml 0.9.
|
|
680
|
+
"toml 0.9.10+spec-1.1.0",
|
|
681
681
|
]
|
|
682
682
|
|
|
683
683
|
[[package]]
|
|
684
684
|
name = "cc"
|
|
685
|
-
version = "1.2.
|
|
685
|
+
version = "1.2.50"
|
|
686
686
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
687
|
-
checksum = "
|
|
687
|
+
checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c"
|
|
688
688
|
dependencies = [
|
|
689
689
|
"find-msvc-tools",
|
|
690
690
|
"jobserver",
|
|
@@ -809,9 +809,9 @@ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
|
|
|
809
809
|
|
|
810
810
|
[[package]]
|
|
811
811
|
name = "cmake"
|
|
812
|
-
version = "0.1.
|
|
812
|
+
version = "0.1.57"
|
|
813
813
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
814
|
-
checksum = "
|
|
814
|
+
checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
|
|
815
815
|
dependencies = [
|
|
816
816
|
"cc",
|
|
817
817
|
]
|
|
@@ -1038,12 +1038,12 @@ dependencies = [
|
|
|
1038
1038
|
|
|
1039
1039
|
[[package]]
|
|
1040
1040
|
name = "darling"
|
|
1041
|
-
version = "0.
|
|
1041
|
+
version = "0.23.0"
|
|
1042
1042
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1043
|
-
checksum = "
|
|
1043
|
+
checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
|
|
1044
1044
|
dependencies = [
|
|
1045
|
-
"darling_core 0.
|
|
1046
|
-
"darling_macro 0.
|
|
1045
|
+
"darling_core 0.23.0",
|
|
1046
|
+
"darling_macro 0.23.0",
|
|
1047
1047
|
]
|
|
1048
1048
|
|
|
1049
1049
|
[[package]]
|
|
@@ -1062,11 +1062,10 @@ dependencies = [
|
|
|
1062
1062
|
|
|
1063
1063
|
[[package]]
|
|
1064
1064
|
name = "darling_core"
|
|
1065
|
-
version = "0.
|
|
1065
|
+
version = "0.23.0"
|
|
1066
1066
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1067
|
-
checksum = "
|
|
1067
|
+
checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
|
|
1068
1068
|
dependencies = [
|
|
1069
|
-
"fnv",
|
|
1070
1069
|
"ident_case",
|
|
1071
1070
|
"proc-macro2",
|
|
1072
1071
|
"quote",
|
|
@@ -1087,11 +1086,11 @@ dependencies = [
|
|
|
1087
1086
|
|
|
1088
1087
|
[[package]]
|
|
1089
1088
|
name = "darling_macro"
|
|
1090
|
-
version = "0.
|
|
1089
|
+
version = "0.23.0"
|
|
1091
1090
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1092
|
-
checksum = "
|
|
1091
|
+
checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
|
|
1093
1092
|
dependencies = [
|
|
1094
|
-
"darling_core 0.
|
|
1093
|
+
"darling_core 0.23.0",
|
|
1095
1094
|
"quote",
|
|
1096
1095
|
"syn",
|
|
1097
1096
|
]
|
|
@@ -1427,9 +1426,9 @@ dependencies = [
|
|
|
1427
1426
|
|
|
1428
1427
|
[[package]]
|
|
1429
1428
|
name = "fastembed"
|
|
1430
|
-
version = "5.
|
|
1429
|
+
version = "5.5.0"
|
|
1431
1430
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1432
|
-
checksum = "
|
|
1431
|
+
checksum = "de72c516a1484c70ba0d98597dafc6274484b542c9ee54e7a326160baa013849"
|
|
1433
1432
|
dependencies = [
|
|
1434
1433
|
"anyhow",
|
|
1435
1434
|
"hf-hub",
|
|
@@ -1873,9 +1872,9 @@ dependencies = [
|
|
|
1873
1872
|
|
|
1874
1873
|
[[package]]
|
|
1875
1874
|
name = "html-to-markdown-rs"
|
|
1876
|
-
version = "2.
|
|
1875
|
+
version = "2.15.0"
|
|
1877
1876
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1878
|
-
checksum = "
|
|
1877
|
+
checksum = "7741e7928e84f3f3497c84b8dd27e9fcc3368bd133e44ca800715eb34a1d58c8"
|
|
1879
1878
|
dependencies = [
|
|
1880
1879
|
"astral-tl",
|
|
1881
1880
|
"base64 0.22.1",
|
|
@@ -2415,7 +2414,7 @@ dependencies = [
|
|
|
2415
2414
|
|
|
2416
2415
|
[[package]]
|
|
2417
2416
|
name = "kreuzberg"
|
|
2418
|
-
version = "4.0.0-rc.
|
|
2417
|
+
version = "4.0.0-rc.13"
|
|
2419
2418
|
dependencies = [
|
|
2420
2419
|
"ahash",
|
|
2421
2420
|
"async-trait",
|
|
@@ -2450,7 +2449,7 @@ dependencies = [
|
|
|
2450
2449
|
"opentelemetry",
|
|
2451
2450
|
"opentelemetry_sdk",
|
|
2452
2451
|
"org",
|
|
2453
|
-
"
|
|
2452
|
+
"pastey 0.2.1",
|
|
2454
2453
|
"pdfium-render",
|
|
2455
2454
|
"pkg-config",
|
|
2456
2455
|
"polars",
|
|
@@ -2474,7 +2473,7 @@ dependencies = [
|
|
|
2474
2473
|
"thiserror 2.0.17",
|
|
2475
2474
|
"tiff",
|
|
2476
2475
|
"tokio",
|
|
2477
|
-
"toml 0.9.
|
|
2476
|
+
"toml 0.9.10+spec-1.1.0",
|
|
2478
2477
|
"tower",
|
|
2479
2478
|
"tower-http",
|
|
2480
2479
|
"tracing",
|
|
@@ -2489,7 +2488,7 @@ dependencies = [
|
|
|
2489
2488
|
|
|
2490
2489
|
[[package]]
|
|
2491
2490
|
name = "kreuzberg-ffi"
|
|
2492
|
-
version = "4.0.0-rc.
|
|
2491
|
+
version = "4.0.0-rc.13"
|
|
2493
2492
|
dependencies = [
|
|
2494
2493
|
"async-trait",
|
|
2495
2494
|
"cbindgen",
|
|
@@ -2502,7 +2501,7 @@ dependencies = [
|
|
|
2502
2501
|
|
|
2503
2502
|
[[package]]
|
|
2504
2503
|
name = "kreuzberg-rb"
|
|
2505
|
-
version = "4.0.0-rc.
|
|
2504
|
+
version = "4.0.0-rc.14"
|
|
2506
2505
|
dependencies = [
|
|
2507
2506
|
"async-trait",
|
|
2508
2507
|
"html-to-markdown-rs",
|
|
@@ -2517,7 +2516,7 @@ dependencies = [
|
|
|
2517
2516
|
|
|
2518
2517
|
[[package]]
|
|
2519
2518
|
name = "kreuzberg-tesseract"
|
|
2520
|
-
version = "4.0.0-rc.
|
|
2519
|
+
version = "4.0.0-rc.13"
|
|
2521
2520
|
dependencies = [
|
|
2522
2521
|
"cc",
|
|
2523
2522
|
"cmake",
|
|
@@ -4633,9 +4632,9 @@ dependencies = [
|
|
|
4633
4632
|
|
|
4634
4633
|
[[package]]
|
|
4635
4634
|
name = "rmcp"
|
|
4636
|
-
version = "0.
|
|
4635
|
+
version = "0.12.0"
|
|
4637
4636
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4638
|
-
checksum = "
|
|
4637
|
+
checksum = "528d42f8176e6e5e71ea69182b17d1d0a19a6b3b894b564678b74cd7cab13cfa"
|
|
4639
4638
|
dependencies = [
|
|
4640
4639
|
"async-trait",
|
|
4641
4640
|
"axum",
|
|
@@ -4665,11 +4664,11 @@ dependencies = [
|
|
|
4665
4664
|
|
|
4666
4665
|
[[package]]
|
|
4667
4666
|
name = "rmcp-macros"
|
|
4668
|
-
version = "0.
|
|
4667
|
+
version = "0.12.0"
|
|
4669
4668
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4670
|
-
checksum = "
|
|
4669
|
+
checksum = "e3f81daaa494eb8e985c9462f7d6ce1ab05e5299f48aafd76cdd3d8b060e6f59"
|
|
4671
4670
|
dependencies = [
|
|
4672
|
-
"darling 0.
|
|
4671
|
+
"darling 0.23.0",
|
|
4673
4672
|
"proc-macro2",
|
|
4674
4673
|
"quote",
|
|
4675
4674
|
"serde_json",
|
|
@@ -4999,9 +4998,9 @@ dependencies = [
|
|
|
4999
4998
|
|
|
5000
4999
|
[[package]]
|
|
5001
5000
|
name = "serde_spanned"
|
|
5002
|
-
version = "1.0.
|
|
5001
|
+
version = "1.0.4"
|
|
5003
5002
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5004
|
-
checksum = "
|
|
5003
|
+
checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
|
|
5005
5004
|
dependencies = [
|
|
5006
5005
|
"serde_core",
|
|
5007
5006
|
]
|
|
@@ -5678,14 +5677,14 @@ dependencies = [
|
|
|
5678
5677
|
|
|
5679
5678
|
[[package]]
|
|
5680
5679
|
name = "toml"
|
|
5681
|
-
version = "0.9.
|
|
5680
|
+
version = "0.9.10+spec-1.1.0"
|
|
5682
5681
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5683
|
-
checksum = "
|
|
5682
|
+
checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48"
|
|
5684
5683
|
dependencies = [
|
|
5685
5684
|
"indexmap",
|
|
5686
5685
|
"serde_core",
|
|
5687
|
-
"serde_spanned 1.0.
|
|
5688
|
-
"toml_datetime 0.7.
|
|
5686
|
+
"serde_spanned 1.0.4",
|
|
5687
|
+
"toml_datetime 0.7.5+spec-1.1.0",
|
|
5689
5688
|
"toml_parser",
|
|
5690
5689
|
"toml_writer",
|
|
5691
5690
|
"winnow",
|
|
@@ -5702,9 +5701,9 @@ dependencies = [
|
|
|
5702
5701
|
|
|
5703
5702
|
[[package]]
|
|
5704
5703
|
name = "toml_datetime"
|
|
5705
|
-
version = "0.7.
|
|
5704
|
+
version = "0.7.5+spec-1.1.0"
|
|
5706
5705
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5707
|
-
checksum = "
|
|
5706
|
+
checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
|
|
5708
5707
|
dependencies = [
|
|
5709
5708
|
"serde_core",
|
|
5710
5709
|
]
|
|
@@ -5725,9 +5724,9 @@ dependencies = [
|
|
|
5725
5724
|
|
|
5726
5725
|
[[package]]
|
|
5727
5726
|
name = "toml_parser"
|
|
5728
|
-
version = "1.0.
|
|
5727
|
+
version = "1.0.6+spec-1.1.0"
|
|
5729
5728
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5730
|
-
checksum = "
|
|
5729
|
+
checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
|
|
5731
5730
|
dependencies = [
|
|
5732
5731
|
"winnow",
|
|
5733
5732
|
]
|
|
@@ -5740,9 +5739,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
|
|
|
5740
5739
|
|
|
5741
5740
|
[[package]]
|
|
5742
5741
|
name = "toml_writer"
|
|
5743
|
-
version = "1.0.
|
|
5742
|
+
version = "1.0.6+spec-1.1.0"
|
|
5744
5743
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5745
|
-
checksum = "
|
|
5744
|
+
checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
|
|
5746
5745
|
|
|
5747
5746
|
[[package]]
|
|
5748
5747
|
name = "tower"
|
|
@@ -59,7 +59,7 @@ use std::ffi::c_char;
|
|
|
59
59
|
// These C ABI functions are provided by the kreuzberg-ffi crate
|
|
60
60
|
// We declare them here to ensure proper linking on all platforms
|
|
61
61
|
#[link(name = "kreuzberg_ffi", kind = "static")]
|
|
62
|
-
extern "C" {
|
|
62
|
+
unsafe extern "C" {
|
|
63
63
|
pub fn kreuzberg_last_error_code() -> i32;
|
|
64
64
|
pub fn kreuzberg_last_panic_context() -> *mut c_char;
|
|
65
65
|
pub fn kreuzberg_free_string(s: *mut c_char);
|
data/lib/kreuzberg/version.rb
CHANGED
data/vendor/Cargo.toml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract"]
|
|
3
3
|
|
|
4
4
|
[workspace.package]
|
|
5
|
-
version = "4.0.0-rc.
|
|
5
|
+
version = "4.0.0-rc.14"
|
|
6
6
|
edition = "2024"
|
|
7
7
|
rust-version = "1.91"
|
|
8
8
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -33,6 +33,7 @@ tracing = "0.1"
|
|
|
33
33
|
ahash = "0.8.12"
|
|
34
34
|
base64 = "0.22.1"
|
|
35
35
|
hex = "0.4.3"
|
|
36
|
+
toml = "0.9.10"
|
|
36
37
|
num_cpus = "1.17.0"
|
|
37
38
|
once_cell = "1.21.3"
|
|
38
39
|
html-to-markdown-rs = { version = "2.14.11", default-features = false }
|
data/vendor/kreuzberg/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg"
|
|
3
|
-
version = "4.0.0-rc.
|
|
3
|
+
version = "4.0.0-rc.14"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -52,7 +52,7 @@ office = [
|
|
|
52
52
|
email = ["dep:mail-parser", "dep:msg_parser"]
|
|
53
53
|
html = ["dep:html-to-markdown-rs"]
|
|
54
54
|
xml = ["dep:quick-xml", "dep:roxmltree"]
|
|
55
|
-
archives = ["dep:zip", "dep:tar", "dep:sevenz-
|
|
55
|
+
archives = ["dep:zip", "dep:tar", "dep:sevenz-rust2"]
|
|
56
56
|
|
|
57
57
|
# Processing features
|
|
58
58
|
ocr = [
|
|
@@ -145,7 +145,7 @@ regex = "1.12.2"
|
|
|
145
145
|
serde = { version = "1.0.228", features = ["derive"] }
|
|
146
146
|
serde_json = "1.0.145"
|
|
147
147
|
serde_yaml_ng = "0.10.0"
|
|
148
|
-
toml =
|
|
148
|
+
toml = "0.9.10"
|
|
149
149
|
mime_guess = "2.0"
|
|
150
150
|
rmp-serde = "1.3"
|
|
151
151
|
thiserror = "2.0.17"
|
|
@@ -163,13 +163,13 @@ lopdf = { version = "0.38.0", optional = true }
|
|
|
163
163
|
calamine = { version = "0.32.0", features = ["dates"], optional = true }
|
|
164
164
|
polars = { version = "0.52.0", default-features = false, features = ["ipc"], optional = true }
|
|
165
165
|
roxmltree = { version = "0.21.1", optional = true }
|
|
166
|
-
zip = { version = "
|
|
166
|
+
zip = { version = "7.0.0", optional = true }
|
|
167
167
|
mail-parser = { version = "0.11.1", optional = true }
|
|
168
168
|
msg_parser = { version = "0.1.1", optional = true }
|
|
169
169
|
html-to-markdown-rs = { version = "2.14.11", default-features = false, features = ["inline-images"], optional = true }
|
|
170
170
|
quick-xml = { version = "0.38.4", features = ["serialize"], optional = true }
|
|
171
171
|
tar = { version = "0.4.44", optional = true }
|
|
172
|
-
sevenz-
|
|
172
|
+
sevenz-rust2 = { version = "0.20.0", optional = true }
|
|
173
173
|
docx-lite = { version = "0.2.0", optional = true }
|
|
174
174
|
|
|
175
175
|
pulldown-cmark = { version = "0.13", optional = true }
|
|
@@ -228,7 +228,7 @@ infer = "0.19.0"
|
|
|
228
228
|
tempfile = "3.23.0"
|
|
229
229
|
filetime = "0.2"
|
|
230
230
|
tar = "0.4.44"
|
|
231
|
-
zip = "
|
|
231
|
+
zip = "7.0.0"
|
|
232
232
|
serial_test = "3.2.0"
|
|
233
233
|
anyhow = "1.0"
|
|
234
234
|
tokio-test = "0.4"
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
//! This module provides functions for extracting file lists and contents from archives.
|
|
4
4
|
|
|
5
5
|
use crate::error::{KreuzbergError, Result};
|
|
6
|
-
use
|
|
6
|
+
use sevenz_rust2::{ArchiveReader, Password};
|
|
7
7
|
use std::collections::HashMap;
|
|
8
8
|
use std::io::{Cursor, Read};
|
|
9
9
|
use tar::Archive as TarArchive;
|
|
@@ -179,7 +179,7 @@ pub fn extract_tar_text_content(bytes: &[u8]) -> Result<HashMap<String, String>>
|
|
|
179
179
|
/// Extract metadata from a 7z archive.
|
|
180
180
|
pub fn extract_7z_metadata(bytes: &[u8]) -> Result<ArchiveMetadata> {
|
|
181
181
|
let cursor = Cursor::new(bytes);
|
|
182
|
-
let archive =
|
|
182
|
+
let archive = ArchiveReader::new(cursor, Password::empty())
|
|
183
183
|
.map_err(|e| KreuzbergError::parsing(format!("Failed to read 7z archive: {}", e)))?;
|
|
184
184
|
|
|
185
185
|
let mut file_list = Vec::new();
|
|
@@ -212,7 +212,7 @@ pub fn extract_7z_metadata(bytes: &[u8]) -> Result<ArchiveMetadata> {
|
|
|
212
212
|
/// Only extracts files with common text extensions: .txt, .md, .json, .xml, .html, .csv, .log
|
|
213
213
|
pub fn extract_7z_text_content(bytes: &[u8]) -> Result<HashMap<String, String>> {
|
|
214
214
|
let cursor = Cursor::new(bytes);
|
|
215
|
-
let mut archive =
|
|
215
|
+
let mut archive = ArchiveReader::new(cursor, Password::empty())
|
|
216
216
|
.map_err(|e| KreuzbergError::parsing(format!("Failed to read 7z archive: {}", e)))?;
|
|
217
217
|
|
|
218
218
|
let mut contents = HashMap::new();
|
|
@@ -459,26 +459,26 @@ mod tests {
|
|
|
459
459
|
|
|
460
460
|
#[test]
|
|
461
461
|
fn test_extract_7z_metadata_with_files() {
|
|
462
|
-
use
|
|
462
|
+
use sevenz_rust2::{ArchiveEntry, ArchiveWriter};
|
|
463
463
|
|
|
464
|
-
let
|
|
465
|
-
|
|
466
|
-
let mut sz =
|
|
464
|
+
let cursor = {
|
|
465
|
+
let cursor = Cursor::new(Vec::new());
|
|
466
|
+
let mut sz = ArchiveWriter::new(cursor).unwrap();
|
|
467
467
|
|
|
468
468
|
sz.push_archive_entry(
|
|
469
|
-
|
|
469
|
+
ArchiveEntry::new_file("test.txt"),
|
|
470
470
|
Some(Cursor::new(b"Hello 7z!".to_vec())),
|
|
471
471
|
)
|
|
472
472
|
.unwrap();
|
|
473
473
|
|
|
474
474
|
sz.push_archive_entry(
|
|
475
|
-
|
|
475
|
+
ArchiveEntry::new_file("data.json"),
|
|
476
476
|
Some(Cursor::new(b"{\"key\":\"value\"}".to_vec())),
|
|
477
477
|
)
|
|
478
478
|
.unwrap();
|
|
479
479
|
|
|
480
|
-
sz.finish().unwrap()
|
|
481
|
-
}
|
|
480
|
+
sz.finish().unwrap()
|
|
481
|
+
};
|
|
482
482
|
|
|
483
483
|
let bytes = cursor.into_inner();
|
|
484
484
|
let metadata = extract_7z_metadata(&bytes).unwrap();
|
|
@@ -834,26 +834,26 @@ mod tests {
|
|
|
834
834
|
|
|
835
835
|
#[test]
|
|
836
836
|
fn test_extract_7z_text_content() {
|
|
837
|
-
use
|
|
837
|
+
use sevenz_rust2::{ArchiveEntry, ArchiveWriter};
|
|
838
838
|
|
|
839
|
-
let
|
|
840
|
-
|
|
841
|
-
let mut sz =
|
|
839
|
+
let cursor = {
|
|
840
|
+
let cursor = Cursor::new(Vec::new());
|
|
841
|
+
let mut sz = ArchiveWriter::new(cursor).unwrap();
|
|
842
842
|
|
|
843
843
|
sz.push_archive_entry(
|
|
844
|
-
|
|
844
|
+
ArchiveEntry::new_file("test.txt"),
|
|
845
845
|
Some(Cursor::new(b"Hello 7z text!".to_vec())),
|
|
846
846
|
)
|
|
847
847
|
.unwrap();
|
|
848
848
|
|
|
849
849
|
sz.push_archive_entry(
|
|
850
|
-
|
|
850
|
+
ArchiveEntry::new_file("readme.md"),
|
|
851
851
|
Some(Cursor::new(b"# 7z README".to_vec())),
|
|
852
852
|
)
|
|
853
853
|
.unwrap();
|
|
854
854
|
|
|
855
|
-
sz.finish().unwrap()
|
|
856
|
-
}
|
|
855
|
+
sz.finish().unwrap()
|
|
856
|
+
};
|
|
857
857
|
|
|
858
858
|
let bytes = cursor.into_inner();
|
|
859
859
|
let contents = extract_7z_text_content(&bytes).unwrap();
|
|
@@ -865,13 +865,13 @@ mod tests {
|
|
|
865
865
|
|
|
866
866
|
#[test]
|
|
867
867
|
fn test_extract_7z_empty_archive() {
|
|
868
|
-
use
|
|
868
|
+
use sevenz_rust2::ArchiveWriter;
|
|
869
869
|
|
|
870
|
-
let
|
|
871
|
-
|
|
872
|
-
let sz =
|
|
873
|
-
sz.finish().unwrap()
|
|
874
|
-
}
|
|
870
|
+
let cursor = {
|
|
871
|
+
let cursor = Cursor::new(Vec::new());
|
|
872
|
+
let sz = ArchiveWriter::new(cursor).unwrap();
|
|
873
|
+
sz.finish().unwrap()
|
|
874
|
+
};
|
|
875
875
|
|
|
876
876
|
let bytes = cursor.into_inner();
|
|
877
877
|
let metadata = extract_7z_metadata(&bytes).unwrap();
|
|
@@ -361,6 +361,7 @@ mod tests {
|
|
|
361
361
|
use super::*;
|
|
362
362
|
|
|
363
363
|
#[tokio::test]
|
|
364
|
+
#[cfg(not(target_os = "windows"))]
|
|
364
365
|
async fn test_check_libreoffice_available() {
|
|
365
366
|
let result = check_libreoffice_available().await;
|
|
366
367
|
if result.is_err() {
|
|
@@ -370,6 +371,7 @@ mod tests {
|
|
|
370
371
|
}
|
|
371
372
|
|
|
372
373
|
#[tokio::test]
|
|
374
|
+
#[cfg(not(target_os = "windows"))]
|
|
373
375
|
async fn test_convert_office_doc_missing_file() {
|
|
374
376
|
if check_libreoffice_available().await.is_err() {
|
|
375
377
|
return;
|
|
@@ -391,6 +393,7 @@ mod tests {
|
|
|
391
393
|
}
|
|
392
394
|
|
|
393
395
|
#[tokio::test]
|
|
396
|
+
#[cfg(not(target_os = "windows"))]
|
|
394
397
|
async fn test_convert_doc_to_docx_empty_bytes() {
|
|
395
398
|
if check_libreoffice_available().await.is_err() {
|
|
396
399
|
return;
|
|
@@ -403,6 +406,7 @@ mod tests {
|
|
|
403
406
|
}
|
|
404
407
|
|
|
405
408
|
#[tokio::test]
|
|
409
|
+
#[cfg(not(target_os = "windows"))]
|
|
406
410
|
async fn test_convert_ppt_to_pptx_empty_bytes() {
|
|
407
411
|
if check_libreoffice_available().await.is_err() {
|
|
408
412
|
return;
|
|
@@ -415,6 +419,7 @@ mod tests {
|
|
|
415
419
|
}
|
|
416
420
|
|
|
417
421
|
#[tokio::test]
|
|
422
|
+
#[cfg(not(target_os = "windows"))]
|
|
418
423
|
async fn test_convert_doc_to_docx_invalid_doc() {
|
|
419
424
|
if check_libreoffice_available().await.is_err() {
|
|
420
425
|
return;
|
|
@@ -427,6 +432,7 @@ mod tests {
|
|
|
427
432
|
}
|
|
428
433
|
|
|
429
434
|
#[tokio::test]
|
|
435
|
+
#[cfg(not(target_os = "windows"))]
|
|
430
436
|
async fn test_convert_ppt_to_pptx_invalid_ppt() {
|
|
431
437
|
if check_libreoffice_available().await.is_err() {
|
|
432
438
|
return;
|
|
@@ -439,6 +445,7 @@ mod tests {
|
|
|
439
445
|
}
|
|
440
446
|
|
|
441
447
|
#[tokio::test]
|
|
448
|
+
#[cfg(not(target_os = "windows"))]
|
|
442
449
|
async fn test_convert_office_doc_invalid_target_format() {
|
|
443
450
|
if check_libreoffice_available().await.is_err() {
|
|
444
451
|
return;
|
|
@@ -459,6 +466,7 @@ mod tests {
|
|
|
459
466
|
}
|
|
460
467
|
|
|
461
468
|
#[tokio::test]
|
|
469
|
+
#[cfg(not(target_os = "windows"))]
|
|
462
470
|
async fn test_check_libreoffice_missing_dependency_error() {
|
|
463
471
|
let result = check_libreoffice_available().await;
|
|
464
472
|
|
|
@@ -473,6 +481,7 @@ mod tests {
|
|
|
473
481
|
}
|
|
474
482
|
|
|
475
483
|
#[tokio::test]
|
|
484
|
+
#[cfg(not(target_os = "windows"))]
|
|
476
485
|
async fn test_convert_office_doc_creates_output_dir() {
|
|
477
486
|
if check_libreoffice_available().await.is_err() {
|
|
478
487
|
return;
|
|
@@ -507,6 +516,7 @@ mod tests {
|
|
|
507
516
|
}
|
|
508
517
|
|
|
509
518
|
#[tokio::test]
|
|
519
|
+
#[cfg(not(target_os = "windows"))]
|
|
510
520
|
async fn test_convert_doc_to_docx_temp_cleanup() {
|
|
511
521
|
if check_libreoffice_available().await.is_err() {
|
|
512
522
|
return;
|
|
@@ -517,6 +527,7 @@ mod tests {
|
|
|
517
527
|
}
|
|
518
528
|
|
|
519
529
|
#[tokio::test]
|
|
530
|
+
#[cfg(not(target_os = "windows"))]
|
|
520
531
|
async fn test_convert_ppt_to_pptx_temp_cleanup() {
|
|
521
532
|
if check_libreoffice_available().await.is_err() {
|
|
522
533
|
return;
|
|
@@ -527,6 +538,7 @@ mod tests {
|
|
|
527
538
|
}
|
|
528
539
|
|
|
529
540
|
#[tokio::test]
|
|
541
|
+
#[cfg(not(target_os = "windows"))]
|
|
530
542
|
async fn test_convert_office_doc_timeout_kills_process() {
|
|
531
543
|
if check_libreoffice_available().await.is_err() {
|
|
532
544
|
return;
|
|
@@ -5,6 +5,7 @@ use crate::core::config::ExtractionConfig;
|
|
|
5
5
|
use crate::plugins::{DocumentExtractor, Plugin};
|
|
6
6
|
use crate::types::{ExtractionResult, Metadata, PageContent};
|
|
7
7
|
use async_trait::async_trait;
|
|
8
|
+
#[cfg(feature = "tokio-runtime")]
|
|
8
9
|
use std::path::Path;
|
|
9
10
|
|
|
10
11
|
#[cfg(feature = "pdf")]
|
|
@@ -370,7 +371,7 @@ impl DocumentExtractor for PdfExtractor {
|
|
|
370
371
|
|
|
371
372
|
(pdf_metadata, native_text, tables, page_contents)
|
|
372
373
|
}
|
|
373
|
-
#[cfg(not(target_arch = "wasm32"))]
|
|
374
|
+
#[cfg(all(not(target_arch = "wasm32"), feature = "tokio-runtime"))]
|
|
374
375
|
{
|
|
375
376
|
if crate::core::batch_mode::is_batch_mode() {
|
|
376
377
|
let content_owned = content.to_vec();
|
|
@@ -441,6 +442,32 @@ impl DocumentExtractor for PdfExtractor {
|
|
|
441
442
|
(pdf_metadata, native_text, tables, page_contents)
|
|
442
443
|
}
|
|
443
444
|
}
|
|
445
|
+
#[cfg(all(not(target_arch = "wasm32"), not(feature = "tokio-runtime")))]
|
|
446
|
+
{
|
|
447
|
+
let bindings =
|
|
448
|
+
crate::pdf::bindings::bind_pdfium(PdfError::MetadataExtractionFailed, "initialize Pdfium")?;
|
|
449
|
+
|
|
450
|
+
let pdfium = Pdfium::new(bindings);
|
|
451
|
+
|
|
452
|
+
let document = pdfium.load_pdf_from_byte_slice(content, None).map_err(|e| {
|
|
453
|
+
let err_msg = e.to_string();
|
|
454
|
+
if err_msg.contains("password") || err_msg.contains("Password") {
|
|
455
|
+
PdfError::PasswordRequired
|
|
456
|
+
} else {
|
|
457
|
+
PdfError::InvalidPdf(err_msg)
|
|
458
|
+
}
|
|
459
|
+
})?;
|
|
460
|
+
|
|
461
|
+
let (native_text, boundaries, page_contents) =
|
|
462
|
+
crate::pdf::text::extract_text_from_pdf_document(&document, config.pages.as_ref())?;
|
|
463
|
+
|
|
464
|
+
let pdf_metadata =
|
|
465
|
+
crate::pdf::metadata::extract_metadata_from_document(&document, boundaries.as_deref())?;
|
|
466
|
+
|
|
467
|
+
let tables = extract_tables_from_document(&document, &pdf_metadata)?;
|
|
468
|
+
|
|
469
|
+
(pdf_metadata, native_text, tables, page_contents)
|
|
470
|
+
}
|
|
444
471
|
};
|
|
445
472
|
|
|
446
473
|
#[cfg(feature = "ocr")]
|
|
@@ -5,7 +5,7 @@ pub(crate) fn bind_pdfium(
|
|
|
5
5
|
map_err: fn(String) -> PdfError,
|
|
6
6
|
context: &'static str,
|
|
7
7
|
) -> Result<Box<dyn PdfiumLibraryBindings>, PdfError> {
|
|
8
|
-
#[cfg(all(feature = "pdf", feature = "
|
|
8
|
+
#[cfg(all(feature = "pdf", feature = "bundled-pdfium"))]
|
|
9
9
|
{
|
|
10
10
|
// WASM target: use dynamic binding to WASM module
|
|
11
11
|
// SAFETY: pdfium-render handles WASM module lifecycle internally.
|
|
@@ -36,7 +36,7 @@ pub(crate) fn bind_pdfium(
|
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
#[cfg(all(feature = "pdf", not(feature = "
|
|
39
|
+
#[cfg(all(feature = "pdf", not(feature = "bundled-pdfium")))]
|
|
40
40
|
{
|
|
41
41
|
Pdfium::bind_to_system_library()
|
|
42
42
|
.map_err(|e| map_err(format!("Failed to initialize Pdfium ({}): {}", context, e)))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
//! Runtime extraction of bundled PDFium library.
|
|
2
2
|
//!
|
|
3
|
-
//! When the `
|
|
3
|
+
//! When the `bundled-pdfium` feature is enabled, the PDFium library is embedded in the binary
|
|
4
4
|
//! using `include_bytes!` during compilation. This module handles runtime extraction to a
|
|
5
5
|
//! temporary directory and provides the path for dynamic loading.
|
|
6
6
|
//!
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
//! # Example
|
|
16
16
|
//!
|
|
17
17
|
//! ```rust,ignore
|
|
18
|
-
//! # #[cfg(feature = "
|
|
18
|
+
//! # #[cfg(feature = "bundled-pdfium")]
|
|
19
19
|
//! # {
|
|
20
20
|
//! use kreuzberg::pdf::bundled::extract_bundled_pdfium;
|
|
21
21
|
//!
|
|
@@ -280,7 +280,7 @@ mod tests {
|
|
|
280
280
|
}
|
|
281
281
|
|
|
282
282
|
#[test]
|
|
283
|
-
#[cfg(feature = "
|
|
283
|
+
#[cfg(feature = "bundled-pdfium")]
|
|
284
284
|
fn test_extract_bundled_pdfium() {
|
|
285
285
|
let result = extract_bundled_pdfium();
|
|
286
286
|
assert!(result.is_ok());
|
|
@@ -299,7 +299,7 @@ mod tests {
|
|
|
299
299
|
}
|
|
300
300
|
|
|
301
301
|
#[test]
|
|
302
|
-
#[cfg(feature = "
|
|
302
|
+
#[cfg(feature = "bundled-pdfium")]
|
|
303
303
|
fn test_extract_bundled_pdfium_reuses_existing() {
|
|
304
304
|
// First extraction
|
|
305
305
|
let result1 = extract_bundled_pdfium();
|
|
@@ -326,7 +326,7 @@ mod tests {
|
|
|
326
326
|
|
|
327
327
|
#[test]
|
|
328
328
|
#[cfg(unix)]
|
|
329
|
-
#[cfg(feature = "
|
|
329
|
+
#[cfg(feature = "bundled-pdfium")]
|
|
330
330
|
fn test_extract_bundled_pdfium_permissions() {
|
|
331
331
|
let result = extract_bundled_pdfium();
|
|
332
332
|
assert!(result.is_ok());
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
//! functionality in the PDF extractor for rendering pages to images.
|
|
38
38
|
#[cfg(feature = "pdf")]
|
|
39
39
|
pub(crate) mod bindings;
|
|
40
|
-
#[cfg(all(feature = "pdf", feature = "
|
|
40
|
+
#[cfg(all(feature = "pdf", feature = "bundled-pdfium"))]
|
|
41
41
|
pub mod bundled;
|
|
42
42
|
#[cfg(feature = "pdf")]
|
|
43
43
|
pub mod error;
|
|
@@ -52,7 +52,7 @@ pub mod table;
|
|
|
52
52
|
#[cfg(feature = "pdf")]
|
|
53
53
|
pub mod text;
|
|
54
54
|
|
|
55
|
-
#[cfg(all(feature = "pdf", feature = "
|
|
55
|
+
#[cfg(all(feature = "pdf", feature = "bundled-pdfium"))]
|
|
56
56
|
pub use bundled::extract_bundled_pdfium;
|
|
57
57
|
#[cfg(feature = "pdf")]
|
|
58
58
|
pub use error::PdfError;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg-ffi"
|
|
3
|
-
version = "4.0.0-rc.
|
|
3
|
+
version = "4.0.0-rc.14"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -46,7 +46,7 @@ serde_json = "1.0.145"
|
|
|
46
46
|
serde = { version = "1.0.228", features = ["derive"] }
|
|
47
47
|
async-trait = "0.1.89"
|
|
48
48
|
tokio = { version = "1.48.0", features = ["rt", "rt-multi-thread", "macros", "sync", "process", "fs", "time", "io-util"] }
|
|
49
|
-
html-to-markdown-rs = { version = "2.
|
|
49
|
+
html-to-markdown-rs = { version = "2.15.0", default-features = false }
|
|
50
50
|
|
|
51
51
|
[target.'cfg(not(all(windows, target_env = "gnu")))'.dependencies]
|
|
52
52
|
kreuzberg = { path = "../kreuzberg", features = ["full", "bundled-pdfium"] }
|
|
@@ -54,7 +54,7 @@ serde_json = "1.0.145"
|
|
|
54
54
|
serde = { version = "1.0.228", features = ["derive"] }
|
|
55
55
|
async-trait = "0.1.89"
|
|
56
56
|
tokio = { version = "1.48.0", features = ["rt", "rt-multi-thread", "macros", "sync", "process", "fs", "time", "io-util"] }
|
|
57
|
-
html-to-markdown-rs = { version = "2.
|
|
57
|
+
html-to-markdown-rs = { version = "2.15.0", default-features = false }
|
|
58
58
|
|
|
59
59
|
[build-dependencies]
|
|
60
60
|
cbindgen = "0.29"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg-tesseract"
|
|
3
|
-
version = "4.0.0-rc.
|
|
3
|
+
version = "4.0.0-rc.14"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
@@ -24,13 +24,13 @@ thiserror = "2.0.17"
|
|
|
24
24
|
image = { workspace = true }
|
|
25
25
|
|
|
26
26
|
[build-dependencies]
|
|
27
|
-
cc = { version = "1.2.
|
|
27
|
+
cc = { version = "1.2.50", optional = true }
|
|
28
28
|
cmake = { version = "0.1.57", optional = true }
|
|
29
29
|
reqwest = { version = "0.12.25", default-features = false, features = [
|
|
30
30
|
"blocking",
|
|
31
31
|
"rustls-tls",
|
|
32
32
|
], optional = true }
|
|
33
|
-
zip = { version = "
|
|
33
|
+
zip = { version = "7.0.0", optional = true }
|
|
34
34
|
|
|
35
35
|
[features]
|
|
36
36
|
default = ["static-linking"]
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kreuzberg
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.0.0.pre.rc.
|
|
4
|
+
version: 4.0.0.pre.rc.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date: 2025-12-
|
|
10
|
+
date: 2025-12-20 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: bundler
|
|
@@ -237,7 +236,6 @@ files:
|
|
|
237
236
|
- vendor/kreuzberg-ffi/README.md
|
|
238
237
|
- vendor/kreuzberg-ffi/build.rs
|
|
239
238
|
- vendor/kreuzberg-ffi/cbindgen.toml
|
|
240
|
-
- vendor/kreuzberg-ffi/kreuzberg-ffi-install.pc
|
|
241
239
|
- vendor/kreuzberg-ffi/kreuzberg-ffi.pc.in
|
|
242
240
|
- vendor/kreuzberg-ffi/kreuzberg.h
|
|
243
241
|
- vendor/kreuzberg-ffi/src/lib.rs
|
|
@@ -546,14 +544,13 @@ homepage: https://github.com/kreuzberg-dev/kreuzberg
|
|
|
546
544
|
licenses:
|
|
547
545
|
- MIT
|
|
548
546
|
metadata:
|
|
549
|
-
|
|
550
|
-
source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
|
|
547
|
+
bug_tracker_uri: https://github.com/kreuzberg-dev/kreuzberg/issues
|
|
551
548
|
changelog_uri: https://github.com/kreuzberg-dev/kreuzberg/blob/main/CHANGELOG.md
|
|
552
549
|
documentation_uri: https://docs.kreuzberg.dev
|
|
553
|
-
|
|
554
|
-
rubygems_mfa_required: 'true'
|
|
550
|
+
homepage_uri: https://github.com/kreuzberg-dev/kreuzberg
|
|
555
551
|
keywords: document-intelligence,document-extraction,ocr,rust,bindings
|
|
556
|
-
|
|
552
|
+
rubygems_mfa_required: 'true'
|
|
553
|
+
source_code_uri: https://github.com/kreuzberg-dev/kreuzberg
|
|
557
554
|
rdoc_options: []
|
|
558
555
|
require_paths:
|
|
559
556
|
- lib
|
|
@@ -568,8 +565,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
568
565
|
- !ruby/object:Gem::Version
|
|
569
566
|
version: '0'
|
|
570
567
|
requirements: []
|
|
571
|
-
rubygems_version:
|
|
572
|
-
signing_key:
|
|
568
|
+
rubygems_version: 4.0.2
|
|
573
569
|
specification_version: 4
|
|
574
570
|
summary: High-performance document intelligence framework
|
|
575
571
|
test_files: []
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
prefix=/usr/local
|
|
2
|
-
exec_prefix=${prefix}
|
|
3
|
-
libdir=${exec_prefix}/lib
|
|
4
|
-
includedir=${prefix}/include
|
|
5
|
-
|
|
6
|
-
Name: kreuzberg-ffi
|
|
7
|
-
Description: C FFI bindings for Kreuzberg document intelligence library
|
|
8
|
-
Version: 4.0.0-rc.11
|
|
9
|
-
URL: https://kreuzberg.dev
|
|
10
|
-
Libs: -L${libdir} -lkreuzberg_ffi
|
|
11
|
-
Libs.private: -framework CoreFoundation -framework Security -lpthread
|
|
12
|
-
Cflags: -I${includedir}
|