kreuzberg 4.8.2 → 4.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +91 -82
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/ext/kreuzberg_rb/native/src/config/types.rs +1 -0
- data/lib/kreuzberg/version.rb +1 -1
- data/lib/kreuzberg.rb +1 -5
- data/vendor/Cargo.toml +6 -6
- data/vendor/kreuzberg/Cargo.toml +7 -7
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/core/config/mod.rs +1 -1
- data/vendor/kreuzberg/src/core/config/pdf.rs +23 -0
- data/vendor/kreuzberg/src/doc_orientation.rs +1 -1
- data/vendor/kreuzberg/src/embeddings/mod.rs +15 -1
- data/vendor/kreuzberg/src/extraction/derive.rs +15 -3
- data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +18 -1
- data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +140 -0
- data/vendor/kreuzberg/src/extractors/pdf/mod.rs +349 -0
- data/vendor/kreuzberg/src/layout/models/slanet.rs +1 -1
- data/vendor/kreuzberg/src/layout/models/table_classifier.rs +1 -1
- data/vendor/kreuzberg/src/layout/models/tatr.rs +1 -1
- data/vendor/kreuzberg/src/layout/session.rs +1 -1
- data/vendor/kreuzberg/src/lib.rs +1 -1
- data/vendor/kreuzberg/src/pdf/hierarchy/extraction.rs +4 -0
- data/vendor/kreuzberg/src/pdf/mod.rs +2 -0
- data/vendor/kreuzberg/src/pdf/oxide/annotations.rs +258 -0
- data/vendor/kreuzberg/src/pdf/oxide/hierarchy.rs +235 -0
- data/vendor/kreuzberg/src/pdf/oxide/images.rs +53 -0
- data/vendor/kreuzberg/src/pdf/oxide/metadata.rs +381 -0
- data/vendor/kreuzberg/src/pdf/oxide/mod.rs +43 -0
- data/vendor/kreuzberg/src/pdf/oxide/table.rs +243 -0
- data/vendor/kreuzberg/src/pdf/oxide/text.rs +249 -0
- data/vendor/kreuzberg/src/pdf/oxide_text.rs +8 -6
- data/vendor/kreuzberg/src/pdf/structure/adapters.rs +1 -0
- data/vendor/kreuzberg/src/pdf/structure/assembly.rs +1 -0
- data/vendor/kreuzberg/src/pdf/structure/bridge.rs +51 -0
- data/vendor/kreuzberg/src/pdf/structure/classify.rs +3 -0
- data/vendor/kreuzberg/src/pdf/structure/content_convert.rs +3 -0
- data/vendor/kreuzberg/src/pdf/structure/layout_classify.rs +1 -0
- data/vendor/kreuzberg/src/pdf/structure/mod.rs +2 -0
- data/vendor/kreuzberg/src/pdf/structure/paragraphs.rs +2 -0
- data/vendor/kreuzberg/src/pdf/structure/pipeline.rs +240 -1
- data/vendor/kreuzberg/src/pdf/table_reconstruct.rs +1 -0
- data/vendor/kreuzberg/src/pdf/text.rs +1 -1
- data/vendor/kreuzberg/src/rendering/comrak_bridge.rs +46 -2
- data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +5 -1
- data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +2 -1
- data/vendor/kreuzberg-ffi/Cargo.toml +3 -34
- data/vendor/kreuzberg-ffi/kreuzberg.h +2 -2
- data/vendor/kreuzberg-ffi/src/config/merge.rs +0 -3
- data/vendor/kreuzberg-ffi/src/config_builder.rs +0 -6
- data/vendor/kreuzberg-ffi/src/lib.rs +0 -1
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
- data/vendor/kreuzberg-paddle-ocr/src/base_net.rs +1 -3
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
- metadata +9 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 16deeaa47cb35ded0b844af72d43b74da5539084f21a79d17513be2da9ac2f0b
|
|
4
|
+
data.tar.gz: 64715b14cffac78a796853e9f5d9a2d0969427de9d59a243c87a5d20699dcce3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e362717e5db0fad6a9494737e53c2444a84cb76fd274c70283a6650eef0891e9ced2af424b2ed9501eb749f21fcfb2ca3b4f8c7b336d1a248bb99f4a7e69131e
|
|
7
|
+
data.tar.gz: 5d05d862a170f0efe0f6f6a9867846bb3b000136f638b1efe6ddee5e94310dc92495a40a7dff204b4098f92989175e1691ba10897be884ca960477d48dcbc6ca
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.4" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -699,9 +699,9 @@ dependencies = [
|
|
|
699
699
|
|
|
700
700
|
[[package]]
|
|
701
701
|
name = "cc"
|
|
702
|
-
version = "1.2.
|
|
702
|
+
version = "1.2.60"
|
|
703
703
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
704
|
-
checksum = "
|
|
704
|
+
checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20"
|
|
705
705
|
dependencies = [
|
|
706
706
|
"find-msvc-tools",
|
|
707
707
|
"jobserver",
|
|
@@ -1162,19 +1162,20 @@ dependencies = [
|
|
|
1162
1162
|
|
|
1163
1163
|
[[package]]
|
|
1164
1164
|
name = "ctor"
|
|
1165
|
-
version = "0.
|
|
1165
|
+
version = "0.9.1"
|
|
1166
1166
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1167
|
-
checksum = "
|
|
1167
|
+
checksum = "c1c888a2a4f677017373fb6c01e13e318dd9e78758445ed5eb985e355d3f8281"
|
|
1168
1168
|
dependencies = [
|
|
1169
1169
|
"ctor-proc-macro",
|
|
1170
1170
|
"dtor",
|
|
1171
|
+
"link-section",
|
|
1171
1172
|
]
|
|
1172
1173
|
|
|
1173
1174
|
[[package]]
|
|
1174
1175
|
name = "ctor-proc-macro"
|
|
1175
|
-
version = "0.0.
|
|
1176
|
+
version = "0.0.12"
|
|
1176
1177
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1177
|
-
checksum = "
|
|
1178
|
+
checksum = "a7ab264ea985f1bd27887d7b21ea2bb046728e05d11909ca138d700c494730db"
|
|
1178
1179
|
|
|
1179
1180
|
[[package]]
|
|
1180
1181
|
name = "darling"
|
|
@@ -1416,18 +1417,18 @@ dependencies = [
|
|
|
1416
1417
|
|
|
1417
1418
|
[[package]]
|
|
1418
1419
|
name = "dtor"
|
|
1419
|
-
version = "0.
|
|
1420
|
+
version = "0.6.0"
|
|
1420
1421
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1421
|
-
checksum = "
|
|
1422
|
+
checksum = "30e4690622ab6700ced40fc370a3f07b7d111f0154bb6fb08f73b4c8834f75b6"
|
|
1422
1423
|
dependencies = [
|
|
1423
1424
|
"dtor-proc-macro",
|
|
1424
1425
|
]
|
|
1425
1426
|
|
|
1426
1427
|
[[package]]
|
|
1427
1428
|
name = "dtor-proc-macro"
|
|
1428
|
-
version = "0.0.
|
|
1429
|
+
version = "0.0.12"
|
|
1429
1430
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1430
|
-
checksum = "
|
|
1431
|
+
checksum = "8c98b077c7463d01d22dde8a24378ddf1ca7263dc687cffbed38819ea6c21131"
|
|
1431
1432
|
|
|
1432
1433
|
[[package]]
|
|
1433
1434
|
name = "dunce"
|
|
@@ -1600,12 +1601,9 @@ dependencies = [
|
|
|
1600
1601
|
|
|
1601
1602
|
[[package]]
|
|
1602
1603
|
name = "fearless_simd"
|
|
1603
|
-
version = "0.
|
|
1604
|
+
version = "0.4.0"
|
|
1604
1605
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1605
|
-
checksum = "
|
|
1606
|
-
dependencies = [
|
|
1607
|
-
"bytemuck",
|
|
1608
|
-
]
|
|
1606
|
+
checksum = "76258897e51fd156ee03b6246ea53f3e0eb395d0b327e9961c4fc4c8b2fa151a"
|
|
1609
1607
|
|
|
1610
1608
|
[[package]]
|
|
1611
1609
|
name = "filetime"
|
|
@@ -1868,9 +1866,9 @@ dependencies = [
|
|
|
1868
1866
|
|
|
1869
1867
|
[[package]]
|
|
1870
1868
|
name = "gif"
|
|
1871
|
-
version = "0.14.
|
|
1869
|
+
version = "0.14.2"
|
|
1872
1870
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1873
|
-
checksum = "
|
|
1871
|
+
checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159"
|
|
1874
1872
|
dependencies = [
|
|
1875
1873
|
"color_quant",
|
|
1876
1874
|
"weezl",
|
|
@@ -2029,6 +2027,12 @@ dependencies = [
|
|
|
2029
2027
|
"foldhash 0.2.0",
|
|
2030
2028
|
]
|
|
2031
2029
|
|
|
2030
|
+
[[package]]
|
|
2031
|
+
name = "hashbrown"
|
|
2032
|
+
version = "0.17.0"
|
|
2033
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2034
|
+
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
|
|
2035
|
+
|
|
2032
2036
|
[[package]]
|
|
2033
2037
|
name = "hashify"
|
|
2034
2038
|
version = "0.2.9"
|
|
@@ -2058,9 +2062,9 @@ dependencies = [
|
|
|
2058
2062
|
|
|
2059
2063
|
[[package]]
|
|
2060
2064
|
name = "hayro-jpeg2000"
|
|
2061
|
-
version = "0.3.
|
|
2065
|
+
version = "0.3.5"
|
|
2062
2066
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2063
|
-
checksum = "
|
|
2067
|
+
checksum = "c75ab947623ef4ccaa7acf0579edf7cbb5a73838e3839a7be73335e522f433a1"
|
|
2064
2068
|
dependencies = [
|
|
2065
2069
|
"fearless_simd",
|
|
2066
2070
|
]
|
|
@@ -2094,7 +2098,7 @@ dependencies = [
|
|
|
2094
2098
|
"indicatif 0.17.11",
|
|
2095
2099
|
"libc",
|
|
2096
2100
|
"log",
|
|
2097
|
-
"rand 0.9.
|
|
2101
|
+
"rand 0.9.3",
|
|
2098
2102
|
"serde",
|
|
2099
2103
|
"serde_json",
|
|
2100
2104
|
"thiserror 2.0.18",
|
|
@@ -2113,7 +2117,7 @@ dependencies = [
|
|
|
2113
2117
|
"indicatif 0.18.4",
|
|
2114
2118
|
"libc",
|
|
2115
2119
|
"log",
|
|
2116
|
-
"rand 0.9.
|
|
2120
|
+
"rand 0.9.3",
|
|
2117
2121
|
"serde",
|
|
2118
2122
|
"serde_json",
|
|
2119
2123
|
"thiserror 2.0.18",
|
|
@@ -2244,15 +2248,14 @@ dependencies = [
|
|
|
2244
2248
|
|
|
2245
2249
|
[[package]]
|
|
2246
2250
|
name = "hyper-rustls"
|
|
2247
|
-
version = "0.27.
|
|
2251
|
+
version = "0.27.8"
|
|
2248
2252
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2249
|
-
checksum = "
|
|
2253
|
+
checksum = "c2b52f86d1d4bc0d6b4e6826d960b1b333217e07d36b882dca570a5e1c48895b"
|
|
2250
2254
|
dependencies = [
|
|
2251
2255
|
"http",
|
|
2252
2256
|
"hyper",
|
|
2253
2257
|
"hyper-util",
|
|
2254
2258
|
"rustls",
|
|
2255
|
-
"rustls-pki-types",
|
|
2256
2259
|
"tokio",
|
|
2257
2260
|
"tokio-rustls",
|
|
2258
2261
|
"tower-service",
|
|
@@ -2526,7 +2529,7 @@ dependencies = [
|
|
|
2526
2529
|
"itertools 0.14.0",
|
|
2527
2530
|
"nalgebra",
|
|
2528
2531
|
"num",
|
|
2529
|
-
"rand 0.9.
|
|
2532
|
+
"rand 0.9.3",
|
|
2530
2533
|
"rand_distr",
|
|
2531
2534
|
]
|
|
2532
2535
|
|
|
@@ -2538,12 +2541,12 @@ checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8"
|
|
|
2538
2541
|
|
|
2539
2542
|
[[package]]
|
|
2540
2543
|
name = "indexmap"
|
|
2541
|
-
version = "2.
|
|
2544
|
+
version = "2.14.0"
|
|
2542
2545
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2543
|
-
checksum = "
|
|
2546
|
+
checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
|
|
2544
2547
|
dependencies = [
|
|
2545
2548
|
"equivalent",
|
|
2546
|
-
"hashbrown 0.
|
|
2549
|
+
"hashbrown 0.17.0",
|
|
2547
2550
|
"serde",
|
|
2548
2551
|
"serde_core",
|
|
2549
2552
|
]
|
|
@@ -2759,9 +2762,9 @@ checksum = "086b08ec7a274cd60cd575ed3651ba081ee72dec0d39a6210e8adcff9efe3880"
|
|
|
2759
2762
|
|
|
2760
2763
|
[[package]]
|
|
2761
2764
|
name = "js-sys"
|
|
2762
|
-
version = "0.3.
|
|
2765
|
+
version = "0.3.95"
|
|
2763
2766
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2764
|
-
checksum = "
|
|
2767
|
+
checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca"
|
|
2765
2768
|
dependencies = [
|
|
2766
2769
|
"cfg-if",
|
|
2767
2770
|
"futures-util",
|
|
@@ -2780,7 +2783,7 @@ dependencies = [
|
|
|
2780
2783
|
|
|
2781
2784
|
[[package]]
|
|
2782
2785
|
name = "kreuzberg"
|
|
2783
|
-
version = "4.8.
|
|
2786
|
+
version = "4.8.3"
|
|
2784
2787
|
dependencies = [
|
|
2785
2788
|
"ahash",
|
|
2786
2789
|
"async-trait",
|
|
@@ -2875,7 +2878,7 @@ dependencies = [
|
|
|
2875
2878
|
|
|
2876
2879
|
[[package]]
|
|
2877
2880
|
name = "kreuzberg-ffi"
|
|
2878
|
-
version = "4.8.
|
|
2881
|
+
version = "4.8.3"
|
|
2879
2882
|
dependencies = [
|
|
2880
2883
|
"ahash",
|
|
2881
2884
|
"async-trait",
|
|
@@ -2891,7 +2894,7 @@ dependencies = [
|
|
|
2891
2894
|
|
|
2892
2895
|
[[package]]
|
|
2893
2896
|
name = "kreuzberg-paddle-ocr"
|
|
2894
|
-
version = "4.8.
|
|
2897
|
+
version = "4.8.3"
|
|
2895
2898
|
dependencies = [
|
|
2896
2899
|
"geo-clipper",
|
|
2897
2900
|
"geo-types",
|
|
@@ -2905,7 +2908,7 @@ dependencies = [
|
|
|
2905
2908
|
|
|
2906
2909
|
[[package]]
|
|
2907
2910
|
name = "kreuzberg-pdfium-render"
|
|
2908
|
-
version = "4.8.
|
|
2911
|
+
version = "4.8.3"
|
|
2909
2912
|
dependencies = [
|
|
2910
2913
|
"bitflags",
|
|
2911
2914
|
"bytemuck",
|
|
@@ -2928,7 +2931,7 @@ dependencies = [
|
|
|
2928
2931
|
|
|
2929
2932
|
[[package]]
|
|
2930
2933
|
name = "kreuzberg-rb"
|
|
2931
|
-
version = "4.8.
|
|
2934
|
+
version = "4.8.4"
|
|
2932
2935
|
dependencies = [
|
|
2933
2936
|
"async-trait",
|
|
2934
2937
|
"html-to-markdown-rs",
|
|
@@ -2945,7 +2948,7 @@ dependencies = [
|
|
|
2945
2948
|
|
|
2946
2949
|
[[package]]
|
|
2947
2950
|
name = "kreuzberg-tesseract"
|
|
2948
|
-
version = "4.8.
|
|
2951
|
+
version = "4.8.3"
|
|
2949
2952
|
dependencies = [
|
|
2950
2953
|
"cc",
|
|
2951
2954
|
"cmake",
|
|
@@ -2980,9 +2983,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
|
|
|
2980
2983
|
|
|
2981
2984
|
[[package]]
|
|
2982
2985
|
name = "libc"
|
|
2983
|
-
version = "0.2.
|
|
2986
|
+
version = "0.2.185"
|
|
2984
2987
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2985
|
-
checksum = "
|
|
2988
|
+
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
|
|
2986
2989
|
|
|
2987
2990
|
[[package]]
|
|
2988
2991
|
name = "libfuzzer-sys"
|
|
@@ -3022,16 +3025,22 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
|
|
|
3022
3025
|
|
|
3023
3026
|
[[package]]
|
|
3024
3027
|
name = "libredox"
|
|
3025
|
-
version = "0.1.
|
|
3028
|
+
version = "0.1.16"
|
|
3026
3029
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3027
|
-
checksum = "
|
|
3030
|
+
checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
|
|
3028
3031
|
dependencies = [
|
|
3029
3032
|
"bitflags",
|
|
3030
3033
|
"libc",
|
|
3031
3034
|
"plain",
|
|
3032
|
-
"redox_syscall 0.7.
|
|
3035
|
+
"redox_syscall 0.7.4",
|
|
3033
3036
|
]
|
|
3034
3037
|
|
|
3038
|
+
[[package]]
|
|
3039
|
+
name = "link-section"
|
|
3040
|
+
version = "0.0.12"
|
|
3041
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3042
|
+
checksum = "f52437d47b0358721ec869cc7374b2a21f7b2237af9b439c0391341a1fbfbf1b"
|
|
3043
|
+
|
|
3035
3044
|
[[package]]
|
|
3036
3045
|
name = "linux-raw-sys"
|
|
3037
3046
|
version = "0.12.1"
|
|
@@ -3116,7 +3125,7 @@ dependencies = [
|
|
|
3116
3125
|
"md-5",
|
|
3117
3126
|
"nom 8.0.0",
|
|
3118
3127
|
"nom_locate",
|
|
3119
|
-
"rand 0.10.
|
|
3128
|
+
"rand 0.10.1",
|
|
3120
3129
|
"rangemap",
|
|
3121
3130
|
"rayon",
|
|
3122
3131
|
"sha2 0.10.9",
|
|
@@ -3631,9 +3640,9 @@ dependencies = [
|
|
|
3631
3640
|
|
|
3632
3641
|
[[package]]
|
|
3633
3642
|
name = "openssl"
|
|
3634
|
-
version = "0.10.
|
|
3643
|
+
version = "0.10.77"
|
|
3635
3644
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3636
|
-
checksum = "
|
|
3645
|
+
checksum = "bfe4646e360ec77dff7dde40ed3d6c5fee52d156ef4a62f53973d38294dad87f"
|
|
3637
3646
|
dependencies = [
|
|
3638
3647
|
"bitflags",
|
|
3639
3648
|
"cfg-if",
|
|
@@ -3663,9 +3672,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
|
|
|
3663
3672
|
|
|
3664
3673
|
[[package]]
|
|
3665
3674
|
name = "openssl-sys"
|
|
3666
|
-
version = "0.9.
|
|
3675
|
+
version = "0.9.113"
|
|
3667
3676
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3668
|
-
checksum = "
|
|
3677
|
+
checksum = "ad2f2c0eba47118757e4c6d2bff2838f3e0523380021356e7875e858372ce644"
|
|
3669
3678
|
dependencies = [
|
|
3670
3679
|
"cc",
|
|
3671
3680
|
"libc",
|
|
@@ -3698,7 +3707,7 @@ dependencies = [
|
|
|
3698
3707
|
"futures-util",
|
|
3699
3708
|
"opentelemetry",
|
|
3700
3709
|
"percent-encoding",
|
|
3701
|
-
"rand 0.9.
|
|
3710
|
+
"rand 0.9.3",
|
|
3702
3711
|
"thiserror 2.0.18",
|
|
3703
3712
|
"tokio",
|
|
3704
3713
|
"tokio-stream",
|
|
@@ -3860,9 +3869,9 @@ checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590"
|
|
|
3860
3869
|
|
|
3861
3870
|
[[package]]
|
|
3862
3871
|
name = "pkg-config"
|
|
3863
|
-
version = "0.3.
|
|
3872
|
+
version = "0.3.33"
|
|
3864
3873
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3865
|
-
checksum = "
|
|
3874
|
+
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
|
|
3866
3875
|
|
|
3867
3876
|
[[package]]
|
|
3868
3877
|
name = "plain"
|
|
@@ -4065,7 +4074,7 @@ dependencies = [
|
|
|
4065
4074
|
"bytes",
|
|
4066
4075
|
"getrandom 0.3.4",
|
|
4067
4076
|
"lru-slab",
|
|
4068
|
-
"rand 0.9.
|
|
4077
|
+
"rand 0.9.3",
|
|
4069
4078
|
"ring",
|
|
4070
4079
|
"rustc-hash",
|
|
4071
4080
|
"rustls",
|
|
@@ -4131,9 +4140,9 @@ dependencies = [
|
|
|
4131
4140
|
|
|
4132
4141
|
[[package]]
|
|
4133
4142
|
name = "rand"
|
|
4134
|
-
version = "0.9.
|
|
4143
|
+
version = "0.9.3"
|
|
4135
4144
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4136
|
-
checksum = "
|
|
4145
|
+
checksum = "7ec095654a25171c2124e9e3393a930bddbffdc939556c914957a4c3e0a87166"
|
|
4137
4146
|
dependencies = [
|
|
4138
4147
|
"rand_chacha",
|
|
4139
4148
|
"rand_core 0.9.5",
|
|
@@ -4141,9 +4150,9 @@ dependencies = [
|
|
|
4141
4150
|
|
|
4142
4151
|
[[package]]
|
|
4143
4152
|
name = "rand"
|
|
4144
|
-
version = "0.10.
|
|
4153
|
+
version = "0.10.1"
|
|
4145
4154
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4146
|
-
checksum = "
|
|
4155
|
+
checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
|
|
4147
4156
|
dependencies = [
|
|
4148
4157
|
"chacha20",
|
|
4149
4158
|
"getrandom 0.4.2",
|
|
@@ -4182,7 +4191,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
4182
4191
|
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
|
|
4183
4192
|
dependencies = [
|
|
4184
4193
|
"num-traits",
|
|
4185
|
-
"rand 0.9.
|
|
4194
|
+
"rand 0.9.3",
|
|
4186
4195
|
]
|
|
4187
4196
|
|
|
4188
4197
|
[[package]]
|
|
@@ -4218,7 +4227,7 @@ dependencies = [
|
|
|
4218
4227
|
"num-traits",
|
|
4219
4228
|
"paste",
|
|
4220
4229
|
"profiling",
|
|
4221
|
-
"rand 0.9.
|
|
4230
|
+
"rand 0.9.3",
|
|
4222
4231
|
"rand_chacha",
|
|
4223
4232
|
"simd_helpers",
|
|
4224
4233
|
"thiserror 2.0.18",
|
|
@@ -4319,9 +4328,9 @@ dependencies = [
|
|
|
4319
4328
|
|
|
4320
4329
|
[[package]]
|
|
4321
4330
|
name = "redox_syscall"
|
|
4322
|
-
version = "0.7.
|
|
4331
|
+
version = "0.7.4"
|
|
4323
4332
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4324
|
-
checksum = "
|
|
4333
|
+
checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
|
|
4325
4334
|
dependencies = [
|
|
4326
4335
|
"bitflags",
|
|
4327
4336
|
]
|
|
@@ -4454,9 +4463,9 @@ dependencies = [
|
|
|
4454
4463
|
|
|
4455
4464
|
[[package]]
|
|
4456
4465
|
name = "rmcp"
|
|
4457
|
-
version = "1.
|
|
4466
|
+
version = "1.4.0"
|
|
4458
4467
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4459
|
-
checksum = "
|
|
4468
|
+
checksum = "f542f74cf247da16f19bbc87e298cd201e912314f4083e88cdd671f44f5fcb53"
|
|
4460
4469
|
dependencies = [
|
|
4461
4470
|
"async-trait",
|
|
4462
4471
|
"base64 0.22.1",
|
|
@@ -4468,7 +4477,7 @@ dependencies = [
|
|
|
4468
4477
|
"http-body-util",
|
|
4469
4478
|
"pastey 0.2.1",
|
|
4470
4479
|
"pin-project-lite",
|
|
4471
|
-
"rand 0.10.
|
|
4480
|
+
"rand 0.10.1",
|
|
4472
4481
|
"rmcp-macros",
|
|
4473
4482
|
"schemars",
|
|
4474
4483
|
"serde",
|
|
@@ -4485,9 +4494,9 @@ dependencies = [
|
|
|
4485
4494
|
|
|
4486
4495
|
[[package]]
|
|
4487
4496
|
name = "rmcp-macros"
|
|
4488
|
-
version = "1.
|
|
4497
|
+
version = "1.4.0"
|
|
4489
4498
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4490
|
-
checksum = "
|
|
4499
|
+
checksum = "b2391e4ae47f314e70eaafb6c7bd82e495e770b935448864446302143019151f"
|
|
4491
4500
|
dependencies = [
|
|
4492
4501
|
"darling 0.23.0",
|
|
4493
4502
|
"proc-macro2",
|
|
@@ -4551,9 +4560,9 @@ dependencies = [
|
|
|
4551
4560
|
|
|
4552
4561
|
[[package]]
|
|
4553
4562
|
name = "rustls"
|
|
4554
|
-
version = "0.23.
|
|
4563
|
+
version = "0.23.38"
|
|
4555
4564
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4556
|
-
checksum = "
|
|
4565
|
+
checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21"
|
|
4557
4566
|
dependencies = [
|
|
4558
4567
|
"aws-lc-rs",
|
|
4559
4568
|
"log",
|
|
@@ -4616,9 +4625,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
|
|
|
4616
4625
|
|
|
4617
4626
|
[[package]]
|
|
4618
4627
|
name = "rustls-webpki"
|
|
4619
|
-
version = "0.103.
|
|
4628
|
+
version = "0.103.11"
|
|
4620
4629
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4621
|
-
checksum = "
|
|
4630
|
+
checksum = "20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4"
|
|
4622
4631
|
dependencies = [
|
|
4623
4632
|
"aws-lc-rs",
|
|
4624
4633
|
"ring",
|
|
@@ -5032,9 +5041,9 @@ dependencies = [
|
|
|
5032
5041
|
|
|
5033
5042
|
[[package]]
|
|
5034
5043
|
name = "sse-stream"
|
|
5035
|
-
version = "0.2.
|
|
5044
|
+
version = "0.2.2"
|
|
5036
5045
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5037
|
-
checksum = "
|
|
5046
|
+
checksum = "2c5e6deb40826033bd7b11c7ef25ef71193fabd71f680f40dd16538a2704d2f4"
|
|
5038
5047
|
dependencies = [
|
|
5039
5048
|
"bytes",
|
|
5040
5049
|
"futures-util",
|
|
@@ -5369,7 +5378,7 @@ dependencies = [
|
|
|
5369
5378
|
"monostate",
|
|
5370
5379
|
"onig",
|
|
5371
5380
|
"paste",
|
|
5372
|
-
"rand 0.9.
|
|
5381
|
+
"rand 0.9.3",
|
|
5373
5382
|
"rayon",
|
|
5374
5383
|
"rayon-cond",
|
|
5375
5384
|
"regex",
|
|
@@ -6040,9 +6049,9 @@ dependencies = [
|
|
|
6040
6049
|
|
|
6041
6050
|
[[package]]
|
|
6042
6051
|
name = "wasm-bindgen"
|
|
6043
|
-
version = "0.2.
|
|
6052
|
+
version = "0.2.118"
|
|
6044
6053
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6045
|
-
checksum = "
|
|
6054
|
+
checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89"
|
|
6046
6055
|
dependencies = [
|
|
6047
6056
|
"cfg-if",
|
|
6048
6057
|
"once_cell",
|
|
@@ -6053,9 +6062,9 @@ dependencies = [
|
|
|
6053
6062
|
|
|
6054
6063
|
[[package]]
|
|
6055
6064
|
name = "wasm-bindgen-futures"
|
|
6056
|
-
version = "0.4.
|
|
6065
|
+
version = "0.4.68"
|
|
6057
6066
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6058
|
-
checksum = "
|
|
6067
|
+
checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8"
|
|
6059
6068
|
dependencies = [
|
|
6060
6069
|
"js-sys",
|
|
6061
6070
|
"wasm-bindgen",
|
|
@@ -6063,9 +6072,9 @@ dependencies = [
|
|
|
6063
6072
|
|
|
6064
6073
|
[[package]]
|
|
6065
6074
|
name = "wasm-bindgen-macro"
|
|
6066
|
-
version = "0.2.
|
|
6075
|
+
version = "0.2.118"
|
|
6067
6076
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6068
|
-
checksum = "
|
|
6077
|
+
checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed"
|
|
6069
6078
|
dependencies = [
|
|
6070
6079
|
"quote",
|
|
6071
6080
|
"wasm-bindgen-macro-support",
|
|
@@ -6073,9 +6082,9 @@ dependencies = [
|
|
|
6073
6082
|
|
|
6074
6083
|
[[package]]
|
|
6075
6084
|
name = "wasm-bindgen-macro-support"
|
|
6076
|
-
version = "0.2.
|
|
6085
|
+
version = "0.2.118"
|
|
6077
6086
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6078
|
-
checksum = "
|
|
6087
|
+
checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904"
|
|
6079
6088
|
dependencies = [
|
|
6080
6089
|
"bumpalo",
|
|
6081
6090
|
"proc-macro2",
|
|
@@ -6086,9 +6095,9 @@ dependencies = [
|
|
|
6086
6095
|
|
|
6087
6096
|
[[package]]
|
|
6088
6097
|
name = "wasm-bindgen-shared"
|
|
6089
|
-
version = "0.2.
|
|
6098
|
+
version = "0.2.118"
|
|
6090
6099
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6091
|
-
checksum = "
|
|
6100
|
+
checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129"
|
|
6092
6101
|
dependencies = [
|
|
6093
6102
|
"unicode-ident",
|
|
6094
6103
|
]
|
|
@@ -6142,9 +6151,9 @@ dependencies = [
|
|
|
6142
6151
|
|
|
6143
6152
|
[[package]]
|
|
6144
6153
|
name = "web-sys"
|
|
6145
|
-
version = "0.3.
|
|
6154
|
+
version = "0.3.95"
|
|
6146
6155
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6147
|
-
checksum = "
|
|
6156
|
+
checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d"
|
|
6148
6157
|
dependencies = [
|
|
6149
6158
|
"js-sys",
|
|
6150
6159
|
"wasm-bindgen",
|
data/lib/kreuzberg/version.rb
CHANGED
data/lib/kreuzberg.rb
CHANGED
|
@@ -37,15 +37,11 @@ module Kreuzberg
|
|
|
37
37
|
autoload :DocumentStructure, 'kreuzberg/types'
|
|
38
38
|
autoload :PdfAnnotation, 'kreuzberg/types'
|
|
39
39
|
autoload :PdfAnnotationBoundingBox, 'kreuzberg/types'
|
|
40
|
+
autoload :KeywordAlgorithm, 'kreuzberg/types'
|
|
40
41
|
|
|
41
42
|
ExtractionConfig = Config::Extraction
|
|
42
43
|
PageConfig = Config::PageConfig
|
|
43
44
|
|
|
44
|
-
module KeywordAlgorithm
|
|
45
|
-
YAKE = :yake
|
|
46
|
-
RAKE = :rake
|
|
47
|
-
end
|
|
48
|
-
|
|
49
45
|
@__cache_tracker = { entries: 0, bytes: 0 }
|
|
50
46
|
|
|
51
47
|
class << self
|
data/vendor/Cargo.toml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract", "kreuzberg-paddle-ocr", "kreuzberg-pdfium-render"]
|
|
3
3
|
|
|
4
4
|
[workspace.package]
|
|
5
|
-
version = "4.8.
|
|
5
|
+
version = "4.8.4"
|
|
6
6
|
edition = "2024"
|
|
7
7
|
rust-version = "1.91"
|
|
8
8
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
@@ -23,7 +23,7 @@ clap = { version = "4.6", features = ["derive", "color", "suggestions"] }
|
|
|
23
23
|
comrak = { version = "0.52", default-features = false }
|
|
24
24
|
console_error_panic_hook = "0.1"
|
|
25
25
|
criterion = { version = "0.8", features = ["html_reports"] }
|
|
26
|
-
ctor = "0.
|
|
26
|
+
ctor = "0.10"
|
|
27
27
|
dbase = "0.7"
|
|
28
28
|
futures = "0.3"
|
|
29
29
|
getrandom = { version = "0.4.2", features = ["wasm_js"] }
|
|
@@ -32,10 +32,10 @@ html-to-markdown-rs = { version = "3.1.0", default-features = false }
|
|
|
32
32
|
image = { version = "0.25.10", default-features = false }
|
|
33
33
|
itertools = "0.14"
|
|
34
34
|
js-sys = "0.3"
|
|
35
|
-
kreuzberg = { path = "./crates/kreuzberg", version = "4.8.
|
|
36
|
-
kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.8.
|
|
35
|
+
kreuzberg = { path = "./crates/kreuzberg", version = "4.8.4", default-features = false }
|
|
36
|
+
kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.8.4" }
|
|
37
37
|
lazy_static = "1.5.0"
|
|
38
|
-
libc = "0.2.
|
|
38
|
+
libc = "0.2.185"
|
|
39
39
|
liter-llm = { version = "1.2", features = ["native-http", "tracing"], default-features = false }
|
|
40
40
|
log = "0.4"
|
|
41
41
|
lzma-rust2 = { version = "0.16.2" }
|
|
@@ -45,7 +45,7 @@ num_cpus = "1.17.0"
|
|
|
45
45
|
once_cell = "1.21.4"
|
|
46
46
|
ort = { version = "2.0.0-rc.12", features = ["std", "api-18"], default-features = false }
|
|
47
47
|
parking_lot = "0.12.5"
|
|
48
|
-
pdf_oxide = { version = "0.3.
|
|
48
|
+
pdf_oxide = { version = "0.3.30", default-features = false }
|
|
49
49
|
pdfium-render = { package = "kreuzberg-pdfium-render", path = "crates/kreuzberg-pdfium-render", version = "4.3" }
|
|
50
50
|
rayon = "1.11.0"
|
|
51
51
|
reqwest = { version = "0.13.2", default-features = false }
|