kreuzberg 4.8.2 → 4.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/kreuzberg_rb/native/Cargo.lock +91 -82
  4. data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
  5. data/ext/kreuzberg_rb/native/src/config/types.rs +1 -0
  6. data/lib/kreuzberg/version.rb +1 -1
  7. data/lib/kreuzberg.rb +1 -5
  8. data/vendor/Cargo.toml +6 -6
  9. data/vendor/kreuzberg/Cargo.toml +7 -7
  10. data/vendor/kreuzberg/README.md +1 -1
  11. data/vendor/kreuzberg/src/core/config/mod.rs +1 -1
  12. data/vendor/kreuzberg/src/core/config/pdf.rs +23 -0
  13. data/vendor/kreuzberg/src/doc_orientation.rs +1 -1
  14. data/vendor/kreuzberg/src/embeddings/mod.rs +15 -1
  15. data/vendor/kreuzberg/src/extraction/derive.rs +15 -3
  16. data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +18 -1
  17. data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +140 -0
  18. data/vendor/kreuzberg/src/extractors/pdf/mod.rs +349 -0
  19. data/vendor/kreuzberg/src/layout/models/slanet.rs +1 -1
  20. data/vendor/kreuzberg/src/layout/models/table_classifier.rs +1 -1
  21. data/vendor/kreuzberg/src/layout/models/tatr.rs +1 -1
  22. data/vendor/kreuzberg/src/layout/session.rs +1 -1
  23. data/vendor/kreuzberg/src/lib.rs +1 -1
  24. data/vendor/kreuzberg/src/pdf/hierarchy/extraction.rs +4 -0
  25. data/vendor/kreuzberg/src/pdf/mod.rs +2 -0
  26. data/vendor/kreuzberg/src/pdf/oxide/annotations.rs +258 -0
  27. data/vendor/kreuzberg/src/pdf/oxide/hierarchy.rs +235 -0
  28. data/vendor/kreuzberg/src/pdf/oxide/images.rs +53 -0
  29. data/vendor/kreuzberg/src/pdf/oxide/metadata.rs +381 -0
  30. data/vendor/kreuzberg/src/pdf/oxide/mod.rs +43 -0
  31. data/vendor/kreuzberg/src/pdf/oxide/table.rs +243 -0
  32. data/vendor/kreuzberg/src/pdf/oxide/text.rs +249 -0
  33. data/vendor/kreuzberg/src/pdf/oxide_text.rs +8 -6
  34. data/vendor/kreuzberg/src/pdf/structure/adapters.rs +1 -0
  35. data/vendor/kreuzberg/src/pdf/structure/assembly.rs +1 -0
  36. data/vendor/kreuzberg/src/pdf/structure/bridge.rs +51 -0
  37. data/vendor/kreuzberg/src/pdf/structure/classify.rs +3 -0
  38. data/vendor/kreuzberg/src/pdf/structure/content_convert.rs +3 -0
  39. data/vendor/kreuzberg/src/pdf/structure/layout_classify.rs +1 -0
  40. data/vendor/kreuzberg/src/pdf/structure/mod.rs +2 -0
  41. data/vendor/kreuzberg/src/pdf/structure/paragraphs.rs +2 -0
  42. data/vendor/kreuzberg/src/pdf/structure/pipeline.rs +240 -1
  43. data/vendor/kreuzberg/src/pdf/table_reconstruct.rs +1 -0
  44. data/vendor/kreuzberg/src/pdf/text.rs +1 -1
  45. data/vendor/kreuzberg/src/rendering/comrak_bridge.rs +46 -2
  46. data/vendor/kreuzberg/tests/pdf_hierarchy_detection.rs +5 -1
  47. data/vendor/kreuzberg/tests/pdf_ocr_triggering.rs +2 -1
  48. data/vendor/kreuzberg-ffi/Cargo.toml +3 -34
  49. data/vendor/kreuzberg-ffi/kreuzberg.h +2 -2
  50. data/vendor/kreuzberg-ffi/src/config/merge.rs +0 -3
  51. data/vendor/kreuzberg-ffi/src/config_builder.rs +0 -6
  52. data/vendor/kreuzberg-ffi/src/lib.rs +0 -1
  53. data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
  54. data/vendor/kreuzberg-paddle-ocr/src/base_net.rs +1 -3
  55. data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
  56. data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
  57. metadata +9 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ec647109dee8229fd50ce9fe4d87f13bbb769779b22b4862cac321052610d6c
4
- data.tar.gz: 18c3cf6df8e339f4286da7fe9b8da84185fa93abf89fcbf339b259c0095a9a5c
3
+ metadata.gz: 16deeaa47cb35ded0b844af72d43b74da5539084f21a79d17513be2da9ac2f0b
4
+ data.tar.gz: 64715b14cffac78a796853e9f5d9a2d0969427de9d59a243c87a5d20699dcce3
5
5
  SHA512:
6
- metadata.gz: f8c0ab16048bdb9026b55ff15f3ae342af9178a2ab6b1fd85777c3271da35e46474bee025af429745d20584604d520d85a4c8e4ea96bffc49ef5dfca55471b6f
7
- data.tar.gz: 46ec0ff10138bd48d7b9ffada23956cd557616d8fdf09090f16562d4daab04381176b2881e08bfdfc1f726a0061e1e25feefa695620fef530444662abb866605
6
+ metadata.gz: e362717e5db0fad6a9494737e53c2444a84cb76fd274c70283a6650eef0891e9ced2af424b2ed9501eb749f21fcfb2ca3b4f8c7b336d1a248bb99f4a7e69131e
7
+ data.tar.gz: 5d05d862a170f0efe0f6f6a9867846bb3b000136f638b1efe6ddee5e94310dc92495a40a7dff204b4098f92989175e1691ba10897be884ca960477d48dcbc6ca
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.2" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.8.4" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
data/ext/kreuzberg_rb/native/Cargo.lock CHANGED
@@ -699,9 +699,9 @@ dependencies = [
699
699
 
700
700
  [[package]]
701
701
  name = "cc"
702
- version = "1.2.59"
702
+ version = "1.2.60"
703
703
  source = "registry+https://github.com/rust-lang/crates.io-index"
704
- checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283"
704
+ checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20"
705
705
  dependencies = [
706
706
  "find-msvc-tools",
707
707
  "jobserver",
@@ -1162,19 +1162,20 @@ dependencies = [
1162
1162
 
1163
1163
  [[package]]
1164
1164
  name = "ctor"
1165
- version = "0.8.0"
1165
+ version = "0.9.1"
1166
1166
  source = "registry+https://github.com/rust-lang/crates.io-index"
1167
- checksum = "352d39c2f7bef1d6ad73db6f5160efcaed66d94ef8c6c573a8410c00bf909a98"
1167
+ checksum = "c1c888a2a4f677017373fb6c01e13e318dd9e78758445ed5eb985e355d3f8281"
1168
1168
  dependencies = [
1169
1169
  "ctor-proc-macro",
1170
1170
  "dtor",
1171
+ "link-section",
1171
1172
  ]
1172
1173
 
1173
1174
  [[package]]
1174
1175
  name = "ctor-proc-macro"
1175
- version = "0.0.7"
1176
+ version = "0.0.12"
1176
1177
  source = "registry+https://github.com/rust-lang/crates.io-index"
1177
- checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1"
1178
+ checksum = "a7ab264ea985f1bd27887d7b21ea2bb046728e05d11909ca138d700c494730db"
1178
1179
 
1179
1180
  [[package]]
1180
1181
  name = "darling"
@@ -1416,18 +1417,18 @@ dependencies = [
1416
1417
 
1417
1418
  [[package]]
1418
1419
  name = "dtor"
1419
- version = "0.3.0"
1420
+ version = "0.6.0"
1420
1421
  source = "registry+https://github.com/rust-lang/crates.io-index"
1421
- checksum = "f1057d6c64987086ff8ed0fd3fbf377a6b7d205cc7715868cd401705f715cbe4"
1422
+ checksum = "30e4690622ab6700ced40fc370a3f07b7d111f0154bb6fb08f73b4c8834f75b6"
1422
1423
  dependencies = [
1423
1424
  "dtor-proc-macro",
1424
1425
  ]
1425
1426
 
1426
1427
  [[package]]
1427
1428
  name = "dtor-proc-macro"
1428
- version = "0.0.6"
1429
+ version = "0.0.12"
1429
1430
  source = "registry+https://github.com/rust-lang/crates.io-index"
1430
- checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5"
1431
+ checksum = "8c98b077c7463d01d22dde8a24378ddf1ca7263dc687cffbed38819ea6c21131"
1431
1432
 
1432
1433
  [[package]]
1433
1434
  name = "dunce"
@@ -1600,12 +1601,9 @@ dependencies = [
1600
1601
 
1601
1602
  [[package]]
1602
1603
  name = "fearless_simd"
1603
- version = "0.3.0"
1604
+ version = "0.4.0"
1604
1605
  source = "registry+https://github.com/rust-lang/crates.io-index"
1605
- checksum = "8fb2907d1f08b2b316b9223ced5b0e89d87028ba8deae9764741dba8ff7f3903"
1606
- dependencies = [
1607
- "bytemuck",
1608
- ]
1606
+ checksum = "76258897e51fd156ee03b6246ea53f3e0eb395d0b327e9961c4fc4c8b2fa151a"
1609
1607
 
1610
1608
  [[package]]
1611
1609
  name = "filetime"
@@ -1868,9 +1866,9 @@ dependencies = [
1868
1866
 
1869
1867
  [[package]]
1870
1868
  name = "gif"
1871
- version = "0.14.1"
1869
+ version = "0.14.2"
1872
1870
  source = "registry+https://github.com/rust-lang/crates.io-index"
1873
- checksum = "f5df2ba84018d80c213569363bdcd0c64e6933c67fe4c1d60ecf822971a3c35e"
1871
+ checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159"
1874
1872
  dependencies = [
1875
1873
  "color_quant",
1876
1874
  "weezl",
@@ -2029,6 +2027,12 @@ dependencies = [
2029
2027
  "foldhash 0.2.0",
2030
2028
  ]
2031
2029
 
2030
+ [[package]]
2031
+ name = "hashbrown"
2032
+ version = "0.17.0"
2033
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2034
+ checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
2035
+
2032
2036
  [[package]]
2033
2037
  name = "hashify"
2034
2038
  version = "0.2.9"
@@ -2058,9 +2062,9 @@ dependencies = [
2058
2062
 
2059
2063
  [[package]]
2060
2064
  name = "hayro-jpeg2000"
2061
- version = "0.3.4"
2065
+ version = "0.3.5"
2062
2066
  source = "registry+https://github.com/rust-lang/crates.io-index"
2063
- checksum = "c1a74cfc18c0093ef8009a0d6c1ba3024df0cce228503a14c1372e1e23eed43e"
2067
+ checksum = "c75ab947623ef4ccaa7acf0579edf7cbb5a73838e3839a7be73335e522f433a1"
2064
2068
  dependencies = [
2065
2069
  "fearless_simd",
2066
2070
  ]
@@ -2094,7 +2098,7 @@ dependencies = [
2094
2098
  "indicatif 0.17.11",
2095
2099
  "libc",
2096
2100
  "log",
2097
- "rand 0.9.2",
2101
+ "rand 0.9.3",
2098
2102
  "serde",
2099
2103
  "serde_json",
2100
2104
  "thiserror 2.0.18",
@@ -2113,7 +2117,7 @@ dependencies = [
2113
2117
  "indicatif 0.18.4",
2114
2118
  "libc",
2115
2119
  "log",
2116
- "rand 0.9.2",
2120
+ "rand 0.9.3",
2117
2121
  "serde",
2118
2122
  "serde_json",
2119
2123
  "thiserror 2.0.18",
@@ -2244,15 +2248,14 @@ dependencies = [
2244
2248
 
2245
2249
  [[package]]
2246
2250
  name = "hyper-rustls"
2247
- version = "0.27.7"
2251
+ version = "0.27.8"
2248
2252
  source = "registry+https://github.com/rust-lang/crates.io-index"
2249
- checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
2253
+ checksum = "c2b52f86d1d4bc0d6b4e6826d960b1b333217e07d36b882dca570a5e1c48895b"
2250
2254
  dependencies = [
2251
2255
  "http",
2252
2256
  "hyper",
2253
2257
  "hyper-util",
2254
2258
  "rustls",
2255
- "rustls-pki-types",
2256
2259
  "tokio",
2257
2260
  "tokio-rustls",
2258
2261
  "tower-service",
@@ -2526,7 +2529,7 @@ dependencies = [
2526
2529
  "itertools 0.14.0",
2527
2530
  "nalgebra",
2528
2531
  "num",
2529
- "rand 0.9.2",
2532
+ "rand 0.9.3",
2530
2533
  "rand_distr",
2531
2534
  ]
2532
2535
 
@@ -2538,12 +2541,12 @@ checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8"
2538
2541
 
2539
2542
  [[package]]
2540
2543
  name = "indexmap"
2541
- version = "2.13.1"
2544
+ version = "2.14.0"
2542
2545
  source = "registry+https://github.com/rust-lang/crates.io-index"
2543
- checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff"
2546
+ checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
2544
2547
  dependencies = [
2545
2548
  "equivalent",
2546
- "hashbrown 0.16.1",
2549
+ "hashbrown 0.17.0",
2547
2550
  "serde",
2548
2551
  "serde_core",
2549
2552
  ]
@@ -2759,9 +2762,9 @@ checksum = "086b08ec7a274cd60cd575ed3651ba081ee72dec0d39a6210e8adcff9efe3880"
2759
2762
 
2760
2763
  [[package]]
2761
2764
  name = "js-sys"
2762
- version = "0.3.94"
2765
+ version = "0.3.95"
2763
2766
  source = "registry+https://github.com/rust-lang/crates.io-index"
2764
- checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9"
2767
+ checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca"
2765
2768
  dependencies = [
2766
2769
  "cfg-if",
2767
2770
  "futures-util",
@@ -2780,7 +2783,7 @@ dependencies = [
2780
2783
 
2781
2784
  [[package]]
2782
2785
  name = "kreuzberg"
2783
- version = "4.8.1"
2786
+ version = "4.8.3"
2784
2787
  dependencies = [
2785
2788
  "ahash",
2786
2789
  "async-trait",
@@ -2875,7 +2878,7 @@ dependencies = [
2875
2878
 
2876
2879
  [[package]]
2877
2880
  name = "kreuzberg-ffi"
2878
- version = "4.8.1"
2881
+ version = "4.8.3"
2879
2882
  dependencies = [
2880
2883
  "ahash",
2881
2884
  "async-trait",
@@ -2891,7 +2894,7 @@ dependencies = [
2891
2894
 
2892
2895
  [[package]]
2893
2896
  name = "kreuzberg-paddle-ocr"
2894
- version = "4.8.1"
2897
+ version = "4.8.3"
2895
2898
  dependencies = [
2896
2899
  "geo-clipper",
2897
2900
  "geo-types",
@@ -2905,7 +2908,7 @@ dependencies = [
2905
2908
 
2906
2909
  [[package]]
2907
2910
  name = "kreuzberg-pdfium-render"
2908
- version = "4.8.1"
2911
+ version = "4.8.3"
2909
2912
  dependencies = [
2910
2913
  "bitflags",
2911
2914
  "bytemuck",
@@ -2928,7 +2931,7 @@ dependencies = [
2928
2931
 
2929
2932
  [[package]]
2930
2933
  name = "kreuzberg-rb"
2931
- version = "4.8.1"
2934
+ version = "4.8.4"
2932
2935
  dependencies = [
2933
2936
  "async-trait",
2934
2937
  "html-to-markdown-rs",
@@ -2945,7 +2948,7 @@ dependencies = [
2945
2948
 
2946
2949
  [[package]]
2947
2950
  name = "kreuzberg-tesseract"
2948
- version = "4.8.1"
2951
+ version = "4.8.3"
2949
2952
  dependencies = [
2950
2953
  "cc",
2951
2954
  "cmake",
@@ -2980,9 +2983,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
2980
2983
 
2981
2984
  [[package]]
2982
2985
  name = "libc"
2983
- version = "0.2.184"
2986
+ version = "0.2.185"
2984
2987
  source = "registry+https://github.com/rust-lang/crates.io-index"
2985
- checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
2988
+ checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
2986
2989
 
2987
2990
  [[package]]
2988
2991
  name = "libfuzzer-sys"
@@ -3022,16 +3025,22 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
3022
3025
 
3023
3026
  [[package]]
3024
3027
  name = "libredox"
3025
- version = "0.1.15"
3028
+ version = "0.1.16"
3026
3029
  source = "registry+https://github.com/rust-lang/crates.io-index"
3027
- checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08"
3030
+ checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c"
3028
3031
  dependencies = [
3029
3032
  "bitflags",
3030
3033
  "libc",
3031
3034
  "plain",
3032
- "redox_syscall 0.7.3",
3035
+ "redox_syscall 0.7.4",
3033
3036
  ]
3034
3037
 
3038
+ [[package]]
3039
+ name = "link-section"
3040
+ version = "0.0.12"
3041
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3042
+ checksum = "f52437d47b0358721ec869cc7374b2a21f7b2237af9b439c0391341a1fbfbf1b"
3043
+
3035
3044
  [[package]]
3036
3045
  name = "linux-raw-sys"
3037
3046
  version = "0.12.1"
@@ -3116,7 +3125,7 @@ dependencies = [
3116
3125
  "md-5",
3117
3126
  "nom 8.0.0",
3118
3127
  "nom_locate",
3119
- "rand 0.10.0",
3128
+ "rand 0.10.1",
3120
3129
  "rangemap",
3121
3130
  "rayon",
3122
3131
  "sha2 0.10.9",
@@ -3631,9 +3640,9 @@ dependencies = [
3631
3640
 
3632
3641
  [[package]]
3633
3642
  name = "openssl"
3634
- version = "0.10.76"
3643
+ version = "0.10.77"
3635
3644
  source = "registry+https://github.com/rust-lang/crates.io-index"
3636
- checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf"
3645
+ checksum = "bfe4646e360ec77dff7dde40ed3d6c5fee52d156ef4a62f53973d38294dad87f"
3637
3646
  dependencies = [
3638
3647
  "bitflags",
3639
3648
  "cfg-if",
@@ -3663,9 +3672,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
3663
3672
 
3664
3673
  [[package]]
3665
3674
  name = "openssl-sys"
3666
- version = "0.9.112"
3675
+ version = "0.9.113"
3667
3676
  source = "registry+https://github.com/rust-lang/crates.io-index"
3668
- checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb"
3677
+ checksum = "ad2f2c0eba47118757e4c6d2bff2838f3e0523380021356e7875e858372ce644"
3669
3678
  dependencies = [
3670
3679
  "cc",
3671
3680
  "libc",
@@ -3698,7 +3707,7 @@ dependencies = [
3698
3707
  "futures-util",
3699
3708
  "opentelemetry",
3700
3709
  "percent-encoding",
3701
- "rand 0.9.2",
3710
+ "rand 0.9.3",
3702
3711
  "thiserror 2.0.18",
3703
3712
  "tokio",
3704
3713
  "tokio-stream",
@@ -3860,9 +3869,9 @@ checksum = "ad78bf43dcf80e8f950c92b84f938a0fc7590b7f6866fbcbeca781609c115590"
3860
3869
 
3861
3870
  [[package]]
3862
3871
  name = "pkg-config"
3863
- version = "0.3.32"
3872
+ version = "0.3.33"
3864
3873
  source = "registry+https://github.com/rust-lang/crates.io-index"
3865
- checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
3874
+ checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
3866
3875
 
3867
3876
  [[package]]
3868
3877
  name = "plain"
@@ -4065,7 +4074,7 @@ dependencies = [
4065
4074
  "bytes",
4066
4075
  "getrandom 0.3.4",
4067
4076
  "lru-slab",
4068
- "rand 0.9.2",
4077
+ "rand 0.9.3",
4069
4078
  "ring",
4070
4079
  "rustc-hash",
4071
4080
  "rustls",
@@ -4131,9 +4140,9 @@ dependencies = [
4131
4140
 
4132
4141
  [[package]]
4133
4142
  name = "rand"
4134
- version = "0.9.2"
4143
+ version = "0.9.3"
4135
4144
  source = "registry+https://github.com/rust-lang/crates.io-index"
4136
- checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
4145
+ checksum = "7ec095654a25171c2124e9e3393a930bddbffdc939556c914957a4c3e0a87166"
4137
4146
  dependencies = [
4138
4147
  "rand_chacha",
4139
4148
  "rand_core 0.9.5",
@@ -4141,9 +4150,9 @@ dependencies = [
4141
4150
 
4142
4151
  [[package]]
4143
4152
  name = "rand"
4144
- version = "0.10.0"
4153
+ version = "0.10.1"
4145
4154
  source = "registry+https://github.com/rust-lang/crates.io-index"
4146
- checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
4155
+ checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
4147
4156
  dependencies = [
4148
4157
  "chacha20",
4149
4158
  "getrandom 0.4.2",
@@ -4182,7 +4191,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
4182
4191
  checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
4183
4192
  dependencies = [
4184
4193
  "num-traits",
4185
- "rand 0.9.2",
4194
+ "rand 0.9.3",
4186
4195
  ]
4187
4196
 
4188
4197
  [[package]]
@@ -4218,7 +4227,7 @@ dependencies = [
4218
4227
  "num-traits",
4219
4228
  "paste",
4220
4229
  "profiling",
4221
- "rand 0.9.2",
4230
+ "rand 0.9.3",
4222
4231
  "rand_chacha",
4223
4232
  "simd_helpers",
4224
4233
  "thiserror 2.0.18",
@@ -4319,9 +4328,9 @@ dependencies = [
4319
4328
 
4320
4329
  [[package]]
4321
4330
  name = "redox_syscall"
4322
- version = "0.7.3"
4331
+ version = "0.7.4"
4323
4332
  source = "registry+https://github.com/rust-lang/crates.io-index"
4324
- checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16"
4333
+ checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a"
4325
4334
  dependencies = [
4326
4335
  "bitflags",
4327
4336
  ]
@@ -4454,9 +4463,9 @@ dependencies = [
4454
4463
 
4455
4464
  [[package]]
4456
4465
  name = "rmcp"
4457
- version = "1.3.0"
4466
+ version = "1.4.0"
4458
4467
  source = "registry+https://github.com/rust-lang/crates.io-index"
4459
- checksum = "2231b2c085b371c01bc90c0e6c1cab8834711b6394533375bdbf870b0166d419"
4468
+ checksum = "f542f74cf247da16f19bbc87e298cd201e912314f4083e88cdd671f44f5fcb53"
4460
4469
  dependencies = [
4461
4470
  "async-trait",
4462
4471
  "base64 0.22.1",
@@ -4468,7 +4477,7 @@ dependencies = [
4468
4477
  "http-body-util",
4469
4478
  "pastey 0.2.1",
4470
4479
  "pin-project-lite",
4471
- "rand 0.10.0",
4480
+ "rand 0.10.1",
4472
4481
  "rmcp-macros",
4473
4482
  "schemars",
4474
4483
  "serde",
@@ -4485,9 +4494,9 @@ dependencies = [
4485
4494
 
4486
4495
  [[package]]
4487
4496
  name = "rmcp-macros"
4488
- version = "1.3.0"
4497
+ version = "1.4.0"
4489
4498
  source = "registry+https://github.com/rust-lang/crates.io-index"
4490
- checksum = "36ea0e100fadf81be85d7ff70f86cd805c7572601d4ab2946207f36540854b43"
4499
+ checksum = "b2391e4ae47f314e70eaafb6c7bd82e495e770b935448864446302143019151f"
4491
4500
  dependencies = [
4492
4501
  "darling 0.23.0",
4493
4502
  "proc-macro2",
@@ -4551,9 +4560,9 @@ dependencies = [
4551
4560
 
4552
4561
  [[package]]
4553
4562
  name = "rustls"
4554
- version = "0.23.37"
4563
+ version = "0.23.38"
4555
4564
  source = "registry+https://github.com/rust-lang/crates.io-index"
4556
- checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
4565
+ checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21"
4557
4566
  dependencies = [
4558
4567
  "aws-lc-rs",
4559
4568
  "log",
@@ -4616,9 +4625,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
4616
4625
 
4617
4626
  [[package]]
4618
4627
  name = "rustls-webpki"
4619
- version = "0.103.10"
4628
+ version = "0.103.11"
4620
4629
  source = "registry+https://github.com/rust-lang/crates.io-index"
4621
- checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef"
4630
+ checksum = "20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4"
4622
4631
  dependencies = [
4623
4632
  "aws-lc-rs",
4624
4633
  "ring",
@@ -5032,9 +5041,9 @@ dependencies = [
5032
5041
 
5033
5042
  [[package]]
5034
5043
  name = "sse-stream"
5035
- version = "0.2.1"
5044
+ version = "0.2.2"
5036
5045
  source = "registry+https://github.com/rust-lang/crates.io-index"
5037
- checksum = "eb4dc4d33c68ec1f27d386b5610a351922656e1fdf5c05bbaad930cd1519479a"
5046
+ checksum = "2c5e6deb40826033bd7b11c7ef25ef71193fabd71f680f40dd16538a2704d2f4"
5038
5047
  dependencies = [
5039
5048
  "bytes",
5040
5049
  "futures-util",
@@ -5369,7 +5378,7 @@ dependencies = [
5369
5378
  "monostate",
5370
5379
  "onig",
5371
5380
  "paste",
5372
- "rand 0.9.2",
5381
+ "rand 0.9.3",
5373
5382
  "rayon",
5374
5383
  "rayon-cond",
5375
5384
  "regex",
@@ -6040,9 +6049,9 @@ dependencies = [
6040
6049
 
6041
6050
  [[package]]
6042
6051
  name = "wasm-bindgen"
6043
- version = "0.2.117"
6052
+ version = "0.2.118"
6044
6053
  source = "registry+https://github.com/rust-lang/crates.io-index"
6045
- checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0"
6054
+ checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89"
6046
6055
  dependencies = [
6047
6056
  "cfg-if",
6048
6057
  "once_cell",
@@ -6053,9 +6062,9 @@ dependencies = [
6053
6062
 
6054
6063
  [[package]]
6055
6064
  name = "wasm-bindgen-futures"
6056
- version = "0.4.67"
6065
+ version = "0.4.68"
6057
6066
  source = "registry+https://github.com/rust-lang/crates.io-index"
6058
- checksum = "03623de6905b7206edd0a75f69f747f134b7f0a2323392d664448bf2d3c5d87e"
6067
+ checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8"
6059
6068
  dependencies = [
6060
6069
  "js-sys",
6061
6070
  "wasm-bindgen",
@@ -6063,9 +6072,9 @@ dependencies = [
6063
6072
 
6064
6073
  [[package]]
6065
6074
  name = "wasm-bindgen-macro"
6066
- version = "0.2.117"
6075
+ version = "0.2.118"
6067
6076
  source = "registry+https://github.com/rust-lang/crates.io-index"
6068
- checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be"
6077
+ checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed"
6069
6078
  dependencies = [
6070
6079
  "quote",
6071
6080
  "wasm-bindgen-macro-support",
@@ -6073,9 +6082,9 @@ dependencies = [
6073
6082
 
6074
6083
  [[package]]
6075
6084
  name = "wasm-bindgen-macro-support"
6076
- version = "0.2.117"
6085
+ version = "0.2.118"
6077
6086
  source = "registry+https://github.com/rust-lang/crates.io-index"
6078
- checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2"
6087
+ checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904"
6079
6088
  dependencies = [
6080
6089
  "bumpalo",
6081
6090
  "proc-macro2",
@@ -6086,9 +6095,9 @@ dependencies = [
6086
6095
 
6087
6096
  [[package]]
6088
6097
  name = "wasm-bindgen-shared"
6089
- version = "0.2.117"
6098
+ version = "0.2.118"
6090
6099
  source = "registry+https://github.com/rust-lang/crates.io-index"
6091
- checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b"
6100
+ checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129"
6092
6101
  dependencies = [
6093
6102
  "unicode-ident",
6094
6103
  ]
@@ -6142,9 +6151,9 @@ dependencies = [
6142
6151
 
6143
6152
  [[package]]
6144
6153
  name = "web-sys"
6145
- version = "0.3.94"
6154
+ version = "0.3.95"
6146
6155
  source = "registry+https://github.com/rust-lang/crates.io-index"
6147
- checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a"
6156
+ checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d"
6148
6157
  dependencies = [
6149
6158
  "js-sys",
6150
6159
  "wasm-bindgen",
data/ext/kreuzberg_rb/native/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg-rb"
3
- version = "4.8.2"
3
+ version = "4.8.4"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
data/ext/kreuzberg_rb/native/src/config/types.rs CHANGED
@@ -334,6 +334,7 @@ pub fn parse_pdf_config(ruby: &Ruby, hash: RHash) -> Result<PdfConfig, Error> {
334
334
  };
335
335
 
336
336
  let config = PdfConfig {
337
+ backend: kreuzberg::PdfBackend::default(),
337
338
  extract_images,
338
339
  passwords,
339
340
  extract_metadata,
data/lib/kreuzberg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.8.2'
4
+ VERSION = '4.8.4'
5
5
  end
data/lib/kreuzberg.rb CHANGED
@@ -37,15 +37,11 @@ module Kreuzberg
37
37
  autoload :DocumentStructure, 'kreuzberg/types'
38
38
  autoload :PdfAnnotation, 'kreuzberg/types'
39
39
  autoload :PdfAnnotationBoundingBox, 'kreuzberg/types'
40
+ autoload :KeywordAlgorithm, 'kreuzberg/types'
40
41
 
41
42
  ExtractionConfig = Config::Extraction
42
43
  PageConfig = Config::PageConfig
43
44
 
44
- module KeywordAlgorithm
45
- YAKE = :yake
46
- RAKE = :rake
47
- end
48
-
49
45
  @__cache_tracker = { entries: 0, bytes: 0 }
50
46
 
51
47
  class << self
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract", "kreuzberg-paddle-ocr", "kreuzberg-pdfium-render"]
3
3
 
4
4
  [workspace.package]
5
- version = "4.8.2"
5
+ version = "4.8.4"
6
6
  edition = "2024"
7
7
  rust-version = "1.91"
8
8
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
@@ -23,7 +23,7 @@ clap = { version = "4.6", features = ["derive", "color", "suggestions"] }
23
23
  comrak = { version = "0.52", default-features = false }
24
24
  console_error_panic_hook = "0.1"
25
25
  criterion = { version = "0.8", features = ["html_reports"] }
26
- ctor = "0.8"
26
+ ctor = "0.10"
27
27
  dbase = "0.7"
28
28
  futures = "0.3"
29
29
  getrandom = { version = "0.4.2", features = ["wasm_js"] }
@@ -32,10 +32,10 @@ html-to-markdown-rs = { version = "3.1.0", default-features = false }
32
32
  image = { version = "0.25.10", default-features = false }
33
33
  itertools = "0.14"
34
34
  js-sys = "0.3"
35
- kreuzberg = { path = "./crates/kreuzberg", version = "4.8.2", default-features = false }
36
- kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.8.2" }
35
+ kreuzberg = { path = "./crates/kreuzberg", version = "4.8.4", default-features = false }
36
+ kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.8.4" }
37
37
  lazy_static = "1.5.0"
38
- libc = "0.2.184"
38
+ libc = "0.2.185"
39
39
  liter-llm = { version = "1.2", features = ["native-http", "tracing"], default-features = false }
40
40
  log = "0.4"
41
41
  lzma-rust2 = { version = "0.16.2" }
@@ -45,7 +45,7 @@ num_cpus = "1.17.0"
45
45
  once_cell = "1.21.4"
46
46
  ort = { version = "2.0.0-rc.12", features = ["std", "api-18"], default-features = false }
47
47
  parking_lot = "0.12.5"
48
- pdf_oxide = { version = "0.3.22", default-features = false }
48
+ pdf_oxide = { version = "0.3.30", default-features = false }
49
49
  pdfium-render = { package = "kreuzberg-pdfium-render", path = "crates/kreuzberg-pdfium-render", version = "4.3" }
50
50
  rayon = "1.11.0"
51
51
  reqwest = { version = "0.13.2", default-features = false }