kreuzberg 4.4.4 → 4.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +8 -8
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +31 -60
  5. data/ext/kreuzberg_rb/native/Cargo.toml +4 -49
  6. data/lib/kreuzberg/version.rb +1 -1
  7. data/vendor/Cargo.toml +9 -7
  8. data/vendor/kreuzberg/Cargo.toml +47 -30
  9. data/vendor/kreuzberg/README.md +1 -1
  10. data/vendor/kreuzberg/src/api/handlers.rs +9 -0
  11. data/vendor/kreuzberg/src/core/config/pdf.rs +15 -0
  12. data/vendor/kreuzberg/src/extractors/pdf/mod.rs +40 -35
  13. data/vendor/kreuzberg/src/mcp/params.rs +12 -0
  14. data/vendor/kreuzberg/src/mcp/tools/extraction.rs +26 -3
  15. data/vendor/kreuzberg/src/pdf/markdown/paragraphs.rs +260 -6
  16. data/vendor/kreuzberg/src/utils/json_utils.rs +135 -0
  17. data/vendor/kreuzberg/src/utils/mod.rs +2 -1
  18. data/vendor/kreuzberg/tests/pdf_integration.rs +41 -1
  19. data/vendor/kreuzberg-ffi/Cargo.toml +6 -6
  20. data/vendor/kreuzberg-ffi/kreuzberg.h +238 -90
  21. data/vendor/kreuzberg-ffi/src/config_builder.rs +442 -2
  22. data/vendor/kreuzberg-ffi/src/error.rs +6 -6
  23. data/vendor/kreuzberg-ffi/src/helpers.rs +29 -13
  24. data/vendor/kreuzberg-ffi/src/lib.rs +6 -3
  25. data/vendor/kreuzberg-ffi/src/memory.rs +4 -0
  26. data/vendor/kreuzberg-ffi/src/result_pool.rs +9 -13
  27. data/vendor/kreuzberg-ffi/src/result_view.rs +61 -65
  28. data/vendor/kreuzberg-ffi/src/string_intern.rs +6 -11
  29. data/vendor/kreuzberg-ffi/src/types.rs +53 -48
  30. data/vendor/kreuzberg-paddle-ocr/Cargo.toml +3 -3
  31. data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -8
  32. data/vendor/kreuzberg-pdfium-render/src/bindgen/pdfium_7678.rs +14 -0
  33. data/vendor/kreuzberg-pdfium-render/src/bindings/dynamic_bindings.rs +23 -0
  34. data/vendor/kreuzberg-pdfium-render/src/bindings/static_bindings.rs +17 -0
  35. data/vendor/kreuzberg-pdfium-render/src/bindings/wasm_bindings.rs +109 -0
  36. data/vendor/kreuzberg-pdfium-render/src/bindings.rs +39 -0
  37. data/vendor/kreuzberg-pdfium-render/src/lib.rs +0 -6
  38. data/vendor/kreuzberg-pdfium-render/src/pdf/action.rs +1 -0
  39. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/group.rs +2 -4
  40. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/struct_element.rs +8 -0
  41. data/vendor/kreuzberg-pdfium-render/src/pdf/document/permissions.rs +1 -0
  42. data/vendor/kreuzberg-pdfium-render/src/pdf/document.rs +34 -37
  43. data/vendor/kreuzberg-pdfium-render/src/pdf/link.rs +4 -4
  44. data/vendor/kreuzberg-pdfium-render/src/pdfium.rs +1 -0
  45. data/vendor/kreuzberg-pdfium-render/src/utils.rs +9 -10
  46. data/vendor/kreuzberg-tesseract/Cargo.toml +10 -6
  47. metadata +3 -8
  48. data/vendor/kreuzberg-pdfium-render/src/pdf/document/attachment.rs +0 -184
  49. data/vendor/kreuzberg-pdfium-render/src/pdf/document/attachments.rs +0 -289
  50. data/vendor/kreuzberg-pdfium-render/src/pdf/document/bookmark.rs +0 -538
  51. data/vendor/kreuzberg-pdfium-render/src/pdf/document/bookmarks.rs +0 -234
  52. data/vendor/kreuzberg-pdfium-render/src/pdf/document/signature.rs +0 -186
  53. data/vendor/kreuzberg-pdfium-render/src/pdf/document/signatures.rs +0 -109
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c1af55de32ad8ed5cef33e40072ffa0f172e4bf642b46abdca67a8ee43f2de6
4
- data.tar.gz: 2ca11e1c00d242f88fce093708f9e3ddb192a3cd0c1b05334c61b1224e005b95
3
+ metadata.gz: c376485167aade739cda1b3ac0ba9f8f19bc6c69201f29ab6a13fdb1b9615c9e
4
+ data.tar.gz: e89130964d89de12fb5dd2c179f695d239605eaf0dd39312afaeb8c96114d0e0
5
5
  SHA512:
6
- metadata.gz: dc1e3770ceeb162f0f56ad1414591413a4c888ea2d007cd2850a699aed5b7a535218dae8b5f69ec0d88beb1ba1298a40d559c7383be3abaf0239a9a26afd86d2
7
- data.tar.gz: 9d84aa28f46fc251e224fdfa57ac042742180820c3f77b80eae69ec1b52560ded49dc4fb8f7716bd8f0090912081481936b0a78a5ed826f140be4da2c902d86a
6
+ metadata.gz: 740a010ae02293ec8228ed99f01c62f6d139953d77cd11de7913769f083b0e349986fd1c5b930f505176dcf39c1475d15077cf67bddec40a755e154f8974cd8d
7
+ data.tar.gz: 8b04c3aa6a73b2284d81948ed0ab954b42f7605a3609e976f47914d5d274f70dc13b0432e7890bbc426b238627da3ed76a79f4c7c915e05859c3e4aef4e28600
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.4.4)
4
+ kreuzberg (4.4.5)
5
5
  rb_sys (~> 0.9.119)
6
6
  sorbet-runtime (~> 0.5)
7
7
 
@@ -49,7 +49,7 @@ GEM
49
49
  i18n (1.14.8)
50
50
  concurrent-ruby (~> 1.0)
51
51
  io-console (0.8.2)
52
- json (2.19.0)
52
+ json (2.19.1)
53
53
  json-schema (6.2.0)
54
54
  addressable (~> 2.8)
55
55
  bigdecimal (>= 3.1, < 5)
@@ -122,7 +122,7 @@ GEM
122
122
  rubocop-ast (>= 1.49.0, < 2.0)
123
123
  ruby-progressbar (~> 1.7)
124
124
  unicode-display_width (>= 2.4.0, < 4.0)
125
- rubocop-ast (1.49.0)
125
+ rubocop-ast (1.49.1)
126
126
  parser (>= 3.3.7.2)
127
127
  prism (~> 1.7)
128
128
  rubocop-performance (1.26.1)
@@ -134,7 +134,7 @@ GEM
134
134
  rubocop (~> 1.81)
135
135
  ruby-progressbar (1.13.0)
136
136
  securerandom (0.4.1)
137
- sorbet-runtime (0.6.12997)
137
+ sorbet-runtime (0.6.13011)
138
138
  steep (1.10.0)
139
139
  activesupport (>= 5.1)
140
140
  concurrent-ruby (>= 1.1.10)
@@ -220,9 +220,9 @@ CHECKSUMS
220
220
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
221
221
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
222
222
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
223
- json (2.19.0) sha256=bc5202f083618b3af7aba3184146ec9d820f8f6de261838b577173475e499d9a
223
+ json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
224
224
  json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
225
- kreuzberg (4.4.4)
225
+ kreuzberg (4.4.5)
226
226
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
227
227
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
228
228
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
@@ -254,12 +254,12 @@ CHECKSUMS
254
254
  rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
255
255
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
256
256
  rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
257
- rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
257
+ rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
258
258
  rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
259
259
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
260
260
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
261
261
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
262
- sorbet-runtime (0.6.12997) sha256=5e84f6168c10e15b994fccb808ba64bbb8b3b027ea7bf083a9a3815a8b765c3f
262
+ sorbet-runtime (0.6.13011) sha256=d451e380097747d64d39595fbbb6db2a198310f9eff0f810cd6e5696b402833f
263
263
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
264
264
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
265
265
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.4" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.5" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
@@ -1390,9 +1390,9 @@ dependencies = [
1390
1390
 
1391
1391
  [[package]]
1392
1392
  name = "fastembed"
1393
- version = "5.12.0"
1393
+ version = "5.12.1"
1394
1394
  source = "registry+https://github.com/rust-lang/crates.io-index"
1395
- checksum = "e7b01c79c5cb8ab3ce31c3d52916fda278e14cac027ff3a9cb66c419ed7288f8"
1395
+ checksum = "b609359080bf7dfff6ac5ace3d6944355ede4c8a51406a316202ae86ff8346a8"
1396
1396
  dependencies = [
1397
1397
  "anyhow",
1398
1398
  "hf-hub 0.4.3",
@@ -1901,9 +1901,9 @@ dependencies = [
1901
1901
 
1902
1902
  [[package]]
1903
1903
  name = "hayro-jpeg2000"
1904
- version = "0.3.3"
1904
+ version = "0.3.4"
1905
1905
  source = "registry+https://github.com/rust-lang/crates.io-index"
1906
- checksum = "16bca62a003c7e4f4bedc659500e0be1b8fd7b0a432b1884418a86d35de5b04c"
1906
+ checksum = "c1a74cfc18c0093ef8009a0d6c1ba3024df0cce228503a14c1372e1e23eed43e"
1907
1907
  dependencies = [
1908
1908
  "fearless_simd",
1909
1909
  ]
@@ -1977,9 +1977,9 @@ dependencies = [
1977
1977
 
1978
1978
  [[package]]
1979
1979
  name = "html-to-markdown-rs"
1980
- version = "2.28.1"
1980
+ version = "2.28.2"
1981
1981
  source = "registry+https://github.com/rust-lang/crates.io-index"
1982
- checksum = "9acd7c0cb550ef05c40eed72a866a220c64c1b37b3ff57ae53064a8e47e3552d"
1982
+ checksum = "3f9377e16af590b764fd98fd176027cf8831c5335f8964f3f643753e38913a4e"
1983
1983
  dependencies = [
1984
1984
  "ahash",
1985
1985
  "astral-tl",
@@ -2329,9 +2329,9 @@ dependencies = [
2329
2329
 
2330
2330
  [[package]]
2331
2331
  name = "image"
2332
- version = "0.25.9"
2332
+ version = "0.25.10"
2333
2333
  source = "registry+https://github.com/rust-lang/crates.io-index"
2334
- checksum = "e6506c6c10786659413faa717ceebcb8f70731c0a60cbae39795fdf114519c1a"
2334
+ checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104"
2335
2335
  dependencies = [
2336
2336
  "bytemuck",
2337
2337
  "byteorder-lite",
@@ -2344,9 +2344,9 @@ dependencies = [
2344
2344
  "png",
2345
2345
  "ravif",
2346
2346
  "rayon",
2347
- "tiff 0.10.3",
2348
- "zune-core 0.5.1",
2349
- "zune-jpeg 0.5.12",
2347
+ "tiff",
2348
+ "zune-core",
2349
+ "zune-jpeg",
2350
2350
  ]
2351
2351
 
2352
2352
  [[package]]
@@ -2659,7 +2659,7 @@ dependencies = [
2659
2659
  "tar",
2660
2660
  "text-splitter",
2661
2661
  "thiserror 2.0.18",
2662
- "tiff 0.11.3",
2662
+ "tiff",
2663
2663
  "tokio",
2664
2664
  "toml 1.0.6+spec-1.1.0",
2665
2665
  "tower",
@@ -2729,7 +2729,7 @@ dependencies = [
2729
2729
 
2730
2730
  [[package]]
2731
2731
  name = "kreuzberg-rb"
2732
- version = "4.4.3"
2732
+ version = "4.4.5"
2733
2733
  dependencies = [
2734
2734
  "async-trait",
2735
2735
  "html-to-markdown-rs",
@@ -2793,9 +2793,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
2793
2793
 
2794
2794
  [[package]]
2795
2795
  name = "libc"
2796
- version = "0.2.182"
2796
+ version = "0.2.183"
2797
2797
  source = "registry+https://github.com/rust-lang/crates.io-index"
2798
- checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
2798
+ checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
2799
2799
 
2800
2800
  [[package]]
2801
2801
  name = "libfuzzer-sys"
@@ -3118,9 +3118,9 @@ dependencies = [
3118
3118
 
3119
3119
  [[package]]
3120
3120
  name = "moxcms"
3121
- version = "0.7.11"
3121
+ version = "0.8.1"
3122
3122
  source = "registry+https://github.com/rust-lang/crates.io-index"
3123
- checksum = "ac9557c559cd6fc9867e122e20d2cbefc9ca29d80d027a8e39310920ed2f0a97"
3123
+ checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b"
3124
3124
  dependencies = [
3125
3125
  "num-traits",
3126
3126
  "pxfm",
@@ -3821,9 +3821,9 @@ dependencies = [
3821
3821
 
3822
3822
  [[package]]
3823
3823
  name = "quinn-proto"
3824
- version = "0.11.13"
3824
+ version = "0.11.14"
3825
3825
  source = "registry+https://github.com/rust-lang/crates.io-index"
3826
- checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
3826
+ checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
3827
3827
  dependencies = [
3828
3828
  "aws-lc-rs",
3829
3829
  "bytes",
@@ -3992,9 +3992,9 @@ dependencies = [
3992
3992
 
3993
3993
  [[package]]
3994
3994
  name = "ravif"
3995
- version = "0.12.0"
3995
+ version = "0.13.0"
3996
3996
  source = "registry+https://github.com/rust-lang/crates.io-index"
3997
- checksum = "ef69c1990ceef18a116855938e74793a5f7496ee907562bd0857b6ac734ab285"
3997
+ checksum = "e52310197d971b0f5be7fe6b57530dcd27beb35c1b013f29d66c1ad73fbbcc45"
3998
3998
  dependencies = [
3999
3999
  "avif-serialize",
4000
4000
  "imgref",
@@ -4482,9 +4482,9 @@ dependencies = [
4482
4482
 
4483
4483
  [[package]]
4484
4484
  name = "schannel"
4485
- version = "0.1.28"
4485
+ version = "0.1.29"
4486
4486
  source = "registry+https://github.com/rust-lang/crates.io-index"
4487
- checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
4487
+ checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
4488
4488
  dependencies = [
4489
4489
  "windows-sys 0.61.2",
4490
4490
  ]
@@ -4991,9 +4991,9 @@ dependencies = [
4991
4991
 
4992
4992
  [[package]]
4993
4993
  name = "tempfile"
4994
- version = "3.26.0"
4994
+ version = "3.27.0"
4995
4995
  source = "registry+https://github.com/rust-lang/crates.io-index"
4996
- checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0"
4996
+ checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
4997
4997
  dependencies = [
4998
4998
  "fastrand",
4999
4999
  "getrandom 0.4.2",
@@ -5079,20 +5079,6 @@ dependencies = [
5079
5079
  "cfg-if",
5080
5080
  ]
5081
5081
 
5082
- [[package]]
5083
- name = "tiff"
5084
- version = "0.10.3"
5085
- source = "registry+https://github.com/rust-lang/crates.io-index"
5086
- checksum = "af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f"
5087
- dependencies = [
5088
- "fax",
5089
- "flate2",
5090
- "half",
5091
- "quick-error",
5092
- "weezl",
5093
- "zune-jpeg 0.4.21",
5094
- ]
5095
-
5096
5082
  [[package]]
5097
5083
  name = "tiff"
5098
5084
  version = "0.11.3"
@@ -5104,7 +5090,7 @@ dependencies = [
5104
5090
  "half",
5105
5091
  "quick-error",
5106
5092
  "weezl",
5107
- "zune-jpeg 0.5.12",
5093
+ "zune-jpeg",
5108
5094
  ]
5109
5095
 
5110
5096
  [[package]]
@@ -6483,18 +6469,18 @@ dependencies = [
6483
6469
 
6484
6470
  [[package]]
6485
6471
  name = "zerocopy"
6486
- version = "0.8.40"
6472
+ version = "0.8.42"
6487
6473
  source = "registry+https://github.com/rust-lang/crates.io-index"
6488
- checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5"
6474
+ checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3"
6489
6475
  dependencies = [
6490
6476
  "zerocopy-derive",
6491
6477
  ]
6492
6478
 
6493
6479
  [[package]]
6494
6480
  name = "zerocopy-derive"
6495
- version = "0.8.40"
6481
+ version = "0.8.42"
6496
6482
  source = "registry+https://github.com/rust-lang/crates.io-index"
6497
- checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953"
6483
+ checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f"
6498
6484
  dependencies = [
6499
6485
  "proc-macro2",
6500
6486
  "quote",
@@ -6613,12 +6599,6 @@ dependencies = [
6613
6599
  "simd-adler32",
6614
6600
  ]
6615
6601
 
6616
- [[package]]
6617
- name = "zune-core"
6618
- version = "0.4.12"
6619
- source = "registry+https://github.com/rust-lang/crates.io-index"
6620
- checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
6621
-
6622
6602
  [[package]]
6623
6603
  name = "zune-core"
6624
6604
  version = "0.5.1"
@@ -6634,20 +6614,11 @@ dependencies = [
6634
6614
  "simd-adler32",
6635
6615
  ]
6636
6616
 
6637
- [[package]]
6638
- name = "zune-jpeg"
6639
- version = "0.4.21"
6640
- source = "registry+https://github.com/rust-lang/crates.io-index"
6641
- checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713"
6642
- dependencies = [
6643
- "zune-core 0.4.12",
6644
- ]
6645
-
6646
6617
  [[package]]
6647
6618
  name = "zune-jpeg"
6648
6619
  version = "0.5.12"
6649
6620
  source = "registry+https://github.com/rust-lang/crates.io-index"
6650
6621
  checksum = "410e9ecef634c709e3831c2cfdb8d9c32164fae1c67496d5b68fff728eec37fe"
6651
6622
  dependencies = [
6652
- "zune-core 0.5.1",
6623
+ "zune-core",
6653
6624
  ]
@@ -1,51 +1,6 @@
1
- [workspace]
2
-
3
- [workspace.dependencies]
4
- bytes = { version = "1", features = ["serde"] }
5
- serde = { version = "1.0.228", features = ["derive"] }
6
- serde_json = { version = "1.0.149" }
7
- tokio = { version = "1.50.0", features = [
8
- "rt",
9
- "rt-multi-thread",
10
- "macros",
11
- "sync",
12
- "process",
13
- "fs",
14
- "time",
15
- "io-util",
16
- ] }
17
- thiserror = "2.0.18"
18
- anyhow = "1.0"
19
- libc = "0.2.182"
20
- async-trait = "0.1.89"
21
- tracing = "0.1"
22
- ahash = "0.8.12"
23
- base64 = "0.22.1"
24
- hex = "0.4.3"
25
- num_cpus = "1.17.0"
26
- once_cell = "1.21.3"
27
- parking_lot = "0.12.5"
28
- html-to-markdown-rs = { version = "2.28.1", default-features = false }
29
- reqwest = { version = "0.13.2", default-features = false }
30
- image = { version = "0.25.9", default-features = false }
31
- toml = "1.0.6"
32
- tempfile = "3.26.0"
33
- lzma-rust2 = { version = "0.16.2" }
34
- log = "0.4"
35
- getrandom = { version = "0.4.2", features = ["wasm_js"] }
36
- console_error_panic_hook = "0.1"
37
- ctor = "0.6"
38
- lazy_static = "1.5.0"
39
- rayon = "1.11.0"
40
- chrono = "0.4"
41
- itertools = "0.14"
42
-
43
- [workspace.lints.clippy]
44
- collapsible_if = "allow"
45
-
46
1
  [package]
47
2
  name = "kreuzberg-rb"
48
- version = "4.4.4"
3
+ version = "4.4.5"
49
4
  edition = "2024"
50
5
  rust-version = "1.91"
51
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -58,8 +13,8 @@ description = "Ruby bindings (Magnus) for Kreuzberg - high-performance document
58
13
  keywords = ["ruby", "magnus", "document", "extraction", "bindings"]
59
14
  categories = ["api-bindings", "text-processing"]
60
15
 
61
- [lints]
62
- workspace = true
16
+ [lints.clippy]
17
+ collapsible_if = "allow"
63
18
 
64
19
  [lib]
65
20
  name = "kreuzberg_rb"
@@ -111,7 +66,7 @@ tokio = { version = "1.50.0", features = [
111
66
  "time",
112
67
  "io-util",
113
68
  ] }
114
- html-to-markdown-rs = { version = "2.28.1", default-features = false }
69
+ html-to-markdown-rs = { version = "2.28.2", default-features = false }
115
70
 
116
71
  [dev-dependencies]
117
72
  pretty_assertions = "1.4"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.4.4'
4
+ VERSION = '4.4.5'
5
5
  end
data/vendor/Cargo.toml CHANGED
@@ -2,7 +2,7 @@
2
2
  members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract", "kreuzberg-paddle-ocr", "kreuzberg-pdfium-render"]
3
3
 
4
4
  [workspace.package]
5
- version = "4.4.4"
5
+ version = "4.4.5"
6
6
  edition = "2024"
7
7
  rust-version = "1.91"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -23,26 +23,28 @@ criterion = { version = "0.8", features = ["html_reports"] }
23
23
  ctor = "0.6"
24
24
  getrandom = { version = "0.4.2", features = ["wasm_js"] }
25
25
  hex = "0.4.3"
26
- html-to-markdown-rs = { version = "2.28.1", default-features = false }
27
- image = { version = "0.25.9", default-features = false }
26
+ html-to-markdown-rs = { version = "2.28.2", default-features = false }
27
+ image = { version = "0.25.10", default-features = false }
28
28
  itertools = "0.14"
29
29
  js-sys = "0.3"
30
+ kreuzberg = { path = "./crates/kreuzberg", version = "4.4.5", default-features = false }
31
+ kreuzberg-ffi = { path = "./crates/kreuzberg-ffi", version = "4.4.5" }
30
32
  lazy_static = "1.5.0"
31
- libc = "0.2.182"
33
+ libc = "0.2.183"
32
34
  log = "0.4"
33
35
  lzma-rust2 = { version = "0.16.2" }
34
36
  num_cpus = "1.17.0"
35
37
  once_cell = "1.21.3"
36
38
  ort = { version = "=2.0.0-rc.11", default-features = false }
37
39
  parking_lot = "0.12.5"
38
- pdfium-render = { package = "kreuzberg-pdfium-render", version = "4.3" }
40
+ pdfium-render = { package = "kreuzberg-pdfium-render", path = "crates/kreuzberg-pdfium-render", version = "4.3" }
39
41
  rayon = "1.11.0"
40
42
  reqwest = { version = "0.13.2", default-features = false }
41
43
  serde = { version = "1.0.228", features = ["derive"] }
42
44
  serde_json = { version = "1.0.149" }
43
- tempfile = "3.26.0"
45
+ tempfile = "3.27.0"
44
46
  thiserror = "2.0.18"
45
- tokio = { version = "1.50.0", features = ["rt", "rt-multi-thread", "macros", "sync", "process", "fs", "time", "io-util"] }
47
+ tokio = { version = "1.50.0", features = ["sync", "process", "fs", "io-util"] }
46
48
  toml = "1.0.6"
47
49
  tracing = "0.1"
48
50
  wasm-bindgen = { version = "0.2", features = ["enable-interning"] }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.4.4"
3
+ version = "4.4.5"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -25,13 +25,13 @@ pool-metrics = []
25
25
 
26
26
  simd-utf8 = ["dep:simdutf8"]
27
27
 
28
- tokio-runtime = ["dep:tokio"]
28
+ tokio-runtime = ["dep:tokio", "tokio/rt", "tokio/rt-multi-thread"]
29
29
 
30
30
  pdf = ["dep:pdfium-render", "dep:lopdf", "dep:image"]
31
31
  static-pdfium = ["pdf"]
32
32
  bundled-pdfium = ["pdf"]
33
33
  system-pdfium = ["pdf"]
34
- excel = ["dep:calamine", "tokio-runtime"]
34
+ excel = ["dep:calamine"]
35
35
  excel-wasm = ["dep:calamine"]
36
36
  office = [
37
37
  "dep:cfb",
@@ -70,6 +70,7 @@ paddle-ocr = [
70
70
  "dep:sha2",
71
71
  "dep:image",
72
72
  "dep:hf-hub",
73
+ "dep:ureq",
73
74
  "html",
74
75
  "tokio-runtime",
75
76
  "ocr",
@@ -105,7 +106,7 @@ wasm-target = [
105
106
  ]
106
107
  wasm-threads = ["dep:wasm-bindgen-rayon"]
107
108
 
108
- full = [
109
+ formats = [
109
110
  "pdf",
110
111
  "excel",
111
112
  "office",
@@ -113,18 +114,13 @@ full = [
113
114
  "html",
114
115
  "xml",
115
116
  "archives",
116
- "ocr",
117
- "paddle-ocr",
118
117
  "language-detection",
119
118
  "chunking",
120
- "embeddings",
121
119
  "quality",
122
120
  "keywords",
123
121
  "mdx",
124
- "api",
125
- "mcp",
126
- "otel",
127
122
  ]
123
+ full = ["formats", "ocr", "paddle-ocr", "embeddings", "api", "mcp", "otel"]
128
124
  server = ["pdf", "excel", "html", "ocr", "paddle-ocr", "chunking", "api", "mcp"]
129
125
 
130
126
  [build-dependencies]
@@ -141,7 +137,7 @@ dashmap = "6.1"
141
137
  simdutf8 = { version = "0.1", optional = true }
142
138
  hex = "0.4.3"
143
139
  lazy_static = "1.5.0"
144
- libc = "0.2.182"
140
+ libc = "0.2.183"
145
141
  memchr = "2.8.0"
146
142
  num_cpus = "1.17.0"
147
143
  once_cell = "1.21.3"
@@ -157,20 +153,23 @@ toml = "1.0.6"
157
153
  mime_guess = "2.0"
158
154
  rmp-serde = "1.3"
159
155
  thiserror = "2.0.18"
160
- tokio = { version = "1.50.0", features = ["rt", "rt-multi-thread", "macros", "sync", "process", "fs", "time", "io-util"] , optional = true }
156
+ tokio = { version = "1.50.0", features = ["sync", "process", "fs", "io-util"], optional = true }
161
157
  indexmap = "2.13.0"
162
158
  tracing = "0.1"
163
- pdfium-render = { package = "kreuzberg-pdfium-render", path = "../kreuzberg-pdfium-render" , features = ["thread_safe", "image_latest"], optional = true }
159
+ pdfium-render = { package = "kreuzberg-pdfium-render", path = "../kreuzberg-pdfium-render", features = ["thread_safe", "image_latest"], optional = true }
164
160
  lopdf = { version = "0.39.0", optional = true }
165
- calamine = { version = "0.33.0", features = ["dates"], optional = true }
161
+ calamine = { version = "0.34.0", features = ["dates"], optional = true }
166
162
 
167
163
  roxmltree = { version = "0.21.1", optional = true }
168
164
  zip = { version = "8.2.0", optional = true, default-features = false, features = [
169
165
  "deflate-flate2",
170
166
  ] }
171
167
  mail-parser = { version = "0.11.2", optional = true }
172
- html-to-markdown-rs = { version = "2.28.1", default-features = false , features = [
173
- "inline-images", "metadata", "visitor", ], optional = true }
168
+ html-to-markdown-rs = { version = "2.28.2", default-features = false, features = [
169
+ "inline-images",
170
+ "metadata",
171
+ "visitor",
172
+ ], optional = true }
174
173
  cfb = { version = "0.14.0", optional = true }
175
174
  quick-xml = { version = "0.39.2", features = ["serialize"], optional = true }
176
175
  tar = { version = "0.4.44", optional = true }
@@ -187,9 +186,17 @@ biblib = { version = "0.4", default-features = false, features = [
187
186
  ], optional = true }
188
187
  org = { version = "0.3", optional = true }
189
188
 
190
- kreuzberg-tesseract = { path = "../kreuzberg-tesseract", version = "4.3", optional = true }
191
- image = { version = "0.25.9", default-features = false , features = [
192
- "png", "jpeg", "webp", "bmp", "tiff", "gif", "pnm", "rayon", ], optional = true }
189
+ kreuzberg-tesseract = { path = "../kreuzberg-tesseract", optional = true }
190
+ image = { version = "0.25.10", default-features = false, features = [
191
+ "png",
192
+ "jpeg",
193
+ "webp",
194
+ "bmp",
195
+ "tiff",
196
+ "gif",
197
+ "pnm",
198
+ "rayon",
199
+ ], optional = true }
193
200
  tiff = { version = "0.11", optional = true }
194
201
  fast_image_resize = { version = "6.0.0", optional = true }
195
202
  kamadak-exif = { version = "0.6.1", optional = true }
@@ -209,7 +216,7 @@ axum = { version = "0.8", features = ["macros", "json", "multipart"], optional =
209
216
  tower = { version = "0.5", optional = true }
210
217
  tower-http = { version = "0.6", features = ["cors", "trace", "limit"], optional = true }
211
218
  utoipa = { version = "5.4", features = ["axum_extras"], optional = true }
212
- rmcp = { version = "1.1.0", features = [
219
+ rmcp = { version = "1.1.1", features = [
213
220
  "server",
214
221
  "macros",
215
222
  "base64",
@@ -224,25 +231,29 @@ infer = "0.19.0"
224
231
  sha2 = { version = "0.10", optional = true }
225
232
 
226
233
  [dev-dependencies]
227
- tempfile = "3.26.0"
234
+ tempfile = "3.27.0"
228
235
  filetime = "0.2"
229
236
  tar = "0.4.44"
230
237
  zip = { version = "8.2.0", default-features = false, features = ["deflate-flate2"] }
231
238
  serial_test = "3.4.0"
232
239
  anyhow = "1.0"
240
+ tokio = { version = "1.50.0", features = ["macros", "time"] }
233
241
  tokio-test = "0.4"
234
242
  tracing-subscriber = { version = "0.3", features = ["env-filter"] }
235
243
  criterion = { version = "0.8", features = ["html_reports"] }
236
- image = { version = "0.25.9", default-features = false , features = ["png"] }
244
+ image = { version = "0.25.10", default-features = false, features = ["png"] }
237
245
 
238
246
  [target.'cfg(all(not(target_os = "windows"), not(target_arch = "wasm32")))'.dependencies]
239
247
  pprof = { version = "0.15.0", features = ["flamegraph"], optional = true }
240
248
  # PaddleOCR via ONNX Runtime - not available on WASM (vendored from paddle-ocr-rs)
241
- kreuzberg-paddle-ocr = { path = "../kreuzberg-paddle-ocr", version = "4.3", optional = true }
249
+ kreuzberg-paddle-ocr = { path = "../kreuzberg-paddle-ocr", optional = true }
242
250
  # Use rustls on non-Windows platforms (Linux, macOS)
243
251
  # Blocking feature needed for model downloads
244
- reqwest = { version = "0.13.2", default-features = false , features = [
245
- "json", "rustls", "blocking", ], optional = true }
252
+ reqwest = { version = "0.13.2", default-features = false, features = [
253
+ "json",
254
+ "rustls",
255
+ "blocking",
256
+ ], optional = true }
246
257
  # Use rustls-tls for fastembed on non-Windows platforms
247
258
  fastembed = { version = "5.12", default-features = false, features = [
248
259
  "hf-hub-rustls-tls",
@@ -250,15 +261,18 @@ fastembed = { version = "5.12", default-features = false, features = [
250
261
  ], optional = true }
251
262
  hf-hub = { version = "0.5", default-features = false, features = ["ureq"], optional = true }
252
263
  # Force ureq (transitive dep via hf-hub) to use rustls on non-Windows
253
- ureq = { version = "3.2", default-features = false, features = ["rustls", "json"] }
264
+ ureq = { version = "3.2", default-features = false, features = ["rustls", "json"], optional = true }
254
265
 
255
266
  # Use native-tls on Windows to avoid aws-lc-sys CMake build issues with MinGW
256
267
  [target.'cfg(all(target_os = "windows", not(target_arch = "wasm32")))'.dependencies]
257
268
  # PaddleOCR via ONNX Runtime - not available on WASM (vendored from paddle-ocr-rs)
258
- kreuzberg-paddle-ocr = { path = "../kreuzberg-paddle-ocr", version = "4.3", optional = true }
269
+ kreuzberg-paddle-ocr = { path = "../kreuzberg-paddle-ocr", optional = true }
259
270
  # Blocking feature needed for model downloads
260
- reqwest = { version = "0.13.2", default-features = false , features = [
261
- "json", "native-tls", "blocking", ], optional = true }
271
+ reqwest = { version = "0.13.2", default-features = false, features = [
272
+ "json",
273
+ "native-tls",
274
+ "blocking",
275
+ ], optional = true }
262
276
  # Use native-tls for fastembed on Windows
263
277
  fastembed = { version = "5.12", default-features = false, features = [
264
278
  "hf-hub-native-tls",
@@ -266,7 +280,10 @@ fastembed = { version = "5.12", default-features = false, features = [
266
280
  ], optional = true }
267
281
  hf-hub = { version = "0.5", default-features = false, features = ["ureq"], optional = true }
268
282
  # Force ureq (transitive dep via hf-hub) to use native-tls on Windows
269
- ureq = { version = "3.2", default-features = false, features = ["native-tls", "json"] }
283
+ ureq = { version = "3.2", default-features = false, features = [
284
+ "native-tls",
285
+ "json",
286
+ ], optional = true }
270
287
 
271
288
  [target.'cfg(target_arch = "wasm32")'.dependencies]
272
289
  wasm-bindgen-rayon = { version = "1.3", optional = true }
@@ -17,7 +17,7 @@ High-performance document intelligence library for Rust. Extract text, metadata,
17
17
 
18
18
  This is the core Rust library that powers the Python, TypeScript, and Ruby bindings.
19
19
 
20
- > **🚀 Version 4.4.4 Release**
20
+ > **🚀 Version 4.4.5 Release**
21
21
  > This is a pre-release version. We invite you to test the library and [report any issues](https://github.com/kreuzberg-dev/kreuzberg/issues) you encounter.
22
22
  >
23
23
  > **Note**: The Rust crate is not currently published to crates.io for this RC. Use git dependencies or language bindings (Python, TypeScript, Ruby) instead.
@@ -170,6 +170,15 @@ pub async fn extract_handler(
170
170
  }
171
171
  };
172
172
  }
173
+ "pdf_password" => {
174
+ let pwd = field
175
+ .text()
176
+ .await
177
+ .map_err(|e| ApiError::validation(crate::error::KreuzbergError::validation(e.to_string())))?;
178
+ let cfg = config.get_or_insert_with(|| (*state.default_config).clone());
179
+ let pdf_opts = cfg.pdf_options.get_or_insert_with(Default::default);
180
+ pdf_opts.passwords.get_or_insert_with(Vec::new).push(pwd);
181
+ }
173
182
  _ => {}
174
183
  }
175
184
  }