kreuzberg 5.0.0.pre.rc.30 → 5.0.0.pre.rc.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 329bbeb566e9d1e7e8e89d90e96560fb107476f3b52471bf61e9120f84bbaad3
4
- data.tar.gz: 014d2b64d5ceb9e75c6f783de42c48d149c95398935d5acf77b69e78db130d10
3
+ metadata.gz: 12aacf589df9ea6e0e80ada2ef3c41194a5e74aa4185a438abce37554bea8e5d
4
+ data.tar.gz: 9f3cb43f31d255d734ae628e54905dca73d127c01a5c26ed7330d2eaf7a6e50f
5
5
  SHA512:
6
- metadata.gz: d03759c1705908e4073c2432b115f19ec5febf82de052ed63520d84a5d2d6d1c9e122f113c7f536dcf720f9c18605bc9234b38fd2e277af3c7b3ec32c72d889c
7
- data.tar.gz: 65d513133b42735259d32eede4935155d5e81e29112b1d3aa55596091c616231e44ba66da43e8ca11bc0aebf3e048a22aca452f391df407b51dd8a9950bfe841
6
+ metadata.gz: d071bcbbbeccd55799a47aaad69a74b72fa5ac1289c5cf61a8dedf287759e738a6c6e5876def7a95f1308db62c65c9a8978c731292ebb3333efe06162154cc13
7
+ data.tar.gz: 0d4770ba862de8637a724916efc1d4cd76057b313e8fa5314413169c115cfe198e26ee018f7576796f0fbf57a2d9118c78e0aac939f0068c5aea1381c1e23c73
@@ -206,9 +206,9 @@ checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
206
206
 
207
207
  [[package]]
208
208
  name = "arrayvec"
209
- version = "0.7.6"
209
+ version = "0.7.7"
210
210
  source = "registry+https://github.com/rust-lang/crates.io-index"
211
- checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
211
+ checksum = "f02882884d3e1bc524fb12c79f107f6ad0e1cfd498c536ffb494301740995dfe"
212
212
 
213
213
  [[package]]
214
214
  name = "as-slice"
@@ -824,9 +824,9 @@ dependencies = [
824
824
 
825
825
  [[package]]
826
826
  name = "cc"
827
- version = "1.2.64"
827
+ version = "1.2.65"
828
828
  source = "registry+https://github.com/rust-lang/crates.io-index"
829
- checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f"
829
+ checksum = "e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96"
830
830
  dependencies = [
831
831
  "find-msvc-tools",
832
832
  "jobserver",
@@ -1465,6 +1465,38 @@ version = "0.1.12"
1465
1465
  source = "registry+https://github.com/rust-lang/crates.io-index"
1466
1466
  checksum = "ac6b926516df9c60bfa16e107b21086399f8285a44ca9711344b9e553c5146e2"
1467
1467
 
1468
+ [[package]]
1469
+ name = "defmt"
1470
+ version = "1.1.0"
1471
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1472
+ checksum = "a6e524506490a1953d237cb87b1cfc1e46f88c18f10a22dfe0f507dc6bfc7f7f"
1473
+ dependencies = [
1474
+ "bitflags 1.3.2",
1475
+ "defmt-macros",
1476
+ ]
1477
+
1478
+ [[package]]
1479
+ name = "defmt-macros"
1480
+ version = "1.1.0"
1481
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1482
+ checksum = "f0a27770e9c8f719a79d8b638281f4d828f77d8fd61e0bd94451b9b85e576a0b"
1483
+ dependencies = [
1484
+ "defmt-parser",
1485
+ "proc-macro-error2",
1486
+ "proc-macro2",
1487
+ "quote",
1488
+ "syn 2.0.118",
1489
+ ]
1490
+
1491
+ [[package]]
1492
+ name = "defmt-parser"
1493
+ version = "1.0.0"
1494
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1495
+ checksum = "10d60334b3b2e7c9d91ef8150abfb6fa4c1c39ebbcf4a81c2e346aad939fee3e"
1496
+ dependencies = [
1497
+ "thiserror 2.0.18",
1498
+ ]
1499
+
1468
1500
  [[package]]
1469
1501
  name = "der"
1470
1502
  version = "0.8.0"
@@ -2111,9 +2143,9 @@ checksum = "312d2295c7302019c395cfb90dacd00a82a2eabd700429bba9c7a3f38dbbe11b"
2111
2143
 
2112
2144
  [[package]]
2113
2145
  name = "gcp_auth"
2114
- version = "0.12.6"
2146
+ version = "0.12.7"
2115
2147
  source = "registry+https://github.com/rust-lang/crates.io-index"
2116
- checksum = "c2b3d0b409a042a380111af38136310839af8ac1a0917fb6e84515ed1e4bf3ee"
2148
+ checksum = "26d27dbcc645b60b8e7f6e2868a9d7102ece97d1bb49c1288b5321fcc67f7260"
2117
2149
  dependencies = [
2118
2150
  "async-trait",
2119
2151
  "base64 0.22.1",
@@ -2125,6 +2157,7 @@ dependencies = [
2125
2157
  "hyper-rustls",
2126
2158
  "hyper-util",
2127
2159
  "ring",
2160
+ "rustls",
2128
2161
  "rustls-pki-types",
2129
2162
  "serde",
2130
2163
  "serde_json",
@@ -2482,9 +2515,9 @@ dependencies = [
2482
2515
 
2483
2516
  [[package]]
2484
2517
  name = "html-to-markdown-rs"
2485
- version = "3.6.15"
2518
+ version = "3.7.1"
2486
2519
  source = "registry+https://github.com/rust-lang/crates.io-index"
2487
- checksum = "de0dd81706c600309dda21949e23a30344d0153d2325faeeaf1122839b93651d"
2520
+ checksum = "965c9e8ff7244c3946c9508d9c4aa0ae355a5cccdea366689c20ec1303fef56c"
2488
2521
  dependencies = [
2489
2522
  "ahash 0.8.12",
2490
2523
  "astral-tl",
@@ -3063,10 +3096,11 @@ checksum = "47f142fe24a9c9944451e8349de0a56af5f3e7226dc46f3ed4d4ecc0b85af75e"
3063
3096
 
3064
3097
  [[package]]
3065
3098
  name = "jiff"
3066
- version = "0.2.28"
3099
+ version = "0.2.29"
3067
3100
  source = "registry+https://github.com/rust-lang/crates.io-index"
3068
- checksum = "4603d3033e49e2b0e31229fcab20a5d40089c607d975cd9c80551dc69eed9102"
3101
+ checksum = "34f877a98676d2fb664698d74cc6a51ce6c484ce8c770f05d0108ec9090aeb46"
3069
3102
  dependencies = [
3103
+ "defmt",
3070
3104
  "jiff-static",
3071
3105
  "jiff-tzdb-platform",
3072
3106
  "log",
@@ -3078,9 +3112,9 @@ dependencies = [
3078
3112
 
3079
3113
  [[package]]
3080
3114
  name = "jiff-static"
3081
- version = "0.2.28"
3115
+ version = "0.2.29"
3082
3116
  source = "registry+https://github.com/rust-lang/crates.io-index"
3083
- checksum = "782d32378dddf207193ac91cefb848ad41abb58195c95168e1291227a0832b47"
3117
+ checksum = "0666b5ab5ecaca213fc2a85b8c0083d9004e84ee2d5f9a7e0017aaf50986f25f"
3084
3118
  dependencies = [
3085
3119
  "proc-macro2",
3086
3120
  "quote",
@@ -3233,8 +3267,6 @@ dependencies = [
3233
3267
  "referencing",
3234
3268
  "regex",
3235
3269
  "regex-syntax",
3236
- "reqwest 0.13.4",
3237
- "rustls",
3238
3270
  "serde",
3239
3271
  "serde_json",
3240
3272
  "unicode-general-category",
@@ -3243,9 +3275,9 @@ dependencies = [
3243
3275
 
3244
3276
  [[package]]
3245
3277
  name = "kreuzberg"
3246
- version = "5.0.0-rc.30"
3278
+ version = "5.0.0-rc.32"
3247
3279
  source = "registry+https://github.com/rust-lang/crates.io-index"
3248
- checksum = "a5fc97a9058d41eeed6589717b843cbe1b452a40967ac805330e0d2e999c6a43"
3280
+ checksum = "fd3f7f001f2ea8d359463333c3f24389b56d58f0bb0802c1dc7d8163925bfd9f"
3249
3281
  dependencies = [
3250
3282
  "ahash 0.8.12",
3251
3283
  "async-trait",
@@ -3378,9 +3410,9 @@ dependencies = [
3378
3410
 
3379
3411
  [[package]]
3380
3412
  name = "kreuzberg-libheif"
3381
- version = "5.0.0-rc.30"
3413
+ version = "5.0.0-rc.32"
3382
3414
  source = "registry+https://github.com/rust-lang/crates.io-index"
3383
- checksum = "4f08a60d0135deceb0a3852e6627644a2f4a7069d7455fffc87e02e6ba59880e"
3415
+ checksum = "52762c0f3d21e23b592b5e598824f58dbcd69917a7a95209e4f85c2e294c5162"
3384
3416
  dependencies = [
3385
3417
  "cfg-if",
3386
3418
  "enumn",
@@ -3402,9 +3434,9 @@ dependencies = [
3402
3434
 
3403
3435
  [[package]]
3404
3436
  name = "kreuzberg-paddle-ocr"
3405
- version = "5.0.0-rc.30"
3437
+ version = "5.0.0-rc.32"
3406
3438
  source = "registry+https://github.com/rust-lang/crates.io-index"
3407
- checksum = "9b6f4516c42aad6d30227faabbb06e0896b8fb17bfcec2cdef5f9f73471df5d3"
3439
+ checksum = "f45f032fca2841d98d42cf393b1b94fa87de2c4be395923f5167ac720d8e1feb"
3408
3440
  dependencies = [
3409
3441
  "geo-clipper",
3410
3442
  "geo-types",
@@ -3418,7 +3450,7 @@ dependencies = [
3418
3450
 
3419
3451
  [[package]]
3420
3452
  name = "kreuzberg-rb"
3421
- version = "5.0.0-rc.30"
3453
+ version = "5.0.0-rc.32"
3422
3454
  dependencies = [
3423
3455
  "async-trait",
3424
3456
  "kreuzberg",
@@ -3431,9 +3463,9 @@ dependencies = [
3431
3463
 
3432
3464
  [[package]]
3433
3465
  name = "kreuzberg-tesseract"
3434
- version = "5.0.0-rc.30"
3466
+ version = "5.0.0-rc.32"
3435
3467
  source = "registry+https://github.com/rust-lang/crates.io-index"
3436
- checksum = "f2c9a29237793847b570680837898082c2b0dc958db0bb68f390fac4570246e3"
3468
+ checksum = "ca1bae2b7d1c6d3b2a25c8ee622c0a024f80221c5179d214f72f09eeae6afda1"
3437
3469
  dependencies = [
3438
3470
  "cc",
3439
3471
  "cmake",
@@ -3550,9 +3582,9 @@ checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
3550
3582
 
3551
3583
  [[package]]
3552
3584
  name = "liter-llm"
3553
- version = "1.7.1"
3585
+ version = "1.8.1"
3554
3586
  source = "registry+https://github.com/rust-lang/crates.io-index"
3555
- checksum = "0552ad2a855bd57c422933227a1d664ad66b1e773dd2244a14194500880ef544"
3587
+ checksum = "9fa5f699609fb9153fb48d5857b302b25e0088f7ebbb0de5e8847fa442a3315a"
3556
3588
  dependencies = [
3557
3589
  "ahash 0.8.12",
3558
3590
  "async-trait",
@@ -3623,9 +3655,9 @@ dependencies = [
3623
3655
 
3624
3656
  [[package]]
3625
3657
  name = "log"
3626
- version = "0.4.32"
3658
+ version = "0.4.33"
3627
3659
  source = "registry+https://github.com/rust-lang/crates.io-index"
3628
- checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
3660
+ checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
3629
3661
 
3630
3662
  [[package]]
3631
3663
  name = "loop9"
@@ -3765,9 +3797,9 @@ dependencies = [
3765
3797
 
3766
3798
  [[package]]
3767
3799
  name = "mail-parser"
3768
- version = "0.11.3"
3800
+ version = "0.11.4"
3769
3801
  source = "registry+https://github.com/rust-lang/crates.io-index"
3770
- checksum = "d8a2420e9ce11c2b0583ca97ddff7ab2398c8a613154e9b72e3bafdbf767f1d7"
3802
+ checksum = "f2c0e7e0704500930be5b6c629f30d23fd1dde4d1800e138e04b3fa302e64d51"
3771
3803
  dependencies = [
3772
3804
  "hashify",
3773
3805
  ]
@@ -3851,9 +3883,9 @@ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
3851
3883
 
3852
3884
  [[package]]
3853
3885
  name = "memmap2"
3854
- version = "0.9.10"
3886
+ version = "0.9.11"
3855
3887
  source = "registry+https://github.com/rust-lang/crates.io-index"
3856
- checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
3888
+ checksum = "d1219ed1b7f229ee7104d281dd01d6802fe28bb6e95d292942c4daacdeb798c0"
3857
3889
  dependencies = [
3858
3890
  "libc",
3859
3891
  ]
@@ -4479,9 +4511,9 @@ dependencies = [
4479
4511
 
4480
4512
  [[package]]
4481
4513
  name = "pdf_oxide"
4482
- version = "0.3.66"
4514
+ version = "0.3.67"
4483
4515
  source = "registry+https://github.com/rust-lang/crates.io-index"
4484
- checksum = "b30ba994e5ce01c9d584ead6dc5d28d69a6f24c180873ee262e4bb9e2f5dadff"
4516
+ checksum = "ebe40b26fba42c41c162dbfd33a6d4a6f90a57b6f114dddaa1806d973f909c66"
4485
4517
  dependencies = [
4486
4518
  "aes 0.9.1",
4487
4519
  "base64 0.22.1",
@@ -4783,6 +4815,28 @@ dependencies = [
4783
4815
  "toml_edit",
4784
4816
  ]
4785
4817
 
4818
+ [[package]]
4819
+ name = "proc-macro-error-attr2"
4820
+ version = "2.0.0"
4821
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4822
+ checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5"
4823
+ dependencies = [
4824
+ "proc-macro2",
4825
+ "quote",
4826
+ ]
4827
+
4828
+ [[package]]
4829
+ name = "proc-macro-error2"
4830
+ version = "2.0.1"
4831
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4832
+ checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802"
4833
+ dependencies = [
4834
+ "proc-macro-error-attr2",
4835
+ "proc-macro2",
4836
+ "quote",
4837
+ "syn 2.0.118",
4838
+ ]
4839
+
4786
4840
  [[package]]
4787
4841
  name = "proc-macro2"
4788
4842
  version = "1.0.106"
@@ -4935,9 +4989,9 @@ dependencies = [
4935
4989
 
4936
4990
  [[package]]
4937
4991
  name = "quinn"
4938
- version = "0.11.9"
4992
+ version = "0.11.11"
4939
4993
  source = "registry+https://github.com/rust-lang/crates.io-index"
4940
- checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
4994
+ checksum = "0c1a41e437b6bbd489372cd4971de128e85c855f56c57f283d20ff016cf7c0a8"
4941
4995
  dependencies = [
4942
4996
  "bytes",
4943
4997
  "cfg_aliases",
@@ -4955,9 +5009,9 @@ dependencies = [
4955
5009
 
4956
5010
  [[package]]
4957
5011
  name = "quinn-proto"
4958
- version = "0.11.14"
5012
+ version = "0.11.15"
4959
5013
  source = "registry+https://github.com/rust-lang/crates.io-index"
4960
- checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
5014
+ checksum = "4fcb935c5bec503c2f0e306bdd3e58bb9029dcb14fa8d9ac76e3a5256ac0763e"
4961
5015
  dependencies = [
4962
5016
  "aws-lc-rs",
4963
5017
  "bytes",
@@ -4991,9 +5045,9 @@ dependencies = [
4991
5045
 
4992
5046
  [[package]]
4993
5047
  name = "quote"
4994
- version = "1.0.45"
5048
+ version = "1.0.46"
4995
5049
  source = "registry+https://github.com/rust-lang/crates.io-index"
4996
- checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
5050
+ checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
4997
5051
  dependencies = [
4998
5052
  "proc-macro2",
4999
5053
  ]
@@ -5659,9 +5713,9 @@ dependencies = [
5659
5713
 
5660
5714
  [[package]]
5661
5715
  name = "rustls"
5662
- version = "0.23.40"
5716
+ version = "0.23.41"
5663
5717
  source = "registry+https://github.com/rust-lang/crates.io-index"
5664
- checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"
5718
+ checksum = "6b92b125634d9b795e7beca796cc790df15a7fb38323bf3196fda83292d06b1f"
5665
5719
  dependencies = [
5666
5720
  "aws-lc-rs",
5667
5721
  "log",
@@ -6760,9 +6814,9 @@ dependencies = [
6760
6814
 
6761
6815
  [[package]]
6762
6816
  name = "time"
6763
- version = "0.3.49"
6817
+ version = "0.3.51"
6764
6818
  source = "registry+https://github.com/rust-lang/crates.io-index"
6765
- checksum = "711a53c2d47bbd818258c498c8dbfe186a2526c631495cfe7e078567f86b8469"
6819
+ checksum = "85c17d80feb7334b40c484e45ed1a5273dfd8bfda537c3be2e74a06a6686f327"
6766
6820
  dependencies = [
6767
6821
  "deranged",
6768
6822
  "num-conv",
@@ -6780,9 +6834,9 @@ checksum = "9e1c906769ad99c88eaa54e728060edef082f8e358ff32030cb7c7d315e81109"
6780
6834
 
6781
6835
  [[package]]
6782
6836
  name = "time-macros"
6783
- version = "0.2.29"
6837
+ version = "0.2.30"
6784
6838
  source = "registry+https://github.com/rust-lang/crates.io-index"
6785
- checksum = "71c652a3727a9cbb9a02f707f530b618ce00d0ccd762009c8c23bd191df3c17d"
6839
+ checksum = "dcef1a61bdb119096e153208ec5cbec23944ce8bca13be5c7f60c634f7403935"
6786
6840
  dependencies = [
6787
6841
  "num-conv",
6788
6842
  "time-core",
@@ -7193,9 +7247,9 @@ checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782"
7193
7247
 
7194
7248
  [[package]]
7195
7249
  name = "tree-sitter-language-pack"
7196
- version = "1.9.1"
7250
+ version = "1.10.3"
7197
7251
  source = "registry+https://github.com/rust-lang/crates.io-index"
7198
- checksum = "09a9d3b46347363ce7dc86d1df53ebe6af125cb9b30c24461d190aa942da6b50"
7252
+ checksum = "4a19cf15df25708c7c837024c7fda7ca92e218ebc87652abd0ff07b057ff7105"
7199
7253
  dependencies = [
7200
7254
  "ahash 0.8.12",
7201
7255
  "cc",
@@ -8404,9 +8458,9 @@ dependencies = [
8404
8458
 
8405
8459
  [[package]]
8406
8460
  name = "zlib-rs"
8407
- version = "0.6.3"
8461
+ version = "0.6.4"
8408
8462
  source = "registry+https://github.com/rust-lang/crates.io-index"
8409
- checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513"
8463
+ checksum = "977347db8caa080403f6b6b7c1cda9479a8e869316f7e13a59b19076a40f94e3"
8410
8464
 
8411
8465
  [[package]]
8412
8466
  name = "zmij"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg-rb"
3
- version = "5.0.0-rc.30"
3
+ version = "5.0.0-rc.32"
4
4
  edition = "2024"
5
5
  license = "Elastic-2.0"
6
6
  description = "High-performance document intelligence library"
@@ -58,7 +58,7 @@ xml = ["kreuzberg/xml"]
58
58
 
59
59
  [dependencies]
60
60
  async-trait = "0.1"
61
- kreuzberg = { version = "5.0.0-rc.30", features = ["full", "pdf", "ocr", "paddle-ocr", "paddle-ocr-types", "layout-detection", "layout-types", "embeddings", "embedding-presets", "reranker", "reranker-presets", "chunking", "keywords-yake", "keywords-rake", "language-detection", "html", "tree-sitter", "office", "email", "archives", "stopwords", "auto-rotate", "auto-rotate-types", "tokio-runtime", "api", "api-types", "mcp", "liter-llm", "quality", "svg", "transcription", "transcription-types", "classification", "captioning", "ner-onnx", "ner-llm", "diff", "markdown-footnotes", "redaction", "enrichment", "heuristics", "presets", "structured"] }
61
+ kreuzberg = { version = "5.0.0-rc.32", features = ["full", "pdf", "ocr", "paddle-ocr", "paddle-ocr-types", "layout-detection", "layout-types", "embeddings", "embedding-presets", "reranker", "reranker-presets", "chunking", "keywords-yake", "keywords-rake", "language-detection", "html", "tree-sitter", "office", "email", "archives", "stopwords", "auto-rotate", "auto-rotate-types", "tokio-runtime", "api", "api-types", "mcp", "liter-llm", "quality", "svg", "transcription", "transcription-types", "classification", "captioning", "ner-onnx", "ner-llm", "diff", "markdown-footnotes", "redaction", "enrichment", "heuristics", "presets", "structured"] }
62
62
  magnus = "0.8"
63
63
  rb-sys = ">=0.9, <0.9.128"
64
64
  serde = { version = "1", features = ["derive"] }
@@ -1,5 +1,5 @@
1
1
  // This file is auto-generated by alef. DO NOT EDIT.
2
- // alef:hash:1865aced39145e92bc65e9f4ca999b62f1f2b003e5fe0ed4bd2b1050a8f64a0f
2
+ // alef:hash:6a863205e7a486a4e1b97dac288554ebbf65a8b3ad1aaa6799965109e6543cd4
3
3
  // Re-generate with: alef generate
4
4
  #![allow(dead_code, unused_imports, unused_variables)]
5
5
  #![allow(
@@ -20448,9 +20448,11 @@ impl magnus::TryConvert for ImageOutputFormat {
20448
20448
  })?
20449
20449
  };
20450
20450
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
20451
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
20451
20452
  // If that fails, try treating it as a plain string value and wrap in quotes
20452
20453
  // If both fail, try as Custom variant (for untagged enum support)
20453
20454
  serde_json::from_str(&json_str)
20455
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
20454
20456
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
20455
20457
  .or_else(|_| {
20456
20458
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -20509,6 +20511,7 @@ impl magnus::TryConvert for OutputFormat {
20509
20511
  })?
20510
20512
  };
20511
20513
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
20514
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
20512
20515
  // If that fails, try treating it as a plain string value and wrap in quotes
20513
20516
  // If both fail, try as Custom variant (for untagged enum support)
20514
20517
  serde_json::from_str(&json_str)
@@ -20803,9 +20806,11 @@ impl magnus::TryConvert for VlmFallbackPolicy {
20803
20806
  })?
20804
20807
  };
20805
20808
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
20809
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
20806
20810
  // If that fails, try treating it as a plain string value and wrap in quotes
20807
20811
  // If both fail, try as Custom variant (for untagged enum support)
20808
20812
  serde_json::from_str(&json_str)
20813
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "mode": json_str })))
20809
20814
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
20810
20815
  .or_else(|_| {
20811
20816
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -20950,9 +20955,11 @@ impl magnus::TryConvert for ChunkSizing {
20950
20955
  })?
20951
20956
  };
20952
20957
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
20958
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
20953
20959
  // If that fails, try treating it as a plain string value and wrap in quotes
20954
20960
  // If both fail, try as Custom variant (for untagged enum support)
20955
20961
  serde_json::from_str(&json_str)
20962
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
20956
20963
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
20957
20964
  .or_else(|_| {
20958
20965
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -21011,9 +21018,11 @@ impl magnus::TryConvert for EmbeddingModelType {
21011
21018
  })?
21012
21019
  };
21013
21020
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
21021
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
21014
21022
  // If that fails, try treating it as a plain string value and wrap in quotes
21015
21023
  // If both fail, try as Custom variant (for untagged enum support)
21016
21024
  serde_json::from_str(&json_str)
21025
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
21017
21026
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
21018
21027
  .or_else(|_| {
21019
21028
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -21083,9 +21092,11 @@ impl magnus::TryConvert for RerankerModelType {
21083
21092
  })?
21084
21093
  };
21085
21094
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
21095
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
21086
21096
  // If that fails, try treating it as a plain string value and wrap in quotes
21087
21097
  // If both fail, try as Custom variant (for untagged enum support)
21088
21098
  serde_json::from_str(&json_str)
21099
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
21089
21100
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
21090
21101
  .or_else(|_| {
21091
21102
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -21822,9 +21833,11 @@ impl magnus::TryConvert for NodeContent {
21822
21833
  })?
21823
21834
  };
21824
21835
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
21836
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
21825
21837
  // If that fails, try treating it as a plain string value and wrap in quotes
21826
21838
  // If both fail, try as Custom variant (for untagged enum support)
21827
21839
  serde_json::from_str(&json_str)
21840
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "node_type": json_str })))
21828
21841
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
21829
21842
  .or_else(|_| {
21830
21843
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -21889,9 +21902,11 @@ impl magnus::TryConvert for AnnotationKind {
21889
21902
  })?
21890
21903
  };
21891
21904
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
21905
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
21892
21906
  // If that fails, try treating it as a plain string value and wrap in quotes
21893
21907
  // If both fail, try as Custom variant (for untagged enum support)
21894
21908
  serde_json::from_str(&json_str)
21909
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "annotation_type": json_str })))
21895
21910
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
21896
21911
  .or_else(|_| {
21897
21912
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -21954,6 +21969,7 @@ impl magnus::TryConvert for EntityCategory {
21954
21969
  })?
21955
21970
  };
21956
21971
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
21972
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
21957
21973
  // If that fails, try treating it as a plain string value and wrap in quotes
21958
21974
  // If both fail, try as Custom variant (for untagged enum support)
21959
21975
  serde_json::from_str(&json_str)
@@ -22389,9 +22405,11 @@ impl magnus::TryConvert for FormatMetadata {
22389
22405
  })?
22390
22406
  };
22391
22407
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
22408
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
22392
22409
  // If that fails, try treating it as a plain string value and wrap in quotes
22393
22410
  // If both fail, try as Custom variant (for untagged enum support)
22394
22411
  serde_json::from_str(&json_str)
22412
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "format_type": json_str })))
22395
22413
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
22396
22414
  .or_else(|_| {
22397
22415
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -22778,9 +22796,11 @@ impl magnus::TryConvert for OcrBoundingGeometry {
22778
22796
  })?
22779
22797
  };
22780
22798
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
22799
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
22781
22800
  // If that fails, try treating it as a plain string value and wrap in quotes
22782
22801
  // If both fail, try as Custom variant (for untagged enum support)
22783
22802
  serde_json::from_str(&json_str)
22803
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
22784
22804
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
22785
22805
  .or_else(|_| {
22786
22806
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -22986,6 +23006,7 @@ impl magnus::TryConvert for PiiCategory {
22986
23006
  })?
22987
23007
  };
22988
23008
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23009
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
22989
23010
  // If that fails, try treating it as a plain string value and wrap in quotes
22990
23011
  // If both fail, try as Custom variant (for untagged enum support)
22991
23012
  serde_json::from_str(&json_str)
@@ -23043,9 +23064,11 @@ impl magnus::TryConvert for DiffLine {
23043
23064
  })?
23044
23065
  };
23045
23066
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23067
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23046
23068
  // If that fails, try treating it as a plain string value and wrap in quotes
23047
23069
  // If both fail, try as Custom variant (for untagged enum support)
23048
23070
  serde_json::from_str(&json_str)
23071
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "kind": json_str })))
23049
23072
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23050
23073
  .or_else(|_| {
23051
23074
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -23172,9 +23195,11 @@ impl magnus::TryConvert for RevisionAnchor {
23172
23195
  })?
23173
23196
  };
23174
23197
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23198
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23175
23199
  // If that fails, try treating it as a plain string value and wrap in quotes
23176
23200
  // If both fail, try as Custom variant (for untagged enum support)
23177
23201
  serde_json::from_str(&json_str)
23202
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
23178
23203
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23179
23204
  .or_else(|_| {
23180
23205
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -23415,9 +23440,11 @@ impl magnus::TryConvert for EnrichStatus {
23415
23440
  })?
23416
23441
  };
23417
23442
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23443
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23418
23444
  // If that fails, try treating it as a plain string value and wrap in quotes
23419
23445
  // If both fail, try as Custom variant (for untagged enum support)
23420
23446
  serde_json::from_str(&json_str)
23447
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "status": json_str })))
23421
23448
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23422
23449
  .or_else(|_| {
23423
23450
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -23519,9 +23546,11 @@ impl magnus::TryConvert for ChunkingDecision {
23519
23546
  })?
23520
23547
  };
23521
23548
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23549
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23522
23550
  // If that fails, try treating it as a plain string value and wrap in quotes
23523
23551
  // If both fail, try as Custom variant (for untagged enum support)
23524
23552
  serde_json::from_str(&json_str)
23553
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
23525
23554
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23526
23555
  .or_else(|_| {
23527
23556
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -23597,9 +23626,11 @@ impl magnus::TryConvert for NoChunkingReason {
23597
23626
  })?
23598
23627
  };
23599
23628
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23629
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23600
23630
  // If that fails, try treating it as a plain string value and wrap in quotes
23601
23631
  // If both fail, try as Custom variant (for untagged enum support)
23602
23632
  serde_json::from_str(&json_str)
23633
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
23603
23634
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23604
23635
  .or_else(|_| {
23605
23636
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -23658,9 +23689,11 @@ impl magnus::TryConvert for ChunkingReason {
23658
23689
  })?
23659
23690
  };
23660
23691
  // Try deserializing as JSON first (handles JSON strings like "\"markdown\"" or "{\"click\":{\"selector\":\"...\"}}\"")
23692
+ // For internally-tagged enums, a bare variant string is wrapped as {"<tag>": value}.
23661
23693
  // If that fails, try treating it as a plain string value and wrap in quotes
23662
23694
  // If both fail, try as Custom variant (for untagged enum support)
23663
23695
  serde_json::from_str(&json_str)
23696
+ .or_else(|_| serde_json::from_value(serde_json::json!({ "type": json_str })))
23664
23697
  .or_else(|_| serde_json::from_str(&format!("\"{json_str}\"")))
23665
23698
  .or_else(|_| {
23666
23699
  // Try as a JSON string for Custom variant (untagged enums accept any remaining value)
@@ -25743,6 +25776,30 @@ fn render_pdf_page_to_png(args: &[magnus::Value]) -> Result<Vec<u8>, Error> {
25743
25776
  Ok(result.to_vec())
25744
25777
  }
25745
25778
 
25779
+ #[cfg(feature = "pdf")]
25780
+ fn pdf_page_count(args: &[magnus::Value]) -> Result<usize, Error> {
25781
+ let args = magnus::scan_args::scan_args::<(Vec<u8>,), (Option<magnus::Value>,), (), (), (), ()>(args)?;
25782
+ let (pdf_bytes,) = args.required;
25783
+
25784
+ let (password,) = args.optional;
25785
+
25786
+ let password: Option<String> = password.and_then(|v| {
25787
+ if v.is_nil() {
25788
+ None
25789
+ } else {
25790
+ Some(String::try_convert(v).unwrap_or_default())
25791
+ }
25792
+ });
25793
+
25794
+ let result = kreuzberg::pdf_page_count(&pdf_bytes, password.as_deref()).map_err(|e| {
25795
+ magnus::Error::new(
25796
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
25797
+ e.to_string(),
25798
+ )
25799
+ })?;
25800
+ Ok(result)
25801
+ }
25802
+
25746
25803
  #[cfg(all(feature = "captioning", feature = "tokio-runtime"))]
25747
25804
  fn caption_image(args: &[magnus::Value]) -> Result<String, Error> {
25748
25805
  let args =
@@ -37819,6 +37876,8 @@ fn ruby_init(ruby: &Ruby) -> Result<(), Error> {
37819
37876
 
37820
37877
  module.define_module_function("render_pdf_page_to_png", function!(render_pdf_page_to_png, -1))?;
37821
37878
 
37879
+ module.define_module_function("pdf_page_count", function!(pdf_page_count, -1))?;
37880
+
37822
37881
  module.define_module_function("caption_image", function!(caption_image, -1))?;
37823
37882
 
37824
37883
  module.define_module_function("caption_image_async", function!(caption_image_async, -1))?;
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:1865aced39145e92bc65e9f4ca999b62f1f2b003e5fe0ed4bd2b1050a8f64a0f
2
+ # alef:hash:6a863205e7a486a4e1b97dac288554ebbf65a8b3ad1aaa6799965109e6543cd4
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
@@ -1,10 +1,10 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:1865aced39145e92bc65e9f4ca999b62f1f2b003e5fe0ed4bd2b1050a8f64a0f
2
+ # alef:hash:6a863205e7a486a4e1b97dac288554ebbf65a8b3ad1aaa6799965109e6543cd4
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
6
6
 
7
7
  module Kreuzberg
8
8
  ## The version string for this package.
9
- VERSION = "5.0.0.pre.rc.30"
9
+ VERSION = "5.0.0.pre.rc.32"
10
10
  end
data/lib/kreuzberg.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:1865aced39145e92bc65e9f4ca999b62f1f2b003e5fe0ed4bd2b1050a8f64a0f
2
+ # alef:hash:6a863205e7a486a4e1b97dac288554ebbf65a8b3ad1aaa6799965109e6543cd4
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
data/lib/kreuzberg_rb.so CHANGED
Binary file
data/sig/types.rbs CHANGED
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:1865aced39145e92bc65e9f4ca999b62f1f2b003e5fe0ed4bd2b1050a8f64a0f
2
+ # alef:hash:6a863205e7a486a4e1b97dac288554ebbf65a8b3ad1aaa6799965109e6543cd4
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
 
@@ -2504,6 +2504,8 @@ module Kreuzberg
2504
2504
 
2505
2505
  def self.render_pdf_page_to_png: (String pdf_bytes, Integer page_index, ?Integer dpi, ?String password) -> String
2506
2506
 
2507
+ def self.pdf_page_count: (String pdf_bytes, ?String password) -> Integer
2508
+
2507
2509
  def self.caption_image: (String image_bytes, LlmConfig llm_config, ?String custom_prompt) -> String
2508
2510
 
2509
2511
  def self.caption_image_file: (String path, LlmConfig llm_config, ?String custom_prompt) -> String
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzberg
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0.pre.rc.30
4
+ version: 5.0.0.pre.rc.32
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld <naaman@kreuzberg.dev>
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-22 00:00:00.000000000 Z
11
+ date: 2026-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys