kreuzberg 4.0.0.pre.rc.14 → 4.0.0.pre.rc.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b0f873b7da0609856d3f396a8d43cc30bcd62f7919fa0ec7572f32f990f99f6
4
- data.tar.gz: ea902df98f54a593113a999dc08c7497d38008d0df3881a677249453b4ca3886
3
+ metadata.gz: 1ac94696cb48598d98ae55f75c69c59e1d248577b965a3921e21998ee33d2352
4
+ data.tar.gz: 684e9f74a5f0d5c2c52677fec3cec493707b084dc77815396b237864dfeded90
5
5
  SHA512:
6
- metadata.gz: 197bb0ad826ab4362efcff8dc5fded982360d9cb252d5150c786a762d28d3bf98c7d72b6e9334dd40ae1e65712a54c0261dbd71a037f098cafb599642269dae3
7
- data.tar.gz: e206c9553e656a00ee7722e7f570475037689e874540b37fe5d9c6353e64ba0de05c82893b5d2ae057d68d3320d3e59c624553b8fff08f2ae3c896506c6c275b
6
+ metadata.gz: 6ed0b13217aad741e169850f155a28f921a37a41ffa95fb12a733798b49625f7a9db030eae90ddf00ee3e367b5a563a426fa301c8a16604c8ad5ca3ba78432fc
7
+ data.tar.gz: 6c3acf2fb24f573a65e81fdac91f3735a6e2335c340d79a453d73fb43b63b807a8b9e93bbbba38a8c55550be72f0f503513b142238a1c3965e279d8ed522b3ae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.14)
4
+ kreuzberg (4.0.0.pre.rc.15)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -136,7 +136,6 @@ GEM
136
136
  yard (0.9.38)
137
137
 
138
138
  PLATFORMS
139
- arm64-darwin-23
140
139
  arm64-darwin-24
141
140
  x86_64-linux
142
141
 
@@ -75,56 +75,6 @@ dependencies = [
75
75
  "libc",
76
76
  ]
77
77
 
78
- [[package]]
79
- name = "anstream"
80
- version = "0.6.21"
81
- source = "registry+https://github.com/rust-lang/crates.io-index"
82
- checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
83
- dependencies = [
84
- "anstyle",
85
- "anstyle-parse",
86
- "anstyle-query",
87
- "anstyle-wincon",
88
- "colorchoice",
89
- "is_terminal_polyfill",
90
- "utf8parse",
91
- ]
92
-
93
- [[package]]
94
- name = "anstyle"
95
- version = "1.0.13"
96
- source = "registry+https://github.com/rust-lang/crates.io-index"
97
- checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
98
-
99
- [[package]]
100
- name = "anstyle-parse"
101
- version = "0.2.7"
102
- source = "registry+https://github.com/rust-lang/crates.io-index"
103
- checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
104
- dependencies = [
105
- "utf8parse",
106
- ]
107
-
108
- [[package]]
109
- name = "anstyle-query"
110
- version = "1.1.5"
111
- source = "registry+https://github.com/rust-lang/crates.io-index"
112
- checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
113
- dependencies = [
114
- "windows-sys 0.61.2",
115
- ]
116
-
117
- [[package]]
118
- name = "anstyle-wincon"
119
- version = "3.0.11"
120
- source = "registry+https://github.com/rust-lang/crates.io-index"
121
- checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
122
- dependencies = [
123
- "anstyle",
124
- "once_cell_polyfill",
125
- "windows-sys 0.61.2",
126
- ]
127
-
128
78
  [[package]]
129
79
  name = "anyhow"
130
80
  version = "1.0.100"
@@ -466,30 +416,15 @@ dependencies = [
466
416
  "syn",
467
417
  ]
468
418
 
469
- [[package]]
470
- name = "bit-set"
471
- version = "0.6.0"
472
- source = "registry+https://github.com/rust-lang/crates.io-index"
473
- checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f"
474
- dependencies = [
475
- "bit-vec 0.7.0",
476
- ]
477
-
478
419
  [[package]]
479
420
  name = "bit-set"
480
421
  version = "0.8.0"
481
422
  source = "registry+https://github.com/rust-lang/crates.io-index"
482
423
  checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
483
424
  dependencies = [
484
- "bit-vec 0.8.0",
425
+ "bit-vec",
485
426
  ]
486
427
 
487
- [[package]]
488
- name = "bit-vec"
489
- version = "0.7.0"
490
- source = "registry+https://github.com/rust-lang/crates.io-index"
491
- checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
492
-
493
428
  [[package]]
494
429
  name = "bit-vec"
495
430
  version = "0.8.0"
@@ -661,25 +596,6 @@ dependencies = [
661
596
  "cipher",
662
597
  ]
663
598
 
664
- [[package]]
665
- name = "cbindgen"
666
- version = "0.29.2"
667
- source = "registry+https://github.com/rust-lang/crates.io-index"
668
- checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799"
669
- dependencies = [
670
- "clap",
671
- "heck",
672
- "indexmap",
673
- "log",
674
- "proc-macro2",
675
- "quote",
676
- "serde",
677
- "serde_json",
678
- "syn",
679
- "tempfile",
680
- "toml 0.9.10+spec-1.1.0",
681
- ]
682
-
683
599
  [[package]]
684
600
  name = "cc"
685
601
  version = "1.2.50"
@@ -780,33 +696,6 @@ dependencies = [
780
696
  "libloading 0.8.9",
781
697
  ]
782
698
 
783
- [[package]]
784
- name = "clap"
785
- version = "4.5.53"
786
- source = "registry+https://github.com/rust-lang/crates.io-index"
787
- checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
788
- dependencies = [
789
- "clap_builder",
790
- ]
791
-
792
- [[package]]
793
- name = "clap_builder"
794
- version = "4.5.53"
795
- source = "registry+https://github.com/rust-lang/crates.io-index"
796
- checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
797
- dependencies = [
798
- "anstream",
799
- "anstyle",
800
- "clap_lex",
801
- "strsim",
802
- ]
803
-
804
- [[package]]
805
- name = "clap_lex"
806
- version = "0.7.6"
807
- source = "registry+https://github.com/rust-lang/crates.io-index"
808
- checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
809
-
810
699
  [[package]]
811
700
  name = "cmake"
812
701
  version = "0.1.57"
@@ -831,12 +720,6 @@ version = "1.1.0"
831
720
  source = "registry+https://github.com/rust-lang/crates.io-index"
832
721
  checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
833
722
 
834
- [[package]]
835
- name = "colorchoice"
836
- version = "1.0.4"
837
- source = "registry+https://github.com/rust-lang/crates.io-index"
838
- checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
839
-
840
723
  [[package]]
841
724
  name = "compact_str"
842
725
  version = "0.9.0"
@@ -945,9 +828,9 @@ dependencies = [
945
828
 
946
829
  [[package]]
947
830
  name = "crc"
948
- version = "3.4.0"
831
+ version = "3.3.0"
949
832
  source = "registry+https://github.com/rust-lang/crates.io-index"
950
- checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d"
833
+ checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
951
834
  dependencies = [
952
835
  "crc-catalog",
953
836
  ]
@@ -1401,7 +1284,7 @@ version = "0.14.0"
1401
1284
  source = "registry+https://github.com/rust-lang/crates.io-index"
1402
1285
  checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
1403
1286
  dependencies = [
1404
- "bit-set 0.8.0",
1287
+ "bit-set",
1405
1288
  "regex-automata",
1406
1289
  "regex-syntax",
1407
1290
  ]
@@ -1497,17 +1380,6 @@ dependencies = [
1497
1380
  "windows-sys 0.60.2",
1498
1381
  ]
1499
1382
 
1500
- [[package]]
1501
- name = "filetime_creation"
1502
- version = "0.2.0"
1503
- source = "registry+https://github.com/rust-lang/crates.io-index"
1504
- checksum = "c25b5d475550e559de5b0c0084761c65325444e3b6c9e298af9cefe7a9ef3a5f"
1505
- dependencies = [
1506
- "cfg-if",
1507
- "filetime",
1508
- "windows-sys 0.52.0",
1509
- ]
1510
-
1511
1383
  [[package]]
1512
1384
  name = "find-msvc-tools"
1513
1385
  version = "0.1.5"
@@ -2312,12 +2184,6 @@ dependencies = [
2312
2184
  "serde",
2313
2185
  ]
2314
2186
 
2315
- [[package]]
2316
- name = "is_terminal_polyfill"
2317
- version = "1.70.2"
2318
- source = "registry+https://github.com/rust-lang/crates.io-index"
2319
- checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
2320
-
2321
2187
  [[package]]
2322
2188
  name = "itertools"
2323
2189
  version = "0.12.1"
@@ -2414,7 +2280,7 @@ dependencies = [
2414
2280
 
2415
2281
  [[package]]
2416
2282
  name = "kreuzberg"
2417
- version = "4.0.0-rc.13"
2283
+ version = "4.0.0-rc.14"
2418
2284
  dependencies = [
2419
2285
  "ahash",
2420
2286
  "async-trait",
@@ -2467,7 +2333,7 @@ dependencies = [
2467
2333
  "serde",
2468
2334
  "serde_json",
2469
2335
  "serde_yaml_ng",
2470
- "sevenz-rust",
2336
+ "sevenz-rust2",
2471
2337
  "tar",
2472
2338
  "text-splitter",
2473
2339
  "thiserror 2.0.17",
@@ -2483,30 +2349,16 @@ dependencies = [
2483
2349
  "uuid",
2484
2350
  "whatlang",
2485
2351
  "yake-rust",
2486
- "zip 6.0.0",
2487
- ]
2488
-
2489
- [[package]]
2490
- name = "kreuzberg-ffi"
2491
- version = "4.0.0-rc.13"
2492
- dependencies = [
2493
- "async-trait",
2494
- "cbindgen",
2495
- "html-to-markdown-rs",
2496
- "kreuzberg",
2497
- "serde",
2498
- "serde_json",
2499
- "tokio",
2352
+ "zip 7.0.0",
2500
2353
  ]
2501
2354
 
2502
2355
  [[package]]
2503
2356
  name = "kreuzberg-rb"
2504
- version = "4.0.0-rc.14"
2357
+ version = "4.0.0-rc.15"
2505
2358
  dependencies = [
2506
2359
  "async-trait",
2507
2360
  "html-to-markdown-rs",
2508
2361
  "kreuzberg",
2509
- "kreuzberg-ffi",
2510
2362
  "magnus",
2511
2363
  "pretty_assertions",
2512
2364
  "rb-sys",
@@ -2516,14 +2368,14 @@ dependencies = [
2516
2368
 
2517
2369
  [[package]]
2518
2370
  name = "kreuzberg-tesseract"
2519
- version = "4.0.0-rc.13"
2371
+ version = "4.0.0-rc.14"
2520
2372
  dependencies = [
2521
2373
  "cc",
2522
2374
  "cmake",
2523
2375
  "libc",
2524
2376
  "reqwest",
2525
2377
  "thiserror 2.0.17",
2526
- "zip 6.0.0",
2378
+ "zip 7.0.0",
2527
2379
  ]
2528
2380
 
2529
2381
  [[package]]
@@ -2726,20 +2578,11 @@ dependencies = [
2726
2578
  "libc",
2727
2579
  ]
2728
2580
 
2729
- [[package]]
2730
- name = "lzma-rust"
2731
- version = "0.1.7"
2732
- source = "registry+https://github.com/rust-lang/crates.io-index"
2733
- checksum = "5baab2bbbd7d75a144d671e9ff79270e903957d92fb7386fd39034c709bd2661"
2734
- dependencies = [
2735
- "byteorder",
2736
- ]
2737
-
2738
2581
  [[package]]
2739
2582
  name = "lzma-rust2"
2740
- version = "0.13.0"
2583
+ version = "0.15.4"
2741
2584
  source = "registry+https://github.com/rust-lang/crates.io-index"
2742
- checksum = "c60a23ffb90d527e23192f1246b14746e2f7f071cb84476dd879071696c18a4a"
2585
+ checksum = "48172246aa7c3ea28e423295dd1ca2589a24617cc4e588bb8cfe177cb2c54d95"
2743
2586
  dependencies = [
2744
2587
  "crc",
2745
2588
  "sha2",
@@ -3076,16 +2919,6 @@ dependencies = [
3076
2919
  "chrono",
3077
2920
  ]
3078
2921
 
3079
- [[package]]
3080
- name = "nt-time"
3081
- version = "0.8.1"
3082
- source = "registry+https://github.com/rust-lang/crates.io-index"
3083
- checksum = "2de419e64947cd8830e66beb584acc3fb42ed411d103e3c794dda355d1b374b5"
3084
- dependencies = [
3085
- "chrono",
3086
- "time",
3087
- ]
3088
-
3089
2922
  [[package]]
3090
2923
  name = "num-bigint"
3091
2924
  version = "0.4.6"
@@ -3218,12 +3051,6 @@ version = "1.21.3"
3218
3051
  source = "registry+https://github.com/rust-lang/crates.io-index"
3219
3052
  checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
3220
3053
 
3221
- [[package]]
3222
- name = "once_cell_polyfill"
3223
- version = "1.70.2"
3224
- source = "registry+https://github.com/rust-lang/crates.io-index"
3225
- checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
3226
-
3227
3054
  [[package]]
3228
3055
  name = "onig"
3229
3056
  version = "6.5.1"
@@ -5042,18 +4869,19 @@ dependencies = [
5042
4869
  ]
5043
4870
 
5044
4871
  [[package]]
5045
- name = "sevenz-rust"
5046
- version = "0.6.1"
4872
+ name = "sevenz-rust2"
4873
+ version = "0.20.0"
5047
4874
  source = "registry+https://github.com/rust-lang/crates.io-index"
5048
- checksum = "26482cf1ecce4540dc782fc70019eba89ffc4d87b3717eb5ec524b5db6fdefef"
4875
+ checksum = "611081ec4fc67633b979fc0c24385de90fa60acd18126d796c8758a24294a950"
5049
4876
  dependencies = [
5050
- "bit-set 0.6.0",
5051
- "byteorder",
5052
- "crc",
5053
- "filetime_creation",
4877
+ "aes",
4878
+ "bzip2",
4879
+ "cbc",
4880
+ "crc32fast",
4881
+ "getrandom 0.3.4",
5054
4882
  "js-sys",
5055
- "lzma-rust",
5056
- "nt-time",
4883
+ "lzma-rust2",
4884
+ "ppmd-rust",
5057
4885
  "sha2",
5058
4886
  "wasm-bindgen",
5059
4887
  ]
@@ -5401,19 +5229,6 @@ dependencies = [
5401
5229
  "xattr",
5402
5230
  ]
5403
5231
 
5404
- [[package]]
5405
- name = "tempfile"
5406
- version = "3.23.0"
5407
- source = "registry+https://github.com/rust-lang/crates.io-index"
5408
- checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
5409
- dependencies = [
5410
- "fastrand",
5411
- "getrandom 0.3.4",
5412
- "once_cell",
5413
- "rustix",
5414
- "windows-sys 0.61.2",
5415
- ]
5416
-
5417
5232
  [[package]]
5418
5233
  name = "tendril"
5419
5234
  version = "0.4.3"
@@ -6122,12 +5937,6 @@ version = "1.0.4"
6122
5937
  source = "registry+https://github.com/rust-lang/crates.io-index"
6123
5938
  checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
6124
5939
 
6125
- [[package]]
6126
- name = "utf8parse"
6127
- version = "0.2.2"
6128
- source = "registry+https://github.com/rust-lang/crates.io-index"
6129
- checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
6130
-
6131
5940
  [[package]]
6132
5941
  name = "uuid"
6133
5942
  version = "1.19.0"
@@ -6829,9 +6638,9 @@ dependencies = [
6829
6638
 
6830
6639
  [[package]]
6831
6640
  name = "zip"
6832
- version = "6.0.0"
6641
+ version = "7.0.0"
6833
6642
  source = "registry+https://github.com/rust-lang/crates.io-index"
6834
- checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b"
6643
+ checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037"
6835
6644
  dependencies = [
6836
6645
  "aes",
6837
6646
  "arbitrary",
@@ -6840,6 +6649,7 @@ dependencies = [
6840
6649
  "crc32fast",
6841
6650
  "deflate64",
6842
6651
  "flate2",
6652
+ "generic-array",
6843
6653
  "getrandom 0.3.4",
6844
6654
  "hmac",
6845
6655
  "indexmap",
@@ -7,7 +7,7 @@ rb-sys = { path = "../../../vendor/rb-sys" }
7
7
 
8
8
  [package]
9
9
  name = "kreuzberg-rb"
10
- version = "4.0.0-rc.14"
10
+ version = "4.0.0-rc.15"
11
11
  edition = "2024"
12
12
  rust-version = "1.91"
13
13
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -30,7 +30,6 @@ default = []
30
30
  [dependencies]
31
31
  async-trait = "0.1.89"
32
32
  kreuzberg = { path = "../../../vendor/kreuzberg", features = ["full"] }
33
- kreuzberg-ffi = { path = "../../../vendor/kreuzberg-ffi", features = ["embeddings"] }
34
33
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = [
35
34
  "rb-sys",
36
35
  ] }
@@ -1,5 +1,16 @@
1
1
  #[cfg(target_os = "macos")]
2
2
  fn main() {
3
+ if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
4
+ let lib_path = std::path::Path::new(&cargo_manifest_dir)
5
+ .parent()
6
+ .and_then(|p| p.parent())
7
+ .and_then(|p| p.parent())
8
+ .and_then(|p| p.parent())
9
+ .and_then(|p| p.parent())
10
+ .map(|p| p.join("target/release"))
11
+ .expect("Failed to construct lib path");
12
+ println!("cargo:rustc-link-search={}", lib_path.display());
13
+ }
3
14
  println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
4
15
  println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
5
16
  println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/.");
@@ -7,9 +18,35 @@ fn main() {
7
18
 
8
19
  #[cfg(target_os = "linux")]
9
20
  fn main() {
21
+ if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
22
+ let lib_path = std::path::Path::new(&cargo_manifest_dir)
23
+ .parent()
24
+ .and_then(|p| p.parent())
25
+ .and_then(|p| p.parent())
26
+ .and_then(|p| p.parent())
27
+ .and_then(|p| p.parent())
28
+ .map(|p| p.join("target/release"))
29
+ .expect("Failed to construct lib path");
30
+ println!("cargo:rustc-link-search={}", lib_path.display());
31
+ }
10
32
  println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
11
33
  println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/.");
12
34
  }
13
35
 
14
- #[cfg(not(any(target_os = "macos", target_os = "linux")))]
36
+ #[cfg(target_os = "windows")]
37
+ fn main() {
38
+ if let Ok(cargo_manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") {
39
+ let lib_path = std::path::Path::new(&cargo_manifest_dir)
40
+ .parent()
41
+ .and_then(|p| p.parent())
42
+ .and_then(|p| p.parent())
43
+ .and_then(|p| p.parent())
44
+ .and_then(|p| p.parent())
45
+ .map(|p| p.join("target/release"))
46
+ .expect("Failed to construct lib path");
47
+ println!("cargo:rustc-link-search={}", lib_path.display());
48
+ }
49
+ }
50
+
51
+ #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
15
52
  fn main() {}
@@ -223,7 +223,7 @@ module Kreuzberg
223
223
  end
224
224
 
225
225
  def parse_chunks(chunks_data)
226
- return nil if chunks_data.nil?
226
+ return [] if chunks_data.nil? || chunks_data.empty?
227
227
 
228
228
  chunks_data.map do |chunk_hash|
229
229
  Chunk.new(
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.0.0-rc.14'
4
+ VERSION = '4.0.0-rc.15'
5
5
  end
Binary file
data/vendor/Cargo.toml CHANGED
@@ -1,8 +1,8 @@
1
1
  [workspace]
2
- members = ["kreuzberg", "kreuzberg-ffi", "kreuzberg-tesseract"]
2
+ members = ["kreuzberg", "kreuzberg-tesseract"]
3
3
 
4
4
  [workspace.package]
5
- version = "4.0.0-rc.14"
5
+ version = "4.0.0-rc.15"
6
6
  edition = "2024"
7
7
  rust-version = "1.91"
8
8
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzberg"
3
- version = "4.0.0-rc.14"
3
+ version = "4.0.0-rc.15"
4
4
  edition = "2024"
5
5
  rust-version = "1.91"
6
6
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -228,7 +228,10 @@ fn ensure_windows_import_library(pdfium_dir: &Path) {
228
228
  /// Fetch the latest release version from a GitHub repository
229
229
  ///
230
230
  /// Uses curl to query the GitHub API and extract the tag_name from the
231
- /// latest release JSON response. Falls back to "7529" if API call fails.
231
+ /// latest release JSON response. Uses improved JSON parsing with fallback logic.
232
+ ///
233
+ /// For WASM (paulocoutinhox/pdfium-lib), falls back to known stable versions.
234
+ /// For non-WASM (bblanchon/pdfium-binaries), uses a different fallback strategy.
232
235
  fn get_latest_version(repo: &str) -> String {
233
236
  let api_url = format!("https://api.github.com/repos/{}/releases/latest", repo);
234
237
 
@@ -238,19 +241,60 @@ fn get_latest_version(repo: &str) -> String {
238
241
  && output.status.success()
239
242
  {
240
243
  let json = String::from_utf8_lossy(&output.stdout);
241
- if let Some(start) = json.find("\"tag_name\":") {
242
- let after_colon = &json[start + "\"tag_name\":".len()..];
243
- if let Some(opening_quote) = after_colon.find('"')
244
- && let Some(closing_quote) = after_colon[opening_quote + 1..].find('"')
245
- {
246
- let tag_start = opening_quote + 1;
247
- let tag = &after_colon[tag_start..tag_start + closing_quote];
248
- return tag.split('/').next_back().unwrap_or(tag).to_string();
244
+
245
+ // Try to extract tag_name from JSON
246
+ if let Some(tag) = extract_tag_from_json(&json) {
247
+ return tag;
248
+ }
249
+ }
250
+
251
+ // Fallback versions based on repository
252
+ // These are stable versions known to have all required assets
253
+ if repo.contains("paulocoutinhox") {
254
+ eprintln!(
255
+ "cargo:warning=Failed to fetch latest PDFium WASM version from GitHub API, using fallback version 7442b"
256
+ );
257
+ "7442b".to_string()
258
+ } else if repo.contains("bblanchon") {
259
+ eprintln!(
260
+ "cargo:warning=Failed to fetch latest PDFium binaries version from GitHub API, using fallback version 7568"
261
+ );
262
+ "7568".to_string()
263
+ } else {
264
+ eprintln!(
265
+ "cargo:warning=Failed to fetch latest PDFium version from GitHub API (unknown repository: {})",
266
+ repo
267
+ );
268
+ String::new()
269
+ }
270
+ }
271
+
272
+ /// Extract tag_name from GitHub API JSON response
273
+ ///
274
+ /// Parses JSON by finding the tag_name field and extracting the value between quotes.
275
+ /// Handles various JSON formatting variations.
276
+ fn extract_tag_from_json(json: &str) -> Option<String> {
277
+ // Look for "tag_name": "..." pattern
278
+ if let Some(start) = json.find("\"tag_name\"") {
279
+ let after_colon = &json[start + "\"tag_name\"".len()..];
280
+
281
+ // Skip whitespace and colon
282
+ let after_colon = after_colon.trim_start();
283
+ let after_colon = after_colon.strip_prefix(':')?;
284
+ let after_colon = after_colon.trim_start();
285
+
286
+ // Extract value between quotes
287
+ if let Some(opening_quote) = after_colon.find('"') {
288
+ let value_start = opening_quote + 1;
289
+ if let Some(closing_quote) = after_colon[value_start..].find('"') {
290
+ let tag = &after_colon[value_start..value_start + closing_quote];
291
+ // Handle releases with '/' in tag (e.g., "chromium/1234")
292
+ return Some(tag.split('/').next_back().unwrap_or(tag).to_string());
249
293
  }
250
294
  }
251
295
  }
252
296
 
253
- "7529".to_string()
297
+ None
254
298
  }
255
299
 
256
300
  /// Get the download URL and library name for the target platform
@@ -8,6 +8,8 @@
8
8
  //! - `POST /extract` - Extract text from uploaded files (multipart form data)
9
9
  //! - `GET /health` - Health check endpoint
10
10
  //! - `GET /info` - Server information
11
+ //! - `GET /cache/stats` - Get cache statistics
12
+ //! - `DELETE /cache/clear` - Clear all cached files
11
13
  //!
12
14
  //! # Examples
13
15
  //!
@@ -62,6 +64,12 @@
62
64
  //!
63
65
  //! # Server info
64
66
  //! curl http://localhost:8000/info
67
+ //!
68
+ //! # Cache statistics
69
+ //! curl http://localhost:8000/cache/stats
70
+ //!
71
+ //! # Clear cache
72
+ //! curl -X DELETE http://localhost:8000/cache/clear
65
73
  //! ```
66
74
 
67
75
  mod error;