kreuzberg 4.0.0.pre.rc.18 → 4.0.0.pre.rc.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/ext/kreuzberg_rb/native/Cargo.lock +274 -13
  4. data/ext/kreuzberg_rb/native/Cargo.toml +2 -5
  5. data/ext/kreuzberg_rb/native/build.rs +3 -66
  6. data/lib/kreuzberg/config.rb +41 -6
  7. data/lib/kreuzberg/version.rb +1 -1
  8. data/sig/kreuzberg.rbs +10 -3
  9. data/spec/binding/config_spec.rb +74 -0
  10. data/spec/binding/font_config_spec.rb +220 -0
  11. data/vendor/Cargo.toml +2 -1
  12. data/vendor/kreuzberg/Cargo.toml +10 -9
  13. data/vendor/kreuzberg/src/api/handlers.rs +3 -2
  14. data/vendor/kreuzberg/src/api/server.rs +211 -36
  15. data/vendor/kreuzberg/src/api/types.rs +20 -7
  16. data/vendor/kreuzberg/src/core/batch_optimizations.rs +106 -26
  17. data/vendor/kreuzberg/src/core/config.rs +253 -22
  18. data/vendor/kreuzberg/src/core/extractor.rs +41 -3
  19. data/vendor/kreuzberg/src/core/pipeline.rs +15 -59
  20. data/vendor/kreuzberg/src/extraction/capacity.rs +270 -0
  21. data/vendor/kreuzberg/src/extraction/docx.rs +26 -17
  22. data/vendor/kreuzberg/src/extraction/excel.rs +132 -128
  23. data/vendor/kreuzberg/src/extraction/html.rs +13 -14
  24. data/vendor/kreuzberg/src/extraction/markdown.rs +4 -3
  25. data/vendor/kreuzberg/src/extraction/mod.rs +12 -0
  26. data/vendor/kreuzberg/src/extraction/pptx.rs +4 -1
  27. data/vendor/kreuzberg/src/extractors/docbook.rs +1 -1
  28. data/vendor/kreuzberg/src/extractors/email.rs +1 -1
  29. data/vendor/kreuzberg/src/extractors/excel.rs +62 -21
  30. data/vendor/kreuzberg/src/extractors/fictionbook.rs +1 -1
  31. data/vendor/kreuzberg/src/extractors/html.rs +30 -18
  32. data/vendor/kreuzberg/src/extractors/jats.rs +1 -1
  33. data/vendor/kreuzberg/src/extractors/markdown.rs +3 -3
  34. data/vendor/kreuzberg/src/extractors/orgmode.rs +2 -2
  35. data/vendor/kreuzberg/src/extractors/pdf.rs +33 -25
  36. data/vendor/kreuzberg/src/extractors/typst.rs +2 -2
  37. data/vendor/kreuzberg/src/pdf/bindings.rs +0 -37
  38. data/vendor/kreuzberg/src/pdf/metadata.rs +37 -27
  39. data/vendor/kreuzberg/src/pdf/table.rs +20 -20
  40. data/vendor/kreuzberg/src/text/quality.rs +30 -15
  41. data/vendor/kreuzberg/src/text/quality_processor.rs +3 -11
  42. data/vendor/kreuzberg/src/text/token_reduction/core.rs +75 -60
  43. data/vendor/kreuzberg/src/text/token_reduction/filters.rs +20 -8
  44. data/vendor/kreuzberg/src/types.rs +5 -4
  45. data/vendor/kreuzberg/src/utils/mod.rs +3 -0
  46. data/vendor/kreuzberg/src/utils/pool.rs +172 -15
  47. data/vendor/kreuzberg/src/utils/pool_sizing.rs +393 -0
  48. data/vendor/kreuzberg/src/utils/string_pool.rs +373 -10
  49. data/vendor/kreuzberg/tests/api_large_pdf_extraction.rs +504 -0
  50. data/vendor/kreuzberg/tests/api_large_pdf_extraction_diagnostics.rs +302 -0
  51. data/vendor/kreuzberg/tests/api_tests.rs +514 -0
  52. data/vendor/kreuzberg/tests/concurrency_stress.rs +4 -0
  53. data/vendor/kreuzberg/tests/email_integration.rs +1 -1
  54. data/vendor/kreuzberg/tests/jats_extractor_tests.rs +1 -1
  55. data/vendor/kreuzberg/tests/markdown_extractor_tests.rs +2 -2
  56. data/vendor/kreuzberg/tests/ocr_language_registry.rs +1 -0
  57. data/vendor/kreuzberg/tests/pdfium_linking.rs +4 -4
  58. data/vendor/kreuzberg/tests/pipeline_integration.rs +8 -0
  59. data/vendor/kreuzberg/tests/typst_behavioral_tests.rs +1 -1
  60. data/vendor/kreuzberg-ffi/src/batch_streaming.rs +1 -3
  61. data/vendor/kreuzberg-ffi/src/lib.rs +6 -4
  62. data/vendor/kreuzberg-ffi/src/result_pool.rs +1 -1
  63. data/vendor/kreuzberg-ffi/src/result_view.rs +8 -8
  64. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  65. data/vendor/kreuzberg-tesseract/build.rs +0 -227
  66. metadata +7 -33
  67. data/vendor/rb-sys/.cargo_vcs_info.json +0 -6
  68. data/vendor/rb-sys/Cargo.lock +0 -393
  69. data/vendor/rb-sys/Cargo.toml +0 -70
  70. data/vendor/rb-sys/Cargo.toml.orig +0 -57
  71. data/vendor/rb-sys/LICENSE-APACHE +0 -190
  72. data/vendor/rb-sys/LICENSE-MIT +0 -21
  73. data/vendor/rb-sys/build/features.rs +0 -111
  74. data/vendor/rb-sys/build/main.rs +0 -286
  75. data/vendor/rb-sys/build/stable_api_config.rs +0 -155
  76. data/vendor/rb-sys/build/version.rs +0 -50
  77. data/vendor/rb-sys/readme.md +0 -36
  78. data/vendor/rb-sys/src/bindings.rs +0 -21
  79. data/vendor/rb-sys/src/hidden.rs +0 -11
  80. data/vendor/rb-sys/src/lib.rs +0 -35
  81. data/vendor/rb-sys/src/macros.rs +0 -371
  82. data/vendor/rb-sys/src/memory.rs +0 -53
  83. data/vendor/rb-sys/src/ruby_abi_version.rs +0 -38
  84. data/vendor/rb-sys/src/special_consts.rs +0 -31
  85. data/vendor/rb-sys/src/stable_api/compiled.c +0 -179
  86. data/vendor/rb-sys/src/stable_api/compiled.rs +0 -257
  87. data/vendor/rb-sys/src/stable_api/ruby_2_7.rs +0 -324
  88. data/vendor/rb-sys/src/stable_api/ruby_3_0.rs +0 -332
  89. data/vendor/rb-sys/src/stable_api/ruby_3_1.rs +0 -325
  90. data/vendor/rb-sys/src/stable_api/ruby_3_2.rs +0 -323
  91. data/vendor/rb-sys/src/stable_api/ruby_3_3.rs +0 -339
  92. data/vendor/rb-sys/src/stable_api/ruby_3_4.rs +0 -339
  93. data/vendor/rb-sys/src/stable_api.rs +0 -260
  94. data/vendor/rb-sys/src/symbol.rs +0 -31
  95. data/vendor/rb-sys/src/tracking_allocator.rs +0 -330
  96. data/vendor/rb-sys/src/utils.rs +0 -89
  97. data/vendor/rb-sys/src/value_type.rs +0 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a1c1d7ddcc45217bf5d9ea47a4d9d3ef9f41ed5a4bd87f4ff1f2ada7cfe0bca6
4
- data.tar.gz: 167fb6c623c9e4368bcd2388e0ef4631d170d0e167cc0555c2dc7cd814bff9eb
3
+ metadata.gz: 0f5df1c1138122d449d77193b97ee6c4f40de044077765f1d68ce4f0bc6aba2a
4
+ data.tar.gz: c48abedda657f892a912cd9cca7f40167fa3257d75f98527e0bc95da4580e630
5
5
  SHA512:
6
- metadata.gz: 37661576ba03012b1549c2388e0aee4c24cdb0e05cb34164fc85ca654c20236b28829678531c50fe46260433c43907c3f03cf516753f12c78ee2026a0b14a446
7
- data.tar.gz: 96e1b3b10589fa7f47fc9609158da556e7d889a61963953c72e5f92337897828d68934d8dc0a66d7b2e28774204b6b1bf2c95226d5c26c84a98a01ce797e24df
6
+ metadata.gz: a2a0a7854003f48d69eb89cf79a3252aadba11f001edfe7ba4d03f16198b3d68394bd84589c5b379c7a4dcd4784391a2fd3b1c5ce636d8a490382a77d62fd671
7
+ data.tar.gz: f3d571515eb5598e34fdc8dd18296cd069a6fa25e7cf9017a9f3f1980a82fcebca977e9fc18e361d0f00386f72109ffe8f3e1afcf15dcbc35b5e6472b3f83853
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.0.0.pre.rc.18)
4
+ kreuzberg (4.0.0.pre.rc.19)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -68,7 +68,7 @@ GEM
68
68
  ffi (~> 1.0)
69
69
  rb_sys (0.9.123)
70
70
  rake-compiler-dock (= 1.10.0)
71
- rbs (3.9.5)
71
+ rbs (3.10.0)
72
72
  logger
73
73
  regexp_parser (2.11.3)
74
74
  rspec (3.13.2)
@@ -84,7 +84,7 @@ GEM
84
84
  diff-lcs (>= 1.2.0, < 2.0)
85
85
  rspec-support (~> 3.13.0)
86
86
  rspec-support (3.13.6)
87
- rubocop (1.82.0)
87
+ rubocop (1.82.1)
88
88
  json (~> 2.3)
89
89
  language_server-protocol (~> 3.17.0.2)
90
90
  lint_roller (~> 1.1.0)
data/ext/kreuzberg_rb/native/Cargo.lock CHANGED
@@ -75,6 +75,56 @@ dependencies = [
75
75
  "libc",
76
76
  ]
77
77
 
78
+ [[package]]
79
+ name = "anstream"
80
+ version = "0.6.21"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
83
+ dependencies = [
84
+ "anstyle",
85
+ "anstyle-parse",
86
+ "anstyle-query",
87
+ "anstyle-wincon",
88
+ "colorchoice",
89
+ "is_terminal_polyfill",
90
+ "utf8parse",
91
+ ]
92
+
93
+ [[package]]
94
+ name = "anstyle"
95
+ version = "1.0.13"
96
+ source = "registry+https://github.com/rust-lang/crates.io-index"
97
+ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
98
+
99
+ [[package]]
100
+ name = "anstyle-parse"
101
+ version = "0.2.7"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
104
+ dependencies = [
105
+ "utf8parse",
106
+ ]
107
+
108
+ [[package]]
109
+ name = "anstyle-query"
110
+ version = "1.1.5"
111
+ source = "registry+https://github.com/rust-lang/crates.io-index"
112
+ checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
113
+ dependencies = [
114
+ "windows-sys 0.61.2",
115
+ ]
116
+
117
+ [[package]]
118
+ name = "anstyle-wincon"
119
+ version = "3.0.11"
120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
121
+ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
122
+ dependencies = [
123
+ "anstyle",
124
+ "once_cell_polyfill",
125
+ "windows-sys 0.61.2",
126
+ ]
127
+
78
128
  [[package]]
79
129
  name = "anyhow"
80
130
  version = "1.0.100"
@@ -608,6 +658,25 @@ dependencies = [
608
658
  "cipher",
609
659
  ]
610
660
 
661
+ [[package]]
662
+ name = "cbindgen"
663
+ version = "0.29.2"
664
+ source = "registry+https://github.com/rust-lang/crates.io-index"
665
+ checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799"
666
+ dependencies = [
667
+ "clap",
668
+ "heck",
669
+ "indexmap",
670
+ "log",
671
+ "proc-macro2",
672
+ "quote",
673
+ "serde",
674
+ "serde_json",
675
+ "syn",
676
+ "tempfile",
677
+ "toml 0.9.10+spec-1.1.0",
678
+ ]
679
+
611
680
  [[package]]
612
681
  name = "cc"
613
682
  version = "1.2.50"
@@ -708,6 +777,33 @@ dependencies = [
708
777
  "libloading 0.8.9",
709
778
  ]
710
779
 
780
+ [[package]]
781
+ name = "clap"
782
+ version = "4.5.53"
783
+ source = "registry+https://github.com/rust-lang/crates.io-index"
784
+ checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8"
785
+ dependencies = [
786
+ "clap_builder",
787
+ ]
788
+
789
+ [[package]]
790
+ name = "clap_builder"
791
+ version = "4.5.53"
792
+ source = "registry+https://github.com/rust-lang/crates.io-index"
793
+ checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00"
794
+ dependencies = [
795
+ "anstream",
796
+ "anstyle",
797
+ "clap_lex",
798
+ "strsim",
799
+ ]
800
+
801
+ [[package]]
802
+ name = "clap_lex"
803
+ version = "0.7.6"
804
+ source = "registry+https://github.com/rust-lang/crates.io-index"
805
+ checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
806
+
711
807
  [[package]]
712
808
  name = "cmake"
713
809
  version = "0.1.57"
@@ -732,6 +828,12 @@ version = "1.1.0"
732
828
  source = "registry+https://github.com/rust-lang/crates.io-index"
733
829
  checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
734
830
 
831
+ [[package]]
832
+ name = "colorchoice"
833
+ version = "1.0.4"
834
+ source = "registry+https://github.com/rust-lang/crates.io-index"
835
+ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
836
+
735
837
  [[package]]
736
838
  name = "compact_str"
737
839
  version = "0.9.0"
@@ -1104,13 +1206,34 @@ dependencies = [
1104
1206
  "subtle",
1105
1207
  ]
1106
1208
 
1209
+ [[package]]
1210
+ name = "dirs"
1211
+ version = "5.0.1"
1212
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1213
+ checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
1214
+ dependencies = [
1215
+ "dirs-sys 0.4.1",
1216
+ ]
1217
+
1107
1218
  [[package]]
1108
1219
  name = "dirs"
1109
1220
  version = "6.0.0"
1110
1221
  source = "registry+https://github.com/rust-lang/crates.io-index"
1111
1222
  checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
1112
1223
  dependencies = [
1113
- "dirs-sys",
1224
+ "dirs-sys 0.5.0",
1225
+ ]
1226
+
1227
+ [[package]]
1228
+ name = "dirs-sys"
1229
+ version = "0.4.1"
1230
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1231
+ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
1232
+ dependencies = [
1233
+ "libc",
1234
+ "option-ext",
1235
+ "redox_users 0.4.6",
1236
+ "windows-sys 0.48.0",
1114
1237
  ]
1115
1238
 
1116
1239
  [[package]]
@@ -1121,7 +1244,7 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
1121
1244
  dependencies = [
1122
1245
  "libc",
1123
1246
  "option-ext",
1124
- "redox_users",
1247
+ "redox_users 0.5.2",
1125
1248
  "windows-sys 0.61.2",
1126
1249
  ]
1127
1250
 
@@ -1739,7 +1862,7 @@ version = "0.4.3"
1739
1862
  source = "registry+https://github.com/rust-lang/crates.io-index"
1740
1863
  checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
1741
1864
  dependencies = [
1742
- "dirs",
1865
+ "dirs 6.0.0",
1743
1866
  "http",
1744
1867
  "indicatif",
1745
1868
  "libc",
@@ -1782,15 +1905,16 @@ dependencies = [
1782
1905
 
1783
1906
  [[package]]
1784
1907
  name = "html-to-markdown-rs"
1785
- version = "2.15.0"
1908
+ version = "2.16.1"
1786
1909
  source = "registry+https://github.com/rust-lang/crates.io-index"
1787
- checksum = "7741e7928e84f3f3497c84b8dd27e9fcc3368bd133e44ca800715eb34a1d58c8"
1910
+ checksum = "eda029e154a976514850a89a56a1f07f03fb0611e0e8fc2357fd4ec739d63acc"
1788
1911
  dependencies = [
1789
1912
  "astral-tl",
1790
1913
  "base64 0.22.1",
1791
1914
  "html-escape",
1792
1915
  "html5ever",
1793
1916
  "image",
1917
+ "lru",
1794
1918
  "markup5ever_rcdom",
1795
1919
  "once_cell",
1796
1920
  "regex",
@@ -2222,6 +2346,12 @@ dependencies = [
2222
2346
  "serde",
2223
2347
  ]
2224
2348
 
2349
+ [[package]]
2350
+ name = "is_terminal_polyfill"
2351
+ version = "1.70.2"
2352
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2353
+ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
2354
+
2225
2355
  [[package]]
2226
2356
  name = "itertools"
2227
2357
  version = "0.12.1"
@@ -2318,7 +2448,7 @@ dependencies = [
2318
2448
 
2319
2449
  [[package]]
2320
2450
  name = "kreuzberg"
2321
- version = "4.0.0-rc.17"
2451
+ version = "4.0.0-rc.18"
2322
2452
  dependencies = [
2323
2453
  "ahash",
2324
2454
  "async-trait",
@@ -2330,6 +2460,7 @@ dependencies = [
2330
2460
  "calamine",
2331
2461
  "chardetng",
2332
2462
  "dashmap",
2463
+ "dirs 5.0.1",
2333
2464
  "docx-lite",
2334
2465
  "encoding_rs",
2335
2466
  "fast_image_resize",
@@ -2355,6 +2486,7 @@ dependencies = [
2355
2486
  "opentelemetry",
2356
2487
  "opentelemetry_sdk",
2357
2488
  "org",
2489
+ "parking_lot",
2358
2490
  "pastey 0.2.1",
2359
2491
  "pdfium-render",
2360
2492
  "pkg-config",
@@ -2393,13 +2525,27 @@ dependencies = [
2393
2525
  "zip 7.0.0",
2394
2526
  ]
2395
2527
 
2528
+ [[package]]
2529
+ name = "kreuzberg-ffi"
2530
+ version = "4.0.0-rc.18"
2531
+ dependencies = [
2532
+ "async-trait",
2533
+ "cbindgen",
2534
+ "html-to-markdown-rs",
2535
+ "kreuzberg",
2536
+ "serde",
2537
+ "serde_json",
2538
+ "tokio",
2539
+ ]
2540
+
2396
2541
  [[package]]
2397
2542
  name = "kreuzberg-rb"
2398
- version = "4.0.0-rc.17"
2543
+ version = "4.0.0-rc.19"
2399
2544
  dependencies = [
2400
2545
  "async-trait",
2401
2546
  "html-to-markdown-rs",
2402
2547
  "kreuzberg",
2548
+ "kreuzberg-ffi",
2403
2549
  "magnus",
2404
2550
  "pretty_assertions",
2405
2551
  "rb-sys",
@@ -2409,7 +2555,9 @@ dependencies = [
2409
2555
 
2410
2556
  [[package]]
2411
2557
  name = "kreuzberg-tesseract"
2412
- version = "4.0.0-rc.17"
2558
+ version = "4.0.0-rc.18"
2559
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2560
+ checksum = "477791cd4bba39222e187ae6e235643e034a87c96f2c6fb5796667020560adba"
2413
2561
  dependencies = [
2414
2562
  "cc",
2415
2563
  "cmake",
@@ -2594,6 +2742,15 @@ dependencies = [
2594
2742
  "weezl",
2595
2743
  ]
2596
2744
 
2745
+ [[package]]
2746
+ name = "lru"
2747
+ version = "0.16.2"
2748
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2749
+ checksum = "96051b46fc183dc9cd4a223960ef37b9af631b55191852a8274bfef064cda20f"
2750
+ dependencies = [
2751
+ "hashbrown 0.16.1",
2752
+ ]
2753
+
2597
2754
  [[package]]
2598
2755
  name = "lru-slab"
2599
2756
  version = "0.1.2"
@@ -3092,6 +3249,12 @@ version = "1.21.3"
3092
3249
  source = "registry+https://github.com/rust-lang/crates.io-index"
3093
3250
  checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
3094
3251
 
3252
+ [[package]]
3253
+ name = "once_cell_polyfill"
3254
+ version = "1.70.2"
3255
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3256
+ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
3257
+
3095
3258
  [[package]]
3096
3259
  name = "onig"
3097
3260
  version = "6.5.1"
@@ -4325,16 +4488,18 @@ dependencies = [
4325
4488
 
4326
4489
  [[package]]
4327
4490
  name = "rb-sys"
4328
- version = "0.9.119"
4491
+ version = "0.9.123"
4492
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4493
+ checksum = "45fb1a185af97ee456f1c9e56dbe6e2e662bec4fdeaf83c4c28e0e6adfb18816"
4329
4494
  dependencies = [
4330
4495
  "rb-sys-build",
4331
4496
  ]
4332
4497
 
4333
4498
  [[package]]
4334
4499
  name = "rb-sys-build"
4335
- version = "0.9.119"
4500
+ version = "0.9.123"
4336
4501
  source = "registry+https://github.com/rust-lang/crates.io-index"
4337
- checksum = "2e0109499e06c85f56df4abad7d9c642ea8a2dd821d1d7132b4d1b69534677f3"
4502
+ checksum = "a58ebd02d7a6033e6a5f6f8d150c1e9f16506039092b84a73e6bedce6d3adf41"
4338
4503
  dependencies = [
4339
4504
  "bindgen",
4340
4505
  "lazy_static",
@@ -4380,6 +4545,17 @@ dependencies = [
4380
4545
  "bitflags",
4381
4546
  ]
4382
4547
 
4548
+ [[package]]
4549
+ name = "redox_users"
4550
+ version = "0.4.6"
4551
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4552
+ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
4553
+ dependencies = [
4554
+ "getrandom 0.2.16",
4555
+ "libredox",
4556
+ "thiserror 1.0.69",
4557
+ ]
4558
+
4383
4559
  [[package]]
4384
4560
  name = "redox_users"
4385
4561
  version = "0.5.2"
@@ -4442,9 +4618,9 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
4442
4618
 
4443
4619
  [[package]]
4444
4620
  name = "reqwest"
4445
- version = "0.12.26"
4621
+ version = "0.12.28"
4446
4622
  source = "registry+https://github.com/rust-lang/crates.io-index"
4447
- checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f"
4623
+ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
4448
4624
  dependencies = [
4449
4625
  "base64 0.22.1",
4450
4626
  "bytes",
@@ -5282,6 +5458,19 @@ dependencies = [
5282
5458
  "xattr",
5283
5459
  ]
5284
5460
 
5461
+ [[package]]
5462
+ name = "tempfile"
5463
+ version = "3.23.0"
5464
+ source = "registry+https://github.com/rust-lang/crates.io-index"
5465
+ checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
5466
+ dependencies = [
5467
+ "fastrand",
5468
+ "getrandom 0.3.4",
5469
+ "once_cell",
5470
+ "rustix",
5471
+ "windows-sys 0.61.2",
5472
+ ]
5473
+
5285
5474
  [[package]]
5286
5475
  name = "tendril"
5287
5476
  version = "0.4.3"
@@ -5990,6 +6179,12 @@ version = "1.0.4"
5990
6179
  source = "registry+https://github.com/rust-lang/crates.io-index"
5991
6180
  checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
5992
6181
 
6182
+ [[package]]
6183
+ name = "utf8parse"
6184
+ version = "0.2.2"
6185
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6186
+ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
6187
+
5993
6188
  [[package]]
5994
6189
  name = "uuid"
5995
6190
  version = "1.19.0"
@@ -6306,6 +6501,15 @@ dependencies = [
6306
6501
  "windows-link",
6307
6502
  ]
6308
6503
 
6504
+ [[package]]
6505
+ name = "windows-sys"
6506
+ version = "0.48.0"
6507
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6508
+ checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
6509
+ dependencies = [
6510
+ "windows-targets 0.48.5",
6511
+ ]
6512
+
6309
6513
  [[package]]
6310
6514
  name = "windows-sys"
6311
6515
  version = "0.52.0"
@@ -6342,6 +6546,21 @@ dependencies = [
6342
6546
  "windows-link",
6343
6547
  ]
6344
6548
 
6549
+ [[package]]
6550
+ name = "windows-targets"
6551
+ version = "0.48.5"
6552
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6553
+ checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
6554
+ dependencies = [
6555
+ "windows_aarch64_gnullvm 0.48.5",
6556
+ "windows_aarch64_msvc 0.48.5",
6557
+ "windows_i686_gnu 0.48.5",
6558
+ "windows_i686_msvc 0.48.5",
6559
+ "windows_x86_64_gnu 0.48.5",
6560
+ "windows_x86_64_gnullvm 0.48.5",
6561
+ "windows_x86_64_msvc 0.48.5",
6562
+ ]
6563
+
6345
6564
  [[package]]
6346
6565
  name = "windows-targets"
6347
6566
  version = "0.52.6"
@@ -6375,6 +6594,12 @@ dependencies = [
6375
6594
  "windows_x86_64_msvc 0.53.1",
6376
6595
  ]
6377
6596
 
6597
+ [[package]]
6598
+ name = "windows_aarch64_gnullvm"
6599
+ version = "0.48.5"
6600
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6601
+ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
6602
+
6378
6603
  [[package]]
6379
6604
  name = "windows_aarch64_gnullvm"
6380
6605
  version = "0.52.6"
@@ -6387,6 +6612,12 @@ version = "0.53.1"
6387
6612
  source = "registry+https://github.com/rust-lang/crates.io-index"
6388
6613
  checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
6389
6614
 
6615
+ [[package]]
6616
+ name = "windows_aarch64_msvc"
6617
+ version = "0.48.5"
6618
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6619
+ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
6620
+
6390
6621
  [[package]]
6391
6622
  name = "windows_aarch64_msvc"
6392
6623
  version = "0.52.6"
@@ -6399,6 +6630,12 @@ version = "0.53.1"
6399
6630
  source = "registry+https://github.com/rust-lang/crates.io-index"
6400
6631
  checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
6401
6632
 
6633
+ [[package]]
6634
+ name = "windows_i686_gnu"
6635
+ version = "0.48.5"
6636
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6637
+ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
6638
+
6402
6639
  [[package]]
6403
6640
  name = "windows_i686_gnu"
6404
6641
  version = "0.52.6"
@@ -6423,6 +6660,12 @@ version = "0.53.1"
6423
6660
  source = "registry+https://github.com/rust-lang/crates.io-index"
6424
6661
  checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
6425
6662
 
6663
+ [[package]]
6664
+ name = "windows_i686_msvc"
6665
+ version = "0.48.5"
6666
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6667
+ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
6668
+
6426
6669
  [[package]]
6427
6670
  name = "windows_i686_msvc"
6428
6671
  version = "0.52.6"
@@ -6435,6 +6678,12 @@ version = "0.53.1"
6435
6678
  source = "registry+https://github.com/rust-lang/crates.io-index"
6436
6679
  checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
6437
6680
 
6681
+ [[package]]
6682
+ name = "windows_x86_64_gnu"
6683
+ version = "0.48.5"
6684
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6685
+ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
6686
+
6438
6687
  [[package]]
6439
6688
  name = "windows_x86_64_gnu"
6440
6689
  version = "0.52.6"
@@ -6447,6 +6696,12 @@ version = "0.53.1"
6447
6696
  source = "registry+https://github.com/rust-lang/crates.io-index"
6448
6697
  checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
6449
6698
 
6699
+ [[package]]
6700
+ name = "windows_x86_64_gnullvm"
6701
+ version = "0.48.5"
6702
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6703
+ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
6704
+
6450
6705
  [[package]]
6451
6706
  name = "windows_x86_64_gnullvm"
6452
6707
  version = "0.52.6"
@@ -6459,6 +6714,12 @@ version = "0.53.1"
6459
6714
  source = "registry+https://github.com/rust-lang/crates.io-index"
6460
6715
  checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
6461
6716
 
6717
+ [[package]]
6718
+ name = "windows_x86_64_msvc"
6719
+ version = "0.48.5"
6720
+ source = "registry+https://github.com/rust-lang/crates.io-index"
6721
+ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
6722
+
6462
6723
  [[package]]
6463
6724
  name = "windows_x86_64_msvc"
6464
6725
  version = "0.52.6"
data/ext/kreuzberg_rb/native/Cargo.toml CHANGED
@@ -1,13 +1,9 @@
1
1
  # This crate is excluded from the workspace to use a vendored kreuzberg crate for gem packaging
2
2
  [workspace]
3
3
 
4
- [patch.crates-io]
5
- # Patch rb-sys to fix Windows i32/i64 type mismatch in tracking_allocator.rs
6
- rb-sys = { path = "../../../vendor/rb-sys" }
7
-
8
4
  [package]
9
5
  name = "kreuzberg-rb"
10
- version = "4.0.0-rc.18"
6
+ version = "4.0.0-rc.19"
11
7
  edition = "2024"
12
8
  rust-version = "1.91"
13
9
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
@@ -30,6 +26,7 @@ default = []
30
26
  [dependencies]
31
27
  async-trait = "0.1.89"
32
28
  kreuzberg = { path = "../../../vendor/kreuzberg", features = ["full"] }
29
+ kreuzberg-ffi = { path = "../../../vendor/kreuzberg-ffi" }
33
30
  magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = [
34
31
  "rb-sys",
35
32
  ] }
data/ext/kreuzberg_rb/native/build.rs CHANGED
@@ -1,71 +1,7 @@
1
- use std::env;
2
- use std::path::PathBuf;
3
-
4
1
  fn main() {
5
- let target = env::var("TARGET").unwrap();
6
- let profile = env::var("PROFILE").unwrap_or_else(|_| "release".to_string());
7
-
8
- // Try to locate kreuzberg-ffi library built alongside this crate
9
- let cargo_manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
10
- let manifest_path = PathBuf::from(&cargo_manifest_dir);
11
-
12
- // Prefer host target layout, but include target-triple layout for cross builds.
13
- // IMPORTANT: Only search lib directories, NOT deps directories.
14
- // The deps/ directories may contain dylibs with hardcoded install_name paths,
15
- // which causes load errors on macOS when users install the gem.
16
- if let Some(packages_root) = manifest_path
17
- .parent()
18
- .and_then(|p| p.parent())
19
- .and_then(|p| p.parent())
20
- .and_then(|p| p.parent())
21
- .and_then(|p| p.parent())
22
- {
23
- let host_lib_dir = packages_root.join("target").join(&profile);
24
- let target_lib_dir = packages_root.join("target").join(&target).join(&profile);
25
-
26
- // Try to find the static library and link it directly on Unix-like systems
27
- // to avoid the linker preferring dylib over static lib.
28
- if !target.contains("windows") {
29
- let static_lib_name = if target.contains("windows") {
30
- "kreuzberg_ffi.lib"
31
- } else {
32
- "libkreuzberg_ffi.a"
33
- };
34
-
35
- // Check both host and target lib directories for the static library
36
- for lib_dir in [&host_lib_dir, &target_lib_dir] {
37
- let static_lib = lib_dir.join(static_lib_name);
38
- if static_lib.exists() {
39
- // Found static library, link it directly by passing the full path
40
- println!("cargo:rustc-link-arg={}", static_lib.display());
41
- // Don't add the library search path or -l flag
42
- // Jump to platform-specific configuration
43
- if target.contains("darwin") {
44
- println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
45
- println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
46
- } else if target.contains("linux") {
47
- println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
48
- }
49
- println!("cargo:rerun-if-changed=build.rs");
50
- return;
51
- }
52
- }
53
- }
54
-
55
- // Fallback: Add search paths and use standard linking
56
- for dir in [host_lib_dir, target_lib_dir] {
57
- println!("cargo:rustc-link-search=native={}", dir.display());
58
- }
59
- }
60
-
61
- // Link the kreuzberg-ffi library
62
- // When kreuzberg-ffi is built, its symbols become available for linking
63
- if target.contains("windows") {
64
- println!("cargo:rustc-link-lib=dylib=kreuzberg_ffi");
65
- } else {
66
- println!("cargo:rustc-link-lib=static=kreuzberg_ffi");
67
- }
2
+ let target = std::env::var("TARGET").unwrap();
68
3
 
4
+ // Configure platform-specific linker settings
69
5
  if target.contains("darwin") {
70
6
  println!("cargo:rustc-link-arg=-Wl,-undefined,dynamic_lookup");
71
7
  println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
@@ -73,5 +9,6 @@ fn main() {
73
9
  println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
74
10
  }
75
11
 
12
+ // kreuzberg-ffi is a cargo dependency that will be linked via Cargo's build system
76
13
  println!("cargo:rerun-if-changed=build.rs");
77
14
  }