red-candle 1.7.0 → 1.8.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +260 -60
- data/ext/candle/Cargo.toml +1 -1
- data/ext/candle/src/gvl.rs +58 -0
- data/ext/candle/src/lib.rs +1 -0
- data/ext/candle/src/ruby/embedding_model.rs +16 -14
- data/ext/candle/src/ruby/llm.rs +9 -3
- data/ext/candle/src/ruby/ner.rs +8 -4
- data/ext/candle/src/ruby/reranker.rs +89 -85
- data/lib/candle/version.rb +1 -1
- data/lib/candle.rb +15 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b4ad17010839981938eb7c963ce84b02aee0779412da77ca8f3dca5f3ff9ec8
|
|
4
|
+
data.tar.gz: 296b45a80b1cf14c4f2a9d20a0984f06da7433f6dbb92290c7e0e10e194481ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7d154e4a2fe3b4afddf59908c270820ae12e26d28934eff2a7cd628a7a66c153c1e02427f4939d891c27ceb6250ef26bc51f97f6862e9d7e8c4531a354b7dea4
|
|
7
|
+
data.tar.gz: f1bb5d35dac744b44ca9879334116436c80f495e722b8edfb68bd90022df3e32840b040bb1504d7acc0a93b70b31a27781ae098b4d2520184a9cae2ecdde899a
|
data/Cargo.lock
CHANGED
|
@@ -174,9 +174,9 @@ dependencies = [
|
|
|
174
174
|
|
|
175
175
|
[[package]]
|
|
176
176
|
name = "aws-lc-rs"
|
|
177
|
-
version = "1.
|
|
177
|
+
version = "1.17.0"
|
|
178
178
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
179
|
-
checksum = "
|
|
179
|
+
checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00"
|
|
180
180
|
dependencies = [
|
|
181
181
|
"aws-lc-sys",
|
|
182
182
|
"zeroize",
|
|
@@ -184,11 +184,10 @@ dependencies = [
|
|
|
184
184
|
|
|
185
185
|
[[package]]
|
|
186
186
|
name = "aws-lc-sys"
|
|
187
|
-
version = "0.
|
|
187
|
+
version = "0.41.0"
|
|
188
188
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
189
|
-
checksum = "
|
|
189
|
+
checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4"
|
|
190
190
|
dependencies = [
|
|
191
|
-
"bindgen 0.72.1",
|
|
192
191
|
"cc",
|
|
193
192
|
"cmake",
|
|
194
193
|
"dunce",
|
|
@@ -268,26 +267,6 @@ dependencies = [
|
|
|
268
267
|
"syn",
|
|
269
268
|
]
|
|
270
269
|
|
|
271
|
-
[[package]]
|
|
272
|
-
name = "bindgen"
|
|
273
|
-
version = "0.72.1"
|
|
274
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
275
|
-
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
276
|
-
dependencies = [
|
|
277
|
-
"bitflags 2.9.4",
|
|
278
|
-
"cexpr",
|
|
279
|
-
"clang-sys",
|
|
280
|
-
"itertools 0.13.0",
|
|
281
|
-
"log",
|
|
282
|
-
"prettyplease",
|
|
283
|
-
"proc-macro2",
|
|
284
|
-
"quote",
|
|
285
|
-
"regex",
|
|
286
|
-
"rustc-hash 2.1.1",
|
|
287
|
-
"shlex",
|
|
288
|
-
"syn",
|
|
289
|
-
]
|
|
290
|
-
|
|
291
270
|
[[package]]
|
|
292
271
|
name = "bindgen_cuda"
|
|
293
272
|
version = "0.1.6"
|
|
@@ -427,7 +406,7 @@ dependencies = [
|
|
|
427
406
|
"image",
|
|
428
407
|
"magnus",
|
|
429
408
|
"outlines-core",
|
|
430
|
-
"rand 0.
|
|
409
|
+
"rand 0.10.1",
|
|
431
410
|
"safetensors 0.3.3",
|
|
432
411
|
"serde",
|
|
433
412
|
"serde_json",
|
|
@@ -458,7 +437,7 @@ dependencies = [
|
|
|
458
437
|
"num_cpus",
|
|
459
438
|
"objc2-foundation",
|
|
460
439
|
"objc2-metal",
|
|
461
|
-
"rand 0.9.
|
|
440
|
+
"rand 0.9.4",
|
|
462
441
|
"rand_distr",
|
|
463
442
|
"rayon",
|
|
464
443
|
"safetensors 0.7.0",
|
|
@@ -520,7 +499,7 @@ dependencies = [
|
|
|
520
499
|
"candle-nn",
|
|
521
500
|
"fancy-regex 0.17.0",
|
|
522
501
|
"num-traits",
|
|
523
|
-
"rand 0.9.
|
|
502
|
+
"rand 0.9.4",
|
|
524
503
|
"rayon",
|
|
525
504
|
"serde",
|
|
526
505
|
"serde_json",
|
|
@@ -581,6 +560,17 @@ version = "0.2.1"
|
|
|
581
560
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
582
561
|
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
|
|
583
562
|
|
|
563
|
+
[[package]]
|
|
564
|
+
name = "chacha20"
|
|
565
|
+
version = "0.10.0"
|
|
566
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
567
|
+
checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601"
|
|
568
|
+
dependencies = [
|
|
569
|
+
"cfg-if",
|
|
570
|
+
"cpufeatures 0.3.0",
|
|
571
|
+
"rand_core 0.10.1",
|
|
572
|
+
]
|
|
573
|
+
|
|
584
574
|
[[package]]
|
|
585
575
|
name = "chrono"
|
|
586
576
|
version = "0.4.42"
|
|
@@ -693,6 +683,15 @@ dependencies = [
|
|
|
693
683
|
"libc",
|
|
694
684
|
]
|
|
695
685
|
|
|
686
|
+
[[package]]
|
|
687
|
+
name = "cpufeatures"
|
|
688
|
+
version = "0.3.0"
|
|
689
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
690
|
+
checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
|
|
691
|
+
dependencies = [
|
|
692
|
+
"libc",
|
|
693
|
+
]
|
|
694
|
+
|
|
696
695
|
[[package]]
|
|
697
696
|
name = "crc32fast"
|
|
698
697
|
version = "1.5.0"
|
|
@@ -981,7 +980,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
981
980
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
|
982
981
|
dependencies = [
|
|
983
982
|
"libc",
|
|
984
|
-
"windows-sys 0.
|
|
983
|
+
"windows-sys 0.52.0",
|
|
985
984
|
]
|
|
986
985
|
|
|
987
986
|
[[package]]
|
|
@@ -1102,7 +1101,7 @@ dependencies = [
|
|
|
1102
1101
|
"cudarc 0.19.3",
|
|
1103
1102
|
"half",
|
|
1104
1103
|
"num-traits",
|
|
1105
|
-
"rand 0.9.
|
|
1104
|
+
"rand 0.9.4",
|
|
1106
1105
|
"rand_distr",
|
|
1107
1106
|
]
|
|
1108
1107
|
|
|
@@ -1121,6 +1120,12 @@ version = "1.0.7"
|
|
|
1121
1120
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1122
1121
|
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
|
1123
1122
|
|
|
1123
|
+
[[package]]
|
|
1124
|
+
name = "foldhash"
|
|
1125
|
+
version = "0.1.5"
|
|
1126
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1127
|
+
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
|
1128
|
+
|
|
1124
1129
|
[[package]]
|
|
1125
1130
|
name = "foldhash"
|
|
1126
1131
|
version = "0.2.0"
|
|
@@ -1543,11 +1548,25 @@ dependencies = [
|
|
|
1543
1548
|
"cfg-if",
|
|
1544
1549
|
"js-sys",
|
|
1545
1550
|
"libc",
|
|
1546
|
-
"r-efi",
|
|
1551
|
+
"r-efi 5.3.0",
|
|
1547
1552
|
"wasi 0.14.5+wasi-0.2.4",
|
|
1548
1553
|
"wasm-bindgen",
|
|
1549
1554
|
]
|
|
1550
1555
|
|
|
1556
|
+
[[package]]
|
|
1557
|
+
name = "getrandom"
|
|
1558
|
+
version = "0.4.2"
|
|
1559
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1560
|
+
checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
|
|
1561
|
+
dependencies = [
|
|
1562
|
+
"cfg-if",
|
|
1563
|
+
"libc",
|
|
1564
|
+
"r-efi 6.0.0",
|
|
1565
|
+
"rand_core 0.10.1",
|
|
1566
|
+
"wasip2",
|
|
1567
|
+
"wasip3",
|
|
1568
|
+
]
|
|
1569
|
+
|
|
1551
1570
|
[[package]]
|
|
1552
1571
|
name = "getset"
|
|
1553
1572
|
version = "0.1.6"
|
|
@@ -1611,7 +1630,7 @@ dependencies = [
|
|
|
1611
1630
|
"cfg-if",
|
|
1612
1631
|
"crunchy",
|
|
1613
1632
|
"num-traits",
|
|
1614
|
-
"rand 0.9.
|
|
1633
|
+
"rand 0.9.4",
|
|
1615
1634
|
"rand_distr",
|
|
1616
1635
|
]
|
|
1617
1636
|
|
|
@@ -1620,6 +1639,9 @@ name = "hashbrown"
|
|
|
1620
1639
|
version = "0.15.5"
|
|
1621
1640
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1622
1641
|
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
|
1642
|
+
dependencies = [
|
|
1643
|
+
"foldhash 0.1.5",
|
|
1644
|
+
]
|
|
1623
1645
|
|
|
1624
1646
|
[[package]]
|
|
1625
1647
|
name = "hashbrown"
|
|
@@ -1629,7 +1651,7 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
|
|
|
1629
1651
|
dependencies = [
|
|
1630
1652
|
"allocator-api2",
|
|
1631
1653
|
"equivalent",
|
|
1632
|
-
"foldhash",
|
|
1654
|
+
"foldhash 0.2.0",
|
|
1633
1655
|
"serde",
|
|
1634
1656
|
"serde_core",
|
|
1635
1657
|
]
|
|
@@ -1659,7 +1681,7 @@ dependencies = [
|
|
|
1659
1681
|
"log",
|
|
1660
1682
|
"native-tls",
|
|
1661
1683
|
"num_cpus",
|
|
1662
|
-
"rand 0.8.
|
|
1684
|
+
"rand 0.8.6",
|
|
1663
1685
|
"reqwest",
|
|
1664
1686
|
"rustls",
|
|
1665
1687
|
"serde",
|
|
@@ -1900,6 +1922,12 @@ dependencies = [
|
|
|
1900
1922
|
"zerovec",
|
|
1901
1923
|
]
|
|
1902
1924
|
|
|
1925
|
+
[[package]]
|
|
1926
|
+
name = "id-arena"
|
|
1927
|
+
version = "2.3.0"
|
|
1928
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1929
|
+
checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
|
|
1930
|
+
|
|
1903
1931
|
[[package]]
|
|
1904
1932
|
name = "ident_case"
|
|
1905
1933
|
version = "1.0.1"
|
|
@@ -1975,6 +2003,7 @@ checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921"
|
|
|
1975
2003
|
dependencies = [
|
|
1976
2004
|
"equivalent",
|
|
1977
2005
|
"hashbrown 0.15.5",
|
|
2006
|
+
"serde",
|
|
1978
2007
|
]
|
|
1979
2008
|
|
|
1980
2009
|
[[package]]
|
|
@@ -2115,6 +2144,12 @@ version = "1.3.0"
|
|
|
2115
2144
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2116
2145
|
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
2117
2146
|
|
|
2147
|
+
[[package]]
|
|
2148
|
+
name = "leb128fmt"
|
|
2149
|
+
version = "0.1.0"
|
|
2150
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2151
|
+
checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
|
2152
|
+
|
|
2118
2153
|
[[package]]
|
|
2119
2154
|
name = "lebe"
|
|
2120
2155
|
version = "0.5.3"
|
|
@@ -2144,7 +2179,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
2144
2179
|
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
|
|
2145
2180
|
dependencies = [
|
|
2146
2181
|
"cfg-if",
|
|
2147
|
-
"windows-targets 0.
|
|
2182
|
+
"windows-targets 0.52.6",
|
|
2148
2183
|
]
|
|
2149
2184
|
|
|
2150
2185
|
[[package]]
|
|
@@ -2653,15 +2688,14 @@ dependencies = [
|
|
|
2653
2688
|
|
|
2654
2689
|
[[package]]
|
|
2655
2690
|
name = "openssl"
|
|
2656
|
-
version = "0.10.
|
|
2691
|
+
version = "0.10.80"
|
|
2657
2692
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2658
|
-
checksum = "
|
|
2693
|
+
checksum = "a45fa2aa886c42762255da344f0a0d313e254066c46aad76f300c3d3da62d967"
|
|
2659
2694
|
dependencies = [
|
|
2660
2695
|
"bitflags 2.9.4",
|
|
2661
2696
|
"cfg-if",
|
|
2662
2697
|
"foreign-types 0.3.2",
|
|
2663
2698
|
"libc",
|
|
2664
|
-
"once_cell",
|
|
2665
2699
|
"openssl-macros",
|
|
2666
2700
|
"openssl-sys",
|
|
2667
2701
|
]
|
|
@@ -2685,9 +2719,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
|
|
|
2685
2719
|
|
|
2686
2720
|
[[package]]
|
|
2687
2721
|
name = "openssl-sys"
|
|
2688
|
-
version = "0.9.
|
|
2722
|
+
version = "0.9.116"
|
|
2689
2723
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2690
|
-
checksum = "
|
|
2724
|
+
checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4"
|
|
2691
2725
|
dependencies = [
|
|
2692
2726
|
"cc",
|
|
2693
2727
|
"libc",
|
|
@@ -2939,7 +2973,7 @@ dependencies = [
|
|
|
2939
2973
|
"bytes",
|
|
2940
2974
|
"getrandom 0.3.3",
|
|
2941
2975
|
"lru-slab",
|
|
2942
|
-
"rand 0.9.
|
|
2976
|
+
"rand 0.9.4",
|
|
2943
2977
|
"ring",
|
|
2944
2978
|
"rustc-hash 2.1.1",
|
|
2945
2979
|
"rustls",
|
|
@@ -2962,7 +2996,7 @@ dependencies = [
|
|
|
2962
2996
|
"once_cell",
|
|
2963
2997
|
"socket2",
|
|
2964
2998
|
"tracing",
|
|
2965
|
-
"windows-sys 0.
|
|
2999
|
+
"windows-sys 0.52.0",
|
|
2966
3000
|
]
|
|
2967
3001
|
|
|
2968
3002
|
[[package]]
|
|
@@ -2980,11 +3014,17 @@ version = "5.3.0"
|
|
|
2980
3014
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2981
3015
|
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
|
|
2982
3016
|
|
|
3017
|
+
[[package]]
|
|
3018
|
+
name = "r-efi"
|
|
3019
|
+
version = "6.0.0"
|
|
3020
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3021
|
+
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
|
|
3022
|
+
|
|
2983
3023
|
[[package]]
|
|
2984
3024
|
name = "rand"
|
|
2985
|
-
version = "0.8.
|
|
3025
|
+
version = "0.8.6"
|
|
2986
3026
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2987
|
-
checksum = "
|
|
3027
|
+
checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
|
|
2988
3028
|
dependencies = [
|
|
2989
3029
|
"libc",
|
|
2990
3030
|
"rand_chacha 0.3.1",
|
|
@@ -2993,14 +3033,25 @@ dependencies = [
|
|
|
2993
3033
|
|
|
2994
3034
|
[[package]]
|
|
2995
3035
|
name = "rand"
|
|
2996
|
-
version = "0.9.
|
|
3036
|
+
version = "0.9.4"
|
|
2997
3037
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2998
|
-
checksum = "
|
|
3038
|
+
checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
|
|
2999
3039
|
dependencies = [
|
|
3000
3040
|
"rand_chacha 0.9.0",
|
|
3001
3041
|
"rand_core 0.9.3",
|
|
3002
3042
|
]
|
|
3003
3043
|
|
|
3044
|
+
[[package]]
|
|
3045
|
+
name = "rand"
|
|
3046
|
+
version = "0.10.1"
|
|
3047
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3048
|
+
checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
|
|
3049
|
+
dependencies = [
|
|
3050
|
+
"chacha20",
|
|
3051
|
+
"getrandom 0.4.2",
|
|
3052
|
+
"rand_core 0.10.1",
|
|
3053
|
+
]
|
|
3054
|
+
|
|
3004
3055
|
[[package]]
|
|
3005
3056
|
name = "rand_chacha"
|
|
3006
3057
|
version = "0.3.1"
|
|
@@ -3039,6 +3090,12 @@ dependencies = [
|
|
|
3039
3090
|
"getrandom 0.3.3",
|
|
3040
3091
|
]
|
|
3041
3092
|
|
|
3093
|
+
[[package]]
|
|
3094
|
+
name = "rand_core"
|
|
3095
|
+
version = "0.10.1"
|
|
3096
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3097
|
+
checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69"
|
|
3098
|
+
|
|
3042
3099
|
[[package]]
|
|
3043
3100
|
name = "rand_distr"
|
|
3044
3101
|
version = "0.5.1"
|
|
@@ -3046,7 +3103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
3046
3103
|
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
|
|
3047
3104
|
dependencies = [
|
|
3048
3105
|
"num-traits",
|
|
3049
|
-
"rand 0.9.
|
|
3106
|
+
"rand 0.9.4",
|
|
3050
3107
|
]
|
|
3051
3108
|
|
|
3052
3109
|
[[package]]
|
|
@@ -3076,7 +3133,7 @@ dependencies = [
|
|
|
3076
3133
|
"num-traits",
|
|
3077
3134
|
"paste",
|
|
3078
3135
|
"profiling",
|
|
3079
|
-
"rand 0.9.
|
|
3136
|
+
"rand 0.9.4",
|
|
3080
3137
|
"rand_chacha 0.9.0",
|
|
3081
3138
|
"simd_helpers",
|
|
3082
3139
|
"thiserror 2.0.16",
|
|
@@ -3165,7 +3222,7 @@ version = "0.9.124"
|
|
|
3165
3222
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3166
3223
|
checksum = "568068db4102230882e6d4ae8de6632e224ca75fe5970f6e026a04e91ed635d3"
|
|
3167
3224
|
dependencies = [
|
|
3168
|
-
"bindgen
|
|
3225
|
+
"bindgen",
|
|
3169
3226
|
"lazy_static",
|
|
3170
3227
|
"proc-macro2",
|
|
3171
3228
|
"quote",
|
|
@@ -3330,7 +3387,7 @@ dependencies = [
|
|
|
3330
3387
|
"errno",
|
|
3331
3388
|
"libc",
|
|
3332
3389
|
"linux-raw-sys",
|
|
3333
|
-
"windows-sys 0.
|
|
3390
|
+
"windows-sys 0.52.0",
|
|
3334
3391
|
]
|
|
3335
3392
|
|
|
3336
3393
|
[[package]]
|
|
@@ -3361,9 +3418,9 @@ dependencies = [
|
|
|
3361
3418
|
|
|
3362
3419
|
[[package]]
|
|
3363
3420
|
name = "rustls-webpki"
|
|
3364
|
-
version = "0.103.
|
|
3421
|
+
version = "0.103.13"
|
|
3365
3422
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3366
|
-
checksum = "
|
|
3423
|
+
checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
|
|
3367
3424
|
dependencies = [
|
|
3368
3425
|
"aws-lc-rs",
|
|
3369
3426
|
"ring",
|
|
@@ -3455,6 +3512,12 @@ dependencies = [
|
|
|
3455
3512
|
"libc",
|
|
3456
3513
|
]
|
|
3457
3514
|
|
|
3515
|
+
[[package]]
|
|
3516
|
+
name = "semver"
|
|
3517
|
+
version = "1.0.28"
|
|
3518
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3519
|
+
checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
|
|
3520
|
+
|
|
3458
3521
|
[[package]]
|
|
3459
3522
|
name = "seq-macro"
|
|
3460
3523
|
version = "0.3.6"
|
|
@@ -3541,7 +3604,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
3541
3604
|
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
|
3542
3605
|
dependencies = [
|
|
3543
3606
|
"cfg-if",
|
|
3544
|
-
"cpufeatures",
|
|
3607
|
+
"cpufeatures 0.2.17",
|
|
3545
3608
|
"digest",
|
|
3546
3609
|
]
|
|
3547
3610
|
|
|
@@ -3728,9 +3791,9 @@ dependencies = [
|
|
|
3728
3791
|
|
|
3729
3792
|
[[package]]
|
|
3730
3793
|
name = "tar"
|
|
3731
|
-
version = "0.4.
|
|
3794
|
+
version = "0.4.46"
|
|
3732
3795
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3733
|
-
checksum = "
|
|
3796
|
+
checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840"
|
|
3734
3797
|
dependencies = [
|
|
3735
3798
|
"filetime",
|
|
3736
3799
|
"libc",
|
|
@@ -3747,7 +3810,7 @@ dependencies = [
|
|
|
3747
3810
|
"getrandom 0.3.3",
|
|
3748
3811
|
"once_cell",
|
|
3749
3812
|
"rustix",
|
|
3750
|
-
"windows-sys 0.
|
|
3813
|
+
"windows-sys 0.52.0",
|
|
3751
3814
|
]
|
|
3752
3815
|
|
|
3753
3816
|
[[package]]
|
|
@@ -3847,7 +3910,7 @@ dependencies = [
|
|
|
3847
3910
|
"monostate",
|
|
3848
3911
|
"onig",
|
|
3849
3912
|
"paste",
|
|
3850
|
-
"rand 0.8.
|
|
3913
|
+
"rand 0.8.6",
|
|
3851
3914
|
"rayon",
|
|
3852
3915
|
"rayon-cond 0.3.0",
|
|
3853
3916
|
"regex",
|
|
@@ -3882,7 +3945,7 @@ dependencies = [
|
|
|
3882
3945
|
"monostate",
|
|
3883
3946
|
"onig",
|
|
3884
3947
|
"paste",
|
|
3885
|
-
"rand 0.9.
|
|
3948
|
+
"rand 0.9.4",
|
|
3886
3949
|
"rayon",
|
|
3887
3950
|
"rayon-cond 0.4.0",
|
|
3888
3951
|
"regex",
|
|
@@ -4168,6 +4231,12 @@ version = "0.2.1"
|
|
|
4168
4231
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4169
4232
|
checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
|
|
4170
4233
|
|
|
4234
|
+
[[package]]
|
|
4235
|
+
name = "unicode-xid"
|
|
4236
|
+
version = "0.2.6"
|
|
4237
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4238
|
+
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
|
4239
|
+
|
|
4171
4240
|
[[package]]
|
|
4172
4241
|
name = "unicode_categories"
|
|
4173
4242
|
version = "0.1.1"
|
|
@@ -4304,7 +4373,16 @@ version = "1.0.0+wasi-0.2.4"
|
|
|
4304
4373
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4305
4374
|
checksum = "03fa2761397e5bd52002cd7e73110c71af2109aca4e521a9f40473fe685b0a24"
|
|
4306
4375
|
dependencies = [
|
|
4307
|
-
"wit-bindgen",
|
|
4376
|
+
"wit-bindgen 0.45.1",
|
|
4377
|
+
]
|
|
4378
|
+
|
|
4379
|
+
[[package]]
|
|
4380
|
+
name = "wasip3"
|
|
4381
|
+
version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
|
|
4382
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4383
|
+
checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
|
|
4384
|
+
dependencies = [
|
|
4385
|
+
"wit-bindgen 0.51.0",
|
|
4308
4386
|
]
|
|
4309
4387
|
|
|
4310
4388
|
[[package]]
|
|
@@ -4379,6 +4457,28 @@ dependencies = [
|
|
|
4379
4457
|
"unicode-ident",
|
|
4380
4458
|
]
|
|
4381
4459
|
|
|
4460
|
+
[[package]]
|
|
4461
|
+
name = "wasm-encoder"
|
|
4462
|
+
version = "0.244.0"
|
|
4463
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4464
|
+
checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
|
|
4465
|
+
dependencies = [
|
|
4466
|
+
"leb128fmt",
|
|
4467
|
+
"wasmparser",
|
|
4468
|
+
]
|
|
4469
|
+
|
|
4470
|
+
[[package]]
|
|
4471
|
+
name = "wasm-metadata"
|
|
4472
|
+
version = "0.244.0"
|
|
4473
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4474
|
+
checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
|
|
4475
|
+
dependencies = [
|
|
4476
|
+
"anyhow",
|
|
4477
|
+
"indexmap",
|
|
4478
|
+
"wasm-encoder",
|
|
4479
|
+
"wasmparser",
|
|
4480
|
+
]
|
|
4481
|
+
|
|
4382
4482
|
[[package]]
|
|
4383
4483
|
name = "wasm-streams"
|
|
4384
4484
|
version = "0.4.2"
|
|
@@ -4392,6 +4492,18 @@ dependencies = [
|
|
|
4392
4492
|
"web-sys",
|
|
4393
4493
|
]
|
|
4394
4494
|
|
|
4495
|
+
[[package]]
|
|
4496
|
+
name = "wasmparser"
|
|
4497
|
+
version = "0.244.0"
|
|
4498
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4499
|
+
checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
|
|
4500
|
+
dependencies = [
|
|
4501
|
+
"bitflags 2.9.4",
|
|
4502
|
+
"hashbrown 0.15.5",
|
|
4503
|
+
"indexmap",
|
|
4504
|
+
"semver",
|
|
4505
|
+
]
|
|
4506
|
+
|
|
4395
4507
|
[[package]]
|
|
4396
4508
|
name = "web-sys"
|
|
4397
4509
|
version = "0.3.78"
|
|
@@ -4458,7 +4570,7 @@ version = "0.1.11"
|
|
|
4458
4570
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4459
4571
|
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
|
4460
4572
|
dependencies = [
|
|
4461
|
-
"windows-sys 0.
|
|
4573
|
+
"windows-sys 0.48.0",
|
|
4462
4574
|
]
|
|
4463
4575
|
|
|
4464
4576
|
[[package]]
|
|
@@ -4807,6 +4919,94 @@ version = "0.45.1"
|
|
|
4807
4919
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4808
4920
|
checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36"
|
|
4809
4921
|
|
|
4922
|
+
[[package]]
|
|
4923
|
+
name = "wit-bindgen"
|
|
4924
|
+
version = "0.51.0"
|
|
4925
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4926
|
+
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
|
|
4927
|
+
dependencies = [
|
|
4928
|
+
"wit-bindgen-rust-macro",
|
|
4929
|
+
]
|
|
4930
|
+
|
|
4931
|
+
[[package]]
|
|
4932
|
+
name = "wit-bindgen-core"
|
|
4933
|
+
version = "0.51.0"
|
|
4934
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4935
|
+
checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
|
|
4936
|
+
dependencies = [
|
|
4937
|
+
"anyhow",
|
|
4938
|
+
"heck",
|
|
4939
|
+
"wit-parser",
|
|
4940
|
+
]
|
|
4941
|
+
|
|
4942
|
+
[[package]]
|
|
4943
|
+
name = "wit-bindgen-rust"
|
|
4944
|
+
version = "0.51.0"
|
|
4945
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4946
|
+
checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
|
|
4947
|
+
dependencies = [
|
|
4948
|
+
"anyhow",
|
|
4949
|
+
"heck",
|
|
4950
|
+
"indexmap",
|
|
4951
|
+
"prettyplease",
|
|
4952
|
+
"syn",
|
|
4953
|
+
"wasm-metadata",
|
|
4954
|
+
"wit-bindgen-core",
|
|
4955
|
+
"wit-component",
|
|
4956
|
+
]
|
|
4957
|
+
|
|
4958
|
+
[[package]]
|
|
4959
|
+
name = "wit-bindgen-rust-macro"
|
|
4960
|
+
version = "0.51.0"
|
|
4961
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4962
|
+
checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
|
|
4963
|
+
dependencies = [
|
|
4964
|
+
"anyhow",
|
|
4965
|
+
"prettyplease",
|
|
4966
|
+
"proc-macro2",
|
|
4967
|
+
"quote",
|
|
4968
|
+
"syn",
|
|
4969
|
+
"wit-bindgen-core",
|
|
4970
|
+
"wit-bindgen-rust",
|
|
4971
|
+
]
|
|
4972
|
+
|
|
4973
|
+
[[package]]
|
|
4974
|
+
name = "wit-component"
|
|
4975
|
+
version = "0.244.0"
|
|
4976
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4977
|
+
checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
|
|
4978
|
+
dependencies = [
|
|
4979
|
+
"anyhow",
|
|
4980
|
+
"bitflags 2.9.4",
|
|
4981
|
+
"indexmap",
|
|
4982
|
+
"log",
|
|
4983
|
+
"serde",
|
|
4984
|
+
"serde_derive",
|
|
4985
|
+
"serde_json",
|
|
4986
|
+
"wasm-encoder",
|
|
4987
|
+
"wasm-metadata",
|
|
4988
|
+
"wasmparser",
|
|
4989
|
+
"wit-parser",
|
|
4990
|
+
]
|
|
4991
|
+
|
|
4992
|
+
[[package]]
|
|
4993
|
+
name = "wit-parser"
|
|
4994
|
+
version = "0.244.0"
|
|
4995
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4996
|
+
checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
|
|
4997
|
+
dependencies = [
|
|
4998
|
+
"anyhow",
|
|
4999
|
+
"id-arena",
|
|
5000
|
+
"indexmap",
|
|
5001
|
+
"log",
|
|
5002
|
+
"semver",
|
|
5003
|
+
"serde",
|
|
5004
|
+
"serde_derive",
|
|
5005
|
+
"serde_json",
|
|
5006
|
+
"unicode-xid",
|
|
5007
|
+
"wasmparser",
|
|
5008
|
+
]
|
|
5009
|
+
|
|
4810
5010
|
[[package]]
|
|
4811
5011
|
name = "writeable"
|
|
4812
5012
|
version = "0.6.1"
|
data/ext/candle/Cargo.toml
CHANGED
(+1 -1; hunk not captured in this extract)
data/ext/candle/src/gvl.rs
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/// GVL (Global VM Lock) release support for Ruby.
|
|
2
|
+
///
|
|
3
|
+
/// Ruby's GVL prevents other Ruby threads from running while native code
|
|
4
|
+
/// executes. For long-running operations (LLM inference, reranking, embedding),
|
|
5
|
+
/// we release the GVL so other threads (TUI render loops, HTTP servers, etc.)
|
|
6
|
+
/// can run concurrently.
|
|
7
|
+
///
|
|
8
|
+
/// SAFETY: Code running without the GVL must NOT call any Ruby API.
|
|
9
|
+
|
|
10
|
+
use std::os::raw::c_void;
|
|
11
|
+
|
|
12
|
+
type UnblockFn = unsafe extern "C" fn(*mut c_void);
|
|
13
|
+
|
|
14
|
+
extern "C" {
|
|
15
|
+
fn rb_thread_call_without_gvl(
|
|
16
|
+
func: unsafe extern "C" fn(*mut c_void) -> *mut c_void,
|
|
17
|
+
data1: *mut c_void,
|
|
18
|
+
ubf: Option<UnblockFn>,
|
|
19
|
+
data2: *mut c_void,
|
|
20
|
+
) -> *mut c_void;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/// Run a closure without the GVL. The closure must not call any Ruby API.
|
|
24
|
+
pub fn without_gvl<F, R>(f: F) -> R
|
|
25
|
+
where
|
|
26
|
+
F: FnOnce() -> R,
|
|
27
|
+
{
|
|
28
|
+
struct CallData<F, R> {
|
|
29
|
+
func: Option<F>,
|
|
30
|
+
result: Option<R>,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
unsafe extern "C" fn call_func<F, R>(data: *mut c_void) -> *mut c_void
|
|
34
|
+
where
|
|
35
|
+
F: FnOnce() -> R,
|
|
36
|
+
{
|
|
37
|
+
let data = &mut *(data as *mut CallData<F, R>);
|
|
38
|
+
let func = data.func.take().unwrap();
|
|
39
|
+
data.result = Some(func());
|
|
40
|
+
std::ptr::null_mut()
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
let mut data = CallData {
|
|
44
|
+
func: Some(f),
|
|
45
|
+
result: None,
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
unsafe {
|
|
49
|
+
rb_thread_call_without_gvl(
|
|
50
|
+
call_func::<F, R>,
|
|
51
|
+
&mut data as *mut _ as *mut c_void,
|
|
52
|
+
None,
|
|
53
|
+
std::ptr::null_mut(),
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
data.result.unwrap()
|
|
58
|
+
}
|
data/ext/candle/src/lib.rs
CHANGED
(+1 -0; hunk not captured in this extract)
data/ext/candle/src/ruby/embedding_model.rs
CHANGED
|
@@ -104,14 +104,15 @@ impl EmbeddingModel {
|
|
|
104
104
|
/// pooling_method: "pooled", "pooled_normalized", or "cls" (default: "pooled")
|
|
105
105
|
pub fn embedding(&self, input: String, pooling_method: String) -> Result<Tensor> {
|
|
106
106
|
let ruby = Ruby::get().unwrap();
|
|
107
|
-
match &self.0.model {
|
|
108
|
-
Some(model) => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
107
|
+
match (&self.0.model, &self.0.tokenizer) {
|
|
108
|
+
(Some(model), Some(tokenizer)) => {
|
|
109
|
+
let result = crate::gvl::without_gvl(|| {
|
|
110
|
+
self.compute_embedding(input, model, tokenizer, &pooling_method)
|
|
111
|
+
});
|
|
112
|
+
Ok(Tensor(result?))
|
|
113
113
|
}
|
|
114
|
-
None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found"))
|
|
114
|
+
(None, _) => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found")),
|
|
115
|
+
(_, None) => Err(magnus::Error::new(ruby.exception_runtime_error(), "Tokenizer not found")),
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
@@ -119,14 +120,15 @@ impl EmbeddingModel {
|
|
|
119
120
|
/// &RETURNS&: Tensor
|
|
120
121
|
pub fn embeddings(&self, input: String) -> Result<Tensor> {
|
|
121
122
|
let ruby = Ruby::get().unwrap();
|
|
122
|
-
match &self.0.model {
|
|
123
|
-
Some(model) => {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
123
|
+
match (&self.0.model, &self.0.tokenizer) {
|
|
124
|
+
(Some(model), Some(tokenizer)) => {
|
|
125
|
+
let result = crate::gvl::without_gvl(|| {
|
|
126
|
+
self.compute_embeddings(input, model, tokenizer)
|
|
127
|
+
});
|
|
128
|
+
Ok(Tensor(result?))
|
|
128
129
|
}
|
|
129
|
-
None => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found"))
|
|
130
|
+
(None, _) => Err(magnus::Error::new(ruby.exception_runtime_error(), "Model not found")),
|
|
131
|
+
(_, None) => Err(magnus::Error::new(ruby.exception_runtime_error(), "Tokenizer not found")),
|
|
130
132
|
}
|
|
131
133
|
}
|
|
132
134
|
|
data/ext/candle/src/ruby/llm.rs
CHANGED
|
@@ -5,6 +5,7 @@ use std::sync::Arc;
|
|
|
5
5
|
use crate::llm::{GenerationConfig as RustGenerationConfig, TextGenerator, mistral::Mistral as RustMistral, llama::Llama as RustLlama, gemma::Gemma as RustGemma, qwen::Qwen as RustQwen, qwen3::Qwen3 as RustQwen3, phi::Phi as RustPhi, granite::Granite as RustGranite, granitemoehybrid::GraniteMoeHybrid as RustGraniteMoeHybrid, glm4::Glm4 as RustGlm4, QuantizedGGUF as RustQuantizedGGUF};
|
|
6
6
|
use crate::ruby::{Result, Device};
|
|
7
7
|
use crate::ruby::structured::StructuredConstraint;
|
|
8
|
+
use crate::gvl;
|
|
8
9
|
|
|
9
10
|
// Use an enum to handle different model types instead of trait objects
|
|
10
11
|
enum ModelType {
|
|
@@ -422,7 +423,7 @@ impl LLM {
|
|
|
422
423
|
})
|
|
423
424
|
}
|
|
424
425
|
|
|
425
|
-
/// Generate text from a prompt
|
|
426
|
+
/// Generate text from a prompt (releases GVL during inference)
|
|
426
427
|
pub fn generate(&self, prompt: String, config: Option<&GenerationConfig>) -> Result<String> {
|
|
427
428
|
let ruby = Ruby::get().unwrap();
|
|
428
429
|
let config = config
|
|
@@ -435,8 +436,13 @@ impl LLM {
|
|
|
435
436
|
};
|
|
436
437
|
let mut model_ref = model.borrow_mut();
|
|
437
438
|
|
|
438
|
-
|
|
439
|
-
|
|
439
|
+
// Release the GVL during inference so other Ruby threads can run
|
|
440
|
+
// (e.g., TUI render loops, HTTP servers, etc.)
|
|
441
|
+
let result = gvl::without_gvl(|| {
|
|
442
|
+
model_ref.generate(&prompt, &config)
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
result.map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Generation failed: {}", e)))
|
|
440
446
|
}
|
|
441
447
|
|
|
442
448
|
/// Generate text with streaming output
|
data/ext/candle/src/ruby/ner.rs
CHANGED
|
@@ -173,8 +173,10 @@ impl NER {
|
|
|
173
173
|
let ruby = Ruby::get().unwrap();
|
|
174
174
|
let threshold = confidence_threshold.unwrap_or(0.9) as f32;
|
|
175
175
|
|
|
176
|
-
//
|
|
177
|
-
let (encoding, probs_vec) =
|
|
176
|
+
// Release GVL during tokenization + model forward pass
|
|
177
|
+
let (encoding, probs_vec) = crate::gvl::without_gvl(|| {
|
|
178
|
+
self.tokenize_and_predict(&text)
|
|
179
|
+
})?;
|
|
178
180
|
|
|
179
181
|
let tokens = encoding.get_tokens();
|
|
180
182
|
let offsets = encoding.get_offsets();
|
|
@@ -208,8 +210,10 @@ impl NER {
|
|
|
208
210
|
/// Get token-level predictions with labels and confidence scores
|
|
209
211
|
pub fn predict_tokens(&self, text: String) -> Result<RArray> {
|
|
210
212
|
let ruby = Ruby::get().unwrap();
|
|
211
|
-
//
|
|
212
|
-
let (encoding, probs_vec) =
|
|
213
|
+
// Release GVL during tokenization + model forward pass
|
|
214
|
+
let (encoding, probs_vec) = crate::gvl::without_gvl(|| {
|
|
215
|
+
self.tokenize_and_predict(&text)
|
|
216
|
+
})?;
|
|
213
217
|
|
|
214
218
|
let tokens = encoding.get_tokens();
|
|
215
219
|
|
data/ext/candle/src/ruby/reranker.rs
CHANGED
|
@@ -18,6 +18,7 @@ use hf_hub::{api::sync::Api, Repo, RepoType};
|
|
|
18
18
|
use tokenizers::{EncodeInput, Tokenizer};
|
|
19
19
|
use std::cell::RefCell;
|
|
20
20
|
use crate::ruby::{Device, Result};
|
|
21
|
+
use crate::gvl;
|
|
21
22
|
use crate::tokenizer::{TokenizerWrapper, loader::TokenizerLoader};
|
|
22
23
|
|
|
23
24
|
enum RerankerModel {
|
|
@@ -164,39 +165,27 @@ impl Reranker {
|
|
|
164
165
|
}
|
|
165
166
|
|
|
166
167
|
/// Extract CLS embeddings from the model output, handling Metal device workarounds
|
|
167
|
-
fn extract_cls_embeddings(&self, embeddings: &Tensor) -> std::result::Result<Tensor,
|
|
168
|
-
let ruby = Ruby::get().unwrap();
|
|
169
|
-
let runtime_error = ruby.exception_runtime_error();
|
|
170
|
-
|
|
168
|
+
fn extract_cls_embeddings(&self, embeddings: &Tensor) -> std::result::Result<Tensor, String> {
|
|
171
169
|
let cls_embeddings = if self.device.is_metal() {
|
|
172
|
-
// Metal has issues with tensor indexing, use a different approach
|
|
173
170
|
let (batch_size, seq_len, hidden_size) = embeddings.dims3()
|
|
174
|
-
.map_err(|e|
|
|
175
|
-
|
|
176
|
-
// Reshape to [batch * seq_len, hidden] then take first hidden vectors for each batch
|
|
171
|
+
.map_err(|e| format!("Failed to get dims: {}", e))?;
|
|
177
172
|
let reshaped = embeddings.reshape((batch_size * seq_len, hidden_size))
|
|
178
|
-
.map_err(|e|
|
|
179
|
-
|
|
180
|
-
// Extract CLS tokens (first token of each sequence)
|
|
173
|
+
.map_err(|e| format!("Failed to reshape: {}", e))?;
|
|
181
174
|
let mut cls_vecs = Vec::new();
|
|
182
175
|
for i in 0..batch_size {
|
|
183
176
|
let start_idx = i * seq_len;
|
|
184
177
|
let cls_vec = reshaped.narrow(0, start_idx, 1)
|
|
185
|
-
.map_err(|e|
|
|
178
|
+
.map_err(|e| format!("Failed to extract CLS: {}", e))?;
|
|
186
179
|
cls_vecs.push(cls_vec);
|
|
187
180
|
}
|
|
188
|
-
|
|
189
|
-
// Stack the CLS vectors
|
|
190
181
|
Tensor::cat(&cls_vecs, 0)
|
|
191
|
-
.map_err(|e|
|
|
182
|
+
.map_err(|e| format!("Failed to cat CLS tokens: {}", e))?
|
|
192
183
|
} else {
|
|
193
184
|
embeddings.i((.., 0))
|
|
194
|
-
.map_err(|e|
|
|
185
|
+
.map_err(|e| format!("Failed to extract CLS token: {}", e))?
|
|
195
186
|
};
|
|
196
|
-
|
|
197
|
-
// Ensure tensor is contiguous for downstream operations
|
|
198
187
|
cls_embeddings.contiguous()
|
|
199
|
-
.map_err(|e|
|
|
188
|
+
.map_err(|e| format!("Failed to make CLS embeddings contiguous: {}", e))
|
|
200
189
|
}
|
|
201
190
|
|
|
202
191
|
pub fn debug_tokenization(&self, query: String, document: String) -> std::result::Result<RHash, Error> {
|
|
@@ -231,124 +220,147 @@ impl Reranker {
|
|
|
231
220
|
let runtime_error = ruby.exception_runtime_error();
|
|
232
221
|
let documents: Vec<String> = documents.to_vec()?;
|
|
233
222
|
|
|
223
|
+
// Release the GVL for the entire compute portion (tokenization + inference + scoring).
|
|
224
|
+
// None of this calls Ruby API.
|
|
225
|
+
let ranked_docs = gvl::without_gvl(|| -> std::result::Result<Vec<(String, f32, usize)>, String> {
|
|
226
|
+
self.compute_rerank(&query, &documents, &pooling_method, apply_sigmoid)
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
let ranked_docs = ranked_docs
|
|
230
|
+
.map_err(|e| Error::new(runtime_error, e))?;
|
|
231
|
+
|
|
232
|
+
// Build result array (requires GVL for Ruby object creation)
|
|
233
|
+
let result_array = ruby.ary_new();
|
|
234
|
+
for (doc, score, doc_id) in ranked_docs {
|
|
235
|
+
let tuple = ruby.ary_new();
|
|
236
|
+
tuple.push(doc)?;
|
|
237
|
+
tuple.push(ruby.float_from_f64(score as f64))?;
|
|
238
|
+
tuple.push(doc_id)?;
|
|
239
|
+
result_array.push(tuple)?;
|
|
240
|
+
}
|
|
241
|
+
Ok(result_array)
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/// Pure compute portion of reranking — no Ruby API calls.
|
|
245
|
+
/// Returns ranked (document, score, original_index) tuples.
|
|
246
|
+
fn compute_rerank(&self, query: &str, documents: &[String], pooling_method: &str, apply_sigmoid: bool) -> std::result::Result<Vec<(String, f32, usize)>, String> {
|
|
234
247
|
// Create query-document pairs for cross-encoder
|
|
235
248
|
let query_and_docs: Vec<EncodeInput> = documents
|
|
236
249
|
.iter()
|
|
237
|
-
.map(|d| (query.
|
|
250
|
+
.map(|d| (query.to_string(), d.clone()).into())
|
|
238
251
|
.collect();
|
|
239
252
|
|
|
240
|
-
// Tokenize batch
|
|
253
|
+
// Tokenize batch
|
|
241
254
|
let encodings = self.tokenizer.inner().encode_batch(query_and_docs, true)
|
|
242
|
-
.map_err(|e|
|
|
255
|
+
.map_err(|e| format!("Tokenization failed: {}", e))?;
|
|
243
256
|
|
|
244
|
-
|
|
245
|
-
let token_ids = encodings
|
|
257
|
+
let token_ids_vec = encodings
|
|
246
258
|
.iter()
|
|
247
259
|
.map(|e| e.get_ids().to_vec())
|
|
248
260
|
.collect::<Vec<_>>();
|
|
249
261
|
|
|
250
|
-
let
|
|
262
|
+
let token_type_ids_vec = encodings
|
|
251
263
|
.iter()
|
|
252
264
|
.map(|e| e.get_type_ids().to_vec())
|
|
253
265
|
.collect::<Vec<_>>();
|
|
254
266
|
|
|
255
|
-
let token_ids = Tensor::new(
|
|
256
|
-
.map_err(|e|
|
|
257
|
-
let token_type_ids = Tensor::new(
|
|
258
|
-
.map_err(|e|
|
|
267
|
+
let token_ids = Tensor::new(token_ids_vec, &self.device)
|
|
268
|
+
.map_err(|e| format!("Failed to create tensor: {}", e))?;
|
|
269
|
+
let token_type_ids = Tensor::new(token_type_ids_vec, &self.device)
|
|
270
|
+
.map_err(|e| format!("Failed to create token type ids tensor: {}", e))?;
|
|
259
271
|
|
|
260
272
|
// Compute scores based on model type
|
|
261
273
|
let scores = match &self.model {
|
|
262
274
|
RerankerModel::Bert { model, pooler, classifier } => {
|
|
263
275
|
let attention_mask = token_ids.ne(0u32)
|
|
264
|
-
.map_err(|e|
|
|
276
|
+
.map_err(|e| format!("Failed to create attention mask: {}", e))?;
|
|
265
277
|
|
|
266
278
|
// Forward pass through BERT
|
|
267
279
|
let embeddings = model.forward(&token_ids, &token_type_ids, Some(&attention_mask))
|
|
268
|
-
.map_err(|e|
|
|
280
|
+
.map_err(|e| format!("Model forward pass failed: {}", e))?;
|
|
269
281
|
|
|
270
282
|
// Apply pooling based on the specified method
|
|
271
|
-
let pooled_embeddings = match pooling_method
|
|
283
|
+
let pooled_embeddings = match pooling_method {
|
|
272
284
|
"pooler" => {
|
|
273
285
|
let cls_embeddings = self.extract_cls_embeddings(&embeddings)?;
|
|
274
286
|
let pooled = pooler.forward(&cls_embeddings)
|
|
275
|
-
.map_err(|e|
|
|
287
|
+
.map_err(|e| format!("Pooler forward failed: {}", e))?;
|
|
276
288
|
pooled.tanh()
|
|
277
|
-
.map_err(|e|
|
|
289
|
+
.map_err(|e| format!("Tanh activation failed: {}", e))?
|
|
278
290
|
},
|
|
279
291
|
"cls" => {
|
|
280
292
|
self.extract_cls_embeddings(&embeddings)?
|
|
281
293
|
},
|
|
282
294
|
"mean" => {
|
|
283
295
|
let (_batch, seq_len, _hidden) = embeddings.dims3()
|
|
284
|
-
.map_err(|e|
|
|
296
|
+
.map_err(|e| format!("Failed to get tensor dimensions: {}", e))?;
|
|
285
297
|
let sum = embeddings.sum(1)
|
|
286
|
-
.map_err(|e|
|
|
298
|
+
.map_err(|e| format!("Failed to sum embeddings: {}", e))?;
|
|
287
299
|
(sum / (seq_len as f64))
|
|
288
|
-
.map_err(|e|
|
|
300
|
+
.map_err(|e| format!("Failed to compute mean: {}", e))?
|
|
289
301
|
},
|
|
290
|
-
_ => return Err(
|
|
291
|
-
format!("Unknown pooling method: {}. Use 'pooler', 'cls', or 'mean'", pooling_method))
|
|
302
|
+
_ => return Err(
|
|
303
|
+
format!("Unknown pooling method: {}. Use 'pooler', 'cls', or 'mean'", pooling_method))
|
|
292
304
|
};
|
|
293
305
|
|
|
294
306
|
let pooled_embeddings = pooled_embeddings.contiguous()
|
|
295
|
-
.map_err(|e|
|
|
307
|
+
.map_err(|e| format!("Failed to make pooled_embeddings contiguous: {}", e))?;
|
|
296
308
|
let logits = classifier.forward(&pooled_embeddings)
|
|
297
|
-
.map_err(|e|
|
|
309
|
+
.map_err(|e| format!("Classifier forward failed: {}", e))?;
|
|
298
310
|
logits.squeeze(1)
|
|
299
|
-
.map_err(|e|
|
|
311
|
+
.map_err(|e| format!("Failed to squeeze tensor: {}", e))?
|
|
300
312
|
}
|
|
301
313
|
RerankerModel::XLMRoberta { model, pad_token_id } => {
|
|
302
314
|
let attention_mask = token_ids.ne(*pad_token_id)
|
|
303
|
-
.map_err(|e|
|
|
315
|
+
.map_err(|e| format!("Failed to create attention mask: {}", e))?;
|
|
304
316
|
|
|
305
317
|
// XLMRobertaForSequenceClassification returns logits directly
|
|
306
318
|
let logits = model.forward(&token_ids, &attention_mask, &token_type_ids)
|
|
307
|
-
.map_err(|e|
|
|
319
|
+
.map_err(|e| format!("Model forward pass failed: {}", e))?;
|
|
308
320
|
logits.squeeze(1)
|
|
309
|
-
.map_err(|e|
|
|
321
|
+
.map_err(|e| format!("Failed to squeeze tensor: {}", e))?
|
|
310
322
|
}
|
|
311
323
|
RerankerModel::DeBERTa { model, pooler, classifier, pad_token_id } => {
|
|
312
324
|
let attention_mask = token_ids.ne(*pad_token_id)
|
|
313
|
-
.map_err(|e|
|
|
325
|
+
.map_err(|e| format!("Failed to create attention mask: {}", e))?;
|
|
314
326
|
|
|
315
327
|
// Forward through DeBERTa encoder
|
|
316
328
|
let encoder_output = model.forward(&token_ids, Some(token_type_ids.clone()), Some(attention_mask))
|
|
317
|
-
.map_err(|e|
|
|
329
|
+
.map_err(|e| format!("Model forward pass failed: {}", e))?;
|
|
318
330
|
|
|
319
331
|
// Pool and classify
|
|
320
332
|
let pooled = pooler.forward(&encoder_output)
|
|
321
|
-
.map_err(|e|
|
|
333
|
+
.map_err(|e| format!("Pooler forward failed: {}", e))?;
|
|
322
334
|
let logits = classifier.forward(&pooled)
|
|
323
|
-
.map_err(|e|
|
|
335
|
+
.map_err(|e| format!("Classifier forward failed: {}", e))?;
|
|
324
336
|
logits.squeeze(1)
|
|
325
|
-
.map_err(|e|
|
|
337
|
+
.map_err(|e| format!("Failed to squeeze tensor: {}", e))?
|
|
326
338
|
}
|
|
327
339
|
RerankerModel::ModernBert { model, head_dense, head_norm, classifier, pad_token_id } => {
|
|
328
340
|
let attention_mask = token_ids.ne(*pad_token_id)
|
|
329
|
-
.map_err(|e|
|
|
341
|
+
.map_err(|e| format!("Failed to create attention mask: {}", e))?;
|
|
330
342
|
let attention_mask_f32 = attention_mask.to_dtype(DType::F32)
|
|
331
|
-
.map_err(|e|
|
|
343
|
+
.map_err(|e| format!("Failed to convert attention mask: {}", e))?;
|
|
332
344
|
|
|
333
345
|
// Forward through ModernBERT encoder
|
|
334
346
|
let encoder_output = model.forward(&token_ids, &attention_mask_f32)
|
|
335
|
-
.map_err(|e|
|
|
347
|
+
.map_err(|e| format!("Model forward pass failed: {}", e))?;
|
|
336
348
|
|
|
337
349
|
// CLS pooling, then head (dense + GELU + norm) + classifier
|
|
338
350
|
let cls = encoder_output.i((.., 0, ..))
|
|
339
|
-
.map_err(|e|
|
|
351
|
+
.map_err(|e| format!("Failed to extract CLS: {}", e))?
|
|
340
352
|
.contiguous()
|
|
341
|
-
.map_err(|e|
|
|
353
|
+
.map_err(|e| format!("Failed to make contiguous: {}", e))?;
|
|
342
354
|
let hidden = head_dense.forward(&cls)
|
|
343
|
-
.map_err(|e|
|
|
355
|
+
.map_err(|e| format!("Head dense failed: {}", e))?;
|
|
344
356
|
let hidden = hidden.gelu_erf()
|
|
345
|
-
.map_err(|e|
|
|
357
|
+
.map_err(|e| format!("GELU activation failed: {}", e))?;
|
|
346
358
|
let hidden = head_norm.forward(&hidden)
|
|
347
|
-
.map_err(|e|
|
|
359
|
+
.map_err(|e| format!("Head norm failed: {}", e))?;
|
|
348
360
|
let logits = classifier.forward(&hidden)
|
|
349
|
-
.map_err(|e|
|
|
361
|
+
.map_err(|e| format!("Classifier forward failed: {}", e))?;
|
|
350
362
|
logits.squeeze(1)
|
|
351
|
-
.map_err(|e|
|
|
363
|
+
.map_err(|e| format!("Failed to squeeze tensor: {}", e))?
|
|
352
364
|
}
|
|
353
365
|
RerankerModel::Qwen3 { model, yes_token_id, no_token_id } => {
|
|
354
366
|
// Qwen3 reranker: decoder-based yes/no scoring
|
|
@@ -356,7 +368,7 @@ impl Reranker {
|
|
|
356
368
|
let mut scores_vec: Vec<f32> = Vec::with_capacity(documents.len());
|
|
357
369
|
let mut model = model.borrow_mut();
|
|
358
370
|
|
|
359
|
-
for doc in
|
|
371
|
+
for doc in documents.iter() {
|
|
360
372
|
// Build the Qwen3 reranker prompt
|
|
361
373
|
let prompt = format!(
|
|
362
374
|
"<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n<Instruct>: Given a web search query, retrieve relevant passages that answer the query\n<Query>: {}\n<Document>: {}<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n",
|
|
@@ -365,7 +377,7 @@ impl Reranker {
|
|
|
365
377
|
|
|
366
378
|
// Tokenize the prompt
|
|
367
379
|
let encoding = self.tokenizer.inner().encode(prompt.as_str(), false)
|
|
368
|
-
.map_err(|e|
|
|
380
|
+
.map_err(|e| format!("Tokenization failed: {}", e))?;
|
|
369
381
|
let input_ids: Vec<u32> = encoding.get_ids().to_vec();
|
|
370
382
|
|
|
371
383
|
// Clear KV cache for each document
|
|
@@ -373,28 +385,28 @@ impl Reranker {
|
|
|
373
385
|
|
|
374
386
|
// Forward pass — get logits for the last token position
|
|
375
387
|
let input_tensor = Tensor::new(&input_ids[..], &self.device)
|
|
376
|
-
.map_err(|e|
|
|
388
|
+
.map_err(|e| format!("Failed to create tensor: {}", e))?
|
|
377
389
|
.unsqueeze(0)
|
|
378
|
-
.map_err(|e|
|
|
390
|
+
.map_err(|e| format!("Failed to unsqueeze: {}", e))?;
|
|
379
391
|
|
|
380
392
|
let logits = model.forward(&input_tensor, 0)
|
|
381
|
-
.map_err(|e|
|
|
393
|
+
.map_err(|e| format!("Model forward pass failed: {}", e))?;
|
|
382
394
|
|
|
383
395
|
// logits shape: [1, 1, vocab_size] → flatten to [vocab_size]
|
|
384
396
|
let logits = logits.flatten_all()
|
|
385
|
-
.map_err(|e|
|
|
397
|
+
.map_err(|e| format!("Failed to flatten: {}", e))?
|
|
386
398
|
.to_dtype(DType::F32)
|
|
387
|
-
.map_err(|e|
|
|
399
|
+
.map_err(|e| format!("Failed to convert dtype: {}", e))?;
|
|
388
400
|
|
|
389
401
|
// Extract yes/no logits and compute score
|
|
390
402
|
let yes_logit: f32 = logits.i(*yes_token_id as usize)
|
|
391
|
-
.map_err(|e|
|
|
403
|
+
.map_err(|e| format!("Failed to get yes logit: {}", e))?
|
|
392
404
|
.to_scalar()
|
|
393
|
-
.map_err(|e|
|
|
405
|
+
.map_err(|e| format!("Failed to convert yes logit: {}", e))?;
|
|
394
406
|
let no_logit: f32 = logits.i(*no_token_id as usize)
|
|
395
|
-
.map_err(|e|
|
|
407
|
+
.map_err(|e| format!("Failed to get no logit: {}", e))?
|
|
396
408
|
.to_scalar()
|
|
397
|
-
.map_err(|e|
|
|
409
|
+
.map_err(|e| format!("Failed to convert no logit: {}", e))?;
|
|
398
410
|
|
|
399
411
|
// softmax over [yes, no] → P(yes)
|
|
400
412
|
let max_logit = yes_logit.max(no_logit);
|
|
@@ -407,24 +419,25 @@ impl Reranker {
|
|
|
407
419
|
|
|
408
420
|
// Build scores tensor for uniform handling below
|
|
409
421
|
Tensor::new(scores_vec.as_slice(), &self.device)
|
|
410
|
-
.map_err(|e|
|
|
422
|
+
.map_err(|e| format!("Failed to create scores tensor: {}", e))?
|
|
411
423
|
}
|
|
412
424
|
};
|
|
413
425
|
|
|
414
426
|
// Optionally apply sigmoid activation
|
|
415
427
|
let scores = if apply_sigmoid {
|
|
416
428
|
sigmoid(&scores)
|
|
417
|
-
.map_err(|e|
|
|
429
|
+
.map_err(|e| format!("Sigmoid failed: {}", e))?
|
|
418
430
|
} else {
|
|
419
431
|
scores
|
|
420
432
|
};
|
|
421
433
|
|
|
422
434
|
let scores_vec: Vec<f32> = scores.to_vec1()
|
|
423
|
-
.map_err(|e|
|
|
435
|
+
.map_err(|e| format!("Failed to convert scores to vec: {}", e))?;
|
|
424
436
|
|
|
425
437
|
// Create tuples with document, score, and original index
|
|
426
438
|
let mut ranked_docs: Vec<(String, f32, usize)> = documents
|
|
427
|
-
.
|
|
439
|
+
.iter()
|
|
440
|
+
.cloned()
|
|
428
441
|
.zip(scores_vec)
|
|
429
442
|
.enumerate()
|
|
430
443
|
.map(|(idx, (doc, score))| (doc, score, idx))
|
|
@@ -433,16 +446,7 @@ impl Reranker {
|
|
|
433
446
|
// Sort documents by relevance score (descending)
|
|
434
447
|
ranked_docs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
435
448
|
|
|
436
|
-
|
|
437
|
-
let result_array = ruby.ary_new();
|
|
438
|
-
for (doc, score, doc_id) in ranked_docs {
|
|
439
|
-
let tuple = ruby.ary_new();
|
|
440
|
-
tuple.push(doc)?;
|
|
441
|
-
tuple.push(ruby.float_from_f64(score as f64))?;
|
|
442
|
-
tuple.push(doc_id)?;
|
|
443
|
-
result_array.push(tuple)?;
|
|
444
|
-
}
|
|
445
|
-
Ok(result_array)
|
|
449
|
+
Ok(ranked_docs)
|
|
446
450
|
}
|
|
447
451
|
|
|
448
452
|
/// Get the tokenizer used by this model
|
data/lib/candle/version.rb
CHANGED
data/lib/candle.rb
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
require_relative "candle/logger"
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
# Load the compiled Rust extension. Precompiled (platform) gems install it into a
|
|
4
|
+
# Ruby-ABI-versioned subdir (lib/candle/<major.minor>/candle.{so,bundle}) so a single
|
|
5
|
+
# fat gem can carry a binary per Ruby version; source/dev builds place it flat at
|
|
6
|
+
# lib/candle/candle.{so,bundle}. Try the versioned path first, fall back to the flat
|
|
7
|
+
# one. Resolution goes through $LOAD_PATH (`require`, never `require_relative`) because
|
|
8
|
+
# RubyGems installs native extensions outside the gem's lib/ dir — see
|
|
9
|
+
# spec/require_spec.rb and Issue #75.
|
|
10
|
+
begin
|
|
11
|
+
RUBY_VERSION =~ /(\d+\.\d+)/
|
|
12
|
+
require "candle/#{Regexp.last_match(1)}/candle"
|
|
13
|
+
rescue LoadError
|
|
14
|
+
require "candle/candle"
|
|
15
|
+
end
|
|
16
|
+
|
|
3
17
|
require_relative "candle/tensor"
|
|
4
18
|
require_relative "candle/device_utils"
|
|
5
19
|
require_relative "candle/embedding_model_type"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: red-candle
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.8.0.pre1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Christopher Petersen
|
|
@@ -215,6 +215,7 @@ files:
|
|
|
215
215
|
- ext/candle/build.rs
|
|
216
216
|
- ext/candle/extconf.rb
|
|
217
217
|
- ext/candle/rustfmt.toml
|
|
218
|
+
- ext/candle/src/gvl.rs
|
|
218
219
|
- ext/candle/src/lib.rs
|
|
219
220
|
- ext/candle/src/llm/constrained_generation_test.rs
|
|
220
221
|
- ext/candle/src/llm/gemma.rs
|
|
@@ -290,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
290
291
|
version: '3.3'
|
|
291
292
|
requirements:
|
|
292
293
|
- Rust >= 1.85
|
|
293
|
-
rubygems_version:
|
|
294
|
+
rubygems_version: 3.6.9
|
|
294
295
|
specification_version: 4
|
|
295
296
|
summary: Ruby gem for running state-of-the-art language models locally. Access LLMs,
|
|
296
297
|
embeddings, rerankers, and NER models directly from Ruby using Rust-powered Candle
|