red-candle 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +244 -6
- data/README.md +38 -3
- data/Rakefile +46 -1
- data/ext/candle/Cargo.toml +2 -0
- data/ext/candle/src/lib.rs +2 -0
- data/ext/candle/src/llm/constrained_generation_test.rs +316 -0
- data/ext/candle/src/llm/gemma.rs +21 -5
- data/ext/candle/src/llm/generation_config.rs +11 -0
- data/ext/candle/src/llm/llama.rs +21 -5
- data/ext/candle/src/llm/mistral.rs +21 -5
- data/ext/candle/src/llm/mod.rs +5 -0
- data/ext/candle/src/llm/phi.rs +301 -0
- data/ext/candle/src/llm/quantized_gguf.rs +173 -9
- data/ext/candle/src/llm/qwen.rs +245 -0
- data/ext/candle/src/llm/text_generation.rs +183 -26
- data/ext/candle/src/ner.rs +25 -51
- data/ext/candle/src/reranker.rs +41 -68
- data/ext/candle/src/ruby/device.rs +5 -0
- data/ext/candle/src/ruby/llm.rs +119 -55
- data/ext/candle/src/ruby/mod.rs +1 -0
- data/ext/candle/src/ruby/structured.rs +47 -0
- data/ext/candle/src/structured/integration_test.rs +130 -0
- data/ext/candle/src/structured/mod.rs +31 -0
- data/ext/candle/src/structured/schema_processor.rs +215 -0
- data/ext/candle/src/structured/vocabulary_adapter.rs +152 -0
- data/ext/candle/src/structured/vocabulary_adapter_real_test.rs +66 -0
- data/ext/candle/src/structured/vocabulary_adapter_simple_test.rs +70 -0
- data/lib/candle/llm.rb +203 -2
- data/lib/candle/version.rb +1 -1
- metadata +14 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3678037fbb196c621c8e9df6a213b0d3dffbdb1b8b3dfd73eee4a7ea2feafca
|
4
|
+
data.tar.gz: ada97ef81af854439622bdc12b796442be9e0f31e7c7d8a5df374c7bfb07ff2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d353177318c4599fa30974a676350087a8e5fd070fe3d317344a4e1b3ae022cb69adf742d62063c2da09dbab7e971cbfae1e53a87527ce7f1c18afd1223797e8
|
7
|
+
data.tar.gz: df4b2f43f6fb1aa623053fd09d6e48eba0d8c2615f51dc2accdc4dc292fb3fb7d665553b04cae3747e001e04cd4b9cdbe5c022c3efd077daddf97e074a1e9e5c
|
data/Cargo.lock
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
2
2
|
# It is not intended for manual editing.
|
3
|
-
version =
|
3
|
+
version = 4
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "accelerate-src"
|
@@ -121,6 +121,26 @@ version = "0.22.1"
|
|
121
121
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
122
122
|
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
123
123
|
|
124
|
+
[[package]]
|
125
|
+
name = "bincode"
|
126
|
+
version = "2.0.1"
|
127
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
128
|
+
checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740"
|
129
|
+
dependencies = [
|
130
|
+
"bincode_derive",
|
131
|
+
"serde",
|
132
|
+
"unty",
|
133
|
+
]
|
134
|
+
|
135
|
+
[[package]]
|
136
|
+
name = "bincode_derive"
|
137
|
+
version = "2.0.1"
|
138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
139
|
+
checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09"
|
140
|
+
dependencies = [
|
141
|
+
"virtue",
|
142
|
+
]
|
143
|
+
|
124
144
|
[[package]]
|
125
145
|
name = "bindgen"
|
126
146
|
version = "0.69.5"
|
@@ -136,7 +156,7 @@ dependencies = [
|
|
136
156
|
"proc-macro2",
|
137
157
|
"quote",
|
138
158
|
"regex",
|
139
|
-
"rustc-hash",
|
159
|
+
"rustc-hash 1.1.0",
|
140
160
|
"shlex",
|
141
161
|
"syn",
|
142
162
|
]
|
@@ -255,13 +275,14 @@ dependencies = [
|
|
255
275
|
"candle-nn",
|
256
276
|
"candle-transformers",
|
257
277
|
"half",
|
258
|
-
"hf-hub",
|
278
|
+
"hf-hub 0.4.3",
|
259
279
|
"magnus",
|
280
|
+
"outlines-core",
|
260
281
|
"rand 0.8.5",
|
261
282
|
"safetensors 0.3.3",
|
262
283
|
"serde",
|
263
284
|
"serde_json",
|
264
|
-
"tokenizers",
|
285
|
+
"tokenizers 0.21.2",
|
265
286
|
"tokio",
|
266
287
|
]
|
267
288
|
|
@@ -641,6 +662,15 @@ dependencies = [
|
|
641
662
|
"dirs-sys 0.4.1",
|
642
663
|
]
|
643
664
|
|
665
|
+
[[package]]
|
666
|
+
name = "dirs"
|
667
|
+
version = "5.0.1"
|
668
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
669
|
+
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
|
670
|
+
dependencies = [
|
671
|
+
"dirs-sys 0.4.1",
|
672
|
+
]
|
673
|
+
|
644
674
|
[[package]]
|
645
675
|
name = "dirs"
|
646
676
|
version = "6.0.0"
|
@@ -1303,13 +1333,30 @@ version = "0.5.2"
|
|
1303
1333
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1304
1334
|
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
1305
1335
|
|
1336
|
+
[[package]]
|
1337
|
+
name = "hf-hub"
|
1338
|
+
version = "0.3.2"
|
1339
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1340
|
+
checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732"
|
1341
|
+
dependencies = [
|
1342
|
+
"dirs 5.0.1",
|
1343
|
+
"indicatif",
|
1344
|
+
"log",
|
1345
|
+
"native-tls",
|
1346
|
+
"rand 0.8.5",
|
1347
|
+
"serde",
|
1348
|
+
"serde_json",
|
1349
|
+
"thiserror 1.0.69",
|
1350
|
+
"ureq",
|
1351
|
+
]
|
1352
|
+
|
1306
1353
|
[[package]]
|
1307
1354
|
name = "hf-hub"
|
1308
1355
|
version = "0.4.3"
|
1309
1356
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1310
1357
|
checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
|
1311
1358
|
dependencies = [
|
1312
|
-
"dirs",
|
1359
|
+
"dirs 6.0.0",
|
1313
1360
|
"futures",
|
1314
1361
|
"http",
|
1315
1362
|
"indicatif",
|
@@ -1605,6 +1652,12 @@ dependencies = [
|
|
1605
1652
|
"web-time",
|
1606
1653
|
]
|
1607
1654
|
|
1655
|
+
[[package]]
|
1656
|
+
name = "indoc"
|
1657
|
+
version = "2.0.6"
|
1658
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1659
|
+
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
1660
|
+
|
1608
1661
|
[[package]]
|
1609
1662
|
name = "intel-mkl-src"
|
1610
1663
|
version = "0.8.1"
|
@@ -1654,6 +1707,15 @@ dependencies = [
|
|
1654
1707
|
"serde",
|
1655
1708
|
]
|
1656
1709
|
|
1710
|
+
[[package]]
|
1711
|
+
name = "itertools"
|
1712
|
+
version = "0.11.0"
|
1713
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1714
|
+
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
1715
|
+
dependencies = [
|
1716
|
+
"either",
|
1717
|
+
]
|
1718
|
+
|
1657
1719
|
[[package]]
|
1658
1720
|
name = "itertools"
|
1659
1721
|
version = "0.12.1"
|
@@ -1815,6 +1877,15 @@ dependencies = [
|
|
1815
1877
|
"stable_deref_trait",
|
1816
1878
|
]
|
1817
1879
|
|
1880
|
+
[[package]]
|
1881
|
+
name = "memoffset"
|
1882
|
+
version = "0.9.1"
|
1883
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1884
|
+
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
1885
|
+
dependencies = [
|
1886
|
+
"autocfg",
|
1887
|
+
]
|
1888
|
+
|
1818
1889
|
[[package]]
|
1819
1890
|
name = "metal"
|
1820
1891
|
version = "0.27.0"
|
@@ -2186,6 +2257,25 @@ version = "0.2.0"
|
|
2186
2257
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2187
2258
|
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
2188
2259
|
|
2260
|
+
[[package]]
|
2261
|
+
name = "outlines-core"
|
2262
|
+
version = "0.2.3"
|
2263
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2264
|
+
checksum = "4f0964d94d3e2322d2c0bbf80549affe085e2c6df08cf6c06e8c558988bcb11b"
|
2265
|
+
dependencies = [
|
2266
|
+
"bincode",
|
2267
|
+
"hf-hub 0.3.2",
|
2268
|
+
"once_cell",
|
2269
|
+
"regex",
|
2270
|
+
"regex-automata",
|
2271
|
+
"rustc-hash 2.1.1",
|
2272
|
+
"serde",
|
2273
|
+
"serde-pyobject",
|
2274
|
+
"serde_json",
|
2275
|
+
"thiserror 2.0.12",
|
2276
|
+
"tokenizers 0.20.3",
|
2277
|
+
]
|
2278
|
+
|
2189
2279
|
[[package]]
|
2190
2280
|
name = "paste"
|
2191
2281
|
version = "1.0.15"
|
@@ -2306,6 +2396,69 @@ dependencies = [
|
|
2306
2396
|
"version_check",
|
2307
2397
|
]
|
2308
2398
|
|
2399
|
+
[[package]]
|
2400
|
+
name = "pyo3"
|
2401
|
+
version = "0.22.6"
|
2402
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2403
|
+
checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
|
2404
|
+
dependencies = [
|
2405
|
+
"cfg-if",
|
2406
|
+
"indoc",
|
2407
|
+
"libc",
|
2408
|
+
"memoffset",
|
2409
|
+
"once_cell",
|
2410
|
+
"portable-atomic",
|
2411
|
+
"pyo3-build-config",
|
2412
|
+
"pyo3-ffi",
|
2413
|
+
"pyo3-macros",
|
2414
|
+
"unindent",
|
2415
|
+
]
|
2416
|
+
|
2417
|
+
[[package]]
|
2418
|
+
name = "pyo3-build-config"
|
2419
|
+
version = "0.22.6"
|
2420
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2421
|
+
checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
|
2422
|
+
dependencies = [
|
2423
|
+
"once_cell",
|
2424
|
+
"target-lexicon",
|
2425
|
+
]
|
2426
|
+
|
2427
|
+
[[package]]
|
2428
|
+
name = "pyo3-ffi"
|
2429
|
+
version = "0.22.6"
|
2430
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2431
|
+
checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
|
2432
|
+
dependencies = [
|
2433
|
+
"libc",
|
2434
|
+
"pyo3-build-config",
|
2435
|
+
]
|
2436
|
+
|
2437
|
+
[[package]]
|
2438
|
+
name = "pyo3-macros"
|
2439
|
+
version = "0.22.6"
|
2440
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2441
|
+
checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
|
2442
|
+
dependencies = [
|
2443
|
+
"proc-macro2",
|
2444
|
+
"pyo3-macros-backend",
|
2445
|
+
"quote",
|
2446
|
+
"syn",
|
2447
|
+
]
|
2448
|
+
|
2449
|
+
[[package]]
|
2450
|
+
name = "pyo3-macros-backend"
|
2451
|
+
version = "0.22.6"
|
2452
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2453
|
+
checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
|
2454
|
+
dependencies = [
|
2455
|
+
"heck",
|
2456
|
+
"proc-macro2",
|
2457
|
+
"pyo3-build-config",
|
2458
|
+
"quote",
|
2459
|
+
"syn",
|
2460
|
+
]
|
2461
|
+
|
2309
2462
|
[[package]]
|
2310
2463
|
name = "quote"
|
2311
2464
|
version = "1.0.40"
|
@@ -2418,6 +2571,17 @@ dependencies = [
|
|
2418
2571
|
"rayon-core",
|
2419
2572
|
]
|
2420
2573
|
|
2574
|
+
[[package]]
|
2575
|
+
name = "rayon-cond"
|
2576
|
+
version = "0.3.0"
|
2577
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2578
|
+
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
|
2579
|
+
dependencies = [
|
2580
|
+
"either",
|
2581
|
+
"itertools 0.11.0",
|
2582
|
+
"rayon",
|
2583
|
+
]
|
2584
|
+
|
2421
2585
|
[[package]]
|
2422
2586
|
name = "rayon-cond"
|
2423
2587
|
version = "0.4.0"
|
@@ -2604,6 +2768,12 @@ version = "1.1.0"
|
|
2604
2768
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2605
2769
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
2606
2770
|
|
2771
|
+
[[package]]
|
2772
|
+
name = "rustc-hash"
|
2773
|
+
version = "2.1.1"
|
2774
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2775
|
+
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
2776
|
+
|
2607
2777
|
[[package]]
|
2608
2778
|
name = "rustix"
|
2609
2779
|
version = "1.0.7"
|
@@ -2740,6 +2910,16 @@ dependencies = [
|
|
2740
2910
|
"serde_derive",
|
2741
2911
|
]
|
2742
2912
|
|
2913
|
+
[[package]]
|
2914
|
+
name = "serde-pyobject"
|
2915
|
+
version = "0.4.0"
|
2916
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2917
|
+
checksum = "ca4b0aad8b225845739a0030a0d5cc2ae949c56a86a7daf9226c7df7c2016d16"
|
2918
|
+
dependencies = [
|
2919
|
+
"pyo3",
|
2920
|
+
"serde",
|
2921
|
+
]
|
2922
|
+
|
2743
2923
|
[[package]]
|
2744
2924
|
name = "serde_derive"
|
2745
2925
|
version = "1.0.219"
|
@@ -2757,6 +2937,7 @@ version = "1.0.140"
|
|
2757
2937
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2758
2938
|
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
|
2759
2939
|
dependencies = [
|
2940
|
+
"indexmap",
|
2760
2941
|
"itoa",
|
2761
2942
|
"memchr",
|
2762
2943
|
"ryu",
|
@@ -2995,6 +3176,12 @@ dependencies = [
|
|
2995
3176
|
"xattr",
|
2996
3177
|
]
|
2997
3178
|
|
3179
|
+
[[package]]
|
3180
|
+
name = "target-lexicon"
|
3181
|
+
version = "0.12.16"
|
3182
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3183
|
+
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
|
3184
|
+
|
2998
3185
|
[[package]]
|
2999
3186
|
name = "tempfile"
|
3000
3187
|
version = "3.20.0"
|
@@ -3058,6 +3245,39 @@ dependencies = [
|
|
3058
3245
|
"zerovec",
|
3059
3246
|
]
|
3060
3247
|
|
3248
|
+
[[package]]
|
3249
|
+
name = "tokenizers"
|
3250
|
+
version = "0.20.3"
|
3251
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3252
|
+
checksum = "67b67c92f6d705e2a1d106fb0b28c696f9074901a9c656ee5d9f5de204c39bf7"
|
3253
|
+
dependencies = [
|
3254
|
+
"aho-corasick",
|
3255
|
+
"derive_builder",
|
3256
|
+
"esaxx-rs",
|
3257
|
+
"getrandom 0.2.16",
|
3258
|
+
"hf-hub 0.3.2",
|
3259
|
+
"indicatif",
|
3260
|
+
"itertools 0.12.1",
|
3261
|
+
"lazy_static",
|
3262
|
+
"log",
|
3263
|
+
"macro_rules_attribute",
|
3264
|
+
"monostate",
|
3265
|
+
"onig",
|
3266
|
+
"paste",
|
3267
|
+
"rand 0.8.5",
|
3268
|
+
"rayon",
|
3269
|
+
"rayon-cond 0.3.0",
|
3270
|
+
"regex",
|
3271
|
+
"regex-syntax",
|
3272
|
+
"serde",
|
3273
|
+
"serde_json",
|
3274
|
+
"spm_precompiled",
|
3275
|
+
"thiserror 1.0.69",
|
3276
|
+
"unicode-normalization-alignments",
|
3277
|
+
"unicode-segmentation",
|
3278
|
+
"unicode_categories",
|
3279
|
+
]
|
3280
|
+
|
3061
3281
|
[[package]]
|
3062
3282
|
name = "tokenizers"
|
3063
3283
|
version = "0.21.2"
|
@@ -3081,7 +3301,7 @@ dependencies = [
|
|
3081
3301
|
"paste",
|
3082
3302
|
"rand 0.9.1",
|
3083
3303
|
"rayon",
|
3084
|
-
"rayon-cond",
|
3304
|
+
"rayon-cond 0.4.0",
|
3085
3305
|
"regex",
|
3086
3306
|
"regex-syntax",
|
3087
3307
|
"serde",
|
@@ -3365,12 +3585,24 @@ version = "0.1.1"
|
|
3365
3585
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3366
3586
|
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
3367
3587
|
|
3588
|
+
[[package]]
|
3589
|
+
name = "unindent"
|
3590
|
+
version = "0.2.4"
|
3591
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3592
|
+
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
3593
|
+
|
3368
3594
|
[[package]]
|
3369
3595
|
name = "untrusted"
|
3370
3596
|
version = "0.9.0"
|
3371
3597
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3372
3598
|
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
3373
3599
|
|
3600
|
+
[[package]]
|
3601
|
+
name = "unty"
|
3602
|
+
version = "0.0.4"
|
3603
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3604
|
+
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
|
3605
|
+
|
3374
3606
|
[[package]]
|
3375
3607
|
name = "ureq"
|
3376
3608
|
version = "2.12.1"
|
@@ -3431,6 +3663,12 @@ version = "0.9.5"
|
|
3431
3663
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3432
3664
|
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
3433
3665
|
|
3666
|
+
[[package]]
|
3667
|
+
name = "virtue"
|
3668
|
+
version = "0.0.18"
|
3669
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
3670
|
+
checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1"
|
3671
|
+
|
3434
3672
|
[[package]]
|
3435
3673
|
name = "walkdir"
|
3436
3674
|
version = "2.5.0"
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![build](https://github.com/assaydepot/red-candle/actions/workflows/build.yml/badge.svg)](https://github.com/assaydepot/red-candle/actions/workflows/build.yml)
|
4
4
|
[![Gem Version](https://badge.fury.io/rb/red-candle.svg)](https://badge.fury.io/rb/red-candle)
|
5
5
|
|
6
|
-
Run state-of-the-art **language models directly from Ruby**. No Python, no APIs, no external services - just Ruby with blazing-fast Rust under the hood. Hardware accelerated with **Metal (Mac)** and **CUDA (NVIDIA).**
|
6
|
+
Run state-of-the-art **language models directly from Ruby**. No Python, no APIs, no external services - just Ruby with blazing-fast Rust under the hood. Hardware accelerated with **Metal (Mac)** and **CUDA (NVIDIA).** Red Candle leverages the Rust ecosystem, notably [Candle](https://github.com/huggingface/candle) and [Magnus](https://github.com/matsadler/magnus), to provide a fast and efficient way to run LLMs in Ruby. See [Dependencies](#dependencies) for more.
|
7
7
|
|
8
8
|
## Install & Chat in 30 Seconds
|
9
9
|
|
@@ -58,7 +58,8 @@ end
|
|
58
58
|
- **EmbeddingModel**: Generate embeddings for text
|
59
59
|
- **Reranker**: Rerank documents based on relevance
|
60
60
|
- **NER**: Named Entity Recognition directly from Ruby
|
61
|
-
- **LLM**: Chat with Large Language Models (e.g., Llama, Mistral, Gemma)
|
61
|
+
- **LLM**: Chat with Large Language Models (e.g., Llama, Mistral, Gemma, Qwen, Phi)
|
62
|
+
- **Structured Generation**: Generate JSON from a schema or match a regular expression
|
62
63
|
|
63
64
|
## Model Storage
|
64
65
|
|
@@ -126,6 +127,10 @@ Red-Candle now supports Large Language Models (LLMs) with GPU acceleration!
|
|
126
127
|
- **Gemma**: Google's Gemma models (e.g., `google/gemma-2b`, `google/gemma-7b`, `google/gemma-2b-it`)
|
127
128
|
- **Llama**: Llama 2 and Llama 3 models (e.g., `TinyLlama/TinyLlama-1.1B-Chat-v1.0`, `meta-llama/Llama-2-7b-hf`, `NousResearch/Llama-2-7b-hf`)
|
128
129
|
- **Mistral**: All Mistral models (e.g., `mistralai/Mistral-7B-Instruct-v0.1`)
|
130
|
+
- **Qwen**: Qwen 2 and 2.5 models (e.g., `Qwen/Qwen2-1.5B`, `Qwen/Qwen2.5-7B-Instruct`)
|
131
|
+
- **Phi**: Microsoft's Phi-2, Phi-3, Phi-3.5, and Phi-4 models (e.g., `microsoft/phi-2`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/phi-4`)
|
132
|
+
- ⚠️ Note: Phi-3 and Phi-4 GGUF models have a known issue with KV cache persistence between generations. The `reset_cache` parameter doesn't work for GGUF models. As a workaround, recreate the model instance for each generation.
|
133
|
+
- See the related `candle` pull request for Phi-3 GGUF models: https://github.com/huggingface/candle/pull/2937
|
129
134
|
|
130
135
|
### Quantized Model Support (GGUF)
|
131
136
|
|
@@ -244,6 +249,36 @@ This is particularly useful for:
|
|
244
249
|
- Troubleshooting generation problems
|
245
250
|
- Analyzing model behavior
|
246
251
|
|
252
|
+
## Structured Generation
|
253
|
+
|
254
|
+
Red Candle supports structured generation to constrain LLM outputs to follow specific patterns like JSON schemas or regular expressions:
|
255
|
+
|
256
|
+
```ruby
|
257
|
+
# Define a JSON schema
|
258
|
+
schema = {
|
259
|
+
type: "object",
|
260
|
+
properties: {
|
261
|
+
answer: { type: "string", enum: ["yes", "no"] },
|
262
|
+
confidence: { type: "number", minimum: 0, maximum: 1 }
|
263
|
+
},
|
264
|
+
required: ["answer"]
|
265
|
+
}
|
266
|
+
|
267
|
+
# Generate and parse in one step
|
268
|
+
result = llm.generate_structured("Is Ruby easy to learn?", schema: schema)
|
269
|
+
puts result["answer"] # "yes"
|
270
|
+
puts result["confidence"] # 0.9
|
271
|
+
|
272
|
+
# Or use regex patterns for non-JSON outputs
|
273
|
+
phone_constraint = llm.constraint_from_regex('\d{3}-\d{3}-\d{4}')
|
274
|
+
config = Candle::GenerationConfig.balanced(constraint: phone_constraint)
|
275
|
+
phone = llm.generate("Generate a phone number:", config: config)
|
276
|
+
```
|
277
|
+
|
278
|
+
See [STRUCTURED_GENERATION.md](docs/STRUCTURED_GENERATION.md) for detailed documentation.
|
279
|
+
|
280
|
+
**Note on Reliability**: Structured generation constrains the model's output tokens, but success rates vary by model size and schema complexity. Smaller models (< 7B parameters) may occasionally produce incomplete or invalid JSON, especially with complex schemas. Consider implementing retry logic or fallback strategies in production applications. Larger models generally perform much better with structured generation.
|
281
|
+
|
247
282
|
## ⚠️ Model Format Requirements
|
248
283
|
|
249
284
|
### EmbeddingModels and Rerankers: Safetensors Only
|
@@ -861,7 +896,7 @@ Pull requests are welcome.
|
|
861
896
|
4. `git push --follow-tags`
|
862
897
|
5. `gem push pkg/red-candle-VERSION_NUMBER.gem`
|
863
898
|
|
864
|
-
##
|
899
|
+
## Dependencies
|
865
900
|
|
866
901
|
- [Candle](https://github.com/huggingface/candle)
|
867
902
|
- [Magnus](https://github.com/matsadler/magnus)
|
data/Rakefile
CHANGED
@@ -8,7 +8,14 @@ task default: :test
|
|
8
8
|
Rake::TestTask.new do |t|
|
9
9
|
t.deps << :compile
|
10
10
|
t.libs << "test"
|
11
|
-
t.test_files = FileList["test/**/*_test.rb"]
|
11
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
12
|
+
.exclude("test/benchmarks/**/*_test.rb")
|
13
|
+
.exclude("test/llm/llm_test.rb")
|
14
|
+
.exclude("test/llm/gemma_test.rb")
|
15
|
+
.exclude("test/llm/mistral_test.rb")
|
16
|
+
.exclude("test/llm/llama_test.rb")
|
17
|
+
.exclude("test/llm/phi_test.rb")
|
18
|
+
.exclude("test/llm/qwen_test.rb")
|
12
19
|
end
|
13
20
|
|
14
21
|
spec = Bundler.load_gemspec("candle.gemspec")
|
@@ -63,6 +70,44 @@ task "test:device:benchmark" => :compile do
|
|
63
70
|
Rake::Task["test:benchmark"].invoke
|
64
71
|
end
|
65
72
|
|
73
|
+
desc "Run LLM tests for specific models"
|
74
|
+
namespace :test do
|
75
|
+
namespace :llm do
|
76
|
+
desc "Run tests for Gemma models"
|
77
|
+
task :gemma => :compile do
|
78
|
+
ruby "-Itest", "test/llm/gemma_test.rb"
|
79
|
+
end
|
80
|
+
|
81
|
+
desc "Run tests for Phi models"
|
82
|
+
task :phi => :compile do
|
83
|
+
ruby "-Itest", "test/llm/phi_test.rb"
|
84
|
+
end
|
85
|
+
|
86
|
+
desc "Run tests for Qwen models"
|
87
|
+
task :qwen => :compile do
|
88
|
+
ruby "-Itest", "test/llm/qwen_test.rb"
|
89
|
+
end
|
90
|
+
|
91
|
+
desc "Run tests for Mistral models"
|
92
|
+
task :mistral => :compile do
|
93
|
+
ruby "-Itest", "test/llm/mistral_test.rb"
|
94
|
+
end
|
95
|
+
|
96
|
+
desc "Run tests for Llama models"
|
97
|
+
task :llama => :compile do
|
98
|
+
ruby "-Itest", "test/llm/llama_test.rb"
|
99
|
+
end
|
100
|
+
|
101
|
+
desc "Run tests for TinyLlama models"
|
102
|
+
task :tinyllama => :compile do
|
103
|
+
ruby "-Itest", "test/llm/tinyllama_test.rb"
|
104
|
+
end
|
105
|
+
|
106
|
+
desc "Run all LLM tests (WARNING: downloads large models)"
|
107
|
+
task :all => [:gemma, :phi, :qwen, :mistral, :llama]
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
66
111
|
namespace :doc do
|
67
112
|
task default: %i[rustdoc yard]
|
68
113
|
|
data/ext/candle/Cargo.toml
CHANGED
@@ -3,6 +3,7 @@ name = "candle"
|
|
3
3
|
version = "0.1.0"
|
4
4
|
edition = "2021"
|
5
5
|
build = "build.rs"
|
6
|
+
rust-version = "1.85"
|
6
7
|
|
7
8
|
[lib]
|
8
9
|
crate-type = ["cdylib"]
|
@@ -20,6 +21,7 @@ serde_json = "1.0"
|
|
20
21
|
serde = { version = "1.0", features = ["derive"] }
|
21
22
|
tokio = { version = "1.45", features = ["rt", "macros"] }
|
22
23
|
rand = "0.8"
|
24
|
+
outlines-core = "0.2"
|
23
25
|
|
24
26
|
[features]
|
25
27
|
default = []
|
data/ext/candle/src/lib.rs
CHANGED
@@ -7,6 +7,7 @@ pub mod llm;
|
|
7
7
|
pub mod ner;
|
8
8
|
pub mod reranker;
|
9
9
|
pub mod ruby;
|
10
|
+
pub mod structured;
|
10
11
|
pub mod tokenizer;
|
11
12
|
|
12
13
|
// Configuration detection from build.rs
|
@@ -49,6 +50,7 @@ fn init(ruby: &Ruby) -> Result<()> {
|
|
49
50
|
ruby::device::init(rb_candle)?;
|
50
51
|
ruby::tensor::init(rb_candle)?;
|
51
52
|
ruby::tokenizer::init(rb_candle)?;
|
53
|
+
ruby::structured::init_structured(rb_candle)?;
|
52
54
|
candle_utils(rb_candle)?;
|
53
55
|
|
54
56
|
Ok(())
|