red-candle 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 00c0870d599db76ba556ab880f69219bc32d72698c3570cd308df3923b1332f9
4
- data.tar.gz: bffbd02fa1fe11813a304ed16771e6a72ee01f85ccb445eb67c5da6bf696c670
3
+ metadata.gz: 3d1b83311f0ad99adaffb886efda12291e9499ba112742e471e7b1eace390cee
4
+ data.tar.gz: 6156968c937204767fda21adfc6069e55a9571018f064ac56821d61472cc9cc1
5
5
  SHA512:
6
- metadata.gz: c9ab39f01e4777c3d9906592cf62b8b1d2d03fd04be4ba82d8ddee837678ac477be88b1a0a48fa898e443f7469681e26f25297d4d310c39fb5222137d3b17d87
7
- data.tar.gz: 22fe87f0fb64754a0a19bcc632dff72b0cc43838d73797aa991c961b65c9ecb7d9bd7aacbde51baf4063011081f4111a64f53726b51c87aca3e48214304e0119
6
+ metadata.gz: 57abc7d285ebb3c67c438d861563247ec1130b03bee5fab418670bd926de58ad2bc06b2328732c362f46a4f8a8ccb7d85d7bd0e4b7a384838e290dadb7ed2be4
7
+ data.tar.gz: 2f10da2df023629d59b2c2454efa886a87e0738d99801e6bb25b951431856b083780167190a6ba735c3c51cb8d170ed16a9c163f19aaf44fc8b95a72f8ea4b5e
data/Cargo.lock CHANGED
@@ -46,6 +46,24 @@ dependencies = [
46
46
  "memchr",
47
47
  ]
48
48
 
49
+ [[package]]
50
+ name = "aligned"
51
+ version = "0.4.3"
52
+ source = "registry+https://github.com/rust-lang/crates.io-index"
53
+ checksum = "ee4508988c62edf04abd8d92897fca0c2995d907ce1dfeaf369dac3716a40685"
54
+ dependencies = [
55
+ "as-slice",
56
+ ]
57
+
58
+ [[package]]
59
+ name = "aligned-vec"
60
+ version = "0.6.4"
61
+ source = "registry+https://github.com/rust-lang/crates.io-index"
62
+ checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
63
+ dependencies = [
64
+ "equator",
65
+ ]
66
+
49
67
  [[package]]
50
68
  name = "allocator-api2"
51
69
  version = "0.2.21"
@@ -67,6 +85,38 @@ version = "1.0.99"
67
85
  source = "registry+https://github.com/rust-lang/crates.io-index"
68
86
  checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
69
87
 
88
+ [[package]]
89
+ name = "arbitrary"
90
+ version = "1.4.2"
91
+ source = "registry+https://github.com/rust-lang/crates.io-index"
92
+ checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
93
+
94
+ [[package]]
95
+ name = "arg_enum_proc_macro"
96
+ version = "0.3.4"
97
+ source = "registry+https://github.com/rust-lang/crates.io-index"
98
+ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
99
+ dependencies = [
100
+ "proc-macro2",
101
+ "quote",
102
+ "syn",
103
+ ]
104
+
105
+ [[package]]
106
+ name = "arrayvec"
107
+ version = "0.7.6"
108
+ source = "registry+https://github.com/rust-lang/crates.io-index"
109
+ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
110
+
111
+ [[package]]
112
+ name = "as-slice"
113
+ version = "0.2.1"
114
+ source = "registry+https://github.com/rust-lang/crates.io-index"
115
+ checksum = "516b6b4f0e40d50dcda9365d53964ec74560ad4284da2e7fc97122cd83174516"
116
+ dependencies = [
117
+ "stable_deref_trait",
118
+ ]
119
+
70
120
  [[package]]
71
121
  name = "atomic-waker"
72
122
  version = "1.1.2"
@@ -79,6 +129,49 @@ version = "1.5.0"
79
129
  source = "registry+https://github.com/rust-lang/crates.io-index"
80
130
  checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
81
131
 
132
+ [[package]]
133
+ name = "av-scenechange"
134
+ version = "0.14.1"
135
+ source = "registry+https://github.com/rust-lang/crates.io-index"
136
+ checksum = "0f321d77c20e19b92c39e7471cf986812cbb46659d2af674adc4331ef3f18394"
137
+ dependencies = [
138
+ "aligned",
139
+ "anyhow",
140
+ "arg_enum_proc_macro",
141
+ "arrayvec",
142
+ "log",
143
+ "num-rational",
144
+ "num-traits",
145
+ "pastey",
146
+ "rayon",
147
+ "thiserror 2.0.16",
148
+ "v_frame",
149
+ "y4m",
150
+ ]
151
+
152
+ [[package]]
153
+ name = "av1-grain"
154
+ version = "0.2.5"
155
+ source = "registry+https://github.com/rust-lang/crates.io-index"
156
+ checksum = "8cfddb07216410377231960af4fcab838eaa12e013417781b78bd95ee22077f8"
157
+ dependencies = [
158
+ "anyhow",
159
+ "arrayvec",
160
+ "log",
161
+ "nom 8.0.0",
162
+ "num-rational",
163
+ "v_frame",
164
+ ]
165
+
166
+ [[package]]
167
+ name = "avif-serialize"
168
+ version = "0.8.8"
169
+ source = "registry+https://github.com/rust-lang/crates.io-index"
170
+ checksum = "375082f007bd67184fb9c0374614b29f9aaa604ec301635f72338bb65386a53d"
171
+ dependencies = [
172
+ "arrayvec",
173
+ ]
174
+
82
175
  [[package]]
83
176
  name = "aws-lc-rs"
84
177
  version = "1.14.0"
@@ -221,6 +314,12 @@ version = "0.8.0"
221
314
  source = "registry+https://github.com/rust-lang/crates.io-index"
222
315
  checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
223
316
 
317
+ [[package]]
318
+ name = "bit_field"
319
+ version = "0.10.3"
320
+ source = "registry+https://github.com/rust-lang/crates.io-index"
321
+ checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6"
322
+
224
323
  [[package]]
225
324
  name = "bitflags"
226
325
  version = "1.3.2"
@@ -233,6 +332,15 @@ version = "2.9.4"
233
332
  source = "registry+https://github.com/rust-lang/crates.io-index"
234
333
  checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
235
334
 
335
+ [[package]]
336
+ name = "bitstream-io"
337
+ version = "4.9.0"
338
+ source = "registry+https://github.com/rust-lang/crates.io-index"
339
+ checksum = "60d4bd9d1db2c6bdf285e223a7fa369d5ce98ec767dec949c6ca62863ce61757"
340
+ dependencies = [
341
+ "core2",
342
+ ]
343
+
236
344
  [[package]]
237
345
  name = "block"
238
346
  version = "0.1.6"
@@ -257,6 +365,12 @@ dependencies = [
257
365
  "objc2",
258
366
  ]
259
367
 
368
+ [[package]]
369
+ name = "built"
370
+ version = "0.8.0"
371
+ source = "registry+https://github.com/rust-lang/crates.io-index"
372
+ checksum = "f4ad8f11f288f48ca24471bbd51ac257aaeaaa07adae295591266b792902ae64"
373
+
260
374
  [[package]]
261
375
  name = "bumpalo"
262
376
  version = "3.19.0"
@@ -289,6 +403,12 @@ version = "1.5.0"
289
403
  source = "registry+https://github.com/rust-lang/crates.io-index"
290
404
  checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
291
405
 
406
+ [[package]]
407
+ name = "byteorder-lite"
408
+ version = "0.1.0"
409
+ source = "registry+https://github.com/rust-lang/crates.io-index"
410
+ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
411
+
292
412
  [[package]]
293
413
  name = "bytes"
294
414
  version = "1.11.1"
@@ -304,6 +424,7 @@ dependencies = [
304
424
  "candle-transformers",
305
425
  "half",
306
426
  "hf-hub",
427
+ "image",
307
428
  "magnus",
308
429
  "outlines-core",
309
430
  "rand 0.8.5",
@@ -445,7 +566,7 @@ version = "0.6.0"
445
566
  source = "registry+https://github.com/rust-lang/crates.io-index"
446
567
  checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
447
568
  dependencies = [
448
- "nom",
569
+ "nom 7.1.3",
449
570
  ]
450
571
 
451
572
  [[package]]
@@ -493,6 +614,12 @@ dependencies = [
493
614
  "cc",
494
615
  ]
495
616
 
617
+ [[package]]
618
+ name = "color_quant"
619
+ version = "1.1.0"
620
+ source = "registry+https://github.com/rust-lang/crates.io-index"
621
+ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
622
+
496
623
  [[package]]
497
624
  name = "compact_str"
498
625
  version = "0.9.0"
@@ -548,6 +675,15 @@ dependencies = [
548
675
  "libc",
549
676
  ]
550
677
 
678
+ [[package]]
679
+ name = "core2"
680
+ version = "0.4.0"
681
+ source = "registry+https://github.com/rust-lang/crates.io-index"
682
+ checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
683
+ dependencies = [
684
+ "memchr",
685
+ ]
686
+
551
687
  [[package]]
552
688
  name = "cpufeatures"
553
689
  version = "0.2.17"
@@ -812,6 +948,26 @@ dependencies = [
812
948
  "syn",
813
949
  ]
814
950
 
951
+ [[package]]
952
+ name = "equator"
953
+ version = "0.4.2"
954
+ source = "registry+https://github.com/rust-lang/crates.io-index"
955
+ checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
956
+ dependencies = [
957
+ "equator-macro",
958
+ ]
959
+
960
+ [[package]]
961
+ name = "equator-macro"
962
+ version = "0.4.2"
963
+ source = "registry+https://github.com/rust-lang/crates.io-index"
964
+ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
965
+ dependencies = [
966
+ "proc-macro2",
967
+ "quote",
968
+ "syn",
969
+ ]
970
+
815
971
  [[package]]
816
972
  name = "equivalent"
817
973
  version = "1.0.2"
@@ -837,6 +993,21 @@ dependencies = [
837
993
  "cc",
838
994
  ]
839
995
 
996
+ [[package]]
997
+ name = "exr"
998
+ version = "1.74.0"
999
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1000
+ checksum = "4300e043a56aa2cb633c01af81ca8f699a321879a7854d3896a0ba89056363be"
1001
+ dependencies = [
1002
+ "bit_field",
1003
+ "half",
1004
+ "lebe",
1005
+ "miniz_oxide",
1006
+ "rayon-core",
1007
+ "smallvec",
1008
+ "zune-inflate",
1009
+ ]
1010
+
840
1011
  [[package]]
841
1012
  name = "fancy-regex"
842
1013
  version = "0.14.0"
@@ -865,6 +1036,35 @@ version = "2.3.0"
865
1036
  source = "registry+https://github.com/rust-lang/crates.io-index"
866
1037
  checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
867
1038
 
1039
+ [[package]]
1040
+ name = "fax"
1041
+ version = "0.2.6"
1042
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1043
+ checksum = "f05de7d48f37cd6730705cbca900770cab77a89f413d23e100ad7fad7795a0ab"
1044
+ dependencies = [
1045
+ "fax_derive",
1046
+ ]
1047
+
1048
+ [[package]]
1049
+ name = "fax_derive"
1050
+ version = "0.2.0"
1051
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1052
+ checksum = "a0aca10fb742cb43f9e7bb8467c91aa9bcb8e3ffbc6a6f7389bb93ffc920577d"
1053
+ dependencies = [
1054
+ "proc-macro2",
1055
+ "quote",
1056
+ "syn",
1057
+ ]
1058
+
1059
+ [[package]]
1060
+ name = "fdeflate"
1061
+ version = "0.3.7"
1062
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1063
+ checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c"
1064
+ dependencies = [
1065
+ "simd-adler32",
1066
+ ]
1067
+
868
1068
  [[package]]
869
1069
  name = "filetime"
870
1070
  version = "0.2.26"
@@ -1360,6 +1560,16 @@ dependencies = [
1360
1560
  "syn",
1361
1561
  ]
1362
1562
 
1563
+ [[package]]
1564
+ name = "gif"
1565
+ version = "0.14.1"
1566
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1567
+ checksum = "f5df2ba84018d80c213569363bdcd0c64e6933c67fe4c1d60ecf822971a3c35e"
1568
+ dependencies = [
1569
+ "color_quant",
1570
+ "weezl",
1571
+ ]
1572
+
1363
1573
  [[package]]
1364
1574
  name = "gimli"
1365
1575
  version = "0.31.1"
@@ -1717,6 +1927,46 @@ dependencies = [
1717
1927
  "icu_properties",
1718
1928
  ]
1719
1929
 
1930
+ [[package]]
1931
+ name = "image"
1932
+ version = "0.25.10"
1933
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1934
+ checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104"
1935
+ dependencies = [
1936
+ "bytemuck",
1937
+ "byteorder-lite",
1938
+ "color_quant",
1939
+ "exr",
1940
+ "gif",
1941
+ "image-webp",
1942
+ "moxcms",
1943
+ "num-traits",
1944
+ "png",
1945
+ "qoi",
1946
+ "ravif",
1947
+ "rayon",
1948
+ "rgb",
1949
+ "tiff",
1950
+ "zune-core",
1951
+ "zune-jpeg",
1952
+ ]
1953
+
1954
+ [[package]]
1955
+ name = "image-webp"
1956
+ version = "0.2.4"
1957
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1958
+ checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3"
1959
+ dependencies = [
1960
+ "byteorder-lite",
1961
+ "quick-error",
1962
+ ]
1963
+
1964
+ [[package]]
1965
+ name = "imgref"
1966
+ version = "1.12.0"
1967
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1968
+ checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8"
1969
+
1720
1970
  [[package]]
1721
1971
  name = "indexmap"
1722
1972
  version = "2.11.1"
@@ -1762,6 +2012,17 @@ dependencies = [
1762
2012
  "walkdir",
1763
2013
  ]
1764
2014
 
2015
+ [[package]]
2016
+ name = "interpolate_name"
2017
+ version = "0.2.4"
2018
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2019
+ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
2020
+ dependencies = [
2021
+ "proc-macro2",
2022
+ "quote",
2023
+ "syn",
2024
+ ]
2025
+
1765
2026
  [[package]]
1766
2027
  name = "io-uring"
1767
2028
  version = "0.7.10"
@@ -1854,12 +2115,28 @@ version = "1.3.0"
1854
2115
  source = "registry+https://github.com/rust-lang/crates.io-index"
1855
2116
  checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
1856
2117
 
2118
+ [[package]]
2119
+ name = "lebe"
2120
+ version = "0.5.3"
2121
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2122
+ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
2123
+
1857
2124
  [[package]]
1858
2125
  name = "libc"
1859
2126
  version = "0.2.175"
1860
2127
  source = "registry+https://github.com/rust-lang/crates.io-index"
1861
2128
  checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
1862
2129
 
2130
+ [[package]]
2131
+ name = "libfuzzer-sys"
2132
+ version = "0.4.12"
2133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2134
+ checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d"
2135
+ dependencies = [
2136
+ "arbitrary",
2137
+ "cc",
2138
+ ]
2139
+
1863
2140
  [[package]]
1864
2141
  name = "libloading"
1865
2142
  version = "0.8.8"
@@ -1915,6 +2192,15 @@ version = "0.4.28"
1915
2192
  source = "registry+https://github.com/rust-lang/crates.io-index"
1916
2193
  checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
1917
2194
 
2195
+ [[package]]
2196
+ name = "loop9"
2197
+ version = "0.1.5"
2198
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2199
+ checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
2200
+ dependencies = [
2201
+ "imgref",
2202
+ ]
2203
+
1918
2204
  [[package]]
1919
2205
  name = "lru-slab"
1920
2206
  version = "0.1.2"
@@ -1969,6 +2255,16 @@ dependencies = [
1969
2255
  "libc",
1970
2256
  ]
1971
2257
 
2258
+ [[package]]
2259
+ name = "maybe-rayon"
2260
+ version = "0.1.1"
2261
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2262
+ checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
2263
+ dependencies = [
2264
+ "cfg-if",
2265
+ "rayon",
2266
+ ]
2267
+
1972
2268
  [[package]]
1973
2269
  name = "memchr"
1974
2270
  version = "2.7.5"
@@ -2019,6 +2315,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
2019
2315
  checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
2020
2316
  dependencies = [
2021
2317
  "adler2",
2318
+ "simd-adler32",
2022
2319
  ]
2023
2320
 
2024
2321
  [[package]]
@@ -2053,6 +2350,16 @@ dependencies = [
2053
2350
  "syn",
2054
2351
  ]
2055
2352
 
2353
+ [[package]]
2354
+ name = "moxcms"
2355
+ version = "0.8.1"
2356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2357
+ checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b"
2358
+ dependencies = [
2359
+ "num-traits",
2360
+ "pxfm",
2361
+ ]
2362
+
2056
2363
  [[package]]
2057
2364
  name = "native-tls"
2058
2365
  version = "0.2.14"
@@ -2070,6 +2377,12 @@ dependencies = [
2070
2377
  "tempfile",
2071
2378
  ]
2072
2379
 
2380
+ [[package]]
2381
+ name = "new_debug_unreachable"
2382
+ version = "1.0.6"
2383
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2384
+ checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
2385
+
2073
2386
  [[package]]
2074
2387
  name = "nom"
2075
2388
  version = "7.1.3"
@@ -2080,6 +2393,21 @@ dependencies = [
2080
2393
  "minimal-lexical",
2081
2394
  ]
2082
2395
 
2396
+ [[package]]
2397
+ name = "nom"
2398
+ version = "8.0.0"
2399
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2400
+ checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
2401
+ dependencies = [
2402
+ "memchr",
2403
+ ]
2404
+
2405
+ [[package]]
2406
+ name = "noop_proc_macro"
2407
+ version = "0.3.0"
2408
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2409
+ checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
2410
+
2083
2411
  [[package]]
2084
2412
  name = "num"
2085
2413
  version = "0.4.3"
@@ -2114,6 +2442,17 @@ dependencies = [
2114
2442
  "num-traits",
2115
2443
  ]
2116
2444
 
2445
+ [[package]]
2446
+ name = "num-derive"
2447
+ version = "0.4.2"
2448
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2449
+ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
2450
+ dependencies = [
2451
+ "proc-macro2",
2452
+ "quote",
2453
+ "syn",
2454
+ ]
2455
+
2117
2456
  [[package]]
2118
2457
  name = "num-integer"
2119
2458
  version = "0.1.46"
@@ -2386,6 +2725,12 @@ version = "1.0.15"
2386
2725
  source = "registry+https://github.com/rust-lang/crates.io-index"
2387
2726
  checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
2388
2727
 
2728
+ [[package]]
2729
+ name = "pastey"
2730
+ version = "0.1.1"
2731
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2732
+ checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
2733
+
2389
2734
  [[package]]
2390
2735
  name = "percent-encoding"
2391
2736
  version = "2.3.2"
@@ -2410,6 +2755,19 @@ version = "0.3.32"
2410
2755
  source = "registry+https://github.com/rust-lang/crates.io-index"
2411
2756
  checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
2412
2757
 
2758
+ [[package]]
2759
+ name = "png"
2760
+ version = "0.18.1"
2761
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2762
+ checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
2763
+ dependencies = [
2764
+ "bitflags 2.9.4",
2765
+ "crc32fast",
2766
+ "fdeflate",
2767
+ "flate2",
2768
+ "miniz_oxide",
2769
+ ]
2770
+
2413
2771
  [[package]]
2414
2772
  name = "portable-atomic"
2415
2773
  version = "1.11.1"
@@ -2475,6 +2833,25 @@ dependencies = [
2475
2833
  "unicode-ident",
2476
2834
  ]
2477
2835
 
2836
+ [[package]]
2837
+ name = "profiling"
2838
+ version = "1.0.17"
2839
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2840
+ checksum = "3eb8486b569e12e2c32ad3e204dbaba5e4b5b216e9367044f25f1dba42341773"
2841
+ dependencies = [
2842
+ "profiling-procmacros",
2843
+ ]
2844
+
2845
+ [[package]]
2846
+ name = "profiling-procmacros"
2847
+ version = "1.0.17"
2848
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2849
+ checksum = "52717f9a02b6965224f95ca2a81e2e0c5c43baacd28ca057577988930b6c3d5b"
2850
+ dependencies = [
2851
+ "quote",
2852
+ "syn",
2853
+ ]
2854
+
2478
2855
  [[package]]
2479
2856
  name = "pulp"
2480
2857
  version = "0.21.5"
@@ -2512,6 +2889,27 @@ version = "0.1.0"
2512
2889
  source = "registry+https://github.com/rust-lang/crates.io-index"
2513
2890
  checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0"
2514
2891
 
2892
+ [[package]]
2893
+ name = "pxfm"
2894
+ version = "0.1.28"
2895
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2896
+ checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d"
2897
+
2898
+ [[package]]
2899
+ name = "qoi"
2900
+ version = "0.4.1"
2901
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2902
+ checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
2903
+ dependencies = [
2904
+ "bytemuck",
2905
+ ]
2906
+
2907
+ [[package]]
2908
+ name = "quick-error"
2909
+ version = "2.0.1"
2910
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2911
+ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
2912
+
2515
2913
  [[package]]
2516
2914
  name = "quinn"
2517
2915
  version = "0.11.9"
@@ -2651,6 +3049,56 @@ dependencies = [
2651
3049
  "rand 0.9.2",
2652
3050
  ]
2653
3051
 
3052
+ [[package]]
3053
+ name = "rav1e"
3054
+ version = "0.8.1"
3055
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3056
+ checksum = "43b6dd56e85d9483277cde964fd1bdb0428de4fec5ebba7540995639a21cb32b"
3057
+ dependencies = [
3058
+ "aligned-vec",
3059
+ "arbitrary",
3060
+ "arg_enum_proc_macro",
3061
+ "arrayvec",
3062
+ "av-scenechange",
3063
+ "av1-grain",
3064
+ "bitstream-io",
3065
+ "built",
3066
+ "cfg-if",
3067
+ "interpolate_name",
3068
+ "itertools 0.14.0",
3069
+ "libc",
3070
+ "libfuzzer-sys",
3071
+ "log",
3072
+ "maybe-rayon",
3073
+ "new_debug_unreachable",
3074
+ "noop_proc_macro",
3075
+ "num-derive",
3076
+ "num-traits",
3077
+ "paste",
3078
+ "profiling",
3079
+ "rand 0.9.2",
3080
+ "rand_chacha 0.9.0",
3081
+ "simd_helpers",
3082
+ "thiserror 2.0.16",
3083
+ "v_frame",
3084
+ "wasm-bindgen",
3085
+ ]
3086
+
3087
+ [[package]]
3088
+ name = "ravif"
3089
+ version = "0.13.0"
3090
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3091
+ checksum = "e52310197d971b0f5be7fe6b57530dcd27beb35c1b013f29d66c1ad73fbbcc45"
3092
+ dependencies = [
3093
+ "avif-serialize",
3094
+ "imgref",
3095
+ "loop9",
3096
+ "quick-error",
3097
+ "rav1e",
3098
+ "rayon",
3099
+ "rgb",
3100
+ ]
3101
+
2654
3102
  [[package]]
2655
3103
  name = "raw-cpuid"
2656
3104
  version = "11.6.0"
@@ -2834,6 +3282,12 @@ dependencies = [
2834
3282
  "webpki-roots 1.0.2",
2835
3283
  ]
2836
3284
 
3285
+ [[package]]
3286
+ name = "rgb"
3287
+ version = "0.8.53"
3288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3289
+ checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
3290
+
2837
3291
  [[package]]
2838
3292
  name = "ring"
2839
3293
  version = "0.17.14"
@@ -3103,6 +3557,21 @@ version = "1.3.0"
3103
3557
  source = "registry+https://github.com/rust-lang/crates.io-index"
3104
3558
  checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
3105
3559
 
3560
+ [[package]]
3561
+ name = "simd-adler32"
3562
+ version = "0.3.9"
3563
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3564
+ checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
3565
+
3566
+ [[package]]
3567
+ name = "simd_helpers"
3568
+ version = "0.1.0"
3569
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3570
+ checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
3571
+ dependencies = [
3572
+ "quote",
3573
+ ]
3574
+
3106
3575
  [[package]]
3107
3576
  name = "slab"
3108
3577
  version = "0.4.11"
@@ -3143,7 +3612,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
3143
3612
  checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
3144
3613
  dependencies = [
3145
3614
  "base64 0.13.1",
3146
- "nom",
3615
+ "nom 7.1.3",
3147
3616
  "serde",
3148
3617
  "unicode-segmentation",
3149
3618
  ]
@@ -3321,6 +3790,20 @@ dependencies = [
3321
3790
  "syn",
3322
3791
  ]
3323
3792
 
3793
+ [[package]]
3794
+ name = "tiff"
3795
+ version = "0.11.3"
3796
+ source = "registry+https://github.com/rust-lang/crates.io-index"
3797
+ checksum = "b63feaf3343d35b6ca4d50483f94843803b0f51634937cc2ec519fc32232bc52"
3798
+ dependencies = [
3799
+ "fax",
3800
+ "flate2",
3801
+ "half",
3802
+ "quick-error",
3803
+ "weezl",
3804
+ "zune-jpeg",
3805
+ ]
3806
+
3324
3807
  [[package]]
3325
3808
  name = "tinystr"
3326
3809
  version = "0.8.1"
@@ -3752,6 +4235,17 @@ dependencies = [
3752
4235
  "wasm-bindgen",
3753
4236
  ]
3754
4237
 
4238
+ [[package]]
4239
+ name = "v_frame"
4240
+ version = "0.3.9"
4241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4242
+ checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2"
4243
+ dependencies = [
4244
+ "aligned-vec",
4245
+ "num-traits",
4246
+ "wasm-bindgen",
4247
+ ]
4248
+
3755
4249
  [[package]]
3756
4250
  name = "vcpkg"
3757
4251
  version = "0.2.15"
@@ -3936,6 +4430,12 @@ dependencies = [
3936
4430
  "rustls-pki-types",
3937
4431
  ]
3938
4432
 
4433
+ [[package]]
4434
+ name = "weezl"
4435
+ version = "0.1.12"
4436
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4437
+ checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
4438
+
3939
4439
  [[package]]
3940
4440
  name = "winapi"
3941
4441
  version = "0.3.9"
@@ -4323,6 +4823,12 @@ dependencies = [
4323
4823
  "rustix",
4324
4824
  ]
4325
4825
 
4826
+ [[package]]
4827
+ name = "y4m"
4828
+ version = "0.8.0"
4829
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4830
+ checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448"
4831
+
4326
4832
  [[package]]
4327
4833
  name = "yoke"
4328
4834
  version = "0.7.5"
@@ -4461,3 +4967,27 @@ dependencies = [
4461
4967
  "memchr",
4462
4968
  "typed-path",
4463
4969
  ]
4970
+
4971
+ [[package]]
4972
+ name = "zune-core"
4973
+ version = "0.5.1"
4974
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4975
+ checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9"
4976
+
4977
+ [[package]]
4978
+ name = "zune-inflate"
4979
+ version = "0.2.54"
4980
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4981
+ checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
4982
+ dependencies = [
4983
+ "simd-adler32",
4984
+ ]
4985
+
4986
+ [[package]]
4987
+ name = "zune-jpeg"
4988
+ version = "0.5.15"
4989
+ source = "registry+https://github.com/rust-lang/crates.io-index"
4990
+ checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296"
4991
+ dependencies = [
4992
+ "zune-core",
4993
+ ]
data/README.md CHANGED
@@ -942,6 +942,49 @@ All NER methods return entities in a consistent format:
942
942
  }
943
943
  ```
944
944
 
945
+ ## Vision-Language Models (VLM)
946
+
947
+ Red-Candle supports vision-language models for understanding and describing images. The VLM module uses LLaVA (Large Language and Vision Assistant), which combines a CLIP vision encoder with a Llama language model.
948
+
949
+ ### Basic Usage
950
+
951
+ ```ruby
952
+ require 'candle'
953
+
954
+ # Load a LLaVA model (requires ~13GB download on first use)
955
+ vlm = Candle::VLM.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
956
+
957
+ # Describe an image
958
+ description = vlm.describe("photo.jpg")
959
+
960
+ # Ask a question about an image
961
+ answer = vlm.ask("photo.jpg", "What animal is in this image?")
962
+ # => "The animal in the image is a cat."
963
+
964
+ # Control output length
965
+ vlm.describe("photo.jpg", max_length: 500)
966
+ vlm.ask("photo.jpg", "What colors do you see?", max_length: 50)
967
+ ```
968
+
969
+ ### How It Works
970
+
971
+ 1. **CLIP Vision Encoder**: Converts the image into a sequence of visual feature tokens (576 patches from a 336x336 image)
972
+ 2. **MM Projector**: Projects vision features into the language model's embedding space
973
+ 3. **Llama LLM**: Processes the combined image+text embeddings and generates a text response
974
+
975
+ ### Supported Models
976
+
977
+ | Model | LLM Backend | Size | Notes |
978
+ |:------|:-----------|:-----|:------|
979
+ | `llava-hf/llava-v1.6-vicuna-7b-hf` | Llama (Vicuna) | ~13GB | Recommended, LLaVA-Next with Llama backend |
980
+
981
+ ### Notes
982
+
983
+ - First load downloads ~13GB of model weights (cached for subsequent use)
984
+ - Image preprocessing is automatic (resize, normalize to CLIP format)
985
+ - Generation uses greedy decoding
986
+ - Multiple calls work correctly (KV cache is reset between queries)
987
+
945
988
  ## Common Runtime Errors
946
989
 
947
990
  ### Weight is negative, too large or not a valid number
@@ -22,6 +22,7 @@ serde = { version = "1.0", features = ["derive"] }
22
22
  tokio = { version = "1.45", features = ["rt", "macros"] }
23
23
  rand = "0.8"
24
24
  outlines-core = "0.2.11"
25
+ image = "0.25"
25
26
 
26
27
  [features]
27
28
  default = []
@@ -46,6 +46,7 @@ fn init(ruby: &Ruby) -> Result<()> {
46
46
  ruby::init_llm(rb_candle)?;
47
47
  ruby::ner::init(rb_candle)?;
48
48
  ruby::reranker::init(rb_candle)?;
49
+ ruby::vlm::init(rb_candle)?;
49
50
  ruby::dtype::init(rb_candle)?;
50
51
  ruby::device::init(rb_candle)?;
51
52
  ruby::tensor::init(rb_candle)?;
@@ -10,6 +10,7 @@ pub mod tokenizer;
10
10
  pub mod structured;
11
11
  pub mod reranker;
12
12
  pub mod ner;
13
+ pub mod vlm;
13
14
 
14
15
  pub use embedding_model::{EmbeddingModel, EmbeddingModelInner};
15
16
  pub use tensor::Tensor;
@@ -0,0 +1,330 @@
1
+ use magnus::{function, method, prelude::*, Error, RModule, Ruby};
2
+ use candle_transformers::models::llava::{
3
+ config::{LLaVAConfig, HFLLaVAConfig, HFGenerationConfig, HFPreProcessorConfig},
4
+ LLaVA,
5
+ };
6
+ use candle_transformers::models::llama::Cache;
7
+ use candle_core::{Device as CoreDevice, Tensor, DType};
8
+ use candle_nn::VarBuilder;
9
+ use hf_hub::{api::sync::Api, Repo, RepoType};
10
+ use tokenizers::Tokenizer;
11
+ use crate::ruby::{Device, Result};
12
+ use crate::tokenizer::TokenizerWrapper;
13
+
14
+ const CLIP_MEAN: [f32; 3] = [0.48145466, 0.4578275, 0.40821073];
15
+ const CLIP_STD: [f32; 3] = [0.26862954, 0.26130258, 0.27577711];
16
+
17
+ /// Vision-Language Model wrapping LLaVA for image understanding.
18
+ /// Uses CLIP vision encoder + MM projector + Llama LLM.
19
+ ///
20
+ /// Note: LLaVA contains trait objects (dyn Module) that are !Send,
21
+ /// so we wrap it in an UnsafeCell. This is safe because Ruby's GVL
22
+ /// ensures single-threaded access to the model.
23
+ struct UnsafeSendSync<T>(T);
24
+ unsafe impl<T> Send for UnsafeSendSync<T> {}
25
+ unsafe impl<T> Sync for UnsafeSendSync<T> {}
26
+
27
+ #[magnus::wrap(class = "Candle::VLM", free_immediately, size)]
28
+ pub struct VLM {
29
+ model: std::cell::RefCell<UnsafeSendSync<LLaVA>>,
30
+ tokenizer: TokenizerWrapper,
31
+ cache: std::cell::RefCell<UnsafeSendSync<Cache>>,
32
+ config: LLaVAConfig,
33
+ device: CoreDevice,
34
+ model_id: String,
35
+ image_size: usize,
36
+ eos_token_id: u32,
37
+ }
38
+
39
+ impl VLM {
40
+ pub fn new(model_id: String, device: Option<Device>) -> Result<Self> {
41
+ let device = device.unwrap_or(Device::best()).as_device()?;
42
+ Self::load_model(model_id, device)
43
+ }
44
+
45
+ fn load_model(model_id: String, device: CoreDevice) -> std::result::Result<Self, Error> {
46
+ let ruby = Ruby::get().unwrap();
47
+ let runtime_error = ruby.exception_runtime_error();
48
+
49
+ let result = (|| -> std::result::Result<_, Box<dyn std::error::Error + Send + Sync>> {
50
+ let api = Api::new()?;
51
+ let repo = api.repo(Repo::new(model_id.clone(), RepoType::Model));
52
+
53
+ // Download config files
54
+ let config_filename = repo.get("config.json")?;
55
+ let gen_config_filename = repo.get("generation_config.json")?;
56
+ let preproc_config_filename = repo.get("preprocessor_config.json")?;
57
+ let tokenizer_filename = repo.get("tokenizer.json")?;
58
+
59
+ // Read configs
60
+ let config_str = std::fs::read_to_string(&config_filename)?;
61
+ let gen_config_str = std::fs::read_to_string(&gen_config_filename)?;
62
+ let preproc_config_str = std::fs::read_to_string(&preproc_config_filename)?;
63
+
64
+ // Patch config: some models have null pad_token_id in text_config
65
+ // but candle's HFLLaVATextConfig requires usize. Fix by defaulting to 0.
66
+ let mut config_json: serde_json::Value = serde_json::from_str(&config_str)?;
67
+ let top_pad_id = config_json.get("pad_token_id")
68
+ .and_then(|v| v.as_u64())
69
+ .unwrap_or(0);
70
+ // Patch missing image_grid_pinpoints for LLaVA 1.5
71
+ if config_json.get("image_grid_pinpoints").map_or(true, |v| v.is_null()) {
72
+ config_json["image_grid_pinpoints"] = serde_json::json!([[336, 672], [672, 336], [672, 672]]);
73
+ }
74
+ if let Some(text_config) = config_json.get_mut("text_config") {
75
+ if text_config.get("pad_token_id").map_or(true, |v| v.is_null()) {
76
+ text_config["pad_token_id"] = serde_json::Value::Number(top_pad_id.into());
77
+ }
78
+ }
79
+ let patched_config_str = serde_json::to_string(&config_json)?;
80
+ let hf_config: HFLLaVAConfig = serde_json::from_str(&patched_config_str)?;
81
+ let gen_config: HFGenerationConfig = serde_json::from_str(&gen_config_str)?;
82
+ let preproc_config: HFPreProcessorConfig = serde_json::from_str(&preproc_config_str)?;
83
+
84
+ let image_size = hf_config.vision_config.image_size;
85
+ let eos_token_id = gen_config.eos_token_id as u32;
86
+
87
+ let clip_vision_config = hf_config.to_clip_vision_config();
88
+ let config = hf_config.to_llava_config(&gen_config, &preproc_config);
89
+
90
+ // Load tokenizer
91
+ let tokenizer = Tokenizer::from_file(tokenizer_filename)?;
92
+
93
+ // Download weight files (sharded)
94
+ let weight_files = Self::download_weights(&repo)?;
95
+
96
+ // Load model weights
97
+ let vb = unsafe {
98
+ VarBuilder::from_mmaped_safetensors(&weight_files, DType::F32, &device)?
99
+ };
100
+
101
+ // Load LLaVA model with CLIP vision config
102
+ let model = LLaVA::load(vb, &config, Some(clip_vision_config))?;
103
+
104
+ // Create KV cache for the Llama LLM
105
+ let llama_config = config.to_llama_config();
106
+ let cache = Cache::new(true, DType::F32, &llama_config, &device)?;
107
+
108
+ Ok((model, TokenizerWrapper::new(tokenizer), cache, config, image_size, eos_token_id))
109
+ })();
110
+
111
+ match result {
112
+ Ok((model, tokenizer, cache, config, image_size, eos_token_id)) => {
113
+ Ok(Self {
114
+ model: std::cell::RefCell::new(UnsafeSendSync(model)),
115
+ tokenizer,
116
+ cache: std::cell::RefCell::new(UnsafeSendSync(cache)),
117
+ config,
118
+ device,
119
+ model_id,
120
+ image_size,
121
+ eos_token_id,
122
+ })
123
+ }
124
+ Err(e) => Err(Error::new(runtime_error, format!("Failed to load VLM: {}", e))),
125
+ }
126
+ }
127
+
128
+ fn download_weights(
129
+ repo: &hf_hub::api::sync::ApiRepo,
130
+ ) -> std::result::Result<Vec<std::path::PathBuf>, Box<dyn std::error::Error + Send + Sync>> {
131
+ // Try single file first
132
+ if let Ok(path) = repo.get("model.safetensors") {
133
+ return Ok(vec![path]);
134
+ }
135
+
136
+ // Try to get the index file for sharded weights
137
+ let index_path = repo.get("model.safetensors.index.json")?;
138
+ let index_str = std::fs::read_to_string(&index_path)?;
139
+ let index: serde_json::Value = serde_json::from_str(&index_str)?;
140
+
141
+ let weight_map = index["weight_map"].as_object()
142
+ .ok_or("Missing weight_map in index")?;
143
+
144
+ let mut filenames: Vec<String> = weight_map.values()
145
+ .filter_map(|v| v.as_str().map(String::from))
146
+ .collect();
147
+ filenames.sort();
148
+ filenames.dedup();
149
+
150
+ let mut paths = Vec::new();
151
+ for filename in &filenames {
152
+ let path = repo.get(filename)?;
153
+ paths.push(path);
154
+ }
155
+
156
+ Ok(paths)
157
+ }
158
+
159
+ /// Load and preprocess an image from a file path into a CLIP-ready tensor
160
+ fn load_image(&self, image_path: &str) -> std::result::Result<Tensor, Error> {
161
+ let ruby = Ruby::get().unwrap();
162
+ let runtime_error = ruby.exception_runtime_error();
163
+
164
+ let img = image::open(image_path)
165
+ .map_err(|e| Error::new(runtime_error, format!("Failed to open image: {}", e)))?;
166
+
167
+ // Resize to expected size
168
+ let img = img.resize_exact(
169
+ self.image_size as u32,
170
+ self.image_size as u32,
171
+ image::imageops::FilterType::Triangle,
172
+ );
173
+
174
+ let img = img.to_rgb8();
175
+ let (width, height) = img.dimensions();
176
+ let h = height as usize;
177
+ let w = width as usize;
178
+
179
+ // Convert to CHW format with CLIP normalization
180
+ let mut chw = vec![0f32; 3 * h * w];
181
+ for y in 0..h {
182
+ for x in 0..w {
183
+ let p = img.get_pixel(x as u32, y as u32);
184
+ chw[0 * h * w + y * w + x] = (p[0] as f32 / 255.0 - CLIP_MEAN[0]) / CLIP_STD[0];
185
+ chw[1 * h * w + y * w + x] = (p[1] as f32 / 255.0 - CLIP_MEAN[1]) / CLIP_STD[1];
186
+ chw[2 * h * w + y * w + x] = (p[2] as f32 / 255.0 - CLIP_MEAN[2]) / CLIP_STD[2];
187
+ }
188
+ }
189
+
190
+ Tensor::from_vec(chw, (1, 3, h, w), &self.device)
191
+ .map_err(|e| Error::new(runtime_error, format!("Failed to create image tensor: {}", e)))
192
+ }
193
+
194
+ /// Describe an image
195
+ pub fn describe(&self, image_path: String, max_length: Option<usize>) -> std::result::Result<String, Error> {
196
+ self.ask(image_path, "Describe this image in detail.".to_string(), max_length)
197
+ }
198
+
199
+ /// Ask a question about an image
200
+ pub fn ask(&self, image_path: String, question: String, max_length: Option<usize>) -> std::result::Result<String, Error> {
201
+ let ruby = Ruby::get().unwrap();
202
+ let runtime_error = ruby.exception_runtime_error();
203
+ let max_length = max_length.unwrap_or(256);
204
+
205
+ // Load and preprocess image
206
+ let image_tensor = self.load_image(&image_path)?;
207
+
208
+ // Build prompt with image token placeholder
209
+ // LLaVA 1.5 HF format: USER: <image>\n{question}\nASSISTANT:
210
+ let prompt = format!("USER: <image>\n{}\nASSISTANT:", question);
211
+
212
+ // Tokenize
213
+ let encoding = self.tokenizer.inner().encode(prompt.as_str(), false)
214
+ .map_err(|e| Error::new(runtime_error, format!("Tokenization failed: {}", e)))?;
215
+ let input_ids: Vec<u32> = encoding.get_ids().to_vec();
216
+
217
+ // LLaVA expects I64 input IDs
218
+ let input_ids_i64: Vec<i64> = input_ids.iter().map(|&id| id as i64).collect();
219
+ let input_tensor = Tensor::new(&input_ids_i64[..], &self.device)
220
+ .map_err(|e| Error::new(runtime_error, format!("Failed to create input tensor: {}", e)))?
221
+ .unsqueeze(0)
222
+ .map_err(|e| Error::new(runtime_error, format!("Failed to unsqueeze: {}", e)))?;
223
+
224
+ let mut model_ref = self.model.borrow_mut();
225
+ let model = &mut model_ref.0;
226
+ let mut cache_ref = self.cache.borrow_mut();
227
+ let cache = &mut cache_ref.0;
228
+
229
+ // Prepare multimodal input: merge image features with text embeddings
230
+ let image_size = (self.image_size as u32, self.image_size as u32);
231
+ let input_embeds = model.prepare_inputs_labels_for_multimodal(
232
+ &input_tensor,
233
+ &[image_tensor],
234
+ &[image_size],
235
+ ).map_err(|e| Error::new(runtime_error, format!("Failed to prepare multimodal input: {}", e)))?;
236
+
237
+ // Reset KV cache for fresh generation
238
+ let llama_config = self.config.to_llama_config();
239
+ *cache = Cache::new(true, DType::F32, &llama_config, &self.device)
240
+ .map_err(|e| Error::new(runtime_error, format!("Failed to reset cache: {}", e)))?;
241
+
242
+ // Generate tokens autoregressively
243
+ let mut generated_tokens: Vec<u32> = Vec::new();
244
+ let mut current_embeds = input_embeds;
245
+ let mut pos = 0usize;
246
+
247
+ for _i in 0..max_length {
248
+ let logits = model.forward(&current_embeds, pos, cache)
249
+ .map_err(|e| Error::new(runtime_error, format!("Forward pass failed at pos {}: {}", pos, e)))?;
250
+
251
+ // Advance position by the number of tokens we just processed
252
+ let step_len = current_embeds.dim(1)
253
+ .map_err(|e| Error::new(runtime_error, format!("Failed to get step len: {}", e)))?;
254
+ pos += step_len;
255
+
256
+ // Get logits for last position
257
+ let logits = logits.flatten_all()
258
+ .map_err(|e| Error::new(runtime_error, format!("Failed to flatten: {}", e)))?;
259
+
260
+ // Handle multi-dim logits (take last token if needed)
261
+ let vocab_size = self.config.vocab_size;
262
+ let logits = if logits.elem_count() > vocab_size {
263
+ let n_tokens = logits.elem_count() / vocab_size;
264
+ logits.reshape((n_tokens, vocab_size))
265
+ .map_err(|e| Error::new(runtime_error, format!("Failed to reshape logits: {}", e)))?
266
+ .narrow(0, n_tokens - 1, 1)
267
+ .map_err(|e| Error::new(runtime_error, format!("Failed to narrow logits: {}", e)))?
268
+ .squeeze(0)
269
+ .map_err(|e| Error::new(runtime_error, format!("Failed to squeeze logits: {}", e)))?
270
+ } else {
271
+ logits
272
+ };
273
+
274
+ let logits = logits.to_dtype(DType::F32)
275
+ .map_err(|e| Error::new(runtime_error, format!("Failed to convert dtype: {}", e)))?;
276
+
277
+ // Greedy decoding
278
+ let next_token = logits.argmax(0)
279
+ .map_err(|e| Error::new(runtime_error, format!("Argmax failed: {}", e)))?
280
+ .to_scalar::<u32>()
281
+ .map_err(|e| Error::new(runtime_error, format!("Failed to get token: {}", e)))?;
282
+
283
+ // Check for EOS
284
+ if next_token == self.eos_token_id {
285
+ break;
286
+ }
287
+
288
+ generated_tokens.push(next_token);
289
+
290
+ // For subsequent tokens, embed directly through Llama's embedding layer
291
+ let next_input = Tensor::new(&[next_token as i64], &self.device)
292
+ .map_err(|e| Error::new(runtime_error, format!("Failed to create next input: {}", e)))?
293
+ .unsqueeze(0)
294
+ .map_err(|e| Error::new(runtime_error, format!("Failed to unsqueeze next: {}", e)))?;
295
+
296
+ current_embeds = model.llama.embed(&next_input)
297
+ .map_err(|e| Error::new(runtime_error, format!("Failed to embed next token: {}", e)))?;
298
+ }
299
+
300
+ // Decode generated tokens
301
+ let text = self.tokenizer.inner().decode(&generated_tokens, true)
302
+ .map_err(|e| Error::new(runtime_error, format!("Decoding failed: {}", e)))?;
303
+
304
+ Ok(text.trim().to_string())
305
+ }
306
+
307
+ pub fn model_id(&self) -> String {
308
+ self.model_id.clone()
309
+ }
310
+
311
+ pub fn device(&self) -> Device {
312
+ Device::from_device(&self.device)
313
+ }
314
+
315
+ pub fn tokenizer(&self) -> std::result::Result<crate::ruby::tokenizer::Tokenizer, Error> {
316
+ Ok(crate::ruby::tokenizer::Tokenizer(self.tokenizer.clone()))
317
+ }
318
+ }
319
+
320
+ pub fn init(rb_candle: RModule) -> std::result::Result<(), Error> {
321
+ let ruby = Ruby::get().unwrap();
322
+ let c_vlm = rb_candle.define_class("VLM", ruby.class_object())?;
323
+ c_vlm.define_singleton_method("_create", function!(VLM::new, 2))?;
324
+ c_vlm.define_method("_describe", method!(VLM::describe, 2))?;
325
+ c_vlm.define_method("_ask", method!(VLM::ask, 3))?;
326
+ c_vlm.define_method("model_id", method!(VLM::model_id, 0))?;
327
+ c_vlm.define_method("device", method!(VLM::device, 0))?;
328
+ c_vlm.define_method("tokenizer", method!(VLM::tokenizer, 0))?;
329
+ Ok(())
330
+ }
@@ -1,5 +1,5 @@
1
1
  # :nocov:
2
2
  module Candle
3
- VERSION = "1.6.1"
3
+ VERSION = "1.7.0"
4
4
  end
5
5
  # :nocov:
data/lib/candle/vlm.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Candle
  # Ruby-facing API for the native vision-language model wrapper.
  #
  # The native extension supplies `_create`, `_describe`, `_ask`, `model_id`
  # and `device`; this class adds keyword-argument friendly entry points.
  class VLM
    # Device names accepted by {.from_pretrained}, mapped to the
    # `Candle::Device` factory method that builds them.
    DEVICE_FACTORIES = { "cpu" => :cpu, "metal" => :metal, "cuda" => :cuda }.freeze
    private_constant :DEVICE_FACTORIES

    class << self
      # Load a vision-language model.
      #
      # @param model_id [String] model identifier, forwarded to the native `_create`
      # @param device [Candle::Device, String, Symbol, nil] a device object, or
      #   one of "cpu" / "metal" / "cuda" (case-insensitive, String or Symbol).
      #   `nil` is passed through so the native side picks a device; any other
      #   value falls back to `Candle::Device.best`.
      # @param _options [Hash] accepted for call-site compatibility; currently unused
      # @return [Candle::VLM]
      def from_pretrained(model_id, device: nil, **_options)
        _create(model_id, resolve_device(device))
      end

      private

      # Turn the user-supplied `device` argument into what `_create` expects.
      def resolve_device(device)
        case device
        when nil, Candle::Device
          device
        when String, Symbol
          # Normalising through to_s/downcase makes :cpu and "CPU" behave like
          # "cpu" instead of silently selecting the best available device.
          Candle::Device.public_send(DEVICE_FACTORIES.fetch(device.to_s.downcase, :best))
        else
          Candle::Device.best
        end
      end
    end

    # Describe an image.
    #
    # @param image_path [String] path to the image file
    # @param max_length [Integer] maximum number of tokens to generate
    # @return [String] the generated description
    def describe(image_path, max_length: 256)
      _describe(image_path, max_length)
    end

    # Ask a question about an image.
    #
    # @param image_path [String] path to the image file
    # @param question [String] the question to ask
    # @param max_length [Integer] maximum number of tokens to generate
    # @return [String] the generated answer
    def ask(image_path, question, max_length: 256)
      _ask(image_path, question, max_length)
    end

    # @return [String] short summary showing the model id and device
    def inspect
      "#<Candle::VLM model_id=#{model_id.inspect} device=#{device}>"
    end
  end
end
data/lib/candle.rb CHANGED
@@ -11,4 +11,5 @@ require_relative "candle/llm"
11
11
  require_relative "candle/agent"
12
12
  require_relative "candle/tokenizer"
13
13
  require_relative "candle/ner"
14
+ require_relative "candle/vlm"
14
15
  require_relative "candle/build_info"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-candle
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Petersen
@@ -243,6 +243,7 @@ files:
243
243
  - ext/candle/src/ruby/tensor.rs
244
244
  - ext/candle/src/ruby/tokenizer.rs
245
245
  - ext/candle/src/ruby/utils.rs
246
+ - ext/candle/src/ruby/vlm.rs
246
247
  - ext/candle/src/structured/integration_test.rs
247
248
  - ext/candle/src/structured/mod.rs
248
249
  - ext/candle/src/structured/schema_processor.rs
@@ -268,6 +269,7 @@ files:
268
269
  - lib/candle/tool.rb
269
270
  - lib/candle/tool_call_parser.rb
270
271
  - lib/candle/version.rb
272
+ - lib/candle/vlm.rb
271
273
  - lib/red-candle.rb
272
274
  homepage: https://github.com/scientist-labs/red-candle
273
275
  licenses: