red-candle 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +39 -45
- data/Rakefile +79 -88
- data/ext/candle/src/lib.rs +2 -4
- data/ext/candle/src/llm/quantized_gguf.rs +1 -1
- data/ext/candle/src/ruby/device.rs +30 -0
- data/ext/candle/src/ruby/embedding_model.rs +74 -28
- data/ext/candle/src/ruby/llm.rs +96 -1
- data/ext/candle/src/ruby/mod.rs +2 -0
- data/ext/candle/src/{ner.rs → ruby/ner.rs} +47 -15
- data/ext/candle/src/{reranker.rs → ruby/reranker.rs} +24 -2
- data/ext/candle/src/ruby/tensor.rs +101 -26
- data/ext/candle/src/ruby/tokenizer.rs +60 -3
- data/lib/candle/device_utils.rb +3 -15
- data/lib/candle/embedding_model.rb +44 -1
- data/lib/candle/llm.rb +63 -1
- data/lib/candle/ner.rb +34 -22
- data/lib/candle/reranker.rb +20 -1
- data/lib/candle/tensor.rb +15 -0
- data/lib/candle/version.rb +1 -1
- metadata +18 -4
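Read as a whole, the user-facing changes in this diff are that the keyword-argument constructors for `Candle::EmbeddingModel` and `Candle::Reranker` give way to `from_pretrained`, and `Candle::Device` gains a `best` helper (see the device.rs hunks at the end). A minimal sketch of the new-style calls, pieced together from the README and device.rs hunks below rather than taken verbatim from the gem's docs:

```ruby
require 'candle'

# New in 1.2.0: pick the best available backend (Metal, then CUDA, then CPU).
device = Candle::Device.best

# Constructors move to from_pretrained; the model ids are the ones the README uses.
model    = Candle::EmbeddingModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", device: device)
reranker = Candle::Reranker.from_pretrained("cross-encoder/ms-marco-MiniLM-L-12-v2", device: device)

embedding = model.embedding("Hi there!")
results   = reranker.rerank("query", ["doc1", "doc2", "doc3"])
```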
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3f2d005688d7b0253060d087a9800ea8c1d2c7bbb6ff6c92cca3ebc238d99be3
+  data.tar.gz: 6296db628c2d13a39ef035fe45c41be39de2404e6ab72d9735109ad18879f65c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4511b6f96d1356101e10547f740702479c894fb6d1e6f8cb04213b49b624ac8dc73d83b8b91bbab396e095fd31bbb4dd019ca967ce7f790447a4b77dd25d3356
+  data.tar.gz: 5a1ef095e2bbd9967317e0c416fb018da2673e18d76e00c98177cdba5dd2f9c5723fc5404b0d4221800227aab8b74e5560d420aff79f94e216365e6d3cee6f1e
data/README.md
CHANGED
@@ -1,4 +1,4 @@
-
+<img src="/docs/assets/logo-title.png" alt="red-candle" height="80px">
 
 [](https://github.com/assaydepot/red-candle/actions/workflows/build.yml)
 [](https://badge.fury.io/rb/red-candle)
@@ -18,7 +18,7 @@ gem install red-candle
 require 'candle'
 
 # Download a model (one-time, ~650MB) - Mistral, Llama3, Gemma all work!
-llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                                   gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
 
 # Chat with it - no API calls, running locally in your Ruby process!
@@ -27,8 +27,8 @@ messages = [
 ]
 
 puts llm.chat(messages)
-# => "Ruby is a dynamic, object-oriented programming language known for its
-#    simplicity, elegance, and productivity, often used for web development
+# => "Ruby is a dynamic, object-oriented programming language known for its
+#    simplicity, elegance, and productivity, often used for web development
 #    with frameworks like Rails."
 ```
 
@@ -99,22 +99,16 @@ x = x.reshape([3, 2])
 require 'candle'
 
 # Default model (JinaBERT) on CPU
-model = Candle::EmbeddingModel.
+model = Candle::EmbeddingModel.from_pretrained
 embedding = model.embedding("Hi there!")
 
 # Specify device (CPU, Metal, or CUDA)
 device = Candle::Device.cpu # or Candle::Device.metal, Candle::Device.cuda
-model = Candle::EmbeddingModel.
-  model_path: "jinaai/jina-embeddings-v2-base-en",
-  device: device
-)
+model = Candle::EmbeddingModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", device: device)
 embedding = model.embedding("Hi there!")
 
 # Reranker also supports device selection
-reranker = Candle::Reranker.
-  model_path: "cross-encoder/ms-marco-MiniLM-L-12-v2",
-  device: device
-)
+reranker = Candle::Reranker.from_pretrained("cross-encoder/ms-marco-MiniLM-L-12-v2", device: device)
 results = reranker.rerank("query", ["doc1", "doc2", "doc3"])
 ```
 
@@ -140,8 +134,8 @@ Red-Candle supports quantized models in GGUF format, offering 4-8x memory reduct
 
 ```ruby
 # Load quantized models - always specify the GGUF filename
-llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-                                  device: device,
+llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                  device: device,
                                   gguf_file: "llama-2-7b-chat.Q4_K_M.gguf")
 
 # Register custom tokenizer mappings for your models
@@ -155,7 +149,7 @@ Candle::LLM.register_tokenizer("my-org/my-model-GGUF", "my-org/my-tokenizer")
 **Memory usage comparison (7B models):**
 - Full precision: ~28 GB
 - Q8_0 (8-bit): ~7 GB - Best quality, larger size
-- Q5_K_M (5-bit): ~4.5 GB - Very good quality
+- Q5_K_M (5-bit): ~4.5 GB - Very good quality
 - Q4_K_M (4-bit): ~4 GB - Recommended default, best balance
 - Q3_K_M (3-bit): ~3 GB - Good for memory-constrained systems
 
@@ -169,13 +163,13 @@ Candle::LLM.register_tokenizer("my-org/my-model-GGUF", "my-org/my-tokenizer")
 > **Warning**: Q2_K quantization can lead to "weight is negative, too large or not a valid number" errors during inference. Use Q3_K_M or higher for stable operation.
 
 > ### ⚠️ Huggingface login warning
->
+>
 > Many models, including the one below, require you to agree to the terms. You'll need to:
 > 1. Login to [Huggingface](https://huggingface.co)
 > 2. Agree to the terms. For example: [here](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
 > 3. Authenticate your session. Simplest way is with `huggingface-cli login`. Detail here: [Huggingface CLI](https://huggingface.co/docs/huggingface_hub/en/guides/cli)
 >
-> More details here: [Huggingface Authentication](HUGGINGFACE.md)
+> More details here: [Huggingface Authentication](docs/HUGGINGFACE.md)
 
 ```ruby
 require 'candle'
@@ -208,7 +202,7 @@ response = llm.chat(messages)
 
 ### GPU Acceleration
 
-We see an 18x speed up running LLMs under CUDA vs CPU and a >3x speed up running under Metal vs CPU. Details [here](DEVICE_SUPPORT.md#performance-considerations).
+We see an 18x speed up running LLMs under CUDA vs CPU and a >3x speed up running under Metal vs CPU. Details [here](docs/DEVICE_SUPPORT.md#performance-considerations).
 
 ```ruby
 # CPU works for all models
@@ -216,7 +210,7 @@ device = Candle::Device.cpu
 llm = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", device: device)
 
 # Metal
-device = Candle::Device.metal
+device = Candle::Device.metal
 
 # CUDA support (for NVIDIA GPUs)
 device = Candle::Device.cuda # Linux/Windows with NVIDIA GPU
@@ -325,9 +319,9 @@ The default model (`jinaai/jina-embeddings-v2-base-en` with the `sentence-transf
 ```ruby
 > require 'candle'
 # Ruby memory = 25.9 MB
-> model = Candle::EmbeddingModel.
+> model = Candle::EmbeddingModel.from_pretrained
 # Ruby memory = 3.50 GB
-> model2 = Candle::EmbeddingModel.
+> model2 = Candle::EmbeddingModel.from_pretrained
 # Ruby memory = 7.04 GB
 > model2 = nil
 > GC.start
@@ -353,7 +347,7 @@ And the following ruby:
 
 ```ruby
 require 'candle'
-model = Candle::EmbeddingModel.
+model = Candle::EmbeddingModel.from_pretrained
 embedding = model.embedding("Hi there!")
 ```
 
@@ -367,13 +361,13 @@ Red-Candle includes support for cross-encoder reranking models, which can be use
 require 'candle'
 
 # Initialize the reranker with a cross-encoder model
-reranker = Candle::Reranker.
+reranker = Candle::Reranker.from_pretrained("cross-encoder/ms-marco-MiniLM-L-12-v2")
 
 # Define your query and candidate documents
 query = "How many people live in London?"
 documents = [
   "London is known for its financial district",
-  "Around 9 Million people live in London",
+  "Around 9 Million people live in London",
   "The weather in London is often rainy",
   "London is the capital of England"
 ]
@@ -457,7 +451,7 @@ For faster inference on NVIDIA GPUs:
 
 ```ruby
 # Initialize with CUDA if available (falls back to CPU if not)
-reranker = Candle::Reranker.
+reranker = Candle::Reranker.from_pretrained("cross-encoder/ms-marco-MiniLM-L-12-v2", cuda: true)
 ```
 
 ### How It Works
@@ -501,7 +495,7 @@ tokens = tokenizer.encode_to_tokens("Hello, world!")
 
 # Get both IDs and tokens together
 result = tokenizer.encode_with_tokens("preprocessing")
-# => {"ids" => [101, 3653, 22618, 2527, 102],
+# => {"ids" => [101, 3653, 22618, 2527, 102],
 #    "tokens" => ["[CLS]", "prep", "##ro", "##ces", "##sing", "[SEP]"]}
 ```
 
@@ -563,11 +557,11 @@ llm = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 llm_tokenizer = llm.tokenizer
 
 # From EmbeddingModel
-embedding_model = Candle::EmbeddingModel.
+embedding_model = Candle::EmbeddingModel.from_pretrained
 emb_tokenizer = embedding_model.tokenizer
 
 # From Reranker
-reranker = Candle::Reranker.
+reranker = Candle::Reranker.from_pretrained("cross-encoder/ms-marco-MiniLM-L-12-v2")
 rank_tokenizer = reranker.tokenizer
 ```
 
@@ -578,7 +572,7 @@ Modern tokenizers split unknown or rare words into subword pieces:
 ```ruby
 # See how words are split into subwords
 result = tokenizer.encode_with_tokens("unbelievable")
-# => {"ids" => [101, 4895, 6499, 102],
+# => {"ids" => [101, 4895, 6499, 102],
 #    "tokens" => ["[CLS]", "un", "##believable", "[SEP]"]}
 
 # The ## prefix indicates a continuation of the previous token
@@ -589,7 +583,7 @@ complex = tokenizer.encode_to_tokens("preprocessing tokenization")
 ### Use Cases
 
 - **Token Analysis**: Understand how your text is being processed by models
-- **Debugging**: See why certain inputs might cause unexpected model behavior
+- **Debugging**: See why certain inputs might cause unexpected model behavior
 - **Custom Preprocessing**: Build your own text processing pipelines
 - **Educational**: Teach how modern NLP models handle text
 - **NER Preparation**: Get aligned tokens for named entity recognition tasks
@@ -616,7 +610,7 @@ text = "Apple Inc. was founded by Steve Jobs and Steve Wozniak in Cupertino, Cal
 entities = ner.extract_entities(text)
 
 entities.each do |entity|
-  puts "#{entity[
+  puts "#{entity[:text]} (#{entity[:label]}) - confidence: #{entity[:confidence].round(2)}"
 end
 # Output:
 # Apple Inc. (ORG) - confidence: 0.99
@@ -668,8 +662,8 @@ drug_recognizer = Candle::GazetteerEntityRecognizer.new("DRUG")
 drug_recognizer.load_from_file("drug_names.txt")
 
 # Case-sensitive matching
-product_recognizer = Candle::GazetteerEntityRecognizer.new("PRODUCT",
-  ["iPhone", "iPad", "MacBook"],
+product_recognizer = Candle::GazetteerEntityRecognizer.new("PRODUCT",
+  ["iPhone", "iPad", "MacBook"],
   case_sensitive: true
 )
 ```
@@ -686,7 +680,7 @@ hybrid = Candle::HybridNER.new("Babelscape/wikineural-multilingual-ner")
 hybrid.add_pattern_recognizer("EMAIL", [/\b[\w._%+-]+@[\w.-]+\.[A-Z|a-z]{2,}\b/])
 hybrid.add_pattern_recognizer("PHONE", [/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/])
 
-# Add gazetteer recognizers
+# Add gazetteer recognizers
 hybrid.add_gazetteer_recognizer("COMPANY", ["Apple", "Google", "Microsoft"])
 hybrid.add_gazetteer_recognizer("PRODUCT", ["iPhone", "Android", "Windows"])
 
@@ -739,7 +733,7 @@ ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")
 # English NER (requires separate tokenizer)
 ner = Candle::NER.from_pretrained("dslim/bert-base-NER", tokenizer: "bert-base-cased")
 
-# Multilingual NER
+# Multilingual NER
 ner = Candle::NER.from_pretrained("Davlan/bert-base-multilingual-cased-ner-hrl")
 
 # OntoNotes 5 (18 entity types including DATE, TIME, MONEY, etc.)
@@ -758,9 +752,9 @@ ner = Candle::NER.from_pretrained("allenai/scibert_scivocab_uncased")
 ```
 
 2. **Batch Processing**: Process multiple texts together when possible
-
+
 3. **Confidence Threshold**: Balance precision/recall with appropriate thresholds
-
+
 4. **Entity Resolution**: The hybrid NER automatically handles overlapping entities
 
 ### Output Format
@@ -772,7 +766,7 @@ All NER methods return entities in a consistent format:
   "text" => "Apple Inc.", # The entity text
   "label" => "ORG", # Entity type
   "start" => 0, # Character start position
-  "end" => 10, # Character end position
+  "end" => 10, # Character end position
   "confidence" => 0.99, # Confidence score (0-1)
   "token_start" => 0, # Token start index (model-based only)
   "token_end" => 2, # Token end index (model-based only)
@@ -799,8 +793,8 @@ All NER methods return entities in a consistent format:
 - Q3_K_M (3-bit) - Minimum recommended quantization
 
 ```ruby
-llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-                                  device: device,
+llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+                                  device: device,
                                   gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
 ```
 
@@ -817,7 +811,7 @@ Failed to load quantized model: cannot find tensor model.embed_tokens.weight (Ru
 1. Ensure you're using the latest version of red-candle (1.0.0 or higher)
 2. Make sure to specify the exact GGUF filename:
 ```ruby
-llm = Candle::LLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+llm = Candle::LLM.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
                                   device: device,
                                   gguf_file: "mistral-7b-instruct-v0.2.Q4_K_M.gguf")
 ```
@@ -835,8 +829,8 @@ Failed to load quantized model: No GGUF file found in repository TheBloke/model-
 **Solution:** Specify the exact GGUF filename:
 ```ruby
 # Visit the HuggingFace repository to find the exact filename
-llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-                                  device: device,
+llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                  device: device,
                                   gguf_file: "llama-2-7b-chat.Q4_K_M.gguf")
 ```
 
@@ -864,7 +858,7 @@ Failed to load GGUF model: cannot find llama.attention.head_count in metadata (R
 
 **Cause:** Some GGUF files may have been created with older conversion tools that don't include all required metadata fields.
 
-**Solution:**
+**Solution:**
 - Try a different GGUF file from the same model
 - Look for GGUF files from TheBloke or other reputable sources
 - Check if a newer version of the GGUF file is available
data/Rakefile
CHANGED
@@ -1,22 +1,10 @@
 # frozen_string_literal: true
 
 require "bundler/gem_tasks"
-require "rake/testtask"
 require "rake/extensiontask"
+require "rspec/core/rake_task"
 
-task default: :
-Rake::TestTask.new do |t|
-  t.deps << :compile
-  t.libs << "test"
-  t.test_files = FileList["test/**/*_test.rb"]
-    .exclude("test/benchmarks/**/*_test.rb")
-    .exclude("test/llm/llm_test.rb")
-    .exclude("test/llm/gemma_test.rb")
-    .exclude("test/llm/mistral_test.rb")
-    .exclude("test/llm/llama_test.rb")
-    .exclude("test/llm/phi_test.rb")
-    .exclude("test/llm/qwen_test.rb")
-end
+task default: :spec
 
 spec = Bundler.load_gemspec("candle.gemspec")
 Rake::ExtensionTask.new("candle", spec) do |c|
@@ -33,80 +21,6 @@ Rake::ExtensionTask.new("candle", spec) do |c|
   ]
 end
 
-desc "Run device compatibility tests"
-Rake::TestTask.new("test:device") do |t|
-  t.deps << :compile
-  t.libs << "test"
-  t.test_files = FileList["test/device_compatibility_test.rb"]
-  t.verbose = true
-end
-
-desc "Run benchmark tests"
-Rake::TestTask.new("test:benchmark") do |t|
-  t.deps << :compile
-  t.libs << "test"
-  t.test_files = FileList["test/benchmarks/**/*_test.rb"]
-  t.verbose = true
-end
-
-desc "Run all tests including benchmarks"
-task "test:all" => [:test, "test:benchmark"]
-
-desc "Run tests on specific devices"
-namespace :test do
-  %w[cpu metal cuda].each do |device|
-    desc "Run tests on #{device.upcase} only"
-    task "device:#{device}" => :compile do
-      ENV['CANDLE_TEST_DEVICES'] = device
-      Rake::Task["test:device"].invoke
-    end
-  end
-end
-
-desc "Run benchmarks with device tests"
-task "test:device:benchmark" => :compile do
-  ENV['CANDLE_TEST_VERBOSE'] = 'true'
-  Rake::Task["test:device"].invoke
-  Rake::Task["test:benchmark"].invoke
-end
-
-desc "Run LLM tests for specific models"
-namespace :test do
-  namespace :llm do
-    desc "Run tests for Gemma models"
-    task :gemma => :compile do
-      ruby "-Itest", "test/llm/gemma_test.rb"
-    end
-
-    desc "Run tests for Phi models"
-    task :phi => :compile do
-      ruby "-Itest", "test/llm/phi_test.rb"
-    end
-
-    desc "Run tests for Qwen models"
-    task :qwen => :compile do
-      ruby "-Itest", "test/llm/qwen_test.rb"
-    end
-
-    desc "Run tests for Mistral models"
-    task :mistral => :compile do
-      ruby "-Itest", "test/llm/mistral_test.rb"
-    end
-
-    desc "Run tests for Llama models"
-    task :llama => :compile do
-      ruby "-Itest", "test/llm/llama_test.rb"
-    end
-
-    desc "Run tests for TinyLlama models"
-    task :tinyllama => :compile do
-      ruby "-Itest", "test/llm/tinyllama_test.rb"
-    end
-
-    desc "Run all LLM tests (WARNING: downloads large models)"
-    task :all => [:gemma, :phi, :qwen, :mistral, :llama]
-  end
-end
 
 namespace :doc do
   task default: %i[rustdoc yard]
@@ -174,3 +88,80 @@ end
 
 desc "Run Rust tests with coverage (alias)"
 task "coverage:rust" => "rust:coverage:html"
+
+# RSpec tasks
+desc "Run RSpec tests"
+RSpec::Core::RakeTask.new(:spec) do |t|
+  t.rspec_opts = "--format progress"
+end
+
+# Add compile as a dependency for spec task
+task spec: :compile
+
+namespace :spec do
+  desc "Run RSpec tests with all devices"
+  RSpec::Core::RakeTask.new(:device) do |t|
+    t.rspec_opts = "--format documentation --tag device"
+  end
+
+  desc "Run RSpec tests with coverage"
+  task :coverage do
+    ENV['COVERAGE'] = 'true'
+    Rake::Task["spec"].invoke
+  end
+
+  desc "Run RSpec tests in parallel (requires parallel_tests gem)"
+  task :parallel do
+    begin
+      require 'parallel_tests'
+      sh "parallel_rspec spec/"
+    rescue LoadError
+      puts "parallel_tests gem not installed. Run: gem install parallel_tests"
+    end
+  end
+
+  desc "Run specific device tests"
+  %w[cpu metal cuda].each do |device|
+    desc "Run tests on #{device.upcase} only"
+    task "device:#{device}" => :compile do
+      ENV['CANDLE_TEST_DEVICES'] = device
+      sh "rspec spec/device_compatibility_spec.rb --format documentation"
+    end
+  end
+
+  desc "Run LLM tests for specific models"
+  namespace :llm do
+    desc "Run tests for Gemma models"
+    task :gemma => :compile do
+      sh "rspec spec/llm/gemma_spec.rb --format documentation"
+    end
+
+    desc "Run tests for Phi models"
+    task :phi => :compile do
+      sh "rspec spec/llm/phi_spec.rb --format documentation"
+    end
+
+    desc "Run tests for Qwen models"
+    task :qwen => :compile do
+      sh "rspec spec/llm/qwen_spec.rb --format documentation"
+    end
+
+    desc "Run tests for Mistral models"
+    task :mistral => :compile do
+      sh "rspec spec/llm/mistral_spec.rb --format documentation"
+    end
+
+    desc "Run tests for Llama models"
+    task :llama => :compile do
+      sh "rspec spec/llm/llama_spec.rb --format documentation"
+    end
+
+    desc "Run tests for TinyLlama models"
+    task :tinyllama => :compile do
+      sh "rspec spec/llm/tinyllama_spec.rb --format documentation"
    end
+
+    desc "Run all LLM tests (WARNING: requires large models already downloaded)"
+    task :all => [:gemma, :phi, :qwen, :mistral, :llama, :tinyllama]
+  end
+end
data/ext/candle/src/lib.rs
CHANGED
@@ -4,8 +4,6 @@ use crate::ruby::candle_utils;
 use crate::ruby::Result;
 
 pub mod llm;
-pub mod ner;
-pub mod reranker;
 pub mod ruby;
 pub mod structured;
 pub mod tokenizer;
@@ -44,8 +42,8 @@ fn init(ruby: &Ruby) -> Result<()> {
 
     ruby::init_embedding_model(rb_candle)?;
    ruby::init_llm(rb_candle)?;
-    ner::init(rb_candle)?;
-    reranker::init(rb_candle)?;
+    ruby::ner::init(rb_candle)?;
+    ruby::reranker::init(rb_candle)?;
     ruby::dtype::init(rb_candle)?;
     ruby::device::init(rb_candle)?;
     ruby::tensor::init(rb_candle)?;
data/ext/candle/src/ruby/device.rs
CHANGED
@@ -53,6 +53,30 @@ pub fn default_device() -> Device {
     }
 }
 
+/// Get the best available device by checking runtime availability
+pub fn best_device() -> Device {
+    // Try devices in order of preference
+
+    #[cfg(feature = "metal")]
+    {
+        // Check if Metal is actually available at runtime
+        if CoreDevice::new_metal(0).is_ok() {
+            return Device::Metal;
+        }
+    }
+
+    #[cfg(feature = "cuda")]
+    {
+        // Check if CUDA is actually available at runtime
+        if CoreDevice::new_cuda(0).is_ok() {
+            return Device::Cuda;
+        }
+    }
+
+    // Always fall back to CPU
+    Device::Cpu
+}
+
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 #[magnus::wrap(class = "Candle::Device")]
 pub enum Device {
@@ -66,6 +90,11 @@ impl Device {
     pub fn cpu() -> Self {
         Self::Cpu
     }
+
+    /// Get the best available device
+    pub fn best() -> Self {
+        best_device()
+    }
 
     /// Create a CUDA device (GPU)
     pub fn cuda() -> Result<Self> {
@@ -195,6 +224,7 @@ pub fn init(rb_candle: RModule) -> Result<()> {
     rb_device.define_singleton_method("metal", function!(Device::metal, 0))?;
     rb_device.define_singleton_method("available_devices", function!(available_devices, 0))?;
     rb_device.define_singleton_method("default", function!(default_device, 0))?;
+    rb_device.define_singleton_method("best", function!(best_device, 0))?;
     rb_device.define_method("to_s", method!(Device::__str__, 0))?;
     rb_device.define_method("inspect", method!(Device::__repr__, 0))?;
     rb_device.define_method("==", method!(Device::__eq__, 1))?;