red-candle 1.0.0.pre.7 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -10
  3. data/README.md +399 -18
  4. data/ext/candle/src/lib.rs +6 -3
  5. data/ext/candle/src/llm/gemma.rs +5 -0
  6. data/ext/candle/src/llm/llama.rs +5 -0
  7. data/ext/candle/src/llm/mistral.rs +5 -0
  8. data/ext/candle/src/llm/mod.rs +1 -89
  9. data/ext/candle/src/llm/quantized_gguf.rs +5 -0
  10. data/ext/candle/src/ner.rs +423 -0
  11. data/ext/candle/src/reranker.rs +24 -21
  12. data/ext/candle/src/ruby/device.rs +6 -6
  13. data/ext/candle/src/ruby/dtype.rs +4 -4
  14. data/ext/candle/src/ruby/embedding_model.rs +36 -33
  15. data/ext/candle/src/ruby/llm.rs +31 -13
  16. data/ext/candle/src/ruby/mod.rs +1 -2
  17. data/ext/candle/src/ruby/tensor.rs +66 -66
  18. data/ext/candle/src/ruby/tokenizer.rs +269 -0
  19. data/ext/candle/src/ruby/utils.rs +6 -24
  20. data/ext/candle/src/tokenizer/loader.rs +108 -0
  21. data/ext/candle/src/tokenizer/mod.rs +103 -0
  22. data/ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt +1 -0
  23. data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/common.rs +355 -0
  24. data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/dynamic.rs +276 -0
  25. data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/macros.rs +49 -0
  26. data/ext/candle/target/release/build/pulp-1b95cfe377eede97/out/x86_64_asm.rs +2748 -0
  27. data/ext/candle/target/release/build/rb-sys-f8ac4edc30ab3e53/out/bindings-0.9.116-mri-arm64-darwin24-3.3.0.rs +8902 -0
  28. data/lib/candle/build_info.rb +2 -0
  29. data/lib/candle/device_utils.rb +2 -0
  30. data/lib/candle/ner.rb +345 -0
  31. data/lib/candle/reranker.rb +1 -1
  32. data/lib/candle/tensor.rb +2 -0
  33. data/lib/candle/tokenizer.rb +139 -0
  34. data/lib/candle/version.rb +4 -2
  35. data/lib/candle.rb +2 -0
  36. metadata +128 -5
  37. data/ext/candle/src/ruby/qtensor.rs +0 -69
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 6bea0c9f27d5bbbc43e0c2e8fa5456b2b79511f164f7083512025538ca172077
- data.tar.gz: b591f559c63c1bdb9fa96beaf1079186284b219e2ceba50d8e2891fa4f3096bd
+ metadata.gz: 52c635f005d25a305f99781763a4a3cc03f85fc5b74f0e576e51973ef8306fac
+ data.tar.gz: 1a0ac260a3803f1920ba2d9f71ec361013ae1eb99cf2caed62c0e9aecc583e96
  SHA512:
- metadata.gz: 380cde32abf995f9766056638d4d0612acef746a84b31451c0aa2fb8cd205fc76b76b8909729a9af267ee6c549f9149253b2bfb70717385b5afe2cd2ad0b3152
- data.tar.gz: 7acdfc4de263501be51e3f74933c528b9d723b4da93e78e3abae2454dc2746fb7696c09722fe6666084a48dff098501b2d1af503963b5d739e879bd0fcb96e43
+ metadata.gz: d301e6ed0fe8ac144c0735288c687f5dd74e7967dbe5d357e550ca5d467f6a33017b2bd9e7f46081711b6bf13555caa3e044183cd74cfaa89151e15c8cdb04a4
+ data.tar.gz: d296c35002b6d0ed919176375e5cc5d93c70fae0c0ae9a02d5cf86b8a4a49a67898c7fbc96e16350bba6792b18792126c976de156fb854b9b4f3260fa052cd79
data/Gemfile CHANGED
@@ -1,12 +1,3 @@
  source "https://rubygems.org"

- gemspec
-
- gem "minitest"
- gem "rake"
- gem "rake-compiler"
-
- gem "yard", require: false
- gem "yard-rustdoc", require: false
-
- gem "redcarpet", "~> 3.6"
+ gemspec
data/README.md CHANGED
@@ -1,9 +1,66 @@
- # red-candle
+ # `red-candle`: Native LLMs for Ruby 🚀

  [![build](https://github.com/assaydepot/red-candle/actions/workflows/build.yml/badge.svg)](https://github.com/assaydepot/red-candle/actions/workflows/build.yml)
  [![Gem Version](https://badge.fury.io/rb/red-candle.svg)](https://badge.fury.io/rb/red-candle)

- [candle](https://github.com/huggingface/candle) - Minimalist ML framework - for Ruby
+ Run state-of-the-art **language models directly from Ruby**. No Python, no APIs, no external services - just Ruby with blazing-fast Rust under the hood. Hardware accelerated with **Metal (Mac)** and **CUDA (NVIDIA)**.
+
+ ## Install & Chat in 30 Seconds
+
+ [![red-candle quickstart](https://img.youtube.com/vi/hbyFCyh8esk/0.jpg)](https://www.youtube.com/watch?v=hbyFCyh8esk)
+
+ ```bash
+ # Install the gem
+ gem install red-candle
+ ```
+
+ ```ruby
+ require 'candle'
+
+ # Download a model (one-time, ~650MB) - Mistral, Llama3, Gemma all work!
+ llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+                                   gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
+
+ # Chat with it - no API calls, running locally in your Ruby process!
+ messages = [
+   { role: "user", content: "Explain Ruby in one sentence" }
+ ]
+
+ puts llm.chat(messages)
+ # => "Ruby is a dynamic, object-oriented programming language known for its
+ #     simplicity, elegance, and productivity, often used for web development
+ #     with frameworks like Rails."
+ ```
+
+ ## What Just Happened?
+
+ You just ran a 1.1-billion parameter AI model inside Ruby. The model lives in your process memory, runs on your hardware (CPU/GPU), and responds instantly without network latency.
+
+ ## Stream Responses Like a Pro
+
+ ```ruby
+ # Watch the AI think in real-time
+ llm.chat_stream(messages) do |token|
+   print token
+ end
+ ```
+
+ ## Why This Matters
+
+ - **Privacy**: Your data never leaves your machine
+ - **Speed**: No network overhead, direct memory access
+ - **Control**: Fine-tune generation parameters, access raw tokens (see the sketch below)
+ - **Integration**: It's just Ruby objects - use it anywhere Ruby runs
+
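+ For example, sampling can be tuned per call (a minimal sketch - the keyword names below are hypothetical, so check the documentation of your installed version for the exact options):
+
+ ```ruby
+ # Hypothetical option names, shown for illustration only.
+ puts llm.chat(messages, temperature: 0.7, max_length: 256)
+ ```
+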
+ ## Supports
+
+ - **Tokenizers**: Access the tokenizer directly
+ - **EmbeddingModel**: Generate embeddings for text
+ - **Reranker**: Rerank documents based on relevance
+ - **NER**: Named Entity Recognition directly from Ruby
+ - **LLM**: Chat with Large Language Models (e.g., Llama, Mistral, Gemma)
+
+ ----

  ## Usage

@@ -127,6 +184,8 @@ response = llm.chat(messages)

  ### GPU Acceleration

+ We see an 18x speedup running LLMs under CUDA vs. CPU and a >3x speedup running under Metal vs. CPU. Details [here](DEVICE_SUPPORT.md#performance-considerations).
+
  ```ruby
  # CPU works for all models
  device = Candle::Device.cpu
@@ -166,9 +225,11 @@ This is particularly useful for:
  - Troubleshooting generation problems
  - Analyzing model behavior

- ## ⚠️ Model Format Requirement: Safetensors Only
+ ## ⚠️ Model Format Requirements

- Red-Candle **only supports embedding models that provide their weights in the [safetensors](https://github.com/huggingface/safetensors) format** (i.e., the model repo must contain a `model.safetensors` file). If the model repo does not provide the required file, loading will fail with a clear error. Most official BERT and DistilBERT models do **not** provide safetensors; many Sentence Transformers and JinaBERT models do.
+ ### EmbeddingModels and Rerankers: Safetensors Only
+
+ Red-Candle **only supports embedding models and rerankers that provide their weights in the [safetensors](https://github.com/huggingface/safetensors) format** (i.e., the model repo must contain a `model.safetensors` file). If the model repo does not provide the required file, loading will fail with a clear error. Most official BERT and DistilBERT models do **not** provide safetensors; many Sentence Transformers and JinaBERT models do.

  **If you encounter an error like:**

@@ -178,13 +239,22 @@ RuntimeError: model.safetensors not found after download. Only safetensors model

  this means the selected model is not compatible. Please choose a model repo that provides the required file.

+ ### LLMs: Safetensors and GGUF Support
+
+ LLMs support two formats:
+ 1. **Safetensors format** - Standard HuggingFace models (e.g., `TinyLlama/TinyLlama-1.1B-Chat-v1.0`)
+ 2. **GGUF quantized format** - Memory-efficient quantized models (e.g., `TheBloke/Llama-2-7B-Chat-GGUF`)
+
+ See the [Quantized Model Support](#quantized-model-support-gguf) section for details on using GGUF models.
+
  ## Supported Embedding Models

  Red-Candle supports the following embedding model types from Hugging Face:

  1. `Candle::EmbeddingModelType::JINA_BERT` - Jina BERT models (e.g., `jinaai/jina-embeddings-v2-base-en`) (**safetensors required**)
- 2. `Candle::EmbeddingModelType::STANDARD_BERT` - Standard BERT models (e.g., `sentence-transformers/all-MiniLM-L6-v2`) (**safetensors required**)
+ 2. `Candle::EmbeddingModelType::MINILM` - MiniLM models (e.g., `sentence-transformers/all-MiniLM-L6-v2`) (**safetensors required**)
  3. `Candle::EmbeddingModelType::DISTILBERT` - DistilBERT models (e.g., `distilbert-base-uncased-finetuned-sst-2-english`) (**safetensors required**)
+ 4. `Candle::EmbeddingModelType::STANDARD_BERT` - Standard BERT models (e.g., `scientistcom/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext`) (**safetensors required**)

  > **Note:** Most official BERT and DistilBERT models do _not_ provide safetensors. Please check the model repo before use.

@@ -260,7 +330,7 @@ ranked_results = reranker.rerank(query, documents, pooling_method: "pooler", app
  # Or apply sigmoid activation to get scores between 0 and 1
  sigmoid_results = reranker.rerank(query, documents, pooling_method: "pooler", apply_sigmoid: true)

- # The pooler method is the default and is recommended for cross-encoders, as is apply_sigmod, so the above is the same as:
+ # The pooler method is the default and is recommended for cross-encoders, as is apply_sigmoid, so the above is the same as:
  ranked_results = reranker.rerank(query, documents)

  # Results are returned as an array of hashes, sorted by relevance
@@ -351,9 +421,314 @@ The reranker uses a BERT-based architecture that:

  This joint processing allows cross-encoders to capture subtle semantic relationships between queries and documents, making them more accurate for reranking tasks, though at the cost of higher computational requirements.

+ ## Tokenizer
+
+ Red-Candle provides direct access to tokenizers for text preprocessing and analysis. This is useful for understanding how models process text, debugging issues, and building custom NLP pipelines.
+
+ ### Basic Usage
+
+ ```ruby
+ require 'candle'
+
+ # Load a tokenizer from HuggingFace
+ tokenizer = Candle::Tokenizer.from_pretrained("bert-base-uncased")
+
+ # Encode text to token IDs
+ token_ids = tokenizer.encode("Hello, world!")
+ # => [101, 7592, 1010, 2088, 999, 102]
+
+ # Decode token IDs back to text
+ text = tokenizer.decode(token_ids)
+ # => "hello, world!"
+
+ # Get token strings (subwords) - useful for visualization
+ tokens = tokenizer.encode_to_tokens("Hello, world!")
+ # => ["[CLS]", "hello", ",", "world", "!", "[SEP]"]
+
+ # Get both IDs and tokens together
+ result = tokenizer.encode_with_tokens("preprocessing")
+ # => {"ids" => [101, 3653, 22618, 2527, 102],
+ #     "tokens" => ["[CLS]", "prep", "##ro", "##ces", "##sing", "[SEP]"]}
+ ```
+
+ ### Batch Processing
+
+ ```ruby
+ # Encode multiple texts at once
+ texts = ["Hello world", "How are you?", "Tokenizers are cool"]
+ batch_ids = tokenizer.encode_batch(texts)
+
+ # Get token strings for multiple texts
+ batch_tokens = tokenizer.encode_batch_to_tokens(texts)
+ ```
+
+ ### Vocabulary Access
+
+ ```ruby
+ # Get vocabulary size
+ vocab_size = tokenizer.vocab_size
+ # => 30522
+
+ # Get full vocabulary as a hash
+ vocab = tokenizer.get_vocab
+ # vocab["hello"] => 7592
+
+ # Convert a specific token ID to its string
+ token_str = tokenizer.id_to_token(7592)
+ # => "hello"
+
+ # Get special tokens
+ special = tokenizer.get_special_tokens
+ # => {"cls_token" => 101, "sep_token" => 102, "pad_token" => 0, ...}
+ ```
+
+ ### Configuration
+
+ ```ruby
+ # Create a tokenizer with padding enabled
+ padded_tokenizer = tokenizer.with_padding(length: 128)
+
+ # Create a tokenizer with truncation
+ truncated_tokenizer = tokenizer.with_truncation(512)
+
+ # Configure padding with more options
+ padded_tokenizer = tokenizer.with_padding(
+   length: 128,        # Fixed length padding
+   direction: "right", # Pad on the right (default)
+   pad_token: "[PAD]"  # Padding token
+ )
+ ```
+
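+ The two settings can presumably be layered, since each `with_*` call returns a new configured tokenizer (a minimal sketch - the chaining behavior is an assumption, not documented above):
+
+ ```ruby
+ # Assumed: with_truncation and with_padding each return a tokenizer,
+ # so they can be chained to produce fixed-size encodings.
+ batch_ready = tokenizer.with_truncation(128).with_padding(length: 128)
+ ids = batch_ready.encode_batch(["short text", "a considerably longer text"])
+ ```
+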
+ ### Model Integration
+
+ All models expose their tokenizers:
+
+ ```ruby
+ # From LLM
+ llm = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+ llm_tokenizer = llm.tokenizer
+
+ # From EmbeddingModel
+ embedding_model = Candle::EmbeddingModel.new
+ emb_tokenizer = embedding_model.tokenizer
+
+ # From Reranker
+ reranker = Candle::Reranker.new(model_path: "cross-encoder/ms-marco-MiniLM-L-12-v2")
+ rank_tokenizer = reranker.tokenizer
+ ```
+
+ ### Understanding Subword Tokenization
+
+ Modern tokenizers split unknown or rare words into subword pieces:
+
+ ```ruby
+ # See how words are split into subwords
+ result = tokenizer.encode_with_tokens("unbelievable")
+ # => {"ids" => [101, 4895, 6499, 102],
+ #     "tokens" => ["[CLS]", "un", "##believable", "[SEP]"]}
+
+ # The ## prefix indicates a continuation of the previous token
+ complex = tokenizer.encode_to_tokens("preprocessing tokenization")
+ # => ["[CLS]", "prep", "##ro", "##ces", "##sing", "token", "##ization", "[SEP]"]
+ ```
+
+ ### Use Cases
+
+ - **Token Analysis**: Understand how your text is being processed by models
+ - **Debugging**: See why certain inputs might cause unexpected model behavior
+ - **Custom Preprocessing**: Build your own text processing pipelines
+ - **Educational**: Teach how modern NLP models handle text
+ - **NER Preparation**: Get aligned tokens for named entity recognition tasks
+
+ ## Named Entity Recognition (NER)
+
+ Red-Candle includes comprehensive Named Entity Recognition capabilities for extracting entities such as people, organizations, and locations, as well as custom entity types, from text.
+
+ ### Model-based NER
+
+ Load pre-trained NER models from HuggingFace:
+
+ ```ruby
+ require 'candle'
+
+ # Load a pre-trained NER model
+ ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")
+
+ # Or load a model with a specific tokenizer (for models without tokenizer.json)
+ ner = Candle::NER.from_pretrained("dslim/bert-base-NER", tokenizer: "bert-base-cased")
+
+ # Extract entities from text
+ text = "Apple Inc. was founded by Steve Jobs and Steve Wozniak in Cupertino, California."
+ entities = ner.extract_entities(text)
+
+ entities.each do |entity|
+   puts "#{entity['text']} (#{entity['label']}) - confidence: #{entity['confidence'].round(2)}"
+ end
+ # Output:
+ #   Apple Inc. (ORG) - confidence: 0.99
+ #   Steve Jobs (PER) - confidence: 0.99
+ #   Steve Wozniak (PER) - confidence: 0.98
+ #   Cupertino (LOC) - confidence: 0.97
+ #   California (LOC) - confidence: 0.98
+
+ # Adjust confidence threshold (default: 0.9)
+ entities = ner.extract_entities(text, confidence_threshold: 0.95)
+
+ # Get token-level predictions for detailed analysis
+ tokens = ner.predict_tokens(text)
+ ```
+
+ ### Pattern-based Recognition
+
+ For domain-specific entities, use regex patterns:
+
+ ```ruby
+ # Create pattern-based recognizers
+ email_recognizer = Candle::PatternEntityRecognizer.new("EMAIL", [
+   /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/
+ ])
+
+ phone_recognizer = Candle::PatternEntityRecognizer.new("PHONE", [
+   /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,       # 555-123-4567
+   /\b\(\d{3}\)\s*\d{3}[-.]?\d{4}\b/,     # (555) 123-4567
+   /\b\+1\s*\d{3}[-.]?\d{3}[-.]?\d{4}\b/  # +1 555-123-4567
+ ])
+
+ # Extract entities
+ text = "Contact us at info@example.com or call 555-123-4567"
+ email_entities = email_recognizer.recognize(text)
+ phone_entities = phone_recognizer.recognize(text)
+ ```
+
+ ### Gazetteer-based Recognition
+
+ Use dictionaries for known entities:
+
+ ```ruby
+ # Create gazetteer recognizers
+ companies = ["Apple", "Google", "Microsoft", "Amazon", "Tesla"]
+ company_recognizer = Candle::GazetteerEntityRecognizer.new("COMPANY", companies)
+
+ # Load from file
+ drug_recognizer = Candle::GazetteerEntityRecognizer.new("DRUG")
+ drug_recognizer.load_from_file("drug_names.txt")
+
+ # Case-sensitive matching
+ product_recognizer = Candle::GazetteerEntityRecognizer.new("PRODUCT",
+   ["iPhone", "iPad", "MacBook"],
+   case_sensitive: true
+ )
+ ```
+
+ ### Hybrid NER
+
+ Combine ML models with rule-based approaches for best results:
+
+ ```ruby
+ # Create hybrid NER system
+ hybrid = Candle::HybridNER.new("Babelscape/wikineural-multilingual-ner")
+
+ # Add pattern recognizers
+ hybrid.add_pattern_recognizer("EMAIL", [/\b[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,}\b/])
+ hybrid.add_pattern_recognizer("PHONE", [/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/])
+
+ # Add gazetteer recognizers
+ hybrid.add_gazetteer_recognizer("COMPANY", ["Apple", "Google", "Microsoft"])
+ hybrid.add_gazetteer_recognizer("PRODUCT", ["iPhone", "Android", "Windows"])
+
+ # Extract all entities
+ text = "John Smith (john@apple.com) from Apple called about the new iPhone. Reach him at 555-0123."
+ entities = hybrid.extract_entities(text)
+
+ # Results include entities from all recognizers
+ # Overlapping entities are automatically resolved (highest confidence wins)
+ ```
+
+ ### Custom Entity Types
+
+ Perfect for specialized domains:
+
+ ```ruby
+ # Biomedical entities
+ gene_patterns = [
+   /\b[A-Z][A-Z0-9]{2,}\b/,  # TP53, BRCA1, EGFR
+   /\bCD\d+\b/,              # CD4, CD8, CD34
+   /\b[A-Z]+\d[A-Z]\d*\b/    # RAD51C, PALB2
+ ]
+ gene_recognizer = Candle::PatternEntityRecognizer.new("GENE", gene_patterns)
+
+ # Financial entities
+ ticker_patterns = [
+   /\$[A-Z]{1,5}\b/,         # $AAPL, $GOOGL
+   /\b[A-Z]{1,5}\.NYSE\b/,   # AAPL.NYSE
+   /\b[A-Z]{1,5}\.NASDAQ\b/  # GOOGL.NASDAQ
+ ]
+ ticker_recognizer = Candle::PatternEntityRecognizer.new("TICKER", ticker_patterns)
+
+ # Legal entities
+ case_patterns = [
+   /\b\d+\s+F\.\d+\s+\d+\b/, # 123 F.3d 456
+   /\b\d+\s+U\.S\.\s+\d+\b/, # 123 U.S. 456
+   /\bNo\.\s+\d+-\d+\b/      # No. 20-1234
+ ]
+ case_recognizer = Candle::PatternEntityRecognizer.new("CASE", case_patterns)
+ ```
+
+ ### Available Pre-trained Models
+
+ Popular NER models on HuggingFace:
+
+ ```ruby
+ # General multilingual NER (4 entity types: PER, ORG, LOC, MISC)
+ ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")
+
+ # English NER (requires separate tokenizer)
+ ner = Candle::NER.from_pretrained("dslim/bert-base-NER", tokenizer: "bert-base-cased")
+
+ # Multilingual NER
+ ner = Candle::NER.from_pretrained("Davlan/bert-base-multilingual-cased-ner-hrl")
+
+ # OntoNotes 5 (18 entity types including DATE, TIME, MONEY, etc.)
+ ner = Candle::NER.from_pretrained("flair/ner-english-ontonotes-large")
+
+ # Biomedical NER
+ ner = Candle::NER.from_pretrained("dmis-lab/biobert-base-cased-v1.2")
+ ner = Candle::NER.from_pretrained("allenai/scibert_scivocab_uncased")
+ ```
+
+ ### Performance Tips
+
+ 1. **Device Selection**: Use GPU for faster inference
+    ```ruby
+    ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner", device: Candle::Device.metal)
+    ```
+
+ 2. **Batch Processing**: Process multiple texts together when possible (see the sketch after this list)
+
+ 3. **Confidence Threshold**: Balance precision/recall with appropriate thresholds
+
+ 4. **Entity Resolution**: The hybrid NER automatically handles overlapping entities
+
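+ For tip 2, a batch helper can be as simple as a map over the documented single-text call (a minimal sketch):
+
+ ```ruby
+ # Each text is processed independently with the extract_entities
+ # call shown earlier.
+ texts = ["Apple hired John Smith.", "Google opened an office in Paris."]
+ all_entities = texts.map { |text| ner.extract_entities(text) }
+ ```
+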
+ ### Output Format
+
+ All NER methods return entities in a consistent format:
+
+ ```ruby
+ {
+   "text" => "Apple Inc.",  # The entity text
+   "label" => "ORG",        # Entity type
+   "start" => 0,            # Character start position
+   "end" => 10,             # Character end position
+   "confidence" => 0.99,    # Confidence score (0-1)
+   "token_start" => 0,      # Token start index (model-based only)
+   "token_end" => 2,        # Token end index (model-based only)
+   "source" => "model"      # Source: "model", "pattern", or "gazetteer"
+ }
+ ```
+
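+ Because every recognizer returns this same shape, results from model, pattern, and gazetteer sources can be post-processed uniformly (a minimal sketch using only the documented keys):
+
+ ```ruby
+ # Keep confident predictions and group them by entity type.
+ confident = entities.select { |e| e["confidence"] >= 0.95 }
+ by_label = confident.group_by { |e| e["label"] }
+ ```
+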
  ## Common Runtime Errors

- ### 1. Weight is negative, too large or not a valid number
+ ### Weight is negative, too large or not a valid number

  **Error:**
  ```
@@ -370,13 +745,12 @@ This joint processing allows cross-encoders to capture subtle semantic relations
  - Q3_K_M (3-bit) - Minimum recommended quantization

  ```ruby
- # Instead of Q2_K:
  llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    device: device,
    gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
  ```

- ### 2. Cannot find tensor model.embed_tokens.weight
+ ### Cannot find tensor model.embed_tokens.weight

  **Error:**
  ```
@@ -395,7 +769,7 @@ Failed to load quantized model: cannot find tensor model.embed_tokens.weight (Ru
  ```
  3. If the error persists, the GGUF file may use an unsupported architecture or format

- ### 3. No GGUF file found in repository
+ ### No GGUF file found in repository

  **Error:**
  ```
@@ -412,7 +786,7 @@ llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
    gguf_file: "llama-2-7b-chat.Q4_K_M.gguf")
  ```

- ### 4. Failed to download tokenizer
+ ### Failed to download tokenizer

  **Error:**
  ```
@@ -423,7 +797,7 @@ Failed to load quantized model: Failed to download tokenizer: request error: HTT
  ```
  **Solution:** The code now includes fallback tokenizer loading. If you still encounter this error, ensure you're using the latest version of red-candle.

- ### 5. Missing metadata in GGUF file
+ ### Missing metadata in GGUF file

  **Error:**
  ```
@@ -452,17 +826,24 @@ Failed to load GGUF model: cannot find llama.attention.head_count in metadata (R
  FORK IT!

  ```
- git clone https://github.com/your_name/red-candle
+ git clone https://github.com/assaydepot/red-candle
  cd red-candle
  bundle
  bundle exec rake compile
  ```

- Implemented with [Magnus](https://github.com/matsadler/magnus), with reference to [Polars Ruby](https://github.com/ankane/polars-ruby)
-
  Pull requests are welcome.

- ### See Also
+ ## Release
+
+ 1. Update the version number in `lib/candle/version.rb` and commit.
+ 2. `bundle exec rake build`
+ 3. `git tag VERSION_NUMBER`
+ 4. `git push --follow-tags`
+ 5. `gem push pkg/red-candle-VERSION_NUMBER.gem`
+
+ ## See Also

- - [Numo::NArray](https://github.com/ruby-numo/numo-narray)
- - [Cumo](https://github.com/sonots/cumo)
+ - [Candle](https://github.com/huggingface/candle)
+ - [Magnus](https://github.com/matsadler/magnus)
+ - [Outlines-core](https://github.com/dottxt-ai/outlines-core)
data/ext/candle/src/lib.rs CHANGED
@@ -1,11 +1,13 @@
  use magnus::{function, prelude::*, Ruby};

  use crate::ruby::candle_utils;
- use crate::ruby::Result as RbResult;
+ use crate::ruby::Result;

  pub mod llm;
+ pub mod ner;
  pub mod reranker;
  pub mod ruby;
+ pub mod tokenizer;

  // Configuration detection from build.rs
  #[cfg(all(has_metal, not(force_cpu)))]
@@ -33,7 +35,7 @@ pub fn get_build_info() -> magnus::RHash {
  }

  #[magnus::init]
- fn init(ruby: &Ruby) -> RbResult<()> {
+ fn init(ruby: &Ruby) -> Result<()> {
      let rb_candle = ruby.define_module("Candle")?;

      // Export build info
@@ -41,11 +43,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {

      ruby::init_embedding_model(rb_candle)?;
      ruby::init_llm(rb_candle)?;
+     ner::init(rb_candle)?;
      reranker::init(rb_candle)?;
      ruby::dtype::init(rb_candle)?;
-     ruby::qtensor::init(rb_candle)?;
      ruby::device::init(rb_candle)?;
      ruby::tensor::init(rb_candle)?;
+     ruby::tokenizer::init(rb_candle)?;
      candle_utils(rb_candle)?;

      Ok(())
data/ext/candle/src/llm/gemma.rs CHANGED
@@ -21,6 +21,11 @@ impl Gemma {
          self.model.clear_kv_cache();
      }

+     /// Get the tokenizer
+     pub fn tokenizer(&self) -> &TokenizerWrapper {
+         &self.tokenizer
+     }
+
      /// Load a Gemma model from HuggingFace Hub
      pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
          let api = Api::new()
data/ext/candle/src/llm/llama.rs CHANGED
@@ -28,6 +28,11 @@ impl Llama {
          }
      }

+     /// Get the tokenizer
+     pub fn tokenizer(&self) -> &TokenizerWrapper {
+         &self.tokenizer
+     }
+
      /// Load a Llama model from HuggingFace Hub
      pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
          let api = Api::new()
data/ext/candle/src/llm/mistral.rs CHANGED
@@ -21,6 +21,11 @@ impl Mistral {
          self.model.clear_kv_cache();
      }

+     /// Get the tokenizer
+     pub fn tokenizer(&self) -> &TokenizerWrapper {
+         &self.tokenizer
+     }
+
      /// Load a Mistral model from HuggingFace Hub
      pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
          let api = Api::new()