red-candle 1.0.0.pre.7 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -10
- data/README.md +399 -18
- data/ext/candle/src/lib.rs +6 -3
- data/ext/candle/src/llm/gemma.rs +5 -0
- data/ext/candle/src/llm/llama.rs +5 -0
- data/ext/candle/src/llm/mistral.rs +5 -0
- data/ext/candle/src/llm/mod.rs +1 -89
- data/ext/candle/src/llm/quantized_gguf.rs +5 -0
- data/ext/candle/src/ner.rs +423 -0
- data/ext/candle/src/reranker.rs +24 -21
- data/ext/candle/src/ruby/device.rs +6 -6
- data/ext/candle/src/ruby/dtype.rs +4 -4
- data/ext/candle/src/ruby/embedding_model.rs +36 -33
- data/ext/candle/src/ruby/llm.rs +31 -13
- data/ext/candle/src/ruby/mod.rs +1 -2
- data/ext/candle/src/ruby/tensor.rs +66 -66
- data/ext/candle/src/ruby/tokenizer.rs +269 -0
- data/ext/candle/src/ruby/utils.rs +6 -24
- data/ext/candle/src/tokenizer/loader.rs +108 -0
- data/ext/candle/src/tokenizer/mod.rs +103 -0
- data/ext/candle/target/release/build/bindgen-0f89ba23b9ca1395/out/host-target.txt +1 -0
- data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/common.rs +355 -0
- data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/dynamic.rs +276 -0
- data/ext/candle/target/release/build/clang-sys-cac31d63c4694603/out/macros.rs +49 -0
- data/ext/candle/target/release/build/pulp-1b95cfe377eede97/out/x86_64_asm.rs +2748 -0
- data/ext/candle/target/release/build/rb-sys-f8ac4edc30ab3e53/out/bindings-0.9.116-mri-arm64-darwin24-3.3.0.rs +8902 -0
- data/lib/candle/build_info.rb +2 -0
- data/lib/candle/device_utils.rb +2 -0
- data/lib/candle/ner.rb +345 -0
- data/lib/candle/reranker.rb +1 -1
- data/lib/candle/tensor.rb +2 -0
- data/lib/candle/tokenizer.rb +139 -0
- data/lib/candle/version.rb +4 -2
- data/lib/candle.rb +2 -0
- metadata +128 -5
- data/ext/candle/src/ruby/qtensor.rs +0 -69
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 52c635f005d25a305f99781763a4a3cc03f85fc5b74f0e576e51973ef8306fac
+  data.tar.gz: 1a0ac260a3803f1920ba2d9f71ec361013ae1eb99cf2caed62c0e9aecc583e96
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d301e6ed0fe8ac144c0735288c687f5dd74e7967dbe5d357e550ca5d467f6a33017b2bd9e7f46081711b6bf13555caa3e044183cd74cfaa89151e15c8cdb04a4
+  data.tar.gz: d296c35002b6d0ed919176375e5cc5d93c70fae0c0ae9a02d5cf86b8a4a49a67898c7fbc96e16350bba6792b18792126c976de156fb854b9b4f3260fa052cd79
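These digests cover the gem's two inner archives (`metadata.gz` and `data.tar.gz`). A minimal verification sketch, not part of red-candle itself; it assumes `gem fetch red-candle --version 1.0.1` has placed `red-candle-1.0.1.gem` in the current directory:

```ruby
require 'digest'
require 'rubygems/package'

# Recompute the SHA256 digests of the gem's inner archives and compare
# them with the values published in checksums.yaml above.
File.open("red-candle-1.0.1.gem", "rb") do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end
```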
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,66 @@
-# red-candle
+# `red-candle` Native LLMs for Ruby 🚀
 
 [](https://github.com/assaydepot/red-candle/actions/workflows/build.yml)
 [](https://badge.fury.io/rb/red-candle)
 
-
+Run state-of-the-art **language models directly from Ruby**. No Python, no APIs, no external services - just Ruby with blazing-fast Rust under the hood. Hardware accelerated with **Metal (Mac)** and **CUDA (NVIDIA)**.
+
+## Install & Chat in 30 Seconds
+
+[](https://www.youtube.com/watch?v=hbyFCyh8esk)
+
+```bash
+# Install the gem
+gem install red-candle
+```
+
+```ruby
+require 'candle'
+
+# Download a model (one-time, ~650MB) - Mistral, Llama3, Gemma all work!
+llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
+                                  gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
+
+# Chat with it - no API calls, running locally in your Ruby process!
+messages = [
+  { role: "user", content: "Explain Ruby in one sentence" }
+]
+
+puts llm.chat(messages)
+# => "Ruby is a dynamic, object-oriented programming language known for its
+#    simplicity, elegance, and productivity, often used for web development
+#    with frameworks like Rails."
+```
+
+## What Just Happened?
+
+You just ran a 1.1-billion-parameter AI model inside Ruby. The model lives in your process memory, runs on your hardware (CPU/GPU), and responds instantly without network latency.
+
+## Stream Responses Like a Pro
+
+```ruby
+# Watch the AI think in real-time
+llm.chat_stream(messages) do |token|
+  print token
+end
+```
+
+## Why This Matters
+
+- **Privacy**: Your data never leaves your machine
+- **Speed**: No network overhead, direct memory access
+- **Control**: Fine-tune generation parameters, access raw tokens
+- **Integration**: It's just Ruby objects - use it anywhere Ruby runs
+
+## Supports
+
+- **Tokenizers**: Access the tokenizer directly
+- **EmbeddingModel**: Generate embeddings for text
+- **Reranker**: Rerank documents based on relevance
+- **NER**: Named Entity Recognition directly from Ruby
+- **LLM**: Chat with Large Language Models (e.g., Llama, Mistral, Gemma)
+
+----
 
 ## Usage
 
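As a small aside on the streaming example above: `chat_stream` yields each token as it is generated, so the stream can be captured as well as printed. A sketch using only the `llm` and `messages` objects from the quickstart:

```ruby
# Collect streamed tokens into a buffer while echoing them live.
buffer = +""
llm.chat_stream(messages) do |token|
  print token
  buffer << token
end
puts "\n[#{buffer.length} characters generated]"
```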
@@ -127,6 +184,8 @@ response = llm.chat(messages)
 
 ### GPU Acceleration
 
+We see an 18x speed up running LLMs under CUDA vs CPU and a >3x speed up running under Metal vs CPU. Details [here](DEVICE_SUPPORT.md#performance-considerations).
+
 ```ruby
 # CPU works for all models
 device = Candle::Device.cpu
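This hunk only shows the CPU device constructor. A rough sketch of selecting the fastest available backend: `Candle::Device.cpu` and `Candle::Device.metal` appear in this README, but `Candle::Device.cuda` and the raise-on-unavailable behavior are assumptions, so treat this as illustrative only.

```ruby
# Illustrative device fallback. Assumes each constructor raises when its
# backend is unavailable; `Candle::Device.cuda` is an assumption (only
# `.cpu` and `.metal` are shown in this README).
device =
  begin
    Candle::Device.metal
  rescue StandardError
    begin
      Candle::Device.cuda
    rescue StandardError
      Candle::Device.cpu
    end
  end

llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                                  device: device,
                                  gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
```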
@@ -166,9 +225,11 @@ This is particularly useful for:
 - Troubleshooting generation problems
 - Analyzing model behavior
 
-## ⚠️ Model Format
+## ⚠️ Model Format Requirements
 
-
+### EmbeddingModels and Rerankers: Safetensors Only
+
+Red-Candle **only supports embedding models and rerankers that provide their weights in the [safetensors](https://github.com/huggingface/safetensors) format** (i.e., the model repo must contain a `model.safetensors` file). If the model repo does not provide the required file, loading will fail with a clear error. Most official BERT and DistilBERT models do **not** provide safetensors; many Sentence Transformers and JinaBERT models do.
 
 **If you encounter an error like:**
 
@@ -178,13 +239,22 @@ RuntimeError: model.safetensors not found after download. Only safetensors model
 
 this means the selected model is not compatible. Please choose a model repo that provides the required file.
 
+### LLMs: Safetensors and GGUF Support
+
+LLM models support two formats:
+1. **Safetensors format** - Standard HuggingFace models (e.g., `TinyLlama/TinyLlama-1.1B-Chat-v1.0`)
+2. **GGUF quantized format** - Memory-efficient quantized models (e.g., `TheBloke/Llama-2-7B-Chat-GGUF`)
+
+See the [Quantized Model Support](#quantized-model-support-gguf) section for details on using GGUF models.
+
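Both formats go through the same `from_pretrained` entry point; a sketch using the two example repos named above (both calls appear elsewhere in this README; the safetensors path downloads full-precision weights, so it is a much larger download):

```ruby
require 'candle'

# Safetensors: reference the HuggingFace repo id alone.
llm_full = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# GGUF: additionally name the quantized file inside the repo.
llm_quantized = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
                                            gguf_file: "llama-2-7b-chat.Q4_K_M.gguf")
```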
 ## Supported Embedding Models
 
 Red-Candle supports the following embedding model types from Hugging Face:
 
 1. `Candle::EmbeddingModelType::JINA_BERT` - Jina BERT models (e.g., `jinaai/jina-embeddings-v2-base-en`) (**safetensors required**)
-2. `Candle::EmbeddingModelType::
+2. `Candle::EmbeddingModelType::MINILM` - MiniLM models (e.g., `sentence-transformers/all-MiniLM-L6-v2`) (**safetensors required**)
 3. `Candle::EmbeddingModelType::DISTILBERT` - DistilBERT models (e.g., `distilbert-base-uncased-finetuned-sst-2-english`) (**safetensors required**)
+4. `Candle::EmbeddingModelType::STANDARD_BERT` - Standard BERT models (e.g., `scientistcom/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext`) (**safetensors required**)
 
 > **Note:** Most official BERT and DistilBERT models do _not_ provide safetensors. Please check the model repo before use.
 
@@ -260,7 +330,7 @@ ranked_results = reranker.rerank(query, documents, pooling_method: "pooler", app
 # Or apply sigmoid activation to get scores between 0 and 1
 sigmoid_results = reranker.rerank(query, documents, pooling_method: "pooler", apply_sigmoid: true)
 
-# The pooler method is the default and is recommended for cross-encoders, as is
+# The pooler method is the default and is recommended for cross-encoders, as is apply_sigmoid, so the above is the same as:
 ranked_results = reranker.rerank(query, documents)
 
 # Results are returned as an array of hashes, sorted by relevance
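For reference alongside this hunk, a self-contained sketch built from calls shown in this README (`Candle::Reranker.new(model_path: ...)` appears in the Tokenizer section's Model Integration example); the exact keys of each result hash are not spelled out in this hunk, so the sketch just inspects them:

```ruby
require 'candle'

reranker = Candle::Reranker.new(model_path: "cross-encoder/ms-marco-MiniLM-L-12-v2")

query = "What is the capital of France?"
documents = [
  "Paris is the capital and largest city of France.",
  "Ruby is a dynamic programming language.",
  "The Eiffel Tower is located in Paris."
]

# Defaults to pooler pooling with sigmoid activation, per the comment above.
reranker.rerank(query, documents).each { |result| p result }
```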
@@ -351,9 +421,314 @@ The reranker uses a BERT-based architecture that:
 
 This joint processing allows cross-encoders to capture subtle semantic relationships between queries and documents, making them more accurate for reranking tasks, though at the cost of higher computational requirements.
 
+## Tokenizer
+
+Red-Candle provides direct access to tokenizers for text preprocessing and analysis. This is useful for understanding how models process text, debugging issues, and building custom NLP pipelines.
+
+### Basic Usage
+
+```ruby
+require 'candle'
+
+# Load a tokenizer from HuggingFace
+tokenizer = Candle::Tokenizer.from_pretrained("bert-base-uncased")
+
+# Encode text to token IDs
+token_ids = tokenizer.encode("Hello, world!")
+# => [101, 7592, 1010, 2088, 999, 102]
+
+# Decode token IDs back to text
+text = tokenizer.decode(token_ids)
+# => "hello, world!"
+
+# Get token strings (subwords) - useful for visualization
+tokens = tokenizer.encode_to_tokens("Hello, world!")
+# => ["[CLS]", "hello", ",", "world", "!", "[SEP]"]
+
+# Get both IDs and tokens together
+result = tokenizer.encode_with_tokens("preprocessing")
+# => {"ids" => [101, 3653, 22618, 2527, 102],
+#     "tokens" => ["[CLS]", "prep", "##ro", "##ces", "##sing", "[SEP]"]}
+```
+
+### Batch Processing
+
+```ruby
+# Encode multiple texts at once
+texts = ["Hello world", "How are you?", "Tokenizers are cool"]
+batch_ids = tokenizer.encode_batch(texts)
+
+# Get token strings for multiple texts
+batch_tokens = tokenizer.encode_batch_to_tokens(texts)
+```
+
+### Vocabulary Access
+
+```ruby
+# Get vocabulary size
+vocab_size = tokenizer.vocab_size
+# => 30522
+
+# Get full vocabulary as a hash
+vocab = tokenizer.get_vocab
+# vocab["hello"] => 7592
+
+# Convert a specific token ID to its string
+token_str = tokenizer.id_to_token(7592)
+# => "hello"
+
+# Get special tokens
+special = tokenizer.get_special_tokens
+# => {"cls_token" => 101, "sep_token" => 102, "pad_token" => 0, ...}
+```
+
+### Configuration
+
+```ruby
+# Create a tokenizer with padding enabled
+padded_tokenizer = tokenizer.with_padding(length: 128)
+
+# Create a tokenizer with truncation
+truncated_tokenizer = tokenizer.with_truncation(512)
+
+# Configure padding with more options
+padded_tokenizer = tokenizer.with_padding(
+  length: 128,        # Fixed length padding
+  direction: "right", # Pad on the right (default)
+  pad_token: "[PAD]"  # Padding token
+)
+```
+
+### Model Integration
+
+All models expose their tokenizers:
+
+```ruby
+# From LLM
+llm = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+llm_tokenizer = llm.tokenizer
+
+# From EmbeddingModel
+embedding_model = Candle::EmbeddingModel.new
+emb_tokenizer = embedding_model.tokenizer
+
+# From Reranker
+reranker = Candle::Reranker.new(model_path: "cross-encoder/ms-marco-MiniLM-L-12-v2")
+rank_tokenizer = reranker.tokenizer
+```
+
+### Understanding Subword Tokenization
+
+Modern tokenizers split unknown or rare words into subword pieces:
+
+```ruby
+# See how words are split into subwords
+result = tokenizer.encode_with_tokens("unbelievable")
+# => {"ids" => [101, 4895, 6499, 102],
+#     "tokens" => ["[CLS]", "un", "##believable", "[SEP]"]}
+
+# The ## prefix indicates a continuation of the previous token
+complex = tokenizer.encode_to_tokens("preprocessing tokenization")
+# => ["[CLS]", "prep", "##ro", "##ces", "##sing", "token", "##ization", "[SEP]"]
+```
+
+### Use Cases
+
+- **Token Analysis**: Understand how your text is being processed by models
+- **Debugging**: See why certain inputs might cause unexpected model behavior
+- **Custom Preprocessing**: Build your own text processing pipelines
+- **Educational**: Teach how modern NLP models handle text
+- **NER Preparation**: Get aligned tokens for named entity recognition tasks
+
+## Named Entity Recognition (NER)
+
+Red-Candle includes comprehensive Named Entity Recognition capabilities for extracting entities like people, organizations, locations, and custom entity types from text.
+
+### Model-based NER
+
+Load pre-trained NER models from HuggingFace:
+
+```ruby
+require 'candle'
+
+# Load a pre-trained NER model
+ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")
+
+# Or load a model with a specific tokenizer (for models without tokenizer.json)
+ner = Candle::NER.from_pretrained("dslim/bert-base-NER", tokenizer: "bert-base-cased")
+
+# Extract entities from text
+text = "Apple Inc. was founded by Steve Jobs and Steve Wozniak in Cupertino, California."
+entities = ner.extract_entities(text)
+
+entities.each do |entity|
+  puts "#{entity['text']} (#{entity['label']}) - confidence: #{entity['confidence'].round(2)}"
+end
+# Output:
+# Apple Inc. (ORG) - confidence: 0.99
+# Steve Jobs (PER) - confidence: 0.99
+# Steve Wozniak (PER) - confidence: 0.98
+# Cupertino (LOC) - confidence: 0.97
+# California (LOC) - confidence: 0.98
+
+# Adjust confidence threshold (default: 0.9)
+entities = ner.extract_entities(text, confidence_threshold: 0.95)
+
+# Get token-level predictions for detailed analysis
+tokens = ner.predict_tokens(text)
+```
+
+### Pattern-based Recognition
+
+For domain-specific entities, use regex patterns:
+
+```ruby
+# Create pattern-based recognizers
+email_recognizer = Candle::PatternEntityRecognizer.new("EMAIL", [
+  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/
+])
+
+phone_recognizer = Candle::PatternEntityRecognizer.new("PHONE", [
+  /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/,       # 555-123-4567
+  /\b\(\d{3}\)\s*\d{3}[-.]?\d{4}\b/,     # (555) 123-4567
+  /\b\+1\s*\d{3}[-.]?\d{3}[-.]?\d{4}\b/  # +1 555-123-4567
+])
+
+# Extract entities
+text = "Contact us at info@example.com or call 555-123-4567"
+email_entities = email_recognizer.recognize(text)
+phone_entities = phone_recognizer.recognize(text)
+```
+
+### Gazetteer-based Recognition
+
+Use dictionaries for known entities:
+
+```ruby
+# Create gazetteer recognizers
+companies = ["Apple", "Google", "Microsoft", "Amazon", "Tesla"]
+company_recognizer = Candle::GazetteerEntityRecognizer.new("COMPANY", companies)
+
+# Load from file
+drug_recognizer = Candle::GazetteerEntityRecognizer.new("DRUG")
+drug_recognizer.load_from_file("drug_names.txt")
+
+# Case-sensitive matching
+product_recognizer = Candle::GazetteerEntityRecognizer.new("PRODUCT",
+  ["iPhone", "iPad", "MacBook"],
+  case_sensitive: true
+)
+```
+
+### Hybrid NER
+
+Combine ML models with rule-based approaches for best results:
+
+```ruby
+# Create hybrid NER system
+hybrid = Candle::HybridNER.new("Babelscape/wikineural-multilingual-ner")
+
+# Add pattern recognizers
+hybrid.add_pattern_recognizer("EMAIL", [/\b[\w._%+-]+@[\w.-]+\.[A-Z|a-z]{2,}\b/])
+hybrid.add_pattern_recognizer("PHONE", [/\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/])
+
+# Add gazetteer recognizers
+hybrid.add_gazetteer_recognizer("COMPANY", ["Apple", "Google", "Microsoft"])
+hybrid.add_gazetteer_recognizer("PRODUCT", ["iPhone", "Android", "Windows"])
+
+# Extract all entities
+text = "John Smith (john@apple.com) from Apple called about the new iPhone. Reach him at 555-0123."
+entities = hybrid.extract_entities(text)
+
+# Results include entities from all recognizers
+# Overlapping entities are automatically resolved (highest confidence wins)
+```
+
+### Custom Entity Types
+
+Perfect for specialized domains:
+
+```ruby
+# Biomedical entities
+gene_patterns = [
+  /\b[A-Z][A-Z0-9]{2,}\b/,  # TP53, BRCA1, EGFR
+  /\bCD\d+\b/,              # CD4, CD8, CD34
+  /\b[A-Z]+\d[A-Z]\d*\b/    # RAD51C, PALB2
+]
+gene_recognizer = Candle::PatternEntityRecognizer.new("GENE", gene_patterns)
+
+# Financial entities
+ticker_patterns = [
+  /\$[A-Z]{1,5}\b/,         # $AAPL, $GOOGL
+  /\b[A-Z]{1,5}\.NYSE\b/,   # AAPL.NYSE
+  /\b[A-Z]{1,5}\.NASDAQ\b/  # GOOGL.NASDAQ
+]
+ticker_recognizer = Candle::PatternEntityRecognizer.new("TICKER", ticker_patterns)
+
+# Legal entities
+case_patterns = [
+  /\b\d+\s+F\.\d+\s+\d+\b/,  # 123 F.3d 456
+  /\b\d+\s+U\.S\.\s+\d+\b/,  # 123 U.S. 456
+  /\bNo\.\s+\d+-\d+\b/       # No. 20-1234
+]
+case_recognizer = Candle::PatternEntityRecognizer.new("CASE", case_patterns)
+```
+
+### Available Pre-trained Models
+
+Popular NER models on HuggingFace:
+
+```ruby
+# General multilingual NER (4 entity types: PER, ORG, LOC, MISC)
+ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")
+
+# English NER (requires separate tokenizer)
+ner = Candle::NER.from_pretrained("dslim/bert-base-NER", tokenizer: "bert-base-cased")
+
+# Multilingual NER
+ner = Candle::NER.from_pretrained("Davlan/bert-base-multilingual-cased-ner-hrl")
+
+# OntoNotes 5 (18 entity types including DATE, TIME, MONEY, etc.)
+ner = Candle::NER.from_pretrained("flair/ner-english-ontonotes-large")
+
+# Biomedical NER
+ner = Candle::NER.from_pretrained("dmis-lab/biobert-base-cased-v1.2")
+ner = Candle::NER.from_pretrained("allenai/scibert_scivocab_uncased")
+```
+
+### Performance Tips
+
+1. **Device Selection**: Use GPU for faster inference
+   ```ruby
+   ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner", device: Candle::Device.metal)
+   ```
+
+2. **Batch Processing**: Process multiple texts together when possible (see the sketch after this list)
+
+3. **Confidence Threshold**: Balance precision/recall with appropriate thresholds
+
+4. **Entity Resolution**: The hybrid NER automatically handles overlapping entities
+
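The sketch referenced in tip 2: the NER API shown in this README is per-text, so batching here is plain Ruby iteration over a model loaded once (a true batched-inference call would be an assumption):

```ruby
ner = Candle::NER.from_pretrained("Babelscape/wikineural-multilingual-ner")

texts = [
  "Apple Inc. was founded by Steve Jobs.",
  "Microsoft is headquartered in Redmond.",
  "Tesla builds electric cars in Austin."
]

# Load once, reuse across texts: model loading dominates the cost.
all_entities = texts.map { |text| ner.extract_entities(text) }
```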
+### Output Format
+
+All NER methods return entities in a consistent format:
+
+```ruby
+{
+  "text" => "Apple Inc.",  # The entity text
+  "label" => "ORG",        # Entity type
+  "start" => 0,            # Character start position
+  "end" => 10,             # Character end position
+  "confidence" => 0.99,    # Confidence score (0-1)
+  "token_start" => 0,      # Token start index (model-based only)
+  "token_end" => 2,        # Token end index (model-based only)
+  "source" => "model"      # Source: "model", "pattern", or "gazetteer"
+}
+```
+
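Because every recognizer emits this same hash shape, downstream code can stay generic. A short sketch grouping entities by label using only the documented keys (assumes the `ner` model from the Model-based NER section):

```ruby
entities = ner.extract_entities("Apple Inc. was founded by Steve Jobs in Cupertino.")

# Group extracted spans by entity type using the documented hash keys.
entities.group_by { |e| e["label"] }.each do |label, group|
  puts "#{label}: #{group.map { |e| e['text'] }.join(', ')}"
end
# e.g. ORG: Apple Inc. / PER: Steve Jobs / LOC: Cupertino
```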
 ## Common Runtime Errors
 
-### 
+### Weight is negative, too large or not a valid number
 
 **Error:**
 ```
@@ -370,13 +745,12 @@ This joint processing allows cross-encoders to capture subtle semantic relations
 - Q3_K_M (3-bit) - Minimum recommended quantization
 
 ```ruby
-# Instead of Q2_K:
 llm = Candle::LLM.from_pretrained("TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                                   device: device,
                                   gguf_file: "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")
 ```
 
-### 
+### Cannot find tensor model.embed_tokens.weight
 
 **Error:**
 ```
@@ -395,7 +769,7 @@ Failed to load quantized model: cannot find tensor model.embed_tokens.weight (Ru
 ```
 3. If the error persists, the GGUF file may use an unsupported architecture or format
 
-### 
+### No GGUF file found in repository
 
 **Error:**
 ```
@@ -412,7 +786,7 @@ llm = Candle::LLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
                                   gguf_file: "llama-2-7b-chat.Q4_K_M.gguf")
 ```
 
-### 
+### Failed to download tokenizer
 
 **Error:**
 ```
@@ -423,7 +797,7 @@ Failed to load quantized model: Failed to download tokenizer: request error: HTT
 
 **Solution:** The code now includes fallback tokenizer loading. If you still encounter this error, ensure you're using the latest version of red-candle.
 
-### 
+### Missing metadata in GGUF file
 
 **Error:**
 ```
@@ -452,17 +826,24 @@ Failed to load GGUF model: cannot find llama.attention.head_count in metadata (R
 FORK IT!
 
 ```
-git clone https://github.com/
+git clone https://github.com/assaydepot/red-candle
 cd red-candle
 bundle
 bundle exec rake compile
 ```
 
-Implemented with [Magnus](https://github.com/matsadler/magnus), with reference to [Polars Ruby](https://github.com/ankane/polars-ruby)
-
 Pull requests are welcome.
 
-
+## Release
+
+1. Update version number in `lib/candle/version.rb` and commit.
+2. `bundle exec rake build`
+3. `git tag VERSION_NUMBER`
+4. `git push --follow-tags`
+5. `gem push pkg/red-candle-1.0.0.gem`
+
+## See Also
 
-- [
-- [
+- [Candle](https://github.com/huggingface/candle)
+- [Magnus](https://github.com/matsadler/magnus)
+- [Outlines-core](https://github.com/dottxt-ai/outlines-core)
data/ext/candle/src/lib.rs
CHANGED
@@ -1,11 +1,13 @@
 use magnus::{function, prelude::*, Ruby};
 
 use crate::ruby::candle_utils;
-use crate::ruby::Result
+use crate::ruby::Result;
 
 pub mod llm;
+pub mod ner;
 pub mod reranker;
 pub mod ruby;
+pub mod tokenizer;
 
 // Configuration detection from build.rs
 #[cfg(all(has_metal, not(force_cpu)))]
@@ -33,7 +35,7 @@ pub fn get_build_info() -> magnus::RHash {
 }
 
 #[magnus::init]
-fn init(ruby: &Ruby) ->
+fn init(ruby: &Ruby) -> Result<()> {
     let rb_candle = ruby.define_module("Candle")?;
 
     // Export build info
@@ -41,11 +43,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
 
     ruby::init_embedding_model(rb_candle)?;
     ruby::init_llm(rb_candle)?;
+    ner::init(rb_candle)?;
     reranker::init(rb_candle)?;
     ruby::dtype::init(rb_candle)?;
-    ruby::qtensor::init(rb_candle)?;
     ruby::device::init(rb_candle)?;
     ruby::tensor::init(rb_candle)?;
+    ruby::tokenizer::init(rb_candle)?;
     candle_utils(rb_candle)?;
 
     Ok(())
data/ext/candle/src/llm/gemma.rs
CHANGED
@@ -21,6 +21,11 @@ impl Gemma {
         self.model.clear_kv_cache();
     }
 
+    /// Get the tokenizer
+    pub fn tokenizer(&self) -> &TokenizerWrapper {
+        &self.tokenizer
+    }
+
     /// Load a Gemma model from HuggingFace Hub
     pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
         let api = Api::new()
data/ext/candle/src/llm/llama.rs
CHANGED
@@ -28,6 +28,11 @@ impl Llama {
         }
     }
 
+    /// Get the tokenizer
+    pub fn tokenizer(&self) -> &TokenizerWrapper {
+        &self.tokenizer
+    }
+
     /// Load a Llama model from HuggingFace Hub
     pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
         let api = Api::new()
data/ext/candle/src/llm/mistral.rs
CHANGED
@@ -21,6 +21,11 @@ impl Mistral {
         self.model.clear_kv_cache();
     }
 
+    /// Get the tokenizer
+    pub fn tokenizer(&self) -> &TokenizerWrapper {
+        &self.tokenizer
+    }
+
     /// Load a Mistral model from HuggingFace Hub
     pub async fn from_pretrained(model_id: &str, device: Device) -> CandleResult<Self> {
         let api = Api::new()
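Each of these model wrappers (Gemma, Llama, Mistral, and the quantized GGUF loader) now exposes its `TokenizerWrapper`, which backs the Ruby-side `llm.tokenizer` accessor shown in the README's Model Integration section. A small round-trip sketch using only calls documented earlier in this diff (exact token ids depend on the model):

```ruby
require 'candle'

# Model id taken from the README's Model Integration section.
llm = Candle::LLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer = llm.tokenizer

ids = tokenizer.encode("Hello from red-candle")
puts ids.inspect
puts tokenizer.decode(ids) # should round-trip to (roughly) the input text
```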
|