fine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +167 -0
- data/LICENSE +21 -0
- data/README.md +212 -0
- data/Rakefile +6 -0
- data/docs/installation.md +151 -0
- data/docs/tutorials/llm-fine-tuning.md +246 -0
- data/docs/tutorials/model-export.md +200 -0
- data/docs/tutorials/siglip2-image-classification.md +130 -0
- data/docs/tutorials/siglip2-object-recognition.md +203 -0
- data/docs/tutorials/siglip2-similarity-search.md +152 -0
- data/docs/tutorials/text-classification.md +233 -0
- data/docs/tutorials/text-embeddings.md +211 -0
- data/examples/basic_classification.rb +70 -0
- data/examples/data/tool_calls.jsonl +30 -0
- data/examples/demo_training.rb +78 -0
- data/examples/finetune_gemma3_tools.rb +135 -0
- data/examples/real_llm_test.rb +128 -0
- data/examples/real_text_classification_test.rb +90 -0
- data/examples/real_text_embedder_test.rb +110 -0
- data/examples/real_training_test.rb +88 -0
- data/examples/test_export.rb +28 -0
- data/examples/test_image_classifier.rb +79 -0
- data/examples/test_llm.rb +100 -0
- data/examples/test_text_classifier.rb +59 -0
- data/lib/fine/callbacks/base.rb +140 -0
- data/lib/fine/callbacks/progress_bar.rb +66 -0
- data/lib/fine/configuration.rb +106 -0
- data/lib/fine/datasets/data_loader.rb +63 -0
- data/lib/fine/datasets/image_dataset.rb +203 -0
- data/lib/fine/datasets/instruction_dataset.rb +226 -0
- data/lib/fine/datasets/text_data_loader.rb +88 -0
- data/lib/fine/datasets/text_dataset.rb +266 -0
- data/lib/fine/error.rb +49 -0
- data/lib/fine/export/gguf_exporter.rb +424 -0
- data/lib/fine/export/onnx_exporter.rb +249 -0
- data/lib/fine/export.rb +53 -0
- data/lib/fine/hub/config_loader.rb +145 -0
- data/lib/fine/hub/model_downloader.rb +136 -0
- data/lib/fine/hub/safetensors_loader.rb +108 -0
- data/lib/fine/image_classifier.rb +256 -0
- data/lib/fine/llm.rb +336 -0
- data/lib/fine/models/base.rb +48 -0
- data/lib/fine/models/bert_encoder.rb +202 -0
- data/lib/fine/models/bert_for_sequence_classification.rb +226 -0
- data/lib/fine/models/causal_lm.rb +279 -0
- data/lib/fine/models/classification_head.rb +24 -0
- data/lib/fine/models/gemma3_decoder.rb +244 -0
- data/lib/fine/models/llama_decoder.rb +297 -0
- data/lib/fine/models/sentence_transformer.rb +202 -0
- data/lib/fine/models/siglip2_for_image_classification.rb +155 -0
- data/lib/fine/models/siglip2_vision_encoder.rb +190 -0
- data/lib/fine/text_classifier.rb +250 -0
- data/lib/fine/text_embedder.rb +221 -0
- data/lib/fine/tokenizers/auto_tokenizer.rb +208 -0
- data/lib/fine/training/llm_trainer.rb +212 -0
- data/lib/fine/training/text_trainer.rb +275 -0
- data/lib/fine/training/trainer.rb +194 -0
- data/lib/fine/transforms/compose.rb +28 -0
- data/lib/fine/transforms/normalize.rb +33 -0
- data/lib/fine/transforms/resize.rb +35 -0
- data/lib/fine/transforms/to_tensor.rb +53 -0
- data/lib/fine/version.rb +3 -0
- data/lib/fine.rb +112 -0
- data/mise.toml +2 -0
- metadata +240 -0
data/docs/tutorials/llm-fine-tuning.md
@@ -0,0 +1,246 @@

# Fine-tuning LLMs with Fine

Fine supports fine-tuning open-source LLMs like Llama, Gemma, Mistral, and Qwen for custom tasks.

## Quick Start

```ruby
require "fine"

# Load a base model
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B")

# Fine-tune on your data
llm.fit(train_file: "instructions.jsonl", epochs: 3)

# Generate text
response = llm.generate("Explain Ruby blocks in simple terms")
puts response

# Save for later
llm.save("my_llama")
```

## Preparing Your Data

Fine supports multiple data formats.

### Alpaca Format (Recommended)

```jsonl
{"instruction": "Explain what a Ruby block is", "input": "", "output": "A Ruby block is a chunk of code..."}
{"instruction": "Convert this to Ruby", "input": "print('hello')", "output": "puts 'hello'"}
{"instruction": "What does this code do?", "input": "arr.map(&:upcase)", "output": "It converts each string in the array to uppercase."}
```

### ShareGPT Format

```jsonl
{"conversations": [{"from": "human", "value": "What is Ruby?"}, {"from": "gpt", "value": "Ruby is a dynamic programming language..."}]}
{"conversations": [{"from": "human", "value": "Show me a loop"}, {"from": "gpt", "value": "Here's a Ruby loop:\n\n```ruby\n5.times { |i| puts i }\n```"}]}
```

### Simple Format

```jsonl
{"prompt": "### Question: What is 2+2?\n### Answer:", "completion": " 4"}
{"prompt": "Translate to French: Hello", "completion": " Bonjour"}
```
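
If you are assembling the training file programmatically, a short script like the one below can write Alpaca-format JSONL. This is a sketch, not part of Fine's API; the records and the `instructions.jsonl` path are placeholders.

```ruby
require "json"

# Hypothetical source data; replace with your own records.
examples = [
  { instruction: "Explain what a Ruby block is", input: "", output: "A Ruby block is a chunk of code..." },
  { instruction: "Convert this to Ruby", input: "print('hello')", output: "puts 'hello'" }
]

# Write one JSON object per line, matching the Alpaca example above.
File.open("instructions.jsonl", "w") do |f|
  examples.each { |ex| f.puts(ex.to_json) }
end
```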

## Configuration Options

```ruby
llm = Fine::LLM.new("google/gemma-2b") do |config|
  # Training parameters
  config.epochs = 3
  config.batch_size = 4
  config.learning_rate = 2e-5

  # Sequence length
  config.max_length = 2048

  # Gradient accumulation (effective batch = batch_size * gradient_accumulation_steps)
  config.gradient_accumulation_steps = 4

  # Gradient clipping
  config.max_grad_norm = 1.0

  # Learning rate warmup
  config.warmup_steps = 100

  # Freeze bottom N layers (for faster training)
  config.freeze_layers = 8
end
```

## Supported Models

| Model Family | Example Model ID | Notes |
|--------------|------------------|-------|
| Llama 3.2 | `meta-llama/Llama-3.2-1B` | Great balance of size/quality |
| Gemma | `google/gemma-2b` | Good for instruction following |
| Mistral | `mistralai/Mistral-7B-v0.1` | Strong general performance |
| Qwen | `Qwen/Qwen2-1.5B` | Multilingual support |

## Training Strategies

### Full Fine-tuning

Train all parameters (requires more memory):

```ruby
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.freeze_layers = 0  # Train everything
  config.batch_size = 2     # Smaller batch for memory
  config.gradient_accumulation_steps = 8
end
```

### Partial Fine-tuning

Freeze early layers to reduce memory and training time:

```ruby
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.freeze_layers = 16  # Freeze bottom 16 layers
  config.batch_size = 4
end
```

## Generation Options

```ruby
# Load trained model
llm = Fine::LLM.load("my_llama")

# Greedy decoding (deterministic)
response = llm.generate(
  "What is Ruby?",
  do_sample: false
)

# Creative generation
response = llm.generate(
  "Write a poem about coding",
  temperature: 0.9,
  top_p: 0.95,
  max_new_tokens: 200
)

# Focused generation
response = llm.generate(
  "Explain recursion",
  temperature: 0.3,
  top_k: 10,
  max_new_tokens: 150
)
```

## Chat Interface

For conversational use:

```ruby
messages = [
  { role: "system", content: "You are a helpful Ruby programming assistant." },
  { role: "user", content: "How do I read a file in Ruby?" }
]

response = llm.chat(messages, max_new_tokens: 200)
puts response
```

## Memory Optimization

LLMs require significant memory. Here are strategies to reduce usage:

### 1. Use Gradient Accumulation

```ruby
config.batch_size = 1
config.gradient_accumulation_steps = 16
# Effective batch size = 16, but only 1 sample in memory at a time
```

### 2. Freeze Layers

```ruby
config.freeze_layers = 20  # Only train top layers
```

### 3. Reduce Sequence Length

```ruby
config.max_length = 512  # Instead of default 2048
```

### 4. Use Smaller Models

Start with 1B-3B parameter models:

- `meta-llama/Llama-3.2-1B`
- `google/gemma-2b`
- `Qwen/Qwen2-1.5B`

## Example: Code Assistant

```ruby
require "fine"

# Prepare data (code_instructions.jsonl)
# {"instruction": "Write a function to reverse a string", "input": "", "output": "def reverse(s)\n s.reverse\nend"}

llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.epochs = 3
  config.batch_size = 2
  config.max_length = 1024
  config.freeze_layers = 8
end

llm.fit(train_file: "code_instructions.jsonl")
llm.save("ruby_code_assistant")

# Use it
assistant = Fine::LLM.load("ruby_code_assistant")
code = assistant.generate(
  "### Instruction:\nWrite a function to find the maximum element in an array\n\n### Response:\n",
  temperature: 0.2,
  max_new_tokens: 200
)
puts code
```

## Example: Domain Expert

```ruby
# Fine-tune on domain-specific Q&A
llm = Fine::LLM.new("google/gemma-2b") do |config|
  config.epochs = 5
  config.learning_rate = 1e-5
end

llm.fit(train_file: "medical_qa.jsonl", format: :alpaca)
llm.save("medical_assistant")
```

## Evaluation

Track training with validation data:

```ruby
llm.fit(
  train_file: "train.jsonl",
  val_file: "val.jsonl",
  epochs: 3
)
# Logs val_loss and val_perplexity each epoch
```

Lower perplexity indicates better language modeling.
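
As a rule of thumb, perplexity is the exponential of the average cross-entropy loss, so the two logged metrics should track each other. A quick sanity check in plain Ruby, assuming the loss is reported in nats per token:

```ruby
# val_perplexity ≈ Math.exp(val_loss)
val_loss = 2.1
val_perplexity = Math.exp(val_loss)  # => ~8.17
```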

## Tips

1. **Start small**: Begin with 1B models and small datasets
2. **Quality over quantity**: 1,000 high-quality examples often beat 10,000 noisy ones
3. **Format consistency**: Keep your instruction format consistent
4. **Learning rate**: Use lower rates (1e-5 to 5e-5) for fine-tuning
5. **Early stopping**: Monitor validation loss to avoid overfitting (a sketch follows below)
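
One way to act on that last tip is to watch validation loss from a callback. The sketch below assumes the LLM configuration exposes the same `on_epoch_end` hook shown in the SigLIP2 tutorial and that `metrics[:val_loss]` is populated; treat both as assumptions and adapt to the actual callback API.

```ruby
best_val_loss = Float::INFINITY

llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.epochs = 5
  # Assumed callback, mirroring the image-classification config API.
  config.on_epoch_end do |epoch, metrics|
    loss = metrics[:val_loss]
    next unless loss
    if loss < best_val_loss
      best_val_loss = loss
    else
      puts "Epoch #{epoch + 1}: val_loss rose from #{best_val_loss.round(4)} to #{loss.round(4)} (possible overfitting)"
    end
  end
end

llm.fit(train_file: "train.jsonl", val_file: "val.jsonl")
```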

data/docs/tutorials/model-export.md
@@ -0,0 +1,200 @@

# Exporting Models for Deployment

Fine supports exporting fine-tuned models to ONNX and GGUF formats for production deployment.

## ONNX Export

ONNX (Open Neural Network Exchange) is a cross-platform format supported by many inference runtimes including ONNX Runtime, TensorRT, and OpenVINO.

### Export Text Classifier

```ruby
classifier = Fine::TextClassifier.load("my_classifier")
classifier.export_onnx("classifier.onnx")

# With options
classifier.export_onnx(
  "classifier.onnx",
  opset_version: 14,
  dynamic_axes: true  # Allow variable batch size and sequence length
)
```

### Export Text Embedder

```ruby
embedder = Fine::TextEmbedder.load("my_embedder")
embedder.export_onnx("embedder.onnx")
```

### Export Image Classifier

```ruby
classifier = Fine::ImageClassifier.load("my_classifier")
classifier.export_onnx("classifier.onnx")
```

### Export LLM

```ruby
llm = Fine::LLM.load("my_llm")
llm.export_onnx("llm.onnx")
```

### Using the Export Module Directly

```ruby
Fine::Export.to_onnx(model, "model.onnx", opset_version: 14)
```

### ONNX Inference Example (Python)

```python
import onnxruntime as ort
import numpy as np

session = ort.InferenceSession("classifier.onnx")

# Text classification
input_ids = np.array([[101, 2054, 2003, 2023, 102]], dtype=np.int64)
attention_mask = np.array([[1, 1, 1, 1, 1]], dtype=np.int64)

outputs = session.run(None, {
    "input_ids": input_ids,
    "attention_mask": attention_mask
})
logits = outputs[0]
```

## GGUF Export (LLMs Only)

GGUF is the format used by llama.cpp, ollama, and other efficient inference engines. It supports various quantization levels to reduce model size and memory usage.

### Basic Export

```ruby
llm = Fine::LLM.load("my_llm")
llm.export_gguf("model.gguf")
```

### Quantization Options

```ruby
# F16 - Good balance of quality and size (default)
llm.export_gguf("model-f16.gguf", quantization: :f16)

# Q8 - Smaller, minimal quality loss
llm.export_gguf("model-q8.gguf", quantization: :q8_0)

# Q4 - Smallest, some quality loss
llm.export_gguf("model-q4.gguf", quantization: :q4_0)
```

### Available Quantization Types

| Type | Description | Size Reduction | Quality |
|------|-------------|----------------|---------|
| `:f32` | 32-bit float | None | Lossless |
| `:f16` | 16-bit float | ~50% | Minimal loss |
| `:q8_0` | 8-bit quantization | ~75% | Very small loss |
| `:q4_0` | 4-bit quantization | ~87% | Noticeable loss |
| `:q4_k` | 4-bit K-quant | ~87% | Better than q4_0 |
| `:q5_k` | 5-bit K-quant | ~84% | Good balance |
| `:q6_k` | 6-bit K-quant | ~81% | High quality |
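
To translate those reductions into absolute numbers, you can estimate file size from the parameter count and the percentages above. Rough arithmetic only; real GGUF files add some overhead for metadata and the tokenizer:

```ruby
# Rough size estimate: f32 is 4 bytes per parameter; apply the table's reductions.
params = 1_000_000_000  # e.g. a 1B-parameter model
f32_bytes = params * 4.0

reduction = { f32: 0.0, f16: 0.50, q8_0: 0.75, q4_0: 0.87 }
reduction.each do |type, r|
  gb = f32_bytes * (1.0 - r) / 1e9
  puts format("%-5s ~%.1f GB", type, gb)
end
# f32 ~4.0 GB, f16 ~2.0 GB, q8_0 ~1.0 GB, q4_0 ~0.5 GB
```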

### Adding Metadata

```ruby
llm.export_gguf(
  "model.gguf",
  quantization: :q4_k,
  metadata: {
    "description" => "Fine-tuned on customer support data",
    "version" => "1.0.0",
    "author" => "Your Name"
  }
)
```

### Using with llama.cpp

```bash
# Run inference
./main -m model.gguf -p "What is Ruby?" -n 100

# Start a server
./server -m model.gguf --host 0.0.0.0 --port 8080
```

### Using with Ollama

```bash
# Create a Modelfile
echo 'FROM ./model.gguf' > Modelfile
echo 'PARAMETER temperature 0.7' >> Modelfile
echo 'SYSTEM "You are a helpful assistant."' >> Modelfile

# Create the model
ollama create my-model -f Modelfile

# Run it
ollama run my-model "What is Ruby?"
```

## Best Practices

### Choosing the Right Format

| Use Case | Recommended Format |
|----------|--------------------|
| Web deployment | ONNX |
| Mobile apps | ONNX (with quantization) |
| Server inference | ONNX or GGUF |
| Edge devices | GGUF (quantized) |
| llama.cpp / ollama | GGUF |

### Choosing Quantization

| Priority | Recommended |
|----------|-------------|
| Quality first | `:f16` or `:q8_0` |
| Balance | `:q5_k` or `:q6_k` |
| Size first | `:q4_0` or `:q4_k` |

### Testing Exported Models

Always test your exported models to ensure quality:

```ruby
# Before export - test with Fine
response = llm.generate("Test prompt")

# After export - test with target runtime
# Compare outputs to ensure quality is acceptable
```
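
If the target runtime is ONNX Runtime, one way to do that comparison from Ruby is with the `onnxruntime` gem (a separate gem, not part of Fine). This is a sketch: the token IDs are placeholders, as in the Python example earlier, and in practice you would produce them with the same tokenizer used during training.

```ruby
require "onnxruntime"

model = OnnxRuntime::Model.new("classifier.onnx")

# Placeholder token IDs; generate them with the model's tokenizer in real use.
inputs = {
  "input_ids"      => [[101, 2054, 2003, 2023, 102]],
  "attention_mask" => [[1, 1, 1, 1, 1]]
}

outputs = model.predict(inputs)
logits = outputs.values.first
puts logits.inspect  # compare against the logits Fine produced before export
```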

## Troubleshooting

### ONNX Export Fails

1. Ensure the model is loaded and trained
2. Check that torch.rb ONNX support is available
3. Try a different `opset_version` (11, 13, or 14); a retry loop is sketched below
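
Because `export_onnx` accepts an `opset_version` option (shown earlier in this guide), a simple fallback is to try several opsets in order until one succeeds. A sketch, assuming a failed export raises a `StandardError`:

```ruby
exported = [14, 13, 11].any? do |opset|
  begin
    classifier.export_onnx("classifier.onnx", opset_version: opset)
    puts "Exported with opset #{opset}"
    true
  rescue StandardError => e
    warn "opset #{opset} failed: #{e.message}"
    false
  end
end

raise "ONNX export failed for all opset versions" unless exported
```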

### GGUF Export Issues

1. Only LLMs support GGUF export
2. Large models may need more memory during export
3. Some model architectures may need custom tensor mappings

### Large File Sizes

Use quantization to reduce file size:

```ruby
# ONNX with INT8 quantization
classifier.export_onnx("model.onnx", quantize: :int8)

# GGUF with Q4 quantization
llm.export_gguf("model.gguf", quantization: :q4_0)
```

data/docs/tutorials/siglip2-image-classification.md
@@ -0,0 +1,130 @@

# Fine-tuning SigLIP2 for Image Classification

Classify images into predefined categories (e.g., cats vs dogs, product types, document categories).

## When to Use This

- You have images that belong to distinct categories
- You want to automatically sort or label images
- Examples: photo organization, content moderation, product categorization

## Dataset Structure

Organize images in folders named after each class:

```
data/
  train/
    cats/
      cat001.jpg
      cat002.jpg
      ...
    dogs/
      dog001.jpg
      dog002.jpg
      ...
  val/
    cats/
      cat101.jpg
    dogs/
      dog101.jpg
```

**Tips:**

- Aim for at least 20-50 images per class for decent results
- More images = better accuracy
- Balance classes roughly equally (a quick check is sketched below)
- Use clear, representative images
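
Before training, it is worth verifying that the class folders really are balanced. A quick check in plain Ruby (no Fine API involved; adjust the glob to your image extensions):

```ruby
require "pathname"

train_dir = Pathname.new("data/train")

# Count images per class directory.
counts = train_dir.children.select(&:directory?).to_h do |dir|
  [dir.basename.to_s, dir.glob("*.{jpg,jpeg,png}").size]
end

counts.each { |label, n| puts "#{label}: #{n} images" }
warn "Classes look imbalanced" if counts.values.max > counts.values.min * 2
```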

## Basic Training

```ruby
require "fine"

classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224")

classifier.fit(
  train_dir: "data/train",
  val_dir: "data/val",
  epochs: 3
)

classifier.save("models/my_classifier")
```

## Training with Configuration

```ruby
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  # Training settings
  config.epochs = 5
  config.batch_size = 16       # Lower if running out of memory
  config.learning_rate = 2e-4  # Default works well for most cases

  # Freeze encoder for faster training (less accurate)
  config.freeze_encoder = false  # Set true for feature extraction only

  # Track progress
  config.on_epoch_end do |epoch, metrics|
    puts "Epoch #{epoch + 1}: accuracy=#{(metrics[:accuracy] * 100).round(1)}%"
  end
end

classifier.fit(train_dir: "data/train", val_dir: "data/val")
```

## Making Predictions

```ruby
# Load trained model
classifier = Fine::ImageClassifier.load("models/my_classifier")

# Single image
results = classifier.predict("photo.jpg")
puts results.first[:label]  # => "cats"
puts results.first[:score]  # => 0.95

# Multiple images
results = classifier.predict(["photo1.jpg", "photo2.jpg", "photo3.jpg"])
results.each_with_index do |preds, i|
  puts "Image #{i + 1}: #{preds.first[:label]}"
end
```

## Using Without Validation Set

If you don't have a separate validation set:

```ruby
classifier.fit_with_split(
  data_dir: "data/all_images",
  val_split: 0.2,  # Use 20% for validation
  epochs: 3
)
```

## Model Selection

| Model | Size | Speed | Accuracy | Use Case |
|-------|------|-------|----------|----------|
| `siglip2-base-patch16-224` | 86M | Fast | Good | Quick experiments, limited GPU |
| `siglip2-base-patch16-384` | 86M | Medium | Better | Production with good GPU |
| `siglip2-large-patch16-256` | 303M | Slower | Best | Maximum accuracy |

## Troubleshooting

**Out of memory:**

- Reduce `batch_size` (try 8 or 4)
- Use a smaller model variant
- Use `freeze_encoder = true`

**Low accuracy:**

- Add more training images
- Train for more epochs
- Check for mislabeled images
- Ensure classes are visually distinct

**Overfitting (train accuracy high, val accuracy low):**

- Add more training data
- Reduce epochs
- Use `freeze_encoder = true`