fine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/CHANGELOG.md +38 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +167 -0
  6. data/LICENSE +21 -0
  7. data/README.md +212 -0
  8. data/Rakefile +6 -0
  9. data/docs/installation.md +151 -0
  10. data/docs/tutorials/llm-fine-tuning.md +246 -0
  11. data/docs/tutorials/model-export.md +200 -0
  12. data/docs/tutorials/siglip2-image-classification.md +130 -0
  13. data/docs/tutorials/siglip2-object-recognition.md +203 -0
  14. data/docs/tutorials/siglip2-similarity-search.md +152 -0
  15. data/docs/tutorials/text-classification.md +233 -0
  16. data/docs/tutorials/text-embeddings.md +211 -0
  17. data/examples/basic_classification.rb +70 -0
  18. data/examples/data/tool_calls.jsonl +30 -0
  19. data/examples/demo_training.rb +78 -0
  20. data/examples/finetune_gemma3_tools.rb +135 -0
  21. data/examples/real_llm_test.rb +128 -0
  22. data/examples/real_text_classification_test.rb +90 -0
  23. data/examples/real_text_embedder_test.rb +110 -0
  24. data/examples/real_training_test.rb +88 -0
  25. data/examples/test_export.rb +28 -0
  26. data/examples/test_image_classifier.rb +79 -0
  27. data/examples/test_llm.rb +100 -0
  28. data/examples/test_text_classifier.rb +59 -0
  29. data/lib/fine/callbacks/base.rb +140 -0
  30. data/lib/fine/callbacks/progress_bar.rb +66 -0
  31. data/lib/fine/configuration.rb +106 -0
  32. data/lib/fine/datasets/data_loader.rb +63 -0
  33. data/lib/fine/datasets/image_dataset.rb +203 -0
  34. data/lib/fine/datasets/instruction_dataset.rb +226 -0
  35. data/lib/fine/datasets/text_data_loader.rb +88 -0
  36. data/lib/fine/datasets/text_dataset.rb +266 -0
  37. data/lib/fine/error.rb +49 -0
  38. data/lib/fine/export/gguf_exporter.rb +424 -0
  39. data/lib/fine/export/onnx_exporter.rb +249 -0
  40. data/lib/fine/export.rb +53 -0
  41. data/lib/fine/hub/config_loader.rb +145 -0
  42. data/lib/fine/hub/model_downloader.rb +136 -0
  43. data/lib/fine/hub/safetensors_loader.rb +108 -0
  44. data/lib/fine/image_classifier.rb +256 -0
  45. data/lib/fine/llm.rb +336 -0
  46. data/lib/fine/models/base.rb +48 -0
  47. data/lib/fine/models/bert_encoder.rb +202 -0
  48. data/lib/fine/models/bert_for_sequence_classification.rb +226 -0
  49. data/lib/fine/models/causal_lm.rb +279 -0
  50. data/lib/fine/models/classification_head.rb +24 -0
  51. data/lib/fine/models/gemma3_decoder.rb +244 -0
  52. data/lib/fine/models/llama_decoder.rb +297 -0
  53. data/lib/fine/models/sentence_transformer.rb +202 -0
  54. data/lib/fine/models/siglip2_for_image_classification.rb +155 -0
  55. data/lib/fine/models/siglip2_vision_encoder.rb +190 -0
  56. data/lib/fine/text_classifier.rb +250 -0
  57. data/lib/fine/text_embedder.rb +221 -0
  58. data/lib/fine/tokenizers/auto_tokenizer.rb +208 -0
  59. data/lib/fine/training/llm_trainer.rb +212 -0
  60. data/lib/fine/training/text_trainer.rb +275 -0
  61. data/lib/fine/training/trainer.rb +194 -0
  62. data/lib/fine/transforms/compose.rb +28 -0
  63. data/lib/fine/transforms/normalize.rb +33 -0
  64. data/lib/fine/transforms/resize.rb +35 -0
  65. data/lib/fine/transforms/to_tensor.rb +53 -0
  66. data/lib/fine/version.rb +3 -0
  67. data/lib/fine.rb +112 -0
  68. data/mise.toml +2 -0
  69. metadata +240 -0
data/docs/tutorials/llm-fine-tuning.md
@@ -0,0 +1,246 @@
# Fine-tuning LLMs with Fine

Fine supports fine-tuning open-source LLMs like Llama, Gemma, Mistral, and Qwen for custom tasks.

## Quick Start

```ruby
require "fine"

# Load a base model
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B")

# Fine-tune on your data
llm.fit(train_file: "instructions.jsonl", epochs: 3)

# Generate text
response = llm.generate("Explain Ruby blocks in simple terms")
puts response

# Save for later
llm.save("my_llama")
```

## Preparing Your Data

Fine supports multiple data formats.

### Alpaca Format (Recommended)

```jsonl
{"instruction": "Explain what a Ruby block is", "input": "", "output": "A Ruby block is a chunk of code..."}
{"instruction": "Convert this to Ruby", "input": "print('hello')", "output": "puts 'hello'"}
{"instruction": "What does this code do?", "input": "arr.map(&:upcase)", "output": "It converts each string in the array to uppercase."}
```

### ShareGPT Format

```jsonl
{"conversations": [{"from": "human", "value": "What is Ruby?"}, {"from": "gpt", "value": "Ruby is a dynamic programming language..."}]}
{"conversations": [{"from": "human", "value": "Show me a loop"}, {"from": "gpt", "value": "Here's a Ruby loop:\n\n```ruby\n5.times { |i| puts i }\n```"}]}
```

### Simple Format

```jsonl
{"prompt": "### Question: What is 2+2?\n### Answer:", "completion": " 4"}
{"prompt": "Translate to French: Hello", "completion": " Bonjour"}
```
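If the loader cannot tell these formats apart from the keys alone, the format can also be named explicitly when calling `fit`. The `format: :alpaca` option appears later in this guide; the `:sharegpt` and `:simple` symbols below are assumed names chosen for illustration, so check them against your installed version of the gem.

```ruby
# Hypothetical sketch of selecting a data format explicitly.
# format: :alpaca is shown later in this guide; :sharegpt and :simple are
# assumed symbol names; verify them against your version of Fine.
llm.fit(train_file: "instructions.jsonl", format: :alpaca, epochs: 3)
llm.fit(train_file: "conversations.jsonl", format: :sharegpt, epochs: 3)
llm.fit(train_file: "pairs.jsonl", format: :simple, epochs: 3)
```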

## Configuration Options

```ruby
llm = Fine::LLM.new("google/gemma-2b") do |config|
  # Training parameters
  config.epochs = 3
  config.batch_size = 4
  config.learning_rate = 2e-5

  # Sequence length
  config.max_length = 2048

  # Gradient accumulation (effective batch = batch_size * gradient_accumulation_steps)
  config.gradient_accumulation_steps = 4

  # Gradient clipping
  config.max_grad_norm = 1.0

  # Learning rate warmup
  config.warmup_steps = 100

  # Freeze bottom N layers (for faster training)
  config.freeze_layers = 8
end
```

## Supported Models

| Model Family | Example Model ID | Notes |
|-------------|------------------|-------|
| Llama 3.2 | `meta-llama/Llama-3.2-1B` | Great balance of size/quality |
| Gemma | `google/gemma-2b` | Good for instruction following |
| Mistral | `mistralai/Mistral-7B-v0.1` | Strong general performance |
| Qwen | `Qwen/Qwen2-1.5B` | Multilingual support |

## Training Strategies

### Full Fine-tuning

Train all parameters (requires more memory):

```ruby
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.freeze_layers = 0 # Train everything
  config.batch_size = 2    # Smaller batch for memory
  config.gradient_accumulation_steps = 8
end
```

### Partial Fine-tuning

Freeze early layers to reduce memory and training time:

```ruby
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.freeze_layers = 16 # Freeze bottom 16 layers
  config.batch_size = 4
end
```

## Generation Options

```ruby
# Load trained model
llm = Fine::LLM.load("my_llama")

# Greedy decoding (deterministic)
response = llm.generate(
  "What is Ruby?",
  do_sample: false
)

# Creative generation
response = llm.generate(
  "Write a poem about coding",
  temperature: 0.9,
  top_p: 0.95,
  max_new_tokens: 200
)

# Focused generation
response = llm.generate(
  "Explain recursion",
  temperature: 0.3,
  top_k: 10,
  max_new_tokens: 150
)
```

## Chat Interface

For conversational use:

```ruby
messages = [
  { role: "system", content: "You are a helpful Ruby programming assistant." },
  { role: "user", content: "How do I read a file in Ruby?" }
]

response = llm.chat(messages, max_new_tokens: 200)
puts response
```

## Memory Optimization

LLMs require significant memory. Here are strategies to reduce usage:

### 1. Use Gradient Accumulation

```ruby
config.batch_size = 1
config.gradient_accumulation_steps = 16
# Effective batch size = 16, but only 1 sample in memory at a time
```

### 2. Freeze Layers

```ruby
config.freeze_layers = 20 # Only train top layers
```

### 3. Reduce Sequence Length

```ruby
config.max_length = 512 # Instead of default 2048
```

### 4. Use Smaller Models

Start with 1B-3B parameter models:
- `meta-llama/Llama-3.2-1B`
- `google/gemma-2b`
- `Qwen/Qwen2-1.5B`

## Example: Code Assistant

```ruby
require "fine"

# Prepare data (code_instructions.jsonl)
# {"instruction": "Write a function to reverse a string", "input": "", "output": "def reverse(s)\n s.reverse\nend"}

llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.epochs = 3
  config.batch_size = 2
  config.max_length = 1024
  config.freeze_layers = 8
end

llm.fit(train_file: "code_instructions.jsonl")
llm.save("ruby_code_assistant")

# Use it
assistant = Fine::LLM.load("ruby_code_assistant")
code = assistant.generate(
  "### Instruction:\nWrite a function to find the maximum element in an array\n\n### Response:\n",
  temperature: 0.2,
  max_new_tokens: 200
)
puts code
```

## Example: Domain Expert

```ruby
# Fine-tune on domain-specific Q&A
llm = Fine::LLM.new("google/gemma-2b") do |config|
  config.epochs = 5
  config.learning_rate = 1e-5
end

llm.fit(train_file: "medical_qa.jsonl", format: :alpaca)
llm.save("medical_assistant")
```

## Evaluation

Track training with validation data:

```ruby
llm.fit(
  train_file: "train.jsonl",
  val_file: "val.jsonl",
  epochs: 3
)
# Logs val_loss and val_perplexity each epoch
```

Lower perplexity indicates better language modeling.
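
Perplexity is the exponential of the average cross-entropy loss, so the two logged metrics always move together; a quick sanity check in plain Ruby:

```ruby
# Perplexity = exp(mean cross-entropy loss), so a falling val_loss
# implies a falling val_perplexity.
Math.exp(2.0) # => ~7.39 (a val_loss of 2.0)
Math.exp(1.5) # => ~4.48 (improving the loss to 1.5 lowers perplexity to ~4.48)
```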

## Tips

1. **Start small**: Begin with 1B models and small datasets
2. **Quality over quantity**: 1,000 high-quality examples often beat 10,000 noisy ones
3. **Format consistency**: Keep your instruction format consistent
4. **Learning rate**: Use lower rates (1e-5 to 5e-5) for fine-tuning
5. **Early stopping**: Monitor validation loss to avoid overfitting (see the sketch below)
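
A minimal sketch of that last tip, assuming the LLM configuration exposes the same `on_epoch_end` callback documented for the image classifier and reports a `val_loss` metric; both names are assumptions, so adapt them to whatever your trainer actually yields.

```ruby
# Early-stopping sketch. config.on_epoch_end and metrics[:val_loss] are
# assumed to exist here (the callback is documented for ImageClassifier);
# adjust the names to whatever your version of Fine reports.
best_loss  = Float::INFINITY
bad_epochs = 0
patience   = 2

llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
  config.epochs = 10
  config.on_epoch_end do |epoch, metrics|
    if metrics[:val_loss] < best_loss
      best_loss  = metrics[:val_loss]
      bad_epochs = 0
      llm.save("checkpoints/best") # keep the best checkpoint so far
    else
      bad_epochs += 1
      puts "val_loss has not improved for #{bad_epochs} epoch(s)" if bad_epochs >= patience
    end
  end
end

llm.fit(train_file: "train.jsonl", val_file: "val.jsonl")
```
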
data/docs/tutorials/model-export.md
@@ -0,0 +1,200 @@
# Exporting Models for Deployment

Fine supports exporting fine-tuned models to ONNX and GGUF formats for production deployment.

## ONNX Export

ONNX (Open Neural Network Exchange) is a cross-platform format supported by many inference runtimes including ONNX Runtime, TensorRT, and OpenVINO.

### Export Text Classifier

```ruby
classifier = Fine::TextClassifier.load("my_classifier")
classifier.export_onnx("classifier.onnx")

# With options
classifier.export_onnx(
  "classifier.onnx",
  opset_version: 14,
  dynamic_axes: true # Allow variable batch size and sequence length
)
```

### Export Text Embedder

```ruby
embedder = Fine::TextEmbedder.load("my_embedder")
embedder.export_onnx("embedder.onnx")
```

### Export Image Classifier

```ruby
classifier = Fine::ImageClassifier.load("my_classifier")
classifier.export_onnx("classifier.onnx")
```

### Export LLM

```ruby
llm = Fine::LLM.load("my_llm")
llm.export_onnx("llm.onnx")
```

### Using the Export Module Directly

```ruby
Fine::Export.to_onnx(model, "model.onnx", opset_version: 14)
```

### ONNX Inference Example (Python)

```python
import onnxruntime as ort
import numpy as np

session = ort.InferenceSession("classifier.onnx")

# Text classification
input_ids = np.array([[101, 2054, 2003, 2023, 102]], dtype=np.int64)
attention_mask = np.array([[1, 1, 1, 1, 1]], dtype=np.int64)

outputs = session.run(None, {
    "input_ids": input_ids,
    "attention_mask": attention_mask
})
logits = outputs[0]
```

## GGUF Export (LLMs Only)

GGUF is the format used by llama.cpp, ollama, and other efficient inference engines. It supports various quantization levels to reduce model size and memory usage.

### Basic Export

```ruby
llm = Fine::LLM.load("my_llm")
llm.export_gguf("model.gguf")
```

### Quantization Options

```ruby
# F16 - Good balance of quality and size (default)
llm.export_gguf("model-f16.gguf", quantization: :f16)

# Q8 - Smaller, minimal quality loss
llm.export_gguf("model-q8.gguf", quantization: :q8_0)

# Q4 - Smallest, some quality loss
llm.export_gguf("model-q4.gguf", quantization: :q4_0)
```

### Available Quantization Types

| Type | Description | Size Reduction | Quality |
|------|-------------|----------------|---------|
| `:f32` | 32-bit float | None | Lossless |
| `:f16` | 16-bit float | ~50% | Minimal loss |
| `:q8_0` | 8-bit quantization | ~75% | Very small loss |
| `:q4_0` | 4-bit quantization | ~87% | Noticeable loss |
| `:q4_k` | 4-bit K-quant | ~87% | Better than q4_0 |
| `:q5_k` | 5-bit K-quant | ~84% | Good balance |
| `:q6_k` | 6-bit K-quant | ~81% | High quality |

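To turn those percentages into absolute numbers, multiply the parameter count by the approximate bits per weight (16 for `:f16`, roughly 8.5 and 4.5 for `:q8_0` and `:q4_0` once block scales are counted). The snippet below is a ballpark estimate, not an exact file-size calculation:

```ruby
# Rough GGUF size estimate: parameters * bits-per-weight / 8.
# Bits-per-weight values are approximations (quantization block scales
# add overhead), so real files will differ a little.
params = 1_000_000_000 # a 1B-parameter model

{ f32: 32.0, f16: 16.0, q8_0: 8.5, q4_0: 4.5 }.each do |type, bits|
  gb = params * bits / 8 / 1_000_000_000.0
  puts format("%-5s ~%.1f GB", type, gb)
end
# f32   ~4.0 GB
# f16   ~2.0 GB
# q8_0  ~1.1 GB
# q4_0  ~0.6 GB
```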

### Adding Metadata

```ruby
llm.export_gguf(
  "model.gguf",
  quantization: :q4_k,
  metadata: {
    "description" => "Fine-tuned on customer support data",
    "version" => "1.0.0",
    "author" => "Your Name"
  }
)
```

### Using with llama.cpp

```bash
# Run inference
./main -m model.gguf -p "What is Ruby?" -n 100

# Start a server
./server -m model.gguf --host 0.0.0.0 --port 8080
```

### Using with Ollama

```bash
# Create a Modelfile
echo 'FROM ./model.gguf' > Modelfile
echo 'PARAMETER temperature 0.7' >> Modelfile
echo 'SYSTEM "You are a helpful assistant."' >> Modelfile

# Create the model
ollama create my-model -f Modelfile

# Run it
ollama run my-model "What is Ruby?"
```

## Best Practices

### Choosing the Right Format

| Use Case | Recommended Format |
|----------|-------------------|
| Web deployment | ONNX |
| Mobile apps | ONNX (with quantization) |
| Server inference | ONNX or GGUF |
| Edge devices | GGUF (quantized) |
| llama.cpp / ollama | GGUF |

### Choosing Quantization

| Priority | Recommended |
|----------|------------|
| Quality first | `:f16` or `:q8_0` |
| Balance | `:q5_k` or `:q6_k` |
| Size first | `:q4_0` or `:q4_k` |

### Testing Exported Models

Always test your exported models to ensure quality:

```ruby
# Before export - test with Fine
response = llm.generate("Test prompt")

# After export - test with target runtime
# Compare outputs to ensure quality is acceptable
```
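For ONNX exports, one way to run that comparison from Ruby is the `onnxruntime` gem, which is a separate dependency and not part of Fine. The tensor names below are assumptions carried over from the Python example above; check them with `model.inputs` and `model.outputs` for your actual graph.

```ruby
# Sketch: load the exported graph with the onnxruntime gem and compare
# its logits with the original model's prediction for the same input.
# Tensor names ("input_ids", "attention_mask", "logits") are assumptions.
require "onnxruntime"

model = OnnxRuntime::Model.new("classifier.onnx")

outputs = model.predict(
  "input_ids"      => [[101, 2054, 2003, 2023, 102]],
  "attention_mask" => [[1, 1, 1, 1, 1]]
)

onnx_logits = outputs["logits"]
puts onnx_logits.inspect
# Compare the argmax (and ideally the raw logits) against the in-process
# prediction for the same text before trusting the export in production.
```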

## Troubleshooting

### ONNX Export Fails

1. Ensure the model is loaded and trained
2. Check that torch.rb ONNX support is available
3. Try a different opset_version (11, 13, or 14); see the sketch below

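For the third point, a small sketch that simply retries the export across several opset versions, using only the `opset_version:` option shown earlier; the broad `rescue` is a placeholder because the exact exception class raised by a failed export is not documented here.

```ruby
# Try newer opsets first and fall back to older ones.
exported_opset = [14, 13, 11].find do |opset|
  classifier.export_onnx("classifier.onnx", opset_version: opset)
  puts "Exported with opset #{opset}"
  true
rescue StandardError => e
  warn "opset #{opset} failed: #{e.message}"
  false
end

raise "ONNX export failed for every opset tried" unless exported_opset
```
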
### GGUF Export Issues

1. Only LLMs support GGUF export
2. Large models may need more memory during export
3. Some model architectures may need custom tensor mappings

### Large File Sizes

Use quantization to reduce file size:

```ruby
# ONNX with INT8 quantization
classifier.export_onnx("model.onnx", quantize: :int8)

# GGUF with Q4 quantization
llm.export_gguf("model.gguf", quantization: :q4_0)
```
data/docs/tutorials/siglip2-image-classification.md
@@ -0,0 +1,130 @@
# Fine-tuning SigLIP2 for Image Classification

Classify images into predefined categories (e.g., cats vs dogs, product types, document categories).

## When to Use This

- You have images that belong to distinct categories
- You want to automatically sort or label images
- Examples: photo organization, content moderation, product categorization

## Dataset Structure

Organize images in folders named after each class:

```
data/
  train/
    cats/
      cat001.jpg
      cat002.jpg
      ...
    dogs/
      dog001.jpg
      dog002.jpg
      ...
  val/
    cats/
      cat101.jpg
    dogs/
      dog101.jpg
```

**Tips:**
- Aim for at least 20-50 images per class for decent results
- More images = better accuracy
- Balance classes roughly equally
- Use clear, representative images

## Basic Training

```ruby
require "fine"

classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224")

classifier.fit(
  train_dir: "data/train",
  val_dir: "data/val",
  epochs: 3
)

classifier.save("models/my_classifier")
```

## Training with Configuration

```ruby
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  # Training settings
  config.epochs = 5
  config.batch_size = 16      # Lower if running out of memory
  config.learning_rate = 2e-4 # Default works well for most cases

  # Freeze encoder for faster training (less accurate)
  config.freeze_encoder = false # Set true for feature extraction only

  # Track progress
  config.on_epoch_end do |epoch, metrics|
    puts "Epoch #{epoch + 1}: accuracy=#{(metrics[:accuracy] * 100).round(1)}%"
  end
end

classifier.fit(train_dir: "data/train", val_dir: "data/val")
```

## Making Predictions

```ruby
# Load trained model
classifier = Fine::ImageClassifier.load("models/my_classifier")

# Single image
results = classifier.predict("photo.jpg")
puts results.first[:label] # => "cats"
puts results.first[:score] # => 0.95

# Multiple images
results = classifier.predict(["photo1.jpg", "photo2.jpg", "photo3.jpg"])
results.each_with_index do |preds, i|
  puts "Image #{i + 1}: #{preds.first[:label]}"
end
```

## Using Without Validation Set

If you don't have a separate validation set:

```ruby
classifier.fit_with_split(
  data_dir: "data/all_images",
  val_split: 0.2, # Use 20% for validation
  epochs: 3
)
```

## Model Selection

| Model | Size | Speed | Accuracy | Use Case |
|-------|------|-------|----------|----------|
| `siglip2-base-patch16-224` | 86M | Fast | Good | Quick experiments, limited GPU |
| `siglip2-base-patch16-384` | 86M | Medium | Better | Production with good GPU |
| `siglip2-large-patch16-256` | 303M | Slower | Best | Maximum accuracy |

## Troubleshooting

**Out of memory:**
- Reduce `batch_size` (try 8 or 4)
- Use a smaller model variant
- Use `freeze_encoder = true` (see the combined sketch below)

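Putting those three suggestions together, a memory-lean configuration might look like the following; every option shown is one documented earlier in this guide.

```ruby
# Memory-lean setup: the smallest model variant from the table above,
# a small batch size, and a frozen encoder (only the head is trained).
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  config.batch_size     = 4
  config.freeze_encoder = true
end

classifier.fit(train_dir: "data/train", val_dir: "data/val", epochs: 3)
```
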
**Low accuracy:**
- Add more training images
- Train for more epochs
- Check for mislabeled images
- Ensure classes are visually distinct

**Overfitting (train accuracy high, val accuracy low):**
- Add more training data
- Reduce epochs
- Use `freeze_encoder = true`