fine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +167 -0
- data/LICENSE +21 -0
- data/README.md +212 -0
- data/Rakefile +6 -0
- data/docs/installation.md +151 -0
- data/docs/tutorials/llm-fine-tuning.md +246 -0
- data/docs/tutorials/model-export.md +200 -0
- data/docs/tutorials/siglip2-image-classification.md +130 -0
- data/docs/tutorials/siglip2-object-recognition.md +203 -0
- data/docs/tutorials/siglip2-similarity-search.md +152 -0
- data/docs/tutorials/text-classification.md +233 -0
- data/docs/tutorials/text-embeddings.md +211 -0
- data/examples/basic_classification.rb +70 -0
- data/examples/data/tool_calls.jsonl +30 -0
- data/examples/demo_training.rb +78 -0
- data/examples/finetune_gemma3_tools.rb +135 -0
- data/examples/real_llm_test.rb +128 -0
- data/examples/real_text_classification_test.rb +90 -0
- data/examples/real_text_embedder_test.rb +110 -0
- data/examples/real_training_test.rb +88 -0
- data/examples/test_export.rb +28 -0
- data/examples/test_image_classifier.rb +79 -0
- data/examples/test_llm.rb +100 -0
- data/examples/test_text_classifier.rb +59 -0
- data/lib/fine/callbacks/base.rb +140 -0
- data/lib/fine/callbacks/progress_bar.rb +66 -0
- data/lib/fine/configuration.rb +106 -0
- data/lib/fine/datasets/data_loader.rb +63 -0
- data/lib/fine/datasets/image_dataset.rb +203 -0
- data/lib/fine/datasets/instruction_dataset.rb +226 -0
- data/lib/fine/datasets/text_data_loader.rb +88 -0
- data/lib/fine/datasets/text_dataset.rb +266 -0
- data/lib/fine/error.rb +49 -0
- data/lib/fine/export/gguf_exporter.rb +424 -0
- data/lib/fine/export/onnx_exporter.rb +249 -0
- data/lib/fine/export.rb +53 -0
- data/lib/fine/hub/config_loader.rb +145 -0
- data/lib/fine/hub/model_downloader.rb +136 -0
- data/lib/fine/hub/safetensors_loader.rb +108 -0
- data/lib/fine/image_classifier.rb +256 -0
- data/lib/fine/llm.rb +336 -0
- data/lib/fine/models/base.rb +48 -0
- data/lib/fine/models/bert_encoder.rb +202 -0
- data/lib/fine/models/bert_for_sequence_classification.rb +226 -0
- data/lib/fine/models/causal_lm.rb +279 -0
- data/lib/fine/models/classification_head.rb +24 -0
- data/lib/fine/models/gemma3_decoder.rb +244 -0
- data/lib/fine/models/llama_decoder.rb +297 -0
- data/lib/fine/models/sentence_transformer.rb +202 -0
- data/lib/fine/models/siglip2_for_image_classification.rb +155 -0
- data/lib/fine/models/siglip2_vision_encoder.rb +190 -0
- data/lib/fine/text_classifier.rb +250 -0
- data/lib/fine/text_embedder.rb +221 -0
- data/lib/fine/tokenizers/auto_tokenizer.rb +208 -0
- data/lib/fine/training/llm_trainer.rb +212 -0
- data/lib/fine/training/text_trainer.rb +275 -0
- data/lib/fine/training/trainer.rb +194 -0
- data/lib/fine/transforms/compose.rb +28 -0
- data/lib/fine/transforms/normalize.rb +33 -0
- data/lib/fine/transforms/resize.rb +35 -0
- data/lib/fine/transforms/to_tensor.rb +53 -0
- data/lib/fine/version.rb +3 -0
- data/lib/fine.rb +112 -0
- data/mise.toml +2 -0
- metadata +240 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: a12cf37fd90bb1744c5d4a91863406a7bd02742df9596113dce12e847874e969
|
|
4
|
+
data.tar.gz: 7fba7fea8eef802257b8f20949a78befe21baad74757ab28426c147c508d4009
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7a6383340fc114a158e34bc905dddc24865b0ace2d4fa771abd129ca0a5bb9cbeb372682b181329ad33daffa447facf426a65cd275580f59951c4e22791e0747
|
|
7
|
+
data.tar.gz: e8ae855256a5cd1fd2d3467177f3990db3081ff416e36931968f75b1f2578fa979fe7699d38d288fcbd35d3ab7bfa3cb545dfbee697f4b02e1a113375307f1a6
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.1.0] - 2025-01-26
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- **Image Classification** with SigLIP2 models
|
|
10
|
+
- `Fine::ImageClassifier` for training and inference
|
|
11
|
+
- Support for `google/siglip2-base-patch16-224` and other SigLIP2 variants
|
|
12
|
+
- Directory-based dataset loading with automatic class discovery
|
|
13
|
+
|
|
14
|
+
- **Text Classification** with BERT-family models
|
|
15
|
+
- `Fine::TextClassifier` for sentiment, intent, and category classification
|
|
16
|
+
- Support for DistilBERT, BERT, DeBERTa models
|
|
17
|
+
- JSONL dataset format with `text` and `label` fields
|
|
18
|
+
|
|
19
|
+
- **Text Embeddings** with Sentence Transformers
|
|
20
|
+
- `Fine::TextEmbedder` for domain-specific embeddings
|
|
21
|
+
- Support for MiniLM, MPNet, BGE models
|
|
22
|
+
- Contrastive learning with positive/negative pairs
|
|
23
|
+
|
|
24
|
+
- **LLM Fine-tuning** (experimental)
|
|
25
|
+
- `Fine::LLM` for instruction fine-tuning
|
|
26
|
+
- Support for Llama, Gemma, Qwen architectures
|
|
27
|
+
- Alpaca-format dataset support
|
|
28
|
+
|
|
29
|
+
- **Model Export**
|
|
30
|
+
- ONNX export for all model types
|
|
31
|
+
- GGUF export for LLMs (llama.cpp/Ollama compatible)
|
|
32
|
+
|
|
33
|
+
- **Infrastructure**
|
|
34
|
+
- HuggingFace Hub integration with automatic model downloads
|
|
35
|
+
- SafeTensors weight loading
|
|
36
|
+
- Block-based configuration DSL
|
|
37
|
+
- Progress bar callbacks
|
|
38
|
+
- MPS (Apple Silicon) and CUDA support
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
fine (0.1.0)
|
|
5
|
+
down (>= 5.0)
|
|
6
|
+
ruby-vips (>= 2.1)
|
|
7
|
+
safetensors (>= 0.1)
|
|
8
|
+
tokenizers (>= 0.4)
|
|
9
|
+
torch-rb (>= 0.17)
|
|
10
|
+
tty-progressbar (>= 0.18)
|
|
11
|
+
|
|
12
|
+
GEM
|
|
13
|
+
remote: https://rubygems.org/
|
|
14
|
+
specs:
|
|
15
|
+
addressable (2.8.8)
|
|
16
|
+
public_suffix (>= 2.0.2, < 8.0)
|
|
17
|
+
ast (2.4.3)
|
|
18
|
+
diff-lcs (1.6.2)
|
|
19
|
+
down (5.4.2)
|
|
20
|
+
addressable (~> 2.8)
|
|
21
|
+
ffi (1.17.3-aarch64-linux-gnu)
|
|
22
|
+
ffi (1.17.3-aarch64-linux-musl)
|
|
23
|
+
ffi (1.17.3-arm64-darwin)
|
|
24
|
+
ffi (1.17.3-x86_64-darwin)
|
|
25
|
+
ffi (1.17.3-x86_64-linux-gnu)
|
|
26
|
+
ffi (1.17.3-x86_64-linux-musl)
|
|
27
|
+
json (2.18.0)
|
|
28
|
+
language_server-protocol (3.17.0.5)
|
|
29
|
+
lint_roller (1.1.0)
|
|
30
|
+
logger (1.7.0)
|
|
31
|
+
parallel (1.27.0)
|
|
32
|
+
parser (3.3.10.1)
|
|
33
|
+
ast (~> 2.4.1)
|
|
34
|
+
racc
|
|
35
|
+
prism (1.8.0)
|
|
36
|
+
public_suffix (7.0.2)
|
|
37
|
+
racc (1.8.1)
|
|
38
|
+
rainbow (3.1.1)
|
|
39
|
+
rake (13.3.1)
|
|
40
|
+
regexp_parser (2.11.3)
|
|
41
|
+
rice (4.9.1)
|
|
42
|
+
rspec (3.13.2)
|
|
43
|
+
rspec-core (~> 3.13.0)
|
|
44
|
+
rspec-expectations (~> 3.13.0)
|
|
45
|
+
rspec-mocks (~> 3.13.0)
|
|
46
|
+
rspec-core (3.13.6)
|
|
47
|
+
rspec-support (~> 3.13.0)
|
|
48
|
+
rspec-expectations (3.13.5)
|
|
49
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
50
|
+
rspec-support (~> 3.13.0)
|
|
51
|
+
rspec-mocks (3.13.7)
|
|
52
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
53
|
+
rspec-support (~> 3.13.0)
|
|
54
|
+
rspec-support (3.13.6)
|
|
55
|
+
rubocop (1.82.1)
|
|
56
|
+
json (~> 2.3)
|
|
57
|
+
language_server-protocol (~> 3.17.0.2)
|
|
58
|
+
lint_roller (~> 1.1.0)
|
|
59
|
+
parallel (~> 1.10)
|
|
60
|
+
parser (>= 3.3.0.2)
|
|
61
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
62
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
63
|
+
rubocop-ast (>= 1.48.0, < 2.0)
|
|
64
|
+
ruby-progressbar (~> 1.7)
|
|
65
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
66
|
+
rubocop-ast (1.49.0)
|
|
67
|
+
parser (>= 3.3.7.2)
|
|
68
|
+
prism (~> 1.7)
|
|
69
|
+
ruby-progressbar (1.13.0)
|
|
70
|
+
ruby-vips (2.3.0)
|
|
71
|
+
ffi (~> 1.12)
|
|
72
|
+
logger
|
|
73
|
+
safetensors (0.2.2-aarch64-linux)
|
|
74
|
+
safetensors (0.2.2-aarch64-linux-musl)
|
|
75
|
+
safetensors (0.2.2-arm64-darwin)
|
|
76
|
+
safetensors (0.2.2-x86_64-darwin)
|
|
77
|
+
safetensors (0.2.2-x86_64-linux)
|
|
78
|
+
safetensors (0.2.2-x86_64-linux-musl)
|
|
79
|
+
strings-ansi (0.2.0)
|
|
80
|
+
tokenizers (0.6.3-aarch64-linux)
|
|
81
|
+
tokenizers (0.6.3-aarch64-linux-musl)
|
|
82
|
+
tokenizers (0.6.3-arm64-darwin)
|
|
83
|
+
tokenizers (0.6.3-x86_64-darwin)
|
|
84
|
+
tokenizers (0.6.3-x86_64-linux)
|
|
85
|
+
tokenizers (0.6.3-x86_64-linux-musl)
|
|
86
|
+
torch-rb (0.23.0)
|
|
87
|
+
rice (>= 4.8)
|
|
88
|
+
tty-cursor (0.7.1)
|
|
89
|
+
tty-progressbar (0.18.3)
|
|
90
|
+
strings-ansi (~> 0.2)
|
|
91
|
+
tty-cursor (~> 0.7)
|
|
92
|
+
tty-screen (~> 0.8)
|
|
93
|
+
unicode-display_width (>= 1.6, < 3.0)
|
|
94
|
+
tty-screen (0.8.2)
|
|
95
|
+
unicode-display_width (2.6.0)
|
|
96
|
+
|
|
97
|
+
PLATFORMS
|
|
98
|
+
aarch64-linux
|
|
99
|
+
aarch64-linux-gnu
|
|
100
|
+
aarch64-linux-musl
|
|
101
|
+
arm64-darwin
|
|
102
|
+
x86_64-darwin
|
|
103
|
+
x86_64-linux
|
|
104
|
+
x86_64-linux-gnu
|
|
105
|
+
x86_64-linux-musl
|
|
106
|
+
|
|
107
|
+
DEPENDENCIES
|
|
108
|
+
fine!
|
|
109
|
+
rake (~> 13.0)
|
|
110
|
+
rspec (~> 3.12)
|
|
111
|
+
rubocop (~> 1.50)
|
|
112
|
+
|
|
113
|
+
CHECKSUMS
|
|
114
|
+
addressable (2.8.8) sha256=7c13b8f9536cf6364c03b9d417c19986019e28f7c00ac8132da4eb0fe393b057
|
|
115
|
+
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
116
|
+
diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
|
|
117
|
+
down (5.4.2) sha256=516e5e01e7a96214a7e2cd155aac6f700593038ae6c857c0f4a05413b1c58acf
|
|
118
|
+
ffi (1.17.3-aarch64-linux-gnu) sha256=28ad573df26560f0aedd8a90c3371279a0b2bd0b4e834b16a2baa10bd7a97068
|
|
119
|
+
ffi (1.17.3-aarch64-linux-musl) sha256=020b33b76775b1abacc3b7d86b287cef3251f66d747092deec592c7f5df764b2
|
|
120
|
+
ffi (1.17.3-arm64-darwin) sha256=0c690555d4cee17a7f07c04d59df39b2fba74ec440b19da1f685c6579bb0717f
|
|
121
|
+
ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
|
|
122
|
+
ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
|
|
123
|
+
ffi (1.17.3-x86_64-linux-musl) sha256=086b221c3a68320b7564066f46fed23449a44f7a1935f1fe5a245bd89d9aea56
|
|
124
|
+
fine (0.1.0)
|
|
125
|
+
json (2.18.0) sha256=b10506aee4183f5cf49e0efc48073d7b75843ce3782c68dbeb763351c08fd505
|
|
126
|
+
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
127
|
+
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
128
|
+
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
129
|
+
parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
|
|
130
|
+
parser (3.3.10.1) sha256=06f6a725d2cd91e5e7f2b7c32ba143631e1f7c8ae2fb918fc4cebec187e6a688
|
|
131
|
+
prism (1.8.0) sha256=84453a16ef5530ea62c5f03ec16b52a459575ad4e7b9c2b360fd8ce2c39c1254
|
|
132
|
+
public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
|
|
133
|
+
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
134
|
+
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
135
|
+
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
136
|
+
regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
|
|
137
|
+
rice (4.9.1) sha256=f915b35a3a7f51af83a3dd173da387da56bfe6f0dcf02ed0b3d2889c531b2bbd
|
|
138
|
+
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
139
|
+
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
140
|
+
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
141
|
+
rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
|
|
142
|
+
rspec-support (3.13.6) sha256=2e8de3702427eab064c9352fe74488cc12a1bfae887ad8b91cba480ec9f8afb2
|
|
143
|
+
rubocop (1.82.1) sha256=09f1a6a654a960eda767aebea33e47603080f8e9c9a3f019bf9b94c9cab5e273
|
|
144
|
+
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
145
|
+
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
146
|
+
ruby-vips (2.3.0) sha256=e685ec02c13969912debbd98019e50492e12989282da5f37d05f5471442f5374
|
|
147
|
+
safetensors (0.2.2-aarch64-linux) sha256=5b50146d50a76fe0395b7aef4d13a1da8fcad44e9cf0f5aead935d5d17fb04dd
|
|
148
|
+
safetensors (0.2.2-aarch64-linux-musl) sha256=d6dea4e4f5ca11cff8ba4c017382838df5d33d78f79fabd9a5e5e482aa6afd57
|
|
149
|
+
safetensors (0.2.2-arm64-darwin) sha256=19d77df47154038974f76a4e1bac2d778ea04ca2c49abcd5b9f9c0f1a899d10b
|
|
150
|
+
safetensors (0.2.2-x86_64-darwin) sha256=a1dc2b415f6ef35c8887b15a6f72c673f3b008455c33aa399154c0eabf5adbcd
|
|
151
|
+
safetensors (0.2.2-x86_64-linux) sha256=f447d3d3110a7592b521a23f58b0251283659b27f3700ab627ac6ba517fa04ff
|
|
152
|
+
safetensors (0.2.2-x86_64-linux-musl) sha256=0d52871f2b672485cda73bc94807bb6bd74409a33414fdc341950ceb88f76049
|
|
153
|
+
strings-ansi (0.2.0) sha256=90262d760ea4a94cc2ae8d58205277a343409c288cbe7c29416b1826bd511c88
|
|
154
|
+
tokenizers (0.6.3-aarch64-linux) sha256=9d54a23f2e2246cc942d183af4549e3972b937d9b01f7a387cb146bf698eee84
|
|
155
|
+
tokenizers (0.6.3-aarch64-linux-musl) sha256=c178d8556769256857d77fb396f8ab004b29d058f59c620a2cfc56b01b501e27
|
|
156
|
+
tokenizers (0.6.3-arm64-darwin) sha256=29a6a5582dce106d846a906ee9e4254c12db45a3855c3ff6881d4be8be03e6b6
|
|
157
|
+
tokenizers (0.6.3-x86_64-darwin) sha256=4b71386cc08ceff5f86b448c74b2b297c00a280a1d502399b6cda23ef94e01fd
|
|
158
|
+
tokenizers (0.6.3-x86_64-linux) sha256=77a45cbde59daac33bdda1a74d45c18080478992a00ee7d898e7b8d15d0b3149
|
|
159
|
+
tokenizers (0.6.3-x86_64-linux-musl) sha256=a4b08c53bf0c8f7674c3abd03e013f0bb7c0c2457174b116c2872a37c64f0297
|
|
160
|
+
torch-rb (0.23.0) sha256=618f19523ff6d27da0cb001d6b32e041c05eb796a4e016b4c48b7f0736aa1d5f
|
|
161
|
+
tty-cursor (0.7.1) sha256=79534185e6a777888d88628b14b6a1fdf5154a603f285f80b1753e1908e0bf48
|
|
162
|
+
tty-progressbar (0.18.3) sha256=6cbb4260e55e74a9180d502143eb6a467d2c8e51bf5f3c9509fb5cacc3d4a5f6
|
|
163
|
+
tty-screen (0.8.2) sha256=c090652115beae764336c28802d633f204fb84da93c6a968aa5d8e319e819b50
|
|
164
|
+
unicode-display_width (2.6.0) sha256=12279874bba6d5e4d2728cef814b19197dbb10d7a7837a869bab65da943b7f5a
|
|
165
|
+
|
|
166
|
+
BUNDLED WITH
|
|
167
|
+
4.0.3
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Chris Hasinski
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Fine
|
|
2
|
+
|
|
3
|
+
Fine-tune machine learning models with Ruby.
|
|
4
|
+
|
|
5
|
+
```ruby
|
|
6
|
+
classifier = Fine::TextClassifier.new("distilbert-base-uncased")
|
|
7
|
+
classifier.fit(train_file: "reviews.jsonl", epochs: 3)
|
|
8
|
+
classifier.predict("This product is amazing!")
|
|
9
|
+
# => [{ label: "positive", score: 0.97 }]
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## I want to fine-tune...
|
|
13
|
+
|
|
14
|
+
### Text classification (sentiment, spam, intent)
|
|
15
|
+
|
|
16
|
+
Classify text into categories—reviews, support tickets, chat messages.
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
classifier = Fine::TextClassifier.new("distilbert-base-uncased")
|
|
20
|
+
classifier.fit(train_file: "data/reviews.jsonl", epochs: 3)
|
|
21
|
+
|
|
22
|
+
classifier.predict("Terrible experience, waste of money")
|
|
23
|
+
# => [{ label: "negative", score: 0.94 }]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
[Full tutorial: Text Classification](docs/tutorials/text-classification.md)
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
### Text embeddings for semantic search
|
|
31
|
+
|
|
32
|
+
Train embeddings for your domain—legal docs, support tickets, product catalog.
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
embedder = Fine::TextEmbedder.new("sentence-transformers/all-MiniLM-L6-v2")
|
|
36
|
+
embedder.fit(train_file: "data/pairs.jsonl", epochs: 3)
|
|
37
|
+
|
|
38
|
+
embedding = embedder.encode("How do I cancel my subscription?")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
[Full tutorial: Text Embeddings](docs/tutorials/text-embeddings.md)
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
### Image classification
|
|
46
|
+
|
|
47
|
+
Classify images into categories (cats vs dogs, products, documents).
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224")
|
|
51
|
+
classifier.fit(train_dir: "data/train", val_dir: "data/val", epochs: 3)
|
|
52
|
+
|
|
53
|
+
classifier.predict("photo.jpg")
|
|
54
|
+
# => [{ label: "cat", score: 0.95 }]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
[Full tutorial: Image Classification](docs/tutorials/siglip2-image-classification.md)
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
### Image recognition for custom objects
|
|
62
|
+
|
|
63
|
+
Teach the model to recognize your products, logos, or custom objects.
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-384") do |config|
|
|
67
|
+
config.epochs = 5
|
|
68
|
+
config.learning_rate = 1e-4
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
classifier.fit(train_dir: "products/train")
|
|
72
|
+
classifier.save("models/product_detector")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
[Full tutorial: Object Recognition](docs/tutorials/siglip2-object-recognition.md)
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### Image similarity search
|
|
80
|
+
|
|
81
|
+
Find visually similar images in your catalog.
|
|
82
|
+
|
|
83
|
+
```ruby
|
|
84
|
+
encoder = model.encoder
|
|
85
|
+
embedding = encoder.call(image_tensor)
|
|
86
|
+
similarity = cosine_similarity(embedding1, embedding2)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
[Full tutorial: Similarity Search](docs/tutorials/siglip2-similarity-search.md)
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
### LLMs
|
|
94
|
+
|
|
95
|
+
Fine-tune Gemma, Llama, Qwen and other open models for custom tasks.
|
|
96
|
+
|
|
97
|
+
```ruby
|
|
98
|
+
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B")
|
|
99
|
+
llm.fit(train_file: "instructions.jsonl", epochs: 3)
|
|
100
|
+
|
|
101
|
+
llm.generate("Explain Ruby blocks")
|
|
102
|
+
# => "A Ruby block is a chunk of code that can be passed to a method..."
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
[Full tutorial: LLM Fine-tuning](docs/tutorials/llm-fine-tuning.md)
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Installation
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
gem 'fine'
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Requires Ruby 3.1+, LibTorch, and libvips.
|
|
116
|
+
|
|
117
|
+
[Full installation guide](docs/installation.md)
|
|
118
|
+
|
|
119
|
+
**Quick setup (macOS):**
|
|
120
|
+
```bash
|
|
121
|
+
brew install pytorch vips
|
|
122
|
+
bundle install
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Supported Models
|
|
126
|
+
|
|
127
|
+
**Text Classification**
|
|
128
|
+
|
|
129
|
+
| Model | Parameters | Speed | Quality |
|
|
130
|
+
|-------|------------|-------|---------|
|
|
131
|
+
| `distilbert-base-uncased` | 66M | Fast | Good |
|
|
132
|
+
| `bert-base-uncased` | 110M | Medium | Better |
|
|
133
|
+
| `microsoft/deberta-v3-small` | 44M | Fast | Great |
|
|
134
|
+
| `microsoft/deberta-v3-base` | 86M | Medium | Best |
|
|
135
|
+
|
|
136
|
+
**Text Embeddings**
|
|
137
|
+
|
|
138
|
+
| Model | Parameters | Best For |
|
|
139
|
+
|-------|------------|----------|
|
|
140
|
+
| `sentence-transformers/all-MiniLM-L6-v2` | 22M | Fast, general purpose |
|
|
141
|
+
| `sentence-transformers/all-mpnet-base-v2` | 110M | Better quality |
|
|
142
|
+
| `BAAI/bge-small-en-v1.5` | 33M | Retrieval optimized |
|
|
143
|
+
| `BAAI/bge-base-en-v1.5` | 110M | Best retrieval |
|
|
144
|
+
|
|
145
|
+
**Vision (SigLIP2)**
|
|
146
|
+
|
|
147
|
+
| Model | Parameters | Best For |
|
|
148
|
+
|-------|------------|----------|
|
|
149
|
+
| `google/siglip2-base-patch16-224` | 86M | Quick experiments |
|
|
150
|
+
| `google/siglip2-base-patch16-384` | 86M | Good balance |
|
|
151
|
+
| `google/siglip2-large-patch16-256` | 303M | Maximum accuracy |
|
|
152
|
+
| `google/siglip2-so400m-patch14-224` | 400M | Best quality |
|
|
153
|
+
|
|
154
|
+
**LLMs**
|
|
155
|
+
|
|
156
|
+
| Model | Parameters | Best For |
|
|
157
|
+
|-------|------------|----------|
|
|
158
|
+
| `meta-llama/Llama-3.2-1B` | 1B | Fast experiments |
|
|
159
|
+
| `google/gemma-2b` | 2B | Good balance |
|
|
160
|
+
| `Qwen/Qwen2-1.5B` | 1.5B | Multilingual |
|
|
161
|
+
| `mistralai/Mistral-7B-v0.1` | 7B | Best quality |
|
|
162
|
+
|
|
163
|
+
## Configuration
|
|
164
|
+
|
|
165
|
+
```ruby
|
|
166
|
+
Fine::TextClassifier.new("distilbert-base-uncased") do |config|
|
|
167
|
+
config.epochs = 3
|
|
168
|
+
config.batch_size = 16
|
|
169
|
+
config.learning_rate = 2e-5
|
|
170
|
+
config.max_length = 256
|
|
171
|
+
|
|
172
|
+
config.on_epoch_end do |epoch, metrics|
|
|
173
|
+
puts "Epoch #{epoch}: #{metrics[:accuracy]}"
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Export for Deployment
|
|
179
|
+
|
|
180
|
+
Export fine-tuned models to production formats.
|
|
181
|
+
|
|
182
|
+
**ONNX** - For ONNX Runtime, TensorRT, OpenVINO:
|
|
183
|
+
|
|
184
|
+
```ruby
|
|
185
|
+
classifier.export_onnx("model.onnx")
|
|
186
|
+
embedder.export_onnx("embedder.onnx")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**GGUF** - For llama.cpp and Ollama (LLMs only):
|
|
190
|
+
|
|
191
|
+
```ruby
|
|
192
|
+
llm.export_gguf("model.gguf", quantization: :q4_0)
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
[Full tutorial: Model Export](docs/tutorials/model-export.md)
|
|
196
|
+
|
|
197
|
+
## Roadmap
|
|
198
|
+
|
|
199
|
+
- [x] SigLIP2 image classification
|
|
200
|
+
- [x] Text classification (BERT, DeBERTa)
|
|
201
|
+
- [x] Text embedding models
|
|
202
|
+
- [x] LLM fine-tuning (Gemma, Llama, Qwen)
|
|
203
|
+
- [x] ONNX & GGUF export
|
|
204
|
+
- [ ] LoRA/QLoRA fine-tuning
|
|
205
|
+
|
|
206
|
+
## Contributing
|
|
207
|
+
|
|
208
|
+
Bug reports and pull requests are welcome at [github.com/khasinski/fine](https://github.com/khasinski/fine).
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT
|
data/Rakefile
ADDED
data/docs/installation.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Installation
|
|
2
|
+
|
|
3
|
+
## Requirements
|
|
4
|
+
|
|
5
|
+
- Ruby 3.1+
|
|
6
|
+
- LibTorch 2.x
|
|
7
|
+
- libvips (for image processing)
|
|
8
|
+
|
|
9
|
+
## Quick Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install system dependencies (macOS)
|
|
13
|
+
brew install pytorch vips
|
|
14
|
+
|
|
15
|
+
# Install system dependencies (Ubuntu/Debian)
|
|
16
|
+
# sudo apt-get install libvips-dev
|
|
17
|
+
# Download LibTorch from https://pytorch.org/get-started/locally/
|
|
18
|
+
|
|
19
|
+
# Add to your Gemfile
|
|
20
|
+
gem 'fine'
|
|
21
|
+
|
|
22
|
+
# Install
|
|
23
|
+
bundle install
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Detailed Setup
|
|
27
|
+
|
|
28
|
+
### 1. Install LibTorch
|
|
29
|
+
|
|
30
|
+
**macOS (Homebrew):**
|
|
31
|
+
```bash
|
|
32
|
+
brew install pytorch
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**Linux:**
|
|
36
|
+
Download from [pytorch.org](https://pytorch.org/get-started/locally/) and set:
|
|
37
|
+
```bash
|
|
38
|
+
export LIBTORCH=/path/to/libtorch
|
|
39
|
+
export LD_LIBRARY_PATH=$LIBTORCH/lib:$LD_LIBRARY_PATH
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Verify:**
|
|
43
|
+
```bash
|
|
44
|
+
# macOS (Homebrew) only: should list the installed libtorch libraries
|
|
45
|
+
ls $(brew --prefix pytorch)/lib/libtorch* 2>/dev/null || echo "Check LibTorch installation"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### 2. Install libvips
|
|
49
|
+
|
|
50
|
+
**macOS:**
|
|
51
|
+
```bash
|
|
52
|
+
brew install vips
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
**Ubuntu/Debian:**
|
|
56
|
+
```bash
|
|
57
|
+
sudo apt-get install libvips-dev
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Verify:**
|
|
61
|
+
```bash
|
|
62
|
+
vips --version
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 3. Install the Gem
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
# Gemfile
|
|
69
|
+
gem 'fine'
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
bundle install
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 4. Verify Installation
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
require 'fine'
|
|
80
|
+
|
|
81
|
+
puts Fine::VERSION
|
|
82
|
+
puts "Torch available: #{defined?(Torch)}"
|
|
83
|
+
puts "Device: #{Fine.device}"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Troubleshooting
|
|
87
|
+
|
|
88
|
+
### LibTorch Version Mismatch
|
|
89
|
+
|
|
90
|
+
Error: `Incompatible LibTorch version`
|
|
91
|
+
|
|
92
|
+
The torch-rb gem requires a specific LibTorch version. Check compatibility:
|
|
93
|
+
```bash
|
|
94
|
+
# Check the PyTorch version (matches LibTorch only if LibTorch comes from this PyTorch install)
|
|
95
|
+
python3 -c "import torch; print(torch.__version__)"
|
|
96
|
+
|
|
97
|
+
# Or check brew
|
|
98
|
+
brew info pytorch
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Update LibTorch or pin torch-rb to a compatible version.
|
|
102
|
+
|
|
103
|
+
### Missing libvips
|
|
104
|
+
|
|
105
|
+
Error: `cannot load such file -- vips`
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# macOS
|
|
109
|
+
brew install vips
|
|
110
|
+
|
|
111
|
+
# Ubuntu
|
|
112
|
+
sudo apt-get install libvips-dev
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Native Extension Build Failures
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# Ensure you have build tools
|
|
119
|
+
xcode-select --install # macOS
|
|
120
|
+
|
|
121
|
+
# Reinstall with verbose output
|
|
122
|
+
gem install torch-rb --verbose
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## GPU Support
|
|
126
|
+
|
|
127
|
+
### CUDA (NVIDIA)
|
|
128
|
+
|
|
129
|
+
Ensure CUDA toolkit is installed and LibTorch was built with CUDA support.
|
|
130
|
+
|
|
131
|
+
```ruby
|
|
132
|
+
Fine.configure do |config|
|
|
133
|
+
config.device = "cuda" # or "cuda:0" for specific GPU
|
|
134
|
+
end
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### MPS (Apple Silicon)
|
|
138
|
+
|
|
139
|
+
Selected automatically on Apple Silicon Macs (M1 and later) when the installed PyTorch/LibTorch build supports MPS:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
puts Fine.device # Should show "mps" on Apple Silicon
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### CPU Only
|
|
146
|
+
|
|
147
|
+
```ruby
|
|
148
|
+
Fine.configure do |config|
|
|
149
|
+
config.device = "cpu"
|
|
150
|
+
end
|
|
151
|
+
```
|