fine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/CHANGELOG.md +38 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +167 -0
  6. data/LICENSE +21 -0
  7. data/README.md +212 -0
  8. data/Rakefile +6 -0
  9. data/docs/installation.md +151 -0
  10. data/docs/tutorials/llm-fine-tuning.md +246 -0
  11. data/docs/tutorials/model-export.md +200 -0
  12. data/docs/tutorials/siglip2-image-classification.md +130 -0
  13. data/docs/tutorials/siglip2-object-recognition.md +203 -0
  14. data/docs/tutorials/siglip2-similarity-search.md +152 -0
  15. data/docs/tutorials/text-classification.md +233 -0
  16. data/docs/tutorials/text-embeddings.md +211 -0
  17. data/examples/basic_classification.rb +70 -0
  18. data/examples/data/tool_calls.jsonl +30 -0
  19. data/examples/demo_training.rb +78 -0
  20. data/examples/finetune_gemma3_tools.rb +135 -0
  21. data/examples/real_llm_test.rb +128 -0
  22. data/examples/real_text_classification_test.rb +90 -0
  23. data/examples/real_text_embedder_test.rb +110 -0
  24. data/examples/real_training_test.rb +88 -0
  25. data/examples/test_export.rb +28 -0
  26. data/examples/test_image_classifier.rb +79 -0
  27. data/examples/test_llm.rb +100 -0
  28. data/examples/test_text_classifier.rb +59 -0
  29. data/lib/fine/callbacks/base.rb +140 -0
  30. data/lib/fine/callbacks/progress_bar.rb +66 -0
  31. data/lib/fine/configuration.rb +106 -0
  32. data/lib/fine/datasets/data_loader.rb +63 -0
  33. data/lib/fine/datasets/image_dataset.rb +203 -0
  34. data/lib/fine/datasets/instruction_dataset.rb +226 -0
  35. data/lib/fine/datasets/text_data_loader.rb +88 -0
  36. data/lib/fine/datasets/text_dataset.rb +266 -0
  37. data/lib/fine/error.rb +49 -0
  38. data/lib/fine/export/gguf_exporter.rb +424 -0
  39. data/lib/fine/export/onnx_exporter.rb +249 -0
  40. data/lib/fine/export.rb +53 -0
  41. data/lib/fine/hub/config_loader.rb +145 -0
  42. data/lib/fine/hub/model_downloader.rb +136 -0
  43. data/lib/fine/hub/safetensors_loader.rb +108 -0
  44. data/lib/fine/image_classifier.rb +256 -0
  45. data/lib/fine/llm.rb +336 -0
  46. data/lib/fine/models/base.rb +48 -0
  47. data/lib/fine/models/bert_encoder.rb +202 -0
  48. data/lib/fine/models/bert_for_sequence_classification.rb +226 -0
  49. data/lib/fine/models/causal_lm.rb +279 -0
  50. data/lib/fine/models/classification_head.rb +24 -0
  51. data/lib/fine/models/gemma3_decoder.rb +244 -0
  52. data/lib/fine/models/llama_decoder.rb +297 -0
  53. data/lib/fine/models/sentence_transformer.rb +202 -0
  54. data/lib/fine/models/siglip2_for_image_classification.rb +155 -0
  55. data/lib/fine/models/siglip2_vision_encoder.rb +190 -0
  56. data/lib/fine/text_classifier.rb +250 -0
  57. data/lib/fine/text_embedder.rb +221 -0
  58. data/lib/fine/tokenizers/auto_tokenizer.rb +208 -0
  59. data/lib/fine/training/llm_trainer.rb +212 -0
  60. data/lib/fine/training/text_trainer.rb +275 -0
  61. data/lib/fine/training/trainer.rb +194 -0
  62. data/lib/fine/transforms/compose.rb +28 -0
  63. data/lib/fine/transforms/normalize.rb +33 -0
  64. data/lib/fine/transforms/resize.rb +35 -0
  65. data/lib/fine/transforms/to_tensor.rb +53 -0
  66. data/lib/fine/version.rb +3 -0
  67. data/lib/fine.rb +112 -0
  68. data/mise.toml +2 -0
  69. metadata +240 -0
@@ -0,0 +1,128 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Test LLM components (loading, forward pass, minimal training)
# Note: Full LLM training requires significant compute

require "bundler/setup"
require "fine"

puts "=" * 60
puts "LLM COMPONENTS TEST"
puts "=" * 60

Fine.configure do |config|
  config.progress_bar = false
end

fixtures_path = File.expand_path("../spec/fixtures/text/instructions.jsonl", __dir__)

# Hand-written stand-in for a model configuration. An instance is handed to
# both Fine::Models::LlamaDecoder and Fine::Models::CausalLM below; the values
# are deliberately tiny so the forward/backward passes finish quickly.
class MinimalConfig
  attr_accessor :config

  def initialize
    @config = {}
  end

  def vocab_size = 1000
  def hidden_size = 256
  def num_hidden_layers = 2
  def num_attention_heads = 4
  def intermediate_size = 512
  def max_position_embeddings = 128
  def rms_norm_eps = 1e-6
  def rope_theta = 10000.0
  def num_key_value_heads = 4
  def use_bias = false
  def to_h = @config
end

puts "\n1. Testing LlamaDecoder components..."

begin
  config = MinimalConfig.new

  puts " Creating LlamaDecoder with small config..."
  decoder = Fine::Models::LlamaDecoder.new(config)
  puts " ✓ LlamaDecoder created"

  puts " Testing forward pass..."
  input_ids = Torch.randint(0, 1000, [2, 16], dtype: :long) # batch=2, seq=16
  output = decoder.call(input_ids)
  puts " ✓ Forward pass successful"
  puts " Input shape: #{input_ids.shape.to_a}"
  puts " Output shape: #{output[:last_hidden_state].shape.to_a}"

  puts "\n2. Testing CausalLM wrapper..."
  lm = Fine::Models::CausalLM.new(config)
  output = lm.call(input_ids)
  puts " ✓ CausalLM forward pass successful"
  puts " Logits shape: #{output[:logits].shape.to_a}"

  puts "\n3. Testing with labels (loss computation)..."
  labels = input_ids.clone
  output = lm.call(input_ids, labels: labels)
  puts " ✓ Loss computed: #{output[:loss].item.round(4)}"

  puts "\n4. Testing InstructionDataset..."
  # Singleton-method mock: returns ten random ids per call regardless of the text.
  tokenizer_mock = Object.new
  def tokenizer_mock.encode(text, **_)
    ids = Array.new(10) { rand(100) }
    { input_ids: [ids], attention_mask: [Array.new(10, 1)] }
  end
  def tokenizer_mock.pad_token_id = 0
  def tokenizer_mock.eos_token_id = 2

  dataset = Fine::Datasets::InstructionDataset.from_jsonl(
    fixtures_path,
    tokenizer: tokenizer_mock,
    max_length: 32
  )
  puts " ✓ Dataset loaded with #{dataset.size} examples"

  item = dataset[0]
  puts " Sample item keys: #{item.keys}"
  puts " Input IDs length: #{item[:input_ids].size}"

  puts "\n5. Testing training step..."
  optimizer = Torch::Optim::Adam.new(lm.parameters, lr: 1e-4)

  # Track the first and last step losses explicitly instead of relying on a
  # variable that happens to leak out of the loop body.
  initial_loss = nil
  final_loss = nil
  3.times do |i|
    optimizer.zero_grad

    batch_ids = Torch.randint(0, 1000, [2, 16], dtype: :long)
    step_output = lm.call(batch_ids, labels: batch_ids.clone)

    loss = step_output[:loss]
    final_loss = loss.item
    initial_loss ||= final_loss
    loss.backward
    optimizer.step

    puts " Step #{i + 1}: loss=#{final_loss.round(4)}"
  end

  if final_loss < initial_loss
    puts " ✓ Loss decreased during training"
  else
    puts " ⚠ Loss did not decrease (expected with random data)"
  end

  puts "\n" + "=" * 60
  puts "LLM COMPONENTS TEST PASSED!"
  puts "=" * 60

rescue => e
  puts "\n" + "=" * 60
  puts "LLM COMPONENTS TEST FAILED!"
  puts "=" * 60
  puts "\nError: #{e.class}: #{e.message}"
  puts "\nBacktrace:"
  puts e.backtrace.first(15).join("\n")
  exit 1
end
@@ -0,0 +1,90 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Test real text classification fine-tuning

require "bundler/setup"
require "fine"

puts "=" * 60
puts "TEXT CLASSIFICATION FINE-TUNING TEST"
puts "=" * 60

Fine.configure do |config|
  config.progress_bar = false
end

fixtures_path = File.expand_path("../spec/fixtures/text/reviews.jsonl", __dir__)

# Shortens +text+ to at most +limit+ characters for display. The ellipsis is
# appended only when something was actually cut off, so short texts are shown
# verbatim instead of with a misleading "...".
preview = lambda do |text, limit|
  text.length > limit ? "#{text[0, limit]}..." : text
end

puts "\n1. Setting up TextClassifier with DistilBERT..."
classifier = Fine::TextClassifier.new("distilbert-base-uncased") do |config|
  config.epochs = 2
  config.batch_size = 4
  config.learning_rate = 5e-5
  config.max_length = 128
end

puts " Epochs: #{classifier.config.epochs}"
puts " Batch size: #{classifier.config.batch_size}"
puts " Max length: #{classifier.config.max_length}"

puts "\n2. Starting training on #{fixtures_path}..."
puts " (This will download DistilBERT from HuggingFace if not cached)"

begin
  history = classifier.fit(train_file: fixtures_path)

  puts "\n3. Training completed!"
  puts " Training history:"
  history.each_with_index do |metrics, i|
    puts " Epoch #{i + 1}: loss=#{metrics[:loss].round(4)}, acc=#{(metrics[:accuracy] * 100).round(1)}%"
  end

  # Check if loss decreased (only meaningful with at least two epochs)
  if history.size >= 2
    if history.last[:loss] < history.first[:loss]
      puts "\n ✓ Loss decreased from #{history.first[:loss].round(4)} to #{history.last[:loss].round(4)}"
    else
      puts "\n ⚠ Loss did not decrease (may need more epochs or data)"
    end
  end

  puts "\n4. Testing predictions..."
  test_texts = [
    "This product is amazing and works perfectly!",
    "Terrible quality, broke after one day.",
    "It's okay, nothing special.",
    "Best purchase I've ever made!"
  ]

  predictions = classifier.predict(test_texts)
  test_texts.each_with_index do |text, i|
    pred = predictions[i].first
    puts " \"#{preview.call(text, 41)}\""
    puts " → #{pred[:label]} (#{(pred[:score] * 100).round(1)}%)"
  end

  puts "\n5. Saving model..."
  save_path = "/tmp/fine_text_classifier"
  classifier.save(save_path)
  puts " Saved to: #{save_path}"

  puts "\n6. Loading and re-testing..."
  loaded = Fine::TextClassifier.load(save_path)
  loaded_predictions = loaded.predict(test_texts.first)
  puts " Loaded model prediction for first text:"
  puts " → #{loaded_predictions.first.first[:label]} (#{(loaded_predictions.first.first[:score] * 100).round(1)}%)"

  puts "\n" + "=" * 60
  puts "TEXT CLASSIFICATION TEST PASSED!"
  puts "=" * 60

rescue => e
  puts "\n" + "=" * 60
  puts "TEXT CLASSIFICATION TEST FAILED!"
  puts "=" * 60
  puts "\nError: #{e.class}: #{e.message}"
  puts "\nBacktrace:"
  puts e.backtrace.first(15).join("\n")
  exit 1
end
@@ -0,0 +1,110 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Test text embeddings (without fine-tuning, which requires more data)

require "bundler/setup"
require "fine"

puts "=" * 60
puts "TEXT EMBEDDER TEST"
puts "=" * 60

Fine.configure do |config|
  config.progress_bar = false
end

# Shortens +text+ to at most +limit+ characters for display, adding "..." only
# when the text was really truncated (every similarity pair below is short
# enough to be printed in full).
preview = lambda do |text, limit|
  text.length > limit ? "#{text[0, limit]}..." : text
end

puts "\n1. Loading SentenceTransformer (all-MiniLM-L6-v2)..."
puts " (This will download from HuggingFace if not cached)"

begin
  embedder = Fine::TextEmbedder.new("sentence-transformers/all-MiniLM-L6-v2") do |config|
    config.max_length = 128
  end

  puts " Model loaded successfully!"
  puts " Embedding dimension: #{embedder.embedding_dim}"

  puts "\n2. Testing single text encoding..."
  text = "The quick brown fox jumps over the lazy dog."
  embedding = embedder.encode(text)
  puts " Text: \"#{text}\""
  puts " Embedding shape: [#{embedding.size}]"
  puts " First 5 values: #{embedding.first(5).map { |v| v.round(4) }}"

  puts "\n3. Testing batch encoding..."
  texts = [
    "I love machine learning!",
    "Deep learning is fascinating.",
    "The weather is nice today.",
    "Ruby is a great programming language."
  ]
  embeddings = embedder.encode(texts)
  puts " Encoded #{texts.size} texts"
  puts " Result shapes: #{embeddings.size} x #{embeddings.first.size}"

  puts "\n4. Testing semantic similarity..."
  pairs = [
    ["I love programming", "Coding is my passion"],
    ["I love programming", "The sky is blue"],
    ["Machine learning is cool", "AI and ML are interesting"],
    ["Dogs are pets", "Cats are animals"]
  ]

  pairs.each do |text_a, text_b|
    similarity = embedder.similarity(text_a, text_b)
    puts " \"#{preview.call(text_a, 26)}\" vs \"#{preview.call(text_b, 26)}\""
    puts " → Similarity: #{(similarity * 100).round(1)}%"
  end

  puts "\n5. Testing semantic search..."
  query = "machine learning"
  corpus = [
    "I love pizza",
    "Deep learning is a subset of machine learning",
    "The stock market is volatile",
    "Neural networks can learn complex patterns",
    "Ruby on Rails is a web framework",
    "Artificial intelligence is transforming industries"
  ]

  results = embedder.search(query, corpus, top_k: 3)
  puts " Query: \"#{query}\""
  puts " Top 3 results:"
  results.each_with_index do |result, i|
    puts " #{i + 1}. \"#{preview.call(result[:text], 46)}\" (#{(result[:score] * 100).round(1)}%)"
  end

  puts "\n6. Saving and loading model..."
  save_path = "/tmp/fine_text_embedder"
  embedder.save(save_path)
  puts " Saved to: #{save_path}"

  loaded = Fine::TextEmbedder.load(save_path)
  puts " Loaded successfully!"

  # Verify embeddings match: compare element-wise and keep the worst deviation
  original_emb = embedder.encode("test text")
  loaded_emb = loaded.encode("test text")
  diff = original_emb.zip(loaded_emb).map { |a, b| (a - b).abs }.max
  puts " Max embedding difference: #{diff.round(6)}"

  if diff < 0.0001
    puts " ✓ Embeddings match!"
  else
    puts " ⚠ Embeddings differ"
  end

  puts "\n" + "=" * 60
  puts "TEXT EMBEDDER TEST PASSED!"
  puts "=" * 60

rescue => e
  puts "\n" + "=" * 60
  puts "TEXT EMBEDDER TEST FAILED!"
  puts "=" * 60
  puts "\nError: #{e.class}: #{e.message}"
  puts "\nBacktrace:"
  puts e.backtrace.first(15).join("\n")
  exit 1
end
@@ -0,0 +1,88 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Actually fine-tune a model to verify training works

require "bundler/setup"
require "fine"

puts "=" * 60
puts "REAL FINE-TUNING TEST"
puts "=" * 60

# Disable progress bar for cleaner output
Fine.configure do |config|
  config.progress_bar = false
end

fixtures_path = File.expand_path("../spec/fixtures/images", __dir__)

puts "\n1. Setting up ImageClassifier with SigLIP2..."
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  config.epochs = 2
  config.batch_size = 2
  config.learning_rate = 1e-4
  config.image_size = 224
  config.freeze_encoder = true # Only train classification head
end

puts " Epochs: #{classifier.config.epochs}"
puts " Batch size: #{classifier.config.batch_size}"
puts " Learning rate: #{classifier.config.learning_rate}"

puts "\n2. Starting training on #{fixtures_path}..."
puts " (This will download the model from HuggingFace if not cached)"

begin
  history = classifier.fit(train_dir: fixtures_path, epochs: 2)

  puts "\n3. Training completed!"
  puts " Training history:"
  history.each do |epoch_data|
    puts " Epoch #{epoch_data[:epoch]}: loss=#{epoch_data[:loss].round(4)}"
  end

  # Check if loss decreased (only meaningful with at least two epochs)
  if history.size >= 2
    if history.last[:loss] < history.first[:loss]
      puts "\n ✓ Loss decreased from #{history.first[:loss].round(4)} to #{history.last[:loss].round(4)}"
    else
      puts "\n ⚠ Loss did not decrease (may need more epochs or data)"
    end
  end

  puts "\n4. Testing prediction..."
  test_image = Dir.glob(File.join(fixtures_path, "*/*.jpg")).first
  # Dir.glob returns an empty array when nothing matches, so +first+ may be nil.
  # Fail with a clear message here rather than deep inside predict/File.basename.
  raise "No .jpg fixture images found under #{fixtures_path}" unless test_image

  predictions = classifier.predict(test_image)
  puts " Image: #{File.basename(test_image)}"
  puts " Predictions:"
  predictions.first.each do |pred|
    puts " #{pred[:label]}: #{(pred[:score] * 100).round(1)}%"
  end

  puts "\n5. Saving model..."
  save_path = "/tmp/fine_trained_model"
  classifier.save(save_path)
  puts " Saved to: #{save_path}"

  puts "\n6. Loading and re-testing..."
  loaded = Fine::ImageClassifier.load(save_path)
  loaded_predictions = loaded.predict(test_image)
  puts " Loaded model predictions:"
  loaded_predictions.first.each do |pred|
    puts " #{pred[:label]}: #{(pred[:score] * 100).round(1)}%"
  end

  puts "\n" + "=" * 60
  puts "FINE-TUNING TEST PASSED!"
  puts "=" * 60

rescue => e
  puts "\n" + "=" * 60
  puts "FINE-TUNING FAILED!"
  puts "=" * 60
  puts "\nError: #{e.class}: #{e.message}"
  puts "\nBacktrace:"
  puts e.backtrace.first(15).join("\n")
  exit 1
end
@@ -0,0 +1,28 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Smoke test for the export module: prints the constants and option tables
# exposed by Fine::Export without exporting anything.

require "bundler/setup"
require "fine"

divider = "=" * 50

puts "Testing Fine::Export module..."
puts divider

puts "\n1. Testing GGUF quantization options..."
quant_options = Fine::Export.gguf_quantization_options
quant_names = quant_options.keys.join(', ')
puts " Available quantization types: #{quant_names}"
quant_options.each { |type, desc| puts " - #{type}: #{desc}" }

puts "\n2. Testing GGUF exporter constants..."
magic_hex = Fine::Export::GGUFExporter::GGUF_MAGIC.to_s(16).upcase
puts " GGUF Magic: 0x#{magic_hex}"
puts " GGUF Version: #{Fine::Export::GGUFExporter::GGUF_VERSION}"
exporter_quants = Fine::Export::GGUFExporter::QUANTIZATION_TYPES.keys.join(', ')
puts " Available quantizations: #{exporter_quants}"

puts "\n3. Testing ONNX exporter..."
onnx_types = Fine::Export::ONNXExporter::SUPPORTED_TYPES.map(&:to_s).join(', ')
puts " Supported types: #{onnx_types}"

puts "\n" + divider
puts "Export module tests passed!"
@@ -0,0 +1,79 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Quick test of image classification with tiny local model

require "bundler/setup"
require "fine"

puts "Testing Fine::ImageClassifier..."
puts "=" * 50

# Use test fixtures
fixtures_path = File.expand_path("../spec/fixtures/images", __dir__)

puts "\n1. Creating ImageClassifier..."
classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
  config.epochs = 1
  config.batch_size = 2
  config.learning_rate = 1e-4
  config.image_size = 32 # Small for testing
  config.freeze_encoder = true # Faster training
end

puts " Config: epochs=#{classifier.config.epochs}, batch_size=#{classifier.config.batch_size}"

puts "\n2. Loading dataset from #{fixtures_path}..."
# Just test the dataset loading part without actual training
# (training requires downloading the model which takes time)

transforms = Fine::Transforms::Compose.new([
  Fine::Transforms::Resize.new(32),
  Fine::Transforms::ToTensor.new,
  Fine::Transforms::Normalize.new
])

dataset = Fine::Datasets::ImageDataset.from_directory(fixtures_path, transforms: transforms)
puts " Dataset size: #{dataset.size}"
puts " Classes: #{dataset.class_names.join(', ')}"
puts " Label map: #{dataset.label_map}"

puts "\n3. Testing data loading..."
item = dataset[0]
puts " Item keys: #{item.keys.join(', ')}"
puts " Pixel values shape: #{item[:pixel_values].shape.inspect}"
puts " Label: #{item[:label]}"

puts "\n4. Testing DataLoader..."
loader = Fine::Datasets::DataLoader.new(dataset, batch_size: 2, shuffle: true)
batch = loader.first
puts " Batch pixel_values shape: #{batch[:pixel_values].shape.inspect}"
puts " Batch labels: #{batch[:labels].to_a}"

puts "\n" + "=" * 50
puts "Basic tests passed!"
puts "\nNote: Full training requires downloading model weights from HuggingFace."
puts "Run with DOWNLOAD_MODELS=1 to test full training."

# Opt-in section: only runs when the DOWNLOAD_MODELS environment variable is set.
if ENV["DOWNLOAD_MODELS"]
  puts "\n" + "=" * 50
  puts "Downloading model and running full training..."

  begin
    classifier.fit(train_dir: fixtures_path, epochs: 1)
    puts "Training completed!"

    # Test save
    model_path = "/tmp/fine_test_model"
    classifier.save(model_path)
    puts "Model saved to #{model_path}"

    # Test load (the returned object is not needed; loading without raising is the check)
    Fine::ImageClassifier.load(model_path)
    puts "Model loaded successfully!"

  rescue => e
    puts "Training failed: #{e.message}"
    puts e.backtrace.first(5).join("\n")
    # Signal the failure to the caller, matching the other example scripts,
    # instead of finishing with a success exit status.
    exit 1
  end
end
@@ -0,0 +1,100 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Quick test of LLM components with local fixtures

require "bundler/setup"
require "fine"

puts "Testing Fine::LLM components..."
puts "=" * 50

# Instruction fixtures shipped with the specs
fixtures_path = File.expand_path("../spec/fixtures/text/instructions.jsonl", __dir__)

puts "\n1. Creating LLM..."
llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |settings|
  settings.epochs = 1
  settings.batch_size = 1
  settings.learning_rate = 1e-5
  settings.max_length = 128
end

# Echo the values back from the LLM's own config object.
llm_config = llm.config
puts " Config: epochs=#{llm_config.epochs}, batch_size=#{llm_config.batch_size}, max_length=#{llm_config.max_length}"

puts "\n2. Testing InstructionDataset loading..."
26
+
# Mock tokenizer for testing: maps whitespace-separated words to small integer
# ids without needing a real vocabulary.
class MockLLMTokenizer
  attr_reader :pad_token_id, :eos_token_id

  def initialize
    @pad_token_id = 0
    @eos_token_id = 1
  end

  # Turns +text+ into a single-row batch of token ids.
  #
  # Only the first 20 whitespace-separated words are used. Each id is derived
  # from String#sum (a byte checksum) rather than String#hash, because
  # String#hash is seeded differently in every Ruby process and would make the
  # ids change from run to run.
  #
  # @param text [String] text to tokenize
  # @return [Hash] +{ input_ids: [[Integer, ...]] }+ with ids in 0...1000
  def encode(text, **_kwargs)
    tokens = text.split.take(20).map { |word| word.sum % 1000 }
    {
      input_ids: [tokens]
    }
  end

  # @param token_ids [#size] ids to "decode"
  # @return [String] placeholder text mentioning only the number of ids
  def decode(token_ids)
    "Decoded text for #{token_ids.size} tokens"
  end

  # @return [Integer] fixed vocabulary size reported by the mock
  def vocab_size
    32000
  end
end
51
+
mock_tokenizer = MockLLMTokenizer.new

# Prints the shape of a tensor-like value under the given label.
show_shape = ->(label, tensor) { puts " #{label} shape: #{tensor.shape.inspect}" }

dataset = Fine::Datasets::InstructionDataset.from_jsonl(
  fixtures_path,
  tokenizer: mock_tokenizer,
  format: :alpaca,
  max_length: 128
)
puts " Dataset size: #{dataset.size}"

puts "\n3. Testing data item..."
item = dataset[0]
item_keys = item.keys.join(', ')
puts " Item keys: #{item_keys}"
show_shape.call("Input IDs", item[:input_ids])
show_shape.call("Labels", item[:labels])
show_shape.call("Attention mask", item[:attention_mask])

puts "\n4. Testing InstructionDataLoader..."
loader = Fine::Datasets::InstructionDataLoader.new(dataset, batch_size: 2, shuffle: false)
batch = loader.first
show_shape.call("Batch input_ids", batch[:input_ids])
show_shape.call("Batch labels", batch[:labels])

puts "\n5. Testing LLM model components..."

# RMSNorm: run a random tensor through the layer and report the output shape
puts " Testing RMSNorm..."
norm = Fine::Models::RMSNorm.new(64)
test_input = Torch.randn([2, 10, 64])
show_shape.call("RMSNorm output", norm.call(test_input))

# LlamaMLP: reuses the same random input as the RMSNorm check
puts " Testing LlamaMLP..."
mlp = Fine::Models::LlamaMLP.new(hidden_size: 64, intermediate_size: 128)
show_shape.call("LlamaMLP output", mlp.call(test_input))

# RotaryEmbedding: the call yields a cos/sin pair
puts " Testing RotaryEmbedding..."
rope = Fine::Models::RotaryEmbedding.new(32, 128, 10000.0)
x = Torch.randn([2, 4, 10, 32])
position_ids = Torch.arange(10).unsqueeze(0).expand(2, -1)
cos, sin = rope.call(x, position_ids)
show_shape.call("RoPE cos", cos)
show_shape.call("RoPE sin", sin)

puts "\n" + "=" * 50
puts "LLM component tests passed!"
@@ -0,0 +1,59 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Quick test of text classification with local fixtures

require "bundler/setup"
require "fine"

puts "Testing Fine::TextClassifier..."
puts "=" * 50

# Review fixtures shipped with the specs
fixtures_path = File.expand_path("../spec/fixtures/text/reviews.jsonl", __dir__)

puts "\n1. Creating TextClassifier..."
classifier = Fine::TextClassifier.new("distilbert-base-uncased") do |settings|
  settings.epochs = 1
  settings.batch_size = 2
  settings.learning_rate = 2e-5
  settings.max_length = 64
end

# Echo the values back from the classifier's own config object.
clf_config = classifier.config
puts " Config: epochs=#{clf_config.epochs}, batch_size=#{clf_config.batch_size}, max_length=#{clf_config.max_length}"

puts "\n2. Testing TextDataset loading..."
26
+
# Mock tokenizer for testing: ignores the actual text and returns the same
# fixed ten-token encoding for every input.
class MockTokenizer
  # @param texts [String, Array<String>] one text or a batch of texts
  # @return [Hash] +:input_ids+, +:attention_mask+ and +:token_type_ids+, each
  #   holding one ten-element row per input text
  def encode(texts, **_kwargs)
    # A bare String is treated as a batch of one.
    batch = texts.is_a?(String) ? [texts] : texts

    {
      input_ids: batch.map { [*1..10] },
      attention_mask: batch.map { Array.new(10, 1) },
      token_type_ids: batch.map { Array.new(10, 0) }
    }
  end
end
38
+
mock_tokenizer = MockTokenizer.new

# Build the dataset from the fixture file using the mock tokenizer.
dataset = Fine::Datasets::TextDataset.from_file(fixtures_path, tokenizer: mock_tokenizer)
[
  ["Dataset size", dataset.size],
  ["Classes", dataset.num_classes],
  ["Label map", dataset.label_map]
].each { |label, value| puts " #{label}: #{value}" }

puts "\n3. Testing data item..."
item = dataset[0]
item_keys = item.keys.join(', ')
puts " Item keys: #{item_keys}"
puts " Input IDs length: #{item[:input_ids].size}"
puts " Label: #{item[:label]}"

puts "\n4. Testing TextDataLoader..."
loader = Fine::Datasets::TextDataLoader.new(dataset, batch_size: 2, shuffle: false)
batch = loader.first
ids_shape = batch[:input_ids].shape.inspect
puts " Batch input_ids shape: #{ids_shape}"
puts " Batch labels: #{batch[:labels].to_a}"

puts "\n" + "=" * 50
puts "Text classification tests passed!"