RubyGems - fine - Versions diffs - 0.1.0 - Mend

fine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/CHANGELOG.md +38 -0
data/Gemfile +6 -0
data/Gemfile.lock +167 -0
data/LICENSE +21 -0
data/README.md +212 -0
data/Rakefile +6 -0
data/docs/installation.md +151 -0
data/docs/tutorials/llm-fine-tuning.md +246 -0
data/docs/tutorials/model-export.md +200 -0
data/docs/tutorials/siglip2-image-classification.md +130 -0
data/docs/tutorials/siglip2-object-recognition.md +203 -0
data/docs/tutorials/siglip2-similarity-search.md +152 -0
data/docs/tutorials/text-classification.md +233 -0
data/docs/tutorials/text-embeddings.md +211 -0
data/examples/basic_classification.rb +70 -0
data/examples/data/tool_calls.jsonl +30 -0
data/examples/demo_training.rb +78 -0
data/examples/finetune_gemma3_tools.rb +135 -0
data/examples/real_llm_test.rb +128 -0
data/examples/real_text_classification_test.rb +90 -0
data/examples/real_text_embedder_test.rb +110 -0
data/examples/real_training_test.rb +88 -0
data/examples/test_export.rb +28 -0
data/examples/test_image_classifier.rb +79 -0
data/examples/test_llm.rb +100 -0
data/examples/test_text_classifier.rb +59 -0
data/lib/fine/callbacks/base.rb +140 -0
data/lib/fine/callbacks/progress_bar.rb +66 -0
data/lib/fine/configuration.rb +106 -0
data/lib/fine/datasets/data_loader.rb +63 -0
data/lib/fine/datasets/image_dataset.rb +203 -0
data/lib/fine/datasets/instruction_dataset.rb +226 -0
data/lib/fine/datasets/text_data_loader.rb +88 -0
data/lib/fine/datasets/text_dataset.rb +266 -0
data/lib/fine/error.rb +49 -0
data/lib/fine/export/gguf_exporter.rb +424 -0
data/lib/fine/export/onnx_exporter.rb +249 -0
data/lib/fine/export.rb +53 -0
data/lib/fine/hub/config_loader.rb +145 -0
data/lib/fine/hub/model_downloader.rb +136 -0
data/lib/fine/hub/safetensors_loader.rb +108 -0
data/lib/fine/image_classifier.rb +256 -0
data/lib/fine/llm.rb +336 -0
data/lib/fine/models/base.rb +48 -0
data/lib/fine/models/bert_encoder.rb +202 -0
data/lib/fine/models/bert_for_sequence_classification.rb +226 -0
data/lib/fine/models/causal_lm.rb +279 -0
data/lib/fine/models/classification_head.rb +24 -0
data/lib/fine/models/gemma3_decoder.rb +244 -0
data/lib/fine/models/llama_decoder.rb +297 -0
data/lib/fine/models/sentence_transformer.rb +202 -0
data/lib/fine/models/siglip2_for_image_classification.rb +155 -0
data/lib/fine/models/siglip2_vision_encoder.rb +190 -0
data/lib/fine/text_classifier.rb +250 -0
data/lib/fine/text_embedder.rb +221 -0
data/lib/fine/tokenizers/auto_tokenizer.rb +208 -0
data/lib/fine/training/llm_trainer.rb +212 -0
data/lib/fine/training/text_trainer.rb +275 -0
data/lib/fine/training/trainer.rb +194 -0
data/lib/fine/transforms/compose.rb +28 -0
data/lib/fine/transforms/normalize.rb +33 -0
data/lib/fine/transforms/resize.rb +35 -0
data/lib/fine/transforms/to_tensor.rb +53 -0
data/lib/fine/version.rb +3 -0
data/lib/fine.rb +112 -0
data/mise.toml +2 -0
metadata +240 -0

data/examples/real_llm_test.rb ADDED Viewed

@@ -0,0 +1,128 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Test LLM components (loading, forward pass, minimal training)
+# Note: Full LLM training requires significant compute
+require "bundler/setup"
+require "fine"
+puts "=" * 60
+puts "LLM COMPONENTS TEST"
+puts "=" * 60
+Fine.configure do |config|
+  config.progress_bar = false
+end
+fixtures_path = File.expand_path("../spec/fixtures/text/instructions.jsonl", __dir__)
+puts "\n1. Testing LlamaDecoder components..."
+begin
+  # Test with a small config to verify components work
+  test_config = Fine::Hub::ConfigLoader.new("/dev/null") rescue nil
+  # Create a minimal config for testing
+  class MinimalConfig
+    attr_accessor :config
+    def initialize
+      @config = {}
+    end
+    def vocab_size = 1000
+    def hidden_size = 256
+    def num_hidden_layers = 2
+    def num_attention_heads = 4
+    def intermediate_size = 512
+    def max_position_embeddings = 128
+    def rms_norm_eps = 1e-6
+    def rope_theta = 10000.0
+    def num_key_value_heads = 4
+    def use_bias = false
+    def to_h = @config
+  end
+  # Build the decoder from the hand-built config and run one forward pass.
+  config = MinimalConfig.new
+  puts "   Creating LlamaDecoder with small config..."
+  decoder = Fine::Models::LlamaDecoder.new(config)
+  puts "   ✓ LlamaDecoder created"
+  puts "   Testing forward pass..."
+  # Random token ids below 1000, the vocab_size reported by MinimalConfig.
+  input_ids = Torch.randint(0, 1000, [2, 16], dtype: :long)  # batch=2, seq=16
+  output = decoder.call(input_ids)
+  puts "   ✓ Forward pass successful"
+  puts "     Input shape: #{input_ids.shape.to_a}"
+  # The decoder result is read by key; :last_hidden_state is the only key used here.
+  puts "     Output shape: #{output[:last_hidden_state].shape.to_a}"
+  puts "\n2. Testing CausalLM wrapper..."
+  # Same config and inputs, now through the CausalLM wrapper; reads :logits.
+  lm = Fine::Models::CausalLM.new(config)
+  output = lm.call(input_ids)
+  puts "   ✓ CausalLM forward pass successful"
+  puts "     Logits shape: #{output[:logits].shape.to_a}"
+  puts "\n3. Testing with labels (loss computation)..."
+  # Labels are a copy of the inputs; the result is then expected to carry :loss.
+  labels = input_ids.clone
+  output = lm.call(input_ids, labels: labels)
+  puts "   ✓ Loss computed: #{output[:loss].item.round(4)}"
+  puts "\n4. Testing InstructionDataset..."
+  # Stub tokenizer built from singleton methods on a bare Object.
+  tokenizer_mock = Object.new
+  # Ignores the text: always returns 10 random ids in 0...100 plus an all-ones mask.
+  def tokenizer_mock.encode(text, **_)
+    ids = Array.new(10) { rand(100) }
+    { input_ids: [ids], attention_mask: [Array.new(10, 1)] }
+  end
+  def tokenizer_mock.pad_token_id = 0
+  def tokenizer_mock.eos_token_id = 2
+  dataset = Fine::Datasets::InstructionDataset.from_jsonl(
+    fixtures_path,
+    tokenizer: tokenizer_mock,
+    max_length: 32
+  )
+  puts "   ✓ Dataset loaded with #{dataset.size} examples"
+  item = dataset[0]
+  puts "   Sample item keys: #{item.keys}"
+  puts "   Input IDs length: #{item[:input_ids].size}"
+  puts "\n5. Testing training step..."
+  # Three Adam steps, each on a freshly generated random batch.
+  optimizer = Torch::Optim::Adam.new(lm.parameters, lr: 1e-4)
+  initial_loss = nil
+  3.times do |i|
+    optimizer.zero_grad
+    batch_ids = Torch.randint(0, 1000, [2, 16], dtype: :long)
+    labels = batch_ids.clone
+    # Assigns the outer `output`, so the last step's result is visible after the loop.
+    output = lm.call(batch_ids, labels: labels)
+    loss = output[:loss]
+    # Remember only the first step's loss for the comparison below.
+    initial_loss ||= loss.item
+    loss.backward
+    optimizer.step
+    puts "   Step #{i + 1}: loss=#{loss.item.round(4)}"
+  end
+  # Compare the final step's loss with the first; a non-decrease is only a warning.
+  if output[:loss].item < initial_loss
+    puts "   ✓ Loss decreased during training"
+  else
+    puts "   ⚠ Loss did not decrease (expected with random data)"
+  end
+  puts "\n" + "=" * 60
+  puts "LLM COMPONENTS TEST PASSED!"
+  puts "=" * 60
+# Any StandardError from the steps above: report it, show up to 15 backtrace
+# lines, and exit with status 1.
+rescue => e
+  puts "\n" + "=" * 60
+  puts "LLM COMPONENTS TEST FAILED!"
+  puts "=" * 60
+  puts "\nError: #{e.class}: #{e.message}"
+  puts "\nBacktrace:"
+  puts e.backtrace.first(15).join("\n")
+  exit 1
+end

data/examples/real_text_classification_test.rb ADDED Viewed

@@ -0,0 +1,90 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Test real text classification fine-tuning
+require "bundler/setup"
+require "fine"
+puts "=" * 60
+puts "TEXT CLASSIFICATION FINE-TUNING TEST"
+puts "=" * 60
+Fine.configure do |config|
+  config.progress_bar = false
+end
+fixtures_path = File.expand_path("../spec/fixtures/text/reviews.jsonl", __dir__)
+puts "\n1. Setting up TextClassifier with DistilBERT..."
+classifier = Fine::TextClassifier.new("distilbert-base-uncased") do |config|
+  config.epochs = 2
+  config.batch_size = 4
+  config.learning_rate = 5e-5
+  config.max_length = 128
+end
+puts "   Epochs: #{classifier.config.epochs}"
+puts "   Batch size: #{classifier.config.batch_size}"
+puts "   Max length: #{classifier.config.max_length}"
+puts "\n2. Starting training on #{fixtures_path}..."
+puts "   (This will download DistilBERT from HuggingFace if not cached)"
+begin
+  history = classifier.fit(train_file: fixtures_path)
+  puts "\n3. Training completed!"
+  puts "   Training history:"
+  history.each_with_index do |metrics, i|
+    puts "   Epoch #{i + 1}: loss=#{metrics[:loss].round(4)}, acc=#{(metrics[:accuracy] * 100).round(1)}%"
+  end
+  # Check if loss decreased
+  if history.size >= 2
+    if history.last[:loss] < history.first[:loss]
+      puts "\n   ✓ Loss decreased from #{history.first[:loss].round(4)} to #{history.last[:loss].round(4)}"
+    else
+      puts "\n   ⚠ Loss did not decrease (may need more epochs or data)"
+    end
+  end
+  puts "\n4. Testing predictions..."
+  test_texts = [
+    "This product is amazing and works perfectly!",
+    "Terrible quality, broke after one day.",
+    "It's okay, nothing special.",
+    "Best purchase I've ever made!"
+  ]
+  predictions = classifier.predict(test_texts)
+  test_texts.each_with_index do |text, i|
+    pred = predictions[i].first
+    puts "   \"#{text[0..40]}...\""
+    puts "     → #{pred[:label]} (#{(pred[:score] * 100).round(1)}%)"
+  end
+  puts "\n5. Saving model..."
+  save_path = "/tmp/fine_text_classifier"
+  classifier.save(save_path)
+  puts "   Saved to: #{save_path}"
+  puts "\n6. Loading and re-testing..."
+  loaded = Fine::TextClassifier.load(save_path)
+  loaded_predictions = loaded.predict(test_texts.first)
+  puts "   Loaded model prediction for first text:"
+  puts "     → #{loaded_predictions.first.first[:label]} (#{(loaded_predictions.first.first[:score] * 100).round(1)}%)"
+  puts "\n" + "=" * 60
+  puts "TEXT CLASSIFICATION TEST PASSED!"
+  puts "=" * 60
+rescue => e
+  puts "\n" + "=" * 60
+  puts "TEXT CLASSIFICATION TEST FAILED!"
+  puts "=" * 60
+  puts "\nError: #{e.class}: #{e.message}"
+  puts "\nBacktrace:"
+  puts e.backtrace.first(15).join("\n")
+  exit 1
+end

data/examples/real_text_embedder_test.rb ADDED Viewed

@@ -0,0 +1,110 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Test text embeddings (without fine-tuning, which requires more data)
+require "bundler/setup"
+require "fine"
+puts "=" * 60
+puts "TEXT EMBEDDER TEST"
+puts "=" * 60
+Fine.configure do |config|
+  config.progress_bar = false
+end
+puts "\n1. Loading SentenceTransformer (all-MiniLM-L6-v2)..."
+puts "   (This will download from HuggingFace if not cached)"
+begin
+  embedder = Fine::TextEmbedder.new("sentence-transformers/all-MiniLM-L6-v2") do |config|
+    config.max_length = 128
+  end
+  puts "   Model loaded successfully!"
+  puts "   Embedding dimension: #{embedder.embedding_dim}"
+  puts "\n2. Testing single text encoding..."
+  text = "The quick brown fox jumps over the lazy dog."
+  embedding = embedder.encode(text)
+  puts "   Text: \"#{text}\""
+  puts "   Embedding shape: [#{embedding.size}]"
+  puts "   First 5 values: #{embedding.first(5).map { |v| v.round(4) }}"
+  puts "\n3. Testing batch encoding..."
+  texts = [
+    "I love machine learning!",
+    "Deep learning is fascinating.",
+    "The weather is nice today.",
+    "Ruby is a great programming language."
+  ]
+  embeddings = embedder.encode(texts)
+  puts "   Encoded #{texts.size} texts"
+  puts "   Result shapes: #{embeddings.size} x #{embeddings.first.size}"
+  puts "\n4. Testing semantic similarity..."
+  pairs = [
+    ["I love programming", "Coding is my passion"],
+    ["I love programming", "The sky is blue"],
+    ["Machine learning is cool", "AI and ML are interesting"],
+    ["Dogs are pets", "Cats are animals"]
+  ]
+  pairs.each do |text_a, text_b|
+    similarity = embedder.similarity(text_a, text_b)
+    puts "   \"#{text_a[0..25]}...\" vs \"#{text_b[0..25]}...\""
+    puts "     → Similarity: #{(similarity * 100).round(1)}%"
+  end
+  puts "\n5. Testing semantic search..."
+  query = "machine learning"
+  corpus = [
+    "I love pizza",
+    "Deep learning is a subset of machine learning",
+    "The stock market is volatile",
+    "Neural networks can learn complex patterns",
+    "Ruby on Rails is a web framework",
+    "Artificial intelligence is transforming industries"
+  ]
+  results = embedder.search(query, corpus, top_k: 3)
+  puts "   Query: \"#{query}\""
+  puts "   Top 3 results:"
+  results.each_with_index do |result, i|
+    puts "     #{i + 1}. \"#{result[:text][0..45]}...\" (#{(result[:score] * 100).round(1)}%)"
+  end
+  puts "\n6. Saving and loading model..."
+  save_path = "/tmp/fine_text_embedder"
+  embedder.save(save_path)
+  puts "   Saved to: #{save_path}"
+  loaded = Fine::TextEmbedder.load(save_path)
+  puts "   Loaded successfully!"
+  # Verify embeddings match
+  original_emb = embedder.encode("test text")
+  loaded_emb = loaded.encode("test text")
+  diff = original_emb.zip(loaded_emb).map { |a, b| (a - b).abs }.max
+  puts "   Max embedding difference: #{diff.round(6)}"
+  if diff < 0.0001
+    puts "   ✓ Embeddings match!"
+  else
+    puts "   ⚠ Embeddings differ"
+  end
+  puts "\n" + "=" * 60
+  puts "TEXT EMBEDDER TEST PASSED!"
+  puts "=" * 60
+rescue => e
+  puts "\n" + "=" * 60
+  puts "TEXT EMBEDDER TEST FAILED!"
+  puts "=" * 60
+  puts "\nError: #{e.class}: #{e.message}"
+  puts "\nBacktrace:"
+  puts e.backtrace.first(15).join("\n")
+  exit 1
+end

data/examples/real_training_test.rb ADDED Viewed

@@ -0,0 +1,88 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Actually fine-tune a model to verify training works
+require "bundler/setup"
+require "fine"
+puts "=" * 60
+puts "REAL FINE-TUNING TEST"
+puts "=" * 60
+# Disable progress bar for cleaner output
+Fine.configure do |config|
+  config.progress_bar = false
+end
+fixtures_path = File.expand_path("../spec/fixtures/images", __dir__)
+puts "\n1. Setting up ImageClassifier with SigLIP2..."
+classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
+  config.epochs = 2
+  config.batch_size = 2
+  config.learning_rate = 1e-4
+  config.image_size = 224
+  config.freeze_encoder = true  # Only train classification head
+end
+puts "   Epochs: #{classifier.config.epochs}"
+puts "   Batch size: #{classifier.config.batch_size}"
+puts "   Learning rate: #{classifier.config.learning_rate}"
+puts "\n2. Starting training on #{fixtures_path}..."
+puts "   (This will download the model from HuggingFace if not cached)"
+begin
+  history = classifier.fit(train_dir: fixtures_path, epochs: 2)
+  puts "\n3. Training completed!"
+  puts "   Training history:"
+  history.each do |epoch_data|
+    puts "   Epoch #{epoch_data[:epoch]}: loss=#{epoch_data[:loss].round(4)}"
+  end
+  # Check if loss decreased
+  if history.size >= 2
+    if history.last[:loss] < history.first[:loss]
+      puts "\n   ✓ Loss decreased from #{history.first[:loss].round(4)} to #{history.last[:loss].round(4)}"
+    else
+      puts "\n   ⚠ Loss did not decrease (may need more epochs or data)"
+    end
+  end
+  puts "\n4. Testing prediction..."
+  test_image = Dir.glob(File.join(fixtures_path, "*/*.jpg")).first
+  predictions = classifier.predict(test_image)
+  puts "   Image: #{File.basename(test_image)}"
+  puts "   Predictions:"
+  predictions.first.each do |pred|
+    puts "     #{pred[:label]}: #{(pred[:score] * 100).round(1)}%"
+  end
+  puts "\n5. Saving model..."
+  save_path = "/tmp/fine_trained_model"
+  classifier.save(save_path)
+  puts "   Saved to: #{save_path}"
+  puts "\n6. Loading and re-testing..."
+  loaded = Fine::ImageClassifier.load(save_path)
+  loaded_predictions = loaded.predict(test_image)
+  puts "   Loaded model predictions:"
+  loaded_predictions.first.each do |pred|
+    puts "     #{pred[:label]}: #{(pred[:score] * 100).round(1)}%"
+  end
+  puts "\n" + "=" * 60
+  puts "FINE-TUNING TEST PASSED!"
+  puts "=" * 60
+rescue => e
+  puts "\n" + "=" * 60
+  puts "FINE-TUNING FAILED!"
+  puts "=" * 60
+  puts "\nError: #{e.class}: #{e.message}"
+  puts "\nBacktrace:"
+  puts e.backtrace.first(15).join("\n")
+  exit 1
+end

data/examples/test_export.rb ADDED Viewed

@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Test export module
+require "bundler/setup"
+require "fine"
+puts "Testing Fine::Export module..."
+puts "=" * 50
+puts "\n1. Testing GGUF quantization options..."
+options = Fine::Export.gguf_quantization_options
+puts "   Available quantization types: #{options.keys.join(', ')}"
+options.each do |type, desc|
+  puts "   - #{type}: #{desc}"
+end
+puts "\n2. Testing GGUF exporter constants..."
+puts "   GGUF Magic: 0x#{Fine::Export::GGUFExporter::GGUF_MAGIC.to_s(16).upcase}"
+puts "   GGUF Version: #{Fine::Export::GGUFExporter::GGUF_VERSION}"
+puts "   Available quantizations: #{Fine::Export::GGUFExporter::QUANTIZATION_TYPES.keys.join(', ')}"
+puts "\n3. Testing ONNX exporter..."
+puts "   Supported types: #{Fine::Export::ONNXExporter::SUPPORTED_TYPES.map(&:to_s).join(', ')}"
+puts "\n" + "=" * 50
+puts "Export module tests passed!"

data/examples/test_image_classifier.rb ADDED Viewed

@@ -0,0 +1,79 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Quick test of image classification with tiny local model
+require "bundler/setup"
+require "fine"
+puts "Testing Fine::ImageClassifier..."
+puts "=" * 50
+# Use test fixtures
+fixtures_path = File.expand_path("../spec/fixtures/images", __dir__)
+puts "\n1. Creating ImageClassifier..."
+classifier = Fine::ImageClassifier.new("google/siglip2-base-patch16-224") do |config|
+  config.epochs = 1
+  config.batch_size = 2
+  config.learning_rate = 1e-4
+  config.image_size = 32  # Small for testing
+  config.freeze_encoder = true  # Faster training
+end
+puts "   Config: epochs=#{classifier.config.epochs}, batch_size=#{classifier.config.batch_size}"
+puts "\n2. Loading dataset from #{fixtures_path}..."
+# Just test the dataset loading part without actual training
+# (training requires downloading the model which takes time)
+transforms = Fine::Transforms::Compose.new([
+  Fine::Transforms::Resize.new(32),
+  Fine::Transforms::ToTensor.new,
+  Fine::Transforms::Normalize.new
+])
+dataset = Fine::Datasets::ImageDataset.from_directory(fixtures_path, transforms: transforms)
+puts "   Dataset size: #{dataset.size}"
+puts "   Classes: #{dataset.class_names.join(', ')}"
+puts "   Label map: #{dataset.label_map}"
+puts "\n3. Testing data loading..."
+item = dataset[0]
+puts "   Item keys: #{item.keys.join(', ')}"
+puts "   Pixel values shape: #{item[:pixel_values].shape.inspect}"
+puts "   Label: #{item[:label]}"
+puts "\n4. Testing DataLoader..."
+loader = Fine::Datasets::DataLoader.new(dataset, batch_size: 2, shuffle: true)
+batch = loader.first
+puts "   Batch pixel_values shape: #{batch[:pixel_values].shape.inspect}"
+puts "   Batch labels: #{batch[:labels].to_a}"
+puts "\n" + "=" * 50
+puts "Basic tests passed!"
+puts "\nNote: Full training requires downloading model weights from HuggingFace."
+puts "Run with DOWNLOAD_MODELS=1 to test full training."
+if ENV["DOWNLOAD_MODELS"]
+  puts "\n" + "=" * 50
+  puts "Downloading model and running full training..."
+  begin
+    classifier.fit(train_dir: fixtures_path, epochs: 1)
+    puts "Training completed!"
+    # Test save
+    model_path = "/tmp/fine_test_model"
+    classifier.save(model_path)
+    puts "Model saved to #{model_path}"
+    # Test load
+    loaded = Fine::ImageClassifier.load(model_path)
+    puts "Model loaded successfully!"
+  rescue => e
+    puts "Training failed: #{e.message}"
+    puts e.backtrace.first(5).join("\n")
+  end
+end

data/examples/test_llm.rb ADDED Viewed

@@ -0,0 +1,100 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Quick test of LLM components with local fixtures
+require "bundler/setup"
+require "fine"
+puts "Testing Fine::LLM components..."
+puts "=" * 50
+# Use test fixtures
+fixtures_path = File.expand_path("../spec/fixtures/text/instructions.jsonl", __dir__)
+puts "\n1. Creating LLM..."
+llm = Fine::LLM.new("meta-llama/Llama-3.2-1B") do |config|
+  config.epochs = 1
+  config.batch_size = 1
+  config.learning_rate = 1e-5
+  config.max_length = 128
+end
+puts "   Config: epochs=#{llm.config.epochs}, batch_size=#{llm.config.batch_size}, max_length=#{llm.config.max_length}"
+puts "\n2. Testing InstructionDataset loading..."
+# Create a mock tokenizer for testing
+class MockLLMTokenizer
+  attr_reader :pad_token_id, :eos_token_id
+  def initialize
+    @pad_token_id = 0
+    @eos_token_id = 1
+  end
+  def encode(text, **_kwargs)
+    tokens = text.split.take(20).map { |w| w.hash.abs % 1000 }
+    {
+      input_ids: [tokens]
+    }
+  end
+  def decode(token_ids)
+    "Decoded text for #{token_ids.size} tokens"
+  end
+  def vocab_size
+    32000
+  end
+end
+mock_tokenizer = MockLLMTokenizer.new
+dataset = Fine::Datasets::InstructionDataset.from_jsonl(
+  fixtures_path,
+  tokenizer: mock_tokenizer,
+  format: :alpaca,
+  max_length: 128
+)
+puts "   Dataset size: #{dataset.size}"
+puts "\n3. Testing data item..."
+item = dataset[0]
+puts "   Item keys: #{item.keys.join(', ')}"
+puts "   Input IDs shape: #{item[:input_ids].shape.inspect}"
+puts "   Labels shape: #{item[:labels].shape.inspect}"
+puts "   Attention mask shape: #{item[:attention_mask].shape.inspect}"
+puts "\n4. Testing InstructionDataLoader..."
+loader = Fine::Datasets::InstructionDataLoader.new(dataset, batch_size: 2, shuffle: false)
+batch = loader.first
+puts "   Batch input_ids shape: #{batch[:input_ids].shape.inspect}"
+puts "   Batch labels shape: #{batch[:labels].shape.inspect}"
+puts "\n5. Testing LLM model components..."
+# Test RMSNorm
+puts "   Testing RMSNorm..."
+norm = Fine::Models::RMSNorm.new(64)
+test_input = Torch.randn([2, 10, 64])
+norm_output = norm.call(test_input)
+puts "   RMSNorm output shape: #{norm_output.shape.inspect}"
+# Test LlamaMLP
+puts "   Testing LlamaMLP..."
+mlp = Fine::Models::LlamaMLP.new(hidden_size: 64, intermediate_size: 128)
+mlp_output = mlp.call(test_input)
+puts "   LlamaMLP output shape: #{mlp_output.shape.inspect}"
+# Test RotaryEmbedding
+puts "   Testing RotaryEmbedding..."
+rope = Fine::Models::RotaryEmbedding.new(32, 128, 10000.0)
+x = Torch.randn([2, 4, 10, 32])
+position_ids = Torch.arange(10).unsqueeze(0).expand(2, -1)
+cos, sin = rope.call(x, position_ids)
+puts "   RoPE cos shape: #{cos.shape.inspect}"
+puts "   RoPE sin shape: #{sin.shape.inspect}"
+puts "\n" + "=" * 50
+puts "LLM component tests passed!"

data/examples/test_text_classifier.rb ADDED Viewed

@@ -0,0 +1,59 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Quick test of text classification with local fixtures
+require "bundler/setup"
+require "fine"
+puts "Testing Fine::TextClassifier..."
+puts "=" * 50
+# Use test fixtures
+fixtures_path = File.expand_path("../spec/fixtures/text/reviews.jsonl", __dir__)
+puts "\n1. Creating TextClassifier..."
+classifier = Fine::TextClassifier.new("distilbert-base-uncased") do |config|
+  config.epochs = 1
+  config.batch_size = 2
+  config.learning_rate = 2e-5
+  config.max_length = 64
+end
+puts "   Config: epochs=#{classifier.config.epochs}, batch_size=#{classifier.config.batch_size}, max_length=#{classifier.config.max_length}"
+puts "\n2. Testing TextDataset loading..."
+# Create a mock tokenizer for testing
+class MockTokenizer
+  def encode(texts, **_kwargs)
+    texts = [texts] if texts.is_a?(String)
+    {
+      input_ids: texts.map { |_| (1..10).to_a },
+      attention_mask: texts.map { |_| [1] * 10 },
+      token_type_ids: texts.map { |_| [0] * 10 }
+    }
+  end
+end
+mock_tokenizer = MockTokenizer.new
+dataset = Fine::Datasets::TextDataset.from_file(fixtures_path, tokenizer: mock_tokenizer)
+puts "   Dataset size: #{dataset.size}"
+puts "   Classes: #{dataset.num_classes}"
+puts "   Label map: #{dataset.label_map}"
+puts "\n3. Testing data item..."
+item = dataset[0]
+puts "   Item keys: #{item.keys.join(', ')}"
+puts "   Input IDs length: #{item[:input_ids].size}"
+puts "   Label: #{item[:label]}"
+puts "\n4. Testing TextDataLoader..."
+loader = Fine::Datasets::TextDataLoader.new(dataset, batch_size: 2, shuffle: false)
+batch = loader.first
+puts "   Batch input_ids shape: #{batch[:input_ids].shape.inspect}"
+puts "   Batch labels: #{batch[:labels].to_a}"
+puts "\n" + "=" * 50
+puts "Text classification tests passed!"