fine 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +20 -10
  3. data/docs/examples/image-classification-shapes.md +83 -0
  4. data/docs/examples/text-embeddings-faq.md +98 -0
  5. data/docs/quickstart.md +209 -0
  6. data/docs/tutorials/lora-tool-calling.md +306 -0
  7. data/examples/data/generate_tool_data.rb +261 -0
  8. data/examples/data/ollama_tool_calls.jsonl +40 -0
  9. data/examples/data/sentiment_reviews.jsonl +30 -0
  10. data/examples/data/shapes/circle/circle_1.jpg +0 -0
  11. data/examples/data/shapes/circle/circle_10.jpg +0 -0
  12. data/examples/data/shapes/circle/circle_2.jpg +0 -0
  13. data/examples/data/shapes/circle/circle_3.jpg +0 -0
  14. data/examples/data/shapes/circle/circle_4.jpg +0 -0
  15. data/examples/data/shapes/circle/circle_5.jpg +0 -0
  16. data/examples/data/shapes/circle/circle_6.jpg +0 -0
  17. data/examples/data/shapes/circle/circle_7.jpg +0 -0
  18. data/examples/data/shapes/circle/circle_8.jpg +0 -0
  19. data/examples/data/shapes/circle/circle_9.jpg +0 -0
  20. data/examples/data/shapes/square/square_1.jpg +0 -0
  21. data/examples/data/shapes/square/square_10.jpg +0 -0
  22. data/examples/data/shapes/square/square_2.jpg +0 -0
  23. data/examples/data/shapes/square/square_3.jpg +0 -0
  24. data/examples/data/shapes/square/square_4.jpg +0 -0
  25. data/examples/data/shapes/square/square_5.jpg +0 -0
  26. data/examples/data/shapes/square/square_6.jpg +0 -0
  27. data/examples/data/shapes/square/square_7.jpg +0 -0
  28. data/examples/data/shapes/square/square_8.jpg +0 -0
  29. data/examples/data/shapes/square/square_9.jpg +0 -0
  30. data/examples/data/shapes/triangle/triangle_1.jpg +0 -0
  31. data/examples/data/shapes/triangle/triangle_10.jpg +0 -0
  32. data/examples/data/shapes/triangle/triangle_2.jpg +0 -0
  33. data/examples/data/shapes/triangle/triangle_3.jpg +0 -0
  34. data/examples/data/shapes/triangle/triangle_4.jpg +0 -0
  35. data/examples/data/shapes/triangle/triangle_5.jpg +0 -0
  36. data/examples/data/shapes/triangle/triangle_6.jpg +0 -0
  37. data/examples/data/shapes/triangle/triangle_7.jpg +0 -0
  38. data/examples/data/shapes/triangle/triangle_8.jpg +0 -0
  39. data/examples/data/shapes/triangle/triangle_9.jpg +0 -0
  40. data/examples/data/support_faq_pairs.jsonl +30 -0
  41. data/examples/generate_shape_images.rb +94 -0
  42. data/examples/sentiment_classification.rb +87 -0
  43. data/examples/shape_classification.rb +87 -0
  44. data/examples/support_faq_embeddings.rb +105 -0
  45. data/examples/train_lora_tools.rb +218 -0
  46. data/lib/fine/configuration.rb +173 -15
  47. data/lib/fine/datasets/image_dataset.rb +14 -2
  48. data/lib/fine/datasets/instruction_dataset.rb +17 -2
  49. data/lib/fine/datasets/text_dataset.rb +15 -5
  50. data/lib/fine/hub/config_loader.rb +4 -4
  51. data/lib/fine/hub/safetensors_loader.rb +3 -2
  52. data/lib/fine/llm.rb +39 -10
  53. data/lib/fine/lora.rb +214 -0
  54. data/lib/fine/models/bert_encoder.rb +15 -6
  55. data/lib/fine/models/bert_for_sequence_classification.rb +35 -4
  56. data/lib/fine/models/causal_lm.rb +46 -5
  57. data/lib/fine/models/gemma3_decoder.rb +25 -6
  58. data/lib/fine/models/llama_decoder.rb +9 -8
  59. data/lib/fine/models/sentence_transformer.rb +1 -1
  60. data/lib/fine/tokenizers/auto_tokenizer.rb +15 -0
  61. data/lib/fine/training/text_trainer.rb +3 -1
  62. data/lib/fine/validators.rb +304 -0
  63. data/lib/fine/version.rb +1 -1
  64. data/lib/fine.rb +4 -0
  65. metadata +47 -2
data/examples/train_lora_tools.rb ADDED
@@ -0,0 +1,218 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ # Train larger model with LoRA for efficient fine-tuning
+ # LoRA dramatically reduces memory by only training ~1% of parameters
+
+ require "bundler/setup"
+ require "fine"
+
+ MAX_MEMORY_GB = 40
+ MONITOR_INTERVAL = 2
+
+ def get_memory_usage_gb
+ `ps -o rss= -p #{Process.pid}`.strip.to_i / 1024.0 / 1024.0
+ end
+
+ puts "=" * 70
+ puts "LORA TOOL CALLING TRAINING"
+ puts "=" * 70
+ puts "Max memory limit: #{MAX_MEMORY_GB} GB"
+
+ max_memory_seen = 0.0
+ memory_exceeded = false
+
+ monitor_thread = Thread.new do
+ loop do
+ mem = get_memory_usage_gb
+ max_memory_seen = mem if mem > max_memory_seen
+ if mem > MAX_MEMORY_GB
+ memory_exceeded = true
+ Thread.main.raise(Interrupt, "Memory limit exceeded: #{mem.round(2)} GB")
+ end
+ sleep MONITOR_INTERVAL
+ rescue => e
+ break if e.is_a?(Interrupt)
+ end
+ end
+
+ begin
+ Fine.configure { |c| c.progress_bar = false }
+
+ # Use larger dataset
+ data_path = File.expand_path("data/ollama_tool_calls_large.jsonl", __dir__)
+
+ # Try 4B model first, fall back to 1B if memory issues
+ model_id = ARGV[0] || "google/gemma-3-1b-it"
+
+ puts "\n[1/6] Loading model: #{model_id}..."
+ model = Fine::Models::CausalLM.from_pretrained(model_id)
+ puts " Model loaded: #{get_memory_usage_gb.round(2)} GB"
+
+ puts "\n[2/6] Applying LoRA..."
+ # Apply LoRA to attention projections
+ Fine::LoRA.apply(
+ model,
+ rank: 32, # Higher rank = more capacity for structured output
+ alpha: 64, # Scaling factor
+ dropout: 0.05, # Light dropout for regularization
+ target_modules: %w[q_proj k_proj v_proj o_proj] # All attention projections
+ )
+ puts " LoRA applied: #{get_memory_usage_gb.round(2)} GB"
+
+ # Move to device
+ model.to(Fine.device)
+ model.train
+ puts " On #{Fine.device}: #{get_memory_usage_gb.round(2)} GB"
+
+ puts "\n[3/6] Loading tokenizer..."
+ downloader = Fine::Hub::ModelDownloader.new(model_id)
+ model_path = downloader.download
+ tokenizer = Fine::Tokenizers::AutoTokenizer.new(model_path, max_length: 384)
+ puts " Found tokenizer"
+
+ puts "\n[4/6] Loading training data..."
+ dataset = Fine::Datasets::InstructionDataset.from_jsonl(
+ data_path,
+ tokenizer: tokenizer,
+ format: :alpaca,
+ max_length: 384
+ )
+ puts " #{dataset.size} examples loaded"
+
+ data_loader = Fine::Datasets::InstructionDataLoader.new(
+ dataset,
+ batch_size: 1,
+ shuffle: true,
+ pad_token_id: tokenizer.pad_token_id
+ )
+
+ puts "\n[5/6] Training with LoRA..."
+
+ # Only get LoRA parameters for optimizer
+ lora_params = Fine::LoRA.trainable_parameters(model)
+ optimizer = Torch::Optim::AdamW.new(lora_params, lr: 1e-4) # Higher LR for LoRA
+
+ epochs = 15 # More epochs for structured output learning
+ total_loss = 0.0
+ step = 0
+
+ epochs.times do |epoch|
+ epoch_loss = 0.0
+ batch_count = 0
+
+ data_loader.each do |batch|
+ input_ids = batch[:input_ids].to(Fine.device)
+ labels = batch[:labels].to(Fine.device)
+ attention_mask = batch[:attention_mask].to(Fine.device)
+
+ # Forward
+ outputs = model.forward(input_ids, attention_mask: attention_mask, labels: labels)
+ loss = outputs[:loss]
+
+ # Backward
+ loss.backward
+
+ # Optimizer step
+ optimizer.step
+ optimizer.zero_grad
+
+ epoch_loss += loss.to(:float32).item
+ batch_count += 1
+ step += 1
+ end
+
+ avg_loss = epoch_loss / batch_count
+ mem = get_memory_usage_gb
+ puts " Epoch #{epoch + 1}: loss=#{avg_loss.round(4)} | Memory: #{mem.round(2)} GB"
+ end
+
+ puts "\n[6/6] Testing generation..."
+
+ model.eval
+ test_cases = [
+ {
+ prompt: "What's the weather in Tokyo?",
+ tools: "get_weather: Get current weather\n Parameters: location (string, required)"
+ },
+ {
+ prompt: "Calculate 50 + 25 * 2",
+ tools: "calculate: Math calculator\n Parameters: expression (string, required)"
+ },
+ {
+ prompt: "Search for Ruby tutorials",
+ tools: "search_web: Web search\n Parameters: query (string, required)"
+ }
+ ]
+
+ test_cases.each do |tc|
+ full_prompt = <<~PROMPT
+ ### Instruction:
+ #{tc[:prompt]}
+
+ ### Input:
+ You have access to the following tools:
+
+ #{tc[:tools]}
+
+ Respond with a JSON tool call if a tool is needed.
+
+ ### Response:
+ PROMPT
+
+ ids = tokenizer.encode_for_generation(full_prompt)
+ input_ids = Torch.tensor([ids]).to(Fine.device)
+
+ Torch.no_grad do
+ output_ids = model.generate(
+ input_ids,
+ max_new_tokens: 150,
+ temperature: 0.1,
+ do_sample: false,
+ eos_token_id: tokenizer.eos_token_id
+ )
+ response = tokenizer.decode(output_ids[0].to_a)
+ generated = response.split("### Response:").last.to_s.strip
+
+ puts "\n Q: #{tc[:prompt]}"
+ puts " A: #{generated[0..200]}"
+
+ begin
+ json = JSON.parse(generated)
+ if json["tool_calls"]
+ puts " [Valid Ollama format]"
+ end
+ rescue JSON::ParserError
+ puts " [Not valid JSON]"
+ end
+ end
+ end
+
+ puts "\n" + "=" * 70
+ save_path = "/tmp/gemma3-lora-tools"
+
+ # Merge LoRA weights for inference
+ puts "Merging LoRA weights..."
+ Fine::LoRA.merge!(model)
+
+ model.save(save_path)
+ tokenizer.save(save_path)
+ puts "Model saved to: #{save_path}"
+ puts "Max memory used: #{max_memory_seen.round(2)} GB"
+ puts "=" * 70
+
+ rescue Interrupt => e
+ if memory_exceeded
+ puts "\n\nTERMINATED: Memory limit exceeded!"
+ exit 1
+ else
+ puts "\n\nInterrupted"
+ exit 130
+ end
+ rescue => e
+ puts "\nFailed: #{e.class}: #{e.message}"
+ puts e.backtrace.first(10).join("\n")
+ exit 1
+ ensure
+ monitor_thread&.kill
+ end
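The new example is the main consumer of the Fine::LoRA module added in this release (data/lib/fine/lora.rb, +214 lines). Condensed to just the LoRA-specific calls it exercises, the flow looks roughly like this; the rank, alpha, learning rate and save path are the example's own choices, not library defaults:

  require "fine"

  model = Fine::Models::CausalLM.from_pretrained("google/gemma-3-1b-it")
  Fine::LoRA.apply(model, rank: 32, alpha: 64, dropout: 0.05,
                   target_modules: %w[q_proj k_proj v_proj o_proj])
  model.to(Fine.device)

  # Only the adapter weights go to the optimizer
  optimizer = Torch::Optim::AdamW.new(Fine::LoRA.trainable_parameters(model), lr: 1e-4)
  # ... training loop as in the script above ...

  # Fold the adapters back into the base weights before saving for plain inference
  Fine::LoRA.merge!(model)
  model.save("/tmp/gemma3-lora-tools")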
data/lib/fine/configuration.rb CHANGED
@@ -2,42 +2,95 @@
 
  module Fine
  # Configuration for training runs
+ #
+ # @example Basic usage
+ # Fine::TextClassifier.new("distilbert-base-uncased") do |config|
+ # config.epochs = 5
+ # config.batch_size = 16
+ # end
+ #
+ # @example With callbacks
+ # config.on_epoch_end do |epoch, metrics|
+ # puts "Epoch #{epoch}: loss=#{metrics[:loss]}"
+ # end
+ #
  class Configuration
+ # Default values for all configurations
+ DEFAULTS = {
+ epochs: 3,
+ batch_size: 16,
+ learning_rate: 2e-5,
+ weight_decay: 0.01,
+ warmup_ratio: 0.1,
+ optimizer: :adamw,
+ scheduler: :linear,
+ dropout: 0.1,
+ image_size: 224
+ }.freeze
+
  # Training hyperparameters
+ # @!attribute epochs
+ # @return [Integer] Number of training epochs (default: 3)
+ # @!attribute batch_size
+ # @return [Integer] Samples per batch (default: 16)
+ # @!attribute learning_rate
+ # @return [Float] Learning rate (default: 2e-5)
+ # @!attribute weight_decay
+ # @return [Float] L2 regularization (default: 0.01)
  attr_accessor :epochs, :batch_size, :learning_rate, :weight_decay
+
+ # @!attribute warmup_steps
+ # @return [Integer] Number of warmup steps (default: 0, use warmup_ratio instead)
+ # @!attribute warmup_ratio
+ # @return [Float] Fraction of training for warmup (default: 0.1)
  attr_accessor :warmup_steps, :warmup_ratio
+
+ # @!attribute optimizer
+ # @return [Symbol] Optimizer type (:adamw, :adam, :sgd) (default: :adamw)
+ # @!attribute scheduler
+ # @return [Symbol] LR scheduler (:linear, :cosine, :constant) (default: :linear)
  attr_accessor :optimizer, :scheduler
 
  # Model configuration
+ # @!attribute freeze_encoder
+ # @return [Boolean] Freeze encoder weights, only train head (default: false)
+ # @!attribute dropout
+ # @return [Float] Dropout probability (default: 0.1)
+ # @!attribute num_labels
+ # @return [Integer, nil] Number of output classes (auto-detected if nil)
  attr_accessor :freeze_encoder, :dropout, :num_labels
 
  # Data configuration
+ # @!attribute image_size
+ # @return [Integer] Target image size for resizing (default: 224)
  attr_accessor :image_size
 
- # Callbacks
+ # @!attribute callbacks
+ # @return [Array<Callbacks::Base>] Training callbacks
  attr_accessor :callbacks
 
- # Augmentation
+ # @!attribute augmentation_config
+ # @return [AugmentationConfig] Data augmentation settings
  attr_reader :augmentation_config
 
  def initialize
- # Training defaults
- @epochs = 3
- @batch_size = 32
- @learning_rate = 2e-4
- @weight_decay = 0.02
+ # Training defaults - optimized for most tasks
+ @epochs = DEFAULTS[:epochs]
+ @batch_size = DEFAULTS[:batch_size]
+ @learning_rate = DEFAULTS[:learning_rate]
+ @weight_decay = DEFAULTS[:weight_decay]
  @warmup_steps = 0
- @warmup_ratio = 0.0
- @optimizer = :adamw
- @scheduler = :cosine
+ @warmup_ratio = DEFAULTS[:warmup_ratio]
+ @optimizer = DEFAULTS[:optimizer]
+ @scheduler = DEFAULTS[:scheduler]
 
  # Model defaults
  @freeze_encoder = false
- @dropout = 0.1
+ @dropout = DEFAULTS[:dropout]
  @num_labels = nil # auto-detect from dataset
 
  # Data defaults
- @image_size = 224
+ @image_size = DEFAULTS[:image_size]
 
  # Callbacks
  @callbacks = []
@@ -46,34 +99,138 @@ module Fine
  @augmentation_config = AugmentationConfig.new
  end
 
+ # Configure data augmentation
+ #
+ # @yield [AugmentationConfig] The augmentation configuration
+ # @return [AugmentationConfig]
+ #
+ # @example
+ # config.augmentation do |aug|
+ # aug.random_horizontal_flip = true
+ # aug.random_rotation = 15
+ # end
  def augmentation
  yield @augmentation_config if block_given?
  @augmentation_config
  end
 
  # Register a callback for epoch end
+ #
+ # @yield [Integer, Hash] Epoch number and metrics hash
+ #
+ # @example
+ # config.on_epoch_end do |epoch, metrics|
+ # puts "Epoch #{epoch}: loss=#{metrics[:loss]}"
+ # end
  def on_epoch_end(&block)
  @callbacks << Callbacks::LambdaCallback.new(on_epoch_end: block)
  end
 
  # Register a callback for batch end
+ #
+ # @yield [Integer, Float] Batch index and loss value
  def on_batch_end(&block)
  @callbacks << Callbacks::LambdaCallback.new(on_batch_end: block)
  end
 
- # Register a callback for train begin
+ # Register a callback for training start
+ #
+ # @yield [Hash] Training info (model, config)
  def on_train_begin(&block)
  @callbacks << Callbacks::LambdaCallback.new(on_train_begin: block)
  end
 
- # Register a callback for train end
+ # Register a callback for training end
+ #
+ # @yield [Array<Hash>] Training history
  def on_train_end(&block)
  @callbacks << Callbacks::LambdaCallback.new(on_train_end: block)
  end
+
+ # Return configuration as a hash
+ def to_h
+ {
+ epochs: @epochs,
+ batch_size: @batch_size,
+ learning_rate: @learning_rate,
+ weight_decay: @weight_decay,
+ warmup_steps: @warmup_steps,
+ warmup_ratio: @warmup_ratio,
+ optimizer: @optimizer,
+ scheduler: @scheduler,
+ freeze_encoder: @freeze_encoder,
+ dropout: @dropout,
+ num_labels: @num_labels,
+ image_size: @image_size
+ }
+ end
+ end
+
+ # Configuration for text models (BERT, DistilBERT, DeBERTa)
+ class TextConfiguration < Configuration
+ # @!attribute max_length
+ # @return [Integer] Maximum sequence length (default: 128)
+ attr_accessor :max_length
+
+ # Text model defaults
+ DEFAULTS = Configuration::DEFAULTS.merge(
+ max_length: 128,
+ batch_size: 16
+ ).freeze
+
+ def initialize
+ super
+ @max_length = DEFAULTS[:max_length]
+ @batch_size = DEFAULTS[:batch_size]
+ end
+ end
+
+ # Configuration for embedding models (Sentence Transformers)
+ class EmbeddingConfiguration < Configuration
+ # @!attribute max_length
+ # @return [Integer] Maximum sequence length (default: 128)
+ # @!attribute pooling_mode
+ # @return [Symbol] Pooling strategy (:mean, :cls, :max) (default: :mean)
+ # @!attribute loss
+ # @return [Symbol] Loss function (:cosine, :contrastive, :triplet) (default: :cosine)
+ attr_accessor :max_length, :pooling_mode, :loss
+
+ # Embedding model defaults
+ DEFAULTS = Configuration::DEFAULTS.merge(
+ max_length: 128,
+ pooling_mode: :mean,
+ loss: :cosine,
+ batch_size: 32
+ ).freeze
+
+ def initialize
+ super
+ @max_length = DEFAULTS[:max_length]
+ @pooling_mode = DEFAULTS[:pooling_mode]
+ @loss = DEFAULTS[:loss]
+ @batch_size = DEFAULTS[:batch_size]
+ end
+ end
  end
 
  # Configuration for data augmentation
+ #
+ # @example
+ # config.augmentation do |aug|
+ # aug.random_horizontal_flip = true
+ # aug.random_rotation = 15
+ # aug.color_jitter = { brightness: 0.2, contrast: 0.2 }
+ # end
  class AugmentationConfig
+ # @!attribute random_horizontal_flip
+ # @return [Boolean] Randomly flip images horizontally (default: false)
+ # @!attribute random_vertical_flip
+ # @return [Boolean] Randomly flip images vertically (default: false)
+ # @!attribute random_rotation
+ # @return [Integer] Max rotation degrees (0 = disabled) (default: 0)
+ # @!attribute color_jitter
+ # @return [Hash, nil] Color jitter settings { brightness:, contrast:, saturation:, hue: }
+ # @!attribute random_resized_crop
+ # @return [Hash, nil] Random crop settings { scale:, ratio: }
  attr_accessor :random_horizontal_flip, :random_vertical_flip
  attr_accessor :random_rotation, :color_jitter
  attr_accessor :random_resized_crop
@@ -86,6 +243,7 @@ module Fine
  @random_resized_crop = nil
  end
 
+ # Check if any augmentation is enabled
  def enabled?
  @random_horizontal_flip ||
  @random_vertical_flip ||
@@ -94,12 +252,12 @@ module Fine
  @random_resized_crop
  end
 
+ # Convert to transform objects
  def to_transforms
  transforms = []
  transforms << Transforms::RandomHorizontalFlip.new if @random_horizontal_flip
  transforms << Transforms::RandomVerticalFlip.new if @random_vertical_flip
  transforms << Transforms::RandomRotation.new(@random_rotation) if @random_rotation.positive?
- # Add more transforms as implemented
  transforms
  end
  end
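The new TextConfiguration and EmbeddingConfiguration subclasses share the base DEFAULTS and the new to_h helper. A small usage sketch, assuming the defaults in the diff carry through to runtime:

  config = Fine::EmbeddingConfiguration.new
  config.pooling_mode   # => :mean
  config.loss           # => :cosine
  config.batch_size     # => 32

  # to_h is defined on the base Configuration, so it reports the shared keys
  config.to_h.slice(:epochs, :learning_rate, :scheduler)
  # => { epochs: 3, learning_rate: 2.0e-05, scheduler: :linear }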
data/lib/fine/datasets/image_dataset.rb CHANGED
@@ -23,9 +23,21 @@ module Fine
  #
  # @param path [String] Path to the root directory
  # @param transforms [Transforms::Compose, nil] Optional transforms to apply
+ # @param validate [Boolean] Whether to validate directory structure
  # @return [ImageDataset]
- def self.from_directory(path, transforms: nil)
- raise DatasetError, "Directory not found: #{path}" unless File.directory?(path)
+ #
+ # @example Expected directory structure
+ # # data/
+ # # cats/
+ # # cat1.jpg
+ # # cat2.jpg
+ # # dogs/
+ # # dog1.jpg
+ # # dog2.jpg
+ # dataset = ImageDataset.from_directory("data/", transforms: transforms)
+ #
+ def self.from_directory(path, transforms: nil, validate: true)
+ Validators.validate_image_directory!(path) if validate
 
  images = []
  labels = []
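from_directory keeps the label-per-subdirectory layout and now runs the new Validators check up front. A sketch using the shape images that ship with this release (the full Fine::Datasets::ImageDataset constant is inferred from the file path):

  # Validation is on by default; pass validate: false to skip the upfront structure check
  dataset = Fine::Datasets::ImageDataset.from_directory("data/examples/data/shapes")
  dataset = Fine::Datasets::ImageDataset.from_directory("data/examples/data/shapes", validate: false)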
data/lib/fine/datasets/instruction_dataset.rb CHANGED
@@ -17,9 +17,24 @@ module Fine
  # @param tokenizer [Tokenizers::AutoTokenizer] Tokenizer
  # @param format [Symbol] Data format (:alpaca, :sharegpt, :simple, :auto)
  # @param max_length [Integer] Maximum sequence length
+ # @param validate [Boolean] Whether to validate the file first
  # @return [InstructionDataset]
- def self.from_jsonl(path, tokenizer:, format: :auto, max_length: 2048)
- examples = File.readlines(path).map { |line| JSON.parse(line, symbolize_names: true) }
+ #
+ # @example Alpaca format
+ # # {"instruction": "Summarize this", "input": "Long text...", "output": "Summary"}
+ # dataset = InstructionDataset.from_jsonl("data.jsonl", tokenizer: tok)
+ #
+ # @example ShareGPT format
+ # # {"conversations": [{"from": "human", "value": "Hi"}, {"from": "assistant", "value": "Hello!"}]}
+ # dataset = InstructionDataset.from_jsonl("chat.jsonl", tokenizer: tok, format: :sharegpt)
+ #
+ def self.from_jsonl(path, tokenizer:, format: :auto, max_length: 2048, validate: true)
+ detected_format = Validators.validate_instructions!(path, format: format) if validate
+ format = detected_format if validate && format == :auto
+
+ examples = File.readlines(path).reject { |l| l.strip.empty? }.map do |line|
+ JSON.parse(line, symbolize_names: true)
+ end
  new(examples, tokenizer: tokenizer, format: format, max_length: max_length)
  end
 
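With validation enabled (the default) the check also doubles as format detection when :auto is passed. Something like the following, reusing the tool-call data added under data/examples/data (tokenizer construction is assumed to happen as in the example script):

  dataset = Fine::Datasets::InstructionDataset.from_jsonl(
    "data/examples/data/ollama_tool_calls.jsonl",
    tokenizer: tokenizer    # format: :auto is resolved by Validators.validate_instructions!
  )

  # validate: false restores the old behaviour: every non-empty line is parsed as-is
  dataset = Fine::Datasets::InstructionDataset.from_jsonl(
    "data/examples/data/ollama_tool_calls.jsonl",
    tokenizer: tokenizer,
    validate: false
  )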
data/lib/fine/datasets/text_dataset.rb CHANGED
@@ -19,9 +19,17 @@ module Fine
  # @param tokenizer [AutoTokenizer] Tokenizer to use
  # @param text_column [String] Name of text field
  # @param label_column [String] Name of label field
+ # @param validate [Boolean] Whether to validate the file first
  # @return [TextDataset]
- def self.from_jsonl(path, tokenizer:, text_column: "text", label_column: "label")
- raise DatasetError, "File not found: #{path}" unless File.exist?(path)
+ #
+ # @example
+ # # Expected JSONL format:
+ # # {"text": "Great product!", "label": "positive"}
+ # # {"text": "Terrible service", "label": "negative"}
+ # dataset = TextDataset.from_jsonl("reviews.jsonl", tokenizer: tokenizer)
+ #
+ def self.from_jsonl(path, tokenizer:, text_column: "text", label_column: "label", validate: true)
+ Validators.validate_text_classification!(path) if validate
 
  texts = []
  labels = []
@@ -29,9 +37,11 @@ module Fine
  File.foreach(path) do |line|
  next if line.strip.empty?
 
- data = JSON.parse(line)
- texts << data[text_column]
- labels << data[label_column]
+ data = JSON.parse(line, symbolize_names: true)
+ text_key = data.key?(text_column.to_sym) ? text_column.to_sym : text_column
+ label_key = data.key?(label_column.to_sym) ? label_column.to_sym : label_column
+ texts << data[text_key]
+ labels << data[label_key]
  end
 
  raise DatasetError, "No data found in #{path}" if texts.empty?
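The switch to symbolize_names: true is why the text_key/label_key fallback above exists: string column names no longer index the parsed hash directly. A self-contained illustration of that detail:

  require "json"

  line = '{"text": "Great product!", "label": "positive"}'
  data = JSON.parse(line, symbolize_names: true)

  data[:text]    # => "Great product!"
  data["text"]   # => nil, hence mapping "text" to :text before the lookup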
data/lib/fine/hub/config_loader.rb CHANGED
@@ -20,19 +20,19 @@ module Fine
 
  # Vision encoder configuration
  def hidden_size
- vision_config["hidden_size"] || config["hidden_size"] || 768
+ vision_config["hidden_size"] || config["hidden_size"] || config["dim"] || 768
  end
 
  def num_hidden_layers
- vision_config["num_hidden_layers"] || config["num_hidden_layers"] || 12
+ vision_config["num_hidden_layers"] || config["num_hidden_layers"] || config["n_layers"] || 12
  end
 
  def num_attention_heads
- vision_config["num_attention_heads"] || config["num_attention_heads"] || 12
+ vision_config["num_attention_heads"] || config["num_attention_heads"] || config["n_heads"] || 12
  end
 
  def intermediate_size
- vision_config["intermediate_size"] || config["intermediate_size"] || 3072
+ vision_config["intermediate_size"] || config["intermediate_size"] || config["hidden_dim"] || 3072
  end
 
  def image_size
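The extra fallback keys appear aimed at DistilBERT-style config.json files, which (to the best of my knowledge) use dim, n_layers, n_heads and hidden_dim instead of the BERT-style names. The fallback chain then resolves like this:

  # Abridged DistilBERT-style config resolved through the fallbacks above
  config = { "dim" => 768, "n_layers" => 6, "n_heads" => 12, "hidden_dim" => 3072 }

  config["hidden_size"] || config["dim"]               # => 768
  config["num_hidden_layers"] || config["n_layers"]    # => 6
  config["intermediate_size"] || config["hidden_dim"]  # => 3072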
data/lib/fine/hub/safetensors_loader.rb CHANGED
@@ -10,8 +10,9 @@ module Fine
  # @param path [String] Path to the safetensors file
  # @param strict [Boolean] If true, raise error on missing/unexpected keys
  # @param prefix [String] Prefix to add/remove from weight names
+ # @param skip_mapping [Boolean] If true, skip weight name mapping (for loading saved Fine models)
  # @return [Hash] Hash with :missing_keys and :unexpected_keys arrays
- def self.load_into_model(model, path, strict: false, prefix: nil)
+ def self.load_into_model(model, path, strict: false, prefix: nil, skip_mapping: false)
  tensors = Safetensors::Torch.load_file(path)
 
  # Get model's state dict keys
@@ -22,7 +23,7 @@ module Fine
  unexpected_keys = []
 
  tensors.each do |name, tensor|
- mapped_name = map_weight_name(name, prefix: prefix)
+ mapped_name = skip_mapping ? name : map_weight_name(name, prefix: prefix)
 
  if model_keys.include?(mapped_name)
  mapped_tensors[mapped_name] = tensor
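skip_mapping is presumably for reloading checkpoints that Fine wrote itself, where tensor names already match the Ruby model's state dict and the HF-to-Fine name mapping would get in the way. A hedged sketch (the SafetensorsLoader constant and the saved file name are inferred from the file path, not confirmed by the diff):

  # Reload a model saved by Fine without re-mapping weight names
  result = Fine::Hub::SafetensorsLoader.load_into_model(
    model,
    "/tmp/gemma3-lora-tools/model.safetensors",  # layout produced by model.save is assumed
    skip_mapping: true
  )
  warn "missing: #{result[:missing_keys].inspect}" unless result[:missing_keys].empty?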
data/lib/fine/llm.rb CHANGED
@@ -204,9 +204,9 @@
  def generate(prompt, max_new_tokens: 100, temperature: 0.7, top_p: 0.9, top_k: 50, do_sample: true)
  raise TrainingError, "Model not loaded" unless @model && @tokenizer
 
- # Tokenize prompt (without tensors for easier manipulation)
- encoding = @tokenizer.encode(prompt, return_tensors: false)
- input_ids = Torch.tensor([encoding[:input_ids].first])
+ # Tokenize prompt without padding for autoregressive generation
+ ids = @tokenizer.encode_for_generation(prompt)
+ input_ids = Torch.tensor([ids])
 
  # Move to device
  input_ids = input_ids.to(Fine.device)
@@ -316,19 +316,48 @@
  end
 
  # Configuration for LLM fine-tuning
+ #
+ # @example
+ # llm = Fine::LLM.new("google/gemma-3-1b-it") do |config|
+ # config.epochs = 3
+ # config.max_length = 512
+ # config.learning_rate = 1e-5
+ # end
+ #
  class LLMConfiguration < Configuration
+ # LLM-specific defaults
+ DEFAULTS = Configuration::DEFAULTS.merge(
+ max_length: 2048,
+ learning_rate: 2e-5,
+ batch_size: 4,
+ epochs: 3,
+ warmup_steps: 100,
+ gradient_accumulation_steps: 4,
+ max_grad_norm: 1.0
+ ).freeze
+
+ # @!attribute max_length
+ # @return [Integer] Maximum sequence length (default: 2048)
+ # @!attribute gradient_accumulation_steps
+ # @return [Integer] Accumulate gradients over N steps (default: 4)
+ # @!attribute max_grad_norm
+ # @return [Float] Gradient clipping norm (default: 1.0)
+ # @!attribute freeze_layers
+ # @return [Integer] Number of bottom layers to freeze (default: 0)
+ # @!attribute pad_token_id
+ # @return [Integer, nil] Padding token ID (auto-detected if nil)
  attr_accessor :max_length, :warmup_steps, :gradient_accumulation_steps,
  :max_grad_norm, :freeze_layers, :pad_token_id
 
  def initialize
  super
- @max_length = 2048
- @learning_rate = 2e-5
- @batch_size = 4
- @epochs = 1
- @warmup_steps = 100
- @gradient_accumulation_steps = 4
- @max_grad_norm = 1.0
+ @max_length = DEFAULTS[:max_length]
+ @learning_rate = DEFAULTS[:learning_rate]
+ @batch_size = DEFAULTS[:batch_size]
+ @epochs = DEFAULTS[:epochs]
+ @warmup_steps = DEFAULTS[:warmup_steps]
+ @gradient_accumulation_steps = DEFAULTS[:gradient_accumulation_steps]
+ @max_grad_norm = DEFAULTS[:max_grad_norm]
  @freeze_layers = 0
  @pad_token_id = nil
  end
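On the generation side, generate now goes through the tokenizer's encode_for_generation (added in auto_tokenizer.rb) so the prompt is not padded before sampling. End to end it might be driven like this; the block options mirror the @example above, and the training or loading step between new and generate is elided because generate raises unless a model is loaded:

  llm = Fine::LLM.new("google/gemma-3-1b-it") do |config|
    config.epochs = 3
    config.max_length = 512
    config.learning_rate = 1e-5
  end

  # ... fine-tune or load the model here (that part of the API is not shown in this diff) ...

  puts llm.generate("Summarize LoRA in one sentence.",
                    max_new_tokens: 60, temperature: 0.7, top_p: 0.9)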