RubyGems - ruby_llm - Versions diffs - 1.5.0 → 1.5.1 - Mend

ruby_llm 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/lib/ruby_llm/model/info.rb +2 -2
data/lib/ruby_llm/models.json +428 -433
data/lib/ruby_llm/models_schema.json +168 -0
data/lib/ruby_llm/providers/gemini/capabilities.rb +3 -0
data/lib/ruby_llm/providers/mistral/capabilities.rb +30 -18
data/lib/ruby_llm/utils.rb +12 -0
data/lib/ruby_llm/version.rb +1 -1
data/lib/tasks/models_update.rake +26 -0
metadata +2 -1

data/lib/ruby_llm/models_schema.json ADDED

@@ -0,0 +1,168 @@
+{
+  "title": "RubyLLM Models Schema",
+  "description": "Schema for validating the structure of models.json",
+  "type": "array",
+  "items": {
+    "type": "object",
+    "required": ["id", "name", "provider", "context_window", "max_output_tokens"],
+    "properties": {
+      "id": {
+        "type": "string",
+        "description": "Unique identifier for the model"
+      },
+      "name": {
+        "type": "string",
+        "description": "Display name of the model"
+      },
+      "provider": {
+        "type": "string",
+        "description": "Provider of the model (e.g., openai, anthropic, mistral)"
+      },
+      "family": {
+        "type": ["string", "null"],
+        "description": "Model family (e.g., gpt-4, claude-3)"
+      },
+      "created_at": {
+        "type": ["null", {"type": "string", "format": "date-time"}],
+        "description": "Creation date of the model"
+      },
+      "context_window": {
+        "type": ["null", {"type": "integer", "minimum": 0}],
+        "description": "Maximum context window size"
+      },
+      "max_output_tokens": {
+        "type": ["null", {"type": "integer", "minimum": 0}],
+        "description": "Maximum output tokens"
+      },
+      "knowledge_cutoff": {
+        "type": ["null", {"type": "string", "format": "date"}],
+        "description": "Knowledge cutoff date"
+      },
+      "modalities": {
+        "type": "object",
+        "required": ["input", "output"],
+        "properties": {
+          "input": {
+            "type": "array",
+            "items": {
+              "type": "string",
+              "enum": ["text", "image", "audio", "pdf", "video", "file"]
+            },
+            "uniqueItems": true,
+            "description": "Supported input modalities"
+          },
+          "output": {
+            "type": "array",
+            "items": {
+              "type": "string",
+              "enum": ["text", "image", "audio", "embeddings", "moderation"]
+            },
+            "uniqueItems": true,
+            "description": "Supported output modalities"
+          }
+        }
+      },
+      "capabilities": {
+        "type": "array",
+        "items": {
+          "type": "string",
+          "enum": [
+            "streaming", "function_calling", "structured_output", "predicted_outputs",
+            "distillation", "fine_tuning", "batch", "realtime", "image_generation",
+            "speech_generation", "transcription", "translation", "citations", "reasoning",
+            "caching", "moderation", "json_mode", "vision"
+          ]
+        },
+        "uniqueItems": true,
+        "description": "Model capabilities"
+      },
+      "pricing": {
+        "type": "object",
+        "properties": {
+          "text_tokens": {
+            "type": "object",
+            "required": ["standard"],
+            "properties": {
+              "standard": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0},
+                  "cached_input_per_million": {"type": "number", "minimum": 0},
+                  "output_per_million": {"type": "number", "minimum": 0},
+                  "reasoning_output_per_million": {"type": "number", "minimum": 0}
+                }
+              },
+              "batch": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0},
+                  "output_per_million": {"type": "number", "minimum": 0}
+                }
+              }
+            }
+          },
+          "images": {
+            "type": "object",
+            "properties": {
+              "standard": {
+                "type": "object",
+                "properties": {
+                  "input": {"type": "number", "minimum": 0},
+                  "output": {"type": "number", "minimum": 0}
+                }
+              },
+              "batch": {
+                "type": "object",
+                "properties": {
+                  "input": {"type": "number", "minimum": 0},
+                  "output": {"type": "number", "minimum": 0}
+                }
+              }
+            }
+          },
+          "audio_tokens": {
+            "type": "object",
+            "properties": {
+              "standard": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0},
+                  "output_per_million": {"type": "number", "minimum": 0}
+                }
+              },
+              "batch": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0},
+                  "output_per_million": {"type": "number", "minimum": 0}
+                }
+              }
+            }
+          },
+          "embeddings": {
+            "type": "object",
+            "properties": {
+              "standard": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0}
+                }
+              },
+              "batch": {
+                "type": "object",
+                "properties": {
+                  "input_per_million": {"type": "number", "minimum": 0}
+                }
+              }
+            }
+          }
+        },
+        "description": "Pricing information for the model"
+      },
+      "metadata": {
+        "type": "object",
+        "description": "Additional metadata about the model"
+      }
+    }
+  }
+}

data/lib/ruby_llm/providers/gemini/capabilities.rb CHANGED

@@ -280,6 +280,9 @@ module RubyLLM
           # Embedding output
           modalities[:output] << 'embeddings' if model_id.match?(/embedding|gemini-embedding/)
+          # Image output for imagen models
+          modalities[:output] = ['image'] if model_id.match?(/imagen/)
           modalities
         end

data/lib/ruby_llm/providers/mistral/capabilities.rb CHANGED

@@ -7,20 +7,24 @@ module RubyLLM
       module Capabilities
         module_function
-        def supports_streaming?(_model_id)
-          true
+        def supports_streaming?(model_id)
+          # All chat models support streaming, but not embedding/moderation/OCR/transcription
+          !model_id.match?(/embed|moderation|ocr|transcriptions/)
         end
-        def supports_tools?(_model_id)
-          true
+        def supports_tools?(model_id)
+          # Most chat models support tools except embedding/moderation/OCR/voxtral/transcription
+          !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions|mistral-(tiny|small)-(2312|2402)/)
         end
         def supports_vision?(model_id)
-          model_id.include?('pixtral')
+          # Models with vision capabilities
+          model_id.match?(/pixtral|mistral-small-(2503|2506)|mistral-medium/)
         end
-        def supports_json_mode?(_model_id)
-          true
+        def supports_json_mode?(model_id)
+          # Most chat models support JSON mode (structured output)
+          !model_id.match?(/embed|moderation|ocr|voxtral|transcriptions/) && supports_tools?(model_id)
         end
         def format_display_name(model_id)
@@ -71,7 +75,7 @@ module RubyLLM
           when /embed/
             {
               input: ['text'],
-              output: ['embedding']
+              output: ['embeddings']
             }
           else
             {
@@ -81,18 +85,26 @@ module RubyLLM
           end
         end
-        def capabilities_for(model_id)
+        def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
           case model_id
-          when /embed/ then { embeddings: true }
-          when /moderation/ then { moderation: true }
+          when /moderation/ then ['moderation']
+          when /voxtral.*transcribe/ then ['transcription']
+          when /ocr/ then ['vision']
           else
-            {
-              chat: true,
-              streaming: supports_streaming?(model_id),
-              tools: supports_tools?(model_id),
-              vision: supports_vision?(model_id),
-              json_mode: supports_json_mode?(model_id)
-            }
+            capabilities = []
+            capabilities << 'streaming' if supports_streaming?(model_id)
+            capabilities << 'function_calling' if supports_tools?(model_id)
+            capabilities << 'structured_output' if supports_json_mode?(model_id)
+            capabilities << 'vision' if supports_vision?(model_id)
+            # Model-specific capabilities
+            capabilities << 'reasoning' if model_id.match?(/magistral/)
+            capabilities << 'batch' unless model_id.match?(/voxtral|ocr|embed|moderation/)
+            capabilities << 'fine_tuning' if model_id.match?(/mistral-(small|medium|large)|devstral/)
+            capabilities << 'distillation' if model_id.match?(/ministral/)
+            capabilities << 'predicted_outputs' if model_id.match?(/codestral/)
+            capabilities.uniq
           end
         end

data/lib/ruby_llm/utils.rb CHANGED

@@ -24,6 +24,18 @@ module RubyLLM
       end
     end
+    def to_time(value)
+      return unless value
+      value.is_a?(Time) ? value : Time.parse(value.to_s)
+    end
+    def to_date(value)
+      return unless value
+      value.is_a?(Date) ? value : Date.parse(value.to_s)
+    end
     def deep_merge(params, payload)
       params.merge(payload) do |_key, params_value, payload_value|
         if params_value.is_a?(Hash) && payload_value.is_a?(Hash)

data/lib/ruby_llm/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module RubyLLM
-  VERSION = '1.5.0'
+  VERSION = '1.5.1'
 end

data/lib/tasks/models_update.rake CHANGED

@@ -2,6 +2,7 @@
 require 'dotenv/load'
 require 'ruby_llm'
+require 'json-schema'
 task default: ['models:update']
@@ -49,6 +50,9 @@ def refresh_models
   elsif models.all.size == initial_count && initial_count.positive?
     puts 'Warning: Model list unchanged.'
   else
+    puts 'Validating models...'
+    validate_models!(models)
     puts "Saving models.json (#{models.all.size} models)"
     models.save_models
   end
@@ -56,6 +60,28 @@ def refresh_models
   @models = models
 end
+def validate_models!(models)
+  schema_path = File.expand_path('../ruby_llm/models_schema.json', __dir__)
+  models_data = models.all.map(&:to_h)
+  validation_errors = JSON::Validator.fully_validate(schema_path, models_data)
+  unless validation_errors.empty?
+    # Save failed models for inspection
+    failed_path = File.expand_path('../ruby_llm/models.failed.json', __dir__)
+    File.write(failed_path, JSON.pretty_generate(models_data))
+    puts 'ERROR: Models validation failed:'
+    puts "\nValidation errors:"
+    validation_errors.first(10).each { |error| puts "  - #{error}" }
+    puts "  ... and #{validation_errors.size - 10} more errors" if validation_errors.size > 10
+    puts "-> Failed models saved to: #{failed_path}"
+    exit(1)
+  end
+  puts '✓ Models validation passed'
+end
 def display_model_stats
   puts "\nModel count:"
   provider_counts = @models.all.group_by(&:provider).transform_values(&:count)

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm
 version: !ruby/object:Gem::Version
-  version: 1.5.0
+  version: 1.5.1
 platform: ruby
 authors:
 - Carmine Paolino
@@ -168,6 +168,7 @@ files:
 - lib/ruby_llm/model/pricing_tier.rb
 - lib/ruby_llm/models.json
 - lib/ruby_llm/models.rb
+- lib/ruby_llm/models_schema.json
 - lib/ruby_llm/provider.rb
 - lib/ruby_llm/providers/anthropic.rb
 - lib/ruby_llm/providers/anthropic/capabilities.rb