RubyGems - ruby_llm - Versions diffs - 1.1.1 → 1.2.0 - Mend

ruby_llm 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/README.md +2 -2
data/lib/ruby_llm/chat.rb +19 -6
data/lib/ruby_llm/configuration.rb +2 -1
data/lib/ruby_llm/models.json +416 -80
data/lib/ruby_llm/providers/bedrock/models.rb +8 -1
data/lib/ruby_llm/providers/openai/capabilities.rb +23 -8
data/lib/ruby_llm/providers/openai.rb +1 -1
data/lib/ruby_llm/version.rb +1 -1
data/lib/ruby_llm.rb +2 -2
data/lib/tasks/models_docs.rake +13 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d07eaf11ea6e6cc923921ebaa92341c91c0aab15021c7347cee4db960defea3f
-  data.tar.gz: e0a024fe5f82ecada1ecb6d3bc9ab3e3b24f8488cf10cf44ddaf8c1621746255
+  metadata.gz: 2f06ce431337dc189e6172b0c98ed897fdba930200f3f118c39c15f4527ec135
+  data.tar.gz: 18f8ff36e7ee18cbee315e66db4b8f04619c98595a5de3b73d215bed248ca0d4
 SHA512:
-  metadata.gz: 33667bbaf9573ed597f29580a7f61855ddddad0d844891aa0b0085ed444a4bd5f36d78d930f9069ca09bee8f89f957eee1570a8df6f547a5ea34cb0b0c332787
-  data.tar.gz: bbcd322f99ee88b1a588743bd0e3f29baf461a1b7548b4c0024b52afc918ffe03ac413e82fc66fc476ed3b5dd8448ae724c97bd81189b1cacb2bbd1ca0db0abb
+  metadata.gz: 42f7603cfec24fa6cc59b1186d2d6a90af9e9076eb79124ac5ce09d73000fbcdb931ab90cafe21bf95b39417a52ff000ca9d02ba51c8a78877d1a1f47b70866f
+  data.tar.gz: 8513b6774ef3d745e7bbc8947f856608d13ade163ca658139c2992e923ad08d5ee00b99275cbfe762591182897b73db7bfc879fae8db7b8ce2f3ba1fea5ee235

data/README.md CHANGED Viewed

@@ -118,7 +118,7 @@ end
 ## Have great conversations
 ```ruby
-# Start a chat with the default model (GPT-4o-mini)
+# Start a chat with the default model (gpt-4.1-nano)
 chat = RubyLLM.chat
 # Or specify what you want
@@ -169,7 +169,7 @@ class ToolCall < ApplicationRecord
 end
 # In a background job
-chat = Chat.create! model_id: "gpt-4o-mini"
+chat = Chat.create! model_id: "gpt-4.1-nano"
 # Set personality or behavior with instructions (aka system prompts) - they're persisted too!
 chat.with_instructions "You are a friendly Ruby expert who loves to help beginners"

data/lib/ruby_llm/chat.rb CHANGED Viewed

@@ -8,14 +8,18 @@ module RubyLLM
   #   chat = RubyLLM.chat
   #   chat.ask "What's the best way to learn Ruby?"
   #   chat.ask "Can you elaborate on that?"
-  class Chat
+  class Chat # rubocop:disable Metrics/ClassLength
     include Enumerable
     attr_reader :model, :messages, :tools
-    def initialize(model: nil, provider: nil)
+    def initialize(model: nil, provider: nil, assume_model_exists: false) # rubocop:disable Metrics/MethodLength
+      if assume_model_exists && !provider
+        raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
+      end
       model_id = model || RubyLLM.config.default_model
-      with_model(model_id, provider: provider)
+      with_model(model_id, provider: provider, assume_exists: assume_model_exists)
       @temperature = 0.7
       @messages = []
       @tools = {}
@@ -54,9 +58,18 @@ module RubyLLM
       self
     end
-    def with_model(model_id, provider: nil)
-      @model = Models.find model_id, provider
-      @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}")
+    def with_model(model_id, provider: nil, assume_exists: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
+      if assume_exists
+        raise ArgumentError, 'Provider must be specified if assume_exists is true' unless provider
+        @provider = Provider.providers[provider.to_sym] || raise(Error, "Unknown provider: #{provider.to_sym}")
+        @model = Struct.new(:id, :provider, :supports_functions, :supports_vision).new(model_id, provider, true, true)
+        RubyLLM.logger.warn "Assuming model '#{model_id}' exists for provider '#{provider}'. " \
+                            'Capabilities may not be accurately reflected.'
+      else
+        @model = Models.find model_id, provider
+        @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}")
+      end
       self
     end

data/lib/ruby_llm/configuration.rb CHANGED Viewed

@@ -12,6 +12,7 @@ module RubyLLM
   class Configuration
     # Provider-specific configuration
     attr_accessor :openai_api_key,
+                  :openai_api_base,
                   :anthropic_api_key,
                   :gemini_api_key,
                   :deepseek_api_key,
@@ -39,7 +40,7 @@ module RubyLLM
       @retry_interval_randomness = 0.5
       # Default models
-      @default_model = 'gpt-4o-mini'
+      @default_model = 'gpt-4.1-nano'
       @default_embedding_model = 'text-embedding-3-small'
       @default_image_model = 'dall-e-3'
     end

data/lib/ruby_llm/models.json CHANGED Viewed

@@ -4,7 +4,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Haiku",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_5_haiku",
@@ -34,7 +34,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -47,7 +47,8 @@
       "provider_name": "Anthropic",
       "customizations_supported": [],
       "inference_configurations": [
-        "ON_DEMAND"
+        "ON_DEMAND",
+        "INFERENCE_PROFILE"
       ],
       "response_streaming_supported": true,
       "input_modalities": [
@@ -64,7 +65,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -94,7 +95,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -124,7 +125,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -154,7 +155,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet v2",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -184,7 +185,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet v2",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -214,7 +215,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet v2",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -244,7 +245,7 @@
     "created_at": null,
     "display_name": "Claude 3.5 Sonnet v2",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude3_sonnet",
@@ -269,36 +270,6 @@
       ]
     }
   },
-  {
-    "id": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
-    "created_at": null,
-    "display_name": "Claude 3.7 Sonnet",
-    "provider": "bedrock",
-    "context_window": 4096,
-    "max_tokens": 4096,
-    "type": "chat",
-    "family": "claude3_sonnet",
-    "supports_vision": true,
-    "supports_functions": true,
-    "supports_json_mode": true,
-    "input_price_per_million": 3.0,
-    "output_price_per_million": 15.0,
-    "metadata": {
-      "provider_name": "Anthropic",
-      "customizations_supported": [],
-      "inference_configurations": [
-        "INFERENCE_PROFILE"
-      ],
-      "response_streaming_supported": true,
-      "input_modalities": [
-        "TEXT",
-        "IMAGE"
-      ],
-      "output_modalities": [
-        "TEXT"
-      ]
-    }
-  },
   {
     "id": "anthropic.claude-3-haiku-20240307-v1:0",
     "created_at": null,
@@ -607,13 +578,13 @@
     "created_at": null,
     "display_name": "Claude Instant",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude_instant",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 0.8,
     "output_price_per_million": 2.4,
     "metadata": {
@@ -636,13 +607,13 @@
     "created_at": null,
     "display_name": "Claude Instant",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude_instant",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 0.8,
     "output_price_per_million": 2.4,
     "metadata": {
@@ -665,13 +636,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -694,13 +665,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -723,13 +694,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -752,13 +723,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -781,13 +752,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -810,13 +781,13 @@
     "created_at": null,
     "display_name": "Claude",
     "provider": "bedrock",
-    "context_window": 4096,
+    "context_window": 200000,
     "max_tokens": 4096,
     "type": "chat",
     "family": "claude2",
-    "supports_vision": false,
-    "supports_functions": false,
-    "supports_json_mode": false,
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
     "input_price_per_million": 8.0,
     "output_price_per_million": 24.0,
     "metadata": {
@@ -1613,7 +1584,8 @@
       "output_token_limit": 8192,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -1638,7 +1610,8 @@
       "output_token_limit": 8192,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -1794,6 +1767,31 @@
       ]
     }
   },
+  {
+    "id": "gemini-2.0-flash-live-001",
+    "created_at": null,
+    "display_name": "Gemini 2.0 Flash 001",
+    "provider": "gemini",
+    "context_window": 131072,
+    "max_tokens": 8192,
+    "type": "chat",
+    "family": "gemini20_flash",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.1,
+    "output_price_per_million": 0.4,
+    "metadata": {
+      "version": "001",
+      "description": "Gemini 2.0 Flash 001",
+      "input_token_limit": 131072,
+      "output_token_limit": 8192,
+      "supported_generation_methods": [
+        "bidiGenerateContent",
+        "countTokens"
+      ]
+    }
+  },
   {
     "id": "gemini-2.0-flash-thinking-exp",
     "created_at": null,
@@ -1890,7 +1888,8 @@
       "output_token_limit": 65536,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -1915,7 +1914,8 @@
       "output_token_limit": 65536,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -1940,7 +1940,34 @@
       "output_token_limit": 65536,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
+      ]
+    }
+  },
+  {
+    "id": "gemini-2.5-pro-preview-03-25",
+    "created_at": null,
+    "display_name": "Gemini 2.5 Pro Preview 03-25",
+    "provider": "gemini",
+    "context_window": 1048576,
+    "max_tokens": 65536,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "2.5-preview-03-25",
+      "description": "Gemini 2.5 Pro Preview 03-25",
+      "input_token_limit": 1048576,
+      "output_token_limit": 65536,
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -1964,7 +1991,8 @@
       "input_token_limit": 8192,
       "output_token_limit": 1,
       "supported_generation_methods": [
-        "embedContent"
+        "embedContent",
+        "countTextTokens"
       ]
     }
   },
@@ -1988,7 +2016,8 @@
       "input_token_limit": 8192,
       "output_token_limit": 1,
       "supported_generation_methods": [
-        "embedContent"
+        "embedContent",
+        "countTextTokens"
       ]
     }
   },
@@ -2013,7 +2042,8 @@
       "output_token_limit": 65536,
       "supported_generation_methods": [
         "generateContent",
-        "countTokens"
+        "countTokens",
+        "createCachedContent"
       ]
     }
   },
@@ -2042,6 +2072,56 @@
       ]
     }
   },
+  {
+    "id": "gemma-3-12b-it",
+    "created_at": null,
+    "display_name": "Gemma 3 12B",
+    "provider": "gemini",
+    "context_window": 32768,
+    "max_tokens": 8192,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "001",
+      "description": null,
+      "input_token_limit": 32768,
+      "output_token_limit": 8192,
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
+  },
+  {
+    "id": "gemma-3-1b-it",
+    "created_at": null,
+    "display_name": "Gemma 3 1B",
+    "provider": "gemini",
+    "context_window": 32768,
+    "max_tokens": 8192,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "001",
+      "description": null,
+      "input_token_limit": 32768,
+      "output_token_limit": 8192,
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
+  },
   {
     "id": "gemma-3-27b-it",
     "created_at": null,
@@ -2067,6 +2147,31 @@
       ]
     }
   },
+  {
+    "id": "gemma-3-4b-it",
+    "created_at": null,
+    "display_name": "Gemma 3 4B",
+    "provider": "gemini",
+    "context_window": 32768,
+    "max_tokens": 8192,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "001",
+      "description": null,
+      "input_token_limit": 32768,
+      "output_token_limit": 8192,
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
+  },
   {
     "id": "gpt-3.5-turbo",
     "created_at": "2023-02-28T19:56:42+01:00",
@@ -2314,6 +2419,120 @@
       "owned_by": "system"
     }
   },
+  {
+    "id": "gpt-4.1",
+    "created_at": "2025-04-10T22:22:22+02:00",
+    "display_name": "GPT-4.1",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 2.0,
+    "output_price_per_million": 8.0,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4.1-2025-04-14",
+    "created_at": "2025-04-10T22:09:06+02:00",
+    "display_name": "GPT-4.1 20250414",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 2.0,
+    "output_price_per_million": 8.0,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4.1-mini",
+    "created_at": "2025-04-10T22:49:33+02:00",
+    "display_name": "GPT-4.1 Mini",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41_mini",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.4,
+    "output_price_per_million": 1.6,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4.1-mini-2025-04-14",
+    "created_at": "2025-04-10T22:39:07+02:00",
+    "display_name": "GPT-4.1 Mini 20250414",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41_mini",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.4,
+    "output_price_per_million": 1.6,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4.1-nano",
+    "created_at": "2025-04-10T23:48:27+02:00",
+    "display_name": "GPT-4.1 Nano",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41_nano",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.1,
+    "output_price_per_million": 0.4,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "gpt-4.1-nano-2025-04-14",
+    "created_at": "2025-04-10T23:37:05+02:00",
+    "display_name": "GPT-4.1 Nano 20250414",
+    "provider": "openai",
+    "context_window": 1047576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "gpt41_nano",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.1,
+    "output_price_per_million": 0.4,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
   {
     "id": "gpt-4.5-preview",
     "created_at": "2025-02-27T03:24:19+01:00",
@@ -2838,6 +3057,31 @@
       ]
     }
   },
+  {
+    "id": "learnlm-2.0-flash-experimental",
+    "created_at": null,
+    "display_name": "LearnLM 2.0 Flash Experimental",
+    "provider": "gemini",
+    "context_window": 1048576,
+    "max_tokens": 32768,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "2.0",
+      "description": "LearnLM 2.0 Flash Experimental",
+      "input_token_limit": 1048576,
+      "output_token_limit": 32768,
+      "supported_generation_methods": [
+        "generateContent",
+        "countTokens"
+      ]
+    }
+  },
   {
     "id": "o1",
     "created_at": "2024-12-16T20:03:36+01:00",
@@ -3028,6 +3272,44 @@
       "owned_by": "system"
     }
   },
+  {
+    "id": "o4-mini",
+    "created_at": "2025-04-09T21:02:31+02:00",
+    "display_name": "O4 Mini",
+    "provider": "openai",
+    "context_window": 4096,
+    "max_tokens": 16384,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.5,
+    "output_price_per_million": 1.5,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
+  {
+    "id": "o4-mini-2025-04-16",
+    "created_at": "2025-04-08T19:31:46+02:00",
+    "display_name": "O4 Mini 20250416",
+    "provider": "openai",
+    "context_window": 4096,
+    "max_tokens": 16384,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.5,
+    "output_price_per_million": 1.5,
+    "metadata": {
+      "object": "model",
+      "owned_by": "system"
+    }
+  },
   {
     "id": "omni-moderation-2024-09-26",
     "created_at": "2024-11-27T20:07:46+01:00",
@@ -3249,6 +3531,60 @@
       "owned_by": "system"
     }
   },
+  {
+    "id": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "created_at": null,
+    "display_name": "Claude 3.7 Sonnet",
+    "provider": "bedrock",
+    "context_window": 200000,
+    "max_tokens": 4096,
+    "type": "chat",
+    "family": "claude3_sonnet",
+    "supports_vision": true,
+    "supports_functions": true,
+    "supports_json_mode": true,
+    "input_price_per_million": 3.0,
+    "output_price_per_million": 15.0,
+    "metadata": {
+      "provider_name": "Anthropic",
+      "customizations_supported": [],
+      "inference_configurations": [
+        "INFERENCE_PROFILE"
+      ],
+      "response_streaming_supported": true,
+      "input_modalities": [
+        "TEXT",
+        "IMAGE"
+      ],
+      "output_modalities": [
+        "TEXT"
+      ]
+    }
+  },
+  {
+    "id": "veo-2.0-generate-001",
+    "created_at": null,
+    "display_name": "Veo 2",
+    "provider": "gemini",
+    "context_window": 480,
+    "max_tokens": 8192,
+    "type": "chat",
+    "family": "other",
+    "supports_vision": false,
+    "supports_functions": false,
+    "supports_json_mode": false,
+    "input_price_per_million": 0.075,
+    "output_price_per_million": 0.3,
+    "metadata": {
+      "version": "2.0",
+      "description": "Vertex served Veo 2 model.",
+      "input_token_limit": 480,
+      "output_token_limit": 8192,
+      "supported_generation_methods": [
+        "predictLongRunning"
+      ]
+    }
+  },
   {
     "id": "whisper-1",
     "created_at": "2023-02-27T22:13:04+01:00",

data/lib/ruby_llm/providers/bedrock/models.rb CHANGED Viewed

@@ -42,13 +42,20 @@ module RubyLLM
         def base_model_attributes(model_id, model, slug)
           {
-            id: model_id,
+            id: model_id_with_prefix(model_id, model),
             created_at: nil,
             display_name: model['modelName'] || capabilities.format_display_name(model_id),
             provider: slug
           }
         end
+        def model_id_with_prefix(model_id, model)
+          return model_id unless model['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
+          return model_id if model['inferenceTypesSupported']&.include?('ON_DEMAND')
+          "us.#{model_id}"
+        end
         def capability_attributes(model_id, capabilities)
           {
             context_window: capabilities.context_window_for(model_id),

data/lib/ruby_llm/providers/openai/capabilities.rb CHANGED Viewed

@@ -3,13 +3,15 @@
 module RubyLLM
   module Providers
     module OpenAI
-      # Determines capabilities and pricing for OpenAI models
-      module Capabilities # rubocop:disable Metrics/ModuleLength
+      module Capabilities # rubocop:disable Metrics/ModuleLength,Style/Documentation
         module_function
         MODEL_PATTERNS = {
           dall_e: /^dall-e/,
           chatgpt4o: /^chatgpt-4o/,
+          gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
+          gpt41_mini: /^gpt-4\.1-mini/,
+          gpt41_nano: /^gpt-4\.1-nano/,
           gpt4: /^gpt-4(?:-\d{6})?$/,
           gpt4_turbo: /^gpt-4(?:\.5)?-(?:\d{6}-)?(preview|turbo)/,
           gpt35_turbo: /^gpt-3\.5-turbo/,
@@ -38,8 +40,9 @@ module RubyLLM
           moderation: /^(?:omni|text)-moderation/
         }.freeze
-        def context_window_for(model_id) # rubocop:disable Metrics/MethodLength
+        def context_window_for(model_id) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
           case model_family(model_id)
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
           when 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
                'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
                'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
@@ -55,6 +58,7 @@ module RubyLLM
         def max_tokens_for(model_id) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
           case model_family(model_id)
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
           when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
           when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
           when 'gpt4' then 8_192
@@ -71,15 +75,16 @@ module RubyLLM
         def supports_vision?(model_id)
           case model_family(model_id)
-          when 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
-               'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1',
+               'o1_pro', 'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
           else false
           end
         end
         def supports_functions?(model_id)
           case model_family(model_id)
-          when 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
+               'o3_mini' then true
           when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
                'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
           else false # rubocop:disable Lint/DuplicateBranch
@@ -88,7 +93,8 @@ module RubyLLM
         def supports_structured_output?(model_id)
           case model_family(model_id)
-          when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
+          when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
+               'o3_mini' then true
           else false
           end
         end
@@ -98,6 +104,9 @@ module RubyLLM
         end
         PRICES = {
+          gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
+          gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
+          gpt41_nano: { input: 0.1, output: 0.4 },
           chatgpt4o: { input: 5.0, output: 15.0 },
           gpt4: { input: 10.0, output: 30.0 },
           gpt4_turbo: { input: 10.0, output: 30.0 },
@@ -141,6 +150,12 @@ module RubyLLM
           prices[:input] || prices[:price] || default_input_price
         end
+        def cached_input_price_for(model_id)
+          family = model_family(model_id).to_sym
+          prices = PRICES.fetch(family, {})
+          prices[:cached_input]
+        end
         def output_price_for(model_id)
           family = model_family(model_id).to_sym
           prices = PRICES.fetch(family, { output: default_output_price })
@@ -200,7 +215,7 @@ module RubyLLM
         end
         def normalize_temperature(temperature, model_id)
-          if model_id.match?(/^o[13]/)
+          if model_id.match?(/^o\d/)
             RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
             1.0
           else

data/lib/ruby_llm/providers/openai.rb CHANGED Viewed

@@ -29,7 +29,7 @@ module RubyLLM
       module_function
       def api_base
-        'https://api.openai.com/v1'
+        RubyLLM.config.openai_api_base || 'https://api.openai.com/v1'
       end
       def headers

data/lib/ruby_llm/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module RubyLLM
-  VERSION = '1.1.1'
+  VERSION = '1.2.0'
 end

data/lib/ruby_llm.rb CHANGED Viewed

@@ -30,8 +30,8 @@ module RubyLLM
   class Error < StandardError; end
   class << self
-    def chat(model: nil, provider: nil)
-      Chat.new(model: model, provider: provider)
+    def chat(model: nil, provider: nil, assume_model_exists: false)
+      Chat.new(model:, provider:, assume_model_exists:)
     end
     def embed(...)

data/lib/tasks/models_docs.rake CHANGED Viewed

@@ -86,10 +86,18 @@ namespace :models do # rubocop:disable Metrics/BlockLength
       ---
       # Available Models
+      {: .no_toc }
       This guide lists all models available in RubyLLM, automatically generated from the current model registry.
+      {: .fs-6 .fw-300 }
-      _Last updated: #{Time.now.utc.strftime('%Y-%m-%d')}_
+      ## Table of contents
+      {: .no_toc .text-delta }
+      1. TOC
+      {:toc}
+      ---
       ## Contributing
@@ -115,6 +123,10 @@ namespace :models do # rubocop:disable Metrics/BlockLength
       For more information about working with models, see the [Working with Models](/guides/models) guide.
       ## Models by Type
+      {: .d-inline-block }
+      Last updated: #{Time.now.utc.strftime('%Y-%m-%d')}
+      {: .label .label-green }
       ### Chat Models (#{RubyLLM.models.chat_models.count})

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby_llm
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.2.0
 platform: ruby
 authors:
 - Carmine Paolino
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2025-04-11 00:00:00.000000000 Z
+date: 2025-04-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: base64