ruby_llm 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d07eaf11ea6e6cc923921ebaa92341c91c0aab15021c7347cee4db960defea3f
4
- data.tar.gz: e0a024fe5f82ecada1ecb6d3bc9ab3e3b24f8488cf10cf44ddaf8c1621746255
3
+ metadata.gz: 2f06ce431337dc189e6172b0c98ed897fdba930200f3f118c39c15f4527ec135
4
+ data.tar.gz: 18f8ff36e7ee18cbee315e66db4b8f04619c98595a5de3b73d215bed248ca0d4
5
5
  SHA512:
6
- metadata.gz: 33667bbaf9573ed597f29580a7f61855ddddad0d844891aa0b0085ed444a4bd5f36d78d930f9069ca09bee8f89f957eee1570a8df6f547a5ea34cb0b0c332787
7
- data.tar.gz: bbcd322f99ee88b1a588743bd0e3f29baf461a1b7548b4c0024b52afc918ffe03ac413e82fc66fc476ed3b5dd8448ae724c97bd81189b1cacb2bbd1ca0db0abb
6
+ metadata.gz: 42f7603cfec24fa6cc59b1186d2d6a90af9e9076eb79124ac5ce09d73000fbcdb931ab90cafe21bf95b39417a52ff000ca9d02ba51c8a78877d1a1f47b70866f
7
+ data.tar.gz: 8513b6774ef3d745e7bbc8947f856608d13ade163ca658139c2992e923ad08d5ee00b99275cbfe762591182897b73db7bfc879fae8db7b8ce2f3ba1fea5ee235
data/README.md CHANGED
@@ -118,7 +118,7 @@ end
118
118
  ## Have great conversations
119
119
 
120
120
  ```ruby
121
- # Start a chat with the default model (GPT-4o-mini)
121
+ # Start a chat with the default model (gpt-4.1-nano)
122
122
  chat = RubyLLM.chat
123
123
 
124
124
  # Or specify what you want
@@ -169,7 +169,7 @@ class ToolCall < ApplicationRecord
169
169
  end
170
170
 
171
171
  # In a background job
172
- chat = Chat.create! model_id: "gpt-4o-mini"
172
+ chat = Chat.create! model_id: "gpt-4.1-nano"
173
173
 
174
174
  # Set personality or behavior with instructions (aka system prompts) - they're persisted too!
175
175
  chat.with_instructions "You are a friendly Ruby expert who loves to help beginners"
data/lib/ruby_llm/chat.rb CHANGED
@@ -8,14 +8,18 @@ module RubyLLM
8
8
  # chat = RubyLLM.chat
9
9
  # chat.ask "What's the best way to learn Ruby?"
10
10
  # chat.ask "Can you elaborate on that?"
11
- class Chat
11
+ class Chat # rubocop:disable Metrics/ClassLength
12
12
  include Enumerable
13
13
 
14
14
  attr_reader :model, :messages, :tools
15
15
 
16
- def initialize(model: nil, provider: nil)
16
+ def initialize(model: nil, provider: nil, assume_model_exists: false) # rubocop:disable Metrics/MethodLength
17
+ if assume_model_exists && !provider
18
+ raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
19
+ end
20
+
17
21
  model_id = model || RubyLLM.config.default_model
18
- with_model(model_id, provider: provider)
22
+ with_model(model_id, provider: provider, assume_exists: assume_model_exists)
19
23
  @temperature = 0.7
20
24
  @messages = []
21
25
  @tools = {}
@@ -54,9 +58,18 @@ module RubyLLM
54
58
  self
55
59
  end
56
60
 
57
- def with_model(model_id, provider: nil)
58
- @model = Models.find model_id, provider
59
- @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}")
61
+ def with_model(model_id, provider: nil, assume_exists: false) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
62
+ if assume_exists
63
+ raise ArgumentError, 'Provider must be specified if assume_exists is true' unless provider
64
+
65
+ @provider = Provider.providers[provider.to_sym] || raise(Error, "Unknown provider: #{provider.to_sym}")
66
+ @model = Struct.new(:id, :provider, :supports_functions, :supports_vision).new(model_id, provider, true, true)
67
+ RubyLLM.logger.warn "Assuming model '#{model_id}' exists for provider '#{provider}'. " \
68
+ 'Capabilities may not be accurately reflected.'
69
+ else
70
+ @model = Models.find model_id, provider
71
+ @provider = Provider.providers[@model.provider.to_sym] || raise(Error, "Unknown provider: #{@model.provider}")
72
+ end
60
73
  self
61
74
  end
62
75
 
@@ -12,6 +12,7 @@ module RubyLLM
12
12
  class Configuration
13
13
  # Provider-specific configuration
14
14
  attr_accessor :openai_api_key,
15
+ :openai_api_base,
15
16
  :anthropic_api_key,
16
17
  :gemini_api_key,
17
18
  :deepseek_api_key,
@@ -39,7 +40,7 @@ module RubyLLM
39
40
  @retry_interval_randomness = 0.5
40
41
 
41
42
  # Default models
42
- @default_model = 'gpt-4o-mini'
43
+ @default_model = 'gpt-4.1-nano'
43
44
  @default_embedding_model = 'text-embedding-3-small'
44
45
  @default_image_model = 'dall-e-3'
45
46
  end
@@ -4,7 +4,7 @@
4
4
  "created_at": null,
5
5
  "display_name": "Claude 3.5 Haiku",
6
6
  "provider": "bedrock",
7
- "context_window": 4096,
7
+ "context_window": 200000,
8
8
  "max_tokens": 4096,
9
9
  "type": "chat",
10
10
  "family": "claude3_5_haiku",
@@ -34,7 +34,7 @@
34
34
  "created_at": null,
35
35
  "display_name": "Claude 3.5 Sonnet",
36
36
  "provider": "bedrock",
37
- "context_window": 4096,
37
+ "context_window": 200000,
38
38
  "max_tokens": 4096,
39
39
  "type": "chat",
40
40
  "family": "claude3_sonnet",
@@ -47,7 +47,8 @@
47
47
  "provider_name": "Anthropic",
48
48
  "customizations_supported": [],
49
49
  "inference_configurations": [
50
- "ON_DEMAND"
50
+ "ON_DEMAND",
51
+ "INFERENCE_PROFILE"
51
52
  ],
52
53
  "response_streaming_supported": true,
53
54
  "input_modalities": [
@@ -64,7 +65,7 @@
64
65
  "created_at": null,
65
66
  "display_name": "Claude 3.5 Sonnet",
66
67
  "provider": "bedrock",
67
- "context_window": 4096,
68
+ "context_window": 200000,
68
69
  "max_tokens": 4096,
69
70
  "type": "chat",
70
71
  "family": "claude3_sonnet",
@@ -94,7 +95,7 @@
94
95
  "created_at": null,
95
96
  "display_name": "Claude 3.5 Sonnet",
96
97
  "provider": "bedrock",
97
- "context_window": 4096,
98
+ "context_window": 200000,
98
99
  "max_tokens": 4096,
99
100
  "type": "chat",
100
101
  "family": "claude3_sonnet",
@@ -124,7 +125,7 @@
124
125
  "created_at": null,
125
126
  "display_name": "Claude 3.5 Sonnet",
126
127
  "provider": "bedrock",
127
- "context_window": 4096,
128
+ "context_window": 200000,
128
129
  "max_tokens": 4096,
129
130
  "type": "chat",
130
131
  "family": "claude3_sonnet",
@@ -154,7 +155,7 @@
154
155
  "created_at": null,
155
156
  "display_name": "Claude 3.5 Sonnet v2",
156
157
  "provider": "bedrock",
157
- "context_window": 4096,
158
+ "context_window": 200000,
158
159
  "max_tokens": 4096,
159
160
  "type": "chat",
160
161
  "family": "claude3_sonnet",
@@ -184,7 +185,7 @@
184
185
  "created_at": null,
185
186
  "display_name": "Claude 3.5 Sonnet v2",
186
187
  "provider": "bedrock",
187
- "context_window": 4096,
188
+ "context_window": 200000,
188
189
  "max_tokens": 4096,
189
190
  "type": "chat",
190
191
  "family": "claude3_sonnet",
@@ -214,7 +215,7 @@
214
215
  "created_at": null,
215
216
  "display_name": "Claude 3.5 Sonnet v2",
216
217
  "provider": "bedrock",
217
- "context_window": 4096,
218
+ "context_window": 200000,
218
219
  "max_tokens": 4096,
219
220
  "type": "chat",
220
221
  "family": "claude3_sonnet",
@@ -244,7 +245,7 @@
244
245
  "created_at": null,
245
246
  "display_name": "Claude 3.5 Sonnet v2",
246
247
  "provider": "bedrock",
247
- "context_window": 4096,
248
+ "context_window": 200000,
248
249
  "max_tokens": 4096,
249
250
  "type": "chat",
250
251
  "family": "claude3_sonnet",
@@ -269,36 +270,6 @@
269
270
  ]
270
271
  }
271
272
  },
272
- {
273
- "id": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
274
- "created_at": null,
275
- "display_name": "Claude 3.7 Sonnet",
276
- "provider": "bedrock",
277
- "context_window": 4096,
278
- "max_tokens": 4096,
279
- "type": "chat",
280
- "family": "claude3_sonnet",
281
- "supports_vision": true,
282
- "supports_functions": true,
283
- "supports_json_mode": true,
284
- "input_price_per_million": 3.0,
285
- "output_price_per_million": 15.0,
286
- "metadata": {
287
- "provider_name": "Anthropic",
288
- "customizations_supported": [],
289
- "inference_configurations": [
290
- "INFERENCE_PROFILE"
291
- ],
292
- "response_streaming_supported": true,
293
- "input_modalities": [
294
- "TEXT",
295
- "IMAGE"
296
- ],
297
- "output_modalities": [
298
- "TEXT"
299
- ]
300
- }
301
- },
302
273
  {
303
274
  "id": "anthropic.claude-3-haiku-20240307-v1:0",
304
275
  "created_at": null,
@@ -607,13 +578,13 @@
607
578
  "created_at": null,
608
579
  "display_name": "Claude Instant",
609
580
  "provider": "bedrock",
610
- "context_window": 4096,
581
+ "context_window": 200000,
611
582
  "max_tokens": 4096,
612
583
  "type": "chat",
613
584
  "family": "claude_instant",
614
- "supports_vision": false,
615
- "supports_functions": false,
616
- "supports_json_mode": false,
585
+ "supports_vision": true,
586
+ "supports_functions": true,
587
+ "supports_json_mode": true,
617
588
  "input_price_per_million": 0.8,
618
589
  "output_price_per_million": 2.4,
619
590
  "metadata": {
@@ -636,13 +607,13 @@
636
607
  "created_at": null,
637
608
  "display_name": "Claude Instant",
638
609
  "provider": "bedrock",
639
- "context_window": 4096,
610
+ "context_window": 200000,
640
611
  "max_tokens": 4096,
641
612
  "type": "chat",
642
613
  "family": "claude_instant",
643
- "supports_vision": false,
644
- "supports_functions": false,
645
- "supports_json_mode": false,
614
+ "supports_vision": true,
615
+ "supports_functions": true,
616
+ "supports_json_mode": true,
646
617
  "input_price_per_million": 0.8,
647
618
  "output_price_per_million": 2.4,
648
619
  "metadata": {
@@ -665,13 +636,13 @@
665
636
  "created_at": null,
666
637
  "display_name": "Claude",
667
638
  "provider": "bedrock",
668
- "context_window": 4096,
639
+ "context_window": 200000,
669
640
  "max_tokens": 4096,
670
641
  "type": "chat",
671
642
  "family": "claude2",
672
- "supports_vision": false,
673
- "supports_functions": false,
674
- "supports_json_mode": false,
643
+ "supports_vision": true,
644
+ "supports_functions": true,
645
+ "supports_json_mode": true,
675
646
  "input_price_per_million": 8.0,
676
647
  "output_price_per_million": 24.0,
677
648
  "metadata": {
@@ -694,13 +665,13 @@
694
665
  "created_at": null,
695
666
  "display_name": "Claude",
696
667
  "provider": "bedrock",
697
- "context_window": 4096,
668
+ "context_window": 200000,
698
669
  "max_tokens": 4096,
699
670
  "type": "chat",
700
671
  "family": "claude2",
701
- "supports_vision": false,
702
- "supports_functions": false,
703
- "supports_json_mode": false,
672
+ "supports_vision": true,
673
+ "supports_functions": true,
674
+ "supports_json_mode": true,
704
675
  "input_price_per_million": 8.0,
705
676
  "output_price_per_million": 24.0,
706
677
  "metadata": {
@@ -723,13 +694,13 @@
723
694
  "created_at": null,
724
695
  "display_name": "Claude",
725
696
  "provider": "bedrock",
726
- "context_window": 4096,
697
+ "context_window": 200000,
727
698
  "max_tokens": 4096,
728
699
  "type": "chat",
729
700
  "family": "claude2",
730
- "supports_vision": false,
731
- "supports_functions": false,
732
- "supports_json_mode": false,
701
+ "supports_vision": true,
702
+ "supports_functions": true,
703
+ "supports_json_mode": true,
733
704
  "input_price_per_million": 8.0,
734
705
  "output_price_per_million": 24.0,
735
706
  "metadata": {
@@ -752,13 +723,13 @@
752
723
  "created_at": null,
753
724
  "display_name": "Claude",
754
725
  "provider": "bedrock",
755
- "context_window": 4096,
726
+ "context_window": 200000,
756
727
  "max_tokens": 4096,
757
728
  "type": "chat",
758
729
  "family": "claude2",
759
- "supports_vision": false,
760
- "supports_functions": false,
761
- "supports_json_mode": false,
730
+ "supports_vision": true,
731
+ "supports_functions": true,
732
+ "supports_json_mode": true,
762
733
  "input_price_per_million": 8.0,
763
734
  "output_price_per_million": 24.0,
764
735
  "metadata": {
@@ -781,13 +752,13 @@
781
752
  "created_at": null,
782
753
  "display_name": "Claude",
783
754
  "provider": "bedrock",
784
- "context_window": 4096,
755
+ "context_window": 200000,
785
756
  "max_tokens": 4096,
786
757
  "type": "chat",
787
758
  "family": "claude2",
788
- "supports_vision": false,
789
- "supports_functions": false,
790
- "supports_json_mode": false,
759
+ "supports_vision": true,
760
+ "supports_functions": true,
761
+ "supports_json_mode": true,
791
762
  "input_price_per_million": 8.0,
792
763
  "output_price_per_million": 24.0,
793
764
  "metadata": {
@@ -810,13 +781,13 @@
810
781
  "created_at": null,
811
782
  "display_name": "Claude",
812
783
  "provider": "bedrock",
813
- "context_window": 4096,
784
+ "context_window": 200000,
814
785
  "max_tokens": 4096,
815
786
  "type": "chat",
816
787
  "family": "claude2",
817
- "supports_vision": false,
818
- "supports_functions": false,
819
- "supports_json_mode": false,
788
+ "supports_vision": true,
789
+ "supports_functions": true,
790
+ "supports_json_mode": true,
820
791
  "input_price_per_million": 8.0,
821
792
  "output_price_per_million": 24.0,
822
793
  "metadata": {
@@ -1613,7 +1584,8 @@
1613
1584
  "output_token_limit": 8192,
1614
1585
  "supported_generation_methods": [
1615
1586
  "generateContent",
1616
- "countTokens"
1587
+ "countTokens",
1588
+ "createCachedContent"
1617
1589
  ]
1618
1590
  }
1619
1591
  },
@@ -1638,7 +1610,8 @@
1638
1610
  "output_token_limit": 8192,
1639
1611
  "supported_generation_methods": [
1640
1612
  "generateContent",
1641
- "countTokens"
1613
+ "countTokens",
1614
+ "createCachedContent"
1642
1615
  ]
1643
1616
  }
1644
1617
  },
@@ -1794,6 +1767,31 @@
1794
1767
  ]
1795
1768
  }
1796
1769
  },
1770
+ {
1771
+ "id": "gemini-2.0-flash-live-001",
1772
+ "created_at": null,
1773
+ "display_name": "Gemini 2.0 Flash 001",
1774
+ "provider": "gemini",
1775
+ "context_window": 131072,
1776
+ "max_tokens": 8192,
1777
+ "type": "chat",
1778
+ "family": "gemini20_flash",
1779
+ "supports_vision": true,
1780
+ "supports_functions": true,
1781
+ "supports_json_mode": true,
1782
+ "input_price_per_million": 0.1,
1783
+ "output_price_per_million": 0.4,
1784
+ "metadata": {
1785
+ "version": "001",
1786
+ "description": "Gemini 2.0 Flash 001",
1787
+ "input_token_limit": 131072,
1788
+ "output_token_limit": 8192,
1789
+ "supported_generation_methods": [
1790
+ "bidiGenerateContent",
1791
+ "countTokens"
1792
+ ]
1793
+ }
1794
+ },
1797
1795
  {
1798
1796
  "id": "gemini-2.0-flash-thinking-exp",
1799
1797
  "created_at": null,
@@ -1890,7 +1888,8 @@
1890
1888
  "output_token_limit": 65536,
1891
1889
  "supported_generation_methods": [
1892
1890
  "generateContent",
1893
- "countTokens"
1891
+ "countTokens",
1892
+ "createCachedContent"
1894
1893
  ]
1895
1894
  }
1896
1895
  },
@@ -1915,7 +1914,8 @@
1915
1914
  "output_token_limit": 65536,
1916
1915
  "supported_generation_methods": [
1917
1916
  "generateContent",
1918
- "countTokens"
1917
+ "countTokens",
1918
+ "createCachedContent"
1919
1919
  ]
1920
1920
  }
1921
1921
  },
@@ -1940,7 +1940,34 @@
1940
1940
  "output_token_limit": 65536,
1941
1941
  "supported_generation_methods": [
1942
1942
  "generateContent",
1943
- "countTokens"
1943
+ "countTokens",
1944
+ "createCachedContent"
1945
+ ]
1946
+ }
1947
+ },
1948
+ {
1949
+ "id": "gemini-2.5-pro-preview-03-25",
1950
+ "created_at": null,
1951
+ "display_name": "Gemini 2.5 Pro Preview 03-25",
1952
+ "provider": "gemini",
1953
+ "context_window": 1048576,
1954
+ "max_tokens": 65536,
1955
+ "type": "chat",
1956
+ "family": "other",
1957
+ "supports_vision": true,
1958
+ "supports_functions": true,
1959
+ "supports_json_mode": true,
1960
+ "input_price_per_million": 0.075,
1961
+ "output_price_per_million": 0.3,
1962
+ "metadata": {
1963
+ "version": "2.5-preview-03-25",
1964
+ "description": "Gemini 2.5 Pro Preview 03-25",
1965
+ "input_token_limit": 1048576,
1966
+ "output_token_limit": 65536,
1967
+ "supported_generation_methods": [
1968
+ "generateContent",
1969
+ "countTokens",
1970
+ "createCachedContent"
1944
1971
  ]
1945
1972
  }
1946
1973
  },
@@ -1964,7 +1991,8 @@
1964
1991
  "input_token_limit": 8192,
1965
1992
  "output_token_limit": 1,
1966
1993
  "supported_generation_methods": [
1967
- "embedContent"
1994
+ "embedContent",
1995
+ "countTextTokens"
1968
1996
  ]
1969
1997
  }
1970
1998
  },
@@ -1988,7 +2016,8 @@
1988
2016
  "input_token_limit": 8192,
1989
2017
  "output_token_limit": 1,
1990
2018
  "supported_generation_methods": [
1991
- "embedContent"
2019
+ "embedContent",
2020
+ "countTextTokens"
1992
2021
  ]
1993
2022
  }
1994
2023
  },
@@ -2013,7 +2042,8 @@
2013
2042
  "output_token_limit": 65536,
2014
2043
  "supported_generation_methods": [
2015
2044
  "generateContent",
2016
- "countTokens"
2045
+ "countTokens",
2046
+ "createCachedContent"
2017
2047
  ]
2018
2048
  }
2019
2049
  },
@@ -2042,6 +2072,56 @@
2042
2072
  ]
2043
2073
  }
2044
2074
  },
2075
+ {
2076
+ "id": "gemma-3-12b-it",
2077
+ "created_at": null,
2078
+ "display_name": "Gemma 3 12B",
2079
+ "provider": "gemini",
2080
+ "context_window": 32768,
2081
+ "max_tokens": 8192,
2082
+ "type": "chat",
2083
+ "family": "other",
2084
+ "supports_vision": false,
2085
+ "supports_functions": false,
2086
+ "supports_json_mode": false,
2087
+ "input_price_per_million": 0.075,
2088
+ "output_price_per_million": 0.3,
2089
+ "metadata": {
2090
+ "version": "001",
2091
+ "description": null,
2092
+ "input_token_limit": 32768,
2093
+ "output_token_limit": 8192,
2094
+ "supported_generation_methods": [
2095
+ "generateContent",
2096
+ "countTokens"
2097
+ ]
2098
+ }
2099
+ },
2100
+ {
2101
+ "id": "gemma-3-1b-it",
2102
+ "created_at": null,
2103
+ "display_name": "Gemma 3 1B",
2104
+ "provider": "gemini",
2105
+ "context_window": 32768,
2106
+ "max_tokens": 8192,
2107
+ "type": "chat",
2108
+ "family": "other",
2109
+ "supports_vision": false,
2110
+ "supports_functions": false,
2111
+ "supports_json_mode": false,
2112
+ "input_price_per_million": 0.075,
2113
+ "output_price_per_million": 0.3,
2114
+ "metadata": {
2115
+ "version": "001",
2116
+ "description": null,
2117
+ "input_token_limit": 32768,
2118
+ "output_token_limit": 8192,
2119
+ "supported_generation_methods": [
2120
+ "generateContent",
2121
+ "countTokens"
2122
+ ]
2123
+ }
2124
+ },
2045
2125
  {
2046
2126
  "id": "gemma-3-27b-it",
2047
2127
  "created_at": null,
@@ -2067,6 +2147,31 @@
2067
2147
  ]
2068
2148
  }
2069
2149
  },
2150
+ {
2151
+ "id": "gemma-3-4b-it",
2152
+ "created_at": null,
2153
+ "display_name": "Gemma 3 4B",
2154
+ "provider": "gemini",
2155
+ "context_window": 32768,
2156
+ "max_tokens": 8192,
2157
+ "type": "chat",
2158
+ "family": "other",
2159
+ "supports_vision": false,
2160
+ "supports_functions": false,
2161
+ "supports_json_mode": false,
2162
+ "input_price_per_million": 0.075,
2163
+ "output_price_per_million": 0.3,
2164
+ "metadata": {
2165
+ "version": "001",
2166
+ "description": null,
2167
+ "input_token_limit": 32768,
2168
+ "output_token_limit": 8192,
2169
+ "supported_generation_methods": [
2170
+ "generateContent",
2171
+ "countTokens"
2172
+ ]
2173
+ }
2174
+ },
2070
2175
  {
2071
2176
  "id": "gpt-3.5-turbo",
2072
2177
  "created_at": "2023-02-28T19:56:42+01:00",
@@ -2314,6 +2419,120 @@
2314
2419
  "owned_by": "system"
2315
2420
  }
2316
2421
  },
2422
+ {
2423
+ "id": "gpt-4.1",
2424
+ "created_at": "2025-04-10T22:22:22+02:00",
2425
+ "display_name": "GPT-4.1",
2426
+ "provider": "openai",
2427
+ "context_window": 1047576,
2428
+ "max_tokens": 32768,
2429
+ "type": "chat",
2430
+ "family": "gpt41",
2431
+ "supports_vision": true,
2432
+ "supports_functions": true,
2433
+ "supports_json_mode": true,
2434
+ "input_price_per_million": 2.0,
2435
+ "output_price_per_million": 8.0,
2436
+ "metadata": {
2437
+ "object": "model",
2438
+ "owned_by": "system"
2439
+ }
2440
+ },
2441
+ {
2442
+ "id": "gpt-4.1-2025-04-14",
2443
+ "created_at": "2025-04-10T22:09:06+02:00",
2444
+ "display_name": "GPT-4.1 20250414",
2445
+ "provider": "openai",
2446
+ "context_window": 1047576,
2447
+ "max_tokens": 32768,
2448
+ "type": "chat",
2449
+ "family": "gpt41",
2450
+ "supports_vision": true,
2451
+ "supports_functions": true,
2452
+ "supports_json_mode": true,
2453
+ "input_price_per_million": 2.0,
2454
+ "output_price_per_million": 8.0,
2455
+ "metadata": {
2456
+ "object": "model",
2457
+ "owned_by": "system"
2458
+ }
2459
+ },
2460
+ {
2461
+ "id": "gpt-4.1-mini",
2462
+ "created_at": "2025-04-10T22:49:33+02:00",
2463
+ "display_name": "GPT-4.1 Mini",
2464
+ "provider": "openai",
2465
+ "context_window": 1047576,
2466
+ "max_tokens": 32768,
2467
+ "type": "chat",
2468
+ "family": "gpt41_mini",
2469
+ "supports_vision": true,
2470
+ "supports_functions": true,
2471
+ "supports_json_mode": true,
2472
+ "input_price_per_million": 0.4,
2473
+ "output_price_per_million": 1.6,
2474
+ "metadata": {
2475
+ "object": "model",
2476
+ "owned_by": "system"
2477
+ }
2478
+ },
2479
+ {
2480
+ "id": "gpt-4.1-mini-2025-04-14",
2481
+ "created_at": "2025-04-10T22:39:07+02:00",
2482
+ "display_name": "GPT-4.1 Mini 20250414",
2483
+ "provider": "openai",
2484
+ "context_window": 1047576,
2485
+ "max_tokens": 32768,
2486
+ "type": "chat",
2487
+ "family": "gpt41_mini",
2488
+ "supports_vision": true,
2489
+ "supports_functions": true,
2490
+ "supports_json_mode": true,
2491
+ "input_price_per_million": 0.4,
2492
+ "output_price_per_million": 1.6,
2493
+ "metadata": {
2494
+ "object": "model",
2495
+ "owned_by": "system"
2496
+ }
2497
+ },
2498
+ {
2499
+ "id": "gpt-4.1-nano",
2500
+ "created_at": "2025-04-10T23:48:27+02:00",
2501
+ "display_name": "GPT-4.1 Nano",
2502
+ "provider": "openai",
2503
+ "context_window": 1047576,
2504
+ "max_tokens": 32768,
2505
+ "type": "chat",
2506
+ "family": "gpt41_nano",
2507
+ "supports_vision": true,
2508
+ "supports_functions": true,
2509
+ "supports_json_mode": true,
2510
+ "input_price_per_million": 0.1,
2511
+ "output_price_per_million": 0.4,
2512
+ "metadata": {
2513
+ "object": "model",
2514
+ "owned_by": "system"
2515
+ }
2516
+ },
2517
+ {
2518
+ "id": "gpt-4.1-nano-2025-04-14",
2519
+ "created_at": "2025-04-10T23:37:05+02:00",
2520
+ "display_name": "GPT-4.1 Nano 20250414",
2521
+ "provider": "openai",
2522
+ "context_window": 1047576,
2523
+ "max_tokens": 32768,
2524
+ "type": "chat",
2525
+ "family": "gpt41_nano",
2526
+ "supports_vision": true,
2527
+ "supports_functions": true,
2528
+ "supports_json_mode": true,
2529
+ "input_price_per_million": 0.1,
2530
+ "output_price_per_million": 0.4,
2531
+ "metadata": {
2532
+ "object": "model",
2533
+ "owned_by": "system"
2534
+ }
2535
+ },
2317
2536
  {
2318
2537
  "id": "gpt-4.5-preview",
2319
2538
  "created_at": "2025-02-27T03:24:19+01:00",
@@ -2838,6 +3057,31 @@
2838
3057
  ]
2839
3058
  }
2840
3059
  },
3060
+ {
3061
+ "id": "learnlm-2.0-flash-experimental",
3062
+ "created_at": null,
3063
+ "display_name": "LearnLM 2.0 Flash Experimental",
3064
+ "provider": "gemini",
3065
+ "context_window": 1048576,
3066
+ "max_tokens": 32768,
3067
+ "type": "chat",
3068
+ "family": "other",
3069
+ "supports_vision": true,
3070
+ "supports_functions": true,
3071
+ "supports_json_mode": true,
3072
+ "input_price_per_million": 0.075,
3073
+ "output_price_per_million": 0.3,
3074
+ "metadata": {
3075
+ "version": "2.0",
3076
+ "description": "LearnLM 2.0 Flash Experimental",
3077
+ "input_token_limit": 1048576,
3078
+ "output_token_limit": 32768,
3079
+ "supported_generation_methods": [
3080
+ "generateContent",
3081
+ "countTokens"
3082
+ ]
3083
+ }
3084
+ },
2841
3085
  {
2842
3086
  "id": "o1",
2843
3087
  "created_at": "2024-12-16T20:03:36+01:00",
@@ -3028,6 +3272,44 @@
3028
3272
  "owned_by": "system"
3029
3273
  }
3030
3274
  },
3275
+ {
3276
+ "id": "o4-mini",
3277
+ "created_at": "2025-04-09T21:02:31+02:00",
3278
+ "display_name": "O4 Mini",
3279
+ "provider": "openai",
3280
+ "context_window": 4096,
3281
+ "max_tokens": 16384,
3282
+ "type": "chat",
3283
+ "family": "other",
3284
+ "supports_vision": false,
3285
+ "supports_functions": false,
3286
+ "supports_json_mode": false,
3287
+ "input_price_per_million": 0.5,
3288
+ "output_price_per_million": 1.5,
3289
+ "metadata": {
3290
+ "object": "model",
3291
+ "owned_by": "system"
3292
+ }
3293
+ },
3294
+ {
3295
+ "id": "o4-mini-2025-04-16",
3296
+ "created_at": "2025-04-08T19:31:46+02:00",
3297
+ "display_name": "O4 Mini 20250416",
3298
+ "provider": "openai",
3299
+ "context_window": 4096,
3300
+ "max_tokens": 16384,
3301
+ "type": "chat",
3302
+ "family": "other",
3303
+ "supports_vision": false,
3304
+ "supports_functions": false,
3305
+ "supports_json_mode": false,
3306
+ "input_price_per_million": 0.5,
3307
+ "output_price_per_million": 1.5,
3308
+ "metadata": {
3309
+ "object": "model",
3310
+ "owned_by": "system"
3311
+ }
3312
+ },
3031
3313
  {
3032
3314
  "id": "omni-moderation-2024-09-26",
3033
3315
  "created_at": "2024-11-27T20:07:46+01:00",
@@ -3249,6 +3531,60 @@
3249
3531
  "owned_by": "system"
3250
3532
  }
3251
3533
  },
3534
+ {
3535
+ "id": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
3536
+ "created_at": null,
3537
+ "display_name": "Claude 3.7 Sonnet",
3538
+ "provider": "bedrock",
3539
+ "context_window": 200000,
3540
+ "max_tokens": 4096,
3541
+ "type": "chat",
3542
+ "family": "claude3_sonnet",
3543
+ "supports_vision": true,
3544
+ "supports_functions": true,
3545
+ "supports_json_mode": true,
3546
+ "input_price_per_million": 3.0,
3547
+ "output_price_per_million": 15.0,
3548
+ "metadata": {
3549
+ "provider_name": "Anthropic",
3550
+ "customizations_supported": [],
3551
+ "inference_configurations": [
3552
+ "INFERENCE_PROFILE"
3553
+ ],
3554
+ "response_streaming_supported": true,
3555
+ "input_modalities": [
3556
+ "TEXT",
3557
+ "IMAGE"
3558
+ ],
3559
+ "output_modalities": [
3560
+ "TEXT"
3561
+ ]
3562
+ }
3563
+ },
3564
+ {
3565
+ "id": "veo-2.0-generate-001",
3566
+ "created_at": null,
3567
+ "display_name": "Veo 2",
3568
+ "provider": "gemini",
3569
+ "context_window": 480,
3570
+ "max_tokens": 8192,
3571
+ "type": "chat",
3572
+ "family": "other",
3573
+ "supports_vision": false,
3574
+ "supports_functions": false,
3575
+ "supports_json_mode": false,
3576
+ "input_price_per_million": 0.075,
3577
+ "output_price_per_million": 0.3,
3578
+ "metadata": {
3579
+ "version": "2.0",
3580
+ "description": "Vertex served Veo 2 model.",
3581
+ "input_token_limit": 480,
3582
+ "output_token_limit": 8192,
3583
+ "supported_generation_methods": [
3584
+ "predictLongRunning"
3585
+ ]
3586
+ }
3587
+ },
3252
3588
  {
3253
3589
  "id": "whisper-1",
3254
3590
  "created_at": "2023-02-27T22:13:04+01:00",
@@ -42,13 +42,20 @@ module RubyLLM
42
42
 
43
43
  def base_model_attributes(model_id, model, slug)
44
44
  {
45
- id: model_id,
45
+ id: model_id_with_prefix(model_id, model),
46
46
  created_at: nil,
47
47
  display_name: model['modelName'] || capabilities.format_display_name(model_id),
48
48
  provider: slug
49
49
  }
50
50
  end
51
51
 
52
+ def model_id_with_prefix(model_id, model)
53
+ return model_id unless model['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
54
+ return model_id if model['inferenceTypesSupported']&.include?('ON_DEMAND')
55
+
56
+ "us.#{model_id}"
57
+ end
58
+
52
59
  def capability_attributes(model_id, capabilities)
53
60
  {
54
61
  context_window: capabilities.context_window_for(model_id),
@@ -3,13 +3,15 @@
3
3
  module RubyLLM
4
4
  module Providers
5
5
  module OpenAI
6
- # Determines capabilities and pricing for OpenAI models
7
- module Capabilities # rubocop:disable Metrics/ModuleLength
6
+ module Capabilities # rubocop:disable Metrics/ModuleLength,Style/Documentation
8
7
  module_function
9
8
 
10
9
  MODEL_PATTERNS = {
11
10
  dall_e: /^dall-e/,
12
11
  chatgpt4o: /^chatgpt-4o/,
12
+ gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
13
+ gpt41_mini: /^gpt-4\.1-mini/,
14
+ gpt41_nano: /^gpt-4\.1-nano/,
13
15
  gpt4: /^gpt-4(?:-\d{6})?$/,
14
16
  gpt4_turbo: /^gpt-4(?:\.5)?-(?:\d{6}-)?(preview|turbo)/,
15
17
  gpt35_turbo: /^gpt-3\.5-turbo/,
@@ -38,8 +40,9 @@ module RubyLLM
38
40
  moderation: /^(?:omni|text)-moderation/
39
41
  }.freeze
40
42
 
41
- def context_window_for(model_id) # rubocop:disable Metrics/MethodLength
43
+ def context_window_for(model_id) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
42
44
  case model_family(model_id)
45
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
43
46
  when 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
44
47
  'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
45
48
  'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
@@ -55,6 +58,7 @@ module RubyLLM
55
58
 
56
59
  def max_tokens_for(model_id) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
57
60
  case model_family(model_id)
61
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
58
62
  when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
59
63
  when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
60
64
  when 'gpt4' then 8_192
@@ -71,15 +75,16 @@ module RubyLLM
71
75
 
72
76
  def supports_vision?(model_id)
73
77
  case model_family(model_id)
74
- when 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
75
- 'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
78
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1',
79
+ 'o1_pro', 'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
76
80
  else false
77
81
  end
78
82
  end
79
83
 
80
84
  def supports_functions?(model_id)
81
85
  case model_family(model_id)
82
- when 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
86
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
87
+ 'o3_mini' then true
83
88
  when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
84
89
  'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
85
90
  else false # rubocop:disable Lint/DuplicateBranch
@@ -88,7 +93,8 @@ module RubyLLM
88
93
 
89
94
  def supports_structured_output?(model_id)
90
95
  case model_family(model_id)
91
- when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
96
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
97
+ 'o3_mini' then true
92
98
  else false
93
99
  end
94
100
  end
@@ -98,6 +104,9 @@ module RubyLLM
98
104
  end
99
105
 
100
106
  PRICES = {
107
+ gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
108
+ gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
109
+ gpt41_nano: { input: 0.1, output: 0.4 },
101
110
  chatgpt4o: { input: 5.0, output: 15.0 },
102
111
  gpt4: { input: 10.0, output: 30.0 },
103
112
  gpt4_turbo: { input: 10.0, output: 30.0 },
@@ -141,6 +150,12 @@ module RubyLLM
141
150
  prices[:input] || prices[:price] || default_input_price
142
151
  end
143
152
 
153
+ def cached_input_price_for(model_id)
154
+ family = model_family(model_id).to_sym
155
+ prices = PRICES.fetch(family, {})
156
+ prices[:cached_input]
157
+ end
158
+
144
159
  def output_price_for(model_id)
145
160
  family = model_family(model_id).to_sym
146
161
  prices = PRICES.fetch(family, { output: default_output_price })
@@ -200,7 +215,7 @@ module RubyLLM
200
215
  end
201
216
 
202
217
  def normalize_temperature(temperature, model_id)
203
- if model_id.match?(/^o[13]/)
218
+ if model_id.match?(/^o\d/)
204
219
  RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
205
220
  1.0
206
221
  else
@@ -29,7 +29,7 @@ module RubyLLM
29
29
  module_function
30
30
 
31
31
  def api_base
32
- 'https://api.openai.com/v1'
32
+ RubyLLM.config.openai_api_base || 'https://api.openai.com/v1'
33
33
  end
34
34
 
35
35
  def headers
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RubyLLM
4
- VERSION = '1.1.1'
4
+ VERSION = '1.2.0'
5
5
  end
data/lib/ruby_llm.rb CHANGED
@@ -30,8 +30,8 @@ module RubyLLM
30
30
  class Error < StandardError; end
31
31
 
32
32
  class << self
33
- def chat(model: nil, provider: nil)
34
- Chat.new(model: model, provider: provider)
33
+ def chat(model: nil, provider: nil, assume_model_exists: false)
34
+ Chat.new(model:, provider:, assume_model_exists:)
35
35
  end
36
36
 
37
37
  def embed(...)
@@ -86,10 +86,18 @@ namespace :models do # rubocop:disable Metrics/BlockLength
86
86
  ---
87
87
 
88
88
  # Available Models
89
+ {: .no_toc }
89
90
 
90
91
  This guide lists all models available in RubyLLM, automatically generated from the current model registry.
92
+ {: .fs-6 .fw-300 }
91
93
 
92
- _Last updated: #{Time.now.utc.strftime('%Y-%m-%d')}_
94
+ ## Table of contents
95
+ {: .no_toc .text-delta }
96
+
97
+ 1. TOC
98
+ {:toc}
99
+
100
+ ---
93
101
 
94
102
  ## Contributing
95
103
 
@@ -115,6 +123,10 @@ namespace :models do # rubocop:disable Metrics/BlockLength
115
123
  For more information about working with models, see the [Working with Models](/guides/models) guide.
116
124
 
117
125
  ## Models by Type
126
+ {: .d-inline-block }
127
+
128
+ Last updated: #{Time.now.utc.strftime('%Y-%m-%d')}
129
+ {: .label .label-green }
118
130
 
119
131
  ### Chat Models (#{RubyLLM.models.chat_models.count})
120
132
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Carmine Paolino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-04-11 00:00:00.000000000 Z
11
+ date: 2025-04-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64