ruby_llm_community 0.0.1 → 0.0.3

This diff shows the content of publicly released package versions from the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/LICENSE +22 -0
  3. data/README.md +172 -0
  4. data/lib/generators/ruby_llm/install/templates/INSTALL_INFO.md.tt +108 -0
  5. data/lib/generators/ruby_llm/install/templates/chat_model.rb.tt +3 -0
  6. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +8 -0
  7. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +15 -0
  8. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +14 -0
  9. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +6 -0
  10. data/lib/generators/ruby_llm/install/templates/message_model.rb.tt +3 -0
  11. data/lib/generators/ruby_llm/install/templates/tool_call_model.rb.tt +3 -0
  12. data/lib/generators/ruby_llm/install_generator.rb +121 -0
  13. data/lib/ruby_llm/active_record/acts_as.rb +382 -0
  14. data/lib/ruby_llm/aliases.json +217 -0
  15. data/lib/ruby_llm/aliases.rb +56 -0
  16. data/lib/ruby_llm/attachment.rb +164 -0
  17. data/lib/ruby_llm/chat.rb +226 -0
  18. data/lib/ruby_llm/chunk.rb +6 -0
  19. data/lib/ruby_llm/configuration.rb +73 -0
  20. data/lib/ruby_llm/connection.rb +126 -0
  21. data/lib/ruby_llm/content.rb +52 -0
  22. data/lib/ruby_llm/context.rb +29 -0
  23. data/lib/ruby_llm/embedding.rb +30 -0
  24. data/lib/ruby_llm/error.rb +84 -0
  25. data/lib/ruby_llm/image.rb +53 -0
  26. data/lib/ruby_llm/message.rb +81 -0
  27. data/lib/ruby_llm/mime_type.rb +67 -0
  28. data/lib/ruby_llm/model/info.rb +101 -0
  29. data/lib/ruby_llm/model/modalities.rb +22 -0
  30. data/lib/ruby_llm/model/pricing.rb +51 -0
  31. data/lib/ruby_llm/model/pricing_category.rb +48 -0
  32. data/lib/ruby_llm/model/pricing_tier.rb +34 -0
  33. data/lib/ruby_llm/model.rb +7 -0
  34. data/lib/ruby_llm/models.json +29924 -0
  35. data/lib/ruby_llm/models.rb +214 -0
  36. data/lib/ruby_llm/models_schema.json +168 -0
  37. data/lib/ruby_llm/provider.rb +221 -0
  38. data/lib/ruby_llm/providers/anthropic/capabilities.rb +179 -0
  39. data/lib/ruby_llm/providers/anthropic/chat.rb +120 -0
  40. data/lib/ruby_llm/providers/anthropic/embeddings.rb +20 -0
  41. data/lib/ruby_llm/providers/anthropic/media.rb +116 -0
  42. data/lib/ruby_llm/providers/anthropic/models.rb +56 -0
  43. data/lib/ruby_llm/providers/anthropic/streaming.rb +45 -0
  44. data/lib/ruby_llm/providers/anthropic/tools.rb +108 -0
  45. data/lib/ruby_llm/providers/anthropic.rb +37 -0
  46. data/lib/ruby_llm/providers/bedrock/capabilities.rb +167 -0
  47. data/lib/ruby_llm/providers/bedrock/chat.rb +76 -0
  48. data/lib/ruby_llm/providers/bedrock/media.rb +73 -0
  49. data/lib/ruby_llm/providers/bedrock/models.rb +82 -0
  50. data/lib/ruby_llm/providers/bedrock/signing.rb +831 -0
  51. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +63 -0
  52. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +71 -0
  53. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +79 -0
  54. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +92 -0
  55. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +91 -0
  56. data/lib/ruby_llm/providers/bedrock/streaming.rb +36 -0
  57. data/lib/ruby_llm/providers/bedrock.rb +83 -0
  58. data/lib/ruby_llm/providers/deepseek/capabilities.rb +131 -0
  59. data/lib/ruby_llm/providers/deepseek/chat.rb +17 -0
  60. data/lib/ruby_llm/providers/deepseek.rb +30 -0
  61. data/lib/ruby_llm/providers/gemini/capabilities.rb +351 -0
  62. data/lib/ruby_llm/providers/gemini/chat.rb +146 -0
  63. data/lib/ruby_llm/providers/gemini/embeddings.rb +39 -0
  64. data/lib/ruby_llm/providers/gemini/images.rb +48 -0
  65. data/lib/ruby_llm/providers/gemini/media.rb +55 -0
  66. data/lib/ruby_llm/providers/gemini/models.rb +41 -0
  67. data/lib/ruby_llm/providers/gemini/streaming.rb +66 -0
  68. data/lib/ruby_llm/providers/gemini/tools.rb +82 -0
  69. data/lib/ruby_llm/providers/gemini.rb +36 -0
  70. data/lib/ruby_llm/providers/gpustack/chat.rb +17 -0
  71. data/lib/ruby_llm/providers/gpustack/models.rb +55 -0
  72. data/lib/ruby_llm/providers/gpustack.rb +33 -0
  73. data/lib/ruby_llm/providers/mistral/capabilities.rb +163 -0
  74. data/lib/ruby_llm/providers/mistral/chat.rb +26 -0
  75. data/lib/ruby_llm/providers/mistral/embeddings.rb +36 -0
  76. data/lib/ruby_llm/providers/mistral/models.rb +49 -0
  77. data/lib/ruby_llm/providers/mistral.rb +32 -0
  78. data/lib/ruby_llm/providers/ollama/chat.rb +28 -0
  79. data/lib/ruby_llm/providers/ollama/media.rb +50 -0
  80. data/lib/ruby_llm/providers/ollama.rb +29 -0
  81. data/lib/ruby_llm/providers/openai/capabilities.rb +306 -0
  82. data/lib/ruby_llm/providers/openai/chat.rb +87 -0
  83. data/lib/ruby_llm/providers/openai/embeddings.rb +36 -0
  84. data/lib/ruby_llm/providers/openai/images.rb +38 -0
  85. data/lib/ruby_llm/providers/openai/media.rb +81 -0
  86. data/lib/ruby_llm/providers/openai/models.rb +39 -0
  87. data/lib/ruby_llm/providers/openai/response.rb +116 -0
  88. data/lib/ruby_llm/providers/openai/response_media.rb +76 -0
  89. data/lib/ruby_llm/providers/openai/streaming.rb +191 -0
  90. data/lib/ruby_llm/providers/openai/tools.rb +100 -0
  91. data/lib/ruby_llm/providers/openai.rb +44 -0
  92. data/lib/ruby_llm/providers/openai_base.rb +44 -0
  93. data/lib/ruby_llm/providers/openrouter/models.rb +88 -0
  94. data/lib/ruby_llm/providers/openrouter.rb +26 -0
  95. data/lib/ruby_llm/providers/perplexity/capabilities.rb +138 -0
  96. data/lib/ruby_llm/providers/perplexity/chat.rb +17 -0
  97. data/lib/ruby_llm/providers/perplexity/models.rb +42 -0
  98. data/lib/ruby_llm/providers/perplexity.rb +52 -0
  99. data/lib/ruby_llm/railtie.rb +17 -0
  100. data/lib/ruby_llm/stream_accumulator.rb +103 -0
  101. data/lib/ruby_llm/streaming.rb +162 -0
  102. data/lib/ruby_llm/tool.rb +100 -0
  103. data/lib/ruby_llm/tool_call.rb +31 -0
  104. data/lib/ruby_llm/utils.rb +49 -0
  105. data/lib/ruby_llm/version.rb +5 -0
  106. data/lib/ruby_llm.rb +98 -0
  107. data/lib/tasks/aliases.rake +235 -0
  108. data/lib/tasks/models_docs.rake +224 -0
  109. data/lib/tasks/models_update.rake +108 -0
  110. data/lib/tasks/release.rake +32 -0
  111. data/lib/tasks/vcr.rake +99 -0
  112. metadata +128 -7
data/lib/ruby_llm/utils.rb ADDED
@@ -0,0 +1,49 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   # Provides utility functions for data manipulation within the RubyLLM library
+   module Utils
+     module_function
+
+     def format_text_file_for_llm(text_file)
+       "<file name='#{text_file.filename}' mime_type='#{text_file.mime_type}'>#{text_file.content}</file>"
+     end
+
+     def hash_get(hash, key)
+       hash[key.to_sym] || hash[key.to_s]
+     end
+
+     def to_safe_array(item)
+       case item
+       when Array
+         item
+       when Hash
+         [item]
+       else
+         Array(item)
+       end
+     end
+
+     def to_time(value)
+       return unless value
+
+       value.is_a?(Time) ? value : Time.parse(value.to_s)
+     end
+
+     def to_date(value)
+       return unless value
+
+       value.is_a?(Date) ? value : Date.parse(value.to_s)
+     end
+
+     def deep_merge(params, payload)
+       params.merge(payload) do |_key, params_value, payload_value|
+         if params_value.is_a?(Hash) && payload_value.is_a?(Hash)
+           deep_merge(params_value, payload_value)
+         else
+           payload_value
+         end
+       end
+     end
+   end
+ end
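The helpers above are small but used throughout the gem. A minimal illustration of how they behave; the hash contents and values are invented, only the method names come from the file above:

    require 'ruby_llm'

    # hash_get accepts either symbol or string keys.
    RubyLLM::Utils.hash_get({ model: 'example-model' }, 'model')   # => "example-model"

    # to_safe_array wraps a Hash as a single element instead of splitting it into pairs.
    RubyLLM::Utils.to_safe_array({ role: 'user' })                 # => [{ role: "user" }]
    RubyLLM::Utils.to_safe_array('hello')                          # => ["hello"]

    # deep_merge recurses into nested hashes; the payload wins on scalar conflicts.
    RubyLLM::Utils.deep_merge(
      { generation: { temperature: 0.7, top_p: 0.9 } },
      { generation: { temperature: 0.2 } }
    )
    # => { generation: { temperature: 0.2, top_p: 0.9 } }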
data/lib/ruby_llm/version.rb ADDED
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   VERSION = '0.0.3'
+ end
data/lib/ruby_llm.rb ADDED
@@ -0,0 +1,98 @@
+ # frozen_string_literal: true
+
+ require 'base64'
+ require 'event_stream_parser'
+ require 'faraday'
+ require 'faraday/retry'
+ require 'json'
+ require 'logger'
+ require 'securerandom'
+ require 'zeitwerk'
+
+ loader = Zeitwerk::Loader.for_gem
+ loader.inflector.inflect(
+   'ruby_llm' => 'RubyLLM',
+   'llm' => 'LLM',
+   'openai' => 'OpenAI',
+   'openai_base' => 'OpenAIBase',
+   'api' => 'API',
+   'deepseek' => 'DeepSeek',
+   'perplexity' => 'Perplexity',
+   'bedrock' => 'Bedrock',
+   'openrouter' => 'OpenRouter',
+   'gpustack' => 'GPUStack',
+   'mistral' => 'Mistral',
+   'pdf' => 'PDF'
+ )
+ loader.ignore("#{__dir__}/tasks")
+ loader.ignore("#{__dir__}/ruby_llm/railtie")
+ loader.ignore("#{__dir__}/ruby_llm/active_record")
+ loader.ignore("#{__dir__}/generators")
+ loader.setup
+
+ # A delightful Ruby interface to modern AI language models.
+ # Provides a unified way to interact with models from OpenAI, Anthropic and others
+ # with a focus on developer happiness and convention over configuration.
+ module RubyLLM
+   class Error < StandardError; end
+
+   class << self
+     def context
+       context_config = config.dup
+       yield context_config if block_given?
+       Context.new(context_config)
+     end
+
+     def chat(...)
+       Chat.new(...)
+     end
+
+     def embed(...)
+       Embedding.embed(...)
+     end
+
+     def paint(...)
+       Image.paint(...)
+     end
+
+     def models
+       Models.instance
+     end
+
+     def providers
+       Provider.providers.values
+     end
+
+     def configure
+       yield config
+     end
+
+     def config
+       @config ||= Configuration.new
+     end
+
+     def logger
+       @logger ||= config.logger || Logger.new(
+         config.log_file,
+         progname: 'RubyLLM',
+         level: config.log_level
+       )
+     end
+   end
+ end
+
+ RubyLLM::Provider.register :anthropic, RubyLLM::Providers::Anthropic
+ RubyLLM::Provider.register :bedrock, RubyLLM::Providers::Bedrock
+ RubyLLM::Provider.register :deepseek, RubyLLM::Providers::DeepSeek
+ RubyLLM::Provider.register :gemini, RubyLLM::Providers::Gemini
+ RubyLLM::Provider.register :gpustack, RubyLLM::Providers::GPUStack
+ RubyLLM::Provider.register :mistral, RubyLLM::Providers::Mistral
+ RubyLLM::Provider.register :ollama, RubyLLM::Providers::Ollama
+ RubyLLM::Provider.register :openai, RubyLLM::Providers::OpenAI
+ RubyLLM::Provider.register :openrouter, RubyLLM::Providers::OpenRouter
+ RubyLLM::Provider.register :perplexity, RubyLLM::Providers::Perplexity
+
+ if defined?(Rails::Railtie)
+   require 'ruby_llm/railtie'
+   require 'ruby_llm/active_record/acts_as'
+ end
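The entry point above wires Zeitwerk autoloading, the provider registry, and the top-level convenience API. A rough usage sketch follows; the configuration attribute, the model name, and the `ask`/`content` calls are assumptions based on files listed in this diff but not expanded here (`configuration.rb`, `chat.rb`, `message.rb`):

    require 'ruby_llm'

    # Global configuration; openai_api_key is assumed to be one of the
    # attributes defined in lib/ruby_llm/configuration.rb (+73 above).
    RubyLLM.configure do |config|
      config.openai_api_key = ENV['OPENAI_API_KEY']
    end

    # chat/embed/paint delegate to the classes autoloaded by Zeitwerk.
    chat = RubyLLM.chat(model: 'gpt-4o-mini')     # model name is illustrative
    response = chat.ask('Say hello in one word')  # ask is assumed from chat.rb
    puts response.content

    # A Context carries an isolated copy of the configuration.
    ctx = RubyLLM.context { |c| c.openai_api_key = ENV['OTHER_OPENAI_KEY'] }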
data/lib/tasks/aliases.rake ADDED
@@ -0,0 +1,235 @@
+ # frozen_string_literal: true
+
+ require 'json'
+
+ namespace :aliases do # rubocop:disable Metrics/BlockLength
+   desc 'Generate aliases.json from models in the registry'
+   task :generate do # rubocop:disable Metrics/BlockLength
+     require 'ruby_llm'
+
+     # Group models by provider
+     models = Hash.new { |h, k| h[k] = [] }
+
+     RubyLLM.models.all.each do |model|
+       models[model.provider] << model.id
+     end
+
+     aliases = {}
+
+     # OpenAI models
+     models['openai'].each do |model|
+       openrouter_model = "openai/#{model}"
+       next unless models['openrouter'].include?(openrouter_model)
+
+       alias_key = model.gsub('-latest', '')
+       aliases[alias_key] = {
+         'openai' => model,
+         'openrouter' => openrouter_model
+       }
+     end
+
+     # Anthropic models - group by base name and find latest
+     anthropic_latest = group_anthropic_models_by_base_name(models['anthropic'])
+
+     anthropic_latest.each do |base_name, latest_model|
+       # Check OpenRouter naming patterns for the BASE NAME (not the full dated model)
+       openrouter_variants = [
+         "anthropic/#{base_name}", # anthropic/claude-3-5-sonnet
+         "anthropic/#{base_name.gsub(/-(\d)/, '.\1')}", # anthropic/claude-3.5-sonnet
+         "anthropic/#{base_name.gsub(/claude-(\d+)-(\d+)/, 'claude-\1.\2')}", # claude-3-5 -> claude-3.5
+         "anthropic/#{base_name.gsub(/(\d+)-(\d+)/, '\1.\2')}" # any X-Y -> X.Y pattern
+       ]
+
+       openrouter_model = openrouter_variants.find { |v| models['openrouter'].include?(v) }
+
+       # Find corresponding Bedrock model
+       bedrock_model = find_best_bedrock_model(latest_model, models['bedrock'])
+
+       # Create alias if we have any match (OpenRouter OR Bedrock) OR if it's Anthropic-only
+       next unless openrouter_model || bedrock_model || models['anthropic'].include?(latest_model)
+
+       aliases[base_name] = {
+         'anthropic' => latest_model
+       }
+
+       aliases[base_name]['openrouter'] = openrouter_model if openrouter_model
+       aliases[base_name]['bedrock'] = bedrock_model if bedrock_model
+     end
+
+     # Also check if Bedrock has models that Anthropic doesn't
+     models['bedrock'].each do |bedrock_model|
+       next unless bedrock_model.start_with?('anthropic.')
+
+       # Extract the Claude model name
+       next unless bedrock_model =~ /anthropic\.(claude-[\d\.]+-[a-z]+)/
+
+       base_name = Regexp.last_match(1)
+       # Normalize to Anthropic naming convention
+       anthropic_name = base_name.tr('.', '-')
+
+       # Skip if we already have an alias for this
+       next if aliases[anthropic_name]
+
+       # Check if this model exists in OpenRouter
+       openrouter_variants = [
+         "anthropic/#{anthropic_name}",
+         "anthropic/#{base_name}" # Keep the dots
+       ]
+
+       openrouter_model = openrouter_variants.find { |v| models['openrouter'].include?(v) }
+
+       aliases[anthropic_name] = {
+         'bedrock' => bedrock_model
+       }
+
+       aliases[anthropic_name]['anthropic'] = anthropic_name if models['anthropic'].include?(anthropic_name)
+       aliases[anthropic_name]['openrouter'] = openrouter_model if openrouter_model
+     end
+
+     # Gemini models
+     models['gemini'].each do |model|
+       # OpenRouter uses "google/" prefix and sometimes different naming
+       openrouter_variants = [
+         "google/#{model}",
+         "google/#{model.gsub('gemini-', 'gemini-').tr('.', '-')}",
+         "google/#{model.gsub('gemini-', 'gemini-')}"
+       ]
+
+       openrouter_model = openrouter_variants.find { |v| models['openrouter'].include?(v) }
+       next unless openrouter_model
+
+       alias_key = model.gsub('-latest', '')
+       aliases[alias_key] = {
+         'gemini' => model,
+         'openrouter' => openrouter_model
+       }
+     end
+
+     # DeepSeek models
+     models['deepseek'].each do |model|
+       openrouter_model = "deepseek/#{model}"
+       next unless models['openrouter'].include?(openrouter_model)
+
+       alias_key = model.gsub('-latest', '')
+       aliases[alias_key] = {
+         'deepseek' => model,
+         'openrouter' => openrouter_model
+       }
+     end
+
+     # Write the result
+     sorted_aliases = aliases.sort.to_h
+     File.write('lib/ruby_llm/aliases.json', JSON.pretty_generate(sorted_aliases))
+
+     puts "Generated #{sorted_aliases.size} aliases"
+   end
+
+   def group_anthropic_models_by_base_name(anthropic_models) # rubocop:disable Rake/MethodDefinitionInTask
+     grouped = Hash.new { |h, k| h[k] = [] }
+
+     anthropic_models.each do |model|
+       base_name = extract_base_name(model)
+       grouped[base_name] << model
+     end
+
+     # Find the latest model for each base name
+     latest_models = {}
+     grouped.each do |base_name, model_list|
+       if model_list.size == 1
+         latest_models[base_name] = model_list.first
+       else
+         # Sort by date and take the latest
+         latest_model = model_list.max_by { |model| extract_date_from_model(model) }
+         latest_models[base_name] = latest_model
+       end
+     end
+
+     latest_models
+   end
+
+   def extract_base_name(model) # rubocop:disable Rake/MethodDefinitionInTask
+     # Remove date suffix (YYYYMMDD) from model name
+     if model =~ /^(.+)-(\d{8})$/
+       Regexp.last_match(1)
+     else
+       # Models without date suffix (like claude-2.0, claude-2.1)
+       model
+     end
+   end
+
+   def extract_date_from_model(model) # rubocop:disable Rake/MethodDefinitionInTask
+     # Extract date for comparison, return '00000000' for models without dates
+     if model =~ /-(\d{8})$/
+       Regexp.last_match(1)
+     else
+       '00000000' # Ensures models without dates sort before dated ones
+     end
+   end
+
+   def find_best_bedrock_model(anthropic_model, bedrock_models) # rubocop:disable Metrics/PerceivedComplexity,Rake/MethodDefinitionInTask
+     # Special mapping for Claude 2.x models
+     base_pattern = case anthropic_model
+                    when 'claude-2.0', 'claude-2'
+                      'claude-v2'
+                    when 'claude-2.1'
+                      'claude-v2:1'
+                    when 'claude-instant-v1', 'claude-instant'
+                      'claude-instant'
+                    else
+                      # For Claude 3+ models, extract base name
+                      extract_base_name(anthropic_model)
+                    end
+
+     # Find all matching Bedrock models by stripping provider prefix and comparing base name
+     matching_models = bedrock_models.select do |bedrock_model|
+       # Strip any provider prefix (anthropic. or us.anthropic.)
+       model_without_prefix = bedrock_model.sub(/^(?:us\.)?anthropic\./, '')
+       model_without_prefix.start_with?(base_pattern)
+     end
+
+     return nil if matching_models.empty?
+
+     # Get model info to check context window
+     begin
+       model_info = RubyLLM.models.find(anthropic_model)
+       target_context = model_info.context_window
+     rescue StandardError
+       target_context = nil
+     end
+
+     # If we have context window info, try to match it
+     if target_context
+       # Convert to k format (200000 -> 200k)
+       target_k = target_context / 1000
+
+       # Find models with this specific context window
+       with_context = matching_models.select do |m|
+         m.include?(":#{target_k}k") || m.include?(":0:#{target_k}k")
+       end
+
+       return with_context.first if with_context.any?
+     end
+
+     # Otherwise, pick the one with the highest context window or latest version
+     matching_models.min_by do |model|
+       # Extract context window if specified
+       context_priority = if model =~ /:(?:\d+:)?(\d+)k/
+                            -Regexp.last_match(1).to_i # Negative for descending sort
+                          else
+                            0 # No context specified
+                          end
+
+       # Extract version if present
+       version_priority = if model =~ /-v(\d+):/
+                            -Regexp.last_match(1).to_i # Negative for descending sort (latest version first)
+                          else
+                            0
+                          end
+
+       # Prefer models with explicit context windows
+       has_context_priority = model.include?('k') ? -1 : 0
+
+       [has_context_priority, context_priority, version_priority]
+     end
+   end
+ end
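The task writes its result to `lib/ruby_llm/aliases.json` (also added in this diff, +217 lines). A hedged sketch of reading a generated entry; the alias name and model IDs below are illustrative and not taken from the shipped file:

    require 'json'

    # Run the generator first:
    #   bundle exec rake aliases:generate
    aliases = JSON.parse(File.read('lib/ruby_llm/aliases.json'))

    # Each alias maps one canonical name to per-provider model IDs,
    # mirroring the hashes built in the task above (IDs invented here).
    aliases['claude-3-5-sonnet']
    # => { "anthropic"  => "claude-3-5-sonnet-20241022",
    #      "openrouter" => "anthropic/claude-3.5-sonnet",
    #      "bedrock"    => "anthropic.claude-3-5-sonnet-20241022-v2:0" }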
data/lib/tasks/models_docs.rake ADDED
@@ -0,0 +1,224 @@
+ # frozen_string_literal: true
+
+ require 'dotenv/load'
+ require 'fileutils'
+
+ namespace :models do
+   desc 'Generate available models documentation'
+   task :docs do
+     FileUtils.mkdir_p('docs') # ensure output directory exists
+
+     # Generate markdown content
+     output = generate_models_markdown
+
+     # Write the output
+     File.write('docs/_reference/available-models.md', output)
+     puts 'Generated docs/_reference/available-models.md'
+   end
+ end
+
+ def generate_models_markdown
+   <<~MARKDOWN
+     ---
+     layout: default
+     title: Available Models
+     nav_order: 1
+     description: Browse hundreds of AI models from every major provider. Always up-to-date, automatically generated.
+     redirect_from:
+       - /guides/available-models
+     ---
+
+     # {{ page.title }}
+     {: .no_toc }
+
+     {{ page.description }}
+     {: .fs-6 .fw-300 }
+
+     ## Table of contents
+     {: .no_toc .text-delta }
+
+     1. TOC
+     {:toc}
+
+     ---
+
+     After reading this guide, you will know:
+
+     * How RubyLLM's model registry works and where data comes from
+     * How to find models by provider, capability, or purpose
+     * What information is available for each model
+     * How to use model aliases for simpler configuration
+
+     ## How Model Data Works
+
+     RubyLLM's model registry combines data from multiple sources:
+
+     - **OpenAI, Anthropic, DeepSeek, Gemini**: Data from [Parsera](https://api.parsera.org/v1/llm-specs)
+     - **OpenRouter**: Direct from OpenRouter's API
+     - **Other providers**: Defined in `capabilities.rb` files
+
+     ## Contributing Model Updates
+
+     **For major providers** (OpenAI, Anthropic, DeepSeek, Gemini): File issues with [Parsera](https://github.com/parsera-labs/api-llm-specs/issues) for public model data corrections.
+
+     **For other providers**: Edit `lib/ruby_llm/providers/<provider>/capabilities.rb` then run `rake models:update`.
+
+     See the [Contributing Guide](https://github.com/crmne/ruby_llm/blob/main/CONTRIBUTING.md) for details.
+
+     ## Last Updated
+     {: .d-inline-block }
+
+     #{Time.now.utc.strftime('%Y-%m-%d')}
+     {: .label .label-green }
+
+     ## Models by Provider
+
+     #{generate_provider_sections}
+
+     ## Models by Capability
+
+     #{generate_capability_sections}
+
+     ## Models by Modality
+
+     #{generate_modality_sections}
+   MARKDOWN
+ end
+
+ def generate_provider_sections
+   RubyLLM::Provider.providers.filter_map do |provider, provider_class|
+     models = RubyLLM.models.by_provider(provider)
+     next if models.none?
+
+     <<~PROVIDER
+       ### #{provider_class.name} (#{models.count})
+
+       #{models_table(models)}
+     PROVIDER
+   end.join("\n\n")
+ end
+
+ def generate_capability_sections
+   capabilities = {
+     'Function Calling' => RubyLLM.models.select(&:function_calling?),
+     'Structured Output' => RubyLLM.models.select(&:structured_output?),
+     'Streaming' => RubyLLM.models.select { |m| m.capabilities.include?('streaming') },
+     # 'Reasoning' => RubyLLM.models.select { |m| m.capabilities.include?('reasoning') },
+     'Batch Processing' => RubyLLM.models.select { |m| m.capabilities.include?('batch') }
+   }
+
+   capabilities.filter_map do |capability, models|
+     next if models.none?
+
+     <<~CAPABILITY
+       ### #{capability} (#{models.count})
+
+       #{models_table(models)}
+     CAPABILITY
+   end.join("\n\n")
+ end
+
+ def generate_modality_sections # rubocop:disable Metrics/PerceivedComplexity
+   sections = []
+
+   # Models that support vision/images
+   vision_models = RubyLLM.models.select { |m| (m.modalities.input || []).include?('image') }
+   if vision_models.any?
+     sections << <<~SECTION
+       ### Vision Models (#{vision_models.count})
+
+       Models that can process images:
+
+       #{models_table(vision_models)}
+     SECTION
+   end
+
+   # Models that support audio
+   audio_models = RubyLLM.models.select { |m| (m.modalities.input || []).include?('audio') }
+   if audio_models.any?
+     sections << <<~SECTION
+       ### Audio Input Models (#{audio_models.count})
+
+       Models that can process audio:
+
+       #{models_table(audio_models)}
+     SECTION
+   end
+
+   # Models that support PDFs
+   pdf_models = RubyLLM.models.select { |m| (m.modalities.input || []).include?('pdf') }
+   if pdf_models.any?
+     sections << <<~SECTION
+       ### PDF Models (#{pdf_models.count})
+
+       Models that can process PDF documents:
+
+       #{models_table(pdf_models)}
+     SECTION
+   end
+
+   # Models for embeddings
+   embedding_models = RubyLLM.models.select { |m| (m.modalities.output || []).include?('embeddings') }
+   if embedding_models.any?
+     sections << <<~SECTION
+       ### Embedding Models (#{embedding_models.count})
+
+       Models that generate embeddings:
+
+       #{models_table(embedding_models)}
+     SECTION
+   end
+
+   sections.join("\n\n")
+ end
+
+ def models_table(models)
+   return '*No models found*' if models.none?
+
+   headers = ['Model', 'Provider', 'Context', 'Max Output', 'Standard Pricing (per 1M tokens)']
+   alignment = [':--', ':--', '--:', '--:', ':--']
+
+   rows = models.sort_by { |m| [m.provider, m.name] }.map do |model|
+     # Format pricing information
+     pricing = standard_pricing_display(model)
+
+     [
+       model.id,
+       model.provider,
+       model.context_window || '-',
+       model.max_output_tokens || '-',
+       pricing
+     ]
+   end
+
+   table = []
+   table << "| #{headers.join(' | ')} |"
+   table << "| #{alignment.join(' | ')} |"
+
+   rows.each do |row|
+     table << "| #{row.join(' | ')} |"
+   end
+
+   table.join("\n")
+ end
+
+ def standard_pricing_display(model)
+   # Access pricing data using to_h to get the raw hash
+   pricing_data = model.pricing.to_h[:text_tokens]&.dig(:standard) || {}
+
+   if pricing_data.any?
+     parts = []
+
+     parts << "In: $#{format('%.2f', pricing_data[:input_per_million])}" if pricing_data[:input_per_million]
+
+     parts << "Out: $#{format('%.2f', pricing_data[:output_per_million])}" if pricing_data[:output_per_million]
+
+     if pricing_data[:cached_input_per_million]
+       parts << "Cache: $#{format('%.2f', pricing_data[:cached_input_per_million])}"
+     end
+
+     return parts.join(', ') if parts.any?
+   end
+
+   '-'
+ end
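The docs task is run from the gem root with `bundle exec rake models:docs` (provider keys load via dotenv). As a rough illustration of the pricing cell each table row gets, here is a standalone sketch that mirrors the formatting in `standard_pricing_display` with invented prices rather than registry data:

    # Minimal reproduction of the "Standard Pricing (per 1M tokens)" cell format;
    # the prices are made up and not read from models.json.
    pricing = { input_per_million: 0.15, output_per_million: 0.60 }

    parts = []
    parts << "In: $#{format('%.2f', pricing[:input_per_million])}" if pricing[:input_per_million]
    parts << "Out: $#{format('%.2f', pricing[:output_per_million])}" if pricing[:output_per_million]

    puts parts.join(', ')   # => In: $0.15, Out: $0.60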