dify_llm 1.9.1 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/README.md +27 -8
  3. data/lib/generators/ruby_llm/agent/agent_generator.rb +36 -0
  4. data/lib/generators/ruby_llm/agent/templates/agent.rb.tt +6 -0
  5. data/lib/generators/ruby_llm/agent/templates/instructions.txt.erb.tt +0 -0
  6. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +110 -41
  7. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +14 -15
  8. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +8 -11
  9. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +2 -2
  10. data/lib/generators/ruby_llm/chat_ui/templates/helpers/messages_helper.rb.tt +25 -0
  11. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +2 -2
  12. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/_chat.html.erb.tt +16 -0
  13. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/_form.html.erb.tt +31 -0
  14. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/index.html.erb.tt +31 -0
  15. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/new.html.erb.tt +9 -0
  16. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/show.html.erb.tt +27 -0
  17. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_assistant.html.erb.tt +14 -0
  18. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_content.html.erb.tt +1 -0
  19. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_error.html.erb.tt +13 -0
  20. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_form.html.erb.tt +23 -0
  21. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_system.html.erb.tt +10 -0
  22. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_tool.html.erb.tt +2 -0
  23. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_tool_calls.html.erb.tt +4 -0
  24. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_user.html.erb.tt +14 -0
  25. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/tool_calls/_default.html.erb.tt +13 -0
  26. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/tool_results/_default.html.erb.tt +21 -0
  27. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/_model.html.erb.tt +17 -0
  28. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/index.html.erb.tt +40 -0
  29. data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/show.html.erb.tt +27 -0
  30. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +2 -2
  31. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +2 -2
  32. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +19 -7
  33. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +1 -1
  34. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +5 -3
  35. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_assistant.html.erb.tt +9 -0
  36. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -1
  37. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_error.html.erb.tt +8 -0
  38. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +1 -1
  39. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_system.html.erb.tt +6 -0
  40. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool.html.erb.tt +2 -0
  41. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +4 -7
  42. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_user.html.erb.tt +9 -0
  43. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -7
  44. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/tool_calls/_default.html.erb.tt +8 -0
  45. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/tool_results/_default.html.erb.tt +16 -0
  46. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +11 -12
  47. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +27 -17
  48. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +3 -4
  49. data/lib/generators/ruby_llm/generator_helpers.rb +37 -17
  50. data/lib/generators/ruby_llm/install/install_generator.rb +22 -18
  51. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +1 -1
  52. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +4 -1
  53. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +4 -10
  54. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +2 -1
  55. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +2 -2
  56. data/lib/generators/ruby_llm/schema/schema_generator.rb +26 -0
  57. data/lib/generators/ruby_llm/schema/templates/schema.rb.tt +2 -0
  58. data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +9 -0
  59. data/lib/generators/ruby_llm/tool/templates/tool_call.html.erb.tt +13 -0
  60. data/lib/generators/ruby_llm/tool/templates/tool_result.html.erb.tt +13 -0
  61. data/lib/generators/ruby_llm/tool/tool_generator.rb +96 -0
  62. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  63. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  64. data/lib/generators/ruby_llm/upgrade_to_v1_14/templates/add_v1_14_tool_call_columns.rb.tt +7 -0
  65. data/lib/generators/ruby_llm/upgrade_to_v1_14/upgrade_to_v1_14_generator.rb +49 -0
  66. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +1 -1
  67. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +2 -4
  68. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +1 -1
  69. data/lib/ruby_llm/active_record/acts_as.rb +10 -4
  70. data/lib/ruby_llm/active_record/acts_as_legacy.rb +132 -27
  71. data/lib/ruby_llm/active_record/chat_methods.rb +132 -28
  72. data/lib/ruby_llm/active_record/message_methods.rb +58 -8
  73. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  74. data/lib/ruby_llm/active_record/payload_helpers.rb +26 -0
  75. data/lib/ruby_llm/active_record/tool_call_methods.rb +15 -0
  76. data/lib/ruby_llm/agent.rb +365 -0
  77. data/lib/ruby_llm/aliases.json +199 -62
  78. data/lib/ruby_llm/attachment.rb +15 -4
  79. data/lib/ruby_llm/chat.rb +150 -22
  80. data/lib/ruby_llm/configuration.rb +65 -65
  81. data/lib/ruby_llm/connection.rb +11 -7
  82. data/lib/ruby_llm/content.rb +6 -2
  83. data/lib/ruby_llm/error.rb +37 -1
  84. data/lib/ruby_llm/message.rb +43 -15
  85. data/lib/ruby_llm/model/info.rb +15 -13
  86. data/lib/ruby_llm/models.json +37560 -14094
  87. data/lib/ruby_llm/models.rb +321 -38
  88. data/lib/ruby_llm/models_schema.json +2 -2
  89. data/lib/ruby_llm/provider.rb +26 -4
  90. data/lib/ruby_llm/providers/anthropic/capabilities.rb +5 -119
  91. data/lib/ruby_llm/providers/anthropic/chat.rb +149 -17
  92. data/lib/ruby_llm/providers/anthropic/media.rb +2 -2
  93. data/lib/ruby_llm/providers/anthropic/models.rb +3 -9
  94. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  95. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -0
  96. data/lib/ruby_llm/providers/anthropic.rb +5 -1
  97. data/lib/ruby_llm/providers/azure/chat.rb +29 -0
  98. data/lib/ruby_llm/providers/azure/embeddings.rb +24 -0
  99. data/lib/ruby_llm/providers/azure/media.rb +45 -0
  100. data/lib/ruby_llm/providers/azure/models.rb +14 -0
  101. data/lib/ruby_llm/providers/azure.rb +148 -0
  102. data/lib/ruby_llm/providers/bedrock/auth.rb +122 -0
  103. data/lib/ruby_llm/providers/bedrock/chat.rb +357 -28
  104. data/lib/ruby_llm/providers/bedrock/media.rb +62 -33
  105. data/lib/ruby_llm/providers/bedrock/models.rb +107 -62
  106. data/lib/ruby_llm/providers/bedrock/streaming.rb +309 -8
  107. data/lib/ruby_llm/providers/bedrock.rb +69 -52
  108. data/lib/ruby_llm/providers/deepseek/capabilities.rb +4 -114
  109. data/lib/ruby_llm/providers/deepseek.rb +5 -1
  110. data/lib/ruby_llm/providers/dify/chat.rb +82 -7
  111. data/lib/ruby_llm/providers/dify/media.rb +2 -2
  112. data/lib/ruby_llm/providers/dify/streaming.rb +26 -4
  113. data/lib/ruby_llm/providers/dify.rb +4 -0
  114. data/lib/ruby_llm/providers/gemini/capabilities.rb +45 -207
  115. data/lib/ruby_llm/providers/gemini/chat.rb +88 -6
  116. data/lib/ruby_llm/providers/gemini/images.rb +1 -1
  117. data/lib/ruby_llm/providers/gemini/models.rb +2 -4
  118. data/lib/ruby_llm/providers/gemini/streaming.rb +34 -2
  119. data/lib/ruby_llm/providers/gemini/tools.rb +35 -3
  120. data/lib/ruby_llm/providers/gemini.rb +4 -0
  121. data/lib/ruby_llm/providers/gpustack/capabilities.rb +20 -0
  122. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  123. data/lib/ruby_llm/providers/gpustack.rb +8 -0
  124. data/lib/ruby_llm/providers/mistral/capabilities.rb +8 -0
  125. data/lib/ruby_llm/providers/mistral/chat.rb +59 -1
  126. data/lib/ruby_llm/providers/mistral.rb +4 -0
  127. data/lib/ruby_llm/providers/ollama/capabilities.rb +20 -0
  128. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  129. data/lib/ruby_llm/providers/ollama.rb +11 -1
  130. data/lib/ruby_llm/providers/openai/capabilities.rb +96 -192
  131. data/lib/ruby_llm/providers/openai/chat.rb +101 -7
  132. data/lib/ruby_llm/providers/openai/media.rb +5 -2
  133. data/lib/ruby_llm/providers/openai/models.rb +2 -4
  134. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  135. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  136. data/lib/ruby_llm/providers/openai/tools.rb +27 -2
  137. data/lib/ruby_llm/providers/openai.rb +11 -1
  138. data/lib/ruby_llm/providers/openrouter/chat.rb +168 -0
  139. data/lib/ruby_llm/providers/openrouter/images.rb +69 -0
  140. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  141. data/lib/ruby_llm/providers/openrouter.rb +37 -1
  142. data/lib/ruby_llm/providers/perplexity/capabilities.rb +34 -99
  143. data/lib/ruby_llm/providers/perplexity/models.rb +12 -14
  144. data/lib/ruby_llm/providers/perplexity.rb +4 -0
  145. data/lib/ruby_llm/providers/vertexai/models.rb +1 -1
  146. data/lib/ruby_llm/providers/vertexai.rb +23 -7
  147. data/lib/ruby_llm/providers/xai/chat.rb +15 -0
  148. data/lib/ruby_llm/providers/xai/models.rb +75 -0
  149. data/lib/ruby_llm/providers/xai.rb +32 -0
  150. data/lib/ruby_llm/stream_accumulator.rb +120 -18
  151. data/lib/ruby_llm/streaming.rb +82 -60
  152. data/lib/ruby_llm/thinking.rb +49 -0
  153. data/lib/ruby_llm/tokens.rb +47 -0
  154. data/lib/ruby_llm/tool.rb +49 -4
  155. data/lib/ruby_llm/tool_call.rb +6 -3
  156. data/lib/ruby_llm/version.rb +1 -1
  157. data/lib/ruby_llm.rb +14 -8
  158. data/lib/tasks/models.rake +62 -23
  159. data/lib/tasks/release.rake +1 -1
  160. data/lib/tasks/ruby_llm.rake +9 -1
  161. data/lib/tasks/vcr.rake +33 -1
  162. metadata +67 -16
  163. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +0 -13
  164. data/lib/ruby_llm/providers/bedrock/capabilities.rb +0 -167
  165. data/lib/ruby_llm/providers/bedrock/signing.rb +0 -831
  166. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -51
  167. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -71
  168. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -67
  169. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -80
  170. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -78
@@ -3,13 +3,11 @@
3
3
  module RubyLLM
4
4
  module Providers
5
5
  class OpenAI
6
- # Determines capabilities and pricing for OpenAI models
6
+ # Provider-level capability checks and narrow registry fallbacks.
7
7
  module Capabilities
8
8
  module_function
9
9
 
10
10
  MODEL_PATTERNS = {
11
- dall_e: /^dall-e/,
12
- chatgpt4o: /^chatgpt-4o/,
13
11
  gpt41: /^gpt-4\.1(?!-(?:mini|nano))/,
14
12
  gpt41_mini: /^gpt-4\.1-mini/,
15
13
  gpt41_nano: /^gpt-4\.1-nano/,
@@ -26,9 +24,9 @@ module RubyLLM
26
24
  gpt4o_realtime: /^gpt-4o-realtime/,
27
25
  gpt4o_search: /^gpt-4o-search/,
28
26
  gpt4o_transcribe: /^gpt-4o-transcribe/,
29
- gpt5: /^gpt-5/,
30
- gpt5_mini: /^gpt-5-mini/,
31
- gpt5_nano: /^gpt-5-nano/,
27
+ gpt5: /^gpt-5(?!.*(?:mini|nano))/,
28
+ gpt5_mini: /^gpt-5.*mini/,
29
+ gpt5_nano: /^gpt-5.*nano/,
32
30
  o1: /^o1(?!-(?:mini|pro))/,
33
31
  o1_mini: /^o1-mini/,
34
32
  o1_pro: /^o1-pro/,
@@ -44,71 +42,6 @@ module RubyLLM
44
42
  moderation: /^(?:omni|text)-moderation/
45
43
  }.freeze
46
44
 
47
- def context_window_for(model_id)
48
- case model_family(model_id)
49
- when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
50
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
51
- 'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
52
- 'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
53
- when 'gpt4' then 8_192
54
- when 'gpt4o_mini_transcribe' then 16_000
55
- when 'o1', 'o1_pro', 'o3_mini' then 200_000
56
- when 'gpt35_turbo' then 16_385
57
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
58
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
59
- else 4_096
60
- end
61
- end
62
-
63
- def max_tokens_for(model_id)
64
- case model_family(model_id)
65
- when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
66
- when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
67
- when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
68
- when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
69
- when 'gpt4' then 8_192
70
- when 'gpt35_turbo' then 4_096
71
- when 'gpt4_turbo', 'gpt4o_realtime', 'gpt4o_mini_realtime' then 4_096 # rubocop:disable Lint/DuplicateBranch
72
- when 'gpt4o_mini_transcribe' then 2_000
73
- when 'o1', 'o1_pro', 'o3_mini' then 100_000
74
- when 'o1_mini' then 65_536
75
- when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
76
- 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
77
- else 16_384 # rubocop:disable Lint/DuplicateBranch
78
- end
79
- end
80
-
81
- def supports_vision?(model_id)
82
- case model_family(model_id)
83
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
84
- 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
85
- 'gpt4o_mini_search' then true
86
- else false
87
- end
88
- end
89
-
90
- def supports_functions?(model_id)
91
- case model_family(model_id)
92
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
93
- 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
94
- when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
95
- 'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
96
- else false # rubocop:disable Lint/DuplicateBranch
97
- end
98
- end
99
-
100
- def supports_structured_output?(model_id)
101
- case model_family(model_id)
102
- when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
103
- 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
104
- else false
105
- end
106
- end
107
-
108
- def supports_json_mode?(model_id)
109
- supports_structured_output?(model_id)
110
- end
111
-
112
45
  PRICES = {
113
46
  gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
114
47
  gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
@@ -116,21 +49,19 @@ module RubyLLM
116
49
  gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
117
50
  gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
118
51
  gpt41_nano: { input: 0.1, output: 0.4 },
119
- chatgpt4o: { input: 5.0, output: 15.0 },
120
52
  gpt4: { input: 10.0, output: 30.0 },
121
53
  gpt4_turbo: { input: 10.0, output: 30.0 },
122
- gpt45: { input: 75.0, output: 150.0 },
123
54
  gpt35_turbo: { input: 0.5, output: 1.5 },
124
55
  gpt4o: { input: 2.5, output: 10.0 },
125
- gpt4o_audio: { input: 2.5, output: 10.0, audio_input: 40.0, audio_output: 80.0 },
56
+ gpt4o_audio: { input: 2.5, output: 10.0 },
126
57
  gpt4o_mini: { input: 0.15, output: 0.6 },
127
- gpt4o_mini_audio: { input: 0.15, output: 0.6, audio_input: 10.0, audio_output: 20.0 },
58
+ gpt4o_mini_audio: { input: 0.15, output: 0.6 },
128
59
  gpt4o_mini_realtime: { input: 0.6, output: 2.4 },
129
- gpt4o_mini_transcribe: { input: 1.25, output: 5.0, audio_input: 3.0 },
60
+ gpt4o_mini_transcribe: { input: 1.25, output: 5.0 },
130
61
  gpt4o_mini_tts: { input: 0.6, output: 12.0 },
131
62
  gpt4o_realtime: { input: 5.0, output: 20.0 },
132
63
  gpt4o_search: { input: 2.5, output: 10.0 },
133
- gpt4o_transcribe: { input: 2.5, output: 10.0, audio_input: 6.0 },
64
+ gpt4o_transcribe: { input: 2.5, output: 10.0 },
134
65
  o1: { input: 15.0, output: 60.0 },
135
66
  o1_mini: { input: 1.1, output: 4.4 },
136
67
  o1_pro: { input: 150.0, output: 600.0 },
@@ -146,153 +77,126 @@ module RubyLLM
146
77
  moderation: { price: 0.0 }
147
78
  }.freeze
148
79
 
149
- def model_family(model_id)
150
- MODEL_PATTERNS.each do |family, pattern|
151
- return family.to_s if model_id.match?(pattern)
152
- end
153
- 'other'
80
+ def supports_tool_choice?(_model_id)
81
+ true
154
82
  end
155
83
 
156
- def input_price_for(model_id)
157
- family = model_family(model_id).to_sym
158
- prices = PRICES.fetch(family, { input: default_input_price })
159
- prices[:input] || prices[:price] || default_input_price
160
- end
161
-
162
- def cached_input_price_for(model_id)
163
- family = model_family(model_id).to_sym
164
- prices = PRICES.fetch(family, {})
165
- prices[:cached_input]
166
- end
167
-
168
- def output_price_for(model_id)
169
- family = model_family(model_id).to_sym
170
- prices = PRICES.fetch(family, { output: default_output_price })
171
- prices[:output] || prices[:price] || default_output_price
84
+ def supports_tool_parallel_control?(_model_id)
85
+ true
172
86
  end
173
87
 
174
- def model_type(model_id)
88
+ def context_window_for(model_id)
175
89
  case model_family(model_id)
176
- when /embedding/ then 'embedding'
177
- when /^tts|whisper|gpt4o_(?:mini_)?(?:transcribe|tts)$/ then 'audio'
178
- when 'moderation' then 'moderation'
179
- when /dall/ then 'image'
180
- else 'chat'
90
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
91
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
92
+ 'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime', 'gpt4o_search',
93
+ 'gpt4o_transcribe', 'o1_mini' then 128_000
94
+ when 'gpt4' then 8_192
95
+ when 'gpt4o_mini_transcribe' then 16_000
96
+ when 'o1', 'o1_pro', 'o3_mini' then 200_000
97
+ when 'gpt35_turbo' then 16_385
98
+ when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
99
+ 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
100
+ else 4_096
181
101
  end
182
102
  end
183
103
 
184
- def default_input_price
185
- 0.50
104
+ def max_tokens_for(model_id)
105
+ case model_family(model_id)
106
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
107
+ when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
108
+ when 'gpt4' then 8_192
109
+ when 'gpt35_turbo' then 4_096
110
+ when 'gpt4o_mini_transcribe' then 2_000
111
+ when 'o1', 'o1_pro', 'o3_mini' then 100_000
112
+ when 'o1_mini' then 65_536
113
+ when 'gpt4o_mini_tts', 'tts1', 'tts1_hd', 'whisper', 'moderation',
114
+ 'embedding3_large', 'embedding3_small', 'embedding_ada' then nil
115
+ else 16_384
116
+ end
186
117
  end
187
118
 
188
- def default_output_price
189
- 1.50
119
+ def critical_capabilities_for(model_id)
120
+ capabilities = []
121
+ capabilities << 'function_calling' if supports_functions?(model_id)
122
+ capabilities << 'structured_output' if supports_structured_output?(model_id)
123
+ capabilities << 'vision' if supports_vision?(model_id)
124
+ capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
125
+ capabilities
190
126
  end
191
127
 
192
- def format_display_name(model_id)
193
- model_id.then { |id| humanize(id) }
194
- .then { |name| apply_special_formatting(name) }
195
- end
128
+ def pricing_for(model_id)
129
+ standard_pricing = {
130
+ input_per_million: input_price_for(model_id),
131
+ output_per_million: output_price_for(model_id)
132
+ }
196
133
 
197
- def humanize(id)
198
- id.tr('-', ' ')
199
- .split
200
- .map(&:capitalize)
201
- .join(' ')
202
- end
134
+ cached_price = cached_input_price_for(model_id)
135
+ standard_pricing[:cached_input_per_million] = cached_price if cached_price
203
136
 
204
- def apply_special_formatting(name)
205
- name
206
- .gsub(/(\d{4}) (\d{2}) (\d{2})/, '\1\2\3')
207
- .gsub(/^(?:Gpt|Chatgpt|Tts|Dall E) /) { |m| special_prefix_format(m.strip) }
208
- .gsub(/^O([13]) /, 'O\1-')
209
- .gsub(/^O[13] Mini/, '\0'.tr(' ', '-'))
210
- .gsub(/\d\.\d /, '\0'.sub(' ', '-'))
211
- .gsub(/4o (?=Mini|Preview|Turbo|Audio|Realtime|Transcribe|Tts)/, '4o-')
212
- .gsub(/\bHd\b/, 'HD')
213
- .gsub(/(?:Omni|Text) Moderation/, '\0'.tr(' ', '-'))
214
- .gsub('Text Embedding', 'text-embedding-')
137
+ { text_tokens: { standard: standard_pricing } }
215
138
  end
216
139
 
217
- def special_prefix_format(prefix)
218
- case prefix # rubocop:disable Style/HashLikeCase
219
- when 'Gpt' then 'GPT-'
220
- when 'Chatgpt' then 'ChatGPT-'
221
- when 'Tts' then 'TTS-'
222
- when 'Dall E' then 'DALL-E-'
140
+ def model_family(model_id)
141
+ MODEL_PATTERNS.each do |family, pattern|
142
+ return family.to_s if model_id.match?(pattern)
223
143
  end
144
+
145
+ 'other'
224
146
  end
225
147
 
226
- def self.normalize_temperature(temperature, model_id)
227
- if model_id.match?(/^(o\d|gpt-5)/)
228
- RubyLLM.logger.debug "Model #{model_id} requires temperature=1.0, ignoring provided value"
229
- 1.0
230
- elsif model_id.match?(/-search/)
231
- RubyLLM.logger.debug "Model #{model_id} does not accept temperature parameter, removing"
232
- nil
148
+ def supports_vision?(model_id)
149
+ case model_family(model_id)
150
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
151
+ 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search'
152
+ true
233
153
  else
234
- temperature
154
+ false
235
155
  end
236
156
  end
237
157
 
238
- def modalities_for(model_id)
239
- modalities = {
240
- input: ['text'],
241
- output: ['text']
242
- }
243
-
244
- # Vision support
245
- modalities[:input] << 'image' if supports_vision?(model_id)
246
- modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
247
- modalities[:input] << 'pdf' if supports_vision?(model_id)
248
- modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
249
- modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
250
- modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
251
- modalities[:output] << 'moderation' if model_id.match?(/moderation/)
252
-
253
- modalities
158
+ def supports_functions?(model_id)
159
+ case model_family(model_id)
160
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4',
161
+ 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
162
+ true
163
+ else
164
+ false
165
+ end
254
166
  end
255
167
 
256
- def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
257
- capabilities = []
258
-
259
- capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
260
- capabilities << 'function_calling' if supports_functions?(model_id)
261
- capabilities << 'structured_output' if supports_json_mode?(model_id)
262
- capabilities << 'batch' if model_id.match?(/embedding|batch/)
263
- capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
264
-
265
- if model_id.match?(/gpt-4-turbo|gpt-4o/)
266
- capabilities << 'image_generation' if model_id.match?(/vision/)
267
- capabilities << 'speech_generation' if model_id.match?(/audio/)
268
- capabilities << 'transcription' if model_id.match?(/audio/)
168
+ def supports_structured_output?(model_id)
169
+ case model_family(model_id)
170
+ when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4o',
171
+ 'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini'
172
+ true
173
+ else
174
+ false
269
175
  end
270
-
271
- capabilities
272
176
  end
273
177
 
274
- def pricing_for(model_id)
275
- standard_pricing = {
276
- input_per_million: input_price_for(model_id),
277
- output_per_million: output_price_for(model_id)
278
- }
279
-
280
- if respond_to?(:cached_input_price_for)
281
- cached_price = cached_input_price_for(model_id)
282
- standard_pricing[:cached_input_per_million] = cached_price if cached_price
283
- end
178
+ def input_price_for(model_id)
179
+ price_for(model_id, :input, 0.50)
180
+ end
284
181
 
285
- pricing = { text_tokens: { standard: standard_pricing } }
182
+ def output_price_for(model_id)
183
+ price_for(model_id, :output, 1.50)
184
+ end
286
185
 
287
- if model_id.match?(/embedding|batch/)
288
- pricing[:text_tokens][:batch] = {
289
- input_per_million: standard_pricing[:input_per_million] * 0.5,
290
- output_per_million: standard_pricing[:output_per_million] * 0.5
291
- }
292
- end
186
+ def cached_input_price_for(model_id)
187
+ family = model_family(model_id).to_sym
188
+ PRICES.fetch(family, {})[:cached_input]
189
+ end
293
190
 
294
- pricing
191
+ def price_for(model_id, key, fallback)
192
+ family = model_family(model_id).to_sym
193
+ prices = PRICES.fetch(family, { key => fallback })
194
+ prices[key] || prices[:price] || fallback
295
195
  end
196
+
197
+ module_function :context_window_for, :max_tokens_for, :critical_capabilities_for, :pricing_for,
198
+ :model_family, :supports_vision?, :supports_functions?, :supports_structured_output?,
199
+ :input_price_for, :output_price_for, :cached_input_price_for, :price_for
296
200
  end
297
201
  end
298
202
  end
@@ -11,7 +11,10 @@ module RubyLLM
11
11
 
12
12
  module_function
13
13
 
14
- def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
14
+ # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity
15
+ def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil,
16
+ thinking: nil, tool_prefs: nil)
17
+ tool_prefs ||= {}
15
18
  payload = {
16
19
  model: model.id,
17
20
  messages: format_messages(messages),
@@ -19,24 +22,34 @@ module RubyLLM
19
22
  }
20
23
 
21
24
  payload[:temperature] = temperature unless temperature.nil?
22
- payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
25
+ if tools.any?
26
+ payload[:tools] = tools.map { |_, tool| tool_for(tool) }
27
+ payload[:tool_choice] = build_tool_choice(tool_prefs[:choice]) unless tool_prefs[:choice].nil?
28
+ payload[:parallel_tool_calls] = tool_prefs[:calls] == :many unless tool_prefs[:calls].nil?
29
+ end
23
30
 
24
31
  if schema
25
- strict = schema[:strict] != false
32
+ schema_name = schema[:name]
33
+ schema_def = schema[:schema]
34
+ strict = schema[:strict]
26
35
 
27
36
  payload[:response_format] = {
28
37
  type: 'json_schema',
29
38
  json_schema: {
30
- name: 'response',
31
- schema: schema,
39
+ name: schema_name,
40
+ schema: schema_def,
32
41
  strict: strict
33
42
  }
34
43
  }
35
44
  end
36
45
 
46
+ effort = resolve_effort(thinking)
47
+ payload[:reasoning_effort] = effort if effort
48
+
37
49
  payload[:stream_options] = { include_usage: true } if stream
38
50
  payload
39
51
  end
52
+ # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity
40
53
 
41
54
  def parse_completion_response(response)
42
55
  data = response.body
@@ -49,15 +62,21 @@ module RubyLLM
49
62
 
50
63
  usage = data['usage'] || {}
51
64
  cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
65
+ thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
66
+ content, thinking_from_blocks = extract_content_and_thinking(message_data['content'])
67
+ thinking_text = thinking_from_blocks || extract_thinking_text(message_data)
68
+ thinking_signature = extract_thinking_signature(message_data)
52
69
 
53
70
  Message.new(
54
71
  role: :assistant,
55
- content: message_data['content'],
72
+ content: content,
73
+ thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
56
74
  tool_calls: parse_tool_calls(message_data['tool_calls']),
57
75
  input_tokens: usage['prompt_tokens'],
58
76
  output_tokens: usage['completion_tokens'],
59
77
  cached_tokens: cached_tokens,
60
78
  cache_creation_tokens: 0,
79
+ thinking_tokens: thinking_tokens,
61
80
  model_id: data['model'],
62
81
  raw: response
63
82
  )
@@ -70,7 +89,7 @@ module RubyLLM
70
89
  content: Media.format_content(msg.content),
71
90
  tool_calls: format_tool_calls(msg.tool_calls),
72
91
  tool_call_id: msg.tool_call_id
73
- }.compact
92
+ }.compact.merge(format_thinking(msg))
74
93
  end
75
94
  end
76
95
 
@@ -82,6 +101,81 @@ module RubyLLM
82
101
  role.to_s
83
102
  end
84
103
  end
104
+
105
+ def resolve_effort(thinking)
106
+ return nil unless thinking
107
+
108
+ thinking.respond_to?(:effort) ? thinking.effort : thinking
109
+ end
110
+
111
+ def format_thinking(msg)
112
+ return {} unless msg.role == :assistant
113
+
114
+ thinking = msg.thinking
115
+ return {} unless thinking
116
+
117
+ payload = {}
118
+ if thinking.text
119
+ payload[:reasoning] = thinking.text
120
+ payload[:reasoning_content] = thinking.text
121
+ end
122
+ payload[:reasoning_signature] = thinking.signature if thinking.signature
123
+ payload
124
+ end
125
+
126
+ def extract_thinking_text(message_data)
127
+ candidate = message_data['reasoning_content'] || message_data['reasoning'] || message_data['thinking']
128
+ candidate.is_a?(String) ? candidate : nil
129
+ end
130
+
131
+ def extract_thinking_signature(message_data)
132
+ candidate = message_data['reasoning_signature'] || message_data['signature']
133
+ candidate.is_a?(String) ? candidate : nil
134
+ end
135
+
136
+ def extract_content_and_thinking(content)
137
+ return extract_think_tag_content(content) if content.is_a?(String)
138
+ return [content, nil] unless content.is_a?(Array)
139
+
140
+ text = extract_text_from_blocks(content)
141
+ thinking = extract_thinking_from_blocks(content)
142
+
143
+ [text.empty? ? nil : text, thinking.empty? ? nil : thinking]
144
+ end
145
+
146
+ def extract_text_from_blocks(blocks)
147
+ blocks.filter_map do |block|
148
+ block['text'] if block['type'] == 'text' && block['text'].is_a?(String)
149
+ end.join
150
+ end
151
+
152
+ def extract_thinking_from_blocks(blocks)
153
+ blocks.filter_map do |block|
154
+ next unless block['type'] == 'thinking'
155
+
156
+ extract_thinking_text_from_block(block)
157
+ end.join
158
+ end
159
+
160
+ def extract_thinking_text_from_block(block)
161
+ thinking_block = block['thinking']
162
+ return thinking_block if thinking_block.is_a?(String)
163
+
164
+ if thinking_block.is_a?(Array)
165
+ return thinking_block.filter_map { |item| item['text'] if item['type'] == 'text' }.join
166
+ end
167
+
168
+ block['text'] if block['text'].is_a?(String)
169
+ end
170
+
171
+ def extract_think_tag_content(text)
172
+ return [text, nil] unless text.include?('<think>')
173
+
174
+ thinking = text.scan(%r{<think>(.*?)</think>}m).join
175
+ content = text.gsub(%r{<think>.*?</think>}m, '').strip
176
+
177
+ [content.empty? ? nil : content, thinking.empty? ? nil : thinking]
178
+ end
85
179
  end
86
180
  end
87
181
  end
@@ -8,7 +8,10 @@ module RubyLLM
8
8
  module_function
9
9
 
10
10
  def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
11
- return content.value if content.is_a?(RubyLLM::Content::Raw)
11
+ if content.is_a?(RubyLLM::Content::Raw)
12
+ value = content.value
13
+ return value.is_a?(Hash) ? value.to_json : value
14
+ end
12
15
  return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
13
16
  return content unless content.is_a?(Content)
14
17
 
@@ -37,7 +40,7 @@ module RubyLLM
37
40
  {
38
41
  type: 'image_url',
39
42
  image_url: {
40
- url: image.url? ? image.source : image.for_llm
43
+ url: image.url? ? image.source.to_s : image.for_llm
41
44
  }
42
45
  }
43
46
  end
@@ -17,14 +17,12 @@ module RubyLLM
17
17
 
18
18
  Model::Info.new(
19
19
  id: model_id,
20
- name: capabilities.format_display_name(model_id),
20
+ name: model_id,
21
21
  provider: slug,
22
- family: capabilities.model_family(model_id),
23
22
  created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
24
23
  context_window: capabilities.context_window_for(model_id),
25
24
  max_output_tokens: capabilities.max_tokens_for(model_id),
26
- modalities: capabilities.modalities_for(model_id),
27
- capabilities: capabilities.capabilities_for(model_id),
25
+ capabilities: capabilities.critical_capabilities_for(model_id),
28
26
  pricing: capabilities.pricing_for(model_id),
29
27
  metadata: {
30
28
  object: model_data['object'],
@@ -14,16 +14,24 @@ module RubyLLM
14
14
  def build_chunk(data)
15
15
  usage = data['usage'] || {}
16
16
  cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
17
+ delta = data.dig('choices', 0, 'delta') || {}
18
+ content_source = delta['content'] || data.dig('choices', 0, 'message', 'content')
19
+ content, thinking_from_blocks = OpenAI::Chat.extract_content_and_thinking(content_source)
17
20
 
18
21
  Chunk.new(
19
22
  role: :assistant,
20
23
  model_id: data['model'],
21
- content: data.dig('choices', 0, 'delta', 'content'),
22
- tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
24
+ content: content,
25
+ thinking: Thinking.build(
26
+ text: thinking_from_blocks || delta['reasoning_content'] || delta['reasoning'],
27
+ signature: delta['reasoning_signature']
28
+ ),
29
+ tool_calls: parse_tool_calls(delta['tool_calls'], parse_arguments: false),
23
30
  input_tokens: usage['prompt_tokens'],
24
31
  output_tokens: usage['completion_tokens'],
25
32
  cached_tokens: cached_tokens,
26
- cache_creation_tokens: 0
33
+ cache_creation_tokens: 0,
34
+ thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
27
35
  )
28
36
  end
29
37
 
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ class OpenAI
6
+ # Normalizes temperature for OpenAI models with provider-specific requirements.
7
+ module Temperature
8
+ module_function
9
+
10
+ def normalize(temperature, model_id)
11
+ if model_id.match?(/^(o\d|gpt-5)/) && !temperature.nil? && !temperature_close_to_one?(temperature)
12
+ RubyLLM.logger.debug { "Model #{model_id} requires temperature=1.0, setting that instead." }
13
+ 1.0
14
+ elsif model_id.include?('-search')
15
+ RubyLLM.logger.debug { "Model #{model_id} does not accept temperature parameter, removing" }
16
+ nil
17
+ else
18
+ temperature
19
+ end
20
+ end
21
+
22
+ def temperature_close_to_one?(temperature)
23
+ (temperature.to_f - 1.0).abs <= Float::EPSILON
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end