llm.rb 8.1.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +196 -6
  3. data/README.md +233 -518
  4. data/data/anthropic.json +278 -258
  5. data/data/bedrock.json +1288 -1561
  6. data/data/deepseek.json +38 -38
  7. data/data/google.json +656 -579
  8. data/data/openai.json +860 -818
  9. data/data/xai.json +243 -552
  10. data/data/zai.json +168 -168
  11. data/lib/llm/active_record/acts_as_agent.rb +5 -0
  12. data/lib/llm/active_record/acts_as_llm.rb +7 -8
  13. data/lib/llm/active_record.rb +1 -6
  14. data/lib/llm/agent.rb +121 -82
  15. data/lib/llm/context.rb +79 -74
  16. data/lib/llm/contract/completion.rb +45 -0
  17. data/lib/llm/cost.rb +81 -4
  18. data/lib/llm/error.rb +1 -1
  19. data/lib/llm/function/array.rb +8 -5
  20. data/lib/llm/function/call_group.rb +39 -0
  21. data/lib/llm/function/call_task.rb +46 -0
  22. data/lib/llm/function/fork/task.rb +6 -0
  23. data/lib/llm/function/ractor/task.rb +6 -0
  24. data/lib/llm/function/task.rb +10 -0
  25. data/lib/llm/function.rb +28 -1
  26. data/lib/llm/mcp/transport/http.rb +26 -46
  27. data/lib/llm/mcp/transport/stdio.rb +0 -8
  28. data/lib/llm/mcp.rb +6 -23
  29. data/lib/llm/provider.rb +30 -20
  30. data/lib/llm/providers/anthropic/error_handler.rb +6 -7
  31. data/lib/llm/providers/anthropic/files.rb +2 -2
  32. data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
  33. data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
  34. data/lib/llm/providers/anthropic.rb +1 -1
  35. data/lib/llm/providers/bedrock/error_handler.rb +8 -9
  36. data/lib/llm/providers/bedrock/models.rb +13 -13
  37. data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
  38. data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
  39. data/lib/llm/providers/bedrock.rb +1 -1
  40. data/lib/llm/providers/google/error_handler.rb +6 -7
  41. data/lib/llm/providers/google/files.rb +2 -4
  42. data/lib/llm/providers/google/images.rb +1 -1
  43. data/lib/llm/providers/google/models.rb +0 -2
  44. data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
  45. data/lib/llm/providers/google/stream_parser.rb +2 -2
  46. data/lib/llm/providers/google.rb +1 -1
  47. data/lib/llm/providers/ollama/error_handler.rb +6 -7
  48. data/lib/llm/providers/ollama/models.rb +0 -2
  49. data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
  50. data/lib/llm/providers/ollama.rb +1 -1
  51. data/lib/llm/providers/openai/audio.rb +3 -3
  52. data/lib/llm/providers/openai/error_handler.rb +6 -7
  53. data/lib/llm/providers/openai/files.rb +2 -2
  54. data/lib/llm/providers/openai/images.rb +3 -3
  55. data/lib/llm/providers/openai/models.rb +1 -1
  56. data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
  57. data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
  58. data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
  59. data/lib/llm/providers/openai/responses.rb +2 -2
  60. data/lib/llm/providers/openai/stream_parser.rb +2 -2
  61. data/lib/llm/providers/openai/vector_stores.rb +1 -1
  62. data/lib/llm/providers/openai.rb +1 -1
  63. data/lib/llm/response.rb +10 -8
  64. data/lib/llm/schema.rb +11 -0
  65. data/lib/llm/sequel/agent.rb +5 -0
  66. data/lib/llm/sequel/plugin.rb +8 -14
  67. data/lib/llm/stream/queue.rb +15 -42
  68. data/lib/llm/stream.rb +15 -40
  69. data/lib/llm/tool/param.rb +1 -8
  70. data/lib/llm/transport/execution.rb +67 -0
  71. data/lib/llm/transport/http.rb +134 -0
  72. data/lib/llm/transport/persistent_http.rb +152 -0
  73. data/lib/llm/transport/response/http.rb +113 -0
  74. data/lib/llm/transport/response.rb +112 -0
  75. data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
  76. data/lib/llm/transport.rb +139 -0
  77. data/lib/llm/usage.rb +14 -5
  78. data/lib/llm/utils.rb +24 -14
  79. data/lib/llm/version.rb +1 -1
  80. data/lib/llm.rb +3 -12
  81. data/llm.gemspec +2 -16
  82. metadata +13 -20
  83. data/lib/llm/bot.rb +0 -3
  84. data/lib/llm/provider/transport/http/execution.rb +0 -115
  85. data/lib/llm/provider/transport/http/interruptible.rb +0 -114
  86. data/lib/llm/provider/transport/http.rb +0 -145
data/lib/llm/agent.rb CHANGED
@@ -23,8 +23,7 @@ module LLM
23
23
  # advisory tool errors back through the model and keeps the loop in-band.
24
24
  # Set `tool_attempts: nil` to disable that advisory behavior.
25
25
  # * Tool loop execution can be configured with `concurrency :call`,
26
- # `:thread`, `:task`, `:fiber`, `:ractor`, or a list of queued task
27
- # types such as `[:thread, :ractor]`.
26
+ # `:thread`, `:task`, `:fiber`, or `:ractor`.
28
27
  #
29
28
  # @example
30
29
  # class SystemAdmin < LLM::Agent
@@ -49,9 +48,9 @@ module LLM
49
48
  # The model identifier
50
49
  # @return [String, nil]
51
50
  # Returns the current model when no argument is provided
52
- def self.model(model = nil)
53
- return @model if model.nil?
54
- @model = model
51
+ def self.model(model = nil, &block)
52
+ return @model if model.nil? && !block
53
+ @model = block || model
55
54
  end
56
55
 
57
56
  ##
@@ -60,9 +59,9 @@ module LLM
60
59
  # One or more tools
61
60
  # @return [Array<LLM::Function>]
62
61
  # Returns the current tools when no argument is provided
63
- def self.tools(*tools)
64
- return @tools || [] if tools.empty?
65
- @tools = tools.flatten
62
+ def self.tools(*tools, &block)
63
+ return @tools || [] if tools.empty? && !block
64
+ @tools = block || tools.flatten
66
65
  end
67
66
 
68
67
  ##
@@ -71,9 +70,9 @@ module LLM
71
70
  # One or more skill directories
72
71
  # @return [Array<String>, nil]
73
72
  # Returns the current skills when no argument is provided
74
- def self.skills(*skills)
75
- return @skills if skills.empty?
76
- @skills = skills.flatten
73
+ def self.skills(*skills, &block)
74
+ return @skills if skills.empty? && !block
75
+ @skills = block || skills.flatten
77
76
  end
78
77
 
79
78
  ##
@@ -82,9 +81,9 @@ module LLM
82
81
  # The schema
83
82
  # @return [#to_json, nil]
84
83
  # Returns the current schema when no argument is provided
85
- def self.schema(schema = nil)
86
- return @schema if schema.nil?
87
- @schema = schema
84
+ def self.schema(schema = nil, &block)
85
+ return @schema if schema.nil? && !block
86
+ @schema = block || schema
88
87
  end
89
88
 
90
89
  ##
@@ -110,9 +109,8 @@ module LLM
110
109
  # - `:fork`: forked child processes
111
110
  # - `:ractor`: concurrent Ruby ractors for class-based tools; MCP tools are not supported,
112
111
  # and this mode is especially useful for CPU-bound tool work
113
- # - `[:thread, :ractor]`: the possible concurrency strategies to wait on, in the
114
- # given order. This is useful for mixed tool sets or when work may have been
115
- # spawned with more than one concurrency strategy.
112
+ # Usually pass a single strategy. Arrays are only for advanced mixed-work
113
+ # cases and are not needed for normal queued stream tool loops.
116
114
  # @return [Symbol, Array<Symbol>, nil]
117
115
  def self.concurrency(concurrency = nil)
118
116
  return @concurrency if concurrency.nil?
@@ -139,6 +137,39 @@ module LLM
139
137
  @tracer = block || tracer
140
138
  end
141
139
 
140
+ ##
141
+ # Set or get the default stream.
142
+ #
143
+ # When a block is provided, it is stored and evaluated lazily against the
144
+ # agent instance during initialization so it can build a fresh stream for
145
+ # each agent.
146
+ #
147
+ # @example
148
+ # class Agent < LLM::Agent
149
+ # stream { MyStream.new }
150
+ # end
151
+ #
152
+ # @param [Object, Proc, nil] stream
153
+ # @yieldreturn [Object, nil]
154
+ # @return [Object, Proc, nil]
155
+ def self.stream(stream = nil, &block)
156
+ return @stream if stream.nil? && !block
157
+ @stream = block || stream
158
+ end
159
+
160
+ ##
161
+ # Set or get the tool names that require confirmation before they can run.
162
+ #
163
+ # @param [String, Symbol, Array<String, Symbol>, Proc] tool_names
164
+ # One or more tool names.
165
+ # @param [Proc] block
166
+ # An optional, lazy-evaluated Proc
167
+ # @return [Array<String>, Proc, nil]
168
+ def self.confirm(*tool_names, &block)
169
+ return @confirm if tool_names.empty? && !block
170
+ @confirm = block || tool_names.flatten.map(&:to_s)
171
+ end
172
+
142
173
  ##
143
174
  # @param [LLM::Provider] provider
144
175
  # A provider
@@ -150,15 +181,27 @@ module LLM
150
181
  # @option params [Array<LLM::Function>, nil] :tools Defaults to nil
151
182
  # @option params [Array<String>, nil] :skills Defaults to nil
152
183
  # @option params [#to_json, nil] :schema Defaults to nil
184
+ # @option params [Object, Proc, nil] :stream Optional stream override for this agent instance
153
185
  # @option params [LLM::Tracer, Proc, nil] :tracer Optional tracer override for this agent instance
154
186
  # @option params [Symbol, Array<Symbol>, nil] :concurrency Defaults to the agent class concurrency
155
187
  def initialize(llm, params = {})
156
- defaults = {model: self.class.model, tools: self.class.tools, skills: self.class.skills, schema: self.class.schema}.compact
157
- @concurrency = params.delete(:concurrency) || self.class.concurrency
158
188
  @llm = llm
159
- tracer = params.key?(:tracer) ? params.delete(:tracer) : self.class.tracer
160
- @tracer = resolve_option(tracer) unless tracer.nil?
161
- @ctx = LLM::Context.new(llm, defaults.merge({guard: true}).merge(params))
189
+ fields = %i[model skills schema tracer stream tools concurrency instructions confirm]
190
+ fields_ivar = %i[tracer concurrency instructions confirm]
191
+ fields.each do |field|
192
+ resolvable = params.key?(field) ? params.delete(field) : self.class.public_send(field)
193
+ resolve_symbol = !%i[concurrency confirm].include?(field)
194
+ resolved = resolvable != nil ? resolve_option(self, resolvable, resolve_symbol:) : resolvable
195
+ resolved = [*resolved].map(&:to_s) if field == :confirm && resolved
196
+ if field == :model
197
+ params[field] = resolved unless resolved.nil? || params.key?(field)
198
+ elsif resolved && !fields_ivar.include?(field)
199
+ params[field] ||= resolved
200
+ elsif fields_ivar.include?(field)
201
+ instance_variable_set(:"@#{field}", resolved)
202
+ end
203
+ end
204
+ @ctx = LLM::Context.new(llm, {guard: true}.merge(params))
162
205
  end
163
206
 
164
207
  ##
@@ -178,31 +221,10 @@ module LLM
178
221
  # response = agent.talk("Hello, what is your name?")
179
222
  # puts response.choices[0].content
180
223
  def talk(prompt, params = {})
181
- run_loop(:talk, prompt, params)
224
+ run_loop(prompt, params)
182
225
  end
183
226
  alias_method :chat, :talk
184
227
 
185
- ##
186
- # Maintain a conversation via the responses API.
187
- # This method immediately sends a request to the LLM and returns the response.
188
- #
189
- # @note Not all LLM providers support this API
190
- # @param prompt (see LLM::Provider#complete)
191
- # @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
192
- # @option params [Integer] :tool_attempts
193
- # The maxinum number of tool call iterations before the agent sends
194
- # in-band advisory tool errors back through the model (default 25).
195
- # Set to `nil` to disable advisory tool-limit returns.
196
- # @return [LLM::Response] Returns the LLM's response for this turn.
197
- # @example
198
- # llm = LLM.openai(key: ENV["KEY"])
199
- # agent = LLM::Agent.new(llm)
200
- # res = agent.respond("What is the capital of France?")
201
- # puts res.output_text
202
- def respond(prompt, params = {})
203
- run_loop(:respond, prompt, params)
204
- end
205
-
206
228
  ##
207
229
  # @return [LLM::Buffer<LLM::Message>]
208
230
  def messages
@@ -222,13 +244,6 @@ module LLM
222
244
  @ctx.returns
223
245
  end
224
246
 
225
- ##
226
- # @see LLM::Context#call
227
- # @return [Object]
228
- def call(...)
229
- @tracer ? @llm.with_tracer(@tracer) { @ctx.call(...) } : @ctx.call(...)
230
- end
231
-
232
247
  ##
233
248
  # @see LLM::Context#wait
234
249
  # @return [Array<LLM::Function::Return>]
@@ -293,6 +308,13 @@ module LLM
293
308
  @tracer || @ctx.tracer
294
309
  end
295
310
 
311
+ ##
312
+ # @return [LLM::Stream, #<<, nil]
313
+ # Returns a stream object, or nil
314
+ def stream
315
+ @ctx.stream
316
+ end
317
+
296
318
  ##
297
319
  # Returns the model an Agent is actively using
298
320
  # @return [String]
@@ -327,6 +349,13 @@ module LLM
327
349
  @ctx.context_window
328
350
  end
329
351
 
352
+ ##
353
+ # @see LLM::Context#params
354
+ # @return [Hash]
355
+ def params
356
+ @ctx.params
357
+ end
358
+
330
359
  ##
331
360
  # @see LLM::Context#to_h
332
361
  # @return [Hash]
@@ -363,19 +392,33 @@ module LLM
363
392
  end
364
393
  alias_method :restore, :deserialize
365
394
 
395
+ ##
396
+ # This method is called when confirmation is required before a tool can run.
397
+ #
398
+ # @param [LLM::Function] fn
399
+ # The pending function call. It can be cancelled through the
400
+ # {LLM::Function#cancel} method.
401
+ # @param [Symbol, Array<Symbol>] strategy
402
+ # The execution strategy that would be used for the tool call.
403
+ # @return [LLM::Function::Return]
404
+ # Return either `fn.spawn(strategy).wait` to approve execution or
405
+ # `fn.cancel(...)` to cancel the call.
406
+ def on_tool_confirmation(fn, strategy)
407
+ fn.cancel
408
+ end
409
+
366
410
  private
367
411
 
368
412
  ##
369
413
  # @return [LLM::Prompt]
370
414
  def apply_instructions(new_prompt)
371
- instr = self.class.instructions
372
- return new_prompt unless instr
415
+ return new_prompt unless @instructions
373
416
  if LLM::Prompt === new_prompt
374
- new_prompt.system(instr) if inject_instructions?(new_prompt)
417
+ new_prompt.system(@instructions) if inject_instructions?(new_prompt)
375
418
  new_prompt
376
419
  else
377
420
  prompt do
378
- _1.system(instr) if inject_instructions?
421
+ _1.system(@instructions) if inject_instructions?
379
422
  _1.user(new_prompt)
380
423
  end
381
424
  end
@@ -396,50 +439,46 @@ module LLM
396
439
  ##
397
440
  # @return [Array<LLM::Function::Return>]
398
441
  def call_functions
399
- case concurrency || :call
400
- when :call then call(:functions)
401
- when :thread, :task, :fiber, :fork, :ractor, Array then wait(concurrency)
402
- else raise ArgumentError, "Unknown concurrency: #{concurrency.inspect}. " \
403
- "Expected :call, :thread, :task, :fiber, :fork, :ractor, " \
404
- "or an array of the mentioned options"
442
+ strategy = concurrency || :call
443
+ return wait(strategy) unless @confirm&.any?
444
+ confirmables = @ctx.functions.select { @confirm.include?(_1.name.to_s) }
445
+ results = confirmables.map do |tool|
446
+ send(:on_tool_confirmation, tool, strategy)
405
447
  end
448
+ @ctx.functions? ? [*results, *wait(strategy)] : results
406
449
  end
407
450
 
408
- def run_loop(method, prompt, params)
409
- loop = proc do
451
+ ##
452
+ # Runs the tool loop
453
+ # @api private
454
+ def run_loop(prompt, params)
455
+ run = proc do
410
456
  max = params.key?(:tool_attempts) ? params.delete(:tool_attempts) : 25
411
457
  max = Integer(max) if max
412
458
  stream = params[:stream] || @ctx.params[:stream]
413
459
  stream.extra[:concurrency] = concurrency if LLM::Stream === stream
414
- res = @ctx.public_send(method, apply_instructions(prompt), params)
415
- loop do
416
- break if @ctx.functions.empty?
460
+ res = @ctx.talk(apply_instructions(prompt), params)
461
+ while @ctx.functions?
417
462
  if max
418
463
  max.times do
419
- break if @ctx.functions.empty?
420
- res = @ctx.public_send(method, call_functions, params)
464
+ break unless @ctx.functions?
465
+ res = @ctx.talk(call_functions, params)
421
466
  end
422
- break if @ctx.functions.empty?
423
- res = @ctx.public_send(method, @ctx.functions.map { rate_limit(_1) }, params)
467
+ res = @ctx.talk(@ctx.functions.map(&:rate_limit), params) if @ctx.functions?
424
468
  else
425
- res = @ctx.public_send(method, call_functions, params)
469
+ res = @ctx.talk(call_functions, params)
426
470
  end
427
471
  end
428
472
  res
429
473
  end
430
- @tracer ? @llm.with_tracer(@tracer, &loop) : loop.call
474
+ return run.call unless @tracer
475
+ @llm.with_tracer(@tracer, &run)
431
476
  end
432
477
 
433
- def rate_limit(function)
434
- LLM::Function::Return.new(function.id, function.name, {
435
- error: true,
436
- type: LLM::ToolLoopError.name,
437
- message: "tool loop rate limit reached"
438
- })
439
- end
440
-
441
- def resolve_option(option)
442
- Proc === option ? instance_exec(&option) : option
478
+ ##
479
+ # @api private
480
+ def resolve_option(...)
481
+ LLM::Utils.resolve_option(...)
443
482
  end
444
483
  end
445
484
  end
data/lib/llm/context.rb CHANGED
@@ -44,6 +44,11 @@ module LLM
44
44
  input_tokens: 0,
45
45
  output_tokens: 0,
46
46
  reasoning_tokens: 0,
47
+ input_audio_tokens: 0,
48
+ output_audio_tokens: 0,
49
+ input_image_tokens: 0,
50
+ cache_read_tokens: 0,
51
+ cache_write_tokens: 0,
47
52
  total_tokens: 0
48
53
  )
49
54
  private_constant :ZERO_USAGE
@@ -63,13 +68,6 @@ module LLM
63
68
  # @return [Symbol]
64
69
  attr_reader :mode
65
70
 
66
- ##
67
- # Returns the default params for this context
68
- # @return [Hash]
69
- def params
70
- @params.dup
71
- end
72
-
73
71
  ##
74
72
  # @param [LLM::Provider] llm
75
73
  # A provider
@@ -93,6 +91,13 @@ module LLM
93
91
  @messages = LLM::Buffer.new(llm)
94
92
  end
95
93
 
94
+ ##
95
+ # Returns the default params for this context
96
+ # @return [Hash]
97
+ def params
98
+ @params.dup
99
+ end
100
+
96
101
  ##
97
102
  # Returns a context compactor
98
103
  # This feature is inspired by the compaction approach developed by
@@ -186,14 +191,9 @@ module LLM
186
191
  # res = ctx.talk("Hello, what is your name?")
187
192
  # puts res.messages[0].content
188
193
  def talk(prompt, params = {})
189
- return respond(prompt, params) if mode == :responses
190
194
  @owner = @llm.request_owner
191
195
  compactor.compact!(prompt) if compactor.compact?(prompt)
192
- params = params.merge(messages: @messages.to_a)
193
- params = @params.merge(params)
194
- prompt, params = transform(prompt, params)
195
- bind!(params[:stream], params[:model], params[:tools])
196
- res = @llm.complete(prompt, params)
196
+ prompt, params, res = mode == :responses ? respond(prompt, params) : complete(prompt, params)
197
197
  self.compacted = false
198
198
  role = params[:role] || @llm.user_role
199
199
  role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
@@ -203,35 +203,6 @@ module LLM
203
203
  end
204
204
  alias_method :chat, :talk
205
205
 
206
- ##
207
- # Interact with the context via the responses API.
208
- # This method immediately sends a request to the LLM and returns the response.
209
- #
210
- # @note Not all LLM providers support this API
211
- # @param prompt (see LLM::Provider#complete)
212
- # @param params The params, including optional :role (defaults to :user), :stream, :tools, :schema etc.
213
- # @return [LLM::Response] Returns the LLM's response for this turn.
214
- # @example
215
- # llm = LLM.openai(key: ENV["KEY"])
216
- # ctx = LLM::Context.new(llm)
217
- # res = ctx.respond("What is the capital of France?")
218
- # puts res.output_text
219
- def respond(prompt, params = {})
220
- @owner = @llm.request_owner
221
- compactor.compact!(prompt) if compactor.compact?(prompt)
222
- params = @params.merge(params)
223
- prompt, params = transform(prompt, params)
224
- bind!(params[:stream], params[:model], params[:tools])
225
- res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
226
- params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
227
- res = @llm.responses.create(prompt, params)
228
- self.compacted = false
229
- role = params[:role] || @llm.user_role
230
- @messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
231
- @messages.concat [res.choices[-1]]
232
- res
233
- end
234
-
235
206
  ##
236
207
  # @return [String]
237
208
  def inspect
@@ -257,18 +228,13 @@ module LLM
257
228
  end
258
229
 
259
230
  ##
260
- # Calls a named collection of work through the context.
261
- #
262
- # This currently supports `:functions`, forwarding to `functions.call`.
263
- #
264
- # @param [Symbol] target
265
- # The work collection to call
266
- # @return [Array<LLM::Function::Return>]
267
- def call(target)
268
- case target
269
- when :functions then guarded_returns || functions.call
270
- else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
271
- end
231
+ # Returns whether there is pending tool work in this context.
232
+ # This prefers queued streamed tool work when present, and otherwise
233
+ # falls back to unresolved functions derived from the message history.
234
+ # @return [Boolean]
235
+ def functions?
236
+ pending = queue
237
+ (pending && !pending.empty?) || functions.any?
272
238
  end
273
239
 
274
240
  ##
@@ -307,14 +273,15 @@ module LLM
307
273
  # the context's pending functions directly.
308
274
  #
309
275
  # @param [Symbol, Array<Symbol>] strategy
310
- # The concurrency strategy to use, or the possible concurrency strategies to
311
- # wait on. For example, `[:thread, :ractor]` waits for any queued thread or
312
- # ractor work, in that order.
276
+ # If the stream queue already has tool work, `wait` will drain it
277
+ # without using this argument.
278
+ # Otherwise, this controls how pending functions are resolved directly.
279
+ # Use `:call` for sequential execution without spawning.
313
280
  # @return [Array<LLM::Function::Return>]
314
281
  def wait(strategy)
315
282
  if LLM::Stream === stream && !stream.queue.empty?
316
283
  @queue = stream.queue
317
- @queue.wait(strategy)
284
+ @queue.wait
318
285
  else
319
286
  return guarded_returns if guarded_returns
320
287
  @queue = functions.spawn(strategy)
@@ -350,6 +317,11 @@ module LLM
350
317
  input_tokens: usage.input_tokens || 0,
351
318
  output_tokens: usage.output_tokens || 0,
352
319
  reasoning_tokens: usage.reasoning_tokens || 0,
320
+ input_audio_tokens: usage.input_audio_tokens || 0,
321
+ output_audio_tokens: usage.output_audio_tokens || 0,
322
+ input_image_tokens: usage.input_image_tokens || 0,
323
+ cache_read_tokens: usage.cache_read_tokens || 0,
324
+ cache_write_tokens: usage.cache_write_tokens || 0,
353
325
  total_tokens: usage.total_tokens || 0
354
326
  )
355
327
  else
@@ -414,6 +386,13 @@ module LLM
414
386
  @llm.tracer
415
387
  end
416
388
 
389
+ ##
390
+ # @return [LLM::Stream, #<<, nil]
391
+ # Returns a stream object, or nil
392
+ def stream
393
+ @stream || @params[:stream]
394
+ end
395
+
417
396
  ##
418
397
  # Returns the model a Context is actively using
419
398
  # @return [String]
@@ -458,12 +437,7 @@ module LLM
458
437
  # Returns an _approximate_ cost for a given context
459
438
  # based on both the provider, and model
460
439
  def cost
461
- cost = LLM.registry_for(llm).cost(model:)
462
- input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
463
- output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
464
- LLM::Cost.new(input_cost, output_cost)
465
- rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
466
- LLM::Cost.new(0, 0)
440
+ LLM::Cost.from(self)
467
441
  end
468
442
 
469
443
  ##
@@ -485,6 +459,9 @@ module LLM
485
459
 
486
460
  private
487
461
 
462
+ ##
463
+ # Binds runtime metadata onto an active stream.
464
+ # @api private
488
465
  def bind!(stream, model, tools)
489
466
  return unless LLM::Stream === stream
490
467
  @stream = stream
@@ -494,25 +471,33 @@ module LLM
494
471
  stream.extra[:tools] = tools
495
472
  end
496
473
 
474
+ ##
475
+ # Returns the bound stream queue, if available.
476
+ # @api private
497
477
  def queue
498
478
  return @queue if @queue
499
479
  stream.queue if LLM::Stream === stream
500
480
  end
501
481
 
502
- def stream
503
- @stream || @params[:stream]
504
- end
505
-
482
+ ##
483
+ # Loads skill directories and adapts them into tools.
484
+ # @api private
506
485
  def load_skills(skills)
507
486
  [*skills].map { LLM::Skill.load(_1).to_tool(self) }
508
487
  end
509
488
 
489
+ ##
490
+ # Builds in-band guarded returns when the guard blocks tool work.
491
+ # @api private
510
492
  def guarded_returns
511
493
  warning = guard&.call(self)
512
494
  return unless warning
513
495
  functions.map { guarded_return_for(_1, warning) }
514
496
  end
515
497
 
498
+ ##
499
+ # Rewrites a prompt and params through the configured transformer.
500
+ # @api private
516
501
  def transform(prompt, params)
517
502
  return [prompt, params] unless transformer
518
503
  stream = params[:stream]
@@ -522,6 +507,32 @@ module LLM
522
507
  stream.on_transform_finish(self, transformer) if LLM::Stream === stream
523
508
  end
524
509
 
510
+ ##
511
+ # Executes a turn through the Responses API.
512
+ # @api private
513
+ def respond(prompt, params)
514
+ params = @params.merge(params)
515
+ prompt, params = transform(prompt, params)
516
+ bind!(params[:stream], params[:model], params[:tools])
517
+ res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
518
+ params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
519
+ [prompt, params, @llm.responses.create(prompt, params)]
520
+ end
521
+
522
+ ##
523
+ # Executes a turn through the chat completions API.
524
+ # @api private
525
+ def complete(prompt, params)
526
+ params = params.merge(messages: @messages.to_a)
527
+ params = @params.merge(params)
528
+ prompt, params = transform(prompt, params)
529
+ bind!(params[:stream], params[:model], params[:tools])
530
+ [prompt, params, @llm.complete(prompt, params)]
531
+ end
532
+
533
+ ##
534
+ # Builds one guarded tool return for a blocked function call.
535
+ # @api private
525
536
  def guarded_return_for(function, warning)
526
537
  LLM::Function::Return.new(function.id, function.name, {
527
538
  error: true,
@@ -530,10 +541,4 @@ module LLM
530
541
  })
531
542
  end
532
543
  end
533
-
534
- # Backward-compatible alias
535
- Bot = Context
536
-
537
- # Scheduled for removal in v6.0
538
- deprecate_constant :Bot
539
544
  end
data/lib/llm/contract/completion.rb CHANGED
@@ -36,6 +36,46 @@ module LLM::Contract
36
36
  raise NotImplementedError, "#{self.class} does not implement '#{__method__}'"
37
37
  end
38
38
 
39
+ ##
40
+ # @return [Integer]
41
+ # Returns the number of input audio tokens, or 0 when the
42
+ # provider does not report input audio usage
43
+ def input_audio_tokens
44
+ 0
45
+ end
46
+
47
+ ##
48
+ # @return [Integer]
49
+ # Returns the number of output audio tokens, or 0 when the
50
+ # provider does not report output audio usage
51
+ def output_audio_tokens
52
+ 0
53
+ end
54
+
55
+ ##
56
+ # @return [Integer]
57
+ # Returns the number of input image tokens, or 0 when the
58
+ # provider does not report input image usage
59
+ def input_image_tokens
60
+ 0
61
+ end
62
+
63
+ ##
64
+ # @return [Integer]
65
+ # Returns the number of cached input tokens, or 0 when the
66
+ # provider does not report cache usage
67
+ def cache_read_tokens
68
+ 0
69
+ end
70
+
71
+ ##
72
+ # @return [Integer]
73
+ # Returns the number of cache creation input tokens, or 0 when the
74
+ # provider does not report cache creation usage
75
+ def cache_write_tokens
76
+ 0
77
+ end
78
+
39
79
  ##
40
80
  # @return [Integer]
41
81
  # Returns the total number of tokens
@@ -72,6 +112,11 @@ module LLM::Contract
72
112
  input_tokens:,
73
113
  output_tokens:,
74
114
  reasoning_tokens:,
115
+ input_audio_tokens:,
116
+ output_audio_tokens:,
117
+ input_image_tokens:,
118
+ cache_read_tokens:,
119
+ cache_write_tokens:,
75
120
  total_tokens:
76
121
  )
77
122
  end