llm.rb 8.1.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +196 -6
- data/README.md +233 -518
- data/data/anthropic.json +278 -258
- data/data/bedrock.json +1288 -1561
- data/data/deepseek.json +38 -38
- data/data/google.json +656 -579
- data/data/openai.json +860 -818
- data/data/xai.json +243 -552
- data/data/zai.json +168 -168
- data/lib/llm/active_record/acts_as_agent.rb +5 -0
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/active_record.rb +1 -6
- data/lib/llm/agent.rb +121 -82
- data/lib/llm/context.rb +79 -74
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/call_task.rb +46 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +28 -1
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +30 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google/stream_parser.rb +2 -2
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/schema.rb +11 -0
- data/lib/llm/sequel/agent.rb +5 -0
- data/lib/llm/sequel/plugin.rb +8 -14
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +15 -40
- data/lib/llm/tool/param.rb +1 -8
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/utils.rb +24 -14
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +3 -12
- data/llm.gemspec +2 -16
- metadata +13 -20
- data/lib/llm/bot.rb +0 -3
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
data/lib/llm/agent.rb
CHANGED
|
@@ -23,8 +23,7 @@ module LLM
|
|
|
23
23
|
# advisory tool errors back through the model and keeps the loop in-band.
|
|
24
24
|
# Set `tool_attempts: nil` to disable that advisory behavior.
|
|
25
25
|
# * Tool loop execution can be configured with `concurrency :call`,
|
|
26
|
-
# `:thread`, `:task`, `:fiber`, `:ractor
|
|
27
|
-
# types such as `[:thread, :ractor]`.
|
|
26
|
+
# `:thread`, `:task`, `:fiber`, or `:ractor`.
|
|
28
27
|
#
|
|
29
28
|
# @example
|
|
30
29
|
# class SystemAdmin < LLM::Agent
|
|
@@ -49,9 +48,9 @@ module LLM
|
|
|
49
48
|
# The model identifier
|
|
50
49
|
# @return [String, nil]
|
|
51
50
|
# Returns the current model when no argument is provided
|
|
52
|
-
def self.model(model = nil)
|
|
53
|
-
return @model if model.nil?
|
|
54
|
-
@model = model
|
|
51
|
+
def self.model(model = nil, &block)
|
|
52
|
+
return @model if model.nil? && !block
|
|
53
|
+
@model = block || model
|
|
55
54
|
end
|
|
56
55
|
|
|
57
56
|
##
|
|
@@ -60,9 +59,9 @@ module LLM
|
|
|
60
59
|
# One or more tools
|
|
61
60
|
# @return [Array<LLM::Function>]
|
|
62
61
|
# Returns the current tools when no argument is provided
|
|
63
|
-
def self.tools(*tools)
|
|
64
|
-
return @tools || [] if tools.empty?
|
|
65
|
-
@tools = tools.flatten
|
|
62
|
+
def self.tools(*tools, &block)
|
|
63
|
+
return @tools || [] if tools.empty? && !block
|
|
64
|
+
@tools = block || tools.flatten
|
|
66
65
|
end
|
|
67
66
|
|
|
68
67
|
##
|
|
@@ -71,9 +70,9 @@ module LLM
|
|
|
71
70
|
# One or more skill directories
|
|
72
71
|
# @return [Array<String>, nil]
|
|
73
72
|
# Returns the current skills when no argument is provided
|
|
74
|
-
def self.skills(*skills)
|
|
75
|
-
return @skills if skills.empty?
|
|
76
|
-
@skills = skills.flatten
|
|
73
|
+
def self.skills(*skills, &block)
|
|
74
|
+
return @skills if skills.empty? && !block
|
|
75
|
+
@skills = block || skills.flatten
|
|
77
76
|
end
|
|
78
77
|
|
|
79
78
|
##
|
|
@@ -82,9 +81,9 @@ module LLM
|
|
|
82
81
|
# The schema
|
|
83
82
|
# @return [#to_json, nil]
|
|
84
83
|
# Returns the current schema when no argument is provided
|
|
85
|
-
def self.schema(schema = nil)
|
|
86
|
-
return @schema if schema.nil?
|
|
87
|
-
@schema = schema
|
|
84
|
+
def self.schema(schema = nil, &block)
|
|
85
|
+
return @schema if schema.nil? && !block
|
|
86
|
+
@schema = block || schema
|
|
88
87
|
end
|
|
89
88
|
|
|
90
89
|
##
|
|
@@ -110,9 +109,8 @@ module LLM
|
|
|
110
109
|
# - `:fork`: forked child processes
|
|
111
110
|
# - `:ractor`: concurrent Ruby ractors for class-based tools; MCP tools are not supported,
|
|
112
111
|
# and this mode is especially useful for CPU-bound tool work
|
|
113
|
-
#
|
|
114
|
-
#
|
|
115
|
-
# spawned with more than one concurrency strategy.
|
|
112
|
+
# Usually pass a single strategy. Arrays are only for advanced mixed-work
|
|
113
|
+
# cases and are not needed for normal queued stream tool loops.
|
|
116
114
|
# @return [Symbol, Array<Symbol>, nil]
|
|
117
115
|
def self.concurrency(concurrency = nil)
|
|
118
116
|
return @concurrency if concurrency.nil?
|
|
@@ -139,6 +137,39 @@ module LLM
|
|
|
139
137
|
@tracer = block || tracer
|
|
140
138
|
end
|
|
141
139
|
|
|
140
|
+
##
|
|
141
|
+
# Set or get the default stream.
|
|
142
|
+
#
|
|
143
|
+
# When a block is provided, it is stored and evaluated lazily against the
|
|
144
|
+
# agent instance during initialization so it can build a fresh stream for
|
|
145
|
+
# each agent.
|
|
146
|
+
#
|
|
147
|
+
# @example
|
|
148
|
+
# class Agent < LLM::Agent
|
|
149
|
+
# stream { MyStream.new }
|
|
150
|
+
# end
|
|
151
|
+
#
|
|
152
|
+
# @param [Object, Proc, nil] stream
|
|
153
|
+
# @yieldreturn [Object, nil]
|
|
154
|
+
# @return [Object, Proc, nil]
|
|
155
|
+
def self.stream(stream = nil, &block)
|
|
156
|
+
return @stream if stream.nil? && !block
|
|
157
|
+
@stream = block || stream
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
##
|
|
161
|
+
# Set or get the tool names that require confirmation before they can run.
|
|
162
|
+
#
|
|
163
|
+
# @param [String, Symbol, Array<String, Symbol>, Proc] tool_names
|
|
164
|
+
# One or more tool names.
|
|
165
|
+
# @param [Proc] block
|
|
166
|
+
# An optional, lazy-evaluated Proc
|
|
167
|
+
# @return [Array<String>, Proc, nil]
|
|
168
|
+
def self.confirm(*tool_names, &block)
|
|
169
|
+
return @confirm if tool_names.empty? && !block
|
|
170
|
+
@confirm = block || tool_names.flatten.map(&:to_s)
|
|
171
|
+
end
|
|
172
|
+
|
|
142
173
|
##
|
|
143
174
|
# @param [LLM::Provider] provider
|
|
144
175
|
# A provider
|
|
@@ -150,15 +181,27 @@ module LLM
|
|
|
150
181
|
# @option params [Array<LLM::Function>, nil] :tools Defaults to nil
|
|
151
182
|
# @option params [Array<String>, nil] :skills Defaults to nil
|
|
152
183
|
# @option params [#to_json, nil] :schema Defaults to nil
|
|
184
|
+
# @option params [Object, Proc, nil] :stream Optional stream override for this agent instance
|
|
153
185
|
# @option params [LLM::Tracer, Proc, nil] :tracer Optional tracer override for this agent instance
|
|
154
186
|
# @option params [Symbol, Array<Symbol>, nil] :concurrency Defaults to the agent class concurrency
|
|
155
187
|
def initialize(llm, params = {})
|
|
156
|
-
defaults = {model: self.class.model, tools: self.class.tools, skills: self.class.skills, schema: self.class.schema}.compact
|
|
157
|
-
@concurrency = params.delete(:concurrency) || self.class.concurrency
|
|
158
188
|
@llm = llm
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
189
|
+
fields = %i[model skills schema tracer stream tools concurrency instructions confirm]
|
|
190
|
+
fields_ivar = %i[tracer concurrency instructions confirm]
|
|
191
|
+
fields.each do |field|
|
|
192
|
+
resolvable = params.key?(field) ? params.delete(field) : self.class.public_send(field)
|
|
193
|
+
resolve_symbol = !%i[concurrency confirm].include?(field)
|
|
194
|
+
resolved = resolvable != nil ? resolve_option(self, resolvable, resolve_symbol:) : resolvable
|
|
195
|
+
resolved = [*resolved].map(&:to_s) if field == :confirm && resolved
|
|
196
|
+
if field == :model
|
|
197
|
+
params[field] = resolved unless resolved.nil? || params.key?(field)
|
|
198
|
+
elsif resolved && !fields_ivar.include?(field)
|
|
199
|
+
params[field] ||= resolved
|
|
200
|
+
elsif fields_ivar.include?(field)
|
|
201
|
+
instance_variable_set(:"@#{field}", resolved)
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
@ctx = LLM::Context.new(llm, {guard: true}.merge(params))
|
|
162
205
|
end
|
|
163
206
|
|
|
164
207
|
##
|
|
@@ -178,31 +221,10 @@ module LLM
|
|
|
178
221
|
# response = agent.talk("Hello, what is your name?")
|
|
179
222
|
# puts response.choices[0].content
|
|
180
223
|
def talk(prompt, params = {})
|
|
181
|
-
run_loop(
|
|
224
|
+
run_loop(prompt, params)
|
|
182
225
|
end
|
|
183
226
|
alias_method :chat, :talk
|
|
184
227
|
|
|
185
|
-
##
|
|
186
|
-
# Maintain a conversation via the responses API.
|
|
187
|
-
# This method immediately sends a request to the LLM and returns the response.
|
|
188
|
-
#
|
|
189
|
-
# @note Not all LLM providers support this API
|
|
190
|
-
# @param prompt (see LLM::Provider#complete)
|
|
191
|
-
# @param [Hash] params The params passed to the provider, including optional :stream, :tools, :schema etc.
|
|
192
|
-
# @option params [Integer] :tool_attempts
|
|
193
|
-
# The maximum number of tool call iterations before the agent sends
|
|
194
|
-
# in-band advisory tool errors back through the model (default 25).
|
|
195
|
-
# Set to `nil` to disable advisory tool-limit returns.
|
|
196
|
-
# @return [LLM::Response] Returns the LLM's response for this turn.
|
|
197
|
-
# @example
|
|
198
|
-
# llm = LLM.openai(key: ENV["KEY"])
|
|
199
|
-
# agent = LLM::Agent.new(llm)
|
|
200
|
-
# res = agent.respond("What is the capital of France?")
|
|
201
|
-
# puts res.output_text
|
|
202
|
-
def respond(prompt, params = {})
|
|
203
|
-
run_loop(:respond, prompt, params)
|
|
204
|
-
end
|
|
205
|
-
|
|
206
228
|
##
|
|
207
229
|
# @return [LLM::Buffer<LLM::Message>]
|
|
208
230
|
def messages
|
|
@@ -222,13 +244,6 @@ module LLM
|
|
|
222
244
|
@ctx.returns
|
|
223
245
|
end
|
|
224
246
|
|
|
225
|
-
##
|
|
226
|
-
# @see LLM::Context#call
|
|
227
|
-
# @return [Object]
|
|
228
|
-
def call(...)
|
|
229
|
-
@tracer ? @llm.with_tracer(@tracer) { @ctx.call(...) } : @ctx.call(...)
|
|
230
|
-
end
|
|
231
|
-
|
|
232
247
|
##
|
|
233
248
|
# @see LLM::Context#wait
|
|
234
249
|
# @return [Array<LLM::Function::Return>]
|
|
@@ -293,6 +308,13 @@ module LLM
|
|
|
293
308
|
@tracer || @ctx.tracer
|
|
294
309
|
end
|
|
295
310
|
|
|
311
|
+
##
|
|
312
|
+
# @return [LLM::Stream, #<<, nil]
|
|
313
|
+
# Returns a stream object, or nil
|
|
314
|
+
def stream
|
|
315
|
+
@ctx.stream
|
|
316
|
+
end
|
|
317
|
+
|
|
296
318
|
##
|
|
297
319
|
# Returns the model an Agent is actively using
|
|
298
320
|
# @return [String]
|
|
@@ -327,6 +349,13 @@ module LLM
|
|
|
327
349
|
@ctx.context_window
|
|
328
350
|
end
|
|
329
351
|
|
|
352
|
+
##
|
|
353
|
+
# @see LLM::Context#params
|
|
354
|
+
# @return [Hash]
|
|
355
|
+
def params
|
|
356
|
+
@ctx.params
|
|
357
|
+
end
|
|
358
|
+
|
|
330
359
|
##
|
|
331
360
|
# @see LLM::Context#to_h
|
|
332
361
|
# @return [Hash]
|
|
@@ -363,19 +392,33 @@ module LLM
|
|
|
363
392
|
end
|
|
364
393
|
alias_method :restore, :deserialize
|
|
365
394
|
|
|
395
|
+
##
|
|
396
|
+
# This method is called when confirmation is required before a tool can run.
|
|
397
|
+
#
|
|
398
|
+
# @param [LLM::Function] fn
|
|
399
|
+
# The pending function call. It can be cancelled through the
|
|
400
|
+
# {LLM::Function#cancel} method.
|
|
401
|
+
# @param [Symbol, Array<Symbol>] strategy
|
|
402
|
+
# The execution strategy that would be used for the tool call.
|
|
403
|
+
# @return [LLM::Function::Return]
|
|
404
|
+
# Return either `fn.spawn(strategy).wait` to approve execution or
|
|
405
|
+
# `fn.cancel(...)` to cancel the call.
|
|
406
|
+
def on_tool_confirmation(fn, strategy)
|
|
407
|
+
fn.cancel
|
|
408
|
+
end
|
|
409
|
+
|
|
366
410
|
private
|
|
367
411
|
|
|
368
412
|
##
|
|
369
413
|
# @return [LLM::Prompt]
|
|
370
414
|
def apply_instructions(new_prompt)
|
|
371
|
-
|
|
372
|
-
return new_prompt unless instr
|
|
415
|
+
return new_prompt unless @instructions
|
|
373
416
|
if LLM::Prompt === new_prompt
|
|
374
|
-
new_prompt.system(
|
|
417
|
+
new_prompt.system(@instructions) if inject_instructions?(new_prompt)
|
|
375
418
|
new_prompt
|
|
376
419
|
else
|
|
377
420
|
prompt do
|
|
378
|
-
_1.system(
|
|
421
|
+
_1.system(@instructions) if inject_instructions?
|
|
379
422
|
_1.user(new_prompt)
|
|
380
423
|
end
|
|
381
424
|
end
|
|
@@ -396,50 +439,46 @@ module LLM
|
|
|
396
439
|
##
|
|
397
440
|
# @return [Array<LLM::Function::Return>]
|
|
398
441
|
def call_functions
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
"or an array of the mentioned options"
|
|
442
|
+
strategy = concurrency || :call
|
|
443
|
+
return wait(strategy) unless @confirm&.any?
|
|
444
|
+
confirmables = @ctx.functions.select { @confirm.include?(_1.name.to_s) }
|
|
445
|
+
results = confirmables.map do |tool|
|
|
446
|
+
send(:on_tool_confirmation, tool, strategy)
|
|
405
447
|
end
|
|
448
|
+
@ctx.functions? ? [*results, *wait(strategy)] : results
|
|
406
449
|
end
|
|
407
450
|
|
|
408
|
-
|
|
409
|
-
|
|
451
|
+
##
|
|
452
|
+
# Runs the tool loop
|
|
453
|
+
# @api private
|
|
454
|
+
def run_loop(prompt, params)
|
|
455
|
+
run = proc do
|
|
410
456
|
max = params.key?(:tool_attempts) ? params.delete(:tool_attempts) : 25
|
|
411
457
|
max = Integer(max) if max
|
|
412
458
|
stream = params[:stream] || @ctx.params[:stream]
|
|
413
459
|
stream.extra[:concurrency] = concurrency if LLM::Stream === stream
|
|
414
|
-
res = @ctx.
|
|
415
|
-
|
|
416
|
-
break if @ctx.functions.empty?
|
|
460
|
+
res = @ctx.talk(apply_instructions(prompt), params)
|
|
461
|
+
while @ctx.functions?
|
|
417
462
|
if max
|
|
418
463
|
max.times do
|
|
419
|
-
break
|
|
420
|
-
res = @ctx.
|
|
464
|
+
break unless @ctx.functions?
|
|
465
|
+
res = @ctx.talk(call_functions, params)
|
|
421
466
|
end
|
|
422
|
-
|
|
423
|
-
res = @ctx.public_send(method, @ctx.functions.map { rate_limit(_1) }, params)
|
|
467
|
+
res = @ctx.talk(@ctx.functions.map(&:rate_limit), params) if @ctx.functions?
|
|
424
468
|
else
|
|
425
|
-
res = @ctx.
|
|
469
|
+
res = @ctx.talk(call_functions, params)
|
|
426
470
|
end
|
|
427
471
|
end
|
|
428
472
|
res
|
|
429
473
|
end
|
|
430
|
-
|
|
474
|
+
return run.call unless @tracer
|
|
475
|
+
@llm.with_tracer(@tracer, &run)
|
|
431
476
|
end
|
|
432
477
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
message: "tool loop rate limit reached"
|
|
438
|
-
})
|
|
439
|
-
end
|
|
440
|
-
|
|
441
|
-
def resolve_option(option)
|
|
442
|
-
Proc === option ? instance_exec(&option) : option
|
|
478
|
+
##
|
|
479
|
+
# @api private
|
|
480
|
+
def resolve_option(...)
|
|
481
|
+
LLM::Utils.resolve_option(...)
|
|
443
482
|
end
|
|
444
483
|
end
|
|
445
484
|
end
|
data/lib/llm/context.rb
CHANGED
|
@@ -44,6 +44,11 @@ module LLM
|
|
|
44
44
|
input_tokens: 0,
|
|
45
45
|
output_tokens: 0,
|
|
46
46
|
reasoning_tokens: 0,
|
|
47
|
+
input_audio_tokens: 0,
|
|
48
|
+
output_audio_tokens: 0,
|
|
49
|
+
input_image_tokens: 0,
|
|
50
|
+
cache_read_tokens: 0,
|
|
51
|
+
cache_write_tokens: 0,
|
|
47
52
|
total_tokens: 0
|
|
48
53
|
)
|
|
49
54
|
private_constant :ZERO_USAGE
|
|
@@ -63,13 +68,6 @@ module LLM
|
|
|
63
68
|
# @return [Symbol]
|
|
64
69
|
attr_reader :mode
|
|
65
70
|
|
|
66
|
-
##
|
|
67
|
-
# Returns the default params for this context
|
|
68
|
-
# @return [Hash]
|
|
69
|
-
def params
|
|
70
|
-
@params.dup
|
|
71
|
-
end
|
|
72
|
-
|
|
73
71
|
##
|
|
74
72
|
# @param [LLM::Provider] llm
|
|
75
73
|
# A provider
|
|
@@ -93,6 +91,13 @@ module LLM
|
|
|
93
91
|
@messages = LLM::Buffer.new(llm)
|
|
94
92
|
end
|
|
95
93
|
|
|
94
|
+
##
|
|
95
|
+
# Returns the default params for this context
|
|
96
|
+
# @return [Hash]
|
|
97
|
+
def params
|
|
98
|
+
@params.dup
|
|
99
|
+
end
|
|
100
|
+
|
|
96
101
|
##
|
|
97
102
|
# Returns a context compactor
|
|
98
103
|
# This feature is inspired by the compaction approach developed by
|
|
@@ -186,14 +191,9 @@ module LLM
|
|
|
186
191
|
# res = ctx.talk("Hello, what is your name?")
|
|
187
192
|
# puts res.messages[0].content
|
|
188
193
|
def talk(prompt, params = {})
|
|
189
|
-
return respond(prompt, params) if mode == :responses
|
|
190
194
|
@owner = @llm.request_owner
|
|
191
195
|
compactor.compact!(prompt) if compactor.compact?(prompt)
|
|
192
|
-
params = params
|
|
193
|
-
params = @params.merge(params)
|
|
194
|
-
prompt, params = transform(prompt, params)
|
|
195
|
-
bind!(params[:stream], params[:model], params[:tools])
|
|
196
|
-
res = @llm.complete(prompt, params)
|
|
196
|
+
prompt, params, res = mode == :responses ? respond(prompt, params) : complete(prompt, params)
|
|
197
197
|
self.compacted = false
|
|
198
198
|
role = params[:role] || @llm.user_role
|
|
199
199
|
role = @llm.tool_role if params[:role].nil? && [*prompt].grep(LLM::Function::Return).any?
|
|
@@ -203,35 +203,6 @@ module LLM
|
|
|
203
203
|
end
|
|
204
204
|
alias_method :chat, :talk
|
|
205
205
|
|
|
206
|
-
##
|
|
207
|
-
# Interact with the context via the responses API.
|
|
208
|
-
# This method immediately sends a request to the LLM and returns the response.
|
|
209
|
-
#
|
|
210
|
-
# @note Not all LLM providers support this API
|
|
211
|
-
# @param prompt (see LLM::Provider#complete)
|
|
212
|
-
# @param params The params, including optional :role (defaults to :user), :stream, :tools, :schema etc.
|
|
213
|
-
# @return [LLM::Response] Returns the LLM's response for this turn.
|
|
214
|
-
# @example
|
|
215
|
-
# llm = LLM.openai(key: ENV["KEY"])
|
|
216
|
-
# ctx = LLM::Context.new(llm)
|
|
217
|
-
# res = ctx.respond("What is the capital of France?")
|
|
218
|
-
# puts res.output_text
|
|
219
|
-
def respond(prompt, params = {})
|
|
220
|
-
@owner = @llm.request_owner
|
|
221
|
-
compactor.compact!(prompt) if compactor.compact?(prompt)
|
|
222
|
-
params = @params.merge(params)
|
|
223
|
-
prompt, params = transform(prompt, params)
|
|
224
|
-
bind!(params[:stream], params[:model], params[:tools])
|
|
225
|
-
res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
|
|
226
|
-
params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
|
|
227
|
-
res = @llm.responses.create(prompt, params)
|
|
228
|
-
self.compacted = false
|
|
229
|
-
role = params[:role] || @llm.user_role
|
|
230
|
-
@messages.concat LLM::Prompt === prompt ? prompt.to_a : [LLM::Message.new(role, prompt)]
|
|
231
|
-
@messages.concat [res.choices[-1]]
|
|
232
|
-
res
|
|
233
|
-
end
|
|
234
|
-
|
|
235
206
|
##
|
|
236
207
|
# @return [String]
|
|
237
208
|
def inspect
|
|
@@ -257,18 +228,13 @@ module LLM
|
|
|
257
228
|
end
|
|
258
229
|
|
|
259
230
|
##
|
|
260
|
-
#
|
|
261
|
-
#
|
|
262
|
-
#
|
|
263
|
-
#
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
def call(target)
|
|
268
|
-
case target
|
|
269
|
-
when :functions then guarded_returns || functions.call
|
|
270
|
-
else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
|
|
271
|
-
end
|
|
231
|
+
# Returns whether there is pending tool work in this context.
|
|
232
|
+
# This prefers queued streamed tool work when present, and otherwise
|
|
233
|
+
# falls back to unresolved functions derived from the message history.
|
|
234
|
+
# @return [Boolean]
|
|
235
|
+
def functions?
|
|
236
|
+
pending = queue
|
|
237
|
+
(pending && !pending.empty?) || functions.any?
|
|
272
238
|
end
|
|
273
239
|
|
|
274
240
|
##
|
|
@@ -307,14 +273,15 @@ module LLM
|
|
|
307
273
|
# the context's pending functions directly.
|
|
308
274
|
#
|
|
309
275
|
# @param [Symbol, Array<Symbol>] strategy
|
|
310
|
-
#
|
|
311
|
-
#
|
|
312
|
-
#
|
|
276
|
+
# If the stream queue already has tool work, `wait` will drain it
|
|
277
|
+
# without using this argument.
|
|
278
|
+
# Otherwise, this controls how pending functions are resolved directly.
|
|
279
|
+
# Use `:call` for sequential execution without spawning.
|
|
313
280
|
# @return [Array<LLM::Function::Return>]
|
|
314
281
|
def wait(strategy)
|
|
315
282
|
if LLM::Stream === stream && !stream.queue.empty?
|
|
316
283
|
@queue = stream.queue
|
|
317
|
-
@queue.wait
|
|
284
|
+
@queue.wait
|
|
318
285
|
else
|
|
319
286
|
return guarded_returns if guarded_returns
|
|
320
287
|
@queue = functions.spawn(strategy)
|
|
@@ -350,6 +317,11 @@ module LLM
|
|
|
350
317
|
input_tokens: usage.input_tokens || 0,
|
|
351
318
|
output_tokens: usage.output_tokens || 0,
|
|
352
319
|
reasoning_tokens: usage.reasoning_tokens || 0,
|
|
320
|
+
input_audio_tokens: usage.input_audio_tokens || 0,
|
|
321
|
+
output_audio_tokens: usage.output_audio_tokens || 0,
|
|
322
|
+
input_image_tokens: usage.input_image_tokens || 0,
|
|
323
|
+
cache_read_tokens: usage.cache_read_tokens || 0,
|
|
324
|
+
cache_write_tokens: usage.cache_write_tokens || 0,
|
|
353
325
|
total_tokens: usage.total_tokens || 0
|
|
354
326
|
)
|
|
355
327
|
else
|
|
@@ -414,6 +386,13 @@ module LLM
|
|
|
414
386
|
@llm.tracer
|
|
415
387
|
end
|
|
416
388
|
|
|
389
|
+
##
|
|
390
|
+
# @return [LLM::Stream, #<<, nil]
|
|
391
|
+
# Returns a stream object, or nil
|
|
392
|
+
def stream
|
|
393
|
+
@stream || @params[:stream]
|
|
394
|
+
end
|
|
395
|
+
|
|
417
396
|
##
|
|
418
397
|
# Returns the model a Context is actively using
|
|
419
398
|
# @return [String]
|
|
@@ -458,12 +437,7 @@ module LLM
|
|
|
458
437
|
# Returns an _approximate_ cost for a given context
|
|
459
438
|
# based on both the provider, and model
|
|
460
439
|
def cost
|
|
461
|
-
|
|
462
|
-
input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
|
|
463
|
-
output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
|
|
464
|
-
LLM::Cost.new(input_cost, output_cost)
|
|
465
|
-
rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
|
|
466
|
-
LLM::Cost.new(0, 0)
|
|
440
|
+
LLM::Cost.from(self)
|
|
467
441
|
end
|
|
468
442
|
|
|
469
443
|
##
|
|
@@ -485,6 +459,9 @@ module LLM
|
|
|
485
459
|
|
|
486
460
|
private
|
|
487
461
|
|
|
462
|
+
##
|
|
463
|
+
# Binds runtime metadata onto an active stream.
|
|
464
|
+
# @api private
|
|
488
465
|
def bind!(stream, model, tools)
|
|
489
466
|
return unless LLM::Stream === stream
|
|
490
467
|
@stream = stream
|
|
@@ -494,25 +471,33 @@ module LLM
|
|
|
494
471
|
stream.extra[:tools] = tools
|
|
495
472
|
end
|
|
496
473
|
|
|
474
|
+
##
|
|
475
|
+
# Returns the bound stream queue, if available.
|
|
476
|
+
# @api private
|
|
497
477
|
def queue
|
|
498
478
|
return @queue if @queue
|
|
499
479
|
stream.queue if LLM::Stream === stream
|
|
500
480
|
end
|
|
501
481
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
482
|
+
##
|
|
483
|
+
# Loads skill directories and adapts them into tools.
|
|
484
|
+
# @api private
|
|
506
485
|
def load_skills(skills)
|
|
507
486
|
[*skills].map { LLM::Skill.load(_1).to_tool(self) }
|
|
508
487
|
end
|
|
509
488
|
|
|
489
|
+
##
|
|
490
|
+
# Builds in-band guarded returns when the guard blocks tool work.
|
|
491
|
+
# @api private
|
|
510
492
|
def guarded_returns
|
|
511
493
|
warning = guard&.call(self)
|
|
512
494
|
return unless warning
|
|
513
495
|
functions.map { guarded_return_for(_1, warning) }
|
|
514
496
|
end
|
|
515
497
|
|
|
498
|
+
##
|
|
499
|
+
# Rewrites a prompt and params through the configured transformer.
|
|
500
|
+
# @api private
|
|
516
501
|
def transform(prompt, params)
|
|
517
502
|
return [prompt, params] unless transformer
|
|
518
503
|
stream = params[:stream]
|
|
@@ -522,6 +507,32 @@ module LLM
|
|
|
522
507
|
stream.on_transform_finish(self, transformer) if LLM::Stream === stream
|
|
523
508
|
end
|
|
524
509
|
|
|
510
|
+
##
|
|
511
|
+
# Executes a turn through the Responses API.
|
|
512
|
+
# @api private
|
|
513
|
+
def respond(prompt, params)
|
|
514
|
+
params = @params.merge(params)
|
|
515
|
+
prompt, params = transform(prompt, params)
|
|
516
|
+
bind!(params[:stream], params[:model], params[:tools])
|
|
517
|
+
res_id = params[:store] == false ? nil : @messages.find(&:assistant?)&.response&.response_id
|
|
518
|
+
params = params.merge(previous_response_id: res_id, input: @messages.to_a).compact
|
|
519
|
+
[prompt, params, @llm.responses.create(prompt, params)]
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
##
|
|
523
|
+
# Executes a turn through the chat completions API.
|
|
524
|
+
# @api private
|
|
525
|
+
def complete(prompt, params)
|
|
526
|
+
params = params.merge(messages: @messages.to_a)
|
|
527
|
+
params = @params.merge(params)
|
|
528
|
+
prompt, params = transform(prompt, params)
|
|
529
|
+
bind!(params[:stream], params[:model], params[:tools])
|
|
530
|
+
[prompt, params, @llm.complete(prompt, params)]
|
|
531
|
+
end
|
|
532
|
+
|
|
533
|
+
##
|
|
534
|
+
# Builds one guarded tool return for a blocked function call.
|
|
535
|
+
# @api private
|
|
525
536
|
def guarded_return_for(function, warning)
|
|
526
537
|
LLM::Function::Return.new(function.id, function.name, {
|
|
527
538
|
error: true,
|
|
@@ -530,10 +541,4 @@ module LLM
|
|
|
530
541
|
})
|
|
531
542
|
end
|
|
532
543
|
end
|
|
533
|
-
|
|
534
|
-
# Backward-compatible alias
|
|
535
|
-
Bot = Context
|
|
536
|
-
|
|
537
|
-
# Scheduled for removal in v6.0
|
|
538
|
-
deprecate_constant :Bot
|
|
539
544
|
end
|
|
@@ -36,6 +36,46 @@ module LLM::Contract
|
|
|
36
36
|
raise NotImplementedError, "#{self.class} does not implement '#{__method__}'"
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
+
##
|
|
40
|
+
# @return [Integer]
|
|
41
|
+
# Returns the number of input audio tokens, or 0 when the
|
|
42
|
+
# provider does not report input audio usage
|
|
43
|
+
def input_audio_tokens
|
|
44
|
+
0
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
##
|
|
48
|
+
# @return [Integer]
|
|
49
|
+
# Returns the number of output audio tokens, or 0 when the
|
|
50
|
+
# provider does not report output audio usage
|
|
51
|
+
def output_audio_tokens
|
|
52
|
+
0
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
##
|
|
56
|
+
# @return [Integer]
|
|
57
|
+
# Returns the number of input image tokens, or 0 when the
|
|
58
|
+
# provider does not report input image usage
|
|
59
|
+
def input_image_tokens
|
|
60
|
+
0
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
##
|
|
64
|
+
# @return [Integer]
|
|
65
|
+
# Returns the number of cached input tokens, or 0 when the
|
|
66
|
+
# provider does not report cache usage
|
|
67
|
+
def cache_read_tokens
|
|
68
|
+
0
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
##
|
|
72
|
+
# @return [Integer]
|
|
73
|
+
# Returns the number of cache creation input tokens, or 0 when the
|
|
74
|
+
# provider does not report cache creation usage
|
|
75
|
+
def cache_write_tokens
|
|
76
|
+
0
|
|
77
|
+
end
|
|
78
|
+
|
|
39
79
|
##
|
|
40
80
|
# @return [Integer]
|
|
41
81
|
# Returns the total number of tokens
|
|
@@ -72,6 +112,11 @@ module LLM::Contract
|
|
|
72
112
|
input_tokens:,
|
|
73
113
|
output_tokens:,
|
|
74
114
|
reasoning_tokens:,
|
|
115
|
+
input_audio_tokens:,
|
|
116
|
+
output_audio_tokens:,
|
|
117
|
+
input_image_tokens:,
|
|
118
|
+
cache_read_tokens:,
|
|
119
|
+
cache_write_tokens:,
|
|
75
120
|
total_tokens:
|
|
76
121
|
)
|
|
77
122
|
end
|