llm.rb 8.1.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +120 -2
- data/README.md +161 -514
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/agent.rb +36 -16
- data/lib/llm/context.rb +30 -26
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +1 -0
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +23 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/sequel/plugin.rb +7 -8
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +4 -4
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +2 -12
- data/llm.gemspec +2 -16
- metadata +11 -19
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
- data/lib/llm/utils.rb +0 -19
data/lib/llm/active_record/acts_as_llm.rb
CHANGED

@@ -75,14 +75,6 @@ module LLM::ActiveRecord
    ctx.wait(...)
  end

- ##
- # Calls into the stored context.
- # @see LLM::Context#call
- # @return [Object]
- def call(...)
-   ctx.call(...)
- end
-
  ##
  # @see LLM::Context#mode
  # @return [Symbol]
@@ -112,6 +104,13 @@ module LLM::ActiveRecord
    ctx.functions
  end

+ ##
+ # @see LLM::Context#functions?
+ # @return [Boolean]
+ def functions?
+   ctx.functions?
+ end
+
  ##
  # @see LLM::Context#returns
  # @return [Array<LLM::Function::Return>]
data/lib/llm/agent.rb
CHANGED
@@ -23,8 +23,7 @@ module LLM
  # advisory tool errors back through the model and keeps the loop in-band.
  # Set `tool_attempts: nil` to disable that advisory behavior.
  # * Tool loop execution can be configured with `concurrency :call`,
- #   `:thread`, `:task`, `:fiber`, `:ractor
- #   types such as `[:thread, :ractor]`.
+ #   `:thread`, `:task`, `:fiber`, or `:ractor`.
  #
  # @example
  #   class SystemAdmin < LLM::Agent
@@ -110,9 +109,8 @@ module LLM
  # - `:fork`: forked child processes
  # - `:ractor`: concurrent Ruby ractors for class-based tools; MCP tools are not supported,
  #   and this mode is especially useful for CPU-bound tool work
- #
- #
- #   spawned with more than one concurrency strategy.
+ # Usually pass a single strategy. Arrays are only for advanced mixed-work
+ # cases and are not needed for normal queued stream tool loops.
  # @return [Symbol, Array<Symbol>, nil]
  def self.concurrency(concurrency = nil)
    return @concurrency if concurrency.nil?
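The class-level `concurrency` accessor above doubles as a configuration DSL: called with an argument it stores the strategy, called with none it returns it. A minimal usage sketch, reusing the `SystemAdmin < LLM::Agent` shape from the file's own `@example` (the subclass name is illustrative):

    class SystemAdmin < LLM::Agent
      # Run each tool call on its own thread during the tool loop
      concurrency :thread
    end

    SystemAdmin.concurrency # => :thread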
@@ -139,6 +137,26 @@ module LLM
    @tracer = block || tracer
  end

+ ##
+ # Set or get the default stream.
+ #
+ # When a block is provided, it is stored and evaluated lazily against the
+ # agent instance during initialization so it can build a fresh stream for
+ # each agent.
+ #
+ # @example
+ #   class Agent < LLM::Agent
+ #     stream { MyStream.new }
+ #   end
+ #
+ # @param [Object, Proc, nil] stream
+ # @yieldreturn [Object, nil]
+ # @return [Object, Proc, nil]
+ def self.stream(stream = nil, &block)
+   return @stream if stream.nil? && !block
+   @stream = block || stream
+ end
+
  ##
  # @param [LLM::Provider] provider
  #   A provider
@@ -157,7 +175,9 @@ module LLM
    @concurrency = params.delete(:concurrency) || self.class.concurrency
    @llm = llm
    tracer = params.key?(:tracer) ? params.delete(:tracer) : self.class.tracer
+   stream = params.key?(:stream) ? params.delete(:stream) : self.class.stream
    @tracer = resolve_option(tracer) unless tracer.nil?
+   params[:stream] = resolve_option(stream) unless stream.nil?
    @ctx = LLM::Context.new(llm, defaults.merge({guard: true}).merge(params))
  end

@@ -222,13 +242,6 @@ module LLM
    @ctx.returns
  end

- ##
- # @see LLM::Context#call
- # @return [Object]
- def call(...)
-   @tracer ? @llm.with_tracer(@tracer) { @ctx.call(...) } : @ctx.call(...)
- end
-
  ##
  # @see LLM::Context#wait
  # @return [Array<LLM::Function::Return>]
@@ -293,6 +306,13 @@ module LLM
    @tracer || @ctx.tracer
  end

+ ##
+ # @return [LLM::Stream, #<<, nil]
+ #   Returns a stream object, or nil
+ def stream
+   @ctx.stream
+ end
+
  ##
  # Returns the model an Agent is actively using
  # @return [String]
@@ -397,7 +417,7 @@ module LLM
  # @return [Array<LLM::Function::Return>]
  def call_functions
    case concurrency || :call
-   when :call then
+   when :call then wait(:call)
    when :thread, :task, :fiber, :fork, :ractor, Array then wait(concurrency)
    else raise ArgumentError, "Unknown concurrency: #{concurrency.inspect}. " \
                              "Expected :call, :thread, :task, :fiber, :fork, :ractor, " \
@@ -413,13 +433,13 @@ module LLM
    stream.extra[:concurrency] = concurrency if LLM::Stream === stream
    res = @ctx.public_send(method, apply_instructions(prompt), params)
    loop do
-     break
+     break unless @ctx.functions?
      if max
        max.times do
-         break
+         break unless @ctx.functions?
          res = @ctx.public_send(method, call_functions, params)
        end
-       break
+       break unless @ctx.functions?
        res = @ctx.public_send(method, @ctx.functions.map { rate_limit(_1) }, params)
      else
        res = @ctx.public_send(method, call_functions, params)
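Taken together, the agent hunks above give a stream that can be set at the class level (lazily, via a block) or per instance (via the `stream:` keyword, which is resolved and forwarded as `params[:stream]` to the context). A rough usage sketch; `Reporter`, `MyStream`, and the `llm` provider object are placeholders, while `stream` and the `stream:` keyword come from the diff:

    class Reporter < LLM::Agent
      stream { MyStream.new } # evaluated lazily; a fresh stream per agent
    end

    agent = Reporter.new(llm)                  # stream built from the class-level block
    other = Reporter.new(llm, stream: $stdout) # per-instance override (anything with #<<)
    other.stream                               # => the stream held by the agent's context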
data/lib/llm/context.rb
CHANGED
@@ -44,6 +44,11 @@ module LLM
    input_tokens: 0,
    output_tokens: 0,
    reasoning_tokens: 0,
+   input_audio_tokens: 0,
+   output_audio_tokens: 0,
+   input_image_tokens: 0,
+   cache_read_tokens: 0,
+   cache_write_tokens: 0,
    total_tokens: 0
  )
  private_constant :ZERO_USAGE
@@ -257,18 +262,13 @@ module LLM
  end

  ##
- #
- #
- #
- #
-
-
-
- def call(target)
-   case target
-   when :functions then guarded_returns || functions.call
-   else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
-   end
+ # Returns whether there is pending tool work in this context.
+ # This prefers queued streamed tool work when present, and otherwise
+ # falls back to unresolved functions derived from the message history.
+ # @return [Boolean]
+ def functions?
+   pending = queue
+   (pending && !pending.empty?) || functions.any?
  end

  ##
@@ -307,14 +307,15 @@ module LLM
  #   the context's pending functions directly.
  #
  # @param [Symbol, Array<Symbol>] strategy
- #
- #
- #
+ #   If the stream queue already has tool work, `wait` will drain it
+ #   without using this argument.
+ #   Otherwise, this controls how pending functions are resolved directly.
+ #   Use `:call` for sequential execution without spawning.
  # @return [Array<LLM::Function::Return>]
  def wait(strategy)
    if LLM::Stream === stream && !stream.queue.empty?
      @queue = stream.queue
-     @queue.wait
+     @queue.wait
    else
      return guarded_returns if guarded_returns
      @queue = functions.spawn(strategy)
@@ -350,6 +351,11 @@ module LLM
    input_tokens: usage.input_tokens || 0,
    output_tokens: usage.output_tokens || 0,
    reasoning_tokens: usage.reasoning_tokens || 0,
+   input_audio_tokens: usage.input_audio_tokens || 0,
+   output_audio_tokens: usage.output_audio_tokens || 0,
+   input_image_tokens: usage.input_image_tokens || 0,
+   cache_read_tokens: usage.cache_read_tokens || 0,
+   cache_write_tokens: usage.cache_write_tokens || 0,
    total_tokens: usage.total_tokens || 0
  )
  else
@@ -414,6 +420,13 @@ module LLM
    @llm.tracer
  end

+ ##
+ # @return [LLM::Stream, #<<, nil]
+ #   Returns a stream object, or nil
+ def stream
+   @stream || @params[:stream]
+ end
+
  ##
  # Returns the model a Context is actively using
  # @return [String]
@@ -458,12 +471,7 @@ module LLM
  # Returns an _approximate_ cost for a given context
  # based on both the provider, and model
  def cost
-
-   input_cost = (cost.input.to_f / 1_000_000.0) * usage.input_tokens
-   output_cost = (cost.output.to_f / 1_000_000.0) * usage.output_tokens
-   LLM::Cost.new(input_cost, output_cost)
- rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
-   LLM::Cost.new(0, 0)
+   LLM::Cost.from(self)
  end

  ##
@@ -499,10 +507,6 @@ module LLM
    stream.queue if LLM::Stream === stream
  end

- def stream
-   @stream || @params[:stream]
- end
-
  def load_skills(skills)
    [*skills].map { LLM::Skill.load(_1).to_tool(self) }
  end
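The `functions?` and `wait` changes above describe the shape of a tool loop at the context level. A minimal sketch, assuming `ctx` is an `LLM::Context` mid conversation; only the method names and the `:call` strategy come from the diff:

    # Drain pending tool work until the model stops requesting tools
    while ctx.functions?
      returns = ctx.wait(:call) # or :thread, :task, :fiber, :fork, :ractor
      # the Array<LLM::Function::Return> is fed back on the next turn...
    end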
data/lib/llm/contract/completion.rb
CHANGED

@@ -36,6 +36,46 @@ module LLM::Contract
    raise NotImplementedError, "#{self.class} does not implement '#{__method__}'"
  end

+ ##
+ # @return [Integer]
+ #   Returns the number of input audio tokens, or 0 when the
+ #   provider does not report input audio usage
+ def input_audio_tokens
+   0
+ end
+
+ ##
+ # @return [Integer]
+ #   Returns the number of output audio tokens, or 0 when the
+ #   provider does not report output audio usage
+ def output_audio_tokens
+   0
+ end
+
+ ##
+ # @return [Integer]
+ #   Returns the number of input image tokens, or 0 when the
+ #   provider does not report input image usage
+ def input_image_tokens
+   0
+ end
+
+ ##
+ # @return [Integer]
+ #   Returns the number of cached input tokens, or 0 when the
+ #   provider does not report cache usage
+ def cache_read_tokens
+   0
+ end
+
+ ##
+ # @return [Integer]
+ #   Returns the number of cache creation input tokens, or 0 when the
+ #   provider does not report cache creation usage
+ def cache_write_tokens
+   0
+ end
+
  ##
  # @return [Integer]
  # Returns the total number of tokens
@@ -72,6 +112,11 @@ module LLM::Contract
    input_tokens:,
    output_tokens:,
    reasoning_tokens:,
+   input_audio_tokens:,
+   output_audio_tokens:,
+   input_image_tokens:,
+   cache_read_tokens:,
+   cache_write_tokens:,
    total_tokens:
  )
  end
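Because the contract methods above default to 0, a provider response adapter only needs to override the usage readers its API actually reports (the per-provider response_adapter/completion.rb files in this release do exactly that). A purely illustrative sketch; the adapter class and payload field are hypothetical, and only the overridden method name comes from the contract:

    class Completion < SomeProviderResponse # hypothetical adapter base
      # Report cached input tokens from a hypothetical usage payload;
      # the other new readers keep the contract's 0 default.
      def cache_read_tokens
        body.dig("usage", "cached_tokens") || 0
      end
    end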
data/lib/llm/cost.rb
CHANGED
@@ -2,19 +2,96 @@

  ##
  # The {LLM::Cost LLM::Cost} class represents an approximate
- # cost breakdown for a provider request. It stores
- #
+ # cost breakdown for a provider request. It stores input,
+ # output, input audio, output audio, input image, cache read, cache write,
+ # and reasoning costs separately and can return the total.
  #
  # @attr [Float] input_costs
  #   Returns the input cost
  # @attr [Float] output_costs
  #   Returns the output cost
-
+ # @attr [Float, nil] input_audio_costs
+ #   Returns the input audio cost, or nil when no input audio tokens
+ #   were used
+ # @attr [Float, nil] output_audio_costs
+ #   Returns the output audio cost, or nil when no output audio tokens
+ #   were used
+ # @attr [Float, nil] input_image_costs
+ #   Returns the input image cost, or nil when no input image tokens
+ #   were used
+ # @attr [Float, nil] cache_read_costs
+ #   Returns the cache read cost, or nil when no cache tokens
+ #   were used
+ # @attr [Float, nil] cache_write_costs
+ #   Returns the cache write cost, or nil when no cache creation
+ #   tokens were used
+ # @attr [Float, nil] reasoning_costs
+ #   Returns the reasoning cost, or nil when no reasoning tokens
+ #   were used
+ class LLM::Cost < Struct.new(
+   :input_costs, :output_costs,
+   :input_audio_costs, :output_audio_costs,
+   :cache_read_costs, :cache_write_costs,
+   :input_image_costs, :reasoning_costs,
+   keyword_init: true
+ )
+ ##
+ # Build a cost breakdown from token usage and model pricing.
+ # @param [LLM::Context]
+ #   Context used to resolve provider, model, and token usage
+ # @return [LLM::Cost]
+ def self.from(ctx)
+   pricing = LLM.registry_for(ctx.llm).cost(model: ctx.model)
+   new(
+     input_costs: price(pricing.input, ctx.usage.input_tokens),
+     output_costs: price(pricing.output, ctx.usage.output_tokens),
+     input_audio_costs: price(pricing.input_audio, ctx.usage.input_audio_tokens),
+     output_audio_costs: price(pricing.output_audio, ctx.usage.output_audio_tokens),
+     input_image_costs: price(pricing.input, ctx.usage.input_image_tokens),
+     cache_read_costs: price(pricing.cache_read, ctx.usage.cache_read_tokens),
+     cache_write_costs: price(pricing.cache_write, ctx.usage.cache_write_tokens),
+     reasoning_costs: price(pricing.output, ctx.usage.reasoning_tokens)
+   )
+ rescue LLM::NoSuchModelError, LLM::NoSuchRegistryError
+   new
+ end
+
+ ##
+ # @api private
+ def self.price(rate, tokens)
+   return if tokens.nil? || tokens.to_i.zero?
+   return if rate.nil? || rate.to_f.zero?
+   ((rate.to_f / 1_000_000.0) * tokens.to_i).round(12)
+ end
+ private_class_method :price
+
  ##
  # @return [Float]
  #   Returns the total cost
  def total
-
+   [
+     input_costs, output_costs,
+     input_audio_costs, output_audio_costs,
+     cache_read_costs, cache_write_costs,
+     input_image_costs, reasoning_costs
+   ].compact.sum.round(12)
+ end
+
+ ##
+ # @return [Hash]
+ #   Returns a hash with the non-nil cost components and the total
+ def to_h
+   {
+     input: input_costs,
+     output: output_costs,
+     input_audio: input_audio_costs,
+     output_audio: output_audio_costs,
+     input_image: input_image_costs,
+     cache_read: cache_read_costs,
+     cache_write: cache_write_costs,
+     reasoning: reasoning_costs,
+     total: total
+   }.compact
  end

  ##
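A worked example of the arithmetic in `price` and `total` above, with made-up per-million-token rates and counts:

    # price(rate, tokens) => (rate / 1_000_000.0) * tokens, USD per 1M tokens
    # input:  rate 3.00,  1_500 tokens => (3.00  / 1_000_000.0) * 1_500 = 0.0045
    # output: rate 15.00,   200 tokens => (15.00 / 1_000_000.0) * 200   = 0.003
    # cache read: 0 tokens => nil (price skips zero-token components)
    # total => [0.0045, 0.003].compact.sum.round(12) = 0.0075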
data/lib/llm/error.rb
CHANGED
data/lib/llm/function/array.rb
CHANGED
@@ -18,21 +18,23 @@ class LLM::Function

  ##
  # Calls all functions in a collection concurrently.
- # This method returns an
- #
- # that can be waited on to access the return values.
+ # This method returns an execution group that can be
+ # waited on to access the return values.
  #
  # @param [Symbol] strategy
  #   Controls concurrency strategy:
+ #   - `:call`: Call functions sequentially without spawning
  #   - `:thread`: Use threads
  #   - `:task`: Use async tasks (requires async gem)
  #   - `:fiber`: Use scheduler-backed fibers (requires Fiber.scheduler)
  #   - `:fork`: Use forked child processes
  #   - `:ractor`: Use Ruby ractors (class-based tools only; MCP tools are not supported)
  #
- # @return [LLM::Function::ThreadGroup, LLM::Function::TaskGroup, LLM::Function::FiberGroup, LLM::Function::Ractor::Group]
+ # @return [LLM::Function::CallGroup, LLM::Function::ThreadGroup, LLM::Function::TaskGroup, LLM::Function::FiberGroup, LLM::Function::Ractor::Group]
  def spawn(strategy)
    case strategy
+   when :call
+     CallGroup.new(self)
    when :task
      TaskGroup.new(map { |fn| fn.spawn(:task) })
    when :thread
@@ -44,7 +46,7 @@ class LLM::Function
    when :ractor
      Ractor::Group.new(map { |fn| fn.spawn(:ractor) })
    else
-     raise ArgumentError, "Unknown strategy: #{strategy.inspect}. Expected :thread, :task, :fiber, :fork, or :ractor"
+     raise ArgumentError, "Unknown strategy: #{strategy.inspect}. Expected :call, :thread, :task, :fiber, :fork, or :ractor"
    end
  end

@@ -54,6 +56,7 @@ class LLM::Function
  #
  # @param [Symbol] strategy
  #   Controls concurrency strategy:
+ #   - `:call`: Call each function sequentially through a call group
  #   - `:thread`: Use threads
  #   - `:task`: Use async tasks (requires async gem)
  #   - `:fiber`: Use scheduler-backed fibers (requires Fiber.scheduler)
data/lib/llm/function/call_group.rb
ADDED

@@ -0,0 +1,39 @@
+ # frozen_string_literal: true
+
+ class LLM::Function
+   ##
+   # The {LLM::Function::CallGroup} class wraps an array of
+   # {LLM::Function} objects for sequential execution.
+   #
+   # It provides the same basic interface as the concurrent group
+   # wrappers so callers can flow through `spawn(strategy).wait`
+   # uniformly, even when the selected strategy is direct calls.
+   class CallGroup
+     ##
+     # @param [Array<LLM::Function>] functions
+     # @return [LLM::Function::CallGroup]
+     def initialize(functions)
+       @functions = functions
+     end
+
+     ##
+     # @return [Boolean]
+     def alive?
+       false
+     end
+
+     ##
+     # @return [nil]
+     def interrupt!
+       nil
+     end
+     alias_method :cancel!, :interrupt!
+
+     ##
+     # @return [Array<LLM::Function::Return>]
+     def wait
+       @functions.map(&:call)
+     end
+     alias_method :value, :wait
+   end
+ end
data/lib/llm/function/task.rb
CHANGED
@@ -53,6 +53,16 @@ class LLM::Function
  end
  alias_method :value, :wait

+ ##
+ # @return [Class]
+ def group_class
+   case task
+   when Thread then LLM::Function::ThreadGroup
+   when Fiber then LLM::Function::FiberGroup
+   else LLM::Function::TaskGroup
+   end
+ end
+
  private

  def scheduler
data/lib/llm/function.rb
CHANGED
@@ -32,6 +32,7 @@ class LLM::Function
  require_relative "function/registry"
  require_relative "function/tracing"
  require_relative "function/array"
+ require_relative "function/call_group"
  require_relative "function/task"
  require_relative "function/thread_group"
  require_relative "function/fiber_group"
data/lib/llm/mcp/transport/http.rb
CHANGED

@@ -16,12 +16,13 @@ module LLM::MCP::Transport
  #   Extra headers to send with requests
  # @param [Integer, nil] timeout
  #   The timeout in seconds. Defaults to nil
+ # @param [LLM::Transport, Class, nil] transport
+ #   Optional override with any {LLM::Transport} instance or subclass
  # @return [LLM::MCP::Transport::HTTP]
- def initialize(url:, headers: {}, timeout: nil)
+ def initialize(url:, headers: {}, timeout: nil, transport: nil)
    @uri = URI.parse(url)
-   @use_ssl = @uri.scheme == "https"
    @headers = headers
-   @
+   @transport = resolve_transport(transport, timeout:)
    @queue = []
    @monitor = Monitor.new
    @running = false
@@ -61,21 +62,11 @@
  # @return [void]
  def write(message)
    raise LLM::MCP::Error, "MCP transport is not running" unless running?
-   req = Net::HTTP::Post.new(uri.
+   req = Net::HTTP::Post.new(uri.request_uri, headers.merge("content-type" => "application/json"))
    req.body = LLM.json.dump(message)
-
-
-
-   else
-     http = persistent_client
-     args = [uri, req]
-   end
-   http.request(*args) do |res|
-     unless Net::HTTPSuccess === res
-       raise LLM::MCP::Error, "MCP transport write failed with HTTP #{res.code}"
-     end
-     read(res)
-   end
+   res = transport.request(req, owner: self) { consume(_1) }
+   res = LLM::Transport::Response.from(res)
+   raise LLM::MCP::Error, "MCP transport write failed with HTTP #{res.code}" unless res.success?
  end

  ##
@@ -100,30 +91,27 @@
    @running
  end

-
-
-
-
-
-
-
-
-   LLM.lock(:mcp) do
-     LLM.require "net/http/persistent" unless defined?(Net::HTTP::Persistent)
-     unless LLM::MCP.clients.key?(key)
-       http = Net::HTTP::Persistent.new(name: self.class.name)
-       http.read_timeout = timeout
-       http.open_timeout = timeout
-       LLM::MCP.clients[key] ||= http
-     end
-   end
-   self
+ private
+
+ attr_reader :uri, :headers, :transport
+
+ def consume(res)
+   res = LLM::Transport::Response.from(res)
+   read(res)
+   res
  end
- alias_method :persistent, :persist!

-
+ def resolve_transport(transport, timeout:)
+   return default_transport(timeout:) if transport.nil?
+   if Class === transport && transport <= LLM::Transport
+     return transport.new(host: uri.host, port: uri.port, timeout:, ssl: uri.scheme == "https")
+   end
+   transport
+ end

-
+ def default_transport(timeout:)
+   LLM::Transport::HTTP.new(host: uri.host, port: uri.port, timeout:, ssl: uri.scheme == "https")
+ end

  def read(res)
    if res["content-type"].to_s.include?("text/event-stream")
@@ -142,14 +130,6 @@
    lock { @queue << message }
  end

- def persistent_client
-   LLM::MCP.clients[key]
- end
-
- def key
-   "#{uri.scheme}:#{uri.host}:#{uri.port}:#{timeout}"
- end
-
  def lock(&)
    @monitor.synchronize(&)
  end
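A short sketch of the new `transport:` override; the URL is a placeholder, and the `LLM::Transport::PersistentHTTP` constant is presumed from the transport/persistent_http.rb file added in this release:

    http = LLM::MCP::Transport::HTTP.new(
      url: "https://mcp.example.com/rpc",
      transport: LLM::Transport::PersistentHTTP # any LLM::Transport subclass or instance
    )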
data/lib/llm/mcp/transport/stdio.rb
CHANGED

@@ -78,14 +78,6 @@ module LLM::MCP::Transport
    command.wait
  end

- ##
- # This method is a no-op for stdio transports
- # @return [LLM::MCP::Transport::Stdio]
- def persist!
-   self
- end
- alias_method :persistent, :persist!
-
  private

  attr_reader :command, :stdin, :stdout, :stderr
|