llm.rb 8.1.0 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +120 -2
- data/README.md +161 -514
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/agent.rb +36 -16
- data/lib/llm/context.rb +30 -26
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +1 -0
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +23 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/sequel/plugin.rb +7 -8
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +4 -4
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +2 -12
- data/llm.gemspec +2 -16
- metadata +11 -19
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
- data/lib/llm/utils.rb +0 -19
data/lib/llm/providers/openai/error_handler.rb
CHANGED

@@ -5,7 +5,7 @@ class LLM::OpenAI
   # @private
   class ErrorHandler
     ##
-    # @return [
+    # @return [LLM::Transport::Response]
     # Non-2XX response from the server
     attr_reader :res

@@ -19,13 +19,13 @@ class LLM::OpenAI
     # The tracer
     # @param [Object, nil] span
     # The span
-    # @param [Net::HTTPResponse] res
+    # @param [LLM::Transport::Response, Net::HTTPResponse] res
     # The response from the server
     # @return [LLM::OpenAI::ErrorHandler]
     def initialize(tracer, span, res)
      @tracer = tracer
      @span = span
-     @res = res
+     @res = LLM::Transport::Response.from(res)
     end

@@ -49,12 +49,11 @@ class LLM::OpenAI
     ##
     # @return [LLM::Error]
     def error
-
-      when Net::HTTPServerError
+      if res.server_error?
        LLM::ServerError.new("Server error").tap { _1.response = res }
-
+      elsif res.unauthorized?
        LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
-
+      elsif res.rate_limited?
        LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
      else
        error = body["error"] || {}
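The error handler now asks the response itself (`server_error?`, `unauthorized?`, `rate_limited?`) instead of matching on `Net::HTTPResponse` subclasses. A minimal sketch of how such status predicates can be defined — the `StatusPredicates` module and `Response` struct below are illustrative, not the gem's own code:

```ruby
# Hypothetical status predicates over a numeric status code.
module StatusPredicates
  def server_error? = (500..599).cover?(status)
  def unauthorized? = status == 401
  def rate_limited? = status == 429
  def success?      = (200..299).cover?(status)
end

# A stand-in response object; the real one wraps a transport response.
Response = Struct.new(:status, :body) { include StatusPredicates }

res = Response.new(429, "slow down")
p res.rate_limited? # => true
p res.success?      # => false
```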
data/lib/llm/providers/openai/files.rb
CHANGED

@@ -62,7 +62,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), purpose:))
       req = Net::HTTP::Post.new(path("/files"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :file)
       tracer.on_request_finish(operation: "request", res:, span:)

@@ -134,7 +134,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
   end
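Body streaming now goes through the transport object (`transport.set_body_stream`) rather than a provider-level helper. A sketch of what a transport-side `set_body_stream` can do with net/http — the `Transport` class here is illustrative, only the `Net::HTTP` calls are real:

```ruby
require "net/http"
require "stringio"

class Transport
  # Attach an IO as a streaming request body instead of buffering
  # the whole payload into a String.
  def set_body_stream(request, io)
    request.body_stream = io
    request.content_length = io.size if io.respond_to?(:size)
  end
end

req = Net::HTTP::Post.new("/files")
Transport.new.set_body_stream(req, StringIO.new("payload"))
```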
data/lib/llm/providers/openai/images.rb
CHANGED

@@ -78,7 +78,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(image:, model:, response_format:))
       req = Net::HTTP::Post.new(path("/images/variations"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :image)
       tracer.on_request_finish(operation: "request", model:, res:, span:)

@@ -104,7 +104,7 @@ class LLM::OpenAI
       multi = LLM::Multipart.new(params.merge!(image:, prompt:, model:, response_format:))
       req = Net::HTTP::Post.new(path("/images/edits"), headers)
       req["content-type"] = multi.content_type
-      set_body_stream(req, multi.body)
+      transport.set_body_stream(req, multi.body)
       res, span, tracer = execute(request: req, operation: "request")
       res = ResponseAdapter.adapt(res, type: :image)
       tracer.on_request_finish(operation: "request", model:, res:, span:)

@@ -113,7 +113,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
   end
data/lib/llm/providers/openai/response_adapter/completion.rb
CHANGED

@@ -40,6 +40,48 @@ module LLM::OpenAI::ResponseAdapter
         &.reasoning_tokens || 0
     end

+    ##
+    # (see LLM::Contract::Completion#input_audio_tokens)
+    def input_audio_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#output_audio_tokens)
+    def output_audio_tokens
+      body
+        .usage
+        &.completion_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#input_image_tokens)
+    def input_image_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.image_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_read_tokens)
+    def cache_read_tokens
+      body
+        .usage
+        &.prompt_tokens_details
+        &.cached_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_write_tokens)
+    def cache_write_tokens
+      0
+    end
+
     ##
     # (see LLM::Contract::Completion#total_tokens)
     def total_tokens
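These accessors break usage down by modality (audio, image) and by cache hits, falling back to `0` when the API omits a detail object. That granularity is what cost accounting needs, since cached and modality-specific tokens are often billed at different rates. A hedged sketch of the idea — the `Usage` struct and all rates below are made up for illustration:

```ruby
Usage = Struct.new(:input_tokens, :cache_read_tokens, :output_tokens, keyword_init: true)

# Price uncached input, cached input, and output tokens separately.
def cost(usage, input_rate:, cached_rate:, output_rate:)
  uncached = usage.input_tokens - usage.cache_read_tokens
  (uncached * input_rate) +
    (usage.cache_read_tokens * cached_rate) +
    (usage.output_tokens * output_rate)
end

usage = Usage.new(input_tokens: 1_000, cache_read_tokens: 600, output_tokens: 200)
p cost(usage, input_rate: 2.5e-6, cached_rate: 1.25e-6, output_rate: 1.0e-5)
```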
data/lib/llm/providers/openai/response_adapter/responds.rb
CHANGED

@@ -42,6 +42,45 @@ module LLM::OpenAI::ResponseAdapter
         &.reasoning_tokens || 0
     end

+    ##
+    # (see LLM::Contract::Completion#input_audio_tokens)
+    def input_audio_tokens
+      body
+        .usage
+        &.input_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#output_audio_tokens)
+    def output_audio_tokens
+      body
+        .usage
+        &.output_tokens_details
+        &.audio_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#input_image_tokens)
+    def input_image_tokens
+      super
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_read_tokens)
+    def cache_read_tokens
+      body
+        .usage
+        &.input_tokens_details
+        &.cached_tokens || 0
+    end
+
+    ##
+    # (see LLM::Contract::Completion#cache_write_tokens)
+    def cache_write_tokens
+      0
+    end
+
     ##
     # (see LLM::Contract::Completion#total_tokens)
     def total_tokens
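Both adapters lean on the same safe-navigation idiom: every step after `usage` may be `nil` (the Responses API uses `input_tokens_details`/`output_tokens_details` where chat completions use `prompt_tokens_details`/`completion_tokens_details`), and `|| 0` supplies the default. In isolation, using `OpenStruct` as a stand-in for the parsed body:

```ruby
require "ostruct"

usage = OpenStruct.new(input_tokens_details: nil)
p usage.input_tokens_details&.audio_tokens || 0 # => 0

usage = OpenStruct.new(input_tokens_details: OpenStruct.new(audio_tokens: 7))
p usage.input_tokens_details&.audio_tokens || 0 # => 7
```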
data/lib/llm/providers/openai/responses.rb
CHANGED

@@ -44,7 +44,7 @@ class LLM::OpenAI
       messages = build_complete_messages(prompt, params, role)
       @provider.tracer.set_request_metadata(user_input: extract_user_input(messages, fallback: prompt))
       body = LLM.json.dump({input: [adapt(messages, mode: :response)].flatten}.merge!(params))
-      set_body_stream(req, StringIO.new(body))
+      transport.set_body_stream(req, StringIO.new(body))
       res, span, tracer = execute(request: req, stream:, stream_parser:, operation: "chat", model: params[:model])
       res = ResponseAdapter.adapt(res, type: :responds)
         .extend(Module.new { define_method(:__tools__) { tools } })

@@ -85,7 +85,7 @@ class LLM::OpenAI

     private

-    [:path, :headers, :execute, :
+    [:path, :headers, :execute, :transport, :resolve_tools].each do |m|
       define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
     end
data/lib/llm/providers/openai.rb
CHANGED
@@ -223,7 +223,7 @@ module LLM
       messages = build_complete_messages(prompt, params, role)
       body = LLM.json.dump({messages: adapt(messages, mode: :complete).flatten}.merge!(params))
       req = Net::HTTP::Post.new(completions_path, headers)
-      set_body_stream(req, StringIO.new(body))
+      transport.set_body_stream(req, StringIO.new(body))
       [req, messages]
     end
data/lib/llm/response.rb
CHANGED
@@ -10,25 +10,27 @@ module LLM
   # handling can share one common surface without flattening away
   # specialized behavior.
   #
-  # The normalized response
-  #
-  #
-  #
+  # The normalized response keeps the transport response available
+  # through {#res}. When the default net/http transport is in use,
+  # {LLM::Transport::Response::HTTP
+  # LLM::Transport::Response::HTTP} keeps the
+  # original {Net::HTTPResponse Net::HTTPResponse} available through
+  # its own {LLM::Transport::Response::HTTP#res #res}.
   class Response
     require "json"

     ##
     # Returns the HTTP response
-    # @return [
+    # @return [LLM::Transport::Response]
     attr_reader :res

     ##
-    # @param [
+    # @param [LLM::Transport::Response] res
     # HTTP response
     # @return [LLM::Response]
     # Returns an instance of LLM::Response
     def initialize(res)
-      @res = res
+      @res = LLM::Transport::Response.from(res)
     end

@@ -51,7 +53,7 @@ module LLM
     # Returns true if the response is successful
     # @return [Boolean]
     def ok?
-
+      @res.success?
     end

     ##
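`LLM::Transport::Response.from` now appears at every boundary where a raw response might enter (here and in the error handlers above). A common shape for such a normalizer is an idempotent `.from` constructor; a hedged sketch, with an illustrative `NormalizedResponse` class standing in for the real one:

```ruby
require "net/http"

class NormalizedResponse
  # Idempotent: already-normalized values pass through untouched,
  # raw Net::HTTPResponse objects get wrapped.
  def self.from(res)
    res.is_a?(self) ? res : new(res)
  end

  def initialize(raw)
    @raw = raw
  end

  def status   = Integer(@raw.code)
  def success? = (200..299).cover?(status)
end
```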
data/lib/llm/sequel/plugin.rb
CHANGED
@@ -184,14 +184,6 @@ module LLM::Sequel
       ctx.wait(...)
     end

-    ##
-    # Calls into the stored context.
-    # @see LLM::Context#call
-    # @return [Object]
-    def call(...)
-      ctx.call(...)
-    end
-
     ##
     # @see LLM::Context#mode
     # @return [Symbol]

@@ -222,6 +214,13 @@ module LLM::Sequel
       ctx.functions
     end

+    ##
+    # @see LLM::Context#functions?
+    # @return [Boolean]
+    def functions?
+      ctx.functions?
+    end
+
     ##
     # @see LLM::Context#returns
     # @return [Array<LLM::Function::Return>]
data/lib/llm/stream/queue.rb
CHANGED
@@ -4,7 +4,7 @@ class LLM::Stream
   ##
   # A small queue for collecting streamed tool work. Values can be immediate
   # {LLM::Function::Return} objects or concurrent handles returned by
-  # {LLM::Function#spawn}. Calling {#wait
+  # {LLM::Function#spawn}. Calling {#wait} resolves queued work and
   # returns an array of {LLM::Function::Return} values.
   class Queue
     ##

@@ -41,56 +41,29 @@ class LLM::Stream

     ##
     # Waits for queued work to finish and returns function results.
-    #
-    #
-    #
-    #
-    # - `:task`: Use async tasks (requires async gem)
-    # - `:fiber`: Use scheduler-backed fibers (requires Fiber.scheduler)
-    # - `:ractor`: Use Ruby ractors (class-based tools only; MCP tools are not supported)
-    # - `[:thread, :ractor]`: Wait for any queued thread or ractor work, in the
-    #   given order. This is useful when different tools were spawned with
-    #   different concurrency strategies.
+    #
+    # Queued work is waited according to the actual task types that were
+    # enqueued, so callers do not need to provide a strategy here.
+    #
     # @return [Array<LLM::Function::Return>]
-    def wait
+    def wait
       returns, tasks = @items.shift(@items.length).partition { LLM::Function::Return === _1 }
-      results = wait_tasks(tasks
+      results = wait_tasks(tasks)
       returns.concat fire_hooks(tasks, results)
     end
     alias_method :value, :wait

     private

-    def wait_tasks(tasks
-
-
-
-
-
-
-      strategies.flat_map do |name|
-        selected = grouped.fetch(name)
-        selected.empty? ? [] : wait_group(selected, name)
-      end
-    end
-
-    def wait_group(tasks, strategy)
-      case strategy
-      when :thread then LLM::Function::ThreadGroup.new(tasks).wait
-      when :task then LLM::Function::TaskGroup.new(tasks).wait
-      when :fiber then LLM::Function::FiberGroup.new(tasks).wait
-      when :ractor then LLM::Function::Ractor::Group.new(tasks).wait
-      else raise ArgumentError, "Unknown strategy: #{strategy.inspect}. Expected :thread, :task, :fiber, or :ractor"
-      end
-    end
-
-    def task_strategy(task)
-      case task.task
-      when Thread then :thread
-      when Fiber then :fiber
-      when LLM::Function::Ractor::Task then :ractor
-      else :task
+    def wait_tasks(tasks)
+      return [] if tasks.empty?
+      results = {}
+      grouped_tasks = tasks.group_by(&:group_class)
+      grouped_tasks.each do |group_class, group|
+        returns = group_class.new(group).wait
+        returns.each.with_index { results[group[_2]] = _1 }
       end
+      tasks.map { results[_1] }
     end

     def fire_hooks(tasks, results)
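The strategy argument and the `task_strategy` case statement are gone: each task now reports its own group class, so the queue can group tasks, wait each group, and restore the original ordering. The shape of that dispatch in isolation — the `Task` struct and `ThreadGroupWaiter` below are illustrative stand-ins:

```ruby
# Each task knows which waiter class handles its kind of concurrency.
Task = Struct.new(:thread) do
  def group_class = ThreadGroupWaiter
end

class ThreadGroupWaiter
  def initialize(tasks)
    @tasks = tasks
  end

  def wait
    @tasks.map { _1.thread.value }
  end
end

tasks = 3.times.map { |i| Task.new(Thread.new { i * 2 }) }
results = {}
# Group by waiter class, wait each group, record results per task ...
tasks.group_by(&:group_class).each do |group_class, group|
  group_class.new(group).wait.each.with_index { results[group[_2]] = _1 }
end
# ... then read results back in the original enqueue order.
p tasks.map { results[_1] } # => [0, 2, 4]
```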
data/lib/llm/stream.rb
CHANGED
@@ -46,11 +46,11 @@ module LLM

   ##
   # Waits for queued tool work to finish and returns function results.
-  #
-  #
+  # Any passed arguments are ignored because queued work is waited according
+  # to the actual task types already present in the queue.
   # @return [Array<LLM::Function::Return>]
-  def wait(
-    queue.wait
+  def wait(*)
+    queue.wait
   end

   # @group Public callbacks
data/lib/llm/transport/execution.rb
ADDED

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+class LLM::Transport
+  ##
+  # Internal request execution methods for {LLM::Provider}.
+  #
+  # This module handles provider-side transport execution, response
+  # parsing, streaming, and request body setup.
+  #
+  # @api private
+  module Execution
+    private
+
+    ##
+    # Executes a HTTP request
+    # @param [Net::HTTPRequest] request
+    #  The request to send
+    # @param [Proc] b
+    #  A block to yield the response to (optional)
+    # @return [LLM::Transport::Response]
+    #  The response from the server
+    # @raise [LLM::Error::Unauthorized]
+    #  When authentication fails
+    # @raise [LLM::Error::RateLimit]
+    #  When the rate limit is exceeded
+    # @raise [LLM::Error]
+    #  When any other unsuccessful status code is returned
+    # @raise [SystemCallError]
+    #  When there is a network error at the operating system level
+    # @return [LLM::Transport::Response]
+    def execute(request:, operation:, stream: nil, stream_parser: self.stream_parser, model: nil, inputs: nil, &b)
+      stream &&= LLM::Object.from(streamer: stream, parser: stream_parser, decoder: stream_decoder)
+      owner = transport.request_owner
+      tracer = self.tracer
+      span = tracer.on_request_start(operation:, model:, inputs:)
+      res = transport.request(request, owner:, stream:, &b)
+      res = LLM::Transport::Response.from(res)
+      [handle_response(res, tracer, span), span, tracer]
+    rescue *transport.interrupt_errors
+      raise LLM::Interrupt, "request interrupted" if transport.interrupted?(owner)
+      raise
+    end
+
+    ##
+    # Handles the response from a request
+    # @param [LLM::Transport::Response] res
+    #  The response to handle
+    # @param [Object, nil] span
+    #  The span
+    # @return [LLM::Transport::Response]
+    def handle_response(res, tracer, span)
+      res.ok? ? res.body = parse_response(res) : error_handler.new(tracer, span, res).raise_error!
+      res
+    end
+
+    ##
+    # Parse a HTTP response
+    # @param [LLM::Transport::Response] res
+    # @return [LLM::Object, String]
+    def parse_response(res)
+      case res["content-type"]
+      when %r{\Aapplication/json\s*} then LLM::Object.from(LLM.json.load(res.body))
+      else res.body
+      end
+    end
+  end
+end
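The `parse_response` dispatch above is easy to exercise on its own: JSON bodies become structured data, everything else passes through unchanged. A standalone sketch, substituting the stdlib JSON parser for `LLM.json` and `LLM::Object`:

```ruby
require "json"

def parse_body(content_type, body)
  case content_type
  when %r{\Aapplication/json\s*} then JSON.parse(body) # structured data
  else body                                            # raw passthrough
  end
end

p parse_body("application/json", '{"ok":true}') # => {"ok"=>true}
p parse_body("text/plain", "pong")              # => "pong"
```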
data/lib/llm/transport/http.rb
ADDED

@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+require "net/http"
+
+class LLM::Transport
+  ##
+  # The {LLM::Transport::HTTP LLM::Transport::HTTP} transport is the
+  # built-in adapter for Ruby's {Net::HTTP Net::HTTP}. It manages
+  # transient HTTP connections, tracks active requests by owner, and
+  # interrupts in-flight requests when needed.
+  #
+  # @api private
+  class HTTP < self
+    INTERRUPT_ERRORS = [::IOError, ::EOFError, Errno::EBADF].freeze
+    Request = Struct.new(:client, keyword_init: true)
+
+    ##
+    # @param [String] host
+    # @param [Integer] port
+    # @param [Integer] timeout
+    # @param [Boolean] ssl
+    # @return [LLM::Transport::HTTP]
+    def initialize(host:, port:, timeout:, ssl:)
+      @host = host
+      @port = port
+      @timeout = timeout
+      @ssl = ssl
+      @base_uri = URI("#{ssl ? "https" : "http"}://#{host}:#{port}/")
+      @monitor = Monitor.new
+    end
+
+    ##
+    # Returns the current request owner.
+    # @return [Object]
+    def request_owner
+      return Fiber.current unless defined?(::Async)
+      Async::Task.current? ? Async::Task.current : Fiber.current
+    end
+
+    ##
+    # @return [Array<Class<Exception>>]
+    def interrupt_errors
+      [*INTERRUPT_ERRORS, *optional_interrupt_errors]
+    end
+
+    ##
+    # Interrupt an active request, if any.
+    # @param [Fiber] owner
+    # @return [nil]
+    def interrupt!(owner)
+      req = request_for(owner) or return
+      lock { (@interrupts ||= {})[owner] = true }
+      close_socket(req.client)
+      req.client.finish if req.client.active?
+      owner.stop if owner.respond_to?(:stop)
+    rescue *interrupt_errors
+      nil
+    end
+
+    ##
+    # Returns whether an execution owner was interrupted.
+    # @param [Fiber] owner
+    # @return [Boolean, nil]
+    def interrupted?(owner)
+      lock { @interrupts&.delete(owner) }
+    end
+
+    ##
+    # Performs a request on the current HTTP transport.
+    # @param [Net::HTTPRequest] request
+    # @param [Fiber] owner
+    # @param [LLM::Object, nil] stream
+    # @yieldparam [LLM::Transport::Response] response
+    # @return [Object]
+    def request(request, owner:, stream: nil, &b)
+      client = client()
+      set_request(Request.new(client:), owner)
+      perform_request(client, request, stream, &b)
+    ensure
+      clear_request(owner)
+    end
+
+    ##
+    # @return [String]
+    def inspect
+      "#<#{self.class.name}:0x#{object_id.to_s(16)}>"
+    end
+
+    private
+
+    attr_reader :host, :port, :timeout, :ssl, :base_uri
+
+    def client
+      client = Net::HTTP.new(host, port)
+      client.read_timeout = timeout
+      client.use_ssl = ssl
+      client
+    end
+
+    def close_socket(http)
+      socket = http&.instance_variable_get(:@socket) or return
+      socket = socket.io if socket.respond_to?(:io)
+      socket.close
+    rescue *interrupt_errors
+      nil
+    end
+
+    def request_for(owner)
+      lock do
+        @requests ||= {}
+        @requests[owner]
+      end
+    end
+
+    def set_request(req, owner)
+      lock do
+        @requests ||= {}
+        @requests[owner] = req
+      end
+    end
+
+    def clear_request(owner)
+      lock { @requests&.delete(owner) }
+    end
+
+    def lock(&)
+      @monitor.synchronize(&)
+    end
+
+    def optional_interrupt_errors
+      defined?(::Async::Stop) ? [Async::Stop] : []
+    end
+  end
+end
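The transport's owner-keyed bookkeeping — a `Monitor`-guarded hash mapping each requesting fiber or task to its in-flight client — is what lets `interrupt!` reach an active request from another thread. The core pattern, reduced to a sketch (the `Registry` class is illustrative; `Thread.current` stands in for the owner):

```ruby
require "monitor"

class Registry
  def initialize
    @monitor = Monitor.new
    @requests = {}
  end

  # Every access goes through the monitor, so lookups and deletes
  # from other threads never race against registration.
  def set(owner, value) = @monitor.synchronize { @requests[owner] = value }
  def get(owner)        = @monitor.synchronize { @requests[owner] }
  def clear(owner)      = @monitor.synchronize { @requests.delete(owner) }
end

registry = Registry.new
registry.set(Thread.current, :client)
p registry.get(Thread.current)   # => :client
registry.clear(Thread.current)
p registry.get(Thread.current)   # => nil
```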