llm.rb 8.1.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +196 -6
- data/README.md +233 -518
- data/data/anthropic.json +278 -258
- data/data/bedrock.json +1288 -1561
- data/data/deepseek.json +38 -38
- data/data/google.json +656 -579
- data/data/openai.json +860 -818
- data/data/xai.json +243 -552
- data/data/zai.json +168 -168
- data/lib/llm/active_record/acts_as_agent.rb +5 -0
- data/lib/llm/active_record/acts_as_llm.rb +7 -8
- data/lib/llm/active_record.rb +1 -6
- data/lib/llm/agent.rb +121 -82
- data/lib/llm/context.rb +79 -74
- data/lib/llm/contract/completion.rb +45 -0
- data/lib/llm/cost.rb +81 -4
- data/lib/llm/error.rb +1 -1
- data/lib/llm/function/array.rb +8 -5
- data/lib/llm/function/call_group.rb +39 -0
- data/lib/llm/function/call_task.rb +46 -0
- data/lib/llm/function/fork/task.rb +6 -0
- data/lib/llm/function/ractor/task.rb +6 -0
- data/lib/llm/function/task.rb +10 -0
- data/lib/llm/function.rb +28 -1
- data/lib/llm/mcp/transport/http.rb +26 -46
- data/lib/llm/mcp/transport/stdio.rb +0 -8
- data/lib/llm/mcp.rb +6 -23
- data/lib/llm/provider.rb +30 -20
- data/lib/llm/providers/anthropic/error_handler.rb +6 -7
- data/lib/llm/providers/anthropic/files.rb +2 -2
- data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
- data/lib/llm/providers/anthropic.rb +1 -1
- data/lib/llm/providers/bedrock/error_handler.rb +8 -9
- data/lib/llm/providers/bedrock/models.rb +13 -13
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
- data/lib/llm/providers/bedrock.rb +1 -1
- data/lib/llm/providers/google/error_handler.rb +6 -7
- data/lib/llm/providers/google/files.rb +2 -4
- data/lib/llm/providers/google/images.rb +1 -1
- data/lib/llm/providers/google/models.rb +0 -2
- data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/google/stream_parser.rb +2 -2
- data/lib/llm/providers/google.rb +1 -1
- data/lib/llm/providers/ollama/error_handler.rb +6 -7
- data/lib/llm/providers/ollama/models.rb +0 -2
- data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
- data/lib/llm/providers/ollama.rb +1 -1
- data/lib/llm/providers/openai/audio.rb +3 -3
- data/lib/llm/providers/openai/error_handler.rb +6 -7
- data/lib/llm/providers/openai/files.rb +2 -2
- data/lib/llm/providers/openai/images.rb +3 -3
- data/lib/llm/providers/openai/models.rb +1 -1
- data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
- data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
- data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/responses.rb +2 -2
- data/lib/llm/providers/openai/stream_parser.rb +2 -2
- data/lib/llm/providers/openai/vector_stores.rb +1 -1
- data/lib/llm/providers/openai.rb +1 -1
- data/lib/llm/response.rb +10 -8
- data/lib/llm/schema.rb +11 -0
- data/lib/llm/sequel/agent.rb +5 -0
- data/lib/llm/sequel/plugin.rb +8 -14
- data/lib/llm/stream/queue.rb +15 -42
- data/lib/llm/stream.rb +15 -40
- data/lib/llm/tool/param.rb +1 -8
- data/lib/llm/transport/execution.rb +67 -0
- data/lib/llm/transport/http.rb +134 -0
- data/lib/llm/transport/persistent_http.rb +152 -0
- data/lib/llm/transport/response/http.rb +113 -0
- data/lib/llm/transport/response.rb +112 -0
- data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
- data/lib/llm/transport.rb +139 -0
- data/lib/llm/usage.rb +14 -5
- data/lib/llm/utils.rb +24 -14
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +3 -12
- data/llm.gemspec +2 -16
- metadata +13 -20
- data/lib/llm/bot.rb +0 -3
- data/lib/llm/provider/transport/http/execution.rb +0 -115
- data/lib/llm/provider/transport/http/interruptible.rb +0 -114
- data/lib/llm/provider/transport/http.rb +0 -145
|
@@ -27,6 +27,36 @@ module LLM::Anthropic::ResponseAdapter
|
|
|
27
27
|
0
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
##
|
|
31
|
+
# (see LLM::Contract::Completion#input_audio_tokens)
|
|
32
|
+
def input_audio_tokens
|
|
33
|
+
super
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
##
|
|
37
|
+
# (see LLM::Contract::Completion#output_audio_tokens)
|
|
38
|
+
def output_audio_tokens
|
|
39
|
+
super
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
##
|
|
43
|
+
# (see LLM::Contract::Completion#input_image_tokens)
|
|
44
|
+
def input_image_tokens
|
|
45
|
+
super
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
##
|
|
49
|
+
# (see LLM::Contract::Completion#cache_read_tokens)
|
|
50
|
+
def cache_read_tokens
|
|
51
|
+
body.usage&.cache_read_input_tokens || 0
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
##
|
|
55
|
+
# (see LLM::Contract::Completion#cache_write_tokens)
|
|
56
|
+
def cache_write_tokens
|
|
57
|
+
body.usage&.cache_creation_input_tokens || 0
|
|
58
|
+
end
|
|
59
|
+
|
|
30
60
|
##
|
|
31
61
|
# (see LLM::Contract::Completion#total_tokens)
|
|
32
62
|
def total_tokens
|
|
@@ -105,14 +105,14 @@ class LLM::Anthropic
|
|
|
105
105
|
end
|
|
106
106
|
|
|
107
107
|
def resolve_tool(tool)
|
|
108
|
-
registered = @stream.
|
|
108
|
+
registered = @stream.__find__(tool["name"])
|
|
109
109
|
fn = (registered || LLM::Function.new(tool["name"])).dup.tap do |fn|
|
|
110
110
|
fn.id = tool["id"]
|
|
111
111
|
fn.arguments = LLM::Anthropic.parse_tool_input(tool["input"])
|
|
112
112
|
fn.tracer = @stream.extra[:tracer]
|
|
113
113
|
fn.model = @stream.extra[:model]
|
|
114
114
|
end
|
|
115
|
-
[fn, (registered ? nil :
|
|
115
|
+
[fn, (registered ? nil : fn.unavailable)]
|
|
116
116
|
end
|
|
117
117
|
end
|
|
118
118
|
end
|
|
@@ -161,7 +161,7 @@ module LLM
|
|
|
161
161
|
payload = adapt(messages)
|
|
162
162
|
body = LLM.json.dump(payload.merge!(params))
|
|
163
163
|
req = Net::HTTP::Post.new("/v1/messages", headers)
|
|
164
|
-
set_body_stream(req, StringIO.new(body))
|
|
164
|
+
transport.set_body_stream(req, StringIO.new(body))
|
|
165
165
|
req
|
|
166
166
|
end
|
|
167
167
|
|
|
@@ -11,7 +11,7 @@ class LLM::Bedrock
|
|
|
11
11
|
# @api private
|
|
12
12
|
class ErrorHandler
|
|
13
13
|
##
|
|
14
|
-
# @return [
|
|
14
|
+
# @return [LLM::Transport::Response]
|
|
15
15
|
attr_reader :res
|
|
16
16
|
|
|
17
17
|
##
|
|
@@ -21,12 +21,12 @@ class LLM::Bedrock
|
|
|
21
21
|
##
|
|
22
22
|
# @param [LLM::Tracer] tracer
|
|
23
23
|
# @param [Object, nil] span
|
|
24
|
-
# @param [Net::HTTPResponse] res
|
|
24
|
+
# @param [LLM::Transport::Response, Net::HTTPResponse] res
|
|
25
25
|
# @return [LLM::Bedrock::ErrorHandler]
|
|
26
26
|
def initialize(tracer, span, res)
|
|
27
27
|
@tracer = tracer
|
|
28
28
|
@span = span
|
|
29
|
-
@res = res
|
|
29
|
+
@res = LLM::Transport::Response.from(res)
|
|
30
30
|
end
|
|
31
31
|
|
|
32
32
|
##
|
|
@@ -44,16 +44,15 @@ class LLM::Bedrock
|
|
|
44
44
|
# @return [LLM::Error]
|
|
45
45
|
def error
|
|
46
46
|
message = extract_message
|
|
47
|
-
|
|
48
|
-
when Net::HTTPServerError
|
|
47
|
+
if res.server_error?
|
|
49
48
|
LLM::ServerError.new(message).tap { _1.response = res }
|
|
50
|
-
|
|
49
|
+
elsif res.unauthorized?
|
|
51
50
|
LLM::UnauthorizedError.new(message).tap { _1.response = res }
|
|
52
|
-
|
|
51
|
+
elsif res.forbidden?
|
|
53
52
|
LLM::UnauthorizedError.new(message).tap { _1.response = res }
|
|
54
|
-
|
|
53
|
+
elsif res.rate_limited?
|
|
55
54
|
LLM::RateLimitError.new(message).tap { _1.response = res }
|
|
56
|
-
|
|
55
|
+
elsif res.not_found?
|
|
57
56
|
LLM::Error.new("Bedrock model not found: #{message}").tap { _1.response = res }
|
|
58
57
|
else
|
|
59
58
|
LLM::Error.new(message).tap { _1.response = res }
|
|
@@ -8,8 +8,9 @@ class LLM::Bedrock
|
|
|
8
8
|
#
|
|
9
9
|
# Unlike the Converse API (which lives on `bedrock-runtime.<region>.amazonaws.com`),
|
|
10
10
|
# the models endpoint lives on the control plane at
|
|
11
|
-
# `bedrock.<region>.amazonaws.com`. This class
|
|
12
|
-
#
|
|
11
|
+
# `bedrock.<region>.amazonaws.com`. This class builds a matching
|
|
12
|
+
# transport for the control-plane host from the provider's current
|
|
13
|
+
# transport class.
|
|
13
14
|
#
|
|
14
15
|
# @example
|
|
15
16
|
# llm = LLM.bedrock(
|
|
@@ -39,19 +40,18 @@ class LLM::Bedrock
|
|
|
39
40
|
# @return [LLM::Response]
|
|
40
41
|
def all(**params)
|
|
41
42
|
host = credentials.host
|
|
42
|
-
|
|
43
|
+
req = build_request(host, params)
|
|
44
|
+
res = build_transport(host).request(req, owner: self)
|
|
45
|
+
handle_response(res)
|
|
43
46
|
end
|
|
44
47
|
|
|
45
48
|
private
|
|
46
49
|
|
|
47
50
|
##
|
|
48
51
|
# @param [String] host
|
|
49
|
-
# @return [
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
http.use_ssl = true
|
|
53
|
-
http.read_timeout = timeout
|
|
54
|
-
http
|
|
52
|
+
# @return [LLM::Transport]
|
|
53
|
+
def build_transport(host)
|
|
54
|
+
transport.class.new(host:, port: 443, timeout:, ssl: true)
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
##
|
|
@@ -68,12 +68,12 @@ class LLM::Bedrock
|
|
|
68
68
|
end
|
|
69
69
|
|
|
70
70
|
##
|
|
71
|
-
# @param [Net::HTTPResponse] res
|
|
71
|
+
# @param [LLM::Transport::Response, Net::HTTPResponse] res
|
|
72
72
|
# @return [LLM::Response]
|
|
73
73
|
# @raise [LLM::Error]
|
|
74
74
|
def handle_response(res)
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
res = LLM::Transport::Response.from(res)
|
|
76
|
+
if res.success?
|
|
77
77
|
res.body = LLM::Object.from(LLM.json.load(res.body || "{}"))
|
|
78
78
|
LLM::Bedrock::ResponseAdapter.adapt(res, type: :models)
|
|
79
79
|
else
|
|
@@ -102,7 +102,7 @@ class LLM::Bedrock
|
|
|
102
102
|
end
|
|
103
103
|
end
|
|
104
104
|
|
|
105
|
-
[:timeout, :tracer].each do |m|
|
|
105
|
+
[:timeout, :tracer, :transport].each do |m|
|
|
106
106
|
define_method(m) { @provider.send(m) }
|
|
107
107
|
end
|
|
108
108
|
end
|
|
@@ -56,6 +56,36 @@ module LLM::Bedrock::ResponseAdapter
|
|
|
56
56
|
0
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
+
##
|
|
60
|
+
# (see LLM::Contract::Completion#input_audio_tokens)
|
|
61
|
+
def input_audio_tokens
|
|
62
|
+
super
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
##
|
|
66
|
+
# (see LLM::Contract::Completion#output_audio_tokens)
|
|
67
|
+
def output_audio_tokens
|
|
68
|
+
super
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
##
|
|
72
|
+
# (see LLM::Contract::Completion#input_image_tokens)
|
|
73
|
+
def input_image_tokens
|
|
74
|
+
super
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
##
|
|
78
|
+
# (see LLM::Contract::Completion#cache_read_tokens)
|
|
79
|
+
def cache_read_tokens
|
|
80
|
+
0
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
##
|
|
84
|
+
# (see LLM::Contract::Completion#cache_write_tokens)
|
|
85
|
+
def cache_write_tokens
|
|
86
|
+
0
|
|
87
|
+
end
|
|
88
|
+
|
|
59
89
|
##
|
|
60
90
|
# (see LLM::Contract::Completion#total_tokens)
|
|
61
91
|
def total_tokens
|
|
@@ -184,14 +184,14 @@ class LLM::Bedrock
|
|
|
184
184
|
|
|
185
185
|
def resolve_tool(tool)
|
|
186
186
|
payload = tool["toolUse"] || {}
|
|
187
|
-
registered = @stream.
|
|
187
|
+
registered = @stream.__find__(payload["name"])
|
|
188
188
|
fn = (registered || LLM::Function.new(payload["name"])).dup.tap do |f|
|
|
189
189
|
f.id = payload["toolUseId"]
|
|
190
190
|
f.arguments = payload["input"] || {}
|
|
191
191
|
f.tracer = @stream.extra[:tracer]
|
|
192
192
|
f.model = @stream.extra[:model]
|
|
193
193
|
end
|
|
194
|
-
[fn, registered ? nil :
|
|
194
|
+
[fn, registered ? nil : fn.unavailable]
|
|
195
195
|
end
|
|
196
196
|
|
|
197
197
|
def content
|
|
@@ -218,7 +218,7 @@ module LLM
|
|
|
218
218
|
path = stream ? "/model/#{model_id}/converse-stream" \
|
|
219
219
|
: "/model/#{model_id}/converse"
|
|
220
220
|
req = Net::HTTP::Post.new(path, headers)
|
|
221
|
-
set_body_stream(req, StringIO.new(body))
|
|
221
|
+
transport.set_body_stream(req, StringIO.new(body))
|
|
222
222
|
[req, messages, body]
|
|
223
223
|
end
|
|
224
224
|
|
|
@@ -5,7 +5,7 @@ class LLM::Google
|
|
|
5
5
|
# @private
|
|
6
6
|
class ErrorHandler
|
|
7
7
|
##
|
|
8
|
-
# @return [
|
|
8
|
+
# @return [LLM::Transport::Response]
|
|
9
9
|
# Non-2XX response from the server
|
|
10
10
|
attr_reader :res
|
|
11
11
|
|
|
@@ -19,13 +19,13 @@ class LLM::Google
|
|
|
19
19
|
# The tracer
|
|
20
20
|
# @param [Object, nil] span
|
|
21
21
|
# The span
|
|
22
|
-
# @param [Net::HTTPResponse] res
|
|
22
|
+
# @param [LLM::Transport::Response, Net::HTTPResponse] res
|
|
23
23
|
# The response from the server
|
|
24
24
|
# @return [LLM::Google::ErrorHandler]
|
|
25
25
|
def initialize(tracer, span, res)
|
|
26
26
|
@tracer = tracer
|
|
27
27
|
@span = span
|
|
28
|
-
@res = res
|
|
28
|
+
@res = LLM::Transport::Response.from(res)
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
##
|
|
@@ -49,17 +49,16 @@ class LLM::Google
|
|
|
49
49
|
##
|
|
50
50
|
# @return [LLM::Error]
|
|
51
51
|
def error
|
|
52
|
-
|
|
53
|
-
when Net::HTTPServerError
|
|
52
|
+
if res.server_error?
|
|
54
53
|
LLM::ServerError.new("Server error").tap { _1.response = res }
|
|
55
|
-
|
|
54
|
+
elsif res.bad_request?
|
|
56
55
|
reason = body.dig("error", "details", 0, "reason")
|
|
57
56
|
if reason == "API_KEY_INVALID"
|
|
58
57
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
59
58
|
else
|
|
60
59
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
61
60
|
end
|
|
62
|
-
|
|
61
|
+
elsif res.rate_limited?
|
|
63
62
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
64
63
|
else
|
|
65
64
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
@@ -69,7 +69,7 @@ class LLM::Google
|
|
|
69
69
|
req["X-Goog-Upload-Offset"] = 0
|
|
70
70
|
req["X-Goog-Upload-Command"] = "upload, finalize"
|
|
71
71
|
file.with_io do |io|
|
|
72
|
-
set_body_stream(req, io)
|
|
72
|
+
transport.set_body_stream(req, io)
|
|
73
73
|
res, span, tracer = execute(request: req, operation: "request")
|
|
74
74
|
res = ResponseAdapter.adapt(res, type: :file)
|
|
75
75
|
tracer.on_request_finish(operation: "request", res:, span:)
|
|
@@ -127,8 +127,6 @@ class LLM::Google
|
|
|
127
127
|
|
|
128
128
|
private
|
|
129
129
|
|
|
130
|
-
include LLM::Utils
|
|
131
|
-
|
|
132
130
|
def request_upload_url(file:)
|
|
133
131
|
req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{key}", headers)
|
|
134
132
|
req["X-Goog-Upload-Protocol"] = "resumable"
|
|
@@ -146,7 +144,7 @@ class LLM::Google
|
|
|
146
144
|
@provider.instance_variable_get(:@key)
|
|
147
145
|
end
|
|
148
146
|
|
|
149
|
-
[:headers, :execute, :
|
|
147
|
+
[:headers, :execute, :transport].each do |m|
|
|
150
148
|
define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
|
|
151
149
|
end
|
|
152
150
|
end
|
|
@@ -33,6 +33,36 @@ module LLM::Google::ResponseAdapter
|
|
|
33
33
|
body.usageMetadata.thoughtsTokenCount || 0
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
##
|
|
37
|
+
# (see LLM::Contract::Completion#input_audio_tokens)
|
|
38
|
+
def input_audio_tokens
|
|
39
|
+
super
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
##
|
|
43
|
+
# (see LLM::Contract::Completion#output_audio_tokens)
|
|
44
|
+
def output_audio_tokens
|
|
45
|
+
super
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
##
|
|
49
|
+
# (see LLM::Contract::Completion#input_image_tokens)
|
|
50
|
+
def input_image_tokens
|
|
51
|
+
super
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
##
|
|
55
|
+
# (see LLM::Contract::Completion#cache_read_tokens)
|
|
56
|
+
def cache_read_tokens
|
|
57
|
+
0
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
##
|
|
61
|
+
# (see LLM::Contract::Completion#cache_write_tokens)
|
|
62
|
+
def cache_write_tokens
|
|
63
|
+
0
|
|
64
|
+
end
|
|
65
|
+
|
|
36
66
|
##
|
|
37
67
|
# (see LLM::Contract::Completion#total_tokens)
|
|
38
68
|
def total_tokens
|
|
@@ -153,14 +153,14 @@ class LLM::Google
|
|
|
153
153
|
|
|
154
154
|
def resolve_tool(part, cindex, pindex)
|
|
155
155
|
call = part["functionCall"]
|
|
156
|
-
registered = @stream.
|
|
156
|
+
registered = @stream.__find__(call["name"])
|
|
157
157
|
fn = (registered || LLM::Function.new(call["name"])).dup.tap do |fn|
|
|
158
158
|
fn.id = LLM::Google.tool_id(part:, cindex:, pindex:)
|
|
159
159
|
fn.arguments = call["args"]
|
|
160
160
|
fn.tracer = @stream.extra[:tracer]
|
|
161
161
|
fn.model = @stream.extra[:model]
|
|
162
162
|
end
|
|
163
|
-
[fn, (registered ? nil :
|
|
163
|
+
[fn, (registered ? nil : fn.unavailable)]
|
|
164
164
|
end
|
|
165
165
|
end
|
|
166
166
|
end
|
data/lib/llm/providers/google.rb
CHANGED
|
@@ -208,7 +208,7 @@ module LLM
|
|
|
208
208
|
req = Net::HTTP::Post.new(path, headers)
|
|
209
209
|
messages = build_complete_messages(prompt, params, role)
|
|
210
210
|
body = LLM.json.dump({contents: adapt(messages)}.merge!(params))
|
|
211
|
-
set_body_stream(req, StringIO.new(body))
|
|
211
|
+
transport.set_body_stream(req, StringIO.new(body))
|
|
212
212
|
req
|
|
213
213
|
end
|
|
214
214
|
|
|
@@ -5,7 +5,7 @@ class LLM::Ollama
|
|
|
5
5
|
# @private
|
|
6
6
|
class ErrorHandler
|
|
7
7
|
##
|
|
8
|
-
# @return [
|
|
8
|
+
# @return [LLM::Transport::Response]
|
|
9
9
|
# Non-2XX response from the server
|
|
10
10
|
attr_reader :res
|
|
11
11
|
|
|
@@ -19,13 +19,13 @@ class LLM::Ollama
|
|
|
19
19
|
# The tracer
|
|
20
20
|
# @param [Object, nil] span
|
|
21
21
|
# The span
|
|
22
|
-
# @param [Net::HTTPResponse] res
|
|
22
|
+
# @param [LLM::Transport::Response, Net::HTTPResponse] res
|
|
23
23
|
# The response from the server
|
|
24
24
|
# @return [LLM::Ollama::ErrorHandler]
|
|
25
25
|
def initialize(tracer, span, res)
|
|
26
26
|
@tracer = tracer
|
|
27
27
|
@span = span
|
|
28
|
-
@res = res
|
|
28
|
+
@res = LLM::Transport::Response.from(res)
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
##
|
|
@@ -43,12 +43,11 @@ class LLM::Ollama
|
|
|
43
43
|
##
|
|
44
44
|
# @return [LLM::Error]
|
|
45
45
|
def error
|
|
46
|
-
|
|
47
|
-
when Net::HTTPServerError
|
|
46
|
+
if res.server_error?
|
|
48
47
|
LLM::ServerError.new("Server error").tap { _1.response = res }
|
|
49
|
-
|
|
48
|
+
elsif res.unauthorized?
|
|
50
49
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
51
|
-
|
|
50
|
+
elsif res.rate_limited?
|
|
52
51
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
53
52
|
else
|
|
54
53
|
LLM::Error.new("Unexpected response").tap { _1.response = res }
|
|
@@ -27,6 +27,36 @@ module LLM::Ollama::ResponseAdapter
|
|
|
27
27
|
0
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
##
|
|
31
|
+
# (see LLM::Contract::Completion#input_audio_tokens)
|
|
32
|
+
def input_audio_tokens
|
|
33
|
+
super
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
##
|
|
37
|
+
# (see LLM::Contract::Completion#output_audio_tokens)
|
|
38
|
+
def output_audio_tokens
|
|
39
|
+
super
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
##
|
|
43
|
+
# (see LLM::Contract::Completion#input_image_tokens)
|
|
44
|
+
def input_image_tokens
|
|
45
|
+
super
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
##
|
|
49
|
+
# (see LLM::Contract::Completion#cache_read_tokens)
|
|
50
|
+
def cache_read_tokens
|
|
51
|
+
0
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
##
|
|
55
|
+
# (see LLM::Contract::Completion#cache_write_tokens)
|
|
56
|
+
def cache_write_tokens
|
|
57
|
+
0
|
|
58
|
+
end
|
|
59
|
+
|
|
30
60
|
##
|
|
31
61
|
# (see LLM::Contract::Completion#total_tokens)
|
|
32
62
|
def total_tokens
|
data/lib/llm/providers/ollama.rb
CHANGED
|
@@ -130,7 +130,7 @@ module LLM
|
|
|
130
130
|
messages = build_complete_messages(prompt, params, role)
|
|
131
131
|
body = LLM.json.dump({messages: [adapt(messages)].flatten}.merge!(params))
|
|
132
132
|
req = Net::HTTP::Post.new("/api/chat", headers)
|
|
133
|
-
set_body_stream(req, StringIO.new(body))
|
|
133
|
+
transport.set_body_stream(req, StringIO.new(body))
|
|
134
134
|
req
|
|
135
135
|
end
|
|
136
136
|
|
|
@@ -57,7 +57,7 @@ class LLM::OpenAI
|
|
|
57
57
|
multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
|
|
58
58
|
req = Net::HTTP::Post.new(path("/audio/transcriptions"), headers)
|
|
59
59
|
req["content-type"] = multi.content_type
|
|
60
|
-
set_body_stream(req, multi.body)
|
|
60
|
+
transport.set_body_stream(req, multi.body)
|
|
61
61
|
res, span, tracer = execute(request: req, operation: "request")
|
|
62
62
|
res = LLM::Response.new(res)
|
|
63
63
|
tracer.on_request_finish(operation: "request", model:, res:, span:)
|
|
@@ -81,7 +81,7 @@ class LLM::OpenAI
|
|
|
81
81
|
multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
|
|
82
82
|
req = Net::HTTP::Post.new(path("/audio/translations"), headers)
|
|
83
83
|
req["content-type"] = multi.content_type
|
|
84
|
-
set_body_stream(req, multi.body)
|
|
84
|
+
transport.set_body_stream(req, multi.body)
|
|
85
85
|
res, span, tracer = execute(request: req, operation: "request")
|
|
86
86
|
res = LLM::Response.new(res)
|
|
87
87
|
tracer.on_request_finish(operation: "request", model:, res:, span:)
|
|
@@ -90,7 +90,7 @@ class LLM::OpenAI
|
|
|
90
90
|
|
|
91
91
|
private
|
|
92
92
|
|
|
93
|
-
[:path, :headers, :execute, :
|
|
93
|
+
[:path, :headers, :execute, :transport].each do |m|
|
|
94
94
|
define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
|
|
95
95
|
end
|
|
96
96
|
end
|
|
@@ -5,7 +5,7 @@ class LLM::OpenAI
|
|
|
5
5
|
# @private
|
|
6
6
|
class ErrorHandler
|
|
7
7
|
##
|
|
8
|
-
# @return [
|
|
8
|
+
# @return [LLM::Transport::Response]
|
|
9
9
|
# Non-2XX response from the server
|
|
10
10
|
attr_reader :res
|
|
11
11
|
|
|
@@ -19,13 +19,13 @@ class LLM::OpenAI
|
|
|
19
19
|
# The tracer
|
|
20
20
|
# @param [Object, nil] span
|
|
21
21
|
# The span
|
|
22
|
-
# @param [Net::HTTPResponse] res
|
|
22
|
+
# @param [LLM::Transport::Response, Net::HTTPResponse] res
|
|
23
23
|
# The response from the server
|
|
24
24
|
# @return [LLM::OpenAI::ErrorHandler]
|
|
25
25
|
def initialize(tracer, span, res)
|
|
26
26
|
@tracer = tracer
|
|
27
27
|
@span = span
|
|
28
|
-
@res = res
|
|
28
|
+
@res = LLM::Transport::Response.from(res)
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
##
|
|
@@ -49,12 +49,11 @@ class LLM::OpenAI
|
|
|
49
49
|
##
|
|
50
50
|
# @return [LLM::Error]
|
|
51
51
|
def error
|
|
52
|
-
|
|
53
|
-
when Net::HTTPServerError
|
|
52
|
+
if res.server_error?
|
|
54
53
|
LLM::ServerError.new("Server error").tap { _1.response = res }
|
|
55
|
-
|
|
54
|
+
elsif res.unauthorized?
|
|
56
55
|
LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
|
|
57
|
-
|
|
56
|
+
elsif res.rate_limited?
|
|
58
57
|
LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
|
|
59
58
|
else
|
|
60
59
|
error = body["error"] || {}
|
|
@@ -62,7 +62,7 @@ class LLM::OpenAI
|
|
|
62
62
|
multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), purpose:))
|
|
63
63
|
req = Net::HTTP::Post.new(path("/files"), headers)
|
|
64
64
|
req["content-type"] = multi.content_type
|
|
65
|
-
set_body_stream(req, multi.body)
|
|
65
|
+
transport.set_body_stream(req, multi.body)
|
|
66
66
|
res, span, tracer = execute(request: req, operation: "request")
|
|
67
67
|
res = ResponseAdapter.adapt(res, type: :file)
|
|
68
68
|
tracer.on_request_finish(operation: "request", res:, span:)
|
|
@@ -134,7 +134,7 @@ class LLM::OpenAI
|
|
|
134
134
|
|
|
135
135
|
private
|
|
136
136
|
|
|
137
|
-
[:path, :headers, :execute, :
|
|
137
|
+
[:path, :headers, :execute, :transport].each do |m|
|
|
138
138
|
define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
|
|
139
139
|
end
|
|
140
140
|
end
|
|
@@ -78,7 +78,7 @@ class LLM::OpenAI
|
|
|
78
78
|
multi = LLM::Multipart.new(params.merge!(image:, model:, response_format:))
|
|
79
79
|
req = Net::HTTP::Post.new(path("/images/variations"), headers)
|
|
80
80
|
req["content-type"] = multi.content_type
|
|
81
|
-
set_body_stream(req, multi.body)
|
|
81
|
+
transport.set_body_stream(req, multi.body)
|
|
82
82
|
res, span, tracer = execute(request: req, operation: "request")
|
|
83
83
|
res = ResponseAdapter.adapt(res, type: :image)
|
|
84
84
|
tracer.on_request_finish(operation: "request", model:, res:, span:)
|
|
@@ -104,7 +104,7 @@ class LLM::OpenAI
|
|
|
104
104
|
multi = LLM::Multipart.new(params.merge!(image:, prompt:, model:, response_format:))
|
|
105
105
|
req = Net::HTTP::Post.new(path("/images/edits"), headers)
|
|
106
106
|
req["content-type"] = multi.content_type
|
|
107
|
-
set_body_stream(req, multi.body)
|
|
107
|
+
transport.set_body_stream(req, multi.body)
|
|
108
108
|
res, span, tracer = execute(request: req, operation: "request")
|
|
109
109
|
res = ResponseAdapter.adapt(res, type: :image)
|
|
110
110
|
tracer.on_request_finish(operation: "request", model:, res:, span:)
|
|
@@ -113,7 +113,7 @@ class LLM::OpenAI
|
|
|
113
113
|
|
|
114
114
|
private
|
|
115
115
|
|
|
116
|
-
[:path, :headers, :execute, :
|
|
116
|
+
[:path, :headers, :execute, :transport].each do |m|
|
|
117
117
|
define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
|
|
118
118
|
end
|
|
119
119
|
end
|
|
@@ -40,6 +40,48 @@ module LLM::OpenAI::ResponseAdapter
|
|
|
40
40
|
&.reasoning_tokens || 0
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
+
##
|
|
44
|
+
# (see LLM::Contract::Completion#input_audio_tokens)
|
|
45
|
+
def input_audio_tokens
|
|
46
|
+
body
|
|
47
|
+
.usage
|
|
48
|
+
&.prompt_tokens_details
|
|
49
|
+
&.audio_tokens || 0
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
##
|
|
53
|
+
# (see LLM::Contract::Completion#output_audio_tokens)
|
|
54
|
+
def output_audio_tokens
|
|
55
|
+
body
|
|
56
|
+
.usage
|
|
57
|
+
&.completion_tokens_details
|
|
58
|
+
&.audio_tokens || 0
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
##
|
|
62
|
+
# (see LLM::Contract::Completion#input_image_tokens)
|
|
63
|
+
def input_image_tokens
|
|
64
|
+
body
|
|
65
|
+
.usage
|
|
66
|
+
&.prompt_tokens_details
|
|
67
|
+
&.image_tokens || 0
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
##
|
|
71
|
+
# (see LLM::Contract::Completion#cache_read_tokens)
|
|
72
|
+
def cache_read_tokens
|
|
73
|
+
body
|
|
74
|
+
.usage
|
|
75
|
+
&.prompt_tokens_details
|
|
76
|
+
&.cached_tokens || 0
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
##
|
|
80
|
+
# (see LLM::Contract::Completion#cache_write_tokens)
|
|
81
|
+
def cache_write_tokens
|
|
82
|
+
0
|
|
83
|
+
end
|
|
84
|
+
|
|
43
85
|
##
|
|
44
86
|
# (see LLM::Contract::Completion#total_tokens)
|
|
45
87
|
def total_tokens
|