llm.rb 8.1.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +196 -6
  3. data/README.md +233 -518
  4. data/data/anthropic.json +278 -258
  5. data/data/bedrock.json +1288 -1561
  6. data/data/deepseek.json +38 -38
  7. data/data/google.json +656 -579
  8. data/data/openai.json +860 -818
  9. data/data/xai.json +243 -552
  10. data/data/zai.json +168 -168
  11. data/lib/llm/active_record/acts_as_agent.rb +5 -0
  12. data/lib/llm/active_record/acts_as_llm.rb +7 -8
  13. data/lib/llm/active_record.rb +1 -6
  14. data/lib/llm/agent.rb +121 -82
  15. data/lib/llm/context.rb +79 -74
  16. data/lib/llm/contract/completion.rb +45 -0
  17. data/lib/llm/cost.rb +81 -4
  18. data/lib/llm/error.rb +1 -1
  19. data/lib/llm/function/array.rb +8 -5
  20. data/lib/llm/function/call_group.rb +39 -0
  21. data/lib/llm/function/call_task.rb +46 -0
  22. data/lib/llm/function/fork/task.rb +6 -0
  23. data/lib/llm/function/ractor/task.rb +6 -0
  24. data/lib/llm/function/task.rb +10 -0
  25. data/lib/llm/function.rb +28 -1
  26. data/lib/llm/mcp/transport/http.rb +26 -46
  27. data/lib/llm/mcp/transport/stdio.rb +0 -8
  28. data/lib/llm/mcp.rb +6 -23
  29. data/lib/llm/provider.rb +30 -20
  30. data/lib/llm/providers/anthropic/error_handler.rb +6 -7
  31. data/lib/llm/providers/anthropic/files.rb +2 -2
  32. data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
  33. data/lib/llm/providers/anthropic/stream_parser.rb +2 -2
  34. data/lib/llm/providers/anthropic.rb +1 -1
  35. data/lib/llm/providers/bedrock/error_handler.rb +8 -9
  36. data/lib/llm/providers/bedrock/models.rb +13 -13
  37. data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
  38. data/lib/llm/providers/bedrock/stream_parser.rb +2 -2
  39. data/lib/llm/providers/bedrock.rb +1 -1
  40. data/lib/llm/providers/google/error_handler.rb +6 -7
  41. data/lib/llm/providers/google/files.rb +2 -4
  42. data/lib/llm/providers/google/images.rb +1 -1
  43. data/lib/llm/providers/google/models.rb +0 -2
  44. data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
  45. data/lib/llm/providers/google/stream_parser.rb +2 -2
  46. data/lib/llm/providers/google.rb +1 -1
  47. data/lib/llm/providers/ollama/error_handler.rb +6 -7
  48. data/lib/llm/providers/ollama/models.rb +0 -2
  49. data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
  50. data/lib/llm/providers/ollama.rb +1 -1
  51. data/lib/llm/providers/openai/audio.rb +3 -3
  52. data/lib/llm/providers/openai/error_handler.rb +6 -7
  53. data/lib/llm/providers/openai/files.rb +2 -2
  54. data/lib/llm/providers/openai/images.rb +3 -3
  55. data/lib/llm/providers/openai/models.rb +1 -1
  56. data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
  57. data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
  58. data/lib/llm/providers/openai/responses/stream_parser.rb +2 -2
  59. data/lib/llm/providers/openai/responses.rb +2 -2
  60. data/lib/llm/providers/openai/stream_parser.rb +2 -2
  61. data/lib/llm/providers/openai/vector_stores.rb +1 -1
  62. data/lib/llm/providers/openai.rb +1 -1
  63. data/lib/llm/response.rb +10 -8
  64. data/lib/llm/schema.rb +11 -0
  65. data/lib/llm/sequel/agent.rb +5 -0
  66. data/lib/llm/sequel/plugin.rb +8 -14
  67. data/lib/llm/stream/queue.rb +15 -42
  68. data/lib/llm/stream.rb +15 -40
  69. data/lib/llm/tool/param.rb +1 -8
  70. data/lib/llm/transport/execution.rb +67 -0
  71. data/lib/llm/transport/http.rb +134 -0
  72. data/lib/llm/transport/persistent_http.rb +152 -0
  73. data/lib/llm/transport/response/http.rb +113 -0
  74. data/lib/llm/transport/response.rb +112 -0
  75. data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
  76. data/lib/llm/transport.rb +139 -0
  77. data/lib/llm/usage.rb +14 -5
  78. data/lib/llm/utils.rb +24 -14
  79. data/lib/llm/version.rb +1 -1
  80. data/lib/llm.rb +3 -12
  81. data/llm.gemspec +2 -16
  82. metadata +13 -20
  83. data/lib/llm/bot.rb +0 -3
  84. data/lib/llm/provider/transport/http/execution.rb +0 -115
  85. data/lib/llm/provider/transport/http/interruptible.rb +0 -114
  86. data/lib/llm/provider/transport/http.rb +0 -145
@@ -27,6 +27,36 @@ module LLM::Anthropic::ResponseAdapter
27
27
  0
28
28
  end
29
29
 
30
+ ##
31
+ # (see LLM::Contract::Completion#input_audio_tokens)
32
+ def input_audio_tokens
33
+ super
34
+ end
35
+
36
+ ##
37
+ # (see LLM::Contract::Completion#output_audio_tokens)
38
+ def output_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#input_image_tokens)
44
+ def input_image_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#cache_read_tokens)
50
+ def cache_read_tokens
51
+ body.usage&.cache_read_input_tokens || 0
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_write_tokens)
56
+ def cache_write_tokens
57
+ body.usage&.cache_creation_input_tokens || 0
58
+ end
59
+
30
60
  ##
31
61
  # (see LLM::Contract::Completion#total_tokens)
32
62
  def total_tokens
@@ -105,14 +105,14 @@ class LLM::Anthropic
105
105
  end
106
106
 
107
107
  def resolve_tool(tool)
108
- registered = @stream.find_tool(tool["name"])
108
+ registered = @stream.__find__(tool["name"])
109
109
  fn = (registered || LLM::Function.new(tool["name"])).dup.tap do |fn|
110
110
  fn.id = tool["id"]
111
111
  fn.arguments = LLM::Anthropic.parse_tool_input(tool["input"])
112
112
  fn.tracer = @stream.extra[:tracer]
113
113
  fn.model = @stream.extra[:model]
114
114
  end
115
- [fn, (registered ? nil : @stream.tool_not_found(fn))]
115
+ [fn, (registered ? nil : fn.unavailable)]
116
116
  end
117
117
  end
118
118
  end
@@ -161,7 +161,7 @@ module LLM
161
161
  payload = adapt(messages)
162
162
  body = LLM.json.dump(payload.merge!(params))
163
163
  req = Net::HTTP::Post.new("/v1/messages", headers)
164
- set_body_stream(req, StringIO.new(body))
164
+ transport.set_body_stream(req, StringIO.new(body))
165
165
  req
166
166
  end
167
167
 
@@ -11,7 +11,7 @@ class LLM::Bedrock
11
11
  # @api private
12
12
  class ErrorHandler
13
13
  ##
14
- # @return [Net::HTTPResponse]
14
+ # @return [LLM::Transport::Response]
15
15
  attr_reader :res
16
16
 
17
17
  ##
@@ -21,12 +21,12 @@ class LLM::Bedrock
21
21
  ##
22
22
  # @param [LLM::Tracer] tracer
23
23
  # @param [Object, nil] span
24
- # @param [Net::HTTPResponse] res
24
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
25
25
  # @return [LLM::Bedrock::ErrorHandler]
26
26
  def initialize(tracer, span, res)
27
27
  @tracer = tracer
28
28
  @span = span
29
- @res = res
29
+ @res = LLM::Transport::Response.from(res)
30
30
  end
31
31
 
32
32
  ##
@@ -44,16 +44,15 @@ class LLM::Bedrock
44
44
  # @return [LLM::Error]
45
45
  def error
46
46
  message = extract_message
47
- case res
48
- when Net::HTTPServerError
47
+ if res.server_error?
49
48
  LLM::ServerError.new(message).tap { _1.response = res }
50
- when Net::HTTPUnauthorized
49
+ elsif res.unauthorized?
51
50
  LLM::UnauthorizedError.new(message).tap { _1.response = res }
52
- when Net::HTTPForbidden
51
+ elsif res.forbidden?
53
52
  LLM::UnauthorizedError.new(message).tap { _1.response = res }
54
- when Net::HTTPTooManyRequests
53
+ elsif res.rate_limited?
55
54
  LLM::RateLimitError.new(message).tap { _1.response = res }
56
- when Net::HTTPNotFound
55
+ elsif res.not_found?
57
56
  LLM::Error.new("Bedrock model not found: #{message}").tap { _1.response = res }
58
57
  else
59
58
  LLM::Error.new(message).tap { _1.response = res }
@@ -8,8 +8,9 @@ class LLM::Bedrock
8
8
  #
9
9
  # Unlike the Converse API (which lives on `bedrock-runtime.<region>.amazonaws.com`),
10
10
  # the models endpoint lives on the control plane at
11
- # `bedrock.<region>.amazonaws.com`. This class manages its own HTTP
12
- # connection since the provider's transport is pinned to the runtime host.
11
+ # `bedrock.<region>.amazonaws.com`. This class builds a matching
12
+ # transport for the control-plane host from the provider's current
13
+ # transport class.
13
14
  #
14
15
  # @example
15
16
  # llm = LLM.bedrock(
@@ -39,19 +40,18 @@ class LLM::Bedrock
39
40
  # @return [LLM::Response]
40
41
  def all(**params)
41
42
  host = credentials.host
42
- handle_response http(host).request(build_request(host, params))
43
+ req = build_request(host, params)
44
+ res = build_transport(host).request(req, owner: self)
45
+ handle_response(res)
43
46
  end
44
47
 
45
48
  private
46
49
 
47
50
  ##
48
51
  # @param [String] host
49
- # @return [Net::HTTP]
50
- def http(host)
51
- http = Net::HTTP.new(host, 443)
52
- http.use_ssl = true
53
- http.read_timeout = timeout
54
- http
52
+ # @return [LLM::Transport]
53
+ def build_transport(host)
54
+ transport.class.new(host:, port: 443, timeout:, ssl: true)
55
55
  end
56
56
 
57
57
  ##
@@ -68,12 +68,12 @@ class LLM::Bedrock
68
68
  end
69
69
 
70
70
  ##
71
- # @param [Net::HTTPResponse] res
71
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
72
72
  # @return [LLM::Response]
73
73
  # @raise [LLM::Error]
74
74
  def handle_response(res)
75
- case res
76
- when Net::HTTPSuccess
75
+ res = LLM::Transport::Response.from(res)
76
+ if res.success?
77
77
  res.body = LLM::Object.from(LLM.json.load(res.body || "{}"))
78
78
  LLM::Bedrock::ResponseAdapter.adapt(res, type: :models)
79
79
  else
@@ -102,7 +102,7 @@ class LLM::Bedrock
102
102
  end
103
103
  end
104
104
 
105
- [:timeout, :tracer].each do |m|
105
+ [:timeout, :tracer, :transport].each do |m|
106
106
  define_method(m) { @provider.send(m) }
107
107
  end
108
108
  end
@@ -56,6 +56,36 @@ module LLM::Bedrock::ResponseAdapter
56
56
  0
57
57
  end
58
58
 
59
+ ##
60
+ # (see LLM::Contract::Completion#input_audio_tokens)
61
+ def input_audio_tokens
62
+ super
63
+ end
64
+
65
+ ##
66
+ # (see LLM::Contract::Completion#output_audio_tokens)
67
+ def output_audio_tokens
68
+ super
69
+ end
70
+
71
+ ##
72
+ # (see LLM::Contract::Completion#input_image_tokens)
73
+ def input_image_tokens
74
+ super
75
+ end
76
+
77
+ ##
78
+ # (see LLM::Contract::Completion#cache_read_tokens)
79
+ def cache_read_tokens
80
+ 0
81
+ end
82
+
83
+ ##
84
+ # (see LLM::Contract::Completion#cache_write_tokens)
85
+ def cache_write_tokens
86
+ 0
87
+ end
88
+
59
89
  ##
60
90
  # (see LLM::Contract::Completion#total_tokens)
61
91
  def total_tokens
@@ -184,14 +184,14 @@ class LLM::Bedrock
184
184
 
185
185
  def resolve_tool(tool)
186
186
  payload = tool["toolUse"] || {}
187
- registered = @stream.find_tool(payload["name"])
187
+ registered = @stream.__find__(payload["name"])
188
188
  fn = (registered || LLM::Function.new(payload["name"])).dup.tap do |f|
189
189
  f.id = payload["toolUseId"]
190
190
  f.arguments = payload["input"] || {}
191
191
  f.tracer = @stream.extra[:tracer]
192
192
  f.model = @stream.extra[:model]
193
193
  end
194
- [fn, registered ? nil : @stream.tool_not_found(fn)]
194
+ [fn, registered ? nil : fn.unavailable]
195
195
  end
196
196
 
197
197
  def content
@@ -218,7 +218,7 @@ module LLM
218
218
  path = stream ? "/model/#{model_id}/converse-stream" \
219
219
  : "/model/#{model_id}/converse"
220
220
  req = Net::HTTP::Post.new(path, headers)
221
- set_body_stream(req, StringIO.new(body))
221
+ transport.set_body_stream(req, StringIO.new(body))
222
222
  [req, messages, body]
223
223
  end
224
224
 
@@ -5,7 +5,7 @@ class LLM::Google
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::Google
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::Google::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -49,17 +49,16 @@ class LLM::Google
49
49
  ##
50
50
  # @return [LLM::Error]
51
51
  def error
52
- case res
53
- when Net::HTTPServerError
52
+ if res.server_error?
54
53
  LLM::ServerError.new("Server error").tap { _1.response = res }
55
- when Net::HTTPBadRequest
54
+ elsif res.bad_request?
56
55
  reason = body.dig("error", "details", 0, "reason")
57
56
  if reason == "API_KEY_INVALID"
58
57
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
59
58
  else
60
59
  LLM::Error.new("Unexpected response").tap { _1.response = res }
61
60
  end
62
- when Net::HTTPTooManyRequests
61
+ elsif res.rate_limited?
63
62
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
64
63
  else
65
64
  LLM::Error.new("Unexpected response").tap { _1.response = res }
@@ -69,7 +69,7 @@ class LLM::Google
69
69
  req["X-Goog-Upload-Offset"] = 0
70
70
  req["X-Goog-Upload-Command"] = "upload, finalize"
71
71
  file.with_io do |io|
72
- set_body_stream(req, io)
72
+ transport.set_body_stream(req, io)
73
73
  res, span, tracer = execute(request: req, operation: "request")
74
74
  res = ResponseAdapter.adapt(res, type: :file)
75
75
  tracer.on_request_finish(operation: "request", res:, span:)
@@ -127,8 +127,6 @@ class LLM::Google
127
127
 
128
128
  private
129
129
 
130
- include LLM::Utils
131
-
132
130
  def request_upload_url(file:)
133
131
  req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{key}", headers)
134
132
  req["X-Goog-Upload-Protocol"] = "resumable"
@@ -146,7 +144,7 @@ class LLM::Google
146
144
  @provider.instance_variable_get(:@key)
147
145
  end
148
146
 
149
- [:headers, :execute, :set_body_stream].each do |m|
147
+ [:headers, :execute, :transport].each do |m|
150
148
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
151
149
  end
152
150
  end
@@ -91,7 +91,7 @@ class LLM::Google
91
91
  @provider.instance_variable_get(:@key)
92
92
  end
93
93
 
94
- [:headers, :execute, :set_body_stream].each do |m|
94
+ [:headers, :execute].each do |m|
95
95
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
96
96
  end
97
97
  end
@@ -17,8 +17,6 @@ class LLM::Google
17
17
  # print "id: ", model.id, "\n"
18
18
  # end
19
19
  class Models
20
- include LLM::Utils
21
-
22
20
  ##
23
21
  # Returns a new Models object
24
22
  # @param provider [LLM::Provider]
@@ -33,6 +33,36 @@ module LLM::Google::ResponseAdapter
33
33
  body.usageMetadata.thoughtsTokenCount || 0
34
34
  end
35
35
 
36
+ ##
37
+ # (see LLM::Contract::Completion#input_audio_tokens)
38
+ def input_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#output_audio_tokens)
44
+ def output_audio_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#input_image_tokens)
50
+ def input_image_tokens
51
+ super
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_read_tokens)
56
+ def cache_read_tokens
57
+ 0
58
+ end
59
+
60
+ ##
61
+ # (see LLM::Contract::Completion#cache_write_tokens)
62
+ def cache_write_tokens
63
+ 0
64
+ end
65
+
36
66
  ##
37
67
  # (see LLM::Contract::Completion#total_tokens)
38
68
  def total_tokens
@@ -153,14 +153,14 @@ class LLM::Google
153
153
 
154
154
  def resolve_tool(part, cindex, pindex)
155
155
  call = part["functionCall"]
156
- registered = @stream.find_tool(call["name"])
156
+ registered = @stream.__find__(call["name"])
157
157
  fn = (registered || LLM::Function.new(call["name"])).dup.tap do |fn|
158
158
  fn.id = LLM::Google.tool_id(part:, cindex:, pindex:)
159
159
  fn.arguments = call["args"]
160
160
  fn.tracer = @stream.extra[:tracer]
161
161
  fn.model = @stream.extra[:model]
162
162
  end
163
- [fn, (registered ? nil : @stream.tool_not_found(fn))]
163
+ [fn, (registered ? nil : fn.unavailable)]
164
164
  end
165
165
  end
166
166
  end
@@ -208,7 +208,7 @@ module LLM
208
208
  req = Net::HTTP::Post.new(path, headers)
209
209
  messages = build_complete_messages(prompt, params, role)
210
210
  body = LLM.json.dump({contents: adapt(messages)}.merge!(params))
211
- set_body_stream(req, StringIO.new(body))
211
+ transport.set_body_stream(req, StringIO.new(body))
212
212
  req
213
213
  end
214
214
 
@@ -5,7 +5,7 @@ class LLM::Ollama
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::Ollama
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::Ollama::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -43,12 +43,11 @@ class LLM::Ollama
43
43
  ##
44
44
  # @return [LLM::Error]
45
45
  def error
46
- case res
47
- when Net::HTTPServerError
46
+ if res.server_error?
48
47
  LLM::ServerError.new("Server error").tap { _1.response = res }
49
- when Net::HTTPUnauthorized
48
+ elsif res.unauthorized?
50
49
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
51
- when Net::HTTPTooManyRequests
50
+ elsif res.rate_limited?
52
51
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
53
52
  else
54
53
  LLM::Error.new("Unexpected response").tap { _1.response = res }
@@ -17,8 +17,6 @@ class LLM::Ollama
17
17
  # print "id: ", model.id, "\n"
18
18
  # end
19
19
  class Models
20
- include LLM::Utils
21
-
22
20
  ##
23
21
  # Returns a new Models object
24
22
  # @param provider [LLM::Provider]
@@ -27,6 +27,36 @@ module LLM::Ollama::ResponseAdapter
27
27
  0
28
28
  end
29
29
 
30
+ ##
31
+ # (see LLM::Contract::Completion#input_audio_tokens)
32
+ def input_audio_tokens
33
+ super
34
+ end
35
+
36
+ ##
37
+ # (see LLM::Contract::Completion#output_audio_tokens)
38
+ def output_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#input_image_tokens)
44
+ def input_image_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#cache_read_tokens)
50
+ def cache_read_tokens
51
+ 0
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_write_tokens)
56
+ def cache_write_tokens
57
+ 0
58
+ end
59
+
30
60
  ##
31
61
  # (see LLM::Contract::Completion#total_tokens)
32
62
  def total_tokens
@@ -130,7 +130,7 @@ module LLM
130
130
  messages = build_complete_messages(prompt, params, role)
131
131
  body = LLM.json.dump({messages: [adapt(messages)].flatten}.merge!(params))
132
132
  req = Net::HTTP::Post.new("/api/chat", headers)
133
- set_body_stream(req, StringIO.new(body))
133
+ transport.set_body_stream(req, StringIO.new(body))
134
134
  req
135
135
  end
136
136
 
@@ -57,7 +57,7 @@ class LLM::OpenAI
57
57
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
58
58
  req = Net::HTTP::Post.new(path("/audio/transcriptions"), headers)
59
59
  req["content-type"] = multi.content_type
60
- set_body_stream(req, multi.body)
60
+ transport.set_body_stream(req, multi.body)
61
61
  res, span, tracer = execute(request: req, operation: "request")
62
62
  res = LLM::Response.new(res)
63
63
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -81,7 +81,7 @@ class LLM::OpenAI
81
81
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
82
82
  req = Net::HTTP::Post.new(path("/audio/translations"), headers)
83
83
  req["content-type"] = multi.content_type
84
- set_body_stream(req, multi.body)
84
+ transport.set_body_stream(req, multi.body)
85
85
  res, span, tracer = execute(request: req, operation: "request")
86
86
  res = LLM::Response.new(res)
87
87
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -90,7 +90,7 @@ class LLM::OpenAI
90
90
 
91
91
  private
92
92
 
93
- [:path, :headers, :execute, :set_body_stream].each do |m|
93
+ [:path, :headers, :execute, :transport].each do |m|
94
94
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
95
95
  end
96
96
  end
@@ -5,7 +5,7 @@ class LLM::OpenAI
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::OpenAI
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::OpenAI::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -49,12 +49,11 @@ class LLM::OpenAI
49
49
  ##
50
50
  # @return [LLM::Error]
51
51
  def error
52
- case res
53
- when Net::HTTPServerError
52
+ if res.server_error?
54
53
  LLM::ServerError.new("Server error").tap { _1.response = res }
55
- when Net::HTTPUnauthorized
54
+ elsif res.unauthorized?
56
55
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
57
- when Net::HTTPTooManyRequests
56
+ elsif res.rate_limited?
58
57
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
59
58
  else
60
59
  error = body["error"] || {}
@@ -62,7 +62,7 @@ class LLM::OpenAI
62
62
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), purpose:))
63
63
  req = Net::HTTP::Post.new(path("/files"), headers)
64
64
  req["content-type"] = multi.content_type
65
- set_body_stream(req, multi.body)
65
+ transport.set_body_stream(req, multi.body)
66
66
  res, span, tracer = execute(request: req, operation: "request")
67
67
  res = ResponseAdapter.adapt(res, type: :file)
68
68
  tracer.on_request_finish(operation: "request", res:, span:)
@@ -134,7 +134,7 @@ class LLM::OpenAI
134
134
 
135
135
  private
136
136
 
137
- [:path, :headers, :execute, :set_body_stream].each do |m|
137
+ [:path, :headers, :execute, :transport].each do |m|
138
138
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
139
139
  end
140
140
  end
@@ -78,7 +78,7 @@ class LLM::OpenAI
78
78
  multi = LLM::Multipart.new(params.merge!(image:, model:, response_format:))
79
79
  req = Net::HTTP::Post.new(path("/images/variations"), headers)
80
80
  req["content-type"] = multi.content_type
81
- set_body_stream(req, multi.body)
81
+ transport.set_body_stream(req, multi.body)
82
82
  res, span, tracer = execute(request: req, operation: "request")
83
83
  res = ResponseAdapter.adapt(res, type: :image)
84
84
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -104,7 +104,7 @@ class LLM::OpenAI
104
104
  multi = LLM::Multipart.new(params.merge!(image:, prompt:, model:, response_format:))
105
105
  req = Net::HTTP::Post.new(path("/images/edits"), headers)
106
106
  req["content-type"] = multi.content_type
107
- set_body_stream(req, multi.body)
107
+ transport.set_body_stream(req, multi.body)
108
108
  res, span, tracer = execute(request: req, operation: "request")
109
109
  res = ResponseAdapter.adapt(res, type: :image)
110
110
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -113,7 +113,7 @@ class LLM::OpenAI
113
113
 
114
114
  private
115
115
 
116
- [:path, :headers, :execute, :set_body_stream].each do |m|
116
+ [:path, :headers, :execute, :transport].each do |m|
117
117
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
118
118
  end
119
119
  end
@@ -48,7 +48,7 @@ class LLM::OpenAI
48
48
 
49
49
  private
50
50
 
51
- [:path, :headers, :execute, :set_body_stream].each do |m|
51
+ [:path, :headers, :execute].each do |m|
52
52
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
53
53
  end
54
54
  end
@@ -40,6 +40,48 @@ module LLM::OpenAI::ResponseAdapter
40
40
  &.reasoning_tokens || 0
41
41
  end
42
42
 
43
+ ##
44
+ # (see LLM::Contract::Completion#input_audio_tokens)
45
+ def input_audio_tokens
46
+ body
47
+ .usage
48
+ &.prompt_tokens_details
49
+ &.audio_tokens || 0
50
+ end
51
+
52
+ ##
53
+ # (see LLM::Contract::Completion#output_audio_tokens)
54
+ def output_audio_tokens
55
+ body
56
+ .usage
57
+ &.completion_tokens_details
58
+ &.audio_tokens || 0
59
+ end
60
+
61
+ ##
62
+ # (see LLM::Contract::Completion#input_image_tokens)
63
+ def input_image_tokens
64
+ body
65
+ .usage
66
+ &.prompt_tokens_details
67
+ &.image_tokens || 0
68
+ end
69
+
70
+ ##
71
+ # (see LLM::Contract::Completion#cache_read_tokens)
72
+ def cache_read_tokens
73
+ body
74
+ .usage
75
+ &.prompt_tokens_details
76
+ &.cached_tokens || 0
77
+ end
78
+
79
+ ##
80
+ # (see LLM::Contract::Completion#cache_write_tokens)
81
+ def cache_write_tokens
82
+ 0
83
+ end
84
+
43
85
  ##
44
86
  # (see LLM::Contract::Completion#total_tokens)
45
87
  def total_tokens