llm.rb 8.1.0 → 9.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +120 -2
  3. data/README.md +161 -514
  4. data/lib/llm/active_record/acts_as_llm.rb +7 -8
  5. data/lib/llm/agent.rb +36 -16
  6. data/lib/llm/context.rb +30 -26
  7. data/lib/llm/contract/completion.rb +45 -0
  8. data/lib/llm/cost.rb +81 -4
  9. data/lib/llm/error.rb +1 -1
  10. data/lib/llm/function/array.rb +8 -5
  11. data/lib/llm/function/call_group.rb +39 -0
  12. data/lib/llm/function/fork/task.rb +6 -0
  13. data/lib/llm/function/ractor/task.rb +6 -0
  14. data/lib/llm/function/task.rb +10 -0
  15. data/lib/llm/function.rb +1 -0
  16. data/lib/llm/mcp/transport/http.rb +26 -46
  17. data/lib/llm/mcp/transport/stdio.rb +0 -8
  18. data/lib/llm/mcp.rb +6 -23
  19. data/lib/llm/provider.rb +23 -20
  20. data/lib/llm/providers/anthropic/error_handler.rb +6 -7
  21. data/lib/llm/providers/anthropic/files.rb +2 -2
  22. data/lib/llm/providers/anthropic/response_adapter/completion.rb +30 -0
  23. data/lib/llm/providers/anthropic.rb +1 -1
  24. data/lib/llm/providers/bedrock/error_handler.rb +8 -9
  25. data/lib/llm/providers/bedrock/models.rb +13 -13
  26. data/lib/llm/providers/bedrock/response_adapter/completion.rb +30 -0
  27. data/lib/llm/providers/bedrock.rb +1 -1
  28. data/lib/llm/providers/google/error_handler.rb +6 -7
  29. data/lib/llm/providers/google/files.rb +2 -4
  30. data/lib/llm/providers/google/images.rb +1 -1
  31. data/lib/llm/providers/google/models.rb +0 -2
  32. data/lib/llm/providers/google/response_adapter/completion.rb +30 -0
  33. data/lib/llm/providers/google.rb +1 -1
  34. data/lib/llm/providers/ollama/error_handler.rb +6 -7
  35. data/lib/llm/providers/ollama/models.rb +0 -2
  36. data/lib/llm/providers/ollama/response_adapter/completion.rb +30 -0
  37. data/lib/llm/providers/ollama.rb +1 -1
  38. data/lib/llm/providers/openai/audio.rb +3 -3
  39. data/lib/llm/providers/openai/error_handler.rb +6 -7
  40. data/lib/llm/providers/openai/files.rb +2 -2
  41. data/lib/llm/providers/openai/images.rb +3 -3
  42. data/lib/llm/providers/openai/models.rb +1 -1
  43. data/lib/llm/providers/openai/response_adapter/completion.rb +42 -0
  44. data/lib/llm/providers/openai/response_adapter/responds.rb +39 -0
  45. data/lib/llm/providers/openai/responses.rb +2 -2
  46. data/lib/llm/providers/openai/vector_stores.rb +1 -1
  47. data/lib/llm/providers/openai.rb +1 -1
  48. data/lib/llm/response.rb +10 -8
  49. data/lib/llm/sequel/plugin.rb +7 -8
  50. data/lib/llm/stream/queue.rb +15 -42
  51. data/lib/llm/stream.rb +4 -4
  52. data/lib/llm/transport/execution.rb +67 -0
  53. data/lib/llm/transport/http.rb +134 -0
  54. data/lib/llm/transport/persistent_http.rb +152 -0
  55. data/lib/llm/transport/response/http.rb +113 -0
  56. data/lib/llm/transport/response.rb +112 -0
  57. data/lib/llm/{provider/transport/http → transport}/stream_decoder.rb +8 -4
  58. data/lib/llm/transport.rb +139 -0
  59. data/lib/llm/usage.rb +14 -5
  60. data/lib/llm/version.rb +1 -1
  61. data/lib/llm.rb +2 -12
  62. data/llm.gemspec +2 -16
  63. metadata +11 -19
  64. data/lib/llm/provider/transport/http/execution.rb +0 -115
  65. data/lib/llm/provider/transport/http/interruptible.rb +0 -114
  66. data/lib/llm/provider/transport/http.rb +0 -145
  67. data/lib/llm/utils.rb +0 -19
data/lib/llm/mcp.rb CHANGED
@@ -24,14 +24,6 @@ class LLM::MCP
24
24
 
25
25
  include RPC
26
26
 
27
- @clients = {}
28
-
29
- ##
30
- # @api private
31
- def self.clients
32
- @clients
33
- end
34
-
35
27
  ##
36
28
  # Builds an MCP client that uses the stdio transport.
37
29
  # @param [LLM::Provider, nil] llm
@@ -69,6 +61,9 @@ class LLM::MCP
69
61
  # The URL for the MCP HTTP endpoint
70
62
  # @option http [Hash] :headers
71
63
  # Extra headers for requests
64
+ # @option http [LLM::Transport, Class] :transport
65
+ # Optional override with any {LLM::Transport} instance or subclass,
66
+ # similar to {LLM::Provider}
72
67
  # @param [Integer] timeout
73
68
  # The maximum amount of time to wait when reading from an MCP process
74
69
  # @return [LLM::MCP] A new MCP instance
@@ -82,8 +77,9 @@ class LLM::MCP
82
77
  @transport = Transport::Stdio.new(command:)
83
78
  elsif http
84
79
  persistent = http.delete(:persistent)
85
- @transport = Transport::HTTP.new(**http, timeout:)
86
- @transport.persistent if persistent
80
+ transport = http.delete(:transport)
81
+ transport ||= LLM::Transport::PersistentHTTP if persistent
82
+ @transport = Transport::HTTP.new(**http, timeout:, transport:)
87
83
  else
88
84
  raise ArgumentError, "stdio or http is required"
89
85
  end
@@ -121,19 +117,6 @@ class LLM::MCP
121
117
  stop
122
118
  end
123
119
 
124
- ##
125
- # Configures an HTTP MCP transport to use a persistent connection pool
126
- # via the optional dependency [Net::HTTP::Persistent](https://github.com/drbrain/net-http-persistent)
127
- # @example
128
- # mcp = LLM::MCP.http(url: "https://example.com/mcp", persistent: true)
129
- # # do something with 'mcp'
130
- # @return [LLM::MCP]
131
- def persist!
132
- transport.persist!
133
- self
134
- end
135
- alias_method :persistent, :persist!
136
-
137
120
  ##
138
121
  # Returns the tools provided by the MCP process.
139
122
  # @return [Array<Class<LLM::Tool>>]
data/lib/llm/provider.rb CHANGED
@@ -6,10 +6,7 @@
6
6
  #
7
7
  # @abstract
8
8
  class LLM::Provider
9
- require "net/http"
10
- require_relative "provider/transport/http"
11
- require_relative "provider/transport/http/execution"
12
- include Transport::HTTP::Execution
9
+ include LLM::Transport::Execution
13
10
 
14
11
  ##
15
12
  # @param [String, nil] key
@@ -27,7 +24,9 @@ class LLM::Provider
27
24
  # @param [Boolean] persistent
28
25
  # Whether to use a persistent connection.
29
26
  # Requires the net-http-persistent gem.
30
- def initialize(key:, host:, port: 443, timeout: 60, ssl: true, base_path: "", persistent: false)
27
+ # @param [LLM::Transport, Class, nil] transport
28
+ # Optional override with any {LLM::Transport} instance or subclass.
29
+ def initialize(key:, host:, port: 443, timeout: 60, ssl: true, base_path: "", persistent: false, transport: nil)
31
30
  @key = key
32
31
  @host = host
33
32
  @port = port
@@ -36,7 +35,7 @@ class LLM::Provider
36
35
  @base_path = normalize_base_path(base_path)
37
36
  @base_uri = URI("#{ssl ? "https" : "http"}://#{host}:#{port}/")
38
37
  @headers = {"User-Agent" => "llm.rb v#{LLM::VERSION}"}
39
- @transport = Transport::HTTP.new(host:, port:, timeout:, ssl:, persistent:)
38
+ @transport = resolve_transport(transport, persistent:)
40
39
  @monitor = Monitor.new
41
40
  end
42
41
 
@@ -316,19 +315,6 @@ class LLM::Provider
316
315
  end
317
316
  end
318
317
 
319
- ##
320
- # This method configures a provider to use a persistent connection pool
321
- # via the optional dependency [Net::HTTP::Persistent](https://github.com/drbrain/net-http-persistent)
322
- # @example
323
- # llm = LLM.openai(key: ENV["KEY"]).persistent
324
- # # do something with 'llm'
325
- # @return [LLM::Provider]
326
- def persist!
327
- transport.persist!
328
- self
329
- end
330
- alias_method :persistent, :persist!
331
-
332
318
  ##
333
319
  # Interrupt the active request, if any.
334
320
  # @param [Fiber] owner
@@ -403,7 +389,7 @@ class LLM::Provider
403
389
  # @return [Class]
404
390
  # Returns the class responsible for decoding streamed response bodies
405
391
  def stream_decoder
406
- LLM::Provider::Transport::HTTP::StreamDecoder
392
+ LLM::Transport::StreamDecoder
407
393
  end
408
394
 
409
395
  ##
@@ -431,6 +417,23 @@ class LLM::Provider
431
417
  @monitor.synchronize(&)
432
418
  end
433
419
 
420
+ ##
421
+ # @api private
422
+ def default_transport(persistent:)
423
+ transport_class = persistent ? LLM::Transport::PersistentHTTP : LLM::Transport::HTTP
424
+ transport_class.new(host:, port:, timeout:, ssl:)
425
+ end
426
+
427
+ ##
428
+ # @api private
429
+ def resolve_transport(transport, persistent:)
430
+ return default_transport(persistent:) if transport.nil?
431
+ if Class === transport && transport <= LLM::Transport
432
+ return transport.new(host:, port:, timeout:, ssl:)
433
+ end
434
+ transport
435
+ end
436
+
434
437
  ##
435
438
  # @api private
436
439
  def thread
@@ -5,7 +5,7 @@ class LLM::Anthropic
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::Anthropic
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::Anthropic::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -43,12 +43,11 @@ class LLM::Anthropic
43
43
  ##
44
44
  # @return [LLM::Error]
45
45
  def error
46
- case res
47
- when Net::HTTPServerError
46
+ if res.server_error?
48
47
  LLM::ServerError.new("Server error").tap { _1.response = res }
49
- when Net::HTTPUnauthorized
48
+ elsif res.unauthorized?
50
49
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
51
- when Net::HTTPTooManyRequests
50
+ elsif res.rate_limited?
52
51
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
53
52
  else
54
53
  LLM::Error.new("Unexpected response").tap { _1.response = res }
@@ -58,7 +58,7 @@ class LLM::Anthropic
58
58
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file)))
59
59
  req = Net::HTTP::Post.new("/v1/files", headers)
60
60
  req["content-type"] = multi.content_type
61
- set_body_stream(req, multi.body)
61
+ transport.set_body_stream(req, multi.body)
62
62
  res, span, tracer = execute(request: req, operation: "request")
63
63
  res = ResponseAdapter.adapt(res, type: :file)
64
64
  tracer.on_request_finish(operation: "request", res:, span:)
@@ -159,7 +159,7 @@ class LLM::Anthropic
159
159
  @provider.instance_variable_get(:@key)
160
160
  end
161
161
 
162
- [:headers, :execute, :set_body_stream].each do |m|
162
+ [:headers, :execute, :transport].each do |m|
163
163
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
164
164
  end
165
165
  end
@@ -27,6 +27,36 @@ module LLM::Anthropic::ResponseAdapter
27
27
  0
28
28
  end
29
29
 
30
+ ##
31
+ # (see LLM::Contract::Completion#input_audio_tokens)
32
+ def input_audio_tokens
33
+ super
34
+ end
35
+
36
+ ##
37
+ # (see LLM::Contract::Completion#output_audio_tokens)
38
+ def output_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#input_image_tokens)
44
+ def input_image_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#cache_read_tokens)
50
+ def cache_read_tokens
51
+ body.usage&.cache_read_input_tokens || 0
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_write_tokens)
56
+ def cache_write_tokens
57
+ body.usage&.cache_creation_input_tokens || 0
58
+ end
59
+
30
60
  ##
31
61
  # (see LLM::Contract::Completion#total_tokens)
32
62
  def total_tokens
@@ -161,7 +161,7 @@ module LLM
161
161
  payload = adapt(messages)
162
162
  body = LLM.json.dump(payload.merge!(params))
163
163
  req = Net::HTTP::Post.new("/v1/messages", headers)
164
- set_body_stream(req, StringIO.new(body))
164
+ transport.set_body_stream(req, StringIO.new(body))
165
165
  req
166
166
  end
167
167
 
@@ -11,7 +11,7 @@ class LLM::Bedrock
11
11
  # @api private
12
12
  class ErrorHandler
13
13
  ##
14
- # @return [Net::HTTPResponse]
14
+ # @return [LLM::Transport::Response]
15
15
  attr_reader :res
16
16
 
17
17
  ##
@@ -21,12 +21,12 @@ class LLM::Bedrock
21
21
  ##
22
22
  # @param [LLM::Tracer] tracer
23
23
  # @param [Object, nil] span
24
- # @param [Net::HTTPResponse] res
24
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
25
25
  # @return [LLM::Bedrock::ErrorHandler]
26
26
  def initialize(tracer, span, res)
27
27
  @tracer = tracer
28
28
  @span = span
29
- @res = res
29
+ @res = LLM::Transport::Response.from(res)
30
30
  end
31
31
 
32
32
  ##
@@ -44,16 +44,15 @@ class LLM::Bedrock
44
44
  # @return [LLM::Error]
45
45
  def error
46
46
  message = extract_message
47
- case res
48
- when Net::HTTPServerError
47
+ if res.server_error?
49
48
  LLM::ServerError.new(message).tap { _1.response = res }
50
- when Net::HTTPUnauthorized
49
+ elsif res.unauthorized?
51
50
  LLM::UnauthorizedError.new(message).tap { _1.response = res }
52
- when Net::HTTPForbidden
51
+ elsif res.forbidden?
53
52
  LLM::UnauthorizedError.new(message).tap { _1.response = res }
54
- when Net::HTTPTooManyRequests
53
+ elsif res.rate_limited?
55
54
  LLM::RateLimitError.new(message).tap { _1.response = res }
56
- when Net::HTTPNotFound
55
+ elsif res.not_found?
57
56
  LLM::Error.new("Bedrock model not found: #{message}").tap { _1.response = res }
58
57
  else
59
58
  LLM::Error.new(message).tap { _1.response = res }
@@ -8,8 +8,9 @@ class LLM::Bedrock
8
8
  #
9
9
  # Unlike the Converse API (which lives on `bedrock-runtime.<region>.amazonaws.com`),
10
10
  # the models endpoint lives on the control plane at
11
- # `bedrock.<region>.amazonaws.com`. This class manages its own HTTP
12
- # connection since the provider's transport is pinned to the runtime host.
11
+ # `bedrock.<region>.amazonaws.com`. This class builds a matching
12
+ # transport for the control-plane host from the provider's current
13
+ # transport class.
13
14
  #
14
15
  # @example
15
16
  # llm = LLM.bedrock(
@@ -39,19 +40,18 @@ class LLM::Bedrock
39
40
  # @return [LLM::Response]
40
41
  def all(**params)
41
42
  host = credentials.host
42
- handle_response http(host).request(build_request(host, params))
43
+ req = build_request(host, params)
44
+ res = build_transport(host).request(req, owner: self)
45
+ handle_response(res)
43
46
  end
44
47
 
45
48
  private
46
49
 
47
50
  ##
48
51
  # @param [String] host
49
- # @return [Net::HTTP]
50
- def http(host)
51
- http = Net::HTTP.new(host, 443)
52
- http.use_ssl = true
53
- http.read_timeout = timeout
54
- http
52
+ # @return [LLM::Transport]
53
+ def build_transport(host)
54
+ transport.class.new(host:, port: 443, timeout:, ssl: true)
55
55
  end
56
56
 
57
57
  ##
@@ -68,12 +68,12 @@ class LLM::Bedrock
68
68
  end
69
69
 
70
70
  ##
71
- # @param [Net::HTTPResponse] res
71
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
72
72
  # @return [LLM::Response]
73
73
  # @raise [LLM::Error]
74
74
  def handle_response(res)
75
- case res
76
- when Net::HTTPSuccess
75
+ res = LLM::Transport::Response.from(res)
76
+ if res.success?
77
77
  res.body = LLM::Object.from(LLM.json.load(res.body || "{}"))
78
78
  LLM::Bedrock::ResponseAdapter.adapt(res, type: :models)
79
79
  else
@@ -102,7 +102,7 @@ class LLM::Bedrock
102
102
  end
103
103
  end
104
104
 
105
- [:timeout, :tracer].each do |m|
105
+ [:timeout, :tracer, :transport].each do |m|
106
106
  define_method(m) { @provider.send(m) }
107
107
  end
108
108
  end
@@ -56,6 +56,36 @@ module LLM::Bedrock::ResponseAdapter
56
56
  0
57
57
  end
58
58
 
59
+ ##
60
+ # (see LLM::Contract::Completion#input_audio_tokens)
61
+ def input_audio_tokens
62
+ super
63
+ end
64
+
65
+ ##
66
+ # (see LLM::Contract::Completion#output_audio_tokens)
67
+ def output_audio_tokens
68
+ super
69
+ end
70
+
71
+ ##
72
+ # (see LLM::Contract::Completion#input_image_tokens)
73
+ def input_image_tokens
74
+ super
75
+ end
76
+
77
+ ##
78
+ # (see LLM::Contract::Completion#cache_read_tokens)
79
+ def cache_read_tokens
80
+ 0
81
+ end
82
+
83
+ ##
84
+ # (see LLM::Contract::Completion#cache_write_tokens)
85
+ def cache_write_tokens
86
+ 0
87
+ end
88
+
59
89
  ##
60
90
  # (see LLM::Contract::Completion#total_tokens)
61
91
  def total_tokens
@@ -218,7 +218,7 @@ module LLM
218
218
  path = stream ? "/model/#{model_id}/converse-stream" \
219
219
  : "/model/#{model_id}/converse"
220
220
  req = Net::HTTP::Post.new(path, headers)
221
- set_body_stream(req, StringIO.new(body))
221
+ transport.set_body_stream(req, StringIO.new(body))
222
222
  [req, messages, body]
223
223
  end
224
224
 
@@ -5,7 +5,7 @@ class LLM::Google
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::Google
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::Google::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -49,17 +49,16 @@ class LLM::Google
49
49
  ##
50
50
  # @return [LLM::Error]
51
51
  def error
52
- case res
53
- when Net::HTTPServerError
52
+ if res.server_error?
54
53
  LLM::ServerError.new("Server error").tap { _1.response = res }
55
- when Net::HTTPBadRequest
54
+ elsif res.bad_request?
56
55
  reason = body.dig("error", "details", 0, "reason")
57
56
  if reason == "API_KEY_INVALID"
58
57
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
59
58
  else
60
59
  LLM::Error.new("Unexpected response").tap { _1.response = res }
61
60
  end
62
- when Net::HTTPTooManyRequests
61
+ elsif res.rate_limited?
63
62
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
64
63
  else
65
64
  LLM::Error.new("Unexpected response").tap { _1.response = res }
@@ -69,7 +69,7 @@ class LLM::Google
69
69
  req["X-Goog-Upload-Offset"] = 0
70
70
  req["X-Goog-Upload-Command"] = "upload, finalize"
71
71
  file.with_io do |io|
72
- set_body_stream(req, io)
72
+ transport.set_body_stream(req, io)
73
73
  res, span, tracer = execute(request: req, operation: "request")
74
74
  res = ResponseAdapter.adapt(res, type: :file)
75
75
  tracer.on_request_finish(operation: "request", res:, span:)
@@ -127,8 +127,6 @@ class LLM::Google
127
127
 
128
128
  private
129
129
 
130
- include LLM::Utils
131
-
132
130
  def request_upload_url(file:)
133
131
  req = Net::HTTP::Post.new("/upload/v1beta/files?key=#{key}", headers)
134
132
  req["X-Goog-Upload-Protocol"] = "resumable"
@@ -146,7 +144,7 @@ class LLM::Google
146
144
  @provider.instance_variable_get(:@key)
147
145
  end
148
146
 
149
- [:headers, :execute, :set_body_stream].each do |m|
147
+ [:headers, :execute, :transport].each do |m|
150
148
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
151
149
  end
152
150
  end
@@ -91,7 +91,7 @@ class LLM::Google
91
91
  @provider.instance_variable_get(:@key)
92
92
  end
93
93
 
94
- [:headers, :execute, :set_body_stream].each do |m|
94
+ [:headers, :execute].each do |m|
95
95
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
96
96
  end
97
97
  end
@@ -17,8 +17,6 @@ class LLM::Google
17
17
  # print "id: ", model.id, "\n"
18
18
  # end
19
19
  class Models
20
- include LLM::Utils
21
-
22
20
  ##
23
21
  # Returns a new Models object
24
22
  # @param provider [LLM::Provider]
@@ -33,6 +33,36 @@ module LLM::Google::ResponseAdapter
33
33
  body.usageMetadata.thoughtsTokenCount || 0
34
34
  end
35
35
 
36
+ ##
37
+ # (see LLM::Contract::Completion#input_audio_tokens)
38
+ def input_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#output_audio_tokens)
44
+ def output_audio_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#input_image_tokens)
50
+ def input_image_tokens
51
+ super
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_read_tokens)
56
+ def cache_read_tokens
57
+ 0
58
+ end
59
+
60
+ ##
61
+ # (see LLM::Contract::Completion#cache_write_tokens)
62
+ def cache_write_tokens
63
+ 0
64
+ end
65
+
36
66
  ##
37
67
  # (see LLM::Contract::Completion#total_tokens)
38
68
  def total_tokens
@@ -208,7 +208,7 @@ module LLM
208
208
  req = Net::HTTP::Post.new(path, headers)
209
209
  messages = build_complete_messages(prompt, params, role)
210
210
  body = LLM.json.dump({contents: adapt(messages)}.merge!(params))
211
- set_body_stream(req, StringIO.new(body))
211
+ transport.set_body_stream(req, StringIO.new(body))
212
212
  req
213
213
  end
214
214
 
@@ -5,7 +5,7 @@ class LLM::Ollama
5
5
  # @private
6
6
  class ErrorHandler
7
7
  ##
8
- # @return [Net::HTTPResponse]
8
+ # @return [LLM::Transport::Response]
9
9
  # Non-2XX response from the server
10
10
  attr_reader :res
11
11
 
@@ -19,13 +19,13 @@ class LLM::Ollama
19
19
  # The tracer
20
20
  # @param [Object, nil] span
21
21
  # The span
22
- # @param [Net::HTTPResponse] res
22
+ # @param [LLM::Transport::Response, Net::HTTPResponse] res
23
23
  # The response from the server
24
24
  # @return [LLM::Ollama::ErrorHandler]
25
25
  def initialize(tracer, span, res)
26
26
  @tracer = tracer
27
27
  @span = span
28
- @res = res
28
+ @res = LLM::Transport::Response.from(res)
29
29
  end
30
30
 
31
31
  ##
@@ -43,12 +43,11 @@ class LLM::Ollama
43
43
  ##
44
44
  # @return [LLM::Error]
45
45
  def error
46
- case res
47
- when Net::HTTPServerError
46
+ if res.server_error?
48
47
  LLM::ServerError.new("Server error").tap { _1.response = res }
49
- when Net::HTTPUnauthorized
48
+ elsif res.unauthorized?
50
49
  LLM::UnauthorizedError.new("Authentication error").tap { _1.response = res }
51
- when Net::HTTPTooManyRequests
50
+ elsif res.rate_limited?
52
51
  LLM::RateLimitError.new("Too many requests").tap { _1.response = res }
53
52
  else
54
53
  LLM::Error.new("Unexpected response").tap { _1.response = res }
@@ -17,8 +17,6 @@ class LLM::Ollama
17
17
  # print "id: ", model.id, "\n"
18
18
  # end
19
19
  class Models
20
- include LLM::Utils
21
-
22
20
  ##
23
21
  # Returns a new Models object
24
22
  # @param provider [LLM::Provider]
@@ -27,6 +27,36 @@ module LLM::Ollama::ResponseAdapter
27
27
  0
28
28
  end
29
29
 
30
+ ##
31
+ # (see LLM::Contract::Completion#input_audio_tokens)
32
+ def input_audio_tokens
33
+ super
34
+ end
35
+
36
+ ##
37
+ # (see LLM::Contract::Completion#output_audio_tokens)
38
+ def output_audio_tokens
39
+ super
40
+ end
41
+
42
+ ##
43
+ # (see LLM::Contract::Completion#input_image_tokens)
44
+ def input_image_tokens
45
+ super
46
+ end
47
+
48
+ ##
49
+ # (see LLM::Contract::Completion#cache_read_tokens)
50
+ def cache_read_tokens
51
+ 0
52
+ end
53
+
54
+ ##
55
+ # (see LLM::Contract::Completion#cache_write_tokens)
56
+ def cache_write_tokens
57
+ 0
58
+ end
59
+
30
60
  ##
31
61
  # (see LLM::Contract::Completion#total_tokens)
32
62
  def total_tokens
@@ -130,7 +130,7 @@ module LLM
130
130
  messages = build_complete_messages(prompt, params, role)
131
131
  body = LLM.json.dump({messages: [adapt(messages)].flatten}.merge!(params))
132
132
  req = Net::HTTP::Post.new("/api/chat", headers)
133
- set_body_stream(req, StringIO.new(body))
133
+ transport.set_body_stream(req, StringIO.new(body))
134
134
  req
135
135
  end
136
136
 
@@ -57,7 +57,7 @@ class LLM::OpenAI
57
57
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
58
58
  req = Net::HTTP::Post.new(path("/audio/transcriptions"), headers)
59
59
  req["content-type"] = multi.content_type
60
- set_body_stream(req, multi.body)
60
+ transport.set_body_stream(req, multi.body)
61
61
  res, span, tracer = execute(request: req, operation: "request")
62
62
  res = LLM::Response.new(res)
63
63
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -81,7 +81,7 @@ class LLM::OpenAI
81
81
  multi = LLM::Multipart.new(params.merge!(file: LLM.File(file), model:))
82
82
  req = Net::HTTP::Post.new(path("/audio/translations"), headers)
83
83
  req["content-type"] = multi.content_type
84
- set_body_stream(req, multi.body)
84
+ transport.set_body_stream(req, multi.body)
85
85
  res, span, tracer = execute(request: req, operation: "request")
86
86
  res = LLM::Response.new(res)
87
87
  tracer.on_request_finish(operation: "request", model:, res:, span:)
@@ -90,7 +90,7 @@ class LLM::OpenAI
90
90
 
91
91
  private
92
92
 
93
- [:path, :headers, :execute, :set_body_stream].each do |m|
93
+ [:path, :headers, :execute, :transport].each do |m|
94
94
  define_method(m) { |*args, **kwargs, &b| @provider.send(m, *args, **kwargs, &b) }
95
95
  end
96
96
  end