llm.rb 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/llm/providers/bedrock/stream_parser.rb ADDED
@@ -0,0 +1,201 @@
+# frozen_string_literal: true
+
+class LLM::Bedrock
+  ##
+  # Parses Bedrock Converse Stream events into a response body
+  # and emits stream callbacks (on_content, on_tool_call, etc.).
+  #
+  # Receives decoded JSON payloads from {StreamDecoder} along with
+  # the AWS Event Stream event type header.
+  #
+  # Bedrock Converse Stream event types:
+  #   messageStart      — initial role
+  #   contentBlockStart — tool use or reasoning start
+  #   contentBlockDelta — text delta, tool input JSON, or reasoning text
+  #   contentBlockStop  — content block finished
+  #   messageStop       — final stop reason, usage metadata
+  #
+  # @api private
+  class StreamParser
+    TOOL_MARKER = "<|DSML|function_calls"
+
+    ##
+    # @return [Hash] Fully constructed response body
+    attr_reader :body
+
+    ##
+    # @param [#<<, LLM::Stream] stream
+    def initialize(stream)
+      @body = {"output" => {"message" => {"role" => "assistant", "content" => []}}}
+      @stream = stream
+      @text_markers = {}
+      @can_emit_content = stream.respond_to?(:on_content)
+      @can_emit_reasoning_content = stream.respond_to?(:on_reasoning_content)
+      @can_emit_tool_call = stream.respond_to?(:on_tool_call)
+      @can_push_content = stream.respond_to?(:<<)
+    end
+
+    ##
+    # @param [Hash] payload Decoded JSON from an event stream frame
+    # @param [String, nil] event_type The :event-type header value
+    # @return [self]
+    def parse!(payload, event_type: nil)
+      type = event_type || payload["type"]
+      case type
+      when "messageStart"
+        # { "role" => "assistant" }
+      when "contentBlockStart"
+        # { "contentBlockIndex" => 0, "start" => { "toolUse" => {...} } }
+        handle_content_block_start(payload)
+      when "contentBlockDelta"
+        # { "contentBlockIndex" => 0, "delta" => { "text" => "..." } }
+        handle_content_block_delta(payload)
+      when "contentBlockStop"
+        handle_content_block_stop(payload)
+      when "messageStop"
+        # { "stopReason" => "end_turn", "metadata" => {"usage" => {...}} }
+        merge_metadata(payload)
+      when "metadata"
+        # { "usage" => {...} }
+        merge_metadata(payload)
+      end
+      self
+    end
+
+    ##
+    # @return [void]
+    def free
+      @text_markers.clear
+    end
+
+    private
+
+    def handle_content_block_start(payload)
+      index = payload["contentBlockIndex"]
+      start_data = payload["start"] || {}
+      if (tool_use = start_data["toolUse"])
+        content[index] = {"toolUse" => {"toolUseId" => tool_use["toolUseId"], "name" => tool_use["name"], "input" => +""}}
+      elsif (reasoning = start_data["reasoningContent"])
+        content[index] = {"reasoningContent" => {"text" => +"", "signature" => reasoning["signature"]}.compact}
+      end
+    end
+
+    def handle_content_block_delta(payload)
+      index = payload["contentBlockIndex"]
+      delta = payload["delta"] || {}
+      if (text = delta["text"])
+        ensure_content_block(index, "text")
+        visible = filtered_text(index, text)
+        return if visible.empty?
+        content[index]["text"] ||= +""
+        content[index]["text"] << visible
+        emit_content(visible)
+      elsif (tool_input = delta.dig("toolUse", "input"))
+        ensure_content_block(index, "tool_use")
+        content[index]["toolUse"]["input"] ||= +""
+        content[index]["toolUse"]["input"] << tool_input
+      elsif (reasoning = delta["reasoningContent"])
+        ensure_content_block(index, "reasoning")
+        if reasoning["text"]
+          content[index]["reasoningContent"]["text"] ||= +""
+          content[index]["reasoningContent"]["text"] << reasoning["text"]
+          emit_reasoning_content(reasoning["text"])
+        end
+        if reasoning["signature"]
+          content[index]["reasoningContent"]["signature"] = reasoning["signature"]
+        end
+      end
+    end
+
+    def handle_content_block_stop(payload)
+      index = payload["contentBlockIndex"]
+      item = content[index]
+      return unless item
+      flush_text(index, item)
+      if item["toolUse"] && item["toolUse"]["input"].is_a?(String)
+        parsed = LLM.json.load(item["toolUse"]["input"])
+        item["toolUse"]["input"] = parsed.is_a?(Hash) ? parsed : {}
+        emit_tool(item)
+      end
+    rescue *LLM.json.parser_error
+      item["toolUse"]["input"] = {} if item&.dig("toolUse")
+    end
+
+    def ensure_content_block(index, type)
+      content[index] ||= case type
+      when "tool_use" then {"toolUse" => {"input" => +""}}
+      when "reasoning" then {"reasoningContent" => {"text" => +""}}
+      else {}
+      end
+    end
+
+    def filtered_text(index, text)
+      state = (@text_markers[index] ||= +"")
+      value = state << text
+      value.gsub!(TOOL_MARKER, "")
+      keep = marker_prefix_length(value)
+      @text_markers[index] = keep.zero? ? +"" : value[-keep..]
+      keep.zero? ? value : value[0...-keep]
+    end
+
+    def flush_text(index, item)
+      value = @text_markers.delete(index).to_s
+      return unless item["text"]
+      if value.empty?
+        content[index] = {} if item["text"].empty?
+      else
+        item["text"] << value
+        emit_content(value)
+      end
+    end
+
+    def marker_prefix_length(value)
+      [value.length, TOOL_MARKER.length - 1].min.downto(1) do |length|
+        return length if TOOL_MARKER.start_with?(value[-length..])
+      end
+      0
+    end
+
+    def merge_metadata(payload)
+      metadata = payload["metadata"] || payload
+      return unless metadata.is_a?(Hash)
+      usage = metadata["usage"]
+      @body["usage"] = usage if usage
+      @body["stopReason"] = payload["stopReason"] if payload["stopReason"]
+    end
+
+    def emit_content(value)
+      if @can_emit_content
+        @stream.on_content(value)
+      elsif @can_push_content
+        @stream << value
+      end
+    end
+
+    def emit_reasoning_content(value)
+      @stream.on_reasoning_content(value) if @can_emit_reasoning_content
+    end
+
+    def emit_tool(tool)
+      return unless @can_emit_tool_call
+      function, error = resolve_tool(tool)
+      @stream.on_tool_call(function, error)
+    end
+
+    def resolve_tool(tool)
+      payload = tool["toolUse"] || {}
+      registered = @stream.find_tool(payload["name"])
+      fn = (registered || LLM::Function.new(payload["name"])).dup.tap do |f|
+        f.id = payload["toolUseId"]
+        f.arguments = payload["input"] || {}
+        f.tracer = @stream.extra[:tracer]
+        f.model = @stream.extra[:model]
+      end
+      [fn, registered ? nil : @stream.tool_not_found(fn)]
+    end
+
+    def content
+      @body["output"]["message"]["content"]
+    end
+  end
+end
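As a rough illustration of the event flow handled above, the sketch below drives the parser with a few hand-written Converse Stream payloads. The sink object and the event hashes are made up for this example; in the gem the decoded payloads come from StreamDecoder, which is not part of this section of the diff.

  # Hypothetical driver for LLM::Bedrock::StreamParser. The payloads below are
  # hand-written stand-ins for what StreamDecoder would yield.
  sink = Object.new
  def sink.<<(text) = print(text) # receives text deltas via emit_content

  parser = LLM::Bedrock::StreamParser.new(sink)
  [
    [{"role" => "assistant"}, "messageStart"],
    [{"contentBlockIndex" => 0, "delta" => {"text" => "Hello, "}}, "contentBlockDelta"],
    [{"contentBlockIndex" => 0, "delta" => {"text" => "world."}}, "contentBlockDelta"],
    [{"contentBlockIndex" => 0}, "contentBlockStop"],
    [{"stopReason" => "end_turn", "metadata" => {"usage" => {"totalTokens" => 12}}}, "messageStop"]
  ].each { |payload, type| parser.parse!(payload, event_type: type) }
  parser.free
  parser.body # => {"output" => {...}, "usage" => {...}, "stopReason" => "end_turn"}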
data/lib/llm/providers/bedrock.rb ADDED
@@ -0,0 +1,272 @@
+# frozen_string_literal: true
+
+module LLM
+  ##
+  # The Bedrock class implements a provider for
+  # [Amazon Bedrock](https://aws.amazon.com/bedrock/).
+  #
+  # Bedrock provides access to foundation models from Anthropic, Meta,
+  # Mistral, AI21 Labs, Cohere, and more through the AWS infrastructure.
+  # This provider uses the Bedrock Converse API for chat completions,
+  # and the Converse Stream API for streaming.
+  #
+  # Unlike other llm.rb providers which use API key authentication,
+  # Bedrock uses AWS Signature V4 (SigV4) for request signing.
+  # You must provide AWS credentials (access key, secret key, and region)
+  # instead of a single API key.
+  #
+  # Streaming uses the AWS Event Stream binary protocol instead of
+  # standard SSE. The binary framing is decoded inline using only
+  # Ruby's stdlib.
+  #
+  # @example
+  #   require "llm"
+  #
+  #   llm = LLM.bedrock(
+  #     access_key_id: ENV["AWS_ACCESS_KEY_ID"],
+  #     secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"],
+  #     region: "us-east-1"
+  #   )
+  #   ctx = LLM::Context.new(llm)
+  #   ctx.talk "Hello, how are you?"
+  #   ctx.messages.select(&:assistant?).each { puts _1.content }
+  class Bedrock < Provider
+    require_relative "bedrock/signature"
+    require_relative "bedrock/error_handler"
+    require_relative "bedrock/request_adapter"
+    require_relative "bedrock/request_adapter/completion"
+    require_relative "bedrock/response_adapter"
+    require_relative "bedrock/response_adapter/completion"
+    require_relative "bedrock/response_adapter/models"
+    require_relative "bedrock/stream_decoder"
+    require_relative "bedrock/stream_parser"
+    require_relative "bedrock/models"
+
+    include RequestAdapter
+
+    HOST_PATTERN = "bedrock-runtime.%s.amazonaws.com"
+
+    ##
+    # @param [String] access_key_id AWS access key ID
+    # @param [String] secret_access_key AWS secret access key
+    # @param [String] region AWS region (e.g. "us-east-1")
+    # @param [String, nil] session_token AWS session token for temporary credentials
+    # @param [String, nil] host Override the Bedrock API host
+    # @param [Integer] port Connection port
+    # @param [Boolean] ssl Whether to use SSL
+    # @param [Integer] timeout Request timeout in seconds
+    def initialize(access_key_id: nil, secret_access_key: nil,
+                   region: nil, session_token: nil,
+                   host: nil, port: 443, ssl: true, timeout: 60,
+                   **)
+      region ||= "us-east-1"
+      @access_key_id = access_key_id
+      @secret_access_key = secret_access_key
+      @aws_region = region
+      @session_token = session_token
+      host ||= HOST_PATTERN % region
+      @aws_host = host
+      super(key: @access_key_id, host:, port:, ssl:, timeout:, persistent: false)
+    end
+
+    ##
+    # @return [Symbol] Returns the provider's name
+    def name
+      :bedrock
+    end
+
+    ##
+    # Provides an interface to the Bedrock Converse API
+    #
+    # @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html
+    #
+    # @param prompt (see LLM::Provider#complete)
+    # @param params (see LLM::Provider#complete)
+    # @return (see LLM::Provider#complete)
+    def complete(prompt, params = {})
+      params, stream, tools, role = normalize_complete_params(params)
+      req, messages, body = build_complete_request(prompt, params, role, stream:)
+      tracer.set_request_metadata(user_input: extract_user_input(messages, fallback: prompt))
+      sign!(req, body)
+      model_id = model_id_for(req.path)
+      res, span, tracer = execute(request: req, stream:, operation: "chat", stream_parser:, model: model_id)
+      res = ResponseAdapter.adapt(res, type: :completion)
+        .extend(Module.new { define_method(:__tools__) { tools } })
+      tracer.on_request_finish(operation: "chat", model: model_id, res:, span:)
+      res
+    end
+
+    ##
+    # Provides an interface to Bedrock's ListFoundationModels API.
+    #
+    # @note
+    #   Unlike the Converse API (bedrock-runtime), this endpoint lives
+    #   on the control plane (bedrock.<region>.amazonaws.com).
+    #
+    # @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_ListFoundationModels.html
+    # @return [LLM::Bedrock::Models]
+    def models
+      LLM::Bedrock::Models.new(self)
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def files
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def images
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def audio
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def moderations
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def responses
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def vector_stores
+      raise NotImplementedError
+    end
+
+    ##
+    # @raise [NotImplementedError]
+    def embed(input, model: nil, **params)
+      raise NotImplementedError
+    end
+
+    ##
+    # @return [String]
+    def assistant_role
+      "assistant"
+    end
+
+    ##
+    # Bedrock expects tool results as user messages containing
+    # `toolResult` content blocks rather than a distinct `tool` role.
+    # @return (see LLM::Provider#tool_role)
+    def tool_role
+      :user
+    end
+
+    ##
+    # @return [String]
+    def default_model
+      "deepseek.v3.2"
+    end
+
+    private

+    def headers
+      lock do
+        (@headers || {}).merge("Content-Type" => "application/json")
+      end
+    end
+
+    def credentials
+      LLM::Object.from(
+        access_key_id: @access_key_id,
+        secret_access_key: @secret_access_key,
+        aws_region: @aws_region,
+        host: @aws_host,
+        session_token: @session_token
+      )
+    end
+
+    def stream_parser
+      LLM::Bedrock::StreamParser
+    end
+
+    def error_handler
+      LLM::Bedrock::ErrorHandler
+    end
+
+    def stream_decoder
+      LLM::Bedrock::StreamDecoder
+    end
+
+    def normalize_complete_params(params)
+      params = {role: :user, model: default_model, max_tokens: 2048}.merge!(params)
+      tools = resolve_tools(params.delete(:tools))
+      params = [params, adapt_schema(params), adapt_tools(tools)].inject({}, &:merge!).compact
+      role, stream = params.delete(:role), params.delete(:stream)
+      params[:stream] = true if streamable?(stream) || stream == true
+      [params, stream, tools, role]
+    end
+
+    def build_complete_request(prompt, params, role, stream: nil)
+      messages = build_complete_messages(prompt, params, role)
+      model_id = params.delete(:model) || default_model
+      payload = build_converse_payload(messages, params)
+      body = LLM.json.dump(payload)
+      path = stream ? "/model/#{model_id}/converse-stream" \
+                    : "/model/#{model_id}/converse"
+      req = Net::HTTP::Post.new(path, headers)
+      set_body_stream(req, StringIO.new(body))
+      [req, messages, body]
+    end
+
+    def build_complete_messages(prompt, params, role)
+      if LLM::Prompt === prompt
+        [*(params.delete(:messages) || []), *prompt]
+      else
+        [*(params.delete(:messages) || []), Message.new(role, prompt)]
+      end
+    end
+
+    def build_converse_payload(messages, params)
+      adapted = adapt(messages)
+      payload = {}
+      payload[:system] = adapted[:system] if adapted[:system]&.any?
+      payload[:messages] = adapted[:messages]
+      inference_config = {}
+      inference_config[:maxTokens] = params.delete(:max_tokens) if params[:max_tokens]
+      inference_config[:temperature] = params.delete(:temperature) if params.key?(:temperature)
+      inference_config[:topP] = params.delete(:top_p) if params.key?(:top_p)
+      inference_config[:stopSequences] = params.delete(:stop) if params[:stop]
+      payload[:inferenceConfig] = inference_config unless inference_config.empty?
+      payload[:toolConfig] = params.delete(:toolConfig) if params[:toolConfig]
+      payload[:outputConfig] = params.delete(:outputConfig) if params[:outputConfig]
+      additional = {}
+      top_k = params.delete(:top_k)
+      additional[:top_k] = top_k if top_k
+      payload[:additionalModelRequestFields] = additional unless additional.empty?
+      payload
+    end
+
+    def extract_user_input(messages, fallback:)
+      message = messages.reverse.find(&:user?) || messages.last
+      value = message&.content || fallback
+      value.is_a?(String) ? value : LLM.json.dump(value)
+    end
+
+    def model_id_for(path)
+      path[%r{\A/model/(.+?)/converse(?:-stream)?\z}, 1] || default_model
+    end
+
+    def sign!(req, body)
+      Signature.new(
+        credentials:,
+        method: req.method,
+        path: req.path,
+        body:
+      ).sign!(req)
+    end
+  end
+end
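The Signature class required above is not included in this section of the diff. For readers unfamiliar with AWS Signature V4, the sketch below shows the general shape of the algorithm for a Bedrock Runtime request using only Ruby's stdlib; it is an independent illustration, not the gem's implementation, and it skips details a real signer needs (URI path and query canonicalization, header value trimming).

  require "openssl"

  # Hypothetical SigV4 helper; returns the headers to attach to the request.
  def sigv4_headers(method:, host:, path:, body:, access_key_id:, secret_access_key:, region:, session_token: nil)
    service = "bedrock" # signing name assumed for the Bedrock Runtime endpoints
    t = Time.now.utc
    amz_date = t.strftime("%Y%m%dT%H%M%SZ")
    date = t.strftime("%Y%m%d")
    payload_hash = OpenSSL::Digest::SHA256.hexdigest(body)

    headers = {"host" => host, "x-amz-date" => amz_date, "x-amz-content-sha256" => payload_hash}
    headers["x-amz-security-token"] = session_token if session_token
    signed_headers = headers.keys.sort.join(";")
    canonical_headers = headers.sort.map { |k, v| "#{k}:#{v}\n" }.join

    # Canonical request -> string to sign -> derived signing key -> signature
    canonical_request = [method, path, "", canonical_headers, signed_headers, payload_hash].join("\n")
    scope = "#{date}/#{region}/#{service}/aws4_request"
    string_to_sign = ["AWS4-HMAC-SHA256", amz_date, scope,
                      OpenSSL::Digest::SHA256.hexdigest(canonical_request)].join("\n")
    key = ["AWS4#{secret_access_key}", date, region, service, "aws4_request"]
      .reduce { |k, part| OpenSSL::HMAC.digest("sha256", k, part) }
    signature = OpenSSL::HMAC.hexdigest("sha256", key, string_to_sign)

    headers.merge("authorization" =>
      "AWS4-HMAC-SHA256 Credential=#{access_key_id}/#{scope}, " \
      "SignedHeaders=#{signed_headers}, Signature=#{signature}")
  end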
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module LLM
-  VERSION = "8.0.0"
+  VERSION = "8.1.0"
 end
data/lib/llm.rb CHANGED
@@ -161,6 +161,14 @@ module LLM
     LLM::OpenAI.new(**)
   end
 
+  ##
+  # @param (see LLM::Bedrock#initialize)
+  # @return (see LLM::Bedrock#initialize)
+  def bedrock(**)
+    lock(:require) { require_relative "llm/providers/bedrock" unless defined?(LLM::Bedrock) }
+    LLM::Bedrock.new(**)
+  end
+
   ##
   # @param key (see LLM::XAI#initialize)
   # @param host (see LLM::XAI#initialize)
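With the constructor registered above, the provider can be reached through the top-level helper. A minimal usage sketch, assuming the complete/stream interface the other llm.rb backends expose (the model id and streaming sink here are illustrative):

  require "llm"

  llm = LLM.bedrock(
    access_key_id: ENV["AWS_ACCESS_KEY_ID"],
    secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"],
    region: "us-east-1"
  )
  # One-shot completion against the Converse API
  res = llm.complete("Give me one sentence about Ruby.", model: "deepseek.v3.2") # adapted completion response
  # Streaming via the Converse Stream API; text deltas are pushed to any
  # object that responds to #<< (here, $stdout)
  llm.complete("Count to five.", model: "deepseek.v3.2", stream: $stdout)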
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llm.rb
 version: !ruby/object:Gem::Version
-  version: 8.0.0
+  version: 8.1.0
 platform: ruby
 authors:
 - Antar Azri
@@ -287,6 +287,7 @@ files:
 - LICENSE
 - README.md
 - data/anthropic.json
+- data/bedrock.json
 - data/deepseek.json
 - data/google.json
 - data/openai.json
@@ -369,6 +370,17 @@ files:
 - lib/llm/providers/anthropic/response_adapter/web_search.rb
 - lib/llm/providers/anthropic/stream_parser.rb
 - lib/llm/providers/anthropic/utils.rb
+- lib/llm/providers/bedrock.rb
+- lib/llm/providers/bedrock/error_handler.rb
+- lib/llm/providers/bedrock/models.rb
+- lib/llm/providers/bedrock/request_adapter.rb
+- lib/llm/providers/bedrock/request_adapter/completion.rb
+- lib/llm/providers/bedrock/response_adapter.rb
+- lib/llm/providers/bedrock/response_adapter/completion.rb
+- lib/llm/providers/bedrock/response_adapter/models.rb
+- lib/llm/providers/bedrock/signature.rb
+- lib/llm/providers/bedrock/stream_decoder.rb
+- lib/llm/providers/bedrock/stream_parser.rb
 - lib/llm/providers/deepseek.rb
 - lib/llm/providers/deepseek/request_adapter.rb
 - lib/llm/providers/deepseek/request_adapter/completion.rb