llm.rb 7.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +151 -1
- data/README.md +45 -25
- data/data/bedrock.json +2948 -0
- data/data/deepseek.json +8 -8
- data/data/openai.json +39 -2
- data/data/xai.json +35 -0
- data/data/zai.json +1 -1
- data/lib/llm/active_record/acts_as_agent.rb +2 -6
- data/lib/llm/active_record/acts_as_llm.rb +4 -82
- data/lib/llm/active_record.rb +80 -2
- data/lib/llm/agent.rb +9 -4
- data/lib/llm/error.rb +4 -0
- data/lib/llm/function/array.rb +7 -3
- data/lib/llm/function/fiber_group.rb +9 -3
- data/lib/llm/function/fork/job.rb +67 -0
- data/lib/llm/function/fork/task.rb +76 -0
- data/lib/llm/function/fork.rb +8 -0
- data/lib/llm/function/fork_group.rb +36 -0
- data/lib/llm/function/ractor/task.rb +13 -3
- data/lib/llm/function/task.rb +10 -2
- data/lib/llm/function.rb +24 -11
- data/lib/llm/mcp/command.rb +1 -1
- data/lib/llm/mcp/transport/http.rb +2 -2
- data/lib/llm/mcp.rb +7 -4
- data/lib/llm/object/kernel.rb +8 -2
- data/lib/llm/object.rb +75 -21
- data/lib/llm/{mcp/pipe.rb → pipe.rb} +9 -8
- data/lib/llm/provider/transport/http/execution.rb +1 -1
- data/lib/llm/provider/transport/http.rb +1 -1
- data/lib/llm/provider.rb +7 -0
- data/lib/llm/providers/bedrock/error_handler.rb +80 -0
- data/lib/llm/providers/bedrock/models.rb +109 -0
- data/lib/llm/providers/bedrock/request_adapter/completion.rb +153 -0
- data/lib/llm/providers/bedrock/request_adapter.rb +95 -0
- data/lib/llm/providers/bedrock/response_adapter/completion.rb +143 -0
- data/lib/llm/providers/bedrock/response_adapter/models.rb +34 -0
- data/lib/llm/providers/bedrock/response_adapter.rb +40 -0
- data/lib/llm/providers/bedrock/signature.rb +166 -0
- data/lib/llm/providers/bedrock/stream_decoder.rb +140 -0
- data/lib/llm/providers/bedrock/stream_parser.rb +201 -0
- data/lib/llm/providers/bedrock.rb +272 -0
- data/lib/llm/stream/queue.rb +1 -1
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +27 -1
- data/llm.gemspec +2 -1
- metadata +33 -3
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "digest"

class LLM::Bedrock
  ##
  # Decodes AWS Event Stream binary frames.
  #
  # Bedrock Converse Stream uses the AWS Event Stream protocol,
  # a binary framing format (not SSE). Each message has:
  # - total length (4 bytes, big-endian)
  # - headers length (4 bytes, big-endian)
  # - prelude CRC (4 bytes)
  # - headers (variable)
  # - payload (variable, usually JSON)
  # - message CRC (4 bytes)
  #
  # Implements #<< to match the interface expected by llm.rb's
  # streaming transport, so it can replace the SSE-based
  # StreamDecoder when streaming from Bedrock.
  #
  # @api private
  class StreamDecoder
    ##
    # @return [LLM::Bedrock::StreamParser]
    attr_reader :parser

    ##
    # @param [LLM::Bedrock::StreamParser] parser
    def initialize(parser)
      # Binary-encoded buffer: with ASCII-8BIT encoding, String#[] indexes
      # by byte, which the frame arithmetic below relies on.
      @buffer = +"".b
      @parser = parser
    end

    ##
    # Feeds a raw binary chunk into the decoder.
    # Accumulates data until complete frames are available,
    # then decodes them and passes the JSON payload to the parser.
    #
    # @param [String] chunk Raw binary data
    # @return [void]
    def <<(chunk)
      @buffer << chunk
      decode_frames
    end

    ##
    # @return [Hash] The fully constructed response body
    def body
      parser.body
    end

    ##
    # Releases buffered state once the stream is finished.
    # @return [void]
    def free
      @buffer.clear
      parser.free if parser.respond_to?(:free)
    end

    private

    # Drains every complete frame currently in the buffer. A frame is
    # complete when the buffer holds at least `total_length` bytes; any
    # trailing partial frame is kept for the next #<< call.
    def decode_frames
      loop do
        # 12 bytes = total length + headers length + prelude CRC
        break if @buffer.bytesize < 12
        total_length = @buffer[0, 4].unpack1("N")
        break if @buffer.bytesize < total_length
        # Prelude and message CRCs are intentionally not verified; the
        # payload is JSON and a corrupt frame surfaces as a parse failure.
        headers = decode_headers
        payload_start = 12 + headers[:length]
        payload_length = total_length - payload_start - 4
        payload = @buffer[payload_start, payload_length] if payload_length > 0
        json = payload ? LLM.json.load(payload) : {}
        parser.parse!(json, event_type: headers[:event_type]) if json.is_a?(Hash)
        # Slice off the consumed frame; `nil` (buffer exactly consumed on
        # some Ruby slicing edge cases) falls back to a fresh binary string.
        @buffer = @buffer[total_length..] || +"".b
      end
    end

    # Decodes the header block of the frame at the start of @buffer.
    #
    # Header wire format (per the application/vnd.amazon.eventstream spec):
    #   1-byte name length, name bytes, 1-byte value type, then a
    #   type-dependent value. Value types:
    #     0 = boolean true   (no value bytes)
    #     1 = boolean false  (no value bytes)
    #     2 = byte (1)       3 = int16 (2)      4 = int32 (4)
    #     5 = int64 (8)      6 = byte array     7 = string
    #     8 = timestamp (int64 millis, 8)       9 = uuid (16 raw bytes)
    #   Byte arrays and strings carry a 2-byte big-endian length prefix.
    #
    # NOTE(fix): previously type 9 was decoded as boolean-true and type 8
    # as a 2-byte-length binary, and types 0/5 were missing — any frame
    # with a timestamp/uuid/int64 header desynced the offset and could
    # corrupt :event-type extraction. Decoding now follows the spec.
    #
    # @return [Hash] {event_type:, length:, <header name> => value, ...}
    def decode_headers
      headers_length = @buffer[4, 4].unpack1("N")
      offset = 12
      end_offset = offset + headers_length
      result = {event_type: nil, length: headers_length}
      while offset < end_offset
        name_len = @buffer.getbyte(offset)
        offset += 1
        break if offset + name_len > end_offset
        name = @buffer[offset, name_len]
        offset += name_len
        break if offset >= end_offset
        value_type = @buffer.getbyte(offset)
        offset += 1
        value = case value_type
                when 0 # boolean true (value encoded in the type byte)
                  true
                when 1 # boolean false (value encoded in the type byte)
                  false
                when 2 # byte
                  val = @buffer.getbyte(offset)
                  offset += 1
                  val
                when 3 # int16
                  val = @buffer[offset, 2].unpack1("s>")
                  offset += 2
                  val
                when 4 # int32
                  val = @buffer[offset, 4].unpack1("l>")
                  offset += 4
                  val
                when 5 # int64
                  val = @buffer[offset, 8].unpack1("q>")
                  offset += 8
                  val
                when 6, 7 # byte array / string: 2-byte big-endian length prefix
                  len = @buffer[offset, 2].unpack1("n")
                  offset += 2
                  str = @buffer[offset, len]
                  offset += len
                  str
                when 8 # timestamp: int64 milliseconds since the epoch
                  val = @buffer[offset, 8].unpack1("q>")
                  offset += 8
                  val
                when 9 # uuid: 16 raw bytes
                  val = @buffer[offset, 16]
                  offset += 16
                  val
                else
                  # Unknown type: we cannot know its width, so skip the
                  # remainder of the header block rather than desync.
                  offset = end_offset
                  nil
                end
        result[:event_type] = value if name == ":event-type"
        result[name] = value if name
      end
      result
    end
  end
end
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true

class LLM::Bedrock
  ##
  # Parses Bedrock Converse Stream events into a response body
  # and emits stream callbacks (on_content, on_tool_call, etc.).
  #
  # Receives decoded JSON payloads from {StreamDecoder} along with
  # the AWS Event Stream event type header.
  #
  # Bedrock Converse Stream event types:
  #   messageStart      — initial role
  #   contentBlockStart — tool use or reasoning start
  #   contentBlockDelta — text delta, tool input JSON, or reasoning text
  #   contentBlockStop  — content block finished
  #   messageStop       — final stop reason, usage metadata
  #
  # @api private
  class StreamParser
    # Sentinel that is stripped from streamed text before it reaches
    # the caller. NOTE(review): presumably emitted by certain models
    # ahead of tool-call output — confirm which models produce it.
    TOOL_MARKER = "<|DSML|function_calls"

    ##
    # @return [Hash] Fully constructed response body
    attr_reader :body

    ##
    # @param [#<<, LLM::Stream] stream
    def initialize(stream)
      @body = {"output" => {"message" => {"role" => "assistant", "content" => []}}}
      @stream = stream
      # Per-content-block carryover used to filter TOOL_MARKER across
      # chunk boundaries (keyed by contentBlockIndex).
      @text_markers = {}
      # Capability flags are checked once here so the hot delta path
      # avoids repeated respond_to? calls.
      @can_emit_content = stream.respond_to?(:on_content)
      @can_emit_reasoning_content = stream.respond_to?(:on_reasoning_content)
      @can_emit_tool_call = stream.respond_to?(:on_tool_call)
      @can_push_content = stream.respond_to?(:<<)
    end

    ##
    # Dispatches one decoded event into the accumulating body.
    #
    # @param [Hash] payload Decoded JSON from an event stream frame
    # @param [String, nil] event_type The :event-type header value
    # @return [self]
    def parse!(payload, event_type: nil)
      # Fall back to the in-payload "type" key when no header was present.
      type = event_type || payload["type"]
      case type
      when "messageStart"
        # { "role" => "assistant" } — role is pre-seeded in @body; no-op.
      when "contentBlockStart"
        # { "contentBlockIndex" => 0, "start" => { "toolUse" => {...} } }
        handle_content_block_start(payload)
      when "contentBlockDelta"
        # { "contentBlockIndex" => 0, "delta" => { "text" => "..." } }
        handle_content_block_delta(payload)
      when "contentBlockStop"
        handle_content_block_stop(payload)
      when "messageStop"
        # { "stopReason" => "end_turn", "metadata" => {"usage" => {...}} }
        merge_metadata(payload)
      when "metadata"
        # { "usage" => {...} }
        merge_metadata(payload)
      end
      self
    end

    ##
    # Releases per-stream state.
    # @return [void]
    def free
      @text_markers.clear
    end

    private

    # Seeds the content slot for a tool-use or reasoning block so later
    # deltas have a mutable string to append into.
    def handle_content_block_start(payload)
      index = payload["contentBlockIndex"]
      start_data = payload["start"] || {}
      if (tool_use = start_data["toolUse"])
        content[index] = {"toolUse" => {"toolUseId" => tool_use["toolUseId"], "name" => tool_use["name"], "input" => +""}}
      elsif (reasoning = start_data["reasoningContent"])
        content[index] = {"reasoningContent" => {"text" => +"", "signature" => reasoning["signature"]}.compact}
      end
    end

    # Appends a delta to its content block: visible text (after marker
    # filtering), raw tool-input JSON, or reasoning text/signature.
    def handle_content_block_delta(payload)
      index = payload["contentBlockIndex"]
      delta = payload["delta"] || {}
      if (text = delta["text"])
        ensure_content_block(index, "text")
        visible = filtered_text(index, text)
        # The whole chunk may be held back as a potential marker prefix.
        return if visible.empty?
        content[index]["text"] ||= +""
        content[index]["text"] << visible
        emit_content(visible)
      elsif (tool_input = delta.dig("toolUse", "input"))
        # Tool input arrives as JSON fragments; parsed at contentBlockStop.
        ensure_content_block(index, "tool_use")
        content[index]["toolUse"]["input"] ||= +""
        content[index]["toolUse"]["input"] << tool_input
      elsif (reasoning = delta["reasoningContent"])
        ensure_content_block(index, "reasoning")
        if reasoning["text"]
          content[index]["reasoningContent"]["text"] ||= +""
          content[index]["reasoningContent"]["text"] << reasoning["text"]
          emit_reasoning_content(reasoning["text"])
        end
        if reasoning["signature"]
          content[index]["reasoningContent"]["signature"] = reasoning["signature"]
        end
      end
    end

    # Finalizes a content block: flushes any withheld marker-prefix text,
    # and for tool use parses the accumulated input JSON and fires the
    # tool-call callback. Unparseable tool input degrades to {}.
    def handle_content_block_stop(payload)
      index = payload["contentBlockIndex"]
      item = content[index]
      return unless item
      flush_text(index, item)
      if item["toolUse"] && item["toolUse"]["input"].is_a?(String)
        parsed = LLM.json.load(item["toolUse"]["input"])
        item["toolUse"]["input"] = parsed.is_a?(Hash) ? parsed : {}
        emit_tool(item)
      end
    rescue *LLM.json.parser_error
      item["toolUse"]["input"] = {} if item&.dig("toolUse")
    end

    # Lazily creates a content slot when a delta arrives without a
    # preceding contentBlockStart for that index.
    def ensure_content_block(index, type)
      content[index] ||= case type
                         when "tool_use" then {"toolUse" => {"input" => +""}}
                         when "reasoning" then {"reasoningContent" => {"text" => +""}}
                         else {}
                         end
    end

    # Strips TOOL_MARKER from streamed text, correctly handling markers
    # split across chunks: any trailing bytes that could be the start of
    # a marker are withheld in @text_markers until the next chunk (or
    # flushed at contentBlockStop).
    def filtered_text(index, text)
      state = (@text_markers[index] ||= +"")
      # `<<` mutates the stored carryover string in place.
      value = state << text
      value.gsub!(TOOL_MARKER, "")
      keep = marker_prefix_length(value)
      @text_markers[index] = keep.zero? ? +"" : value[-keep..]
      keep.zero? ? value : value[0...-keep]
    end

    # Emits any text still withheld for `index` once the block closes.
    # A block that ends up entirely empty is collapsed to {}.
    def flush_text(index, item)
      value = @text_markers.delete(index).to_s
      return unless item["text"]
      if value.empty?
        content[index] = {} if item["text"].empty?
      else
        item["text"] << value
        emit_content(value)
      end
    end

    # Length of the longest proper suffix of `value` that is also a
    # prefix of TOOL_MARKER (0 when none) — i.e. how many trailing bytes
    # might belong to a marker split across chunks.
    def marker_prefix_length(value)
      [value.length, TOOL_MARKER.length - 1].min.downto(1) do |length|
        return length if TOOL_MARKER.start_with?(value[-length..])
      end
      0
    end

    # Copies usage / stopReason from messageStop or metadata events into
    # the response body.
    def merge_metadata(payload)
      metadata = payload["metadata"] || payload
      return unless metadata.is_a?(Hash)
      usage = metadata["usage"]
      @body["usage"] = usage if usage
      @body["stopReason"] = payload["stopReason"] if payload["stopReason"]
    end

    # Prefers the richer on_content callback; falls back to plain #<<.
    def emit_content(value)
      if @can_emit_content
        @stream.on_content(value)
      elsif @can_push_content
        @stream << value
      end
    end

    def emit_reasoning_content(value)
      @stream.on_reasoning_content(value) if @can_emit_reasoning_content
    end

    # Resolves and fires the tool-call callback for a completed toolUse
    # block. `error` is non-nil when the tool is not registered.
    def emit_tool(tool)
      return unless @can_emit_tool_call
      function, error = resolve_tool(tool)
      @stream.on_tool_call(function, error)
    end

    # Looks up the named tool on the stream; when unknown, builds a
    # placeholder LLM::Function and pairs it with a tool_not_found error.
    # The registered function is dup'd so per-call state (id, arguments)
    # never leaks into the shared registration.
    def resolve_tool(tool)
      payload = tool["toolUse"] || {}
      registered = @stream.find_tool(payload["name"])
      fn = (registered || LLM::Function.new(payload["name"])).dup.tap do |f|
        f.id = payload["toolUseId"]
        f.arguments = payload["input"] || {}
        f.tracer = @stream.extra[:tracer]
        f.model = @stream.extra[:model]
      end
      [fn, registered ? nil : @stream.tool_not_found(fn)]
    end

    # Shorthand for the content array being accumulated in @body.
    def content
      @body["output"]["message"]["content"]
    end
  end
end
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# frozen_string_literal: true

module LLM
  ##
  # The Bedrock class implements a provider for
  # [Amazon Bedrock](https://aws.amazon.com/bedrock/).
  #
  # Bedrock provides access to foundation models from Anthropic, Meta,
  # Mistral, AI21 Labs, Cohere, and more through the AWS infrastructure.
  # This provider uses the Bedrock Converse API for chat completions,
  # and the Converse Stream API for streaming.
  #
  # Unlike other llm.rb providers which use API key authentication,
  # Bedrock uses AWS Signature V4 (SigV4) for request signing.
  # You must provide AWS credentials (access key, secret key, and region)
  # instead of a single API key.
  #
  # Streaming uses the AWS Event Stream binary protocol instead of
  # standard SSE. The binary framing is decoded inline using only
  # Ruby's stdlib.
  #
  # @example
  #   require "llm"
  #
  #   llm = LLM.bedrock(
  #     access_key_id: ENV["AWS_ACCESS_KEY_ID"],
  #     secret_access_key: ENV["AWS_SECRET_ACCESS_KEY"],
  #     region: "us-east-1"
  #   )
  #   ctx = LLM::Context.new(llm)
  #   ctx.talk "Hello, how are you?"
  #   ctx.messages.select(&:assistant?).each { puts _1.content }
  class Bedrock < Provider
    require_relative "bedrock/signature"
    require_relative "bedrock/error_handler"
    require_relative "bedrock/request_adapter"
    require_relative "bedrock/request_adapter/completion"
    require_relative "bedrock/response_adapter"
    require_relative "bedrock/response_adapter/completion"
    require_relative "bedrock/response_adapter/models"
    require_relative "bedrock/stream_decoder"
    require_relative "bedrock/stream_parser"
    require_relative "bedrock/models"

    include RequestAdapter

    # Runtime (data plane) endpoint; control-plane calls (model listing)
    # use a different host — see #models.
    HOST_PATTERN = "bedrock-runtime.%s.amazonaws.com"

    ##
    # @param [String] access_key_id AWS access key ID
    # @param [String] secret_access_key AWS secret access key
    # @param [String] region AWS region (e.g. "us-east-1"); defaults to us-east-1
    # @param [String, nil] session_token AWS session token for temporary credentials
    # @param [String, nil] host Override the Bedrock API host
    # @param [Integer] port Connection port
    # @param [Boolean] ssl Whether to use SSL
    # @param [Integer] timeout Request timeout in seconds
    def initialize(access_key_id: nil, secret_access_key: nil,
                   region: nil, session_token: nil,
                   host: nil, port: 443, ssl: true, timeout: 60,
                   **)
      region ||= "us-east-1"
      @access_key_id = access_key_id
      @secret_access_key = secret_access_key
      @aws_region = region
      @session_token = session_token
      host ||= HOST_PATTERN % region
      @aws_host = host
      # The access key doubles as the Provider-level `key`; persistent
      # connections are disabled — NOTE(review): presumably because each
      # request is individually SigV4-signed; confirm.
      super(key: @access_key_id, host:, port:, ssl:, timeout:, persistent: false)
    end

    ##
    # @return [Symbol] Returns the provider's name
    def name
      :bedrock
    end

    ##
    # Provides an interface to the Bedrock Converse API
    #
    # @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html
    #
    # @param prompt (see LLM::Provider#complete)
    # @param params (see LLM::Provider#complete)
    # @return (see LLM::Provider#complete)
    def complete(prompt, params = {})
      params, stream, tools, role = normalize_complete_params(params)
      req, messages, body = build_complete_request(prompt, params, role, stream:)
      # NOTE: `tracer` here resolves to the (inherited) tracer method;
      # after the `execute` line below it is a local variable. Both must
      # stay in this order for the right object to receive each call.
      tracer.set_request_metadata(user_input: extract_user_input(messages, fallback: prompt))
      sign!(req, body)
      model_id = model_id_for(req.path)
      res, span, tracer = execute(request: req, stream:, operation: "chat", stream_parser:, model: model_id)
      res = ResponseAdapter.adapt(res, type: :completion)
        .extend(Module.new { define_method(:__tools__) { tools } })
      tracer.on_request_finish(operation: "chat", model: model_id, res:, span:)
      res
    end

    ##
    # Provides an interface to Bedrock's ListFoundationModels API.
    #
    # @note
    #  Unlike the Converse API (bedrock-runtime), this endpoint lives
    #  on the control plane (bedrock.<region>.amazonaws.com).
    #
    # @see https://docs.aws.amazon.com/bedrock/latest/APIReference/API_ListFoundationModels.html
    # @return [LLM::Bedrock::Models]
    def models
      LLM::Bedrock::Models.new(self)
    end

    ##
    # @raise [NotImplementedError]
    def files
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def images
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def audio
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def moderations
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def responses
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def vector_stores
      raise NotImplementedError
    end

    ##
    # @raise [NotImplementedError]
    def embed(input, model: nil, **params)
      raise NotImplementedError
    end

    ##
    # @return [String]
    def assistant_role
      "assistant"
    end

    ##
    # Bedrock expects tool results as user messages containing
    # `toolResult` content blocks rather than a distinct `tool` role.
    # @return (see LLM::Provider#tool_role)
    def tool_role
      :user
    end

    ##
    # NOTE(review): unusual default for Bedrock (model ids are typically
    # vendor-prefixed, e.g. "anthropic...."); confirm this id is valid
    # on the Converse API.
    # @return [String]
    def default_model
      "deepseek.v3.2"
    end

    private

    # Request headers; SigV4 auth headers are added later by #sign!.
    def headers
      lock do
        (@headers || {}).merge("Content-Type" => "application/json")
      end
    end

    # Bundles the AWS credential set for {Signature}.
    def credentials
      LLM::Object.from(
        access_key_id: @access_key_id,
        secret_access_key: @secret_access_key,
        aws_region: @aws_region,
        host: @aws_host,
        session_token: @session_token
      )
    end

    def stream_parser
      LLM::Bedrock::StreamParser
    end

    def error_handler
      LLM::Bedrock::ErrorHandler
    end

    def stream_decoder
      LLM::Bedrock::StreamDecoder
    end

    # Applies defaults, extracts tools/role/stream, and merges in the
    # schema/tool adaptations. Returns [params, stream, tools, role].
    def normalize_complete_params(params)
      params = {role: :user, model: default_model, max_tokens: 2048}.merge!(params)
      tools = resolve_tools(params.delete(:tools))
      params = [params, adapt_schema(params), adapt_tools(tools)].inject({}, &:merge!).compact
      role, stream = params.delete(:role), params.delete(:stream)
      params[:stream] = true if streamable?(stream) || stream == true
      [params, stream, tools, role]
    end

    # Builds the signed-ready Net::HTTP request plus the message list and
    # serialized body. Streaming switches the path to /converse-stream.
    def build_complete_request(prompt, params, role, stream: nil)
      messages = build_complete_messages(prompt, params, role)
      model_id = params.delete(:model) || default_model
      payload = build_converse_payload(messages, params)
      body = LLM.json.dump(payload)
      path = stream ? "/model/#{model_id}/converse-stream" \
                    : "/model/#{model_id}/converse"
      req = Net::HTTP::Post.new(path, headers)
      set_body_stream(req, StringIO.new(body))
      [req, messages, body]
    end

    # Prepends any pre-supplied :messages; a plain prompt becomes a
    # Message with the requested role.
    def build_complete_messages(prompt, params, role)
      if LLM::Prompt === prompt
        [*(params.delete(:messages) || []), *prompt]
      else
        [*(params.delete(:messages) || []), Message.new(role, prompt)]
      end
    end

    # Translates normalized params into the Converse API request shape
    # (system/messages/inferenceConfig/toolConfig/...). Consumed params
    # are deleted so nothing unexpected leaks into the payload.
    # NOTE(review): max_tokens/stop use truthiness guards while
    # temperature/top_p use key? — explicit nil handling differs; confirm
    # this asymmetry is intended.
    def build_converse_payload(messages, params)
      adapted = adapt(messages)
      payload = {}
      payload[:system] = adapted[:system] if adapted[:system]&.any?
      payload[:messages] = adapted[:messages]
      inference_config = {}
      inference_config[:maxTokens] = params.delete(:max_tokens) if params[:max_tokens]
      inference_config[:temperature] = params.delete(:temperature) if params.key?(:temperature)
      inference_config[:topP] = params.delete(:top_p) if params.key?(:top_p)
      inference_config[:stopSequences] = params.delete(:stop) if params[:stop]
      payload[:inferenceConfig] = inference_config unless inference_config.empty?
      payload[:toolConfig] = params.delete(:toolConfig) if params[:toolConfig]
      payload[:outputConfig] = params.delete(:outputConfig) if params[:outputConfig]
      additional = {}
      top_k = params.delete(:top_k)
      additional[:top_k] = top_k if top_k
      payload[:additionalModelRequestFields] = additional unless additional.empty?
      payload
    end

    # Picks the most recent user message (else the last message, else the
    # raw prompt) for tracing metadata; non-strings are JSON-dumped.
    def extract_user_input(messages, fallback:)
      message = messages.reverse.find(&:user?) || messages.last
      value = message&.content || fallback
      value.is_a?(String) ? value : LLM.json.dump(value)
    end

    # Recovers the model id embedded in a /model/<id>/converse[-stream] path.
    def model_id_for(path)
      path[%r{\A/model/(.+?)/converse(?:-stream)?\z}, 1] || default_model
    end

    # SigV4-signs the request in place (adds Authorization et al.).
    def sign!(req, body)
      Signature.new(
        credentials:,
        method: req.method,
        path: req.path,
        body:
      ).sign!(req)
    end
  end
end
|
data/lib/llm/stream/queue.rb
CHANGED
|
@@ -46,7 +46,7 @@ class LLM::Stream
|
|
|
46
46
|
# to wait on:
|
|
47
47
|
# - `:thread`: Use threads
|
|
48
48
|
# - `:task`: Use async tasks (requires async gem)
|
|
49
|
-
# - `:fiber`: Use
|
|
49
|
+
# - `:fiber`: Use scheduler-backed fibers (requires Fiber.scheduler)
|
|
50
50
|
# - `:ractor`: Use Ruby ractors (class-based tools only; MCP tools are not supported)
|
|
51
51
|
# - `[:thread, :ractor]`: Wait for any queued thread or ractor work, in the
|
|
52
52
|
# given order. This is useful when different tools were spawned with
|
data/lib/llm/version.rb
CHANGED
data/lib/llm.rb
CHANGED
|
@@ -20,6 +20,7 @@ module LLM
|
|
|
20
20
|
require_relative "llm/mime"
|
|
21
21
|
require_relative "llm/multipart"
|
|
22
22
|
require_relative "llm/file"
|
|
23
|
+
require_relative "llm/pipe"
|
|
23
24
|
require_relative "llm/stream"
|
|
24
25
|
require_relative "llm/provider"
|
|
25
26
|
require_relative "llm/context"
|
|
@@ -48,7 +49,24 @@ module LLM
|
|
|
48
49
|
|
|
49
50
|
##
|
|
50
51
|
# @api private
|
|
51
|
-
def self.clients
|
|
52
|
+
def self.clients
|
|
53
|
+
@clients
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
##
|
|
57
|
+
# Requires an optional runtime dependency
|
|
58
|
+
# @raise [LLM::LoadError]
|
|
59
|
+
# When the dependency cannot be loaded
|
|
60
|
+
def self.require(name)
|
|
61
|
+
super
|
|
62
|
+
rescue ::LoadError
|
|
63
|
+
names = {"xchan" => "xchan.rb", "net/http/persistent" => "net-http-persistent"}
|
|
64
|
+
name = names[name] || name
|
|
65
|
+
raise LLM::LoadError,
|
|
66
|
+
"#{name} is an optional runtime dependency but it does not appear to be installed. " \
|
|
67
|
+
"Consider 'gem install #{name}', adding '#{name}' to your Gemfile or " \
|
|
68
|
+
"opting out of the functionality provided by '#{name}'"
|
|
69
|
+
end
|
|
52
70
|
|
|
53
71
|
##
|
|
54
72
|
# @param [Symbol, LLM::Provider] llm
|
|
@@ -143,6 +161,14 @@ module LLM
|
|
|
143
161
|
LLM::OpenAI.new(**)
|
|
144
162
|
end
|
|
145
163
|
|
|
164
|
+
##
|
|
165
|
+
# @param (see LLM::Bedrock#initialize)
|
|
166
|
+
# @return (see LLM::Bedrock#initialize)
|
|
167
|
+
def bedrock(**)
|
|
168
|
+
lock(:require) { require_relative "llm/providers/bedrock" unless defined?(LLM::Bedrock) }
|
|
169
|
+
LLM::Bedrock.new(**)
|
|
170
|
+
end
|
|
171
|
+
|
|
146
172
|
##
|
|
147
173
|
# @param key (see LLM::XAI#initialize)
|
|
148
174
|
# @param host (see LLM::XAI#initialize)
|
data/llm.gemspec
CHANGED
|
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
DESCRIPTION
|
|
26
26
|
|
|
27
27
|
spec.license = "0BSD"
|
|
28
|
-
spec.required_ruby_version = ">= 3.
|
|
28
|
+
spec.required_ruby_version = ">= 3.3.0"
|
|
29
29
|
|
|
30
30
|
spec.homepage = "https://github.com/llmrb/llm.rb"
|
|
31
31
|
spec.metadata["homepage_uri"] = "https://github.com/llmrb/llm.rb"
|
|
@@ -57,5 +57,6 @@ Gem::Specification.new do |spec|
|
|
|
57
57
|
spec.add_development_dependency "activerecord", "~> 8.0"
|
|
58
58
|
spec.add_development_dependency "sequel", "~> 5.0"
|
|
59
59
|
spec.add_development_dependency "sqlite3", "~> 2.0"
|
|
60
|
+
spec.add_development_dependency "xchan.rb", "~> 0.20"
|
|
60
61
|
spec.add_development_dependency "pg", "~> 1.5"
|
|
61
62
|
end
|