dify_llm 1.9.2 → 1.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +27 -8
- data/lib/generators/ruby_llm/agent/agent_generator.rb +36 -0
- data/lib/generators/ruby_llm/agent/templates/agent.rb.tt +6 -0
- data/lib/generators/ruby_llm/agent/templates/instructions.txt.erb.tt +0 -0
- data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +110 -41
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +14 -15
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +8 -11
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +2 -2
- data/lib/generators/ruby_llm/chat_ui/templates/helpers/messages_helper.rb.tt +25 -0
- data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +2 -2
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/_chat.html.erb.tt +16 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/_form.html.erb.tt +31 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/index.html.erb.tt +31 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/new.html.erb.tt +9 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/chats/show.html.erb.tt +27 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_assistant.html.erb.tt +14 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_content.html.erb.tt +1 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_error.html.erb.tt +13 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_form.html.erb.tt +23 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_system.html.erb.tt +10 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_tool.html.erb.tt +2 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_tool_calls.html.erb.tt +4 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/_user.html.erb.tt +14 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/tool_calls/_default.html.erb.tt +13 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/messages/tool_results/_default.html.erb.tt +21 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/_model.html.erb.tt +17 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/index.html.erb.tt +40 -0
- data/lib/generators/ruby_llm/chat_ui/templates/tailwind/views/models/show.html.erb.tt +27 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +2 -2
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +2 -2
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +19 -7
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +1 -1
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +5 -3
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_assistant.html.erb.tt +9 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -1
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_error.html.erb.tt +8 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +1 -1
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_system.html.erb.tt +6 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool.html.erb.tt +2 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +4 -7
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_user.html.erb.tt +9 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -7
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/tool_calls/_default.html.erb.tt +8 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/tool_results/_default.html.erb.tt +16 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +11 -12
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +27 -17
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +3 -4
- data/lib/generators/ruby_llm/generator_helpers.rb +37 -17
- data/lib/generators/ruby_llm/install/install_generator.rb +22 -18
- data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +1 -1
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +4 -1
- data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +4 -10
- data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +2 -1
- data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +2 -2
- data/lib/generators/ruby_llm/schema/schema_generator.rb +26 -0
- data/lib/generators/ruby_llm/schema/templates/schema.rb.tt +2 -0
- data/lib/generators/ruby_llm/tool/templates/tool.rb.tt +9 -0
- data/lib/generators/ruby_llm/tool/templates/tool_call.html.erb.tt +13 -0
- data/lib/generators/ruby_llm/tool/templates/tool_result.html.erb.tt +13 -0
- data/lib/generators/ruby_llm/tool/tool_generator.rb +96 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_14/templates/add_v1_14_tool_call_columns.rb.tt +7 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_14/upgrade_to_v1_14_generator.rb +49 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +2 -4
- data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +1 -1
- data/lib/ruby_llm/active_record/acts_as.rb +10 -4
- data/lib/ruby_llm/active_record/acts_as_legacy.rb +132 -27
- data/lib/ruby_llm/active_record/chat_methods.rb +132 -28
- data/lib/ruby_llm/active_record/message_methods.rb +58 -8
- data/lib/ruby_llm/active_record/model_methods.rb +1 -1
- data/lib/ruby_llm/active_record/payload_helpers.rb +26 -0
- data/lib/ruby_llm/active_record/tool_call_methods.rb +15 -0
- data/lib/ruby_llm/agent.rb +365 -0
- data/lib/ruby_llm/aliases.json +106 -61
- data/lib/ruby_llm/attachment.rb +8 -3
- data/lib/ruby_llm/chat.rb +150 -22
- data/lib/ruby_llm/configuration.rb +65 -65
- data/lib/ruby_llm/connection.rb +11 -7
- data/lib/ruby_llm/content.rb +6 -2
- data/lib/ruby_llm/error.rb +37 -1
- data/lib/ruby_llm/message.rb +43 -15
- data/lib/ruby_llm/model/info.rb +15 -13
- data/lib/ruby_llm/models.json +25039 -12260
- data/lib/ruby_llm/models.rb +185 -24
- data/lib/ruby_llm/provider.rb +26 -4
- data/lib/ruby_llm/providers/anthropic/capabilities.rb +5 -119
- data/lib/ruby_llm/providers/anthropic/chat.rb +149 -17
- data/lib/ruby_llm/providers/anthropic/media.rb +2 -2
- data/lib/ruby_llm/providers/anthropic/models.rb +3 -9
- data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
- data/lib/ruby_llm/providers/anthropic/tools.rb +20 -0
- data/lib/ruby_llm/providers/anthropic.rb +5 -1
- data/lib/ruby_llm/providers/azure/chat.rb +29 -0
- data/lib/ruby_llm/providers/azure/embeddings.rb +24 -0
- data/lib/ruby_llm/providers/azure/media.rb +45 -0
- data/lib/ruby_llm/providers/azure/models.rb +14 -0
- data/lib/ruby_llm/providers/azure.rb +148 -0
- data/lib/ruby_llm/providers/bedrock/auth.rb +122 -0
- data/lib/ruby_llm/providers/bedrock/chat.rb +357 -28
- data/lib/ruby_llm/providers/bedrock/media.rb +62 -33
- data/lib/ruby_llm/providers/bedrock/models.rb +104 -65
- data/lib/ruby_llm/providers/bedrock/streaming.rb +309 -8
- data/lib/ruby_llm/providers/bedrock.rb +69 -52
- data/lib/ruby_llm/providers/deepseek/capabilities.rb +4 -114
- data/lib/ruby_llm/providers/deepseek.rb +5 -1
- data/lib/ruby_llm/providers/dify/chat.rb +82 -7
- data/lib/ruby_llm/providers/dify/media.rb +2 -2
- data/lib/ruby_llm/providers/dify/streaming.rb +26 -4
- data/lib/ruby_llm/providers/dify.rb +4 -0
- data/lib/ruby_llm/providers/gemini/capabilities.rb +45 -207
- data/lib/ruby_llm/providers/gemini/chat.rb +88 -6
- data/lib/ruby_llm/providers/gemini/images.rb +1 -1
- data/lib/ruby_llm/providers/gemini/models.rb +2 -4
- data/lib/ruby_llm/providers/gemini/streaming.rb +34 -2
- data/lib/ruby_llm/providers/gemini/tools.rb +35 -3
- data/lib/ruby_llm/providers/gemini.rb +4 -0
- data/lib/ruby_llm/providers/gpustack/capabilities.rb +20 -0
- data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
- data/lib/ruby_llm/providers/gpustack.rb +8 -0
- data/lib/ruby_llm/providers/mistral/capabilities.rb +8 -0
- data/lib/ruby_llm/providers/mistral/chat.rb +59 -1
- data/lib/ruby_llm/providers/mistral.rb +4 -0
- data/lib/ruby_llm/providers/ollama/capabilities.rb +20 -0
- data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
- data/lib/ruby_llm/providers/ollama.rb +11 -1
- data/lib/ruby_llm/providers/openai/capabilities.rb +96 -192
- data/lib/ruby_llm/providers/openai/chat.rb +101 -7
- data/lib/ruby_llm/providers/openai/media.rb +5 -2
- data/lib/ruby_llm/providers/openai/models.rb +2 -4
- data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
- data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
- data/lib/ruby_llm/providers/openai/tools.rb +27 -2
- data/lib/ruby_llm/providers/openai.rb +11 -1
- data/lib/ruby_llm/providers/openrouter/chat.rb +168 -0
- data/lib/ruby_llm/providers/openrouter/images.rb +69 -0
- data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
- data/lib/ruby_llm/providers/openrouter.rb +37 -1
- data/lib/ruby_llm/providers/perplexity/capabilities.rb +34 -99
- data/lib/ruby_llm/providers/perplexity/models.rb +12 -14
- data/lib/ruby_llm/providers/perplexity.rb +4 -0
- data/lib/ruby_llm/providers/vertexai/models.rb +1 -1
- data/lib/ruby_llm/providers/vertexai.rb +23 -7
- data/lib/ruby_llm/providers/xai/chat.rb +15 -0
- data/lib/ruby_llm/providers/xai/models.rb +75 -0
- data/lib/ruby_llm/providers/xai.rb +32 -0
- data/lib/ruby_llm/stream_accumulator.rb +120 -18
- data/lib/ruby_llm/streaming.rb +60 -57
- data/lib/ruby_llm/thinking.rb +49 -0
- data/lib/ruby_llm/tokens.rb +47 -0
- data/lib/ruby_llm/tool.rb +48 -3
- data/lib/ruby_llm/tool_call.rb +6 -3
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm.rb +14 -8
- data/lib/tasks/models.rake +61 -22
- data/lib/tasks/release.rake +1 -1
- data/lib/tasks/ruby_llm.rake +9 -1
- data/lib/tasks/vcr.rake +33 -1
- metadata +67 -16
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +0 -13
- data/lib/ruby_llm/providers/bedrock/capabilities.rb +0 -167
- data/lib/ruby_llm/providers/bedrock/signing.rb +0 -831
- data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -51
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -71
- data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -67
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -80
- data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -78
|
@@ -3,100 +3,139 @@
|
|
|
3
3
|
module RubyLLM
|
|
4
4
|
module Providers
|
|
5
5
|
class Bedrock
|
|
6
|
-
# Models methods for
|
|
6
|
+
# Models methods for AWS Bedrock.
|
|
7
7
|
module Models
|
|
8
|
-
|
|
9
|
-
mgmt_api_base = "https://bedrock.#{@config.bedrock_region}.amazonaws.com"
|
|
10
|
-
full_models_url = "#{mgmt_api_base}/#{models_url}"
|
|
11
|
-
signature = sign_request(full_models_url, method: :get)
|
|
12
|
-
response = @connection.get(full_models_url) do |req|
|
|
13
|
-
req.headers.merge! signature.headers
|
|
14
|
-
end
|
|
8
|
+
module_function
|
|
15
9
|
|
|
16
|
-
|
|
17
|
-
end
|
|
10
|
+
REGION_PREFIXES = %w[global us eu ap sa ca me af il].freeze
|
|
18
11
|
|
|
19
|
-
|
|
12
|
+
def models_api_base
|
|
13
|
+
"https://bedrock.#{bedrock_region}.amazonaws.com"
|
|
14
|
+
end
|
|
20
15
|
|
|
21
16
|
def models_url
|
|
22
|
-
'foundation-models'
|
|
17
|
+
'/foundation-models'
|
|
23
18
|
end
|
|
24
19
|
|
|
25
|
-
def parse_list_models_response(response, slug,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
models.select { |m| m['modelId'].include?('claude') }.map do |model_data|
|
|
29
|
-
model_id = model_data['modelId']
|
|
30
|
-
|
|
31
|
-
Model::Info.new(
|
|
32
|
-
id: model_id_with_region(model_id, model_data),
|
|
33
|
-
name: model_data['modelName'] || capabilities.format_display_name(model_id),
|
|
34
|
-
provider: slug,
|
|
35
|
-
family: capabilities.model_family(model_id),
|
|
36
|
-
created_at: nil,
|
|
37
|
-
context_window: capabilities.context_window_for(model_id),
|
|
38
|
-
max_output_tokens: capabilities.max_tokens_for(model_id),
|
|
39
|
-
modalities: capabilities.modalities_for(model_id),
|
|
40
|
-
capabilities: capabilities.capabilities_for(model_id),
|
|
41
|
-
pricing: capabilities.pricing_for(model_id),
|
|
42
|
-
metadata: {
|
|
43
|
-
provider_name: model_data['providerName'],
|
|
44
|
-
inference_types: model_data['inferenceTypesSupported'] || [],
|
|
45
|
-
streaming_supported: model_data['responseStreamingSupported'] || false,
|
|
46
|
-
input_modalities: model_data['inputModalities'] || [],
|
|
47
|
-
output_modalities: model_data['outputModalities'] || []
|
|
48
|
-
}
|
|
49
|
-
)
|
|
20
|
+
def parse_list_models_response(response, slug, _capabilities)
|
|
21
|
+
Array(response.body['modelSummaries']).map do |model_data|
|
|
22
|
+
create_model_info(model_data, slug)
|
|
50
23
|
end
|
|
51
24
|
end
|
|
52
25
|
|
|
53
|
-
def create_model_info(model_data, slug, _capabilities)
|
|
54
|
-
model_id = model_data['modelId']
|
|
26
|
+
def create_model_info(model_data, slug, _capabilities = nil)
|
|
27
|
+
model_id = model_id_with_region(model_data['modelId'], model_data)
|
|
28
|
+
converse_data = model_data['converse'] || {}
|
|
55
29
|
|
|
56
30
|
Model::Info.new(
|
|
57
|
-
id:
|
|
58
|
-
name: model_data['modelName']
|
|
31
|
+
id: model_id,
|
|
32
|
+
name: model_data['modelName'],
|
|
59
33
|
provider: slug,
|
|
60
|
-
family: '
|
|
34
|
+
family: model_data['modelFamily'] || model_data['providerName']&.downcase,
|
|
61
35
|
created_at: nil,
|
|
62
|
-
context_window:
|
|
63
|
-
max_output_tokens:
|
|
64
|
-
modalities: {
|
|
65
|
-
|
|
36
|
+
context_window: parse_context_window(model_data),
|
|
37
|
+
max_output_tokens: converse_data['maxTokensDefault'] || converse_data['maxTokensMaximum'],
|
|
38
|
+
modalities: {
|
|
39
|
+
input: normalize_modalities(model_data['inputModalities']),
|
|
40
|
+
output: normalize_modalities(model_data['outputModalities'])
|
|
41
|
+
},
|
|
42
|
+
capabilities: parse_capabilities(model_data),
|
|
66
43
|
pricing: {},
|
|
67
|
-
metadata: {
|
|
44
|
+
metadata: {
|
|
45
|
+
provider_name: model_data['providerName'],
|
|
46
|
+
model_arn: model_data['modelArn'],
|
|
47
|
+
inference_types: model_data['inferenceTypesSupported'],
|
|
48
|
+
converse: converse_data
|
|
49
|
+
}
|
|
68
50
|
)
|
|
69
51
|
end
|
|
70
52
|
|
|
71
53
|
def model_id_with_region(model_id, model_data)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
model_data['inferenceTypesSupported'],
|
|
75
|
-
@config.bedrock_region
|
|
76
|
-
)
|
|
54
|
+
inference_types = Array(model_data['inferenceTypesSupported'])
|
|
55
|
+
normalize_inference_profile_id(model_id, inference_types, @config.bedrock_region)
|
|
77
56
|
end
|
|
78
57
|
|
|
79
|
-
def
|
|
80
|
-
|
|
81
|
-
return
|
|
58
|
+
def normalize_inference_profile_id(model_id, inference_types, region)
|
|
59
|
+
return model_id unless inference_types.include?('INFERENCE_PROFILE')
|
|
60
|
+
return model_id if inference_types.include?('ON_DEMAND')
|
|
82
61
|
|
|
83
|
-
|
|
62
|
+
with_region_prefix(model_id, region)
|
|
84
63
|
end
|
|
85
64
|
|
|
86
65
|
def with_region_prefix(model_id, region)
|
|
87
|
-
|
|
88
|
-
return model_id if model_id.start_with?("#{desired_prefix}.")
|
|
66
|
+
prefix = region_prefix(region)
|
|
89
67
|
|
|
90
|
-
|
|
91
|
-
|
|
68
|
+
if region_prefixed?(model_id)
|
|
69
|
+
model_id.sub(/\A(?:#{REGION_PREFIXES.join('|')})\./, "#{prefix}.")
|
|
70
|
+
else
|
|
71
|
+
"#{prefix}.#{model_id}"
|
|
72
|
+
end
|
|
92
73
|
end
|
|
93
74
|
|
|
94
|
-
def
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
75
|
+
def region_prefix(region)
|
|
76
|
+
prefix = region.to_s.split('-').first
|
|
77
|
+
prefix = '' if prefix.nil?
|
|
78
|
+
prefix.empty? ? 'us' : prefix
|
|
79
|
+
end
|
|
98
80
|
|
|
99
|
-
|
|
81
|
+
def region_prefixed?(model_id)
|
|
82
|
+
model_id.match?(/\A(?:#{REGION_PREFIXES.join('|')})\./)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def normalize_modalities(modalities)
|
|
86
|
+
Array(modalities).map do |modality|
|
|
87
|
+
normalized = modality.to_s.downcase
|
|
88
|
+
case normalized
|
|
89
|
+
when 'embedding' then 'embeddings'
|
|
90
|
+
when 'speech' then 'audio'
|
|
91
|
+
else normalized
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def parse_capabilities(model_data)
|
|
97
|
+
capabilities = []
|
|
98
|
+
capabilities << 'streaming' if model_data['responseStreamingSupported']
|
|
99
|
+
|
|
100
|
+
converse = model_data['converse'] || {}
|
|
101
|
+
capabilities << 'function_calling' if converse.is_a?(Hash)
|
|
102
|
+
capabilities << 'reasoning' if converse.dig('reasoningSupported', 'embedded')
|
|
103
|
+
capabilities << 'structured_output' if supports_structured_output?(model_data['modelId'])
|
|
104
|
+
|
|
105
|
+
capabilities
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Structured output supported on Claude 4.5+ and assumed for future major versions.
|
|
109
|
+
# Bedrock IDs look like: us.anthropic.claude-haiku-4-5-20251001-v1:0
|
|
110
|
+
# Must handle optional region prefix (us./eu./global.) and anthropic. prefix.
|
|
111
|
+
def supports_structured_output?(model_id)
|
|
112
|
+
return false unless model_id
|
|
113
|
+
|
|
114
|
+
normalized = model_id.sub(/\A(?:#{REGION_PREFIXES.join('|')})\./, '').delete_prefix('anthropic.')
|
|
115
|
+
match = normalized.match(/claude-(?:opus|sonnet|haiku)-(\d+)-(\d{1,2})(?:\b|-)/)
|
|
116
|
+
return false unless match
|
|
117
|
+
|
|
118
|
+
major = match[1].to_i
|
|
119
|
+
minor = match[2].to_i
|
|
120
|
+
major > 4 || (major == 4 && minor >= 5)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def reasoning_embedded?(model)
|
|
124
|
+
metadata = RubyLLM::Utils.deep_symbolize_keys(model.metadata || {})
|
|
125
|
+
converse = metadata[:converse] || {}
|
|
126
|
+
reasoning_supported = converse[:reasoningSupported] || {}
|
|
127
|
+
reasoning_supported[:embedded] || false
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def parse_context_window(model_data)
|
|
131
|
+
value = model_data.dig('description', 'maxContextWindow')
|
|
132
|
+
return unless value.is_a?(String)
|
|
133
|
+
|
|
134
|
+
if value.match?(/\A\d+[kK]\z/)
|
|
135
|
+
value.to_i * 1000
|
|
136
|
+
elsif value.match?(/\A\d+\z/)
|
|
137
|
+
value.to_i
|
|
138
|
+
end
|
|
100
139
|
end
|
|
101
140
|
end
|
|
102
141
|
end
|
|
@@ -1,17 +1,318 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative 'streaming/base'
|
|
4
|
-
require_relative 'streaming/content_extraction'
|
|
5
|
-
require_relative 'streaming/message_processing'
|
|
6
|
-
require_relative 'streaming/payload_processing'
|
|
7
|
-
require_relative 'streaming/prelude_handling'
|
|
8
|
-
|
|
9
3
|
module RubyLLM
|
|
10
4
|
module Providers
|
|
11
5
|
class Bedrock
|
|
12
|
-
# Streaming implementation for
|
|
6
|
+
# Streaming implementation for Bedrock ConverseStream (AWS Event Stream).
|
|
13
7
|
module Streaming
|
|
14
|
-
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
def stream_url
|
|
11
|
+
"/model/#{@model.id}/converse-stream"
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def stream_response(connection, payload, additional_headers = {}, &block)
|
|
15
|
+
accumulator = StreamAccumulator.new
|
|
16
|
+
decoder = event_stream_decoder
|
|
17
|
+
request_payload = api_payload(payload)
|
|
18
|
+
body = JSON.generate(request_payload)
|
|
19
|
+
|
|
20
|
+
response = connection.post(stream_url, request_payload) do |req|
|
|
21
|
+
req.headers.merge!(sign_headers('POST', stream_url, body))
|
|
22
|
+
req.headers.merge!(additional_headers) unless additional_headers.empty?
|
|
23
|
+
req.headers['Accept'] = 'application/vnd.amazon.eventstream'
|
|
24
|
+
|
|
25
|
+
if Faraday::VERSION.start_with?('1')
|
|
26
|
+
req.options[:on_data] = proc do |chunk, _size|
|
|
27
|
+
parse_stream_chunk(decoder, chunk, accumulator, &block)
|
|
28
|
+
end
|
|
29
|
+
else
|
|
30
|
+
req.options.on_data = proc do |chunk, _bytes, env|
|
|
31
|
+
if env&.status == 200
|
|
32
|
+
parse_stream_chunk(decoder, chunk, accumulator, &block)
|
|
33
|
+
else
|
|
34
|
+
handle_failed_stream(chunk, env)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
message = accumulator.to_message(response)
|
|
41
|
+
RubyLLM.logger.debug { "Stream completed: #{message.content}" }
|
|
42
|
+
message
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def event_stream_decoder
|
|
46
|
+
require 'aws-eventstream'
|
|
47
|
+
Aws::EventStream::Decoder.new
|
|
48
|
+
rescue LoadError
|
|
49
|
+
raise Error,
|
|
50
|
+
'The aws-eventstream gem is required for Bedrock streaming. ' \
|
|
51
|
+
'Please add it to your Gemfile: gem "aws-eventstream"'
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def handle_failed_stream(chunk, env)
|
|
55
|
+
data = JSON.parse(chunk)
|
|
56
|
+
error_response = env.merge(body: data)
|
|
57
|
+
ErrorMiddleware.parse_error(provider: self, response: error_response)
|
|
58
|
+
rescue JSON::ParserError
|
|
59
|
+
RubyLLM.logger.debug { "Failed Bedrock stream error chunk: #{chunk}" }
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def parse_stream_chunk(decoder, raw_chunk, accumulator)
|
|
63
|
+
handle_non_eventstream_error_chunk(raw_chunk)
|
|
64
|
+
|
|
65
|
+
decode_events(decoder, raw_chunk).each do |event|
|
|
66
|
+
chunk = build_chunk(event)
|
|
67
|
+
next unless chunk
|
|
68
|
+
|
|
69
|
+
accumulator.add(chunk)
|
|
70
|
+
yield chunk
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def handle_non_eventstream_error_chunk(raw_chunk)
|
|
75
|
+
text = raw_chunk.to_s
|
|
76
|
+
|
|
77
|
+
if text.start_with?('event: error')
|
|
78
|
+
payload = text.lines.find { |line| line.start_with?('data:') }&.delete_prefix('data:')&.strip
|
|
79
|
+
raise_streaming_chunk_error(payload) if payload
|
|
80
|
+
return
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
return unless text.lstrip.start_with?('{') && text.include?('"error"')
|
|
84
|
+
|
|
85
|
+
raise_streaming_chunk_error(text)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def raise_streaming_chunk_error(payload)
|
|
89
|
+
parsed = JSON.parse(payload)
|
|
90
|
+
message = parsed.dig('error', 'message') || parsed['message'] || 'Bedrock streaming error'
|
|
91
|
+
response = Struct.new(:body, :status).new({ 'message' => message }, 500)
|
|
92
|
+
ErrorMiddleware.parse_error(provider: self, response: response)
|
|
93
|
+
rescue JSON::ParserError
|
|
94
|
+
nil
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def decode_events(decoder, raw_chunk)
|
|
98
|
+
events = []
|
|
99
|
+
message, eof = decoder.decode_chunk(raw_chunk)
|
|
100
|
+
|
|
101
|
+
while message
|
|
102
|
+
event = decode_event_payload(message.payload.read)
|
|
103
|
+
if event && RubyLLM.config.log_stream_debug
|
|
104
|
+
RubyLLM.logger.debug do
|
|
105
|
+
"Bedrock stream event keys: #{event.keys}"
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
events << event if event
|
|
109
|
+
break if eof
|
|
110
|
+
|
|
111
|
+
message, eof = decoder.decode_chunk
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
events
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def decode_event_payload(payload)
|
|
118
|
+
outer = JSON.parse(payload)
|
|
119
|
+
|
|
120
|
+
if outer['bytes'].is_a?(String)
|
|
121
|
+
JSON.parse(Base64.decode64(outer['bytes']))
|
|
122
|
+
else
|
|
123
|
+
outer
|
|
124
|
+
end
|
|
125
|
+
rescue JSON::ParserError => e
|
|
126
|
+
RubyLLM.logger.debug { "Failed to decode Bedrock stream event payload: #{e.message}" }
|
|
127
|
+
nil
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def build_chunk(event)
|
|
131
|
+
raise_stream_error(event) if stream_error_event?(event)
|
|
132
|
+
|
|
133
|
+
metadata_usage, usage, message_usage = event_usage(event)
|
|
134
|
+
|
|
135
|
+
Chunk.new(
|
|
136
|
+
role: :assistant,
|
|
137
|
+
model_id: event['modelId'] || event.dig('message', 'model') || @model&.id,
|
|
138
|
+
content: extract_content_delta(event),
|
|
139
|
+
thinking: Thinking.build(
|
|
140
|
+
text: extract_thinking_delta(event),
|
|
141
|
+
signature: extract_thinking_signature(event)
|
|
142
|
+
),
|
|
143
|
+
tool_calls: extract_tool_calls(event),
|
|
144
|
+
input_tokens: extract_input_tokens(metadata_usage, usage, message_usage),
|
|
145
|
+
output_tokens: extract_output_tokens(metadata_usage, usage),
|
|
146
|
+
cached_tokens: extract_cached_tokens(metadata_usage, usage),
|
|
147
|
+
cache_creation_tokens: extract_cache_creation_tokens(metadata_usage, usage),
|
|
148
|
+
thinking_tokens: extract_reasoning_tokens(metadata_usage, usage)
|
|
149
|
+
)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def event_usage(event)
|
|
153
|
+
[
|
|
154
|
+
event.dig('metadata', 'usage') || {},
|
|
155
|
+
event['usage'] || {},
|
|
156
|
+
event.dig('message', 'usage') || {}
|
|
157
|
+
]
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def extract_input_tokens(metadata_usage, usage, message_usage)
|
|
161
|
+
metadata_usage['inputTokens'] || usage['inputTokens'] || message_usage['input_tokens']
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def extract_output_tokens(metadata_usage, usage)
|
|
165
|
+
metadata_usage['outputTokens'] || usage['outputTokens'] || usage['output_tokens']
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def extract_cached_tokens(metadata_usage, usage)
|
|
169
|
+
metadata_usage['cacheReadInputTokens'] || usage['cacheReadInputTokens'] || usage['cache_read_input_tokens']
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def extract_cache_creation_tokens(metadata_usage, usage)
|
|
173
|
+
metadata_usage['cacheWriteInputTokens'] || usage['cacheWriteInputTokens'] ||
|
|
174
|
+
usage['cache_creation_input_tokens']
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def extract_reasoning_tokens(metadata_usage, usage)
|
|
178
|
+
metadata_usage['reasoningTokens'] || usage['reasoningTokens'] ||
|
|
179
|
+
usage.dig('output_tokens_details', 'thinking_tokens')
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def stream_error_event?(event)
|
|
183
|
+
event.keys.any? { |key| key.end_with?('Exception') } || event['type'] == 'error'
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def raise_stream_error(event)
|
|
187
|
+
if event['type'] == 'error'
|
|
188
|
+
message = event.dig('error', 'message') || 'Bedrock streaming error'
|
|
189
|
+
response = Struct.new(:body, :status).new({ 'message' => message }, 500)
|
|
190
|
+
ErrorMiddleware.parse_error(provider: self, response: response)
|
|
191
|
+
return
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
key = event.keys.find { |candidate| candidate.end_with?('Exception') }
|
|
195
|
+
payload = event[key]
|
|
196
|
+
message = payload['message'] || key
|
|
197
|
+
status = case key
|
|
198
|
+
when 'throttlingException' then 429
|
|
199
|
+
when 'validationException' then 400
|
|
200
|
+
when 'accessDeniedException', 'unrecognizedClientException' then 401
|
|
201
|
+
when 'serviceUnavailableException' then 503
|
|
202
|
+
else 500
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
response = Struct.new(:body, :status).new({ 'message' => message }, status)
|
|
206
|
+
ErrorMiddleware.parse_error(provider: self, response: response)
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
def extract_content_delta(event)
|
|
210
|
+
delta = normalized_delta(event)
|
|
211
|
+
return delta['text'] if delta['text']
|
|
212
|
+
|
|
213
|
+
return event.dig('delta', 'text') if event.dig('delta', 'type') == 'text_delta'
|
|
214
|
+
|
|
215
|
+
nil
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def extract_thinking_delta(event)
|
|
219
|
+
delta = normalized_delta(event)
|
|
220
|
+
reasoning_content = delta['reasoningContent'] || {}
|
|
221
|
+
|
|
222
|
+
reasoning_text = reasoning_content['reasoningText'] || {}
|
|
223
|
+
return reasoning_text['text'] if reasoning_text['text']
|
|
224
|
+
return event.dig('delta', 'thinking') if event.dig('delta', 'type') == 'thinking_delta'
|
|
225
|
+
|
|
226
|
+
nil
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def extract_thinking_signature(event)
|
|
230
|
+
signature = extract_signature_from_delta(event)
|
|
231
|
+
return signature if signature
|
|
232
|
+
|
|
233
|
+
signature = extract_signature_from_start(event)
|
|
234
|
+
return signature if signature
|
|
235
|
+
|
|
236
|
+
nil
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def extract_signature_from_delta(event)
|
|
240
|
+
delta = normalized_delta(event)
|
|
241
|
+
reasoning_content = delta['reasoningContent'] || {}
|
|
242
|
+
reasoning_text = reasoning_content['reasoningText'] || {}
|
|
243
|
+
return reasoning_text['signature'] if reasoning_text['signature']
|
|
244
|
+
return event.dig('delta', 'signature') if event.dig('delta', 'type') == 'signature_delta'
|
|
245
|
+
|
|
246
|
+
nil
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
def extract_signature_from_start(event)
|
|
250
|
+
start = event.dig('contentBlockStart', 'start', 'reasoningContent')
|
|
251
|
+
return nil unless start
|
|
252
|
+
|
|
253
|
+
reasoning_text = start['reasoningText'] || {}
|
|
254
|
+
return reasoning_text['signature'] if reasoning_text['signature']
|
|
255
|
+
return start['redactedContent'] if start['redactedContent']
|
|
256
|
+
|
|
257
|
+
nil
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def extract_tool_calls(event)
|
|
261
|
+
return extract_tool_call_start(event) if tool_call_start_event?(event)
|
|
262
|
+
return extract_tool_call_delta(event) if tool_call_delta_event?(event)
|
|
263
|
+
|
|
264
|
+
nil
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
def tool_call_start_event?(event)
|
|
268
|
+
event['contentBlockStart'] || event['start'] || event.dig('content_block', 'tool_use')
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def tool_call_delta_event?(event)
|
|
272
|
+
event['contentBlockDelta'] || event.dig('delta', 'toolUse') || event.dig('delta', 'tool_use') ||
|
|
273
|
+
event.dig('delta', 'partial_json')
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def extract_tool_call_start(event)
|
|
277
|
+
tool_use = event.dig('contentBlockStart', 'start', 'toolUse')
|
|
278
|
+
tool_use ||= event.dig('start', 'toolUse')
|
|
279
|
+
tool_use ||= event.dig('content_block', 'tool_use') if event['type'] == 'content_block_start'
|
|
280
|
+
return nil unless tool_use
|
|
281
|
+
|
|
282
|
+
tool_use_id = tool_use['toolUseId'] || tool_use['id']
|
|
283
|
+
tool_name = tool_use['name']
|
|
284
|
+
tool_input = tool_use['input'] || {}
|
|
285
|
+
|
|
286
|
+
{
|
|
287
|
+
tool_use_id => ToolCall.new(
|
|
288
|
+
id: tool_use_id,
|
|
289
|
+
name: tool_name,
|
|
290
|
+
arguments: tool_input
|
|
291
|
+
)
|
|
292
|
+
}
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
def extract_tool_call_delta(event)
|
|
296
|
+
input = normalized_delta(event).dig('toolUse', 'input')
|
|
297
|
+
input ||= normalized_delta(event).dig('tool_use', 'input')
|
|
298
|
+
input ||= event.dig('delta', 'partial_json') if event.dig('delta', 'type') == 'input_json_delta'
|
|
299
|
+
return nil unless input
|
|
300
|
+
|
|
301
|
+
{ nil => ToolCall.new(id: nil, name: nil, arguments: input) }
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
def normalized_delta(event)
|
|
305
|
+
delta = event.dig('contentBlockDelta', 'delta') || event['delta'] || {}
|
|
306
|
+
return delta if delta.is_a?(Hash)
|
|
307
|
+
|
|
308
|
+
if delta.is_a?(String) && !delta.empty?
|
|
309
|
+
JSON.parse(delta)
|
|
310
|
+
else
|
|
311
|
+
{}
|
|
312
|
+
end
|
|
313
|
+
rescue JSON::ParserError
|
|
314
|
+
{}
|
|
315
|
+
end
|
|
15
316
|
end
|
|
16
317
|
end
|
|
17
318
|
end
|
|
@@ -1,82 +1,99 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'openssl'
|
|
4
|
-
require 'time'
|
|
5
|
-
|
|
6
3
|
module RubyLLM
  module Providers
    # AWS Bedrock Converse API integration.
    #
    # Requests go to the regional bedrock-runtime endpoint and are sent through
    # the signed_post / signed_get helpers instead of static auth headers.
    class Bedrock < Provider
      include Bedrock::Auth
      include Bedrock::Chat
      include Bedrock::Media
      include Bedrock::Models
      include Bedrock::Streaming

      # @return [String] base URL of the bedrock-runtime endpoint for the configured region
      def api_base
        "https://bedrock-runtime.#{bedrock_region}.amazonaws.com"
      end

      # No static headers are contributed here.
      # NOTE(review): authentication headers presumably come from the signing
      # helpers (signed_post / signed_get) — confirm in Bedrock::Auth.
      #
      # @return [Hash] always empty
      def headers
        {}
      end

      # Normalizes provider-specific params (see #normalize_params) and then
      # delegates to the parent implementation with otherwise unchanged arguments.
      #
      # rubocop:disable Metrics/ParameterLists
      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil,
                   tool_prefs: nil, &)
        normalized_params = normalize_params(params, model:)

        super(
          messages,
          tools: tools,
          tool_prefs: tool_prefs,
          temperature: temperature,
          model: model,
          params: normalized_params,
          headers: headers,
          schema: schema,
          thinking: thinking,
          &
        )
      end
      # rubocop:enable Metrics/ParameterLists

      # Extracts a human-readable message from an error response.
      #
      # A String body is returned as is. A Hash body yields the first present
      # value among 'message', 'Message', 'error' and '__type'. An Array body
      # joins the messages of its Hash parts with '. '; indexing an Array with
      # a String key would raise TypeError. Every other case falls back to the
      # parent implementation.
      #
      # @param response [#body] HTTP response
      # @return [Object, nil] the extracted message; nil when the body is nil or empty
      def parse_error(response)
        return if response.body.nil? || response.body.empty?

        body = try_parse_json(response.body)
        case body
        when String
          body
        when Hash
          body['message'] || body['Message'] || body['error'] || body['__type'] || super
        when Array
          messages = body.filter_map { |part| part['message'] || part['Message'] if part.is_a?(Hash) }
          messages.empty? ? super : messages.join('. ')
        else
          super
        end
      end

      # Fetches the model listing through a signed GET and hands the response
      # to parse_list_models_response.
      def list_models
        response = signed_get(models_api_base, models_url)
        parse_list_models_response(response, slug, capabilities)
      end

      class << self
        # @return [Array<Symbol>] configuration keys this provider understands
        def configuration_options
          %i[bedrock_api_key bedrock_secret_key bedrock_region bedrock_session_token]
        end

        # @return [Array<Symbol>] configuration keys that must be set
        def configuration_requirements
          %i[bedrock_api_key bedrock_secret_key bedrock_region]
        end
      end

      private

      # @return [Object] the region taken from the provider configuration
      def bedrock_region
        @config.bedrock_region
      end

      # Sends a non-streaming completion request as a signed POST to completion_url.
      def sync_response(connection, payload, additional_headers = {})
        signed_post(connection, completion_url, payload, additional_headers)
      end

      # Returns a symbol-keyed copy of params with :top_k relocated.
      #
      # :top_k is always removed from the top level. When it was given and the
      # model supports it, it is merged into :additionalModelRequestFields.
      # That key is only written back when it ends up non-empty.
      #
      # @param params [Hash, nil] caller-supplied request params
      # @param model [Object] model the request targets
      # @return [Hash] normalized params
      def normalize_params(params, model:)
        normalized = RubyLLM::Utils.deep_symbolize_keys(params || {})
        additional_fields = normalized[:additionalModelRequestFields] || {}

        top_k = normalized.delete(:top_k)
        if !top_k.nil? && model_supports_top_k?(model)
          additional_fields = RubyLLM::Utils.deep_merge(additional_fields, { top_k: top_k })
        end

        normalized[:additionalModelRequestFields] = additional_fields unless additional_fields.empty?
        normalized
      end

      # Delegates to Bedrock::Models.reasoning_embedded?.
      # NOTE(review): top_k support is tied to that predicate here; the reason
      # is not visible in this file — confirm against Bedrock::Models.
      def model_supports_top_k?(model)
        Bedrock::Models.reasoning_embedded?(model)
      end

      # Returns a duplicated, symbol-keyed copy of payload without its :tools
      # entry; the duplicate is what gets modified, not the argument.
      # NOTE(review): presumably the tool definitions travel elsewhere in the
      # request — confirm against Bedrock::Chat.
      def api_payload(payload)
        cleaned = RubyLLM::Utils.deep_symbolize_keys(RubyLLM::Utils.deep_dup(payload))
        cleaned.delete(:tools)
        cleaned
      end
    end
  end
end
|