ruby_llm_community 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/ruby_llm/chat.rb +7 -0
- data/lib/ruby_llm/configuration.rb +0 -2
- data/lib/ruby_llm/message.rb +7 -2
- data/lib/ruby_llm/models.rb +0 -4
- data/lib/ruby_llm/provider.rb +3 -1
- data/lib/ruby_llm/providers/anthropic/chat.rb +34 -20
- data/lib/ruby_llm/providers/anthropic/media.rb +70 -46
- data/lib/ruby_llm/providers/anthropic/models.rb +8 -0
- data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
- data/lib/ruby_llm/providers/bedrock/chat.rb +21 -10
- data/lib/ruby_llm/providers/bedrock/media.rb +37 -25
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +8 -0
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
- data/lib/ruby_llm/providers/gemini/chat.rb +9 -2
- data/lib/ruby_llm/providers/gemini/streaming.rb +9 -1
- data/lib/ruby_llm/providers/mistral/chat.rb +1 -1
- data/lib/ruby_llm/providers/openai/chat.rb +2 -1
- data/lib/ruby_llm/providers/openai/response.rb +2 -1
- data/lib/ruby_llm/providers/openai/streaming.rb +2 -1
- data/lib/ruby_llm/providers/openai.rb +3 -3
- data/lib/ruby_llm/stream_accumulator.rb +6 -0
- data/lib/ruby_llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2a8638f12f0c6d0e811f078bdf311d3dde1bc969f6881ed64cf1a5133256b574
+  data.tar.gz: 4bba1ef73b4624fca8ef83c4cd661d9bdb3a15f209da11a3a962cab24b4fdb80
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e083eaa68a50b78854d780b3ddd2dc2467652aceb62fd9633b1b9a75f48b89bb8ad885800f65cb376b4f2997336284d27baf3508f3fe2bbfe920cac064f85df5
+  data.tar.gz: c068573609da8e4755201536aea1084b08b819e6d928c8624090f3a20133e253a86688d4ad5db206254be86c5795d66e3f12ffe945e308dc5cf24af479c8d403
data/README.md
CHANGED
@@ -6,7 +6,7 @@
 **One *beautiful* Ruby API for GPT, Claude, Gemini, and more.** Easily build chatbots, AI agents, RAG applications, and content generators. Features chat (text, images, audio, PDFs), image generation, embeddings, tools (function calling), structured output, Rails integration, and streaming. Works with OpenAI, Anthropic, Google Gemini, AWS Bedrock, DeepSeek, Mistral, Ollama (local models), OpenRouter, Perplexity, GPUStack, and any OpenAI-compatible API.
 
 <div class="badge-container">
-  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=
+  <a href="https://badge.fury.io/rb/ruby_llm"><img src="https://badge.fury.io/rb/ruby_llm.svg?a=5" alt="Gem Version" /></a>
   <a href="https://github.com/testdouble/standard"><img src="https://img.shields.io/badge/code_style-standard-brightgreen.svg" alt="Ruby Style Guide" /></a>
   <a href="https://rubygems.org/gems/ruby_llm"><img alt="Gem Downloads" src="https://img.shields.io/gem/dt/ruby_llm"></a>
   <a href="https://codecov.io/gh/crmne/ruby_llm"><img src="https://codecov.io/gh/crmne/ruby_llm/branch/main/graph/badge.svg" alt="codecov" /></a>
data/lib/ruby_llm/chat.rb
CHANGED
@@ -25,6 +25,7 @@ module RubyLLM
       @temperature = 0.7
       @messages = []
       @tools = {}
+      @cache_prompts = { system: false, user: false, tools: false }
       @params = {}
       @headers = {}
       @schema = nil
@@ -127,12 +128,18 @@ module RubyLLM
       messages.each(&)
     end
 
+    def cache_prompts(system: false, user: false, tools: false)
+      @cache_prompts = { system: system, user: user, tools: tools }
+      self
+    end
+
     def complete(&) # rubocop:disable Metrics/PerceivedComplexity
       response = @provider.complete(
         messages,
         tools: @tools,
         temperature: @temperature,
         model: @model.id,
+        cache_prompts: @cache_prompts.dup,
         params: @params,
         headers: @headers,
         schema: @schema,
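For orientation, a hedged sketch of how the new chainable cache_prompts API might be used from a chat. RubyLLM.chat, with_instructions, and ask are existing RubyLLM chat methods; the model name, file path, and behavior of the cached-token readers are assumptions for illustration, not taken from this diff.

    # Hypothetical usage of the cache_prompts flags added above.
    long_system_prompt = File.read('docs/big_context.md')   # placeholder path
    chat = RubyLLM.chat(model: 'claude-3-5-sonnet')          # placeholder model name
                  .with_instructions(long_system_prompt)
                  .cache_prompts(system: true, user: true, tools: true)

    reply = chat.ask('Answer using the cached context above.')
    reply.cached_tokens          # tokens served from the provider's prompt cache, if any
    reply.cache_creation_tokens  # tokens written to the cache on this call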
data/lib/ruby_llm/configuration.rb
CHANGED
@@ -44,7 +44,6 @@ module RubyLLM
       :logger,
       :log_file,
       :log_level,
-      :log_assume_model_exists,
       :log_stream_debug
 
     def initialize
@@ -64,7 +63,6 @@ module RubyLLM
       # Logging configuration
       @log_file = $stdout
       @log_level = ENV['RUBYLLM_DEBUG'] ? Logger::DEBUG : Logger::INFO
-      @log_assume_model_exists = true
       @log_stream_debug = ENV['RUBYLLM_STREAM_DEBUG'] == 'true'
     end
 
data/lib/ruby_llm/message.rb
CHANGED
@@ -7,7 +7,8 @@ module RubyLLM
   class Message
     ROLES = %i[system user assistant tool].freeze
 
-    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw
+    attr_reader :role, :tool_calls, :tool_call_id, :input_tokens, :output_tokens, :model_id, :raw,
+                :cached_tokens, :cache_creation_tokens
     attr_writer :content
 
     def initialize(options = {})
@@ -18,6 +19,8 @@ module RubyLLM
       @output_tokens = options[:output_tokens]
       @model_id = options[:model_id]
       @tool_call_id = options[:tool_call_id]
+      @cached_tokens = options[:cached_tokens]
+      @cache_creation_tokens = options[:cache_creation_tokens]
       @raw = options[:raw]
 
       ensure_valid_role
@@ -51,7 +54,9 @@ module RubyLLM
       tool_call_id: tool_call_id,
       input_tokens: input_tokens,
       output_tokens: output_tokens,
-      model_id: model_id
+      model_id: model_id,
+      cache_creation_tokens: cache_creation_tokens,
+      cached_tokens: cached_tokens
     }.compact
   end
 
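A small, self-contained sketch of the new token-accounting fields on RubyLLM::Message; the option values below are invented for illustration.

    # The two new readers and to_h keys added above, with made-up numbers.
    msg = RubyLLM::Message.new(
      role: :assistant,
      content: 'Hello!',
      input_tokens: 1200,
      output_tokens: 42,
      cached_tokens: 1024,         # read back from the provider's prompt cache
      cache_creation_tokens: 0     # nothing new written to the cache
    )

    msg.cached_tokens        # => 1024
    msg.to_h[:cached_tokens] # => 1024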
data/lib/ruby_llm/models.rb
CHANGED
@@ -70,10 +70,6 @@ module RubyLLM
         modalities: { input: %w[text image], output: %w[text] },
         metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
       )
-      if RubyLLM.config.log_assume_model_exists
-        RubyLLM.logger.warn "Assuming model '#{model_id}' exists for provider '#{provider}'. " \
-                            'Capabilities may not be accurately reflected.'
-      end
     else
       model = Models.find model_id, provider
       provider_class = Provider.providers[model.provider.to_sym] || raise(Error,
data/lib/ruby_llm/provider.rb
CHANGED
@@ -40,7 +40,8 @@ module RubyLLM
       self.class.configuration_requirements
     end
 
-    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil,
+    def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, # rubocop:disable Metrics/ParameterLists
+                 cache_prompts: { system: false, user: false, tools: false }, &)
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
@@ -50,6 +51,7 @@ module RubyLLM
           tools: tools,
           temperature: normalized_temperature,
           model: model,
+          cache_prompts: cache_prompts,
           stream: block_given?,
           schema: schema
         )
data/lib/ruby_llm/providers/anthropic/chat.rb
CHANGED
@@ -11,12 +11,14 @@ module RubyLLM
       '/v1/messages'
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+                       cache_prompts: { system: false, user: false, tools: false })
       system_messages, chat_messages = separate_messages(messages)
-      system_content = build_system_content(system_messages)
+      system_content = build_system_content(system_messages, cache: cache_prompts[:system])
 
-      build_base_payload(chat_messages, model, stream).tap do |payload|
-        add_optional_fields(payload, system_content:, tools:, temperature
+      build_base_payload(chat_messages, model, stream, cache: cache_prompts[:user]).tap do |payload|
+        add_optional_fields(payload, system_content:, tools:, temperature:,
+                            cache_tools: cache_prompts[:tools])
       end
     end
 
@@ -24,28 +24,34 @@ module RubyLLM
       messages.partition { |msg| msg.role == :system }
     end
 
-    def build_system_content(system_messages)
-
-
-
-        'Multiple system messages will be combined into one.'
-      )
+    def build_system_content(system_messages, cache: false)
+      system_messages.flat_map.with_index do |msg, idx|
+        message_cache = cache if idx == system_messages.size - 1
+        format_system_message(msg, cache: message_cache)
       end
-
-      system_messages.map(&:content).join("\n\n")
     end
 
-    def build_base_payload(chat_messages, model, stream)
+    def build_base_payload(chat_messages, model, stream, cache: false)
+      messages = chat_messages.map.with_index do |msg, idx|
+        message_cache = cache if idx == chat_messages.size - 1
+        format_message(msg, cache: message_cache)
+      end
+
       {
         model: model,
-        messages
+        messages:,
         stream: stream,
        max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
       }
     end
 
-    def add_optional_fields(payload, system_content:, tools:, temperature:)
-
+    def add_optional_fields(payload, system_content:, tools:, temperature:, cache_tools: false)
+      if tools.any?
+        tool_definitions = tools.values.map { |t| Tools.function_for(t) }
+        tool_definitions[-1][:cache_control] = { type: 'ephemeral' } if cache_tools
+        payload[:tools] = tool_definitions
+      end
+
       payload[:system] = system_content unless system_content.empty?
       payload[:temperature] = temperature unless temperature.nil?
     end
@@ -73,24 +81,30 @@ module RubyLLM
       input_tokens: data.dig('usage', 'input_tokens'),
       output_tokens: data.dig('usage', 'output_tokens'),
       model_id: data['model'],
+      cache_creation_tokens: data.dig('usage', 'cache_creation_input_tokens'),
+      cached_tokens: data.dig('usage', 'cache_read_input_tokens'),
       raw: response
     )
   end
 
-    def format_message(msg)
+    def format_message(msg, cache: false)
      if msg.tool_call?
        Tools.format_tool_call(msg)
      elsif msg.tool_result?
        Tools.format_tool_result(msg)
      else
-        format_basic_message(msg)
+        format_basic_message(msg, cache:)
      end
    end
 
-    def
+    def format_system_message(msg, cache: false)
+      Media.format_content(msg.content, cache:)
+    end
+
+    def format_basic_message(msg, cache: false)
      {
        role: convert_role(msg.role),
-        content: Media.format_content(msg.content)
+        content: Media.format_content(msg.content, cache:)
      }
    end
 
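To make the effect concrete, here is an illustrative sketch (not a captured request) of the Anthropic payload shape the updated render_payload would produce with cache_prompts set to { system: true, user: true, tools: true }: the last system block, the content of the last chat message, and the last tool definition carry the ephemeral cache_control marker. All field values and the tool shape below are invented for illustration.

    {
      model: 'claude-3-5-sonnet',
      system: [
        { type: 'text', text: 'Long system prompt...', cache_control: { type: 'ephemeral' } }
      ],
      messages: [
        { role: 'user', content: [{ type: 'text', text: 'Earlier question' }] },
        { role: 'assistant', content: [{ type: 'text', text: 'Earlier answer' }] },
        { role: 'user',
          content: [{ type: 'text', text: 'Latest question', cache_control: { type: 'ephemeral' } }] }
      ],
      tools: [
        { name: 'search', description: 'Example tool', input_schema: { type: 'object' },
          cache_control: { type: 'ephemeral' } }
      ],
      stream: false,
      max_tokens: 4096
    }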
data/lib/ruby_llm/providers/anthropic/media.rb
CHANGED
@@ -7,13 +7,13 @@ module RubyLLM
   module Media
     module_function
 
-    def format_content(content)
+    def format_content(content, cache: false)
       # Convert Hash/Array back to JSON string for API
-      return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-      return [format_text(content)] unless content.is_a?(Content)
+      return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+      return [format_text(content, cache:)] unless content.is_a?(Content)
 
       parts = []
-      parts << format_text(content.text) if content.text
+      parts << format_text(content.text, cache:) if content.text
 
       content.attachments.each do |attachment|
         case attachment.type
@@ -31,60 +31,84 @@ module RubyLLM
       parts
     end
 
-    def format_text(text)
-
-
-
-
+    def format_text(text, cache: false)
+      with_cache_control(
+        {
+          type: 'text',
+          text: text
+        },
+        cache:
+      )
     end
 
-    def format_image(image)
+    def format_image(image, cache: false)
       if image.url?
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'image',
+            source: {
+              type: 'url',
+              url: image.source
+            }
+          },
+          cache:
+        )
       else
-
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'image',
+            source: {
+              type: 'base64',
+              media_type: image.mime_type,
+              data: image.encoded
+            }
+          },
+          cache:
+        )
       end
     end
 
-    def format_pdf(pdf)
+    def format_pdf(pdf, cache: false)
       if pdf.url?
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'document',
+            source: {
+              type: 'url',
+              url: pdf.source
+            }
+          },
+          cache:
+        )
       else
-
-
-
-
-
-
-
-
+        with_cache_control(
+          {
+            type: 'document',
+            source: {
+              type: 'base64',
+              media_type: pdf.mime_type,
+              data: pdf.encoded
+            }
+          },
+          cache:
+        )
       end
     end
 
-    def format_text_file(text_file)
-
-
-
-
+    def format_text_file(text_file, cache: false)
+      with_cache_control(
+        {
+          type: 'text',
+          text: Utils.format_text_file_for_llm(text_file)
+        },
+        cache:
+      )
+    end
+
+    def with_cache_control(hash, cache: false)
+      return hash unless cache
+
+      hash.merge(cache_control: { type: 'ephemeral' })
     end
   end
 end
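The new helper is easy to exercise on its own; a self-contained sketch of the with_cache_control behavior added above (plain Ruby, runnable without the gem):

    # Mirrors the helper above: the cache flag merges Anthropic's ephemeral
    # cache_control marker into a content block, otherwise returns it untouched.
    def with_cache_control(hash, cache: false)
      return hash unless cache

      hash.merge(cache_control: { type: 'ephemeral' })
    end

    with_cache_control({ type: 'text', text: 'hi' })
    # => { type: 'text', text: 'hi' }
    with_cache_control({ type: 'text', text: 'hi' }, cache: true)
    # => { type: 'text', text: 'hi', cache_control: { type: 'ephemeral' } }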
data/lib/ruby_llm/providers/anthropic/models.rb
CHANGED
@@ -42,6 +42,14 @@ module RubyLLM
     def extract_output_tokens(data)
       data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
     end
+
+    def extract_cached_tokens(data)
+      data.dig('message', 'usage', 'cache_read_input_tokens')
+    end
+
+    def extract_cache_creation_tokens(data)
+      data.dig('message', 'usage', 'cache_creation_input_tokens')
+    end
   end
  end
 end
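A quick self-contained check of the two new extractors against a hand-written event hash whose keys follow the dig paths above (the numbers are invented, not from a real response):

    event = {
      'message' => {
        'usage' => {
          'input_tokens' => 12,
          'cache_read_input_tokens' => 2048,
          'cache_creation_input_tokens' => 0
        }
      }
    }

    event.dig('message', 'usage', 'cache_read_input_tokens')      # => 2048
    event.dig('message', 'usage', 'cache_creation_input_tokens')  # => 0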
data/lib/ruby_llm/providers/anthropic/streaming.rb
CHANGED
@@ -18,6 +18,8 @@ module RubyLLM
       content: data.dig('delta', 'text'),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
+      cache_creation_tokens: extract_cache_creation_tokens(data),
       tool_calls: extract_tool_calls(data)
     )
   end
data/lib/ruby_llm/providers/bedrock/chat.rb
CHANGED
@@ -17,20 +17,20 @@ module RubyLLM
       Anthropic::Chat.parse_completion_response response
     end
 
-    def format_message(msg)
+    def format_message(msg, cache: false)
       if msg.tool_call?
         Anthropic::Tools.format_tool_call(msg)
       elsif msg.tool_result?
         Anthropic::Tools.format_tool_result(msg)
       else
-        format_basic_message(msg)
+        format_basic_message(msg, cache:)
       end
     end
 
-    def format_basic_message(msg)
+    def format_basic_message(msg, cache: false)
       {
         role: Anthropic::Chat.convert_role(msg.role),
-        content: Media.format_content(msg.content)
+        content: Media.format_content(msg.content, cache:)
       }
     end
 
@@ -40,22 +40,33 @@ module RubyLLM
       "model/#{@model_id}/invoke"
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
+                       cache_prompts: { system: false, user: false, tools: false })
       # Hold model_id in instance variable for use in completion_url and stream_url
       @model_id = model
 
       system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
-      system_content = Anthropic::Chat.build_system_content(system_messages)
+      system_content = Anthropic::Chat.build_system_content(system_messages, cache: cache_prompts[:system])
 
-      build_base_payload(chat_messages, model).tap do |payload|
-        Anthropic::Chat.add_optional_fields(
+      build_base_payload(chat_messages, model, cache: cache_prompts[:user]).tap do |payload|
+        Anthropic::Chat.add_optional_fields(
+          payload,
+          system_content:,
+          tools:,
+          temperature:,
+          cache_tools: cache_prompts[:tools]
+        )
       end
     end
 
-    def build_base_payload(chat_messages, model)
+    def build_base_payload(chat_messages, model, cache: false)
+      messages = chat_messages.map.with_index do |msg, idx|
+        message_cache = cache if idx == chat_messages.size - 1
+        format_message(msg, cache: message_cache)
+      end
       {
         anthropic_version: 'bedrock-2023-05-31',
-        messages:
+        messages: messages,
         max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
       }
     end
data/lib/ruby_llm/providers/bedrock/media.rb
CHANGED
@@ -10,22 +10,22 @@ module RubyLLM
 
     module_function
 
-    def format_content(content)
+    def format_content(content, cache: false)
       # Convert Hash/Array back to JSON string for API
-      return [Anthropic::Media.format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
-      return [Anthropic::Media.format_text(content)] unless content.is_a?(Content)
+      return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
+      return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content)
 
       parts = []
-      parts << Anthropic::Media.format_text(content.text) if content.text
+      parts << Anthropic::Media.format_text(content.text, cache:) if content.text
 
       content.attachments.each do |attachment|
         case attachment.type
         when :image
-          parts << format_image(attachment)
+          parts << format_image(attachment, cache:)
         when :pdf
-          parts << format_pdf(attachment)
+          parts << format_pdf(attachment, cache:)
         when :text
-          parts << Anthropic::Media.format_text_file(attachment)
+          parts << Anthropic::Media.format_text_file(attachment, cache:)
         else
           raise UnsupportedAttachmentError, attachment.type
         end
@@ -34,26 +34,38 @@ module RubyLLM
       parts
     end
 
-    def format_image(image)
-
-
-
-
-
-
-
-
+    def format_image(image, cache: false)
+      with_cache_control(
+        {
+          type: 'image',
+          source: {
+            type: 'base64',
+            media_type: image.mime_type,
+            data: image.encoded
+          }
+        },
+        cache:
+      )
     end
 
-    def format_pdf(pdf)
-
-
-
-
-
-
-
-
+    def format_pdf(pdf, cache: false)
+      with_cache_control(
+        {
+          type: 'document',
+          source: {
+            type: 'base64',
+            media_type: pdf.mime_type,
+            data: pdf.encoded
+          }
+        },
+        cache:
+      )
+    end
+
+    def with_cache_control(hash, cache: false)
+      return hash unless cache
+
+      hash.merge(cache_control: { type: 'ephemeral' })
     end
   end
 end
data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb
CHANGED
@@ -39,6 +39,14 @@ module RubyLLM
       data.dig('message', 'usage', 'output_tokens') || data.dig('usage', 'output_tokens')
     end
 
+    def extract_cached_tokens(data)
+      data.dig('message', 'usage', 'cache_read_input_tokens')
+    end
+
+    def extract_cache_creation_tokens(data)
+      data.dig('message', 'usage', 'cache_creation_input_tokens')
+    end
+
     private
 
     def extract_content_by_type(data)
data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb
CHANGED
@@ -71,6 +71,8 @@ module RubyLLM
       content: extract_streaming_content(data),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
+      cache_creation_tokens: extract_cache_creation_tokens(data),
       tool_calls: extract_tool_calls(data)
     }
   end
data/lib/ruby_llm/providers/gemini/chat.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
       "models/#{@model}:generateContent"
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
       @model = model # Store model for completion_url/stream_url
       payload = {
         contents: format_messages(messages),
@@ -80,7 +80,8 @@ module RubyLLM
       content: extract_content(data),
       tool_calls: tool_calls,
       input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
-      output_tokens: data
+      output_tokens: calculate_output_tokens(data),
+      cached_tokens: data.dig('usageMetadata', 'cacheTokensDetails', 0, 'tokenCount') || 0,
       model_id: data['modelVersion'] || response.env.url.path.split('/')[3].split(':')[0],
       raw: response
     )
@@ -133,6 +134,12 @@ module RubyLLM
       parts = candidate.dig('content', 'parts')
       parts&.any? { |p| p['functionCall'] }
     end
+
+    def calculate_output_tokens(data)
+      candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+      thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+      candidates + thoughts
+    end
   end
  end
 end
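The new calculate_output_tokens folds Gemini's reported thinking tokens into the output count; a standalone check with invented usageMetadata values:

    data = { 'usageMetadata' => { 'candidatesTokenCount' => 150, 'thoughtsTokenCount' => 90 } }

    candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
    thoughts   = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
    candidates + thoughts # => 240, reported as the message's output_tokens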
data/lib/ruby_llm/providers/gemini/streaming.rb
CHANGED
@@ -16,6 +16,7 @@ module RubyLLM
       content: extract_content(data),
       input_tokens: extract_input_tokens(data),
       output_tokens: extract_output_tokens(data),
+      cached_tokens: extract_cached_tokens(data),
       tool_calls: extract_tool_calls(data)
     )
   end
@@ -42,7 +43,14 @@ module RubyLLM
     end
 
     def extract_output_tokens(data)
-      data.dig('usageMetadata', 'candidatesTokenCount')
+      candidates = data.dig('usageMetadata', 'candidatesTokenCount') || 0
+      thoughts = data.dig('usageMetadata', 'thoughtsTokenCount') || 0
+      total = candidates + thoughts
+      total.positive? ? total : nil
+    end
+
+    def extract_cached_tokens(data)
+      data.dig('usageMetadata', 'cachedContentTokenCount')
     end
 
     def parse_streaming_error(data)
data/lib/ruby_llm/providers/mistral/chat.rb
CHANGED
@@ -13,7 +13,7 @@ module RubyLLM
     end
 
     # rubocop:disable Metrics/ParameterLists
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Metrics/ParameterLists
       payload = super
       # Mistral doesn't support stream_options
       payload.delete(:stream_options)
data/lib/ruby_llm/providers/openai/chat.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
 
     module_function
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, cache_prompts: {}) # rubocop:disable Lint/UnusedMethodArgument, Metrics/ParameterLists
       payload = {
         model: model,
         messages: format_messages(messages),
@@ -56,6 +56,7 @@ module RubyLLM
       tool_calls: parse_tool_calls(message_data['tool_calls']),
       input_tokens: data['usage']['prompt_tokens'],
       output_tokens: data['usage']['completion_tokens'],
+      cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'),
       model_id: data['model'],
       raw: response
     )
data/lib/ruby_llm/providers/openai/response.rb
CHANGED
@@ -11,7 +11,7 @@ module RubyLLM
 
     module_function
 
-    def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_response_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists,Lint/UnusedMethodArgument
       payload = {
         model: model,
         input: format_input(messages),
@@ -97,6 +97,7 @@ module RubyLLM
       tool_calls: parse_response_tool_calls(outputs),
       input_tokens: data['usage']['input_tokens'],
       output_tokens: data['usage']['output_tokens'],
+      cached_tokens: data.dig('usage', 'input_tokens_details', 'cached_tokens'),
       model_id: data['model'],
       raw: response
     )
data/lib/ruby_llm/providers/openai/streaming.rb
CHANGED
@@ -87,7 +87,8 @@ module RubyLLM
       content: data.dig('choices', 0, 'delta', 'content'),
       tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
       input_tokens: data.dig('usage', 'prompt_tokens'),
-      output_tokens: data.dig('usage', 'completion_tokens')
+      output_tokens: data.dig('usage', 'completion_tokens'),
+      cached_tokens: data.dig('usage', 'cached_tokens')
     )
   end
 
data/lib/ruby_llm/providers/openai.rb
CHANGED
@@ -17,12 +17,12 @@ module RubyLLM
       end
     end
 
-    def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
+    def render_payload(messages, tools:, temperature:, model:, cache_prompts:, stream: false, schema: nil) # rubocop:disable Metrics/ParameterLists
       @using_responses_api = !audio_input?(messages)
 
       if @using_responses_api
-        render_response_payload(messages, tools: tools, temperature: temperature, model: model,
-                                schema:
+        render_response_payload(messages, tools: tools, temperature: temperature, model: model,
+                                cache_prompts:, stream:, schema:)
       else
         super
       end
data/lib/ruby_llm/stream_accumulator.rb
CHANGED
@@ -12,6 +12,8 @@ module RubyLLM
     @tool_calls = {}
     @input_tokens = 0
     @output_tokens = 0
+    @cached_tokens = 0
+    @cache_creation_tokens = 0
     @latest_tool_call_id = nil
   end
 
@@ -37,6 +39,8 @@ module RubyLLM
       tool_calls: tool_calls_from_stream,
       input_tokens: @input_tokens.positive? ? @input_tokens : nil,
      output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+      cached_tokens: @cached_tokens.positive? ? @cached_tokens : nil,
+      cache_creation_tokens: @cache_creation_tokens.positive? ? @cache_creation_tokens : nil,
       raw: response
     )
   end
@@ -92,6 +96,8 @@ module RubyLLM
   def count_tokens(chunk)
     @input_tokens = chunk.input_tokens if chunk.input_tokens
     @output_tokens = chunk.output_tokens if chunk.output_tokens
+    @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+    @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
   end
  end
 end
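A hedged sketch of how the accumulator surfaces the new counters during streaming. The block form of ask is existing RubyLLM behavior; the model name, prompt, and numbers are placeholders, and the counters are nil when no provider reported cache activity.

    # Hypothetical streaming call: chunks may carry cached_tokens and
    # cache_creation_tokens, which the accumulator folds into the final message.
    chat = RubyLLM.chat(model: 'claude-3-5-sonnet')   # placeholder model name
                  .cache_prompts(system: true)

    final = chat.ask('Stream me an answer') do |chunk|
      print chunk.content
    end

    final.cached_tokens          # => e.g. 1024, or nil if nothing was read from cache
    final.cache_creation_tokens  # => e.g. 0, or nil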
data/lib/ruby_llm/version.rb
CHANGED