lex-llm-anthropic 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4f3196e6fe3ab6df1b6f1b06be9a71f317d22f3e37e32dc39515b5e392aa292f
|
|
4
|
+
data.tar.gz: d7e63aea83f71e31f4a4a4b9881ba331267d2dbc1a6378b737c78f72cc8edc56
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e83d533b933209dadb0ad6badfc6e03d546ca64e737189094440544eace90ff392ae9262990e81d36684bc37b43b7db586dd8d8b57ea2b512a4cd3124558303
|
|
7
|
+
data.tar.gz: f78eb6154a3f2a626c64231fc2bf6b36c7dac63fffc4e362e6aa670de5ad40fcc6af4ce29b225e1d73222b8647ef26fa537d25aed3370c6ab40a79ec5f143d8e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.12 - 2026-06-01
|
|
4
|
+
|
|
5
|
+
- Add `cache_control` markers to Anthropic Messages API requests for prompt caching
|
|
6
|
+
- System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
|
|
7
|
+
- Early conversation turns are cacheable; final message is never cached (prefix break guard)
|
|
8
|
+
- Uses `cache_control_prefix_tokens` from lex-llm base provider for exclude count (default 4)
|
|
9
|
+
|
|
3
10
|
## 0.2.11 - 2026-05-21
|
|
4
11
|
|
|
5
12
|
- Add `api_version` and `default_max_tokens` to default_settings
|
|
@@ -46,7 +46,7 @@ module Legion
|
|
|
46
46
|
def headers
|
|
47
47
|
identity_headers.merge({
|
|
48
48
|
'x-api-key' => config.anthropic_api_key,
|
|
49
|
-
'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-
|
|
49
|
+
'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-10-02'
|
|
50
50
|
}.compact)
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -78,19 +78,23 @@ module Legion
|
|
|
78
78
|
|
|
79
79
|
private
|
|
80
80
|
|
|
81
|
-
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
81
|
+
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
|
|
82
82
|
log_render_payload(messages:, tools:, model:, stream:, schema:)
|
|
83
83
|
system_messages, chat_messages = messages.partition { |message| message.role == :system }
|
|
84
84
|
|
|
85
|
+
caching = cache_enabled?
|
|
86
|
+
exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
|
|
87
|
+
cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
|
|
88
|
+
|
|
85
89
|
{
|
|
86
90
|
model: model.id,
|
|
87
|
-
messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
|
|
91
|
+
messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
|
|
88
92
|
stream: stream,
|
|
89
93
|
max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
|
|
90
|
-
system: system_content(system_messages),
|
|
94
|
+
system: system_content(system_messages, cache: caching),
|
|
91
95
|
thinking: thinking_payload(thinking),
|
|
92
96
|
temperature: temperature,
|
|
93
|
-
tools: format_tools(tools),
|
|
97
|
+
tools: format_tools(tools, cache: caching),
|
|
94
98
|
tool_choice: tool_choice(tool_prefs),
|
|
95
99
|
output_config: output_config(schema)
|
|
96
100
|
}.compact
|
|
@@ -103,21 +107,24 @@ module Legion
|
|
|
103
107
|
end
|
|
104
108
|
end
|
|
105
109
|
|
|
106
|
-
def system_content(messages)
|
|
107
|
-
content = messages.flat_map
|
|
110
|
+
def system_content(messages, cache: false)
|
|
111
|
+
content = messages.flat_map do |message|
|
|
112
|
+
content_blocks(message.content, cache:)
|
|
113
|
+
end
|
|
108
114
|
content.empty? ? nil : content
|
|
109
115
|
end
|
|
110
116
|
|
|
111
|
-
def format_messages(messages, thinking:)
|
|
112
|
-
messages.map do |message|
|
|
117
|
+
def format_messages(messages, thinking:, cacheable_count: 0)
|
|
118
|
+
messages.each_with_index.map do |message, index|
|
|
119
|
+
cache = index < cacheable_count
|
|
113
120
|
if message.tool_call?
|
|
114
|
-
format_tool_call_message(message, thinking:
|
|
121
|
+
format_tool_call_message(message, thinking:, cache:)
|
|
115
122
|
elsif message.tool_result?
|
|
116
|
-
format_tool_result_message(message)
|
|
123
|
+
format_tool_result_message(message, cache:)
|
|
117
124
|
else
|
|
118
125
|
{
|
|
119
126
|
role: anthropic_role(message.role),
|
|
120
|
-
content: content_blocks(message.content, thinking
|
|
127
|
+
content: content_blocks(message.content, thinking:, message:, cache:)
|
|
121
128
|
}
|
|
122
129
|
end
|
|
123
130
|
end
|
|
@@ -127,12 +134,12 @@ module Legion
|
|
|
127
134
|
role == :assistant ? 'assistant' : 'user'
|
|
128
135
|
end
|
|
129
136
|
|
|
130
|
-
def content_blocks(content, thinking: false, message: nil)
|
|
137
|
+
def content_blocks(content, thinking: false, message: nil, cache: false)
|
|
131
138
|
raw_blocks = raw_content(content)
|
|
132
139
|
return with_thinking(raw_blocks, message, thinking) if raw_blocks
|
|
133
140
|
|
|
134
141
|
blocks = []
|
|
135
|
-
blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
|
|
142
|
+
blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
|
|
136
143
|
blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
|
|
137
144
|
with_thinking(blocks, message, thinking)
|
|
138
145
|
end
|
|
@@ -149,8 +156,10 @@ module Legion
|
|
|
149
156
|
content.to_s
|
|
150
157
|
end
|
|
151
158
|
|
|
152
|
-
def text_block(text)
|
|
153
|
-
{ type: 'text', text: text }
|
|
159
|
+
def text_block(text, cache: false)
|
|
160
|
+
{ type: 'text', text: text }.tap do |block|
|
|
161
|
+
block[:cache_control] = { type: 'ephemeral' } if cache
|
|
162
|
+
end
|
|
154
163
|
end
|
|
155
164
|
|
|
156
165
|
def attachment_blocks(content)
|
|
@@ -175,30 +184,34 @@ module Legion
|
|
|
175
184
|
thinking_block ? [thinking_block, *blocks] : blocks
|
|
176
185
|
end
|
|
177
186
|
|
|
178
|
-
def format_tool_call_message(message, thinking:)
|
|
179
|
-
blocks = content_blocks(message.content, thinking
|
|
180
|
-
message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
|
|
187
|
+
def format_tool_call_message(message, thinking:, cache:)
|
|
188
|
+
blocks = content_blocks(message.content, thinking:, message:, cache:)
|
|
189
|
+
message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
|
|
181
190
|
{ role: 'assistant', content: blocks }
|
|
182
191
|
end
|
|
183
192
|
|
|
184
|
-
def tool_use_block(tool_call)
|
|
193
|
+
def tool_use_block(tool_call, cache: false)
|
|
185
194
|
{
|
|
186
195
|
type: 'tool_use',
|
|
187
196
|
id: tool_call.id,
|
|
188
197
|
name: tool_call.name,
|
|
189
|
-
input: tool_call.arguments
|
|
190
|
-
|
|
198
|
+
input: tool_call.arguments,
|
|
199
|
+
cache_control: { type: 'ephemeral' }
|
|
200
|
+
}.tap do |block|
|
|
201
|
+
block.delete(:cache_control) unless cache
|
|
202
|
+
end
|
|
191
203
|
end
|
|
192
204
|
|
|
193
|
-
def format_tool_result_message(message)
|
|
205
|
+
def format_tool_result_message(message, cache: false)
|
|
194
206
|
{
|
|
195
207
|
role: 'user',
|
|
196
208
|
content: [
|
|
197
209
|
{
|
|
198
210
|
type: 'tool_result',
|
|
199
211
|
tool_use_id: message.tool_call_id,
|
|
200
|
-
content: content_blocks(message.content)
|
|
201
|
-
|
|
212
|
+
content: content_blocks(message.content, cache:),
|
|
213
|
+
cache_control: { type: 'ephemeral' }
|
|
214
|
+
}.tap { |block| block.delete(:cache_control) unless cache }
|
|
202
215
|
]
|
|
203
216
|
}
|
|
204
217
|
end
|
|
@@ -234,16 +247,20 @@ module Legion
|
|
|
234
247
|
end
|
|
235
248
|
end
|
|
236
249
|
|
|
237
|
-
def format_tools(tools)
|
|
250
|
+
def format_tools(tools, cache: false)
|
|
238
251
|
return nil if tools.empty?
|
|
239
252
|
|
|
240
|
-
tools.values.map do |tool|
|
|
253
|
+
tool_array = tools.values.map do |tool|
|
|
241
254
|
{
|
|
242
255
|
name: tool.name,
|
|
243
256
|
description: tool.description,
|
|
244
257
|
input_schema: tool_schema(tool)
|
|
245
258
|
}
|
|
246
259
|
end
|
|
260
|
+
|
|
261
|
+
tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
|
|
262
|
+
|
|
263
|
+
tool_array
|
|
247
264
|
end
|
|
248
265
|
|
|
249
266
|
def tool_schema(tool)
|
|
@@ -287,7 +304,7 @@ module Legion
|
|
|
287
304
|
normalized = normalized.dup
|
|
288
305
|
normalized.delete(:strict)
|
|
289
306
|
normalized.delete('strict')
|
|
290
|
-
{ format: { type: '
|
|
307
|
+
{ format: { type: 'json', schema: normalized } }
|
|
291
308
|
end
|
|
292
309
|
|
|
293
310
|
def parse_completion_response(response)
|
|
@@ -355,18 +372,13 @@ module Legion
|
|
|
355
372
|
)
|
|
356
373
|
end
|
|
357
374
|
|
|
358
|
-
def extract_streaming_tool_calls(data,
|
|
375
|
+
def extract_streaming_tool_calls(data, _delta_type)
|
|
359
376
|
content_block = data['content_block']
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
partial = data.dig('delta', 'partial_json')
|
|
366
|
-
return nil unless partial
|
|
367
|
-
|
|
368
|
-
{ nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
|
|
369
|
-
end
|
|
377
|
+
return nil unless content_block && content_block['type'] == 'tool_use'
|
|
378
|
+
|
|
379
|
+
{ content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
|
|
380
|
+
id: content_block['id'], name: content_block['name'], arguments: ''
|
|
381
|
+
) }
|
|
370
382
|
end
|
|
371
383
|
|
|
372
384
|
def parse_tool_calls(content_blocks)
|
|
@@ -405,6 +417,10 @@ module Legion
|
|
|
405
417
|
CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
|
|
406
418
|
end
|
|
407
419
|
|
|
420
|
+
def model_detail(model_name)
|
|
421
|
+
fetch_model_detail(model_name)
|
|
422
|
+
end
|
|
423
|
+
|
|
408
424
|
def fetch_model_detail(model_name)
|
|
409
425
|
ctx = infer_context_window(model_name)
|
|
410
426
|
ctx ? { context_window: ctx } : nil
|