lex-llm-anthropic 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4f3196e6fe3ab6df1b6f1b06be9a71f317d22f3e37e32dc39515b5e392aa292f
|
|
4
|
+
data.tar.gz: d7e63aea83f71e31f4a4a4b9881ba331267d2dbc1a6378b737c78f72cc8edc56
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e83d533b933209dadb0ad6badfc6e03d546ca64e737189094440544eace90ff392ae9262990e81d36684bc37b43b7db586dd8d8b57ea2b512a4cd3124558303
|
|
7
|
+
data.tar.gz: f78eb6154a3f2a626c64231fc2bf6b36c7dac63fffc4e362e6aa670de5ad40fcc6af4ce29b225e1d73222b8647ef26fa537d25aed3370c6ab40a79ec5f143d8e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.12 - 2026-06-01
|
|
4
|
+
|
|
5
|
+
- Add `cache_control` markers to Anthropic Messages API requests for prompt caching
|
|
6
|
+
- System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
|
|
7
|
+
- Early conversation turns are cacheable; final message is never cached (prefix break guard)
|
|
8
|
+
- Uses `cache_control_prefix_tokens` from lex-llm base provider for exclude count (default 4)
|
|
9
|
+
|
|
10
|
+
## 0.2.11 - 2026-05-21
|
|
11
|
+
|
|
12
|
+
- Add `api_version` and `default_max_tokens` to default_settings
|
|
13
|
+
- `api_base` and the `anthropic-version` header now fall back to settings (`endpoint`, `api_version`) when not set in config
|
|
14
|
+
- `max_tokens` falls back to `settings[:default_max_tokens]` when the model does not define its own limit
|
|
15
|
+
- Identity headers included via base provider
|
|
16
|
+
|
|
17
|
+
|
|
3
18
|
## 0.2.10 - 2026-05-18
|
|
4
19
|
|
|
5
20
|
- Fix streaming tool call input accumulation: `build_chunk` now handles both `content_block_start` (tool_use with id+name) and `input_json_delta` (partial argument fragments) events. Previously only the start event was parsed, resulting in tool calls with empty arguments.
|
|
@@ -35,15 +35,19 @@ module Legion
|
|
|
35
35
|
def embeddings?(_model) = false
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
+
def settings
|
|
39
|
+
Anthropic.default_settings
|
|
40
|
+
end
|
|
41
|
+
|
|
38
42
|
def api_base
|
|
39
|
-
config.anthropic_api_base || 'https://api.anthropic.com'
|
|
43
|
+
config.anthropic_api_base || settings[:endpoint] || 'https://api.anthropic.com'
|
|
40
44
|
end
|
|
41
45
|
|
|
42
46
|
def headers
|
|
43
|
-
{
|
|
47
|
+
identity_headers.merge({
|
|
44
48
|
'x-api-key' => config.anthropic_api_key,
|
|
45
|
-
'anthropic-version' => config.anthropic_version || '2023-
|
|
46
|
-
}.compact
|
|
49
|
+
'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-10-02'
|
|
50
|
+
}.compact)
|
|
47
51
|
end
|
|
48
52
|
|
|
49
53
|
def completion_url = '/v1/messages'
|
|
@@ -74,19 +78,23 @@ module Legion
|
|
|
74
78
|
|
|
75
79
|
private
|
|
76
80
|
|
|
77
|
-
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
|
|
81
|
+
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
|
|
78
82
|
log_render_payload(messages:, tools:, model:, stream:, schema:)
|
|
79
83
|
system_messages, chat_messages = messages.partition { |message| message.role == :system }
|
|
80
84
|
|
|
85
|
+
caching = cache_enabled?
|
|
86
|
+
exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
|
|
87
|
+
cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
|
|
88
|
+
|
|
81
89
|
{
|
|
82
90
|
model: model.id,
|
|
83
|
-
messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
|
|
91
|
+
messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
|
|
84
92
|
stream: stream,
|
|
85
|
-
max_tokens: model.max_tokens || 4096,
|
|
86
|
-
system: system_content(system_messages),
|
|
93
|
+
max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
|
|
94
|
+
system: system_content(system_messages, cache: caching),
|
|
87
95
|
thinking: thinking_payload(thinking),
|
|
88
96
|
temperature: temperature,
|
|
89
|
-
tools: format_tools(tools),
|
|
97
|
+
tools: format_tools(tools, cache: caching),
|
|
90
98
|
tool_choice: tool_choice(tool_prefs),
|
|
91
99
|
output_config: output_config(schema)
|
|
92
100
|
}.compact
|
|
@@ -99,21 +107,24 @@ module Legion
|
|
|
99
107
|
end
|
|
100
108
|
end
|
|
101
109
|
|
|
102
|
-
def system_content(messages)
|
|
103
|
-
content = messages.flat_map
|
|
110
|
+
def system_content(messages, cache: false)
|
|
111
|
+
content = messages.flat_map do |message|
|
|
112
|
+
content_blocks(message.content, cache:)
|
|
113
|
+
end
|
|
104
114
|
content.empty? ? nil : content
|
|
105
115
|
end
|
|
106
116
|
|
|
107
|
-
def format_messages(messages, thinking:)
|
|
108
|
-
messages.map do |message|
|
|
117
|
+
def format_messages(messages, thinking:, cacheable_count: 0)
|
|
118
|
+
messages.each_with_index.map do |message, index|
|
|
119
|
+
cache = index < cacheable_count
|
|
109
120
|
if message.tool_call?
|
|
110
|
-
format_tool_call_message(message, thinking:
|
|
121
|
+
format_tool_call_message(message, thinking:, cache:)
|
|
111
122
|
elsif message.tool_result?
|
|
112
|
-
format_tool_result_message(message)
|
|
123
|
+
format_tool_result_message(message, cache:)
|
|
113
124
|
else
|
|
114
125
|
{
|
|
115
126
|
role: anthropic_role(message.role),
|
|
116
|
-
content: content_blocks(message.content, thinking
|
|
127
|
+
content: content_blocks(message.content, thinking:, message:, cache:)
|
|
117
128
|
}
|
|
118
129
|
end
|
|
119
130
|
end
|
|
@@ -123,12 +134,12 @@ module Legion
|
|
|
123
134
|
role == :assistant ? 'assistant' : 'user'
|
|
124
135
|
end
|
|
125
136
|
|
|
126
|
-
def content_blocks(content, thinking: false, message: nil)
|
|
137
|
+
def content_blocks(content, thinking: false, message: nil, cache: false)
|
|
127
138
|
raw_blocks = raw_content(content)
|
|
128
139
|
return with_thinking(raw_blocks, message, thinking) if raw_blocks
|
|
129
140
|
|
|
130
141
|
blocks = []
|
|
131
|
-
blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
|
|
142
|
+
blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
|
|
132
143
|
blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
|
|
133
144
|
with_thinking(blocks, message, thinking)
|
|
134
145
|
end
|
|
@@ -145,8 +156,10 @@ module Legion
|
|
|
145
156
|
content.to_s
|
|
146
157
|
end
|
|
147
158
|
|
|
148
|
-
def text_block(text)
|
|
149
|
-
{ type: 'text', text: text }
|
|
159
|
+
def text_block(text, cache: false)
|
|
160
|
+
{ type: 'text', text: text }.tap do |block|
|
|
161
|
+
block[:cache_control] = { type: 'ephemeral' } if cache
|
|
162
|
+
end
|
|
150
163
|
end
|
|
151
164
|
|
|
152
165
|
def attachment_blocks(content)
|
|
@@ -171,30 +184,34 @@ module Legion
|
|
|
171
184
|
thinking_block ? [thinking_block, *blocks] : blocks
|
|
172
185
|
end
|
|
173
186
|
|
|
174
|
-
def format_tool_call_message(message, thinking:)
|
|
175
|
-
blocks = content_blocks(message.content, thinking
|
|
176
|
-
message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
|
|
187
|
+
def format_tool_call_message(message, thinking:, cache:)
|
|
188
|
+
blocks = content_blocks(message.content, thinking:, message:, cache:)
|
|
189
|
+
message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
|
|
177
190
|
{ role: 'assistant', content: blocks }
|
|
178
191
|
end
|
|
179
192
|
|
|
180
|
-
def tool_use_block(tool_call)
|
|
193
|
+
def tool_use_block(tool_call, cache: false)
|
|
181
194
|
{
|
|
182
195
|
type: 'tool_use',
|
|
183
196
|
id: tool_call.id,
|
|
184
197
|
name: tool_call.name,
|
|
185
|
-
input: tool_call.arguments
|
|
186
|
-
|
|
198
|
+
input: tool_call.arguments,
|
|
199
|
+
cache_control: { type: 'ephemeral' }
|
|
200
|
+
}.tap do |block|
|
|
201
|
+
block.delete(:cache_control) unless cache
|
|
202
|
+
end
|
|
187
203
|
end
|
|
188
204
|
|
|
189
|
-
def format_tool_result_message(message)
|
|
205
|
+
def format_tool_result_message(message, cache: false)
|
|
190
206
|
{
|
|
191
207
|
role: 'user',
|
|
192
208
|
content: [
|
|
193
209
|
{
|
|
194
210
|
type: 'tool_result',
|
|
195
211
|
tool_use_id: message.tool_call_id,
|
|
196
|
-
content: content_blocks(message.content)
|
|
197
|
-
|
|
212
|
+
content: content_blocks(message.content, cache:),
|
|
213
|
+
cache_control: { type: 'ephemeral' }
|
|
214
|
+
}.tap { |block| block.delete(:cache_control) unless cache }
|
|
198
215
|
]
|
|
199
216
|
}
|
|
200
217
|
end
|
|
@@ -230,16 +247,20 @@ module Legion
|
|
|
230
247
|
end
|
|
231
248
|
end
|
|
232
249
|
|
|
233
|
-
def format_tools(tools)
|
|
250
|
+
def format_tools(tools, cache: false)
|
|
234
251
|
return nil if tools.empty?
|
|
235
252
|
|
|
236
|
-
tools.values.map do |tool|
|
|
253
|
+
tool_array = tools.values.map do |tool|
|
|
237
254
|
{
|
|
238
255
|
name: tool.name,
|
|
239
256
|
description: tool.description,
|
|
240
257
|
input_schema: tool_schema(tool)
|
|
241
258
|
}
|
|
242
259
|
end
|
|
260
|
+
|
|
261
|
+
tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
|
|
262
|
+
|
|
263
|
+
tool_array
|
|
243
264
|
end
|
|
244
265
|
|
|
245
266
|
def tool_schema(tool)
|
|
@@ -283,7 +304,7 @@ module Legion
|
|
|
283
304
|
normalized = normalized.dup
|
|
284
305
|
normalized.delete(:strict)
|
|
285
306
|
normalized.delete('strict')
|
|
286
|
-
{ format: { type: '
|
|
307
|
+
{ format: { type: 'json', schema: normalized } }
|
|
287
308
|
end
|
|
288
309
|
|
|
289
310
|
def parse_completion_response(response)
|
|
@@ -351,18 +372,13 @@ module Legion
|
|
|
351
372
|
)
|
|
352
373
|
end
|
|
353
374
|
|
|
354
|
-
def extract_streaming_tool_calls(data,
|
|
375
|
+
def extract_streaming_tool_calls(data, _delta_type)
|
|
355
376
|
content_block = data['content_block']
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
partial = data.dig('delta', 'partial_json')
|
|
362
|
-
return nil unless partial
|
|
363
|
-
|
|
364
|
-
{ nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
|
|
365
|
-
end
|
|
377
|
+
return nil unless content_block && content_block['type'] == 'tool_use'
|
|
378
|
+
|
|
379
|
+
{ content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
|
|
380
|
+
id: content_block['id'], name: content_block['name'], arguments: ''
|
|
381
|
+
) }
|
|
366
382
|
end
|
|
367
383
|
|
|
368
384
|
def parse_tool_calls(content_blocks)
|
|
@@ -401,6 +417,10 @@ module Legion
|
|
|
401
417
|
CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
|
|
402
418
|
end
|
|
403
419
|
|
|
420
|
+
def model_detail(model_name)
|
|
421
|
+
fetch_model_detail(model_name)
|
|
422
|
+
end
|
|
423
|
+
|
|
404
424
|
def fetch_model_detail(model_name)
|
|
405
425
|
ctx = infer_context_window(model_name)
|
|
406
426
|
ctx ? { context_window: ctx } : nil
|
|
@@ -24,6 +24,8 @@ module Legion
|
|
|
24
24
|
instance: {
|
|
25
25
|
default_model: 'claude-sonnet-4-6',
|
|
26
26
|
endpoint: 'https://api.anthropic.com',
|
|
27
|
+
api_version: '2023-10-02',
|
|
28
|
+
default_max_tokens: 4096,
|
|
27
29
|
tier: :frontier,
|
|
28
30
|
transport: :http,
|
|
29
31
|
credentials: { api_key: 'env://ANTHROPIC_API_KEY' },
|