lex-llm-anthropic 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1237cef4710e768a52ab02e062379ed70a6f2ee7456cca1b5a45cfb03489c5c4
4
- data.tar.gz: bb9a132592136f7c64c4f982a6d1f9896348e0362da9d46d77fb846566b6a2d8
3
+ metadata.gz: 4f3196e6fe3ab6df1b6f1b06be9a71f317d22f3e37e32dc39515b5e392aa292f
4
+ data.tar.gz: d7e63aea83f71e31f4a4a4b9881ba331267d2dbc1a6378b737c78f72cc8edc56
5
5
  SHA512:
6
- metadata.gz: 9e138f44cbb23a70c48dc489e53bc6dff5fef7964db248b41f519b37a71e839dee7aa2dcbc26ce7cc5aa41e3df5720c0889537c497b6fb25bfb31053864d0066
7
- data.tar.gz: 725a59b156f68280cbc9d276eccb6c90e21c638900dbf6d81405588a15bd0ce00c3413555c311aef411162669b959635e9f3d4363fcd124d364eedf8725ef189
6
+ metadata.gz: 7e83d533b933209dadb0ad6badfc6e03d546ca64e737189094440544eace90ff392ae9262990e81d36684bc37b43b7db586dd8d8b57ea2b512a4cd3124558303
7
+ data.tar.gz: f78eb6154a3f2a626c64231fc2bf6b36c7dac63fffc4e362e6aa670de5ad40fcc6af4ce29b225e1d73222b8647ef26fa537d25aed3370c6ab40a79ec5f143d8e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.12 - 2026-06-01
4
+
5
+ - Add `cache_control` markers to Anthropic Messages API requests for prompt caching
6
+ - System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
7
+ - Early conversation turns are cacheable; final message is never cached (prefix break guard)
8
+ - Uses `cache_control_prefix_tokens` from the lex-llm base provider as the number of trailing messages excluded from caching (default 4, minimum 1)
9
+
3
10
  ## 0.2.11 - 2026-05-21
4
11
 
5
12
  - Add `api_version` and `default_max_tokens` to default_settings
@@ -46,7 +46,7 @@ module Legion
46
46
  def headers
47
47
  identity_headers.merge({
48
48
  'x-api-key' => config.anthropic_api_key,
49
- 'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-06-01'
49
+ 'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-10-02'
50
50
  }.compact)
51
51
  end
52
52
 
@@ -78,19 +78,23 @@ module Legion
78
78
 
79
79
  private
80
80
 
81
- def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
81
+ def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
82
82
  log_render_payload(messages:, tools:, model:, stream:, schema:)
83
83
  system_messages, chat_messages = messages.partition { |message| message.role == :system }
84
84
 
85
+ caching = cache_enabled?
86
+ exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
87
+ cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
88
+
85
89
  {
86
90
  model: model.id,
87
- messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
91
+ messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
88
92
  stream: stream,
89
93
  max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
90
- system: system_content(system_messages),
94
+ system: system_content(system_messages, cache: caching),
91
95
  thinking: thinking_payload(thinking),
92
96
  temperature: temperature,
93
- tools: format_tools(tools),
97
+ tools: format_tools(tools, cache: caching),
94
98
  tool_choice: tool_choice(tool_prefs),
95
99
  output_config: output_config(schema)
96
100
  }.compact
@@ -103,21 +107,24 @@ module Legion
103
107
  end
104
108
  end
105
109
 
106
- def system_content(messages)
107
- content = messages.flat_map { |message| content_blocks(message.content) }
110
+ def system_content(messages, cache: false)
111
+ content = messages.flat_map do |message|
112
+ content_blocks(message.content, cache:)
113
+ end
108
114
  content.empty? ? nil : content
109
115
  end
110
116
 
111
- def format_messages(messages, thinking:)
112
- messages.map do |message|
117
+ def format_messages(messages, thinking:, cacheable_count: 0)
118
+ messages.each_with_index.map do |message, index|
119
+ cache = index == cacheable_count - 1 # one breakpoint caches the whole prefix; the API caps breakpoints at 4
113
120
  if message.tool_call?
114
- format_tool_call_message(message, thinking: thinking)
121
+ format_tool_call_message(message, thinking:, cache:)
115
122
  elsif message.tool_result?
116
- format_tool_result_message(message)
123
+ format_tool_result_message(message, cache:)
117
124
  else
118
125
  {
119
126
  role: anthropic_role(message.role),
120
- content: content_blocks(message.content, thinking: thinking, message: message)
127
+ content: content_blocks(message.content, thinking:, message:, cache:)
121
128
  }
122
129
  end
123
130
  end
@@ -127,12 +134,12 @@ module Legion
127
134
  role == :assistant ? 'assistant' : 'user'
128
135
  end
129
136
 
130
- def content_blocks(content, thinking: false, message: nil)
137
+ def content_blocks(content, thinking: false, message: nil, cache: false)
131
138
  raw_blocks = raw_content(content)
132
139
  return with_thinking(raw_blocks, message, thinking) if raw_blocks
133
140
 
134
141
  blocks = []
135
- blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
142
+ blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
136
143
  blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
137
144
  with_thinking(blocks, message, thinking)
138
145
  end
@@ -149,8 +156,10 @@ module Legion
149
156
  content.to_s
150
157
  end
151
158
 
152
- def text_block(text)
153
- { type: 'text', text: text }
159
+ def text_block(text, cache: false)
160
+ { type: 'text', text: text }.tap do |block|
161
+ block[:cache_control] = { type: 'ephemeral' } if cache
162
+ end
154
163
  end
155
164
 
156
165
  def attachment_blocks(content)
@@ -175,30 +184,34 @@ module Legion
175
184
  thinking_block ? [thinking_block, *blocks] : blocks
176
185
  end
177
186
 
178
- def format_tool_call_message(message, thinking:)
179
- blocks = content_blocks(message.content, thinking: thinking, message: message)
180
- message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
187
+ def format_tool_call_message(message, thinking:, cache:)
188
+ blocks = content_blocks(message.content, thinking:, message:, cache:)
189
+ message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
181
190
  { role: 'assistant', content: blocks }
182
191
  end
183
192
 
184
- def tool_use_block(tool_call)
193
+ def tool_use_block(tool_call, cache: false)
185
194
  {
186
195
  type: 'tool_use',
187
196
  id: tool_call.id,
188
197
  name: tool_call.name,
189
- input: tool_call.arguments
190
- }
198
+ input: tool_call.arguments,
199
+ cache_control: { type: 'ephemeral' }
200
+ }.tap do |block|
201
+ block.delete(:cache_control) unless cache
202
+ end
191
203
  end
192
204
 
193
- def format_tool_result_message(message)
205
+ def format_tool_result_message(message, cache: false)
194
206
  {
195
207
  role: 'user',
196
208
  content: [
197
209
  {
198
210
  type: 'tool_result',
199
211
  tool_use_id: message.tool_call_id,
200
- content: content_blocks(message.content)
201
- }
212
+ content: content_blocks(message.content, cache:),
213
+ cache_control: { type: 'ephemeral' }
214
+ }.tap { |block| block.delete(:cache_control) unless cache }
202
215
  ]
203
216
  }
204
217
  end
@@ -234,16 +247,20 @@ module Legion
234
247
  end
235
248
  end
236
249
 
237
- def format_tools(tools)
250
+ def format_tools(tools, cache: false)
238
251
  return nil if tools.empty?
239
252
 
240
- tools.values.map do |tool|
253
+ tool_array = tools.values.map do |tool|
241
254
  {
242
255
  name: tool.name,
243
256
  description: tool.description,
244
257
  input_schema: tool_schema(tool)
245
258
  }
246
259
  end
260
+
261
+ tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
262
+
263
+ tool_array
247
264
  end
248
265
 
249
266
  def tool_schema(tool)
@@ -287,7 +304,7 @@ module Legion
287
304
  normalized = normalized.dup
288
305
  normalized.delete(:strict)
289
306
  normalized.delete('strict')
290
- { format: { type: 'json_schema', schema: normalized } }
307
+ { format: { type: 'json_schema', schema: normalized } }
291
308
  end
292
309
 
293
310
  def parse_completion_response(response)
@@ -355,18 +372,13 @@ module Legion
355
372
  )
356
373
  end
357
374
 
358
- def extract_streaming_tool_calls(data, delta_type)
375
+ def extract_streaming_tool_calls(data, _delta_type)
359
376
  content_block = data['content_block']
360
- if content_block && content_block['type'] == 'tool_use'
361
- { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
362
- id: content_block['id'], name: content_block['name'], arguments: ''
363
- ) }
364
- elsif delta_type == 'input_json_delta'
365
- partial = data.dig('delta', 'partial_json')
366
- return nil unless partial
367
-
368
- { nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
369
- end
377
+ return nil unless content_block && content_block['type'] == 'tool_use'
378
+
379
+ { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
380
+ id: content_block['id'], name: content_block['name'], arguments: ''
381
+ ) }
370
382
  end
371
383
 
372
384
  def parse_tool_calls(content_blocks)
@@ -405,6 +417,10 @@ module Legion
405
417
  CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
406
418
  end
407
419
 
420
+ def model_detail(model_name)
421
+ fetch_model_detail(model_name)
422
+ end
423
+
408
424
  def fetch_model_detail(model_name)
409
425
  ctx = infer_context_window(model_name)
410
426
  ctx ? { context_window: ctx } : nil
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Anthropic
7
- VERSION = '0.2.11'
7
+ VERSION = '0.2.12'
8
8
  end
9
9
  end
10
10
  end
@@ -24,7 +24,7 @@ module Legion
24
24
  instance: {
25
25
  default_model: 'claude-sonnet-4-6',
26
26
  endpoint: 'https://api.anthropic.com',
27
- api_version: '2023-06-01',
27
+ api_version: '2023-10-02',
28
28
  default_max_tokens: 4096,
29
29
  tier: :frontier,
30
30
  transport: :http,
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.2.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO