lex-llm-anthropic 0.2.10 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 698085fefbd69b9c2689ac472c42ce579c05e0ef2be2d06dc146f324df2ba69e
4
- data.tar.gz: c72e6c9974500f0285084da0da19af6390806ccfb4f93a9f42128e50a65205ad
3
+ metadata.gz: 4f3196e6fe3ab6df1b6f1b06be9a71f317d22f3e37e32dc39515b5e392aa292f
4
+ data.tar.gz: d7e63aea83f71e31f4a4a4b9881ba331267d2dbc1a6378b737c78f72cc8edc56
5
5
  SHA512:
6
- metadata.gz: 725109b45b7fdf9849fcbdffba9e1d8b338e0b6e80c23d495c0aac5ef06d733f075c19f6b6de8bf1d34ea77050efc53d71d0ecd42873f59a30afbf7e7bbe60ba
7
- data.tar.gz: 3406970db252257e2c074455132e55d7166dc4180a4c06f5b803ed267e7a17030e523ed063511c87e48f5345141891bd9211ee555d749af8e9cde496bf5e8568
6
+ metadata.gz: 7e83d533b933209dadb0ad6badfc6e03d546ca64e737189094440544eace90ff392ae9262990e81d36684bc37b43b7db586dd8d8b57ea2b512a4cd3124558303
7
+ data.tar.gz: f78eb6154a3f2a626c64231fc2bf6b36c7dac63fffc4e362e6aa670de5ad40fcc6af4ce29b225e1d73222b8647ef26fa537d25aed3370c6ab40a79ec5f143d8e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.12 - 2026-06-01
4
+
5
+ - Add `cache_control` markers to Anthropic Messages API requests for prompt caching
6
+ - System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
7
+ - Early conversation turns are cacheable; the most recent messages (always at least the final one) are never cached (prefix break guard)
8
+ - Uses `cache_control_prefix_tokens` from the lex-llm base provider as the number of trailing messages excluded from caching (default 4)
9
+
10
+ ## 0.2.11 - 2026-05-21
11
+
12
+ - Add `api_version` and `default_max_tokens` to default_settings
13
+ - `api_base` and the `anthropic-version` header fall back to settings when not set in config
14
+ - `max_tokens` falls back to `settings[:default_max_tokens]` when the model does not define one
15
+ - Identity headers included via base provider
16
+
17
+
3
18
  ## 0.2.10 - 2026-05-18
4
19
 
5
20
  - Fix streaming tool call input accumulation: `build_chunk` now handles both `content_block_start` (tool_use with id+name) and `input_json_delta` (partial argument fragments) events. Previously only the start event was parsed, resulting in tool calls with empty arguments.
@@ -35,15 +35,19 @@ module Legion
35
35
  def embeddings?(_model) = false
36
36
  end
37
37
 
38
+ def settings
39
+ Anthropic.default_settings
40
+ end
41
+
38
42
  def api_base
39
- config.anthropic_api_base || 'https://api.anthropic.com'
43
+ config.anthropic_api_base || settings[:endpoint] || 'https://api.anthropic.com'
40
44
  end
41
45
 
42
46
  def headers
43
- {
47
+ identity_headers.merge({
44
48
  'x-api-key' => config.anthropic_api_key,
45
- 'anthropic-version' => config.anthropic_version || '2023-06-01'
46
- }.compact
49
+ 'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-10-02'
50
+ }.compact)
47
51
  end
48
52
 
49
53
  def completion_url = '/v1/messages'
@@ -74,19 +78,23 @@ module Legion
74
78
 
75
79
  private
76
80
 
77
- def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
81
+ def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
78
82
  log_render_payload(messages:, tools:, model:, stream:, schema:)
79
83
  system_messages, chat_messages = messages.partition { |message| message.role == :system }
80
84
 
85
+ caching = cache_enabled?
86
+ exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
87
+ cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
88
+
81
89
  {
82
90
  model: model.id,
83
- messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
91
+ messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
84
92
  stream: stream,
85
- max_tokens: model.max_tokens || 4096,
86
- system: system_content(system_messages),
93
+ max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
94
+ system: system_content(system_messages, cache: caching),
87
95
  thinking: thinking_payload(thinking),
88
96
  temperature: temperature,
89
- tools: format_tools(tools),
97
+ tools: format_tools(tools, cache: caching),
90
98
  tool_choice: tool_choice(tool_prefs),
91
99
  output_config: output_config(schema)
92
100
  }.compact
@@ -99,21 +107,24 @@ module Legion
99
107
  end
100
108
  end
101
109
 
102
- def system_content(messages)
103
- content = messages.flat_map { |message| content_blocks(message.content) }
110
+ def system_content(messages, cache: false)
111
+ content = messages.flat_map do |message|
112
+ content_blocks(message.content, cache:)
113
+ end
104
114
  content.empty? ? nil : content
105
115
  end
106
116
 
107
- def format_messages(messages, thinking:)
108
- messages.map do |message|
117
+ def format_messages(messages, thinking:, cacheable_count: 0)
118
+ messages.each_with_index.map do |message, index|
119
+ cache = index < cacheable_count
109
120
  if message.tool_call?
110
- format_tool_call_message(message, thinking: thinking)
121
+ format_tool_call_message(message, thinking:, cache:)
111
122
  elsif message.tool_result?
112
- format_tool_result_message(message)
123
+ format_tool_result_message(message, cache:)
113
124
  else
114
125
  {
115
126
  role: anthropic_role(message.role),
116
- content: content_blocks(message.content, thinking: thinking, message: message)
127
+ content: content_blocks(message.content, thinking:, message:, cache:)
117
128
  }
118
129
  end
119
130
  end
@@ -123,12 +134,12 @@ module Legion
123
134
  role == :assistant ? 'assistant' : 'user'
124
135
  end
125
136
 
126
- def content_blocks(content, thinking: false, message: nil)
137
+ def content_blocks(content, thinking: false, message: nil, cache: false)
127
138
  raw_blocks = raw_content(content)
128
139
  return with_thinking(raw_blocks, message, thinking) if raw_blocks
129
140
 
130
141
  blocks = []
131
- blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
142
+ blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
132
143
  blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
133
144
  with_thinking(blocks, message, thinking)
134
145
  end
@@ -145,8 +156,10 @@ module Legion
145
156
  content.to_s
146
157
  end
147
158
 
148
- def text_block(text)
149
- { type: 'text', text: text }
159
+ def text_block(text, cache: false)
160
+ { type: 'text', text: text }.tap do |block|
161
+ block[:cache_control] = { type: 'ephemeral' } if cache
162
+ end
150
163
  end
151
164
 
152
165
  def attachment_blocks(content)
@@ -171,30 +184,34 @@ module Legion
171
184
  thinking_block ? [thinking_block, *blocks] : blocks
172
185
  end
173
186
 
174
- def format_tool_call_message(message, thinking:)
175
- blocks = content_blocks(message.content, thinking: thinking, message: message)
176
- message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
187
+ def format_tool_call_message(message, thinking:, cache:)
188
+ blocks = content_blocks(message.content, thinking:, message:, cache:)
189
+ message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
177
190
  { role: 'assistant', content: blocks }
178
191
  end
179
192
 
180
- def tool_use_block(tool_call)
193
+ def tool_use_block(tool_call, cache: false)
181
194
  {
182
195
  type: 'tool_use',
183
196
  id: tool_call.id,
184
197
  name: tool_call.name,
185
- input: tool_call.arguments
186
- }
198
+ input: tool_call.arguments,
199
+ cache_control: { type: 'ephemeral' }
200
+ }.tap do |block|
201
+ block.delete(:cache_control) unless cache
202
+ end
187
203
  end
188
204
 
189
- def format_tool_result_message(message)
205
+ def format_tool_result_message(message, cache: false)
190
206
  {
191
207
  role: 'user',
192
208
  content: [
193
209
  {
194
210
  type: 'tool_result',
195
211
  tool_use_id: message.tool_call_id,
196
- content: content_blocks(message.content)
197
- }
212
+ content: content_blocks(message.content, cache:),
213
+ cache_control: { type: 'ephemeral' }
214
+ }.tap { |block| block.delete(:cache_control) unless cache }
198
215
  ]
199
216
  }
200
217
  end
@@ -230,16 +247,20 @@ module Legion
230
247
  end
231
248
  end
232
249
 
233
- def format_tools(tools)
250
+ def format_tools(tools, cache: false)
234
251
  return nil if tools.empty?
235
252
 
236
- tools.values.map do |tool|
253
+ tool_array = tools.values.map do |tool|
237
254
  {
238
255
  name: tool.name,
239
256
  description: tool.description,
240
257
  input_schema: tool_schema(tool)
241
258
  }
242
259
  end
260
+
261
+ tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
262
+
263
+ tool_array
243
264
  end
244
265
 
245
266
  def tool_schema(tool)
@@ -283,7 +304,7 @@ module Legion
283
304
  normalized = normalized.dup
284
305
  normalized.delete(:strict)
285
306
  normalized.delete('strict')
286
- { format: { type: 'json_schema', schema: normalized } }
307
+ { format: { type: 'json', schema: normalized } }
287
308
  end
288
309
 
289
310
  def parse_completion_response(response)
@@ -351,18 +372,13 @@ module Legion
351
372
  )
352
373
  end
353
374
 
354
- def extract_streaming_tool_calls(data, delta_type)
375
+ def extract_streaming_tool_calls(data, _delta_type)
355
376
  content_block = data['content_block']
356
- if content_block && content_block['type'] == 'tool_use'
357
- { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
358
- id: content_block['id'], name: content_block['name'], arguments: ''
359
- ) }
360
- elsif delta_type == 'input_json_delta'
361
- partial = data.dig('delta', 'partial_json')
362
- return nil unless partial
363
-
364
- { nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
365
- end
377
+ return nil unless content_block && content_block['type'] == 'tool_use'
378
+
379
+ { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
380
+ id: content_block['id'], name: content_block['name'], arguments: ''
381
+ ) }
366
382
  end
367
383
 
368
384
  def parse_tool_calls(content_blocks)
@@ -401,6 +417,10 @@ module Legion
401
417
  CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
402
418
  end
403
419
 
420
+ def model_detail(model_name)
421
+ fetch_model_detail(model_name)
422
+ end
423
+
404
424
  def fetch_model_detail(model_name)
405
425
  ctx = infer_context_window(model_name)
406
426
  ctx ? { context_window: ctx } : nil
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Anthropic
7
- VERSION = '0.2.10'
7
+ VERSION = '0.2.12'
8
8
  end
9
9
  end
10
10
  end
@@ -24,6 +24,8 @@ module Legion
24
24
  instance: {
25
25
  default_model: 'claude-sonnet-4-6',
26
26
  endpoint: 'https://api.anthropic.com',
27
+ api_version: '2023-10-02',
28
+ default_max_tokens: 4096,
27
29
  tier: :frontier,
28
30
  transport: :http,
29
31
  credentials: { api_key: 'env://ANTHROPIC_API_KEY' },
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.10
4
+ version: 0.2.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO