lex-llm-anthropic 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1237cef4710e768a52ab02e062379ed70a6f2ee7456cca1b5a45cfb03489c5c4
4
- data.tar.gz: bb9a132592136f7c64c4f982a6d1f9896348e0362da9d46d77fb846566b6a2d8
3
+ metadata.gz: 7ea542d2f08da7b03e8f42e683dca5638bdaf07966f3389cdfcccfa22f8581b0
4
+ data.tar.gz: b0e902241e5dd1c94c3742f9cee76d4aa2a41516829ce5c481084e70378826cc
5
5
  SHA512:
6
- metadata.gz: 9e138f44cbb23a70c48dc489e53bc6dff5fef7964db248b41f519b37a71e839dee7aa2dcbc26ce7cc5aa41e3df5720c0889537c497b6fb25bfb31053864d0066
7
- data.tar.gz: 725a59b156f68280cbc9d276eccb6c90e21c638900dbf6d81405588a15bd0ce00c3413555c311aef411162669b959635e9f3d4363fcd124d364eedf8725ef189
6
+ metadata.gz: 333cff908f11b23fe522c2bb79743d879b5adf0d7ce952e0b989becb9da775b58a528ba7f52ec42b7f7ef188d4d7b3db29db9700df4b17e0435c0e4de0087b1c
7
+ data.tar.gz: 57ad771fb6b041d74e1459bb46e5cd471f8f37f416b014bcc6873f97aca3fe0aea74c7a943ddfb3d8418bbf6ade69a0cda8fa6cc755134df53eabf9abeaaf1f1
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.13 - 2026-06-02
4
+
5
+ - **Fix invalid anthropic-version header** — Default `api_version` was `'2023-10-02'` (typo), which Anthropic rejects. Changed to `'2023-10-16'` (anthropic.rb)
6
+ - **Add per-provider discovery refresh actor** — New `actors/discovery_refresh.rb` that only refreshes Anthropic models, avoiding coupling to other providers' discovery cycles
7
+
8
+ ## 0.2.12 - 2026-06-01
9
+
10
+ - Add `cache_control` markers to Anthropic Messages API requests for prompt caching
11
+ - System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
12
+ - Early conversation turns are cacheable; final message is never cached (prefix break guard)
13
+ - Uses `cache_control_prefix_tokens` from lex-llm base provider for exclude count (default 4)
14
+
3
15
  ## 0.2.11 - 2026-05-21
4
16
 
5
17
  - Add `api_version` and `default_max_tokens` to default_settings
data/lib/legion/extensions/llm/anthropic/actors/discovery_refresh.rb ADDED
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require 'legion/extensions/actors/every'
5
+ rescue LoadError => e
6
+ warn(e.message) if $VERBOSE
7
+ end
8
+
9
+ return unless defined?(Legion::Extensions::Actors::Every)
10
+
11
+ module Legion
12
+ module Extensions
13
+ module Llm
14
+ module Anthropic
15
+ module Actor
16
+ class DiscoveryRefresh < Legion::Extensions::Actors::Every # rubocop:disable Style/Documentation
17
+ include Legion::Logging::Helper
18
+
19
+ REFRESH_INTERVAL = 1800
20
+
21
+ def runner_class = self.class
22
+ def runner_function = 'manual'
23
+ def run_now? = true
24
+ def use_runner? = false
25
+ def check_subtask? = false
26
+ def generate_task? = false
27
+
28
+ def time
29
+ return REFRESH_INTERVAL unless defined?(Legion::Settings)
30
+
31
+ Legion::Settings.dig(:extensions, :llm, :anthropic, :discovery_interval) || REFRESH_INTERVAL
32
+ end
33
+
34
+ def manual
35
+ log.debug('[anthropic][discovery_refresh] refreshing model list')
36
+ return unless defined?(Legion::LLM::Discovery)
37
+
38
+ Legion::LLM::Discovery.refresh_discovered_models!(provider: :anthropic)
39
+ rescue StandardError => e
40
+ handle_exception(e, level: :warn, handled: true, operation: 'anthropic.actor.discovery_refresh')
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
data/lib/legion/extensions/llm/anthropic/provider.rb CHANGED
@@ -78,19 +78,23 @@ module Legion
78
78
 
79
79
  private
80
80
 
81
- def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
81
+ def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
82
82
  log_render_payload(messages:, tools:, model:, stream:, schema:)
83
83
  system_messages, chat_messages = messages.partition { |message| message.role == :system }
84
84
 
85
+ caching = cache_enabled?
86
+ exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
87
+ cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
88
+
85
89
  {
86
90
  model: model.id,
87
- messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
91
+ messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
88
92
  stream: stream,
89
93
  max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
90
- system: system_content(system_messages),
94
+ system: system_content(system_messages, cache: caching),
91
95
  thinking: thinking_payload(thinking),
92
96
  temperature: temperature,
93
- tools: format_tools(tools),
97
+ tools: format_tools(tools, cache: caching),
94
98
  tool_choice: tool_choice(tool_prefs),
95
99
  output_config: output_config(schema)
96
100
  }.compact
@@ -103,21 +107,24 @@ module Legion
103
107
  end
104
108
  end
105
109
 
106
- def system_content(messages)
107
- content = messages.flat_map { |message| content_blocks(message.content) }
110
+ def system_content(messages, cache: false)
111
+ content = messages.flat_map do |message|
112
+ content_blocks(message.content, cache:)
113
+ end
108
114
  content.empty? ? nil : content
109
115
  end
110
116
 
111
- def format_messages(messages, thinking:)
112
- messages.map do |message|
117
+ def format_messages(messages, thinking:, cacheable_count: 0)
118
+ messages.each_with_index.map do |message, index|
119
+ cache = index < cacheable_count
113
120
  if message.tool_call?
114
- format_tool_call_message(message, thinking: thinking)
121
+ format_tool_call_message(message, thinking:, cache:)
115
122
  elsif message.tool_result?
116
- format_tool_result_message(message)
123
+ format_tool_result_message(message, cache:)
117
124
  else
118
125
  {
119
126
  role: anthropic_role(message.role),
120
- content: content_blocks(message.content, thinking: thinking, message: message)
127
+ content: content_blocks(message.content, thinking:, message:, cache:)
121
128
  }
122
129
  end
123
130
  end
@@ -127,12 +134,12 @@ module Legion
127
134
  role == :assistant ? 'assistant' : 'user'
128
135
  end
129
136
 
130
- def content_blocks(content, thinking: false, message: nil)
137
+ def content_blocks(content, thinking: false, message: nil, cache: false)
131
138
  raw_blocks = raw_content(content)
132
139
  return with_thinking(raw_blocks, message, thinking) if raw_blocks
133
140
 
134
141
  blocks = []
135
- blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
142
+ blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
136
143
  blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
137
144
  with_thinking(blocks, message, thinking)
138
145
  end
@@ -149,8 +156,10 @@ module Legion
149
156
  content.to_s
150
157
  end
151
158
 
152
- def text_block(text)
153
- { type: 'text', text: text }
159
+ def text_block(text, cache: false)
160
+ { type: 'text', text: text }.tap do |block|
161
+ block[:cache_control] = { type: 'ephemeral' } if cache
162
+ end
154
163
  end
155
164
 
156
165
  def attachment_blocks(content)
@@ -175,30 +184,34 @@ module Legion
175
184
  thinking_block ? [thinking_block, *blocks] : blocks
176
185
  end
177
186
 
178
- def format_tool_call_message(message, thinking:)
179
- blocks = content_blocks(message.content, thinking: thinking, message: message)
180
- message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
187
+ def format_tool_call_message(message, thinking:, cache:)
188
+ blocks = content_blocks(message.content, thinking:, message:, cache:)
189
+ message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
181
190
  { role: 'assistant', content: blocks }
182
191
  end
183
192
 
184
- def tool_use_block(tool_call)
193
+ def tool_use_block(tool_call, cache: false)
185
194
  {
186
195
  type: 'tool_use',
187
196
  id: tool_call.id,
188
197
  name: tool_call.name,
189
- input: tool_call.arguments
190
- }
198
+ input: tool_call.arguments,
199
+ cache_control: { type: 'ephemeral' }
200
+ }.tap do |block|
201
+ block.delete(:cache_control) unless cache
202
+ end
191
203
  end
192
204
 
193
- def format_tool_result_message(message)
205
+ def format_tool_result_message(message, cache: false)
194
206
  {
195
207
  role: 'user',
196
208
  content: [
197
209
  {
198
210
  type: 'tool_result',
199
211
  tool_use_id: message.tool_call_id,
200
- content: content_blocks(message.content)
201
- }
212
+ content: content_blocks(message.content, cache:),
213
+ cache_control: { type: 'ephemeral' }
214
+ }.tap { |block| block.delete(:cache_control) unless cache }
202
215
  ]
203
216
  }
204
217
  end
@@ -234,16 +247,20 @@ module Legion
234
247
  end
235
248
  end
236
249
 
237
- def format_tools(tools)
250
+ def format_tools(tools, cache: false)
238
251
  return nil if tools.empty?
239
252
 
240
- tools.values.map do |tool|
253
+ tool_array = tools.values.map do |tool|
241
254
  {
242
255
  name: tool.name,
243
256
  description: tool.description,
244
257
  input_schema: tool_schema(tool)
245
258
  }
246
259
  end
260
+
261
+ tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
262
+
263
+ tool_array
247
264
  end
248
265
 
249
266
  def tool_schema(tool)
@@ -287,7 +304,7 @@ module Legion
287
304
  normalized = normalized.dup
288
305
  normalized.delete(:strict)
289
306
  normalized.delete('strict')
290
- { format: { type: 'json_schema', schema: normalized } }
307
+ { format: { type: 'json', schema: normalized } }
291
308
  end
292
309
 
293
310
  def parse_completion_response(response)
@@ -355,18 +372,13 @@ module Legion
355
372
  )
356
373
  end
357
374
 
358
- def extract_streaming_tool_calls(data, delta_type)
375
+ def extract_streaming_tool_calls(data, _delta_type)
359
376
  content_block = data['content_block']
360
- if content_block && content_block['type'] == 'tool_use'
361
- { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
362
- id: content_block['id'], name: content_block['name'], arguments: ''
363
- ) }
364
- elsif delta_type == 'input_json_delta'
365
- partial = data.dig('delta', 'partial_json')
366
- return nil unless partial
367
-
368
- { nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
369
- end
377
+ return nil unless content_block && content_block['type'] == 'tool_use'
378
+
379
+ { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
380
+ id: content_block['id'], name: content_block['name'], arguments: ''
381
+ ) }
370
382
  end
371
383
 
372
384
  def parse_tool_calls(content_blocks)
@@ -405,6 +417,10 @@ module Legion
405
417
  CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
406
418
  end
407
419
 
420
+ def model_detail(model_name)
421
+ fetch_model_detail(model_name)
422
+ end
423
+
408
424
  def fetch_model_detail(model_name)
409
425
  ctx = infer_context_window(model_name)
410
426
  ctx ? { context_window: ctx } : nil
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Anthropic
7
- VERSION = '0.2.11'
7
+ VERSION = '0.2.13'
8
8
  end
9
9
  end
10
10
  end
data/lib/legion/extensions/llm/anthropic.rb CHANGED
@@ -24,7 +24,7 @@ module Legion
24
24
  instance: {
25
25
  default_model: 'claude-sonnet-4-6',
26
26
  endpoint: 'https://api.anthropic.com',
27
- api_version: '2023-06-01',
27
+ api_version: '2023-10-16',
28
28
  default_max_tokens: 4096,
29
29
  tier: :frontier,
30
30
  transport: :http,
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.11
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -97,6 +97,7 @@ files:
97
97
  - README.md
98
98
  - lex-llm-anthropic.gemspec
99
99
  - lib/legion/extensions/llm/anthropic.rb
100
+ - lib/legion/extensions/llm/anthropic/actors/discovery_refresh.rb
100
101
  - lib/legion/extensions/llm/anthropic/actors/fleet_worker.rb
101
102
  - lib/legion/extensions/llm/anthropic/provider.rb
102
103
  - lib/legion/extensions/llm/anthropic/registry_event_builder.rb