RubyGems - lex-llm-anthropic - Versions diffs - 0.2.10 → 0.2.12 - Mend

lex-llm-anthropic 0.2.10 → 0.2.12

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/CHANGELOG.md +15 -0
data/lib/legion/extensions/llm/anthropic/provider.rb +63 -43
data/lib/legion/extensions/llm/anthropic/version.rb +1 -1
data/lib/legion/extensions/llm/anthropic.rb +2 -0
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 698085fefbd69b9c2689ac472c42ce579c05e0ef2be2d06dc146f324df2ba69e
-  data.tar.gz: c72e6c9974500f0285084da0da19af6390806ccfb4f93a9f42128e50a65205ad
+  metadata.gz: 4f3196e6fe3ab6df1b6f1b06be9a71f317d22f3e37e32dc39515b5e392aa292f
+  data.tar.gz: d7e63aea83f71e31f4a4a4b9881ba331267d2dbc1a6378b737c78f72cc8edc56
 SHA512:
-  metadata.gz: 725109b45b7fdf9849fcbdffba9e1d8b338e0b6e80c23d495c0aac5ef06d733f075c19f6b6de8bf1d34ea77050efc53d71d0ecd42873f59a30afbf7e7bbe60ba
-  data.tar.gz: 3406970db252257e2c074455132e55d7166dc4180a4c06f5b803ed267e7a17030e523ed063511c87e48f5345141891bd9211ee555d749af8e9cde496bf5e8568
+  metadata.gz: 7e83d533b933209dadb0ad6badfc6e03d546ca64e737189094440544eace90ff392ae9262990e81d36684bc37b43b7db586dd8d8b57ea2b512a4cd3124558303
+  data.tar.gz: f78eb6154a3f2a626c64231fc2bf6b36c7dac63fffc4e362e6aa670de5ad40fcc6af4ce29b225e1d73222b8647ef26fa537d25aed3370c6ab40a79ec5f143d8e

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,20 @@
 # Changelog
+## 0.2.12 - 2026-06-01
+- Add `cache_control` markers to Anthropic Messages API requests for prompt caching
+- System content and tool definitions are marked as cache breakpoints when `cache_enabled?`
+- Early conversation turns are cacheable; final message is never cached (prefix break guard)
+- Uses `cache_control_prefix_tokens` from lex-llm base provider for exclude count (default 4)
+## 0.2.11 - 2026-05-21
+- Add `api_version` and `default_max_tokens` to default_settings
+- api_base and anthropic-version read from settings fallback
+- max_tokens reads from settings[:default_max_tokens]
+- Identity headers included via base provider
 ## 0.2.10 - 2026-05-18
 - Fix streaming tool call input accumulation: `build_chunk` now handles both `content_block_start` (tool_use with id+name) and `input_json_delta` (partial argument fragments) events. Previously only the start event was parsed, resulting in tool calls with empty arguments.

data/lib/legion/extensions/llm/anthropic/provider.rb CHANGED Viewed

@@ -35,15 +35,19 @@ module Legion
             def embeddings?(_model) = false
           end
+          def settings
+            Anthropic.default_settings
+          end
           def api_base
-            config.anthropic_api_base || 'https://api.anthropic.com'
+            config.anthropic_api_base || settings[:endpoint] || 'https://api.anthropic.com'
           end
           def headers
-            {
+            identity_headers.merge({
               'x-api-key' => config.anthropic_api_key,
-              'anthropic-version' => config.anthropic_version || '2023-06-01'
-            }.compact
+              'anthropic-version' => config.anthropic_version || settings[:api_version] || '2023-10-02'
+            }.compact)
           end
           def completion_url = '/v1/messages'
@@ -74,19 +78,23 @@ module Legion
           private
-          def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
+          def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists, Metrics/AbcSize
             log_render_payload(messages:, tools:, model:, stream:, schema:)
             system_messages, chat_messages = messages.partition { |message| message.role == :system }
+            caching = cache_enabled?
+            exclude_count = caching ? [cache_control_prefix_tokens, 1].max : 0
+            cacheable_count = caching ? [chat_messages.size - exclude_count, 0].max : 0
             {
               model: model.id,
-              messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking)),
+              messages: format_messages(chat_messages, thinking: thinking_enabled?(thinking), cacheable_count:),
               stream: stream,
-              max_tokens: model.max_tokens || 4096,
-              system: system_content(system_messages),
+              max_tokens: model.max_tokens || settings[:default_max_tokens] || 4096,
+              system: system_content(system_messages, cache: caching),
               thinking: thinking_payload(thinking),
               temperature: temperature,
-              tools: format_tools(tools),
+              tools: format_tools(tools, cache: caching),
               tool_choice: tool_choice(tool_prefs),
               output_config: output_config(schema)
             }.compact
@@ -99,21 +107,24 @@ module Legion
             end
           end
-          def system_content(messages)
-            content = messages.flat_map { |message| content_blocks(message.content) }
+          def system_content(messages, cache: false)
+            content = messages.flat_map do |message|
+              content_blocks(message.content, cache:)
+            end
             content.empty? ? nil : content
           end
-          def format_messages(messages, thinking:)
-            messages.map do |message|
+          def format_messages(messages, thinking:, cacheable_count: 0)
+            messages.each_with_index.map do |message, index|
+              cache = index < cacheable_count
               if message.tool_call?
-                format_tool_call_message(message, thinking: thinking)
+                format_tool_call_message(message, thinking:, cache:)
               elsif message.tool_result?
-                format_tool_result_message(message)
+                format_tool_result_message(message, cache:)
               else
                 {
                   role: anthropic_role(message.role),
-                  content: content_blocks(message.content, thinking: thinking, message: message)
+                  content: content_blocks(message.content, thinking:, message:, cache:)
                 }
               end
             end
@@ -123,12 +134,12 @@ module Legion
             role == :assistant ? 'assistant' : 'user'
           end
-          def content_blocks(content, thinking: false, message: nil)
+          def content_blocks(content, thinking: false, message: nil, cache: false)
             raw_blocks = raw_content(content)
             return with_thinking(raw_blocks, message, thinking) if raw_blocks
             blocks = []
-            blocks << text_block(content_text(content)) unless content_text(content).to_s.empty?
+            blocks << text_block(content_text(content), cache:) unless content_text(content).to_s.empty?
             blocks.concat(attachment_blocks(content)) if content.respond_to?(:attachments)
             with_thinking(blocks, message, thinking)
           end
@@ -145,8 +156,10 @@ module Legion
             content.to_s
           end
-          def text_block(text)
-            { type: 'text', text: text }
+          def text_block(text, cache: false)
+            { type: 'text', text: text }.tap do |block|
+              block[:cache_control] = { type: 'ephemeral' } if cache
+            end
           end
           def attachment_blocks(content)
@@ -171,30 +184,34 @@ module Legion
             thinking_block ? [thinking_block, *blocks] : blocks
           end
-          def format_tool_call_message(message, thinking:)
-            blocks = content_blocks(message.content, thinking: thinking, message: message)
-            message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call) }
+          def format_tool_call_message(message, thinking:, cache:)
+            blocks = content_blocks(message.content, thinking:, message:, cache:)
+            message.tool_calls.each_value { |tool_call| blocks << tool_use_block(tool_call, cache:) }
             { role: 'assistant', content: blocks }
           end
-          def tool_use_block(tool_call)
+          def tool_use_block(tool_call, cache: false)
             {
               type: 'tool_use',
               id: tool_call.id,
               name: tool_call.name,
-              input: tool_call.arguments
-            }
+              input: tool_call.arguments,
+              cache_control: { type: 'ephemeral' }
+            }.tap do |block|
+              block.delete(:cache_control) unless cache
+            end
           end
-          def format_tool_result_message(message)
+          def format_tool_result_message(message, cache: false)
             {
               role: 'user',
               content: [
                 {
                   type: 'tool_result',
                   tool_use_id: message.tool_call_id,
-                  content: content_blocks(message.content)
-                }
+                  content: content_blocks(message.content, cache:),
+                  cache_control: { type: 'ephemeral' }
+                }.tap { |block| block.delete(:cache_control) unless cache }
               ]
             }
           end
@@ -230,16 +247,20 @@ module Legion
             end
           end
-          def format_tools(tools)
+          def format_tools(tools, cache: false)
             return nil if tools.empty?
-            tools.values.map do |tool|
+            tool_array = tools.values.map do |tool|
               {
                 name: tool.name,
                 description: tool.description,
                 input_schema: tool_schema(tool)
               }
             end
+            tool_array.last[:cache_control] = { type: 'ephemeral' } if cache && tool_array.any?
+            tool_array
           end
           def tool_schema(tool)
@@ -283,7 +304,7 @@ module Legion
             normalized = normalized.dup
             normalized.delete(:strict)
             normalized.delete('strict')
-            { format: { type: 'json_schema', schema: normalized } }
+            { format: { type: 'json', schema: normalized } }
           end
           def parse_completion_response(response)
@@ -351,18 +372,13 @@ module Legion
             )
           end
-          def extract_streaming_tool_calls(data, delta_type)
+          def extract_streaming_tool_calls(data, _delta_type)
             content_block = data['content_block']
-            if content_block && content_block['type'] == 'tool_use'
-              { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
-                id: content_block['id'], name: content_block['name'], arguments: ''
-              ) }
-            elsif delta_type == 'input_json_delta'
-              partial = data.dig('delta', 'partial_json')
-              return nil unless partial
-              { nil => Legion::Extensions::Llm::ToolCall.new(id: nil, name: nil, arguments: partial) }
-            end
+            return nil unless content_block && content_block['type'] == 'tool_use'
+            { content_block['id'] => Legion::Extensions::Llm::ToolCall.new(
+              id: content_block['id'], name: content_block['name'], arguments: ''
+            ) }
           end
           def parse_tool_calls(content_blocks)
@@ -401,6 +417,10 @@ module Legion
             CONTEXT_WINDOWS.find { |prefix, _| model_id.start_with?(prefix) }&.last
           end
+          def model_detail(model_name)
+            fetch_model_detail(model_name)
+          end
           def fetch_model_detail(model_name)
             ctx = infer_context_window(model_name)
             ctx ? { context_window: ctx } : nil

data/lib/legion/extensions/llm/anthropic/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Legion
   module Extensions
     module Llm
       module Anthropic
-        VERSION = '0.2.10'
+        VERSION = '0.2.12'
       end
     end
   end

data/lib/legion/extensions/llm/anthropic.rb CHANGED Viewed

@@ -24,6 +24,8 @@ module Legion
             instance: {
               default_model: 'claude-sonnet-4-6',
               endpoint: 'https://api.anthropic.com',
+              api_version: '2023-10-02',
+              default_max_tokens: 4096,
               tier: :frontier,
               transport: :http,
               credentials: { api_key: 'env://ANTHROPIC_API_KEY' },

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lex-llm-anthropic
 version: !ruby/object:Gem::Version
-  version: 0.2.10
+  version: 0.2.12
 platform: ruby
 authors:
 - LegionIO