RubyGems - turnkit - Versions diffs - 0.2.2 → 0.2.3 - Mend

turnkit 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +73 -135
data/lib/turnkit/adapters/ruby_llm.rb +40 -4
data/lib/turnkit/turn.rb +4 -1
data/lib/turnkit/usage.rb +5 -3
data/lib/turnkit/version.rb +1 -1
data/lib/turnkit.rb +2 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: be681d2deacaf1e3be9de2eb84eef412a686baf90a8b0c0a41280cf6a76ecc55
-  data.tar.gz: f0e6d232f50a67ce4a2cd5c46360549b7755a7b6ab100968bd9a2bf16f3cab0a
+  metadata.gz: 2c02ad5eef683595c702a33806438f414ed2da9e18c607a8b314bba4ae442404
+  data.tar.gz: 4da3877b7c20aecae1dd77e6df4497bb64a3909d28419fb1413feb37fa5fa298
 SHA512:
-  metadata.gz: dc9fbeca56bbdc7e737a56dcbb0caa87eb17186c035f052285532749e1e27546884d020c216c72f008950ad38053fc67dbd71e5cfd8d572f169029d4a78ba116
-  data.tar.gz: ae8b955e099d1d81026ff34b3bae9e4a5009122e1e8cccaa64aed0675f888c0cb12fcae503781daa6ad710e8f9f56aec2387c1ce856cf66d6850430141b28dfe
+  metadata.gz: b5de4c365826d8a4154d2ee013fe0f7289796b91b63eb34ad81693993eb55b8f8d0282f8415e7798f9eb698d2f6f4aa52b79949e1c89c0c64effe506cf26ef0b
+  data.tar.gz: b168324cf4f97485ce7854006565441fd0fe67e1f84835805d98d67f27a2a793fe2ce8bd27a6939c6ccbf3cc92023bc93c8aff5e8049fb0b2991a50548d211d6

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # Changelog
+## 0.2.3 - 2026-06-06
+- Add Anthropic prompt cache support for stable system prompt sections.
+- Track cache write tokens and aggregate model costs on turns.
+- Refresh README usage examples for prompt caching and usage tracking.
 ## 0.2.0 - 2026-06-04
 - Add configurable system prompt sections and custom system prompt builders.

data/README.md CHANGED Viewed

@@ -26,33 +26,9 @@ Set a provider key:
 ```sh
 export ANTHROPIC_API_KEY=...
-# or OPENAI_API_KEY=..., GEMINI_API_KEY=..., OPENROUTER_API_KEY=...
 ```
-TurnKit uses RubyLLM by default. Choose the provider by choosing a RubyLLM model name:
-```ruby
-TurnKit.default_model = "claude-sonnet-4-5" # Anthropic
-# TurnKit.default_model = "gpt-4.1-mini"    # OpenAI
-# TurnKit.default_model = "gemini-2.5-flash" # Gemini
-```
-You can also override the model per agent or per run.
-To use a different model SDK, provide a client object that responds to `chat`:
-```ruby
-class MyClient < TurnKit::Client
-  def chat(model:, messages:, tools:, instructions:, temperature: nil, metadata: nil)
-    # Call your provider here.
-    TurnKit::Result.new(text: "provider response", model: model)
-  end
-end
-TurnKit.client = MyClient.new
-```
-Ask an agent:
+Create an agent:
 ```ruby
 require "turnkit"
@@ -68,6 +44,22 @@ puts turn.output_text
 ## Usage
+Choose a model:
+```ruby
+TurnKit.default_model = "claude-sonnet-4-5"
+```
+Use OpenAI:
+```sh
+export OPENAI_API_KEY=...
+```
+```ruby
+TurnKit.default_model = "gpt-4.1-mini"
+```
 Create a conversation:
 ```ruby
@@ -101,7 +93,7 @@ class SaveReport < TurnKit::Tool
 end
 ```
-Use the tool:
+Use a tool:
 ```ruby
 agent = TurnKit::Agent.new(
@@ -125,142 +117,99 @@ agent = TurnKit::Agent.new(
 )
 ```
-List available skills:
+Delegate to sub-agents:
 ```ruby
-research = TurnKit::Skill.from_file(
-  "skills/research.md",
-  description: "Use for source-backed research tasks."
+writer = TurnKit::Agent.new(
+  name: "writer",
+  description: "Draft concise copy."
 )
-agent = TurnKit::Agent.new(
-  name: "researcher",
-  instructions: "Prefer primary sources.",
-  tools: [WebSearch, ReadWebPage],
-  available_skills: [research]
+editor = TurnKit::Agent.new(
+  name: "editor",
+  sub_agents: [writer]
 )
-```
-Add subject context:
-```ruby
-article = Article.find(1)
-conversation = agent.conversation(subject: article)
+turn = editor.conversation.ask("Ask the writer for three headlines.")
+puts turn.output_text
 ```
-Choose prompt sections:
+Use prompt caching:
 ```ruby
-agent = TurnKit::Agent.new(
-  name: "writer",
-  instructions: "Write plainly.",
-  prompt_sections: %i[agent instructions tools environment]
-)
+TurnKit.prompt_cache = :auto
 ```
-Build a custom prompt:
+Disable prompt caching:
 ```ruby
-agent = TurnKit::Agent.new(
-  name: "custom",
-  instructions: "Answer in JSON.",
-  system_prompt: ->(prompt) {
-    [
-      prompt.agent_section,
-      prompt.instructions_section,
-      "Return only valid JSON."
-    ].compact.join("\n\n")
-  }
-)
+TurnKit.prompt_cache = :off
 ```
-Use safe prompt data blocks for pipeline-specific prompts:
+Split custom prompts:
 ```ruby
 agent = TurnKit::Agent.new(
-  name: "researcher",
-  system_prompt: ->(prompt) {
-    [
-      prompt.section(:agent),
-      prompt.section(:behavior),
-      prompt.untrusted_section(
-        :retrieval_context,
-        ExternalSearch.results_for("turnkit"),
-        label: "Retrieved external evidence."
-      ),
-      prompt.section(:tools),
-      prompt.section(:environment)
-    ].compact.join("\n\n")
-  }
+  name: "cached",
+  system_prompt: [
+    "Stable instructions and tool guidance.",
+    TurnKit::SystemPrompt::CACHE_BOUNDARY,
+    "Dynamic subject and live context."
+  ].join("\n")
 )
 ```
-Choose a prompt mode:
+Inspect usage:
 ```ruby
-TurnKit::Agent.new(name: "main", prompt_mode: :full)    # default sections
-TurnKit::Agent.new(name: "worker", prompt_mode: :minimal) # agent, instructions, behavior, tools, environment
-TurnKit::Agent.new(name: "raw", prompt_mode: :none)     # tiny TurnKit identity prompt
+record = TurnKit.store.load_turn(turn.id)
+record.fetch("usage")
 ```
-TurnKit automatically uses the minimal prompt mode for delegated sub-agent turns unless the child agent sets its own `prompt_mode`.
-Inject live context on each turn:
+Return usage from custom clients:
 ```ruby
-TurnKit.context_contributors << ->(context) {
-  TurnKit::LiveContextContribution.new(
-    name: "account",
-    content: AccountSummary.for(context.conversation.metadata["account_id"]),
-    trusted: false
-  )
-}
+class MyClient < TurnKit::Client
+  def chat(model:, messages:, tools:, instructions:, temperature: nil, metadata: nil)
+    TurnKit::Result.new(
+      text: "provider response",
+      model: model,
+      usage: TurnKit::Usage.new(
+        input_tokens: 100,
+        output_tokens: 20,
+        cached_tokens: 80,
+        cache_write_tokens: 100
+      )
+    )
+  end
+end
 ```
-Live context and subject context are rendered below `TurnKit::SystemPrompt::CACHE_BOUNDARY`, so provider adapters can reuse the stable prefix in the future.
-Add model-specific prompt guidance:
+Split instructions inside custom clients:
 ```ruby
-TurnKit.model_prompt_contributors[/claude/] = ->(context) {
-  TurnKit::PromptContribution.new(
-    stable_prefix: "Provider guidance for #{context.model}.",
-    section_overrides: {
-      behavior: "Be concise, tool-aware, and explicit about uncertainty."
-    }
-  )
-}
+stable, dynamic = TurnKit::SystemPrompt.split_cache_boundary(instructions)
 ```
-Inspect prompt shape without storing raw prompt text:
+Send `stable` with provider cache controls.
-```ruby
-prompt = TurnKit::SystemPrompt.new(agent: agent, turn: turn, conversation: conversation)
-prompt.report
-# => { "chars" => ..., "hash" => ..., "stable_chars" => ..., "dynamic_chars" => ... }
-```
+Send `dynamic` as normal prompt content.
-Delegate to sub-agents:
+Use a custom client:
 ```ruby
-writer = TurnKit::Agent.new(
-  name: "writer",
-  description: "Draft concise copy."
-)
-editor = TurnKit::Agent.new(
-  name: "editor",
-  sub_agents: [writer]
-)
-turn = editor.conversation.ask("Ask the writer for three headlines.")
-puts turn.output_text
+TurnKit.client = MyClient.new
 ```
 Install Rails persistence:
 ```sh
 bin/rails generate turnkit:install
+```
+Run migrations:
+```sh
 bin/rails db:migrate
 ```
@@ -269,7 +218,6 @@ Configure Rails:
 ```ruby
 TurnKit.store = TurnKit::ActiveRecordStore.new
 TurnKit.default_model = "claude-sonnet-4-5"
-TurnKit.timeout = 300
 ```
 Reconcile stale turns:
@@ -289,9 +237,10 @@ TurnKit.timeout = 300
 TurnKit.max_depth = 3
 TurnKit.max_tool_executions = 100
 TurnKit.cost_limit = nil
+TurnKit.prompt_cache = :auto
 ```
-Override defaults per agent:
+Override an agent:
 ```ruby
 agent = TurnKit::Agent.new(
@@ -303,29 +252,18 @@ agent = TurnKit::Agent.new(
 )
 ```
-Override the model for a single conversation or turn:
-```ruby
-conversation = agent.conversation(model: "claude-opus-4-1")
-turn = conversation.run!(model: "gpt-4.1-mini")
-```
 | Option | Description |
 | --- | --- |
-| `default_model` | Set the default RubyLLM model. The model name determines the provider. |
-| `client` | Set the model client. Defaults to `TurnKit::Adapters::RubyLLM.new`. |
+| `default_model` | Set the default RubyLLM model. |
+| `client` | Set the model client. |
 | `store` | Set the conversation store. |
 | `max_iterations` | Limit model calls per turn. |
 | `timeout` | Limit seconds per root turn. |
 | `max_depth` | Limit sub-agent nesting. |
 | `max_tool_executions` | Limit tool calls per root turn. |
 | `cost_limit` | Limit cost per root turn. |
-| `prompt_sections` | Set default system prompt sections. |
-| `prompt_behavior` | Override the default behavior section text. |
-| `prompt_data_max_chars` | Limit data-block content rendered into prompts. |
-| `context_contributors` | Add live per-turn prompt context blocks. |
-| `system_prompt_contributors` | Add global prompt prefix/suffix/section overrides. |
-| `model_prompt_contributors` | Add model-matched prompt contributions. |
+| `prompt_cache` | Use provider prompt caching. |
+| `prompt_sections` | Set default prompt sections. |
 ## Contributing

data/lib/turnkit/adapters/ruby_llm.rb CHANGED Viewed

@@ -9,7 +9,7 @@ module TurnKit
         configure_from_environment
         chat = ::RubyLLM.chat(model: model)
-        chat.with_instructions(instructions) if instructions && !instructions.empty?
+        add_instructions(chat, instructions, model: model)
         chat.with_temperature(temperature) if temperature
         Array(tools).each { |tool| chat.with_tool(ruby_llm_tool(tool)) }
         Array(messages).each { |message| add_message(chat, message) }
@@ -55,6 +55,37 @@ module TurnKit
           )
         end
+        def add_instructions(chat, instructions, model:)
+          return if instructions.nil? || instructions.empty?
+          if prompt_cache_enabled? && anthropic_model?(model) && instructions.include?(SystemPrompt::CACHE_BOUNDARY)
+            stable, dynamic = SystemPrompt.split_cache_boundary(instructions)
+            add_system_message(chat, stable, cache: true)
+            add_system_message(chat, dynamic, cache: false)
+          else
+            chat.with_instructions(instructions)
+          end
+        end
+        def add_system_message(chat, content, cache: false)
+          content = content.to_s.strip
+          return if content.empty?
+          if cache
+            content = ::RubyLLM::Providers::Anthropic::Content.new(content, cache: true)
+          end
+          chat.add_message(role: :system, content: content)
+        end
+        def prompt_cache_enabled?
+          TurnKit.prompt_cache != :off
+        end
+        def anthropic_model?(model)
+          model.to_s.start_with?("claude")
+        end
         def ruby_llm_tool_calls(tool_calls)
           return nil if tool_calls.nil? || tool_calls.empty?
@@ -88,9 +119,10 @@ module TurnKit
             ToolCall.new(id: call.id, name: call.name, arguments: call.arguments)
           end
           usage = Usage.new(
-            input_tokens: response.respond_to?(:input_tokens) ? response.input_tokens : 0,
-            output_tokens: response.respond_to?(:output_tokens) ? response.output_tokens : 0,
-            cached_tokens: response.respond_to?(:cached_tokens) ? response.cached_tokens : 0
+            input_tokens: token_value(response, :input_tokens),
+            output_tokens: token_value(response, :output_tokens),
+            cached_tokens: token_value(response, :cached_tokens),
+            cache_write_tokens: token_value(response, :cache_creation_tokens)
           )
           Result.new(
             text: response.respond_to?(:content) ? response.content.to_s : response.to_s,
@@ -99,6 +131,10 @@ module TurnKit
             model: response.respond_to?(:model_id) ? response.model_id : model
           )
         end
+        def token_value(response, method)
+          response.respond_to?(method) ? response.public_send(method).to_i : 0
+        end
     end
   end
 end

data/lib/turnkit/turn.rb CHANGED Viewed

@@ -123,9 +123,12 @@ module TurnKit
           "input_tokens" => current["input_tokens"].to_i + usage.input_tokens,
           "output_tokens" => current["output_tokens"].to_i + usage.output_tokens,
           "cached_tokens" => current["cached_tokens"].to_i + usage.cached_tokens,
+          "cache_write_tokens" => current["cache_write_tokens"].to_i + usage.cache_write_tokens,
           "total_tokens" => current["total_tokens"].to_i + usage.total_tokens
         }
-        update!(usage: totals, heartbeat_at: Clock.now)
+        attributes = { usage: totals, heartbeat_at: Clock.now }
+        attributes[:cost] = @record["cost"].to_f + usage.cost.to_f if usage.cost
+        update!(attributes)
       end
       def update!(attributes)

data/lib/turnkit/usage.rb CHANGED Viewed

@@ -2,17 +2,18 @@
 module TurnKit
   class Usage
-    attr_reader :input_tokens, :output_tokens, :cached_tokens, :cost
+    attr_reader :input_tokens, :output_tokens, :cached_tokens, :cache_write_tokens, :cost
-    def initialize(input_tokens: 0, output_tokens: 0, cached_tokens: 0, cost: nil)
+    def initialize(input_tokens: 0, output_tokens: 0, cached_tokens: 0, cache_write_tokens: 0, cost: nil)
       @input_tokens = input_tokens.to_i
       @output_tokens = output_tokens.to_i
       @cached_tokens = cached_tokens.to_i
+      @cache_write_tokens = cache_write_tokens.to_i
       @cost = cost
     end
     def total_tokens
-      input_tokens + output_tokens + cached_tokens
+      input_tokens + output_tokens + cached_tokens + cache_write_tokens
     end
     def to_h
@@ -20,6 +21,7 @@ module TurnKit
         "input_tokens" => input_tokens,
         "output_tokens" => output_tokens,
         "cached_tokens" => cached_tokens,
+        "cache_write_tokens" => cache_write_tokens,
         "total_tokens" => total_tokens,
         "cost" => cost
       }.compact

data/lib/turnkit/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module TurnKit
-  VERSION = "0.2.2"
+  VERSION = "0.2.3"
 end

data/lib/turnkit.rb CHANGED Viewed

@@ -41,7 +41,7 @@ module TurnKit
   class << self
     attr_accessor :default_model, :client, :store, :logger
     attr_accessor :max_iterations, :timeout, :max_depth, :max_tool_executions
-    attr_accessor :cost_limit
+    attr_accessor :cost_limit, :prompt_cache
     attr_accessor :prompt_sections, :prompt_behavior, :available_skills
     attr_accessor :prompt_data_max_chars, :context_contributors
     attr_accessor :system_prompt_contributors, :model_prompt_contributors
@@ -56,6 +56,7 @@ module TurnKit
   self.timeout = 300
   self.max_depth = 3
   self.max_tool_executions = 100
+  self.prompt_cache = :auto
   self.prompt_sections = SystemPrompt::DEFAULT_SECTIONS.dup
   self.prompt_data_max_chars = 20_000
   self.available_skills = []

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: turnkit
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.2.3
 platform: ruby
 authors:
 - Sam Couch
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-06-05 00:00:00.000000000 Z
+date: 2026-06-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby_llm