turnkit 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +306 -3
- data/lib/turnkit/adapters/ruby_llm.rb +12 -1
- data/lib/turnkit/agent.rb +23 -2
- data/lib/turnkit/client.rb +1 -1
- data/lib/turnkit/compaction.rb +406 -0
- data/lib/turnkit/conversation.rb +15 -4
- data/lib/turnkit/cost.rb +9 -4
- data/lib/turnkit/error.rb +1 -0
- data/lib/turnkit/message.rb +21 -1
- data/lib/turnkit/message_projection.rb +28 -1
- data/lib/turnkit/turn.rb +21 -2
- data/lib/turnkit/usage.rb +7 -3
- data/lib/turnkit/version.rb +1 -1
- data/lib/turnkit.rb +3 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 34429a11d156c9631705ec193c77c2ad166fb3dffc182a7b730cffd38b52f694
|
|
4
|
+
data.tar.gz: c497d2042388a33e80c037145e82a6adf1cc47286073441b7fb7f21fcd4a89b7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 330444b7c8964271b8f11ec562f22c331cf6f00d470880082edc1efa263c33708e68b436ed29276c417ec173044993ecb105c05c788fa84405ac34f90f9521a2
|
|
7
|
+
data.tar.gz: 5bb9900c687ffa6c9eed0678c0d1a36bba08c79ceb0a4ab3767046e772394b4794c5f81f2b9a52142411873f8aea1bc19e148b101b00fc4fd1cb6fe89933f531
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.6 - 2026-06-07
|
|
4
|
+
|
|
5
|
+
- Add automatic context compaction for long conversations. TurnKit now stores append-only `context_summary` messages and projects compacted history into future model calls while keeping the full transcript durable.
|
|
6
|
+
|
|
7
|
+
## 0.2.5 - 2026-06-06
|
|
8
|
+
|
|
9
|
+
- Add per-agent and per-turn provider thinking configuration.
|
|
10
|
+
|
|
3
11
|
## 0.2.4 - 2026-06-06
|
|
4
12
|
|
|
5
13
|
- Add Anthropic prompt cache support for stable system prompt sections.
|
data/README.md
CHANGED
|
@@ -22,12 +22,21 @@ bundle install
|
|
|
22
22
|
|
|
23
23
|
## Quick Start
|
|
24
24
|
|
|
25
|
-
Set a provider key:
|
|
25
|
+
Set a provider key. TurnKit uses RubyLLM under the hood and defaults to Anthropic Claude:
|
|
26
26
|
|
|
27
27
|
```sh
|
|
28
28
|
export ANTHROPIC_API_KEY=...
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
| Provider | Env var | Example model |
|
|
32
|
+
| --- | --- | --- |
|
|
33
|
+
| Anthropic | `ANTHROPIC_API_KEY` | `claude-sonnet-4-5` |
|
|
34
|
+
| OpenAI | `OPENAI_API_KEY` | `gpt-4.1-mini` |
|
|
35
|
+
| Gemini | `GEMINI_API_KEY` | `gemini-2.5-flash` |
|
|
36
|
+
|
|
37
|
+
> [!WARNING]
|
|
38
|
+
> TurnKit defaults to `claude-sonnet-4-5`. If `ANTHROPIC_API_KEY` is unset or blank, set `TurnKit.default_model` to a provider you have configured.
|
|
39
|
+
|
|
31
40
|
Create an agent:
|
|
32
41
|
|
|
33
42
|
```ruby
|
|
@@ -68,6 +77,52 @@ Set an OpenAI model:
|
|
|
68
77
|
TurnKit.default_model = "gpt-4.1-mini"
|
|
69
78
|
```
|
|
70
79
|
|
|
80
|
+
Use Gemini:
|
|
81
|
+
|
|
82
|
+
```sh
|
|
83
|
+
export GEMINI_API_KEY=...
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Set a Gemini model:
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
TurnKit.default_model = "gemini-2.5-flash"
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Thinking
|
|
93
|
+
|
|
94
|
+
Enable provider reasoning or extended thinking per agent:
|
|
95
|
+
|
|
96
|
+
```ruby
|
|
97
|
+
agent = TurnKit::Agent.new(
|
|
98
|
+
name: "reasoner",
|
|
99
|
+
model: "claude-sonnet-4-5",
|
|
100
|
+
thinking: { budget: 4_000 }
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Use effort-based thinking for providers that support it:
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
agent = TurnKit::Agent.new(
|
|
108
|
+
name: "reasoner",
|
|
109
|
+
model: "gemini-2.5-flash",
|
|
110
|
+
thinking: { effort: :high }
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Override or disable thinking for one turn:
|
|
115
|
+
|
|
116
|
+
```ruby
|
|
117
|
+
conversation = agent.conversation
|
|
118
|
+
conversation.ask("Solve this carefully.", thinking: { budget: 8_000 })
|
|
119
|
+
conversation.ask("Answer quickly.", thinking: nil)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
TurnKit passes `thinking` to RubyLLM as `{ effort:, budget: }`. Anthropic requires `budget`; Gemini and OpenRouter can use `effort`, `budget`, or both depending on the model.
|
|
123
|
+
|
|
124
|
+
When the provider reports reasoning usage, TurnKit records it as `thinking_tokens` and includes it in usage totals and cost calculation.
|
|
125
|
+
|
|
71
126
|
### Conversations
|
|
72
127
|
|
|
73
128
|
Create a conversation:
|
|
@@ -93,6 +148,93 @@ turn = conversation.run!
|
|
|
93
148
|
puts turn.output_text
|
|
94
149
|
```
|
|
95
150
|
|
|
151
|
+
### Context compaction
|
|
152
|
+
|
|
153
|
+
TurnKit automatically compacts long conversations. Older messages are summarized for future model calls, while the original transcript remains stored durably.
|
|
154
|
+
|
|
155
|
+
```ruby
|
|
156
|
+
conversation = agent.conversation
|
|
157
|
+
conversation.ask("Work through this long task.")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
By default, compaction is enabled and uses the current turn model for the summary call. If a turn runs with `gpt-5`, compaction uses `gpt-5` unless you configure a separate summary model.
|
|
161
|
+
|
|
162
|
+
Disable compaction globally:
|
|
163
|
+
|
|
164
|
+
```ruby
|
|
165
|
+
TurnKit.compaction = false
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Use a different model for summaries:
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
TurnKit.compaction = {
|
|
172
|
+
model: "gpt-4.1-mini"
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
You can also configure the compaction threshold and estimated context limit:
|
|
177
|
+
|
|
178
|
+
```ruby
|
|
179
|
+
TurnKit.compaction = {
|
|
180
|
+
model: "gpt-4.1-mini",
|
|
181
|
+
threshold: 0.75,
|
|
182
|
+
context_limit: 128_000
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Configure compaction for one agent:
|
|
187
|
+
|
|
188
|
+
```ruby
|
|
189
|
+
agent = TurnKit::Agent.new(
|
|
190
|
+
name: "engineer",
|
|
191
|
+
model: "gpt-5",
|
|
192
|
+
compaction: {
|
|
193
|
+
model: "gpt-4.1-mini",
|
|
194
|
+
threshold: 0.75,
|
|
195
|
+
context_limit: 128_000
|
|
196
|
+
}
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
In this example, normal turns use `gpt-5` and compaction summaries use `gpt-4.1-mini`.
|
|
201
|
+
|
|
202
|
+
Override the model for one manual compaction:
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
conversation.compact!(model: "gpt-4.1-mini")
|
|
206
|
+
conversation.compact!(focus: "billing migration", model: "gpt-4.1-mini")
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Disable compaction for a single turn:
|
|
210
|
+
|
|
211
|
+
```ruby
|
|
212
|
+
conversation.ask("Continue", compact: false)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
Manually compact a conversation:
|
|
216
|
+
|
|
217
|
+
```ruby
|
|
218
|
+
conversation.compact!
|
|
219
|
+
conversation.compact!(focus: "billing migration")
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Compaction is append-only: TurnKit stores a `context_summary` message with metadata describing the message range it replaces for model projection. The original messages are not deleted, so `conversation.messages` remains the full durable transcript. Future model calls see a compacted projection that includes a reference-only summary and the recent tail.
|
|
223
|
+
|
|
224
|
+
The model-visible projection uses a synthetic summary exchange followed by recent messages:
|
|
225
|
+
|
|
226
|
+
```text
|
|
227
|
+
user: What did we do so far?
|
|
228
|
+
assistant: [CONTEXT COMPACTION — REFERENCE ONLY] ...
|
|
229
|
+
user: latest request
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
For a local smoke test without calling a real provider, run:
|
|
233
|
+
|
|
234
|
+
```sh
|
|
235
|
+
ruby script/manual_compaction.rb
|
|
236
|
+
```
|
|
237
|
+
|
|
96
238
|
### Tools
|
|
97
239
|
|
|
98
240
|
Create a tool:
|
|
@@ -130,6 +272,76 @@ turn = agent.conversation.ask("Save a short status report.")
|
|
|
130
272
|
puts turn.output_text
|
|
131
273
|
```
|
|
132
274
|
|
|
275
|
+
#### Defining application tools
|
|
276
|
+
|
|
277
|
+
Tools are classes, not instances. Namespaced tools work fine, and the default tool name comes from the class name: `Assistant::Tools::WebSearch` becomes `web_search`.
|
|
278
|
+
|
|
279
|
+
```ruby
|
|
280
|
+
module Assistant
|
|
281
|
+
module Tools
|
|
282
|
+
class WebSearch < TurnKit::Tool
|
|
283
|
+
description "Search the web for current information."
|
|
284
|
+
usage_hint "Use when current external information is needed."
|
|
285
|
+
|
|
286
|
+
parameter :objective, :string, required: true
|
|
287
|
+
parameter :search_queries, :array, required: false
|
|
288
|
+
|
|
289
|
+
def call(objective:, search_queries: nil, context:)
|
|
290
|
+
ParallelClient.new.web_search(
|
|
291
|
+
objective: objective,
|
|
292
|
+
search_queries: search_queries
|
|
293
|
+
)
|
|
294
|
+
end
|
|
295
|
+
end
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Register tool classes on the agent:
|
|
301
|
+
|
|
302
|
+
```ruby
|
|
303
|
+
agent = TurnKit::Agent.new(
|
|
304
|
+
name: "researcher",
|
|
305
|
+
tools: [
|
|
306
|
+
Assistant::Tools::WebSearch,
|
|
307
|
+
Assistant::Tools::ReadWebPage
|
|
308
|
+
]
|
|
309
|
+
)
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
#### Tool context
|
|
313
|
+
|
|
314
|
+
Every tool receives a `context:` object. Use it for logging, correlation, persistence, and domain scoping:
|
|
315
|
+
|
|
316
|
+
```ruby
|
|
317
|
+
def call(query:, context:)
|
|
318
|
+
context.turn # The TurnKit::Turn being run
|
|
319
|
+
context.execution # The TurnKit::ToolExecution for this tool call
|
|
320
|
+
|
|
321
|
+
{ query: query }
|
|
322
|
+
end
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
If your application already uses a `context:` keyword for something else, use `turnkit_context:` instead:
|
|
326
|
+
|
|
327
|
+
```ruby
|
|
328
|
+
def call(query:, turnkit_context:)
|
|
329
|
+
{ turn_id: turnkit_context.turn.id, query: query }
|
|
330
|
+
end
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
#### Tool return values
|
|
334
|
+
|
|
335
|
+
Prefer returning a `Hash`. TurnKit serializes the normalized value as the tool result:
|
|
336
|
+
|
|
337
|
+
| Return value | Stored tool result |
|
|
338
|
+
| --- | --- |
|
|
339
|
+
| `Hash` | Keys are stringified. |
|
|
340
|
+
| `Array` | Wrapped as `{ "items" => [...] }`. |
|
|
341
|
+
| Scalar | Wrapped as `{ "result" => value.to_s }`. |
|
|
342
|
+
|
|
343
|
+
Avoid returning arbitrary objects unless you convert them to a plain Hash or Array first.
|
|
344
|
+
|
|
133
345
|
### Skills
|
|
134
346
|
|
|
135
347
|
Load a skill:
|
|
@@ -260,7 +472,7 @@ Create a client:
|
|
|
260
472
|
|
|
261
473
|
```ruby
|
|
262
474
|
class MyClient < TurnKit::Client
|
|
263
|
-
def chat(model:, messages:, tools:, instructions:, temperature: nil, metadata: nil)
|
|
475
|
+
def chat(model:, messages:, tools:, instructions:, temperature: nil, thinking: nil, metadata: nil)
|
|
264
476
|
TurnKit::Result.new(
|
|
265
477
|
text: "provider response",
|
|
266
478
|
model: model,
|
|
@@ -295,6 +507,17 @@ Install Rails persistence:
|
|
|
295
507
|
bin/rails generate turnkit:install
|
|
296
508
|
```
|
|
297
509
|
|
|
510
|
+
The installer creates:
|
|
511
|
+
|
|
512
|
+
- `config/initializers/turnkit.rb`
|
|
513
|
+
- `app/models/turnkit/conversation.rb`
|
|
514
|
+
- `app/models/turnkit/turn.rb`
|
|
515
|
+
- `app/models/turnkit/message.rb`
|
|
516
|
+
- `app/models/turnkit/tool_execution.rb`
|
|
517
|
+
- a migration for TurnKit persistence
|
|
518
|
+
|
|
519
|
+
The generated migration currently uses `ActiveRecord::Migration[7.1]`. In a newer Rails app, update that version if your app requires it, for example `ActiveRecord::Migration[8.1]`.
|
|
520
|
+
|
|
298
521
|
Run migrations:
|
|
299
522
|
|
|
300
523
|
```sh
|
|
@@ -307,12 +530,88 @@ Configure Rails:
|
|
|
307
530
|
TurnKit.store = TurnKit::ActiveRecordStore.new
|
|
308
531
|
```
|
|
309
532
|
|
|
533
|
+
Suggested Rails file layout for your application AI code:
|
|
534
|
+
|
|
535
|
+
```text
|
|
536
|
+
app/models/assistant/
|
|
537
|
+
tools/
|
|
538
|
+
web_search.rb
|
|
539
|
+
read_web_page.rb
|
|
540
|
+
skills/
|
|
541
|
+
prompts/
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
If you prefer to keep AI infrastructure out of `app/models`, add an autoloaded directory such as:
|
|
545
|
+
|
|
546
|
+
```text
|
|
547
|
+
app/ai/
|
|
548
|
+
tools/
|
|
549
|
+
skills/
|
|
550
|
+
prompts/
|
|
551
|
+
```
|
|
552
|
+
|
|
310
553
|
Reconcile stale turns:
|
|
311
554
|
|
|
312
555
|
```ruby
|
|
313
556
|
TurnKit.reconcile_stale!
|
|
314
557
|
```
|
|
315
558
|
|
|
559
|
+
#### Debugging Rails persistence
|
|
560
|
+
|
|
561
|
+
Inspect the latest persisted turn in a Rails console:
|
|
562
|
+
|
|
563
|
+
```ruby
|
|
564
|
+
turn = Turnkit::Turn.order(created_at: :desc).first
|
|
565
|
+
turn.status
|
|
566
|
+
turn.error
|
|
567
|
+
turn.output_text
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
Check whether the model actually called tools:
|
|
571
|
+
|
|
572
|
+
```ruby
|
|
573
|
+
Turnkit::ToolExecution
|
|
574
|
+
.where(turn_uid: turn.uid)
|
|
575
|
+
.order(:created_at)
|
|
576
|
+
.map { |execution|
|
|
577
|
+
{
|
|
578
|
+
name: execution.tool_name,
|
|
579
|
+
status: execution.status,
|
|
580
|
+
arguments: execution.arguments,
|
|
581
|
+
result_keys: execution.result&.keys,
|
|
582
|
+
error: execution.error
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
#### Live smoke test
|
|
588
|
+
|
|
589
|
+
Use a model whose provider key is configured, then run a real tool-using turn:
|
|
590
|
+
|
|
591
|
+
```ruby
|
|
592
|
+
TurnKit.default_model = "gpt-4.1-mini"
|
|
593
|
+
|
|
594
|
+
agent = TurnKit::Agent.new(
|
|
595
|
+
name: "researcher",
|
|
596
|
+
instructions: "Use web_search, then read_web_page, before answering.",
|
|
597
|
+
tools: [
|
|
598
|
+
Assistant::Tools::WebSearch,
|
|
599
|
+
Assistant::Tools::ReadWebPage
|
|
600
|
+
]
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
turn = agent.conversation.ask(
|
|
604
|
+
"Search for the TurnKit Ruby gem, read the first useful result, then summarize it."
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
puts turn.output_text
|
|
608
|
+
|
|
609
|
+
pp Turnkit::ToolExecution
|
|
610
|
+
.where(turn_uid: turn.id)
|
|
611
|
+
.order(:created_at)
|
|
612
|
+
.pluck(:tool_name, :status, :error)
|
|
613
|
+
```
|
|
614
|
+
|
|
316
615
|
## Options
|
|
317
616
|
|
|
318
617
|
Configure defaults:
|
|
@@ -327,6 +626,7 @@ TurnKit.cost_limit = nil
|
|
|
327
626
|
TurnKit.cost_rates = {}
|
|
328
627
|
TurnKit.cost_calculator = nil
|
|
329
628
|
TurnKit.prompt_cache = :auto
|
|
629
|
+
TurnKit.compaction = true
|
|
330
630
|
```
|
|
331
631
|
|
|
332
632
|
Override an agent:
|
|
@@ -337,7 +637,8 @@ agent = TurnKit::Agent.new(
|
|
|
337
637
|
model: "gpt-4.1-mini",
|
|
338
638
|
max_iterations: 10,
|
|
339
639
|
timeout: 60,
|
|
340
|
-
cost_limit: 0.25
|
|
640
|
+
cost_limit: 0.25,
|
|
641
|
+
thinking: { effort: :low }
|
|
341
642
|
)
|
|
342
643
|
```
|
|
343
644
|
|
|
@@ -350,9 +651,11 @@ agent = TurnKit::Agent.new(
|
|
|
350
651
|
| `timeout` | Limit seconds per root turn. |
|
|
351
652
|
| `max_tool_executions` | Limit tool calls per root turn. |
|
|
352
653
|
| `cost_limit` | Limit cost per root turn. |
|
|
654
|
+
| `thinking` | Configure provider reasoning or extended thinking per agent. |
|
|
353
655
|
| `cost_rates` | Override prices by model. |
|
|
354
656
|
| `cost_calculator` | Override cost calculation. |
|
|
355
657
|
| `prompt_cache` | Use provider prompt caching. |
|
|
658
|
+
| `compaction` | Enable, disable, or configure automatic context compaction. |
|
|
356
659
|
|
|
357
660
|
## Contributing
|
|
358
661
|
|
|
data/lib/turnkit/adapters/ruby_llm.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
module Adapters
|
|
5
5
|
class RubyLLM < Client
|
|
6
|
-
def chat(model:, messages:, tools:, instructions:, temperature: nil, metadata: nil)
|
|
6
|
+
def chat(model:, messages:, tools:, instructions:, temperature: nil, thinking: nil, metadata: nil)
|
|
7
7
|
require "ruby_llm"
|
|
8
8
|
|
|
9
9
|
configure_from_environment
|
|
@@ -11,6 +11,7 @@ module TurnKit
|
|
|
11
11
|
chat = ::RubyLLM.chat(model: model)
|
|
12
12
|
add_instructions(chat, instructions, model: model)
|
|
13
13
|
chat.with_temperature(temperature) if temperature
|
|
14
|
+
apply_thinking(chat, thinking)
|
|
14
15
|
Array(tools).each { |tool| chat.with_tool(ruby_llm_tool(tool)) }
|
|
15
16
|
Array(messages).each { |message| add_message(chat, message) }
|
|
16
17
|
|
|
@@ -27,6 +28,11 @@ module TurnKit
|
|
|
27
28
|
config.openrouter_api_key ||= ENV["OPENROUTER_API_KEY"]
|
|
28
29
|
end
|
|
29
30
|
|
|
31
|
+
def apply_thinking(chat, thinking)
|
|
32
|
+
thinking = Agent.normalize_thinking(thinking)
|
|
33
|
+
chat.with_thinking(**thinking) if thinking
|
|
34
|
+
end
|
|
35
|
+
|
|
30
36
|
def complete_without_tool_execution(chat)
|
|
31
37
|
provider = chat.instance_variable_get(:@provider)
|
|
32
38
|
provider.complete(
|
|
@@ -123,6 +129,7 @@ module TurnKit
|
|
|
123
129
|
output_tokens: token_value(response, :output_tokens),
|
|
124
130
|
cached_tokens: token_value(response, :cached_tokens),
|
|
125
131
|
cache_write_tokens: token_value(response, :cache_creation_tokens),
|
|
132
|
+
thinking_tokens: thinking_token_value(response),
|
|
126
133
|
cost: response_cost(response)
|
|
127
134
|
)
|
|
128
135
|
Result.new(
|
|
@@ -137,6 +144,10 @@ module TurnKit
|
|
|
137
144
|
response.respond_to?(method) ? response.public_send(method).to_i : 0
|
|
138
145
|
end
|
|
139
146
|
|
|
147
|
+
def thinking_token_value(response)
|
|
148
|
+
token_value(response, :thinking_tokens).nonzero? || token_value(response, :reasoning_tokens)
|
|
149
|
+
end
|
|
150
|
+
|
|
140
151
|
def response_cost(response)
|
|
141
152
|
return unless response.respond_to?(:cost)
|
|
142
153
|
|
data/lib/turnkit/agent.rb
CHANGED
|
@@ -4,11 +4,11 @@ module TurnKit
|
|
|
4
4
|
class Agent
|
|
5
5
|
attr_reader :name, :description, :model, :instructions, :tools, :skills, :available_skills, :sub_agents
|
|
6
6
|
attr_reader :client, :store, :max_iterations, :timeout, :cost_limit, :max_depth, :max_tool_executions
|
|
7
|
-
attr_reader :prompt_sections, :system_prompt, :prompt_mode
|
|
7
|
+
attr_reader :prompt_sections, :system_prompt, :prompt_mode, :thinking, :compaction
|
|
8
8
|
|
|
9
9
|
def initialize(name:, description: "", model: nil, instructions: "", tools: [], skills: [], available_skills: [], sub_agents: [],
|
|
10
10
|
system_prompt: nil, prompt_sections: nil, prompt_mode: nil, client: nil, store: nil,
|
|
11
|
-
max_iterations: nil, timeout: nil, cost_limit: nil, max_depth: nil, max_tool_executions: nil)
|
|
11
|
+
max_iterations: nil, timeout: nil, cost_limit: nil, max_depth: nil, max_tool_executions: nil, thinking: nil, compaction: nil)
|
|
12
12
|
@name = name.to_s
|
|
13
13
|
@description = description.to_s
|
|
14
14
|
@model = model
|
|
@@ -27,9 +27,26 @@ module TurnKit
|
|
|
27
27
|
@cost_limit = cost_limit
|
|
28
28
|
@max_depth = max_depth
|
|
29
29
|
@max_tool_executions = max_tool_executions
|
|
30
|
+
@thinking = self.class.normalize_thinking(thinking)
|
|
31
|
+
@compaction = compaction
|
|
30
32
|
raise ArgumentError, "name is required" if @name.empty?
|
|
31
33
|
end
|
|
32
34
|
|
|
35
|
+
def self.normalize_thinking(value)
|
|
36
|
+
return nil if value.nil?
|
|
37
|
+
|
|
38
|
+
attrs = value.respond_to?(:to_h) ? value.to_h : value
|
|
39
|
+
raise ArgumentError, "thinking must be a hash" unless attrs.is_a?(Hash)
|
|
40
|
+
|
|
41
|
+
attrs = attrs.transform_keys(&:to_sym)
|
|
42
|
+
unknown = attrs.keys - %i[effort budget]
|
|
43
|
+
raise ArgumentError, "unknown thinking attributes: #{unknown.join(", ")}" if unknown.any?
|
|
44
|
+
raise ArgumentError, "thinking requires :effort or :budget" if attrs[:effort].nil? && attrs[:budget].nil?
|
|
45
|
+
raise ArgumentError, "thinking budget must be an Integer" if attrs[:budget] && !attrs[:budget].is_a?(Integer)
|
|
46
|
+
|
|
47
|
+
attrs.slice(:effort, :budget).compact
|
|
48
|
+
end
|
|
49
|
+
|
|
33
50
|
def conversation(model: nil, subject: nil, metadata: {})
|
|
34
51
|
store = effective_store
|
|
35
52
|
record = store.create_conversation(
|
|
@@ -53,6 +70,10 @@ module TurnKit
|
|
|
53
70
|
model || TurnKit.default_model
|
|
54
71
|
end
|
|
55
72
|
|
|
73
|
+
def effective_thinking
|
|
74
|
+
thinking
|
|
75
|
+
end
|
|
76
|
+
|
|
56
77
|
def effective_client
|
|
57
78
|
client || TurnKit.client
|
|
58
79
|
end
|
data/lib/turnkit/client.rb
CHANGED
|
data/lib/turnkit/compaction.rb
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module TurnKit
|
|
4
|
+
module Compaction
|
|
5
|
+
DEFAULTS = {
|
|
6
|
+
"enabled" => true,
|
|
7
|
+
"threshold" => 0.75,
|
|
8
|
+
"context_limit" => 128_000,
|
|
9
|
+
"reserved_tokens" => 20_000,
|
|
10
|
+
"head_messages" => 0,
|
|
11
|
+
"tail_messages" => 12,
|
|
12
|
+
"tail_tokens" => 8_000,
|
|
13
|
+
"summary_ratio" => 0.20,
|
|
14
|
+
"min_summary_tokens" => 1_000,
|
|
15
|
+
"max_summary_tokens" => 12_000,
|
|
16
|
+
"tool_output_max_chars" => 2_000,
|
|
17
|
+
"model" => nil,
|
|
18
|
+
"client" => nil
|
|
19
|
+
}.freeze
|
|
20
|
+
|
|
21
|
+
KNOWN_KEYS = DEFAULTS.keys.freeze
|
|
22
|
+
|
|
23
|
+
COMPACTION_SYSTEM_PROMPT = <<~TEXT.strip
|
|
24
|
+
You are an anchored context summarization assistant for TurnKit conversations.
|
|
25
|
+
|
|
26
|
+
Summarize only the conversation history you are given. Recent turns may be kept verbatim outside your summary, so focus on older context that still matters for continuing the work.
|
|
27
|
+
|
|
28
|
+
If a previous summary is provided, update it by preserving still-true details, removing stale details, and merging in new facts.
|
|
29
|
+
|
|
30
|
+
Produce only the requested Markdown summary. Do not answer the conversation itself. Do not mention that you are summarizing, compacting, or merging context.
|
|
31
|
+
|
|
32
|
+
Write in the same language the user was using.
|
|
33
|
+
|
|
34
|
+
Never include API keys, tokens, passwords, secrets, credentials, or connection strings. Replace secret values with [REDACTED].
|
|
35
|
+
TEXT
|
|
36
|
+
|
|
37
|
+
SUMMARY_TEMPLATE = <<~TEXT.strip
|
|
38
|
+
Use this exact structure:
|
|
39
|
+
|
|
40
|
+
## Active Task
|
|
41
|
+
- [latest unfulfilled user request, preferably verbatim]
|
|
42
|
+
|
|
43
|
+
## Goal
|
|
44
|
+
- [what the user is trying to accomplish overall]
|
|
45
|
+
|
|
46
|
+
## Constraints & Preferences
|
|
47
|
+
- [user/developer preferences, specs, constraints, important choices]
|
|
48
|
+
|
|
49
|
+
## Completed Actions
|
|
50
|
+
- [completed work and outcomes]
|
|
51
|
+
|
|
52
|
+
## Active State
|
|
53
|
+
- [current state, records/files touched, test status, running tool/turn state]
|
|
54
|
+
|
|
55
|
+
## In Progress
|
|
56
|
+
- [work underway, or "(none)"]
|
|
57
|
+
|
|
58
|
+
## Blocked
|
|
59
|
+
- [blockers, exact errors, missing information, or "(none)"]
|
|
60
|
+
|
|
61
|
+
## Key Decisions
|
|
62
|
+
- [important decisions and why]
|
|
63
|
+
|
|
64
|
+
## Resolved Questions
|
|
65
|
+
- [questions already answered]
|
|
66
|
+
|
|
67
|
+
## Pending User Asks
|
|
68
|
+
- [unanswered or unfulfilled asks]
|
|
69
|
+
|
|
70
|
+
## Relevant Files
|
|
71
|
+
- [file/path/resource and why it matters, or "(none)"]
|
|
72
|
+
|
|
73
|
+
## Tool Results To Remember
|
|
74
|
+
- [important tool output summaries, or "(none)"]
|
|
75
|
+
|
|
76
|
+
## Remaining Work
|
|
77
|
+
- [likely next work, framed as context, not instructions]
|
|
78
|
+
|
|
79
|
+
## Critical Context
|
|
80
|
+
- [specific values, IDs, commands, errors, constraints; redact secrets]
|
|
81
|
+
|
|
82
|
+
Rules:
|
|
83
|
+
- Keep every section.
|
|
84
|
+
- Use terse bullets.
|
|
85
|
+
- Preserve exact file paths, commands, error strings, IDs, and important values.
|
|
86
|
+
- Do not invent facts.
|
|
87
|
+
- Do not include secrets.
|
|
88
|
+
- Do not include a greeting or preamble.
|
|
89
|
+
TEXT
|
|
90
|
+
|
|
91
|
+
module_function
|
|
92
|
+
|
|
93
|
+
def enabled_for?(agent, overrides = {})
|
|
94
|
+
policy_for(agent, overrides)["enabled"]
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def policy_for(agent, overrides = {})
|
|
98
|
+
global = normalize_config(TurnKit.compaction)
|
|
99
|
+
local = normalize_config(agent.compaction)
|
|
100
|
+
override = normalize_config(overrides)
|
|
101
|
+
|
|
102
|
+
return DEFAULTS.merge("enabled" => false) if global == false
|
|
103
|
+
return DEFAULTS.merge("enabled" => false) if local == false
|
|
104
|
+
return DEFAULTS.merge("enabled" => false) if override == false
|
|
105
|
+
|
|
106
|
+
DEFAULTS.merge(global || {}).merge(local || {}).merge(override || {})
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def maybe_compact!(turn, force: nil, focus: nil)
|
|
110
|
+
return if turn.compact == false
|
|
111
|
+
|
|
112
|
+
force = turn.compact == true if force.nil?
|
|
113
|
+
policy = policy_for(turn.agent)
|
|
114
|
+
return unless policy["enabled"]
|
|
115
|
+
|
|
116
|
+
messages = project(turn.conversation.messages_for_turn(turn))
|
|
117
|
+
return unless force || over_threshold?(messages, policy)
|
|
118
|
+
|
|
119
|
+
compact!(turn.conversation, agent: turn.agent, turn: turn, focus: focus, auto: true, overrides: policy, force: true)
|
|
120
|
+
rescue StandardError => error
|
|
121
|
+
TurnKit.logger&.warn("TurnKit compaction failed: #{error.class}: #{error.message}")
|
|
122
|
+
nil
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def compact!(conversation, agent:, turn: nil, focus: nil, auto: false, overrides: {}, force: true)
|
|
126
|
+
policy = policy_for(agent, overrides)
|
|
127
|
+
raise CompactionError, "compaction is disabled" unless policy["enabled"]
|
|
128
|
+
|
|
129
|
+
messages = turn ? conversation.messages_for_turn(turn) : conversation.messages
|
|
130
|
+
projected = project(messages)
|
|
131
|
+
selected = select_messages(projected, policy)
|
|
132
|
+
return nil if selected.nil? && auto
|
|
133
|
+
raise CompactionError, "not enough messages to compact" unless selected
|
|
134
|
+
|
|
135
|
+
selected_tokens = estimate_messages_tokens(selected.fetch("middle"))
|
|
136
|
+
return nil if auto && !force && !over_threshold?(projected, policy)
|
|
137
|
+
|
|
138
|
+
summary = generate_summary(
|
|
139
|
+
agent: agent,
|
|
140
|
+
policy: policy,
|
|
141
|
+
messages: selected.fetch("middle"),
|
|
142
|
+
previous_summary: selected["previous_summary"]&.text,
|
|
143
|
+
focus: focus,
|
|
144
|
+
target_tokens: summary_budget(selected_tokens, policy),
|
|
145
|
+
fallback_model: turn&.model || conversation.model || agent.effective_model,
|
|
146
|
+
conversation_id: conversation.id,
|
|
147
|
+
turn_id: turn&.id
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
append_summary(conversation, turn: turn, summary: summary, selected: selected, policy: policy, focus: focus, auto: auto, input_tokens: selected_tokens)
|
|
151
|
+
rescue CompactionError
|
|
152
|
+
raise
|
|
153
|
+
rescue StandardError => error
|
|
154
|
+
raise CompactionError, "#{error.class}: #{error.message}"
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def project(messages)
|
|
158
|
+
rows = Array(messages).sort_by { |message| [ message.sequence.to_i, message.id ] }
|
|
159
|
+
summaries = active_summaries(rows)
|
|
160
|
+
ranges = summaries.filter_map { |summary| range_for(summary) }
|
|
161
|
+
summaries_by_id = summaries.to_h { |summary| [ summary.id, summary ] }
|
|
162
|
+
inserted = {}
|
|
163
|
+
projected = []
|
|
164
|
+
|
|
165
|
+
rows.each do |message|
|
|
166
|
+
summaries.each do |summary|
|
|
167
|
+
range = range_for(summary)
|
|
168
|
+
next unless range
|
|
169
|
+
next if inserted[summary.id]
|
|
170
|
+
next unless range.begin <= message.sequence.to_i
|
|
171
|
+
|
|
172
|
+
projected << summary
|
|
173
|
+
inserted[summary.id] = true
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
if message.context_summary?
|
|
177
|
+
projected << message if summaries_by_id[message.id] && !inserted[message.id] && !range_for(message)
|
|
178
|
+
inserted[message.id] = true if summaries_by_id[message.id]
|
|
179
|
+
next
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
next if ranges.any? { |range| range.cover?(message.sequence.to_i) }
|
|
183
|
+
|
|
184
|
+
projected << message
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
summaries.each do |summary|
|
|
188
|
+
next if inserted[summary.id]
|
|
189
|
+
|
|
190
|
+
projected << summary
|
|
191
|
+
inserted[summary.id] = true
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
projected
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def estimate_messages_tokens(messages)
|
|
198
|
+
Array(messages).sum { |message| estimate_text_tokens(message.text) + 8 }
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
def estimate_text_tokens(text)
|
|
202
|
+
(text.to_s.length / 4.0).ceil
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def summary_budget(input_tokens, policy)
|
|
206
|
+
budget = (input_tokens.to_i * policy["summary_ratio"].to_f).ceil
|
|
207
|
+
budget = [ budget, policy["min_summary_tokens"].to_i ].max
|
|
208
|
+
[ budget, policy["max_summary_tokens"].to_i ].min
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def over_threshold?(messages, policy)
|
|
212
|
+
usable = [ policy["context_limit"].to_i - policy["reserved_tokens"].to_i, 1 ].max
|
|
213
|
+
estimate_messages_tokens(messages) >= (usable * policy["threshold"].to_f)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def select_messages(messages, policy)
|
|
217
|
+
rows = Array(messages)
|
|
218
|
+
return nil if rows.length <= policy["head_messages"].to_i + 1
|
|
219
|
+
|
|
220
|
+
previous_summary = rows.reverse.find(&:context_summary?)
|
|
221
|
+
candidates = rows.reject(&:context_summary?)
|
|
222
|
+
return nil if candidates.length <= policy["head_messages"].to_i + 1
|
|
223
|
+
|
|
224
|
+
head_count = policy["head_messages"].to_i
|
|
225
|
+
tail_start = tail_start_index(candidates, policy)
|
|
226
|
+
tail_start = [ tail_start, head_count ].max
|
|
227
|
+
tail_start = expand_tail_start_for_tool_pairs(candidates, tail_start)
|
|
228
|
+
middle = candidates[head_count...tail_start]
|
|
229
|
+
return nil if middle.nil? || middle.empty?
|
|
230
|
+
|
|
231
|
+
from_sequence = middle.first.sequence.to_i
|
|
232
|
+
through_sequence = middle.last.sequence.to_i
|
|
233
|
+
if previous_summary
|
|
234
|
+
from_sequence = [ from_sequence, previous_summary.sequence.to_i ].min
|
|
235
|
+
through_sequence = [ through_sequence, previous_summary.sequence.to_i ].max
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
{
|
|
239
|
+
"middle" => middle,
|
|
240
|
+
"previous_summary" => previous_summary,
|
|
241
|
+
"replaces_from_sequence" => from_sequence,
|
|
242
|
+
"replaces_through_sequence" => through_sequence,
|
|
243
|
+
"tail_start_sequence" => candidates[tail_start]&.sequence
|
|
244
|
+
}
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
def build_prompt(previous_summary:, focus:, target_tokens:)
|
|
248
|
+
parts = []
|
|
249
|
+
if previous_summary && !previous_summary.empty?
|
|
250
|
+
parts << <<~TEXT.strip
|
|
251
|
+
Update the anchored summary below using the conversation history above.
|
|
252
|
+
|
|
253
|
+
Preserve still-true details, remove stale details, and merge in new facts. Remove stale details that are no longer relevant or have been superseded.
|
|
254
|
+
|
|
255
|
+
<previous-summary>
|
|
256
|
+
#{previous_summary}
|
|
257
|
+
</previous-summary>
|
|
258
|
+
TEXT
|
|
259
|
+
else
|
|
260
|
+
parts << <<~TEXT.strip
|
|
261
|
+
Create a structured context checkpoint for the conversation history above.
|
|
262
|
+
|
|
263
|
+
This summary will replace older TurnKit messages in future model prompts while the original messages remain stored durably.
|
|
264
|
+
TEXT
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
if focus && !focus.to_s.strip.empty?
|
|
268
|
+
parts << <<~TEXT.strip
|
|
269
|
+
Focus topic: "#{focus}"
|
|
270
|
+
|
|
271
|
+
Preserve extra detail related to this focus topic. Summarize unrelated context more aggressively, but do not omit constraints or active blockers that affect the current task.
|
|
272
|
+
TEXT
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
parts << "Target length: approximately #{target_tokens} tokens."
|
|
276
|
+
parts << SUMMARY_TEMPLATE
|
|
277
|
+
parts.join("\n\n")
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
# Normalizes a user-supplied compaction setting.
#
# value - nil, true, false, or a Hash of options.
#
# Returns nil for nil/true, false for false, and for a Hash a copy whose keys
# have been converted to strings.
# Raises ConfigError when the Hash contains keys outside KNOWN_KEYS or when
# value is of any other type.
def normalize_config(value)
  return nil if [ nil, true ].include?(value)
  return false if false.equal?(value)
  raise ConfigError, "compaction must be true, false, nil, or a Hash" unless value.is_a?(Hash)

  normalized = value.transform_keys(&:to_s)
  unrecognized = normalized.keys.reject { |key| KNOWN_KEYS.include?(key) }
  raise ConfigError, "unknown compaction options: #{unrecognized.join(", ")}" if unrecognized.any?

  normalized
end
|
|
296
|
+
|
|
297
|
+
# Reads the sequence span a summary message replaces.
#
# summary - object responding to `compaction_metadata`.
#
# Returns an inclusive Range built from the integer-coerced
# "replaces_from_sequence" and "replaces_through_sequence" entries, or nil
# when either entry is missing.
def range_for(summary)
  details = summary.compaction_metadata
  lower, upper = %w[replaces_from_sequence replaces_through_sequence].map { |key| details[key] }

  Range.new(lower.to_i, upper.to_i) if lower && upper
end
|
|
305
|
+
|
|
306
|
+
# Picks the context summaries that are still in effect.
#
# messages - collection (or nil) of message objects; only those answering
#            true to `context_summary?` are considered.
#
# Summaries are visited from highest to lowest sequence. A summary is dropped
# when the replaced range of an already-kept (newer) summary covers its own
# sequence number.
#
# Returns the kept summaries in ascending sequence order.
def active_summaries(messages)
  ordered = Array(messages).select { |message| message.context_summary? }
                           .sort_by { |summary| summary.sequence.to_i }

  ordered.reverse.each_with_object([]) do |candidate, kept|
    position = candidate.sequence.to_i
    superseded = kept.any? { |newer| range_for(newer)&.cover?(position) }
    # Prepending while walking newest-to-oldest yields ascending order.
    kept.unshift(candidate) unless superseded
  end
end
|
|
318
|
+
|
|
319
|
+
# Finds where the preserved "tail" of recent messages begins.
#
# messages - ordered list of message objects responding to `text`.
# policy   - Hash providing "tail_messages" and "tail_tokens" (coerced with
#            `to_i`).
#
# Walks backwards from the newest message, keeping messages until either the
# message limit is reached or adding another message would exceed the token
# limit. The newest message is exempt from the token limit, so the tail holds
# at least one message whenever the message limit allows any.
# Each message is costed as its estimated text tokens plus 8
# (presumably a fixed per-message overhead - confirm).
#
# Returns the index of the oldest kept message, or `messages.length` when no
# message is kept.
def tail_start_index(messages, policy)
  message_limit = policy["tail_messages"].to_i
  token_limit = policy["tail_tokens"].to_i
  kept = 0
  spent = 0
  start = messages.length

  messages.each_with_index.reverse_each do |message, position|
    cost = estimate_text_tokens(message.text) + 8
    break if kept >= message_limit
    break if kept.positive? && spent + cost > token_limit

    kept += 1
    spent += cost
    start = position
  end

  start
end
|
|
338
|
+
|
|
339
|
+
# Moves the tail boundary back so a tool result is not separated from the
# tool call that produced it.
#
# messages   - ordered list of message objects responding to `tool_result?`,
#              `tool_call?` and `metadata`.
# tail_start - index where the tail currently begins.
#
# While the boundary sits on a tool result (and is not index 0), the nearest
# earlier tool-call message listing the result's "tool_call_id" (under an
# "id" or :id key) becomes the new boundary. The search stops as soon as no
# matching call is found.
#
# Returns the possibly earlier boundary index.
def expand_tail_start_for_tool_pairs(messages, tail_start)
  cursor = tail_start

  until cursor <= 0 || !messages[cursor]&.tool_result?
    wanted_id = messages[cursor].metadata["tool_call_id"]
    origin = messages[0...cursor].rindex do |candidate|
      candidate.tool_call? &&
        Array(candidate.metadata["tool_calls"]).any? do |call|
          [ "id", :id ].any? { |key| call[key] == wanted_id }
        end
    end
    break if origin.nil?

    cursor = origin
  end

  cursor
end
|
|
352
|
+
|
|
353
|
+
# Asks a model to summarize the given messages and returns the summary text.
#
# agent            - supplies `effective_client` when the policy has no "client".
# policy           - Hash; "client" and "model" override the defaults, and the
#                    whole Hash is handed to `sanitize_message`.
# messages         - messages to summarize; each is sanitized before projection.
# previous_summary, focus, target_tokens - forwarded to `build_prompt`.
# fallback_model   - used when the policy has no "model".
# conversation_id, turn_id - passed through in the request metadata.
#
# The request carries no tools and uses COMPACTION_SYSTEM_PROMPT as
# instructions; the built prompt is appended as a final user message.
#
# Returns the stripped response text.
# Raises CompactionError when that text is empty.
def generate_summary(agent:, policy:, messages:, previous_summary:, focus:, target_tokens:, fallback_model:, conversation_id:, turn_id:)
  summarizer = policy["client"] || agent.effective_client
  summary_model = policy["model"] || fallback_model
  sanitized = messages.map { |message| sanitize_message(message, policy) }
  instruction = build_prompt(previous_summary: previous_summary, focus: focus, target_tokens: target_tokens)

  request = {
    model: summary_model,
    messages: MessageProjection.for(sanitized) + [ { role: :user, content: instruction } ],
    tools: [],
    instructions: COMPACTION_SYSTEM_PROMPT,
    metadata: { compaction: true, conversation_id: conversation_id, turn_id: turn_id }
  }

  summary = summarizer.chat(**request).text.to_s.strip
  return summary unless summary.empty?

  raise CompactionError, "compaction model returned an empty summary"
end
|
|
370
|
+
|
|
371
|
+
# Shortens oversized tool results before they are sent to the summarizer.
#
# message - message object; anything that is not a tool result is returned
#           untouched.
# policy  - Hash; "tool_output_max_chars" (coerced with `to_i`) is the
#           character limit. A limit of zero or less disables truncation.
#
# Returns the same message when no truncation is needed; otherwise a new
# Message built from the original attributes with the text cut to the limit,
# a truncation notice appended, and the content replaced by a single text part.
def sanitize_message(message, policy)
  return message unless message.tool_result?

  limit = policy["tool_output_max_chars"].to_i
  return message unless limit.positive? && message.text.length > limit

  original_attrs = message.to_h
  clipped = "#{message.text[0, limit]}\n\n[Tool result truncated for compaction]"
  replacement = { "text" => clipped, "content" => [ { "type" => "text", "text" => clipped } ] }
  Message.new(original_attrs.merge(replacement))
end
|
|
381
|
+
|
|
382
|
+
# Stores a generated summary as a "context_summary" assistant message.
#
# conversation - receives `append_message`; also a fallback source for the model.
# turn         - optional turn; supplies the turn id and a model fallback.
# summary      - summary text to store.
# selected     - Hash that must contain "replaces_from_sequence" and
#                "replaces_through_sequence"; "tail_start_sequence" is optional.
# policy       - Hash; "model" takes precedence when recording the summary model.
# focus, auto, input_tokens - recorded as-is in the compaction metadata.
#
# Nil-valued metadata entries are dropped before the message is appended.
#
# Returns whatever `conversation.append_message` returns.
# Raises KeyError when `selected` lacks one of the required sequence keys.
def append_summary(conversation, turn:, summary:, selected:, policy:, focus:, auto:, input_tokens:)
  summary_model = policy["model"] || turn&.model || conversation.model || conversation.agent.effective_model
  origin_turn_id = turn&.id

  details = {
    "auto" => auto,
    "focus" => focus,
    "replaces_from_sequence" => selected.fetch("replaces_from_sequence"),
    "replaces_through_sequence" => selected.fetch("replaces_through_sequence"),
    "tail_start_sequence" => selected["tail_start_sequence"],
    "summary_model" => summary_model,
    "input_tokens" => input_tokens,
    "summary_tokens" => estimate_text_tokens(summary),
    "created_for_turn_id" => origin_turn_id,
    "created_at" => Clock.now.iso8601
  }.compact

  conversation.append_message(
    role: "assistant",
    kind: "context_summary",
    text: summary,
    turn_id: origin_turn_id,
    metadata: { "compaction" => details }
  )
end
|
|
405
|
+
end
|
|
406
|
+
end
|
data/lib/turnkit/conversation.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class Conversation
|
|
5
|
+
THINKING_UNSET = Object.new.freeze
|
|
6
|
+
|
|
5
7
|
attr_reader :agent, :id, :store, :model, :subject, :metadata
|
|
6
8
|
|
|
7
9
|
def initialize(agent:, record:, store:, model:, subject: nil, metadata: {})
|
|
@@ -24,12 +26,16 @@ module TurnKit
|
|
|
24
26
|
async ? turn : turn.run!
|
|
25
27
|
end
|
|
26
28
|
|
|
27
|
-
def run!(trigger_message_id: nil, model: nil, budget: nil, parent_turn: nil, parent_tool_execution: nil, depth: 0, agent: self.agent)
|
|
28
|
-
build_turn(trigger_message_id: trigger_message_id, model: model, budget: budget, parent_turn: parent_turn, parent_tool_execution: parent_tool_execution, depth: depth, agent: agent).run!
|
|
29
|
+
def run!(trigger_message_id: nil, model: nil, budget: nil, parent_turn: nil, parent_tool_execution: nil, depth: 0, agent: self.agent, thinking: THINKING_UNSET, compact: nil)
|
|
30
|
+
build_turn(trigger_message_id: trigger_message_id, model: model, budget: budget, parent_turn: parent_turn, parent_tool_execution: parent_tool_execution, depth: depth, agent: agent, thinking: thinking, compact: compact).run!
|
|
29
31
|
end
|
|
30
32
|
|
|
31
|
-
def build_turn(trigger_message_id: nil, model: nil, budget: nil, parent_turn: nil, parent_tool_execution: nil, depth: 0, agent: self.agent)
|
|
33
|
+
def build_turn(trigger_message_id: nil, model: nil, budget: nil, parent_turn: nil, parent_tool_execution: nil, depth: 0, agent: self.agent, thinking: THINKING_UNSET, compact: nil)
|
|
32
34
|
snapshot = latest_message_sequence
|
|
35
|
+
effective_thinking = thinking.equal?(THINKING_UNSET) ? agent.effective_thinking : Agent.normalize_thinking(thinking)
|
|
36
|
+
options = { "trigger_message_id" => trigger_message_id }.compact
|
|
37
|
+
options["thinking"] = effective_thinking
|
|
38
|
+
options["compact"] = compact unless compact.nil?
|
|
33
39
|
record = store.create_turn(
|
|
34
40
|
"conversation_id" => id,
|
|
35
41
|
"agent_name" => agent.name,
|
|
@@ -39,11 +45,16 @@ module TurnKit
|
|
|
39
45
|
"context_message_sequence" => snapshot,
|
|
40
46
|
"status" => "pending",
|
|
41
47
|
"model" => model || self.model || agent.effective_model,
|
|
42
|
-
"options" =>
|
|
48
|
+
"options" => options
|
|
43
49
|
)
|
|
44
50
|
Turn.new(agent: agent, conversation: self, record: record, store: store, budget: budget, depth: depth)
|
|
45
51
|
end
|
|
46
52
|
|
|
53
|
+
# Manually compacts this conversation (recorded as non-automatic).
#
# focus - optional topic forwarded to the compaction routine.
# model - optional model override; omitted from the overrides when nil.
#
# Returns the result of TurnKit::Compaction.compact!.
def compact!(focus: nil, model: nil)
  overrides = model.nil? ? {} : { "model" => model }
  TurnKit::Compaction.compact!(self, agent: agent, focus: focus, auto: false, overrides: overrides)
end
|
|
57
|
+
|
|
47
58
|
def messages
|
|
48
59
|
store.list_messages(id).map { |attrs| Message.new(attrs) }
|
|
49
60
|
end
|
data/lib/turnkit/cost.rb
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class Cost
|
|
5
|
-
COMPONENTS = %i[input output cache_read cache_write].freeze
|
|
5
|
+
COMPONENTS = %i[input output cache_read cache_write thinking].freeze
|
|
6
6
|
PER_MILLION = 1_000_000.0
|
|
7
7
|
|
|
8
|
-
attr_reader :input, :output, :cache_read, :cache_write
|
|
8
|
+
attr_reader :input, :output, :cache_read, :cache_write, :thinking
|
|
9
9
|
|
|
10
10
|
def self.aggregate(costs)
|
|
11
11
|
costs = costs.compact
|
|
@@ -55,6 +55,7 @@ module TurnKit
|
|
|
55
55
|
output: amount(usage.output_tokens, rates[:output] || rates[:output_per_million]),
|
|
56
56
|
cache_read: amount(usage.cached_tokens, rates[:cache_read] || rates[:cached_input] || rates[:cache_read_input_per_million] || rates[:cached_input_per_million]),
|
|
57
57
|
cache_write: amount(usage.cache_write_tokens, rates[:cache_write] || rates[:cache_creation] || rates[:cache_write_input_per_million] || rates[:cache_creation_input_per_million]),
|
|
58
|
+
thinking: amount(usage.thinking_tokens, rates[:thinking] || rates[:reasoning] || rates[:thinking_output] || rates[:reasoning_output] || rates[:thinking_output_per_million] || rates[:reasoning_output_per_million]),
|
|
58
59
|
strict: true
|
|
59
60
|
)
|
|
60
61
|
end
|
|
@@ -70,7 +71,8 @@ module TurnKit
|
|
|
70
71
|
input: usage.input_tokens,
|
|
71
72
|
output: usage.output_tokens,
|
|
72
73
|
cached: usage.cached_tokens,
|
|
73
|
-
cache_creation: usage.cache_write_tokens
|
|
74
|
+
cache_creation: usage.cache_write_tokens,
|
|
75
|
+
thinking: usage.thinking_tokens
|
|
74
76
|
)
|
|
75
77
|
from_hash(::RubyLLM::Cost.new(tokens: tokens, model: model_info).to_h)
|
|
76
78
|
else
|
|
@@ -92,6 +94,7 @@ module TurnKit
|
|
|
92
94
|
output: hash[:output],
|
|
93
95
|
cache_read: hash[:cache_read] || hash[:cached_input],
|
|
94
96
|
cache_write: hash[:cache_write] || hash[:cache_creation],
|
|
97
|
+
thinking: hash[:thinking] || hash[:reasoning] || hash[:thinking_output] || hash[:reasoning_output],
|
|
95
98
|
total: hash[:total]
|
|
96
99
|
)
|
|
97
100
|
end
|
|
@@ -119,11 +122,12 @@ module TurnKit
|
|
|
119
122
|
tokens.to_i * price.to_f / PER_MILLION
|
|
120
123
|
end
|
|
121
124
|
|
|
122
|
-
def initialize(input: nil, output: nil, cache_read: nil, cache_write: nil, total: nil, strict: false)
|
|
125
|
+
def initialize(input: nil, output: nil, cache_read: nil, cache_write: nil, thinking: nil, total: nil, strict: false)
|
|
123
126
|
@input = number(input)
|
|
124
127
|
@output = number(output)
|
|
125
128
|
@cache_read = number(cache_read)
|
|
126
129
|
@cache_write = number(cache_write)
|
|
130
|
+
@thinking = number(thinking)
|
|
127
131
|
@total = number(total)
|
|
128
132
|
@strict = strict
|
|
129
133
|
end
|
|
@@ -142,6 +146,7 @@ module TurnKit
|
|
|
142
146
|
"output" => output,
|
|
143
147
|
"cache_read" => cache_read,
|
|
144
148
|
"cache_write" => cache_write,
|
|
149
|
+
"thinking" => thinking,
|
|
145
150
|
"total" => total
|
|
146
151
|
}.compact
|
|
147
152
|
end
|
data/lib/turnkit/error.rb
CHANGED
data/lib/turnkit/message.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class Message
|
|
5
5
|
ROLES = %w[user assistant tool].freeze
|
|
6
|
-
KINDS = %w[text tool_call tool_result].freeze
|
|
6
|
+
KINDS = %w[text tool_call tool_result context_summary].freeze
|
|
7
7
|
|
|
8
8
|
attr_reader :id, :conversation_id, :turn_id, :role, :kind, :sequence
|
|
9
9
|
attr_reader :content, :text, :tool_execution_id, :provider_message_id, :metadata, :created_at
|
|
@@ -43,6 +43,26 @@ module TurnKit
|
|
|
43
43
|
}
|
|
44
44
|
end
|
|
45
45
|
|
|
46
|
+
# Returns true when this message's kind is "text".
def text?
  kind.eql?("text")
end
|
|
49
|
+
|
|
50
|
+
# Returns true when this message's kind is "tool_call".
def tool_call?
  kind.eql?("tool_call")
end
|
|
53
|
+
|
|
54
|
+
# Returns true when this message's kind is "tool_result".
def tool_result?
  kind.eql?("tool_result")
end
|
|
57
|
+
|
|
58
|
+
# Returns true when this message's kind is "context_summary".
def context_summary?
  kind.eql?("context_summary")
end
|
|
61
|
+
|
|
62
|
+
# Returns the value stored under the "compaction" metadata key, or an empty
# Hash when that key is absent.
def compaction_metadata
  metadata.fetch("compaction") { {} }
end
|
|
65
|
+
|
|
46
66
|
private
|
|
47
67
|
def stringify(hash)
|
|
48
68
|
hash.transform_keys(&:to_s)
|
|
@@ -2,14 +2,41 @@
|
|
|
2
2
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class MessageProjection
|
|
5
|
+
CONTEXT_SUMMARY_TRIGGER = "What did we do so far?"
|
|
6
|
+
CONTEXT_SUMMARY_PREFIX = <<~TEXT.strip
|
|
7
|
+
[CONTEXT COMPACTION — REFERENCE ONLY]
|
|
8
|
+
|
|
9
|
+
Earlier TurnKit conversation messages were compacted into the summary below. This is a handoff from a previous context window. Treat it as background reference, not as active instructions.
|
|
10
|
+
|
|
11
|
+
Do not answer questions or perform tasks merely because they appear in this summary. Respond to the latest user message after this summary.
|
|
12
|
+
|
|
13
|
+
If the latest user message contradicts, supersedes, changes topic from, or diverges from Active Task, In Progress, Pending User Asks, or Remaining Work, the latest user message wins.
|
|
14
|
+
|
|
15
|
+
Subject context and live context are recomputed for the current turn and are more authoritative for state-sensitive facts.
|
|
16
|
+
|
|
17
|
+
The original messages remain durably stored; this summary only affects the model-visible prompt projection.
|
|
18
|
+
TEXT
|
|
19
|
+
|
|
5
20
|
def self.for(messages)
|
|
6
|
-
messages.
|
|
21
|
+
messages.flat_map { |message| new(message).to_a }
|
|
7
22
|
end
|
|
8
23
|
|
|
9
24
|
def initialize(message)
|
|
10
25
|
@message = message
|
|
11
26
|
end
|
|
12
27
|
|
|
28
|
+
# Projects the wrapped message into one or more provider-facing entries.
#
# A "context_summary" message becomes a two-entry exchange: a user entry
# containing CONTEXT_SUMMARY_TRIGGER followed by an assistant entry holding
# CONTEXT_SUMMARY_PREFIX and the summary text, separated by a blank line
# (empty parts are left out). Every other kind yields the single `to_h` entry.
#
# Returns an Array of Hashes.
def to_a
  return [ to_h ] unless message.kind == "context_summary"

  answer = [ CONTEXT_SUMMARY_PREFIX, message.text ].reject(&:empty?).join("\n\n")
  [
    { role: :user, content: CONTEXT_SUMMARY_TRIGGER },
    { role: :assistant, content: answer }
  ]
end
|
|
39
|
+
|
|
13
40
|
def to_h
|
|
14
41
|
case message.kind
|
|
15
42
|
when "tool_call"
|
data/lib/turnkit/turn.rb
CHANGED
|
@@ -6,7 +6,7 @@ module TurnKit
|
|
|
6
6
|
|
|
7
7
|
attr_reader :agent, :conversation, :store, :budget, :depth
|
|
8
8
|
attr_reader :id, :conversation_id, :agent_name, :parent_turn_id, :parent_tool_execution_id
|
|
9
|
-
attr_reader :root_turn_id, :context_message_sequence, :model
|
|
9
|
+
attr_reader :root_turn_id, :context_message_sequence, :model, :thinking, :compact
|
|
10
10
|
attr_reader :started_at
|
|
11
11
|
|
|
12
12
|
def initialize(agent:, conversation:, record:, store:, budget: nil, depth: 0)
|
|
@@ -22,6 +22,8 @@ module TurnKit
|
|
|
22
22
|
@root_turn_id = @record["root_turn_id"] || id
|
|
23
23
|
@context_message_sequence = @record["context_message_sequence"].to_i
|
|
24
24
|
@model = @record["model"] || agent.effective_model
|
|
25
|
+
@thinking = thinking_from_options
|
|
26
|
+
@compact = compact_from_options
|
|
25
27
|
@started_at = @record["started_at"]
|
|
26
28
|
@budget = budget || agent.build_budget
|
|
27
29
|
@depth = depth
|
|
@@ -34,12 +36,14 @@ module TurnKit
|
|
|
34
36
|
loop do
|
|
35
37
|
budget.check!(depth: depth)
|
|
36
38
|
budget.count_iteration!
|
|
39
|
+
TurnKit::Compaction.maybe_compact!(self)
|
|
37
40
|
|
|
38
41
|
result = agent.effective_client.chat(
|
|
39
42
|
model: model,
|
|
40
43
|
messages: llm_messages,
|
|
41
44
|
tools: agent.effective_tools,
|
|
42
45
|
instructions: agent.system_prompt_for(turn: self, conversation: conversation),
|
|
46
|
+
thinking: thinking,
|
|
43
47
|
metadata: { turn_id: id, conversation_id: conversation.id }
|
|
44
48
|
)
|
|
45
49
|
result_cost = Cost.from_usage(result.usage, model: result.model || model)
|
|
@@ -94,6 +98,8 @@ module TurnKit
|
|
|
94
98
|
|
|
95
99
|
def reload
|
|
96
100
|
@record = store.load_turn(id)
|
|
101
|
+
@thinking = thinking_from_options
|
|
102
|
+
@compact = compact_from_options
|
|
97
103
|
self
|
|
98
104
|
end
|
|
99
105
|
|
|
@@ -103,7 +109,19 @@ module TurnKit
|
|
|
103
109
|
|
|
104
110
|
private
|
|
105
111
|
def llm_messages
|
|
106
|
-
MessageProjection.for(conversation.messages_for_turn(self))
|
|
112
|
+
MessageProjection.for(TurnKit::Compaction.project(conversation.messages_for_turn(self)))
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Resolves the thinking setting for this turn from the stored record.
#
# When the record's options (keys compared as strings) contain a "thinking"
# entry - even a nil one - that value is normalized and returned; otherwise
# the agent's effective thinking setting is used.
def thinking_from_options
  stored = (@record["options"] || {}).transform_keys(&:to_s)

  stored.key?("thinking") ? Agent.normalize_thinking(stored["thinking"]) : agent.effective_thinking
end
|
|
121
|
+
|
|
122
|
+
# Reads the per-turn "compact" option from the stored record.
#
# Option keys are compared as strings. Returns the stored value, or nil when
# the record has no options or no "compact" entry.
def compact_from_options
  stored = (@record["options"] || {}).transform_keys(&:to_s)
  stored.fetch("compact", nil)
end
|
|
108
126
|
|
|
109
127
|
def persist_assistant_message(result)
|
|
@@ -133,6 +151,7 @@ module TurnKit
|
|
|
133
151
|
"output_tokens" => current["output_tokens"].to_i + usage.output_tokens,
|
|
134
152
|
"cached_tokens" => current["cached_tokens"].to_i + usage.cached_tokens,
|
|
135
153
|
"cache_write_tokens" => current["cache_write_tokens"].to_i + usage.cache_write_tokens,
|
|
154
|
+
"thinking_tokens" => current["thinking_tokens"].to_i + usage.thinking_tokens,
|
|
136
155
|
"total_tokens" => current["total_tokens"].to_i + usage.total_tokens
|
|
137
156
|
}
|
|
138
157
|
totals["cost_details"] = aggregate_cost(current["cost_details"], cost).to_h if cost&.total
|
data/lib/turnkit/usage.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module TurnKit
|
|
4
4
|
class Usage
|
|
5
|
-
attr_reader :input_tokens, :output_tokens, :cached_tokens, :cache_write_tokens, :cost
|
|
5
|
+
attr_reader :input_tokens, :output_tokens, :cached_tokens, :cache_write_tokens, :thinking_tokens, :cost
|
|
6
6
|
|
|
7
7
|
def self.aggregate(usages)
|
|
8
8
|
usages = usages.compact
|
|
@@ -13,6 +13,7 @@ module TurnKit
|
|
|
13
13
|
output_tokens: usages.sum(&:output_tokens),
|
|
14
14
|
cached_tokens: usages.sum(&:cached_tokens),
|
|
15
15
|
cache_write_tokens: usages.sum(&:cache_write_tokens),
|
|
16
|
+
thinking_tokens: usages.sum(&:thinking_tokens),
|
|
16
17
|
cost: cost
|
|
17
18
|
)
|
|
18
19
|
end
|
|
@@ -29,20 +30,22 @@ module TurnKit
|
|
|
29
30
|
output_tokens: attrs["output_tokens"],
|
|
30
31
|
cached_tokens: attrs["cached_tokens"],
|
|
31
32
|
cache_write_tokens: attrs["cache_write_tokens"],
|
|
33
|
+
thinking_tokens: attrs["thinking_tokens"] || attrs["reasoning_tokens"],
|
|
32
34
|
cost: cost
|
|
33
35
|
)
|
|
34
36
|
end
|
|
35
37
|
|
|
36
|
-
def initialize(input_tokens: 0, output_tokens: 0, cached_tokens: 0, cache_write_tokens: 0, cost: nil)
|
|
38
|
+
def initialize(input_tokens: 0, output_tokens: 0, cached_tokens: 0, cache_write_tokens: 0, thinking_tokens: 0, cost: nil)
|
|
37
39
|
@input_tokens = input_tokens.to_i
|
|
38
40
|
@output_tokens = output_tokens.to_i
|
|
39
41
|
@cached_tokens = cached_tokens.to_i
|
|
40
42
|
@cache_write_tokens = cache_write_tokens.to_i
|
|
43
|
+
@thinking_tokens = thinking_tokens.to_i
|
|
41
44
|
@cost = cost
|
|
42
45
|
end
|
|
43
46
|
|
|
44
47
|
def total_tokens
|
|
45
|
-
input_tokens + output_tokens + cached_tokens + cache_write_tokens
|
|
48
|
+
input_tokens + output_tokens + cached_tokens + cache_write_tokens + thinking_tokens
|
|
46
49
|
end
|
|
47
50
|
|
|
48
51
|
def to_h
|
|
@@ -51,6 +54,7 @@ module TurnKit
|
|
|
51
54
|
"output_tokens" => output_tokens,
|
|
52
55
|
"cached_tokens" => cached_tokens,
|
|
53
56
|
"cache_write_tokens" => cache_write_tokens,
|
|
57
|
+
"thinking_tokens" => thinking_tokens,
|
|
54
58
|
"total_tokens" => total_tokens,
|
|
55
59
|
"cost" => cost
|
|
56
60
|
}.compact
|
data/lib/turnkit/version.rb
CHANGED
data/lib/turnkit.rb
CHANGED
|
@@ -25,6 +25,7 @@ require_relative "turnkit/prompt_contribution"
|
|
|
25
25
|
require_relative "turnkit/system_prompt"
|
|
26
26
|
require_relative "turnkit/store"
|
|
27
27
|
require_relative "turnkit/memory_store"
|
|
28
|
+
require_relative "turnkit/compaction"
|
|
28
29
|
require_relative "turnkit/tool"
|
|
29
30
|
require_relative "turnkit/tool_call"
|
|
30
31
|
require_relative "turnkit/tool_execution"
|
|
@@ -43,6 +44,7 @@ module TurnKit
|
|
|
43
44
|
attr_accessor :default_model, :client, :store, :logger
|
|
44
45
|
attr_accessor :max_iterations, :timeout, :max_depth, :max_tool_executions
|
|
45
46
|
attr_accessor :cost_limit, :prompt_cache
|
|
47
|
+
attr_accessor :compaction
|
|
46
48
|
attr_accessor :cost_rates, :cost_calculator
|
|
47
49
|
attr_accessor :prompt_sections, :prompt_behavior, :available_skills
|
|
48
50
|
attr_accessor :prompt_data_max_chars, :context_contributors
|
|
@@ -59,6 +61,7 @@ module TurnKit
|
|
|
59
61
|
self.max_depth = 3
|
|
60
62
|
self.max_tool_executions = 100
|
|
61
63
|
self.prompt_cache = :auto
|
|
64
|
+
self.compaction = true
|
|
62
65
|
self.cost_rates = {}
|
|
63
66
|
self.prompt_sections = SystemPrompt::DEFAULT_SECTIONS.dup
|
|
64
67
|
self.prompt_data_max_chars = 20_000
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: turnkit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sam Couch
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: ruby_llm
|
|
@@ -42,6 +42,7 @@ files:
|
|
|
42
42
|
- lib/turnkit/budget.rb
|
|
43
43
|
- lib/turnkit/client.rb
|
|
44
44
|
- lib/turnkit/clock.rb
|
|
45
|
+
- lib/turnkit/compaction.rb
|
|
45
46
|
- lib/turnkit/conversation.rb
|
|
46
47
|
- lib/turnkit/cost.rb
|
|
47
48
|
- lib/turnkit/error.rb
|