mistri 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +162 -0
  3. data/README.md +314 -3
  4. data/lib/generators/mistri/install/install_generator.rb +54 -0
  5. data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
  6. data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
  7. data/lib/mistri/abort_signal.rb +63 -0
  8. data/lib/mistri/agent.rb +340 -0
  9. data/lib/mistri/budget.rb +29 -0
  10. data/lib/mistri/compaction.rb +78 -0
  11. data/lib/mistri/compactor.rb +182 -0
  12. data/lib/mistri/content.rb +89 -0
  13. data/lib/mistri/edit.rb +238 -0
  14. data/lib/mistri/errors.rb +94 -0
  15. data/lib/mistri/event.rb +50 -0
  16. data/lib/mistri/memory.rb +26 -0
  17. data/lib/mistri/message.rb +90 -0
  18. data/lib/mistri/models.rb +43 -0
  19. data/lib/mistri/partial_json.rb +210 -0
  20. data/lib/mistri/providers/anthropic/assembler.rb +205 -0
  21. data/lib/mistri/providers/anthropic/serializer.rb +106 -0
  22. data/lib/mistri/providers/anthropic.rb +106 -0
  23. data/lib/mistri/providers/fake.rb +109 -0
  24. data/lib/mistri/providers/gemini/assembler.rb +163 -0
  25. data/lib/mistri/providers/gemini/serializer.rb +109 -0
  26. data/lib/mistri/providers/gemini.rb +73 -0
  27. data/lib/mistri/providers/openai/assembler.rb +205 -0
  28. data/lib/mistri/providers/openai/serializer.rb +104 -0
  29. data/lib/mistri/providers/openai.rb +72 -0
  30. data/lib/mistri/result.rb +30 -0
  31. data/lib/mistri/retry_policy.rb +47 -0
  32. data/lib/mistri/schema.rb +162 -0
  33. data/lib/mistri/session.rb +124 -0
  34. data/lib/mistri/sinks/action_cable.rb +30 -0
  35. data/lib/mistri/sinks/coalesced.rb +61 -0
  36. data/lib/mistri/sinks/sse.rb +26 -0
  37. data/lib/mistri/skill.rb +15 -0
  38. data/lib/mistri/skills.rb +81 -0
  39. data/lib/mistri/sse.rb +50 -0
  40. data/lib/mistri/stop_reason.rb +25 -0
  41. data/lib/mistri/stores/active_record.rb +47 -0
  42. data/lib/mistri/stores/jsonl.rb +37 -0
  43. data/lib/mistri/stores/memory.rb +22 -0
  44. data/lib/mistri/sub_agent.rb +211 -0
  45. data/lib/mistri/tool.rb +94 -0
  46. data/lib/mistri/tool_call.rb +18 -0
  47. data/lib/mistri/tool_context.rb +15 -0
  48. data/lib/mistri/tool_executor.rb +66 -0
  49. data/lib/mistri/tool_result.rb +23 -0
  50. data/lib/mistri/tools/edit_file.rb +37 -0
  51. data/lib/mistri/tools/find_in_file.rb +36 -0
  52. data/lib/mistri/tools/list_files.rb +16 -0
  53. data/lib/mistri/tools/read_file.rb +38 -0
  54. data/lib/mistri/tools/read_memory.rb +16 -0
  55. data/lib/mistri/tools/update_memory.rb +22 -0
  56. data/lib/mistri/tools/write_file.rb +20 -0
  57. data/lib/mistri/tools.rb +50 -0
  58. data/lib/mistri/transport.rb +187 -0
  59. data/lib/mistri/usage.rb +79 -0
  60. data/lib/mistri/version.rb +1 -1
  61. data/lib/mistri/workspace/active_record.rb +47 -0
  62. data/lib/mistri/workspace/directory.rb +52 -0
  63. data/lib/mistri/workspace/memory.rb +40 -0
  64. data/lib/mistri/workspace/single.rb +48 -0
  65. data/lib/mistri.rb +87 -0
  66. metadata +68 -5
@@ -0,0 +1,340 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
module Mistri
  # The agent loop: prompt the provider, run any tools it calls, feed the
  # results back, and repeat until it answers without calling tools. Every
  # streamed event reaches the caller's block as it arrives, and every run
  # returns a Result.
  #
  # Each message persists to the session the moment it completes, so a crash
  # or an abort leaves a replay-valid transcript with no repair step. A tool
  # marked needs_approval suspends the run instead of executing: the run
  # returns at once (no thread ever waits on a human), the decision arrives
  # later as a session entry from any process, and resume settles it and
  # carries on. Session#steer queues a user message from any process while
  # the loop runs; it folds into the transcript at the next turn boundary.
  class Agent
    # Longest single sleep (seconds) while backing off before a retry. It
    # bounds how long an abort can go unnoticed during the backoff.
    BACKOFF_TICK = 0.1
    private_constant :BACKOFF_TICK

    # compaction defaults on so long sessions survive their context window;
    # pass false to disable, or a tuned Compaction. It only ever triggers
    # when the model's window is known (catalog or Compaction#window).
    # retries likewise accepts false/nil to disable retrying.
    # skills: an array of Skill (or a directory path for Skills.load). Their
    # descriptions join the system prompt and a read_skill tool serves full
    # bodies on demand.
    #
    # Raises ConfigurationError when two tools share a name.
    def initialize(provider:, session: nil, system: nil, tools: [], budget: nil,
                   max_concurrency: 4, transform_context: nil, compaction: Compaction.new,
                   retries: RetryPolicy.new, skills: [])
      @provider = provider
      @session = session || Session.new(store: Stores::Memory.new)
      skills = skills.is_a?(String) ? Skills.load(skills) : Array(skills)
      @system = Skills.amend(system, skills)
      @tools = skills.empty? ? tools : tools + [Skills.reader(skills)]
      @tools_by_name = @tools.to_h { |tool| [tool.name, tool] }
      # to_h collapses duplicate keys, so a shorter hash means a name clash.
      raise ConfigurationError, "duplicate tool names" if @tools_by_name.length != @tools.length

      @budget = budget || Budget.new
      @max_concurrency = max_concurrency
      @transform_context = transform_context
      @compaction = compaction || nil # normalize false to nil
      @retries = retries || nil       # normalize false to nil
    end

    attr_reader :session

    # Run one exchange: append the user turn, then loop until the model
    # answers without tools, a gated tool suspends the run, the run aborts,
    # or a budget stops it.
    # output_schema constrains every non-tool answer to JSON matching the
    # schema, natively where the provider supports it. task adds validation
    # on top; run alone does not validate.
    #
    # Raises ConfigurationError while approvals are open (call resume
    # instead) and ArgumentError when neither text nor images are given.
    def run(input, images: [], signal: nil, output_schema: nil, &emit)
      if @session.open_approvals.any?
        raise ConfigurationError, "session is awaiting approval decisions; call resume"
      end
      if input.to_s.empty? && Array(images).empty?
        raise ArgumentError, "run needs input text or images"
      end

      fold_steers # steers queued while idle arrived first; keep that order
      @session.append_message(Message.user_with_images(input, images))
      loop_turns(signal, output_schema, &emit)
    end

    # Continue a suspended run. Undecided approvals return immediately, still
    # suspended. Decided ones settle first: approved calls execute, denied
    # calls answer in band so the model knows and can react. Then the loop
    # carries on as if it never stopped.
    def resume(signal: nil, &emit)
      open = @session.open_approvals
      pending = open.select { |approval| approval[:decision].nil? }
      if pending.any?
        return Result.new(message: nil, status: :awaiting_approval,
                          pending: pending.map { |approval| approval[:call] })
      end

      settle(open, signal, &emit)
      loop_turns(signal, nil, &emit)
    end

    # Run an exchange that must end in a JSON value matching schema. Tools
    # run as usual; providers constrain the final answer natively where they
    # can, and the answer is validated here regardless. A violation goes
    # back to the model (fixes more times), then raises SchemaError. The
    # Result carries the validated value as output.
    #
    # A run that suspends for approval returns as-is: validation applies to
    # completed runs only, so resume the session and re-ask if that happens
    # mid-task.
    #
    # A negative fixes is treated as 0: the first answer is still validated
    # and the method still returns a Result (or raises SchemaError).
    def task(input, schema:, images: [], signal: nil, fixes: 1, &emit)
      result = run(task_input(input, schema), images: images, signal: signal,
                                              output_schema: schema, &emit)
      # Clamp at zero: with a negative count downto would not iterate at all
      # and the method would return the Integer instead of a Result.
      [fixes, 0].max.downto(0) do |remaining|
        return result unless result.completed?

        value = parse_output(result.text)
        errors = task_errors(value, schema)
        return result.with(output: value) if errors.empty?
        raise SchemaError, "task output failed validation: #{errors.join("; ")}" if remaining.zero?

        result = run(fix_prompt(errors), signal: signal, output_schema: schema, &emit)
      end
    end

    # How full the context is: {tokens:, window:, fraction:}. Hosts render
    # meters and near-limit warnings from this; window is nil for models the
    # catalog does not know unless Compaction#window supplies one, and
    # fraction is nil whenever window is.
    def context_usage
      tokens = Compaction.context_tokens(@session.messages)
      window = context_window
      { tokens: tokens, window: window,
        fraction: window && (tokens.to_f / window).round(3) }
    end

    # Compact now (a UI button, a pre-flight trim before a big task). Returns
    # the Compactor result, or nil when there is nothing worth compacting.
    # Works even when automatic compaction is disabled, using default
    # settings in that case.
    def compact(&)
      Compactor.call(session: @session, provider: @provider,
                     settings: @compaction || Compaction.new, &)
    end

    private

    # The turn loop shared by run and resume. Budgets are checked between
    # turns only, against counters that start at zero for this call.
    def loop_turns(signal, output_schema = nil, &emit)
      turns = 0
      usage = Usage.zero
      started = monotonic_now
      loop do
        reason = @budget.exceeded(turns: turns, usage: usage, elapsed: monotonic_now - started)
        return stop_for_budget(reason, &emit) if reason

        fold_steers
        compacted = auto_compact(&emit)
        # Summarization spends tokens too; count them against the budget.
        usage += compacted[:usage] if compacted&.dig(:usage)
        last = run_turn(signal, output_schema, &emit)
        turns += 1
        usage += last.usage if last.usage

        # Any tool call the turn made must be answered or parked, or the
        # transcript is unpairable and replay fails.
        parked = last.tool_calls? ? run_tools(last, signal, &emit) : []
        return suspended(last, parked) if parked.any?
        return finished(last) if done?(last, signal)
      end
    end

    # A steer that lands while the model finishes cleanly extends the run one
    # more turn so it gets answered. Aborts, errors, and length stops always
    # end the run; the steer stays pending for the next one.
    def done?(last, signal)
      return false if last.stop_reason == StopReason::TOOL_USE && !signal&.aborted?
      return true if signal&.aborted? || last.stop_reason != StopReason::STOP

      @session.pending_steers.empty?
    end

    # Compact when the context has grown into the reserve. A failed
    # summarization skips quietly here: if the context genuinely no longer
    # fits, the next turn surfaces the real provider error.
    def auto_compact(&)
      return nil unless @compaction

      tokens = Compaction.context_tokens(@session.messages)
      return nil unless @compaction.needed?(tokens, context_window)

      Compactor.call(session: @session, provider: @provider, settings: @compaction, &)
    rescue CompactionError
      nil
    end

    # An explicit Compaction#window wins over the model catalog's value.
    def context_window
      @compaction&.window || Models.find(@provider.model)&.context_window
    end

    # Materialize queued steers into the transcript in arrival order. The
    # folded message entry carries the steer id, which is what marks the steer
    # consumed: one append is both the fold and the marker, so a crash between
    # steers never double-delivers.
    def fold_steers
      @session.pending_steers.each do |steer|
        @session.append("message", "message" => steer["message"], "steer_id" => steer["id"])
      end
    end

    # transform_context reshapes what the model sees each turn (reminders,
    # redaction, windowing) without touching what the session stores. The
    # lambda gets the replay messages and returns the messages to send; it
    # must keep every tool call paired with its result or providers reject
    # the request.
    #
    # A transient failure retries the same request with backoff; the failed
    # attempt is recorded as a retry entry, never as a message, so retries
    # stay invisible to the model. Only the final outcome persists.
    def run_turn(signal, output_schema = nil, &emit)
      history = @session.messages
      history = @transform_context.call(history) if @transform_context
      attempt = 0
      loop do
        message = @provider.stream(messages: history, system: @system,
                                   tools: @tools.map(&:spec), signal: signal,
                                   output_schema: output_schema, &emit)
        attempt += 1
        if retry_turn?(message, attempt, signal)
          pause = @retries.delay(attempt, message.error&.dig("retry_after"))
          record_retry(message, attempt, pause, &emit)
          wait(pause, signal)
          # An abort during the backoff falls through and persists the
          # failed message as the final outcome.
          next unless signal&.aborted?
        end
        @session.append_message(message)
        return message
      end
    end

    # Retry only error turns, only with a policy, and never after an abort.
    def retry_turn?(message, attempt, signal)
      return false unless @retries && message.stop_reason == StopReason::ERROR
      return false if signal&.aborted?

      @retries.retry?(message.error, attempt)
    end

    # Log the failed attempt as a "retry" session entry and tell the caller.
    def record_retry(message, attempt, pause, &emit)
      @session.append("retry", "attempt" => attempt, "error" => message.error,
                               "delay" => pause.round(2))
      note = format("attempt %<attempt>d failed; retrying in %<pause>.1fs",
                    attempt: attempt, pause: pause)
      emit&.call(Event.new(type: :retry, content: note, reason: StopReason::ERROR,
                           message: message))
    end

    # Backoff that an abort can cut short. Sleeps in ticks of at most
    # BACKOFF_TICK so the signal is polled regularly, and never sleeps past
    # the deadline: the final tick is trimmed to the time remaining.
    def wait(seconds, signal)
      deadline = monotonic_now + seconds
      until signal&.aborted?
        remaining = deadline - monotonic_now
        break unless remaining.positive?

        sleep([BACKOFF_TICK, remaining].min)
      end
    end

    # Answer or park the assistant's tool calls. Ungated calls execute (only
    # on a genuine tool_use turn with no abort; otherwise they pair with
    # interrupted results). Gated calls park as approval requests and are
    # returned, so the loop can suspend. Nothing is left dangling either way.
    def run_tools(assistant, signal, &emit)
      calls = assistant.tool_calls
      unless assistant.stop_reason == StopReason::TOOL_USE && !signal&.aborted?
        calls.each { |call| answer(call, ToolExecutor::INTERRUPTED, &emit) }
        return []
      end

      parked, free = calls.partition { |call| gated?(call) }
      execute(free, signal, &emit)
      parked.each do |call|
        @session.append("approval_request", "call" => call.to_h)
        emit&.call(Event.new(type: :approval_needed, tool_call: call))
      end
      parked
    end

    # Resolve decided approvals: run the approved calls, and answer each
    # denied call with a denial text (including the decision's note, if any).
    def settle(open, signal, &emit)
      approved, denied = open.partition { |approval| approval[:decision]["approved"] }
      execute(approved.map { |approval| approval[:call] }, signal, &emit)
      denied.each do |approval|
        note = approval[:decision]["note"]
        text = "The user denied this tool call#{note ? ": #{note}" : "."}"
        answer(approval[:call], text, &emit)
      end
    end

    # Run the calls through ToolExecutor and record every result.
    def execute(calls, signal, &emit)
      return if calls.empty?

      results = ToolExecutor.call(calls, @tools_by_name, signal: signal,
                                                         max_concurrency: @max_concurrency,
                                                         session: @session, emit: emit)
      results.each { |call, result| answer(call, result, &emit) }
    end

    # The tool message carries both channels; the :tool_result event exposes
    # it whole so hosts read event.message.ui for their side of the result.
    def answer(call, result, &emit)
      content, ui = result.is_a?(ToolResult) ? [result.content, result.ui] : [result, nil]
      message = @session.append_message(Message.tool(content: content, tool_call_id: call.id,
                                                     tool_name: call.name, ui: ui))
      # Non-string content is represented by a placeholder in the event text.
      text = content.is_a?(String) ? content : "[content]"
      emit&.call(Event.new(type: :tool_result, tool_call: call, content: text, message: message))
    end

    # A call to an unknown tool is never gated.
    def gated?(call)
      tool = @tools_by_name[call.name]
      tool ? tool.needs_approval?(call.arguments) : false
    end

    # Map the final message's stop reason onto a Result status; anything
    # that is not an abort, budget stop, or error counts as completed.
    def finished(message)
      status = { StopReason::ABORTED => :aborted, StopReason::BUDGET => :budget,
                 StopReason::ERROR => :error }.fetch(message.stop_reason, :completed)
      Result.new(message: message, status: status)
    end

    def suspended(message, parked)
      Result.new(message: message, status: :awaiting_approval, pending: parked)
    end

    # Persist and emit a synthetic assistant message explaining the stop, so
    # the transcript records why the run ended.
    def stop_for_budget(reason, &emit)
      message = Message.assistant(content: "Run stopped: #{reason} budget reached.",
                                  stop_reason: StopReason::BUDGET,
                                  error_message: "budget_#{reason}")
      @session.append_message(message)
      emit&.call(Event.new(type: :error, reason: StopReason::BUDGET, message: message,
                           error_message: "budget_#{reason}"))
      Result.new(message: message, status: :budget)
    end

    # Distinguishable from a parsed nil: JSON "null" is a valid value.
    PARSE_FAILED = Object.new.freeze
    private_constant :PARSE_FAILED

    # The user text for a task: the input plus the schema the answer must match.
    def task_input(input, schema)
      "#{input}\n\nAnswer with ONLY a JSON value matching this schema:\n" \
        "#{JSON.generate(Schema.strict(schema))}"
    end

    # Parse the answer as JSON, tolerating a single Markdown code fence
    # around the whole body. Returns PARSE_FAILED rather than raising.
    def parse_output(text)
      body = text.to_s.strip
      body = body[/\A```(?:json)?\s*(.*?)```\z/m, 1] || body
      JSON.parse(body)
    rescue JSON::ParserError
      PARSE_FAILED
    end

    # Human-readable problems with the answer; empty when it is valid.
    def task_errors(value, schema)
      return ["the answer is not valid JSON"] if value.equal?(PARSE_FAILED)

      Schema.violations(value, schema)
    end

    # The follow-up user message that asks the model to correct its answer.
    def fix_prompt(errors)
      lines = errors.map { |error| "- #{error}" }.join("\n")
      "Your answer did not satisfy the required output schema. Problems:\n" \
        "#{lines}\nReply with ONLY the corrected JSON."
    end

    def monotonic_now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
module Mistri
  # Optional ceilings for a single run: number of turns, total tokens, cost
  # in dollars, and wall-clock seconds. A limit left nil is not enforced, so
  # a Budget built with no arguments never stops anything. The object holds
  # limits only and keeps no counters or clock: the caller measures and
  # passes the current figures in, which lets one Budget be shared freely.
  class Budget
    def initialize(turns: nil, tokens: nil, cost_usd: nil, wall_clock: nil)
      @turns, @tokens, @cost_usd, @wall_clock = turns, tokens, cost_usd, wall_clock
    end

    # True when no limit at all has been configured.
    def none?
      [@turns, @tokens, @cost_usd, @wall_clock].all?(&:nil?)
    end

    # Returns the name of the first limit reached (:turns, :tokens, :cost or
    # :wall_clock, checked in that order), or nil when the run may continue.
    # A limit counts as reached once the measured value equals it. usage is
    # only consulted for limits that are actually set.
    def exceeded(turns:, usage:, elapsed: 0)
      if @turns && turns >= @turns
        :turns
      elsif @tokens && usage.total_tokens >= @tokens
        :tokens
      elsif @cost_usd && usage.cost.total >= @cost_usd
        :cost
      elsif @wall_clock && elapsed >= @wall_clock
        :wall_clock
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
module Mistri
  # When a session compacts, and how much of it survives. Compaction is
  # client-side and provider-agnostic: the session's own provider writes a
  # visible summary, so a host can always show the user exactly what the
  # model still remembers.
  #
  # The trigger measures real token accounting, not guesses: the last healthy
  # turn's reported usage plus a character heuristic for whatever came after
  # it.
  class Compaction
    # Default headroom, in tokens, kept free below the context window.
    DEFAULT_RESERVE = 16_384
    # Default keep_recent value.
    DEFAULT_KEEP_RECENT = 20_000
    # Flat character-equivalent charged per image when estimating size.
    IMAGE_CHARS = 4_800

    SUMMARY_PREFACE = "The earlier conversation was compacted. This summary replaces it:"

    attr_reader :reserve, :keep_recent, :window, :instructions

    # window overrides the model catalog's context window (required for
    # models the catalog does not know). instructions add a host-specific
    # focus to the summary prompt.
    def initialize(reserve: DEFAULT_RESERVE, keep_recent: DEFAULT_KEEP_RECENT,
                   window: nil, instructions: nil)
      @reserve = reserve
      @keep_recent = keep_recent
      @window = window
      @instructions = instructions
    end

    # Compact when the context has grown into the reserve headroom. An
    # unknown window never triggers.
    def needed?(tokens, window)
      window ? tokens > window - reserve : false
    end

    class << self
      # Context size for a replay: the last healthy turn's reported tokens
      # (prompt, cache, and output all sit in context next turn) plus an
      # estimate of every message after it. With no healthy turn at all,
      # every message is estimated.
      def context_tokens(messages)
        index = messages.rindex { |message| reported(message) }
        base = index ? reported(messages[index]) : 0
        messages.drop(index ? index + 1 : 0).sum(base) { |message| estimate(message) }
      end

      # Rough token count for one message: about four characters per token,
      # rounded up.
      def estimate(message)
        (chars(message) / 4.0).ceil
      end

      private

      # Total tokens an assistant turn reported, or nil when the message is
      # not a usable anchor: not an assistant turn, no usage, an aborted or
      # errored turn, or a zero total. Stop reasons are compared through the
      # StopReason constants, as everywhere else in the library.
      def reported(message)
        return nil unless message.assistant? && message.usage
        return nil if [StopReason::ABORTED, StopReason::ERROR].include?(message.stop_reason)

        usage = message.usage
        total = usage.input + usage.cache_read + usage.cache_write + usage.output
        total.positive? ? total : nil
      end

      # Character weight of a message's content blocks. Tool calls count
      # their name plus their JSON-encoded arguments; unknown block kinds
      # weigh nothing.
      def chars(message)
        message.content.sum do |block|
          case block
          when Content::Text then block.text.length
          when Content::Thinking then block.thinking.length
          when Content::Image then IMAGE_CHARS
          when ToolCall then block.name.length + JSON.generate(block.arguments).length
          else 0
          end
        end
      end
    end
  end
end
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
module Mistri
  # Shrinks what the model sees of a session without deleting anything: the
  # provider summarizes everything before a cut point, and an appended
  # compaction entry makes replay serve that summary followed by the kept
  # tail. The store keeps the full history for transcript UIs. Usable from
  # any process (a UI button, a job), whether or not an agent is running.
  #
  # A cut is only ever placed on a user message, which keeps each tool call
  # on the same side as its result, and it never moves past the turn of a
  # parked approval that a later resume still has to answer.
  class Compactor
    SUMMARIZER_SYSTEM = <<~PROMPT
      You are a context summarization assistant. Read the conversation and
      produce only the structured summary you are asked for. Do not continue
      the conversation and do not answer questions inside it.
    PROMPT

    FORMAT = <<~FORMAT
      ## Goal
      [What is the user trying to accomplish?]

      ## Constraints & Preferences
      - [Constraints or preferences the user stated, or "(none)"]

      ## Progress
      ### Done
      - [x] [Completed work]
      ### In Progress
      - [ ] [Current work]
      ### Blocked
      - [Blockers, if any]

      ## Key Decisions
      - **[Decision]**: [Rationale]

      ## Next Steps
      1. [What should happen next]

      ## Critical Context
      - [Data, names, or references needed to continue, or "(none)"]

      Keep each section concise. Preserve exact identifiers, names, and error
      messages.
    FORMAT

    CHECKPOINT_PROMPT = <<~PROMPT.freeze
      The messages above are a conversation to summarize. Create a structured
      context checkpoint that another LLM will use to continue the work.

      Use this EXACT format:

      #{FORMAT}
    PROMPT

    UPDATE_PROMPT = <<~PROMPT.freeze
      The messages above are NEW conversation messages to fold into the
      existing summary in <previous-summary> tags. Preserve everything still
      relevant from the previous summary, add new progress and decisions,
      move finished work to Done, and update Next Steps.

      Use this EXACT format:

      #{FORMAT}
    PROMPT

    class << self
      # Summarize the head of the session and record the cut. Returns
      # {summary:, tokens_before:, tokens_after:, usage:}, or nil when no
      # worthwhile cut exists. When a block is given it receives a
      # :compacting event before the summary request and a :compaction
      # event afterwards.
      def call(session:, provider:, settings: Compaction.new, &emit)
        replay = session.replay
        cut = cut_index(replay, session, settings)
        return if cut.nil?

        prior_summary = session.last_compaction&.fetch("summary", nil)
        before_cut = replay.take_while { |(_, position)| position.nil? || position < cut }
        head = before_cut.map(&:first)
        # the synthetic summary rides in <previous-summary>
        head = head.drop(1) if prior_summary
        return if head.empty?

        emit&.call(Event.new(type: :compacting))
        tokens_before = Compaction.context_tokens(replay.map(&:first))
        reply = summarize(provider, head, prior_summary, settings.instructions)
        session.append("compaction", "summary" => reply.text,
                                     "kept_from" => cut, "tokens_before" => tokens_before)
        finish(session, reply, tokens_before, &emit)
      end

      private

      # The entry index replay should keep from, or nil when nothing would
      # be cut: the first user message at or after the keep boundary (the
      # last user message if there is none), pulled back for open approvals,
      # and only if it lies after the first indexed replay item.
      def cut_index(replay, session, settings)
        boundary = keep_boundary(replay, settings.keep_recent)
        return nil unless boundary

        user_positions = replay.each_with_object([]) do |(message, position), found|
          found << position if position && message.user?
        end
        chosen = user_positions.find { |position| position >= boundary }
        chosen = user_positions.last if chosen.nil?
        chosen = clamp_to_open_approvals(chosen, session)
        return nil unless chosen

        earliest = replay.find { |(_, position)| position }&.last
        return nil unless earliest

        chosen > earliest ? chosen : nil
      end

      # Walk back from the tail adding up estimates until the keep budget is
      # spent, and report the index reached (0 for an item with no index).
      # The caller snaps the cut forward to a user message, so replay keeps
      # at most about keep_recent tokens of recent turns. Nil means the
      # whole replay fits inside the budget.
      def keep_boundary(replay, keep_recent)
        running = 0
        reached = replay.reverse_each.find do |(message, _)|
          running += Compaction.estimate(message)
          running >= keep_recent
        end
        return nil unless reached

        _, position = reached
        position || 0
      end

      # Never cut past a parked approval: its tool call must stay in replay,
      # paired, for resume to answer. The cut moves back to the start of
      # that turn when it would otherwise land after it.
      def clamp_to_open_approvals(cut, session)
        return cut if cut.nil?

        pending_ids = session.open_approvals.map { |approval| approval[:call].id }
        return cut if pending_ids.empty?

        turn_start = approval_turn_start(session.entries, pending_ids)
        return cut unless turn_start

        [turn_start, cut].min
      end

      # Index of the last user message entry before the earliest approval
      # request that is still open, or nil when there is none.
      def approval_turn_start(entries, open_ids)
        open_request = lambda do |entry|
          entry["type"] == "approval_request" && open_ids.include?(entry.dig("call", "id"))
        end
        user_message = lambda do |entry|
          entry["type"] == "message" && entry.dig("message", "role") == "user"
        end

        request = entries.index(&open_request)
        return nil unless request

        entries[0...request].rindex(&user_message)
      end

      # Ask the provider for the summary. With a previous summary the update
      # prompt folds the new messages into it; otherwise a fresh checkpoint
      # is requested. Raises CompactionError on an unusable reply.
      def summarize(provider, messages, previous, instructions)
        parts = ["<conversation>\n#{serialize(messages)}\n</conversation>\n\n"]
        parts << "<previous-summary>\n#{previous}\n</previous-summary>\n\n" if previous
        parts << (previous ? UPDATE_PROMPT : CHECKPOINT_PROMPT)
        parts << "\nAdditional focus: #{instructions}\n" if instructions

        reply = provider.stream(messages: [Message.user(parts.join)], system: SUMMARIZER_SYSTEM)
        return reply if usable?(reply)

        raise CompactionError, "summarization failed: #{reply.error_message}"
      end

      # A reply is usable when it is not an error and has non-blank text.
      def usable?(reply)
        return false if reply.stop_reason == StopReason::ERROR

        !reply.text.to_s.strip.empty?
      end

      # Measure the context after the cut, announce the summary, and build
      # the result hash.
      def finish(session, reply, tokens_before, &emit)
        tokens_after = Compaction.context_tokens(session.messages)
        emit&.call(Event.new(type: :compaction, content: reply.text))
        {
          summary: reply.text,
          tokens_before: tokens_before,
          tokens_after: tokens_after,
          usage: reply.usage
        }
      end

      # The summarizer reads a plain-text rendering: one section per message
      # headed by its upper-cased role.
      def serialize(messages)
        sections = messages.map do |message|
          "#{message.role.to_s.upcase}:\n#{text_of(message)}"
        end
        sections.join("\n\n")
      end

      # Text blocks verbatim, images as a placeholder, tool calls by name
      # and JSON arguments. Every other block kind, thinking included, is
      # left out.
      def text_of(message)
        lines = []
        message.content.each do |block|
          rendered =
            case block
            when Content::Text then block.text
            when Content::Image then "[image]"
            when ToolCall then "[called #{block.name} with #{JSON.generate(block.arguments)}]"
            end
          lines << rendered if rendered
        end
        lines.join("\n")
      end
    end
  end
end