mistri 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +162 -0
- data/README.md +314 -3
- data/lib/generators/mistri/install/install_generator.rb +54 -0
- data/lib/generators/mistri/install/templates/migration.rb.tt +14 -0
- data/lib/generators/mistri/install/templates/model.rb.tt +4 -0
- data/lib/mistri/abort_signal.rb +63 -0
- data/lib/mistri/agent.rb +340 -0
- data/lib/mistri/budget.rb +29 -0
- data/lib/mistri/compaction.rb +78 -0
- data/lib/mistri/compactor.rb +182 -0
- data/lib/mistri/content.rb +89 -0
- data/lib/mistri/edit.rb +238 -0
- data/lib/mistri/errors.rb +94 -0
- data/lib/mistri/event.rb +50 -0
- data/lib/mistri/memory.rb +26 -0
- data/lib/mistri/message.rb +90 -0
- data/lib/mistri/models.rb +43 -0
- data/lib/mistri/partial_json.rb +210 -0
- data/lib/mistri/providers/anthropic/assembler.rb +205 -0
- data/lib/mistri/providers/anthropic/serializer.rb +106 -0
- data/lib/mistri/providers/anthropic.rb +106 -0
- data/lib/mistri/providers/fake.rb +109 -0
- data/lib/mistri/providers/gemini/assembler.rb +163 -0
- data/lib/mistri/providers/gemini/serializer.rb +109 -0
- data/lib/mistri/providers/gemini.rb +73 -0
- data/lib/mistri/providers/openai/assembler.rb +205 -0
- data/lib/mistri/providers/openai/serializer.rb +104 -0
- data/lib/mistri/providers/openai.rb +72 -0
- data/lib/mistri/result.rb +30 -0
- data/lib/mistri/retry_policy.rb +47 -0
- data/lib/mistri/schema.rb +162 -0
- data/lib/mistri/session.rb +124 -0
- data/lib/mistri/sinks/action_cable.rb +30 -0
- data/lib/mistri/sinks/coalesced.rb +61 -0
- data/lib/mistri/sinks/sse.rb +26 -0
- data/lib/mistri/skill.rb +15 -0
- data/lib/mistri/skills.rb +81 -0
- data/lib/mistri/sse.rb +50 -0
- data/lib/mistri/stop_reason.rb +25 -0
- data/lib/mistri/stores/active_record.rb +47 -0
- data/lib/mistri/stores/jsonl.rb +37 -0
- data/lib/mistri/stores/memory.rb +22 -0
- data/lib/mistri/sub_agent.rb +211 -0
- data/lib/mistri/tool.rb +94 -0
- data/lib/mistri/tool_call.rb +18 -0
- data/lib/mistri/tool_context.rb +15 -0
- data/lib/mistri/tool_executor.rb +66 -0
- data/lib/mistri/tool_result.rb +23 -0
- data/lib/mistri/tools/edit_file.rb +37 -0
- data/lib/mistri/tools/find_in_file.rb +36 -0
- data/lib/mistri/tools/list_files.rb +16 -0
- data/lib/mistri/tools/read_file.rb +38 -0
- data/lib/mistri/tools/read_memory.rb +16 -0
- data/lib/mistri/tools/update_memory.rb +22 -0
- data/lib/mistri/tools/write_file.rb +20 -0
- data/lib/mistri/tools.rb +50 -0
- data/lib/mistri/transport.rb +187 -0
- data/lib/mistri/usage.rb +79 -0
- data/lib/mistri/version.rb +1 -1
- data/lib/mistri/workspace/active_record.rb +47 -0
- data/lib/mistri/workspace/directory.rb +52 -0
- data/lib/mistri/workspace/memory.rb +40 -0
- data/lib/mistri/workspace/single.rb +48 -0
- data/lib/mistri.rb +87 -0
- metadata +68 -5
data/lib/mistri/agent.rb
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
# The agent loop: prompt the provider, run any tools it calls, feed the
|
|
7
|
+
# results back, and repeat until it answers without calling tools. Every
|
|
8
|
+
# streamed event reaches the caller's block as it arrives, and every run
|
|
9
|
+
# returns a Result.
|
|
10
|
+
#
|
|
11
|
+
# Each message persists to the session the moment it completes, so a crash
|
|
12
|
+
# or an abort leaves a replay-valid transcript with no repair step. A tool
|
|
13
|
+
# marked needs_approval suspends the run instead of executing: the run
|
|
14
|
+
# returns at once (no thread ever waits on a human), the decision arrives
|
|
15
|
+
# later as a session entry from any process, and resume settles it and
|
|
16
|
+
# carries on. Session#steer queues a user message from any process while
|
|
17
|
+
# the loop runs; it folds into the transcript at the next turn boundary.
|
|
18
|
+
class Agent
|
|
19
|
+
# Builds an agent around a provider.
#
# session falls back to a fresh Session over Stores::Memory. skills may be
# a String (handed to Skills.load) or anything Array() accepts; when any are
# present, Skills.amend folds them into the system prompt and the tool from
# Skills.reader joins the tool list. compaction and retries keep their
# defaults unless a falsy value is passed, which stores nil and switches the
# feature off. Compaction only ever triggers when the model's window is
# known (catalog or Compaction#window).
#
# Raises ConfigurationError when two tools share a name.
def initialize(provider:, session: nil, system: nil, tools: [], budget: nil,
               max_concurrency: 4, transform_context: nil, compaction: Compaction.new,
               retries: RetryPolicy.new, skills: [])
  @provider = provider
  @session = session || Session.new(store: Stores::Memory.new)

  loaded = skills.is_a?(String) ? Skills.load(skills) : Array(skills)
  @system = Skills.amend(system, loaded)
  @tools = loaded.empty? ? tools : tools + [Skills.reader(loaded)]
  @tools_by_name = @tools.to_h { |tool| [tool.name, tool] }
  # A name collision collapses two tools into one hash slot.
  unless @tools_by_name.length == @tools.length
    raise ConfigurationError, "duplicate tool names"
  end

  @budget = budget || Budget.new
  @max_concurrency = max_concurrency
  @transform_context = transform_context
  @compaction = compaction || nil
  @retries = retries || nil
end
|
|
42
|
+
|
|
43
|
+
attr_reader :session
|
|
44
|
+
|
|
45
|
+
# Runs one exchange: the user turn is appended, then the loop continues
# until the model answers without tools, a gated tool suspends the run, the
# run aborts, or a budget stops it.
#
# output_schema is passed through to every turn so the provider can
# constrain non-tool answers; run itself performs no validation (task does).
#
# Raises ConfigurationError while approvals are still open, and
# ArgumentError when neither text nor images are given.
def run(input, images: [], signal: nil, output_schema: nil, &emit)
  awaiting = @session.open_approvals.any?
  raise ConfigurationError, "session is awaiting approval decisions; call resume" if awaiting

  blank = input.to_s.empty? && Array(images).empty?
  raise ArgumentError, "run needs input text or images" if blank

  # Steers queued while idle arrived before this input; fold them first.
  fold_steers
  @session.append_message(Message.user_with_images(input, images))
  loop_turns(signal, output_schema, &emit)
end
|
|
63
|
+
|
|
64
|
+
# Picks a suspended run back up. While any open approval has no decision
# the call returns straight away with an :awaiting_approval Result listing
# the undecided calls. Once every approval is decided they are settled
# (approved calls execute, denied calls are answered in band) and the loop
# continues. No output schema is passed to the continued loop.
def resume(signal: nil, &emit)
  approvals = @session.open_approvals
  undecided = approvals.select { |approval| approval[:decision].nil? }

  unless undecided.empty?
    calls = undecided.map { |approval| approval[:call] }
    return Result.new(message: nil, status: :awaiting_approval, pending: calls)
  end

  settle(approvals, signal, &emit)
  loop_turns(signal, nil, &emit)
end
|
|
79
|
+
|
|
80
|
+
# Run an exchange that must end in a JSON value matching schema. Tools run
# as usual; the schema is passed to run as output_schema, and the answer is
# validated here regardless. A violation goes back to the model (up to
# fixes more times), then raises SchemaError. The Result carries the
# validated value as output.
#
# A run that does not complete (for example one that suspends for approval)
# is returned as-is: validation applies to completed runs only, so resume
# the session and re-ask if that happens mid-task.
#
# fixes below zero is treated as zero, so the first answer is still
# validated and a Result (or SchemaError) always comes back.
def task(input, schema:, images: [], signal: nil, fixes: 1, &emit)
  result = run(task_input(input, schema), images: images, signal: signal,
                                          output_schema: schema, &emit)
  # Clamp: a negative count would make downto yield nothing and hand back
  # the Integer receiver instead of a Result.
  [fixes, 0].max.downto(0) do |remaining|
    return result unless result.completed?

    value = parse_output(result.text)
    errors = task_errors(value, schema)
    return result.with(output: value) if errors.empty?
    raise SchemaError, "task output failed validation: #{errors.join("; ")}" if remaining.zero?

    result = run(fix_prompt(errors), signal: signal, output_schema: schema, &emit)
  end
end
|
|
103
|
+
|
|
104
|
+
# Reports how full the context is as {tokens:, window:, fraction:}, for
# hosts that render meters or near-limit warnings. window (and with it
# fraction) is nil when neither Compaction#window nor the model catalog
# supplies one. fraction is rounded to three decimals.
def context_usage
  used = Compaction.context_tokens(@session.messages)
  limit = context_window
  share = limit ? (used.to_f / limit).round(3) : limit
  { tokens: used, window: limit, fraction: share }
end
|
|
113
|
+
|
|
114
|
+
# Compacts immediately (a UI button, a pre-flight trim before a big task).
# Uses the agent's Compaction settings, or defaults when compaction was
# disabled. Returns whatever Compactor.call returns: its result, or nil
# when there is nothing worth compacting. The block is forwarded.
def compact(&)
  settings = @compaction || Compaction.new
  Compactor.call(session: @session, provider: @provider, settings: settings, &)
end
|
|
120
|
+
|
|
121
|
+
private
|
|
122
|
+
|
|
123
|
+
# The turn loop. Before each turn the budget is checked against turns
# taken, accumulated usage and elapsed monotonic time; queued steers are
# folded in and auto-compaction gets a chance to run (its usage counts
# toward the budget). After the turn, tool calls are answered or parked.
# Returns a Result from stop_for_budget, suspended or finished.
def loop_turns(signal, output_schema = nil, &emit)
  turn_count = 0
  spent = Usage.zero
  began = monotonic_now
  loop do
    stop = @budget.exceeded(turns: turn_count, usage: spent, elapsed: monotonic_now - began)
    return stop_for_budget(stop, &emit) if stop

    fold_steers
    summary_usage = auto_compact(&emit)&.dig(:usage)
    spent += summary_usage if summary_usage
    reply = run_turn(signal, output_schema, &emit)
    turn_count += 1
    spent += reply.usage if reply.usage

    # Every tool call the turn made must be answered or parked; otherwise
    # the transcript cannot be paired and replay fails.
    parked = reply.tool_calls? ? run_tools(reply, signal, &emit) : []
    return suspended(reply, parked) if parked.any?
    return finished(reply) if done?(reply, signal)
  end
end
|
|
145
|
+
|
|
146
|
+
# Decides whether the loop ends after this turn. An abort always ends it.
# A tool_use turn never does (the results still have to go back). Any stop
# reason other than a clean stop ends it. After a clean stop the run ends
# only when no steer is pending; a pending steer buys one more turn so it
# gets answered.
def done?(last, signal)
  return true if signal&.aborted?

  reason = last.stop_reason
  return false if reason == StopReason::TOOL_USE
  return true unless reason == StopReason::STOP

  @session.pending_steers.empty?
end
|
|
155
|
+
|
|
156
|
+
# Compacts when the context has grown into the reserve, as judged by
# Compaction#needed?. Returns the Compactor result, or nil when compaction
# is disabled, not needed, or the summarization raised CompactionError.
# That failure is skipped quietly: if the context genuinely no longer
# fits, the next turn surfaces the real provider error.
def auto_compact(&)
  return nil unless @compaction

  used = Compaction.context_tokens(@session.messages)
  if @compaction.needed?(used, context_window)
    Compactor.call(session: @session, provider: @provider, settings: @compaction, &)
  end
rescue CompactionError
  nil
end
|
|
169
|
+
|
|
170
|
+
# The context window to measure against: the Compaction override when one
# is set, otherwise the catalog entry for the provider's model (nil when
# the catalog has no entry).
def context_window
  override = @compaction&.window
  return override if override

  Models.find(@provider.model)&.context_window
end
|
|
173
|
+
|
|
174
|
+
# Writes every pending steer into the transcript, in the order the session
# reports them. Each folded message entry carries the steer's id, which is
# what marks the steer consumed: one append is both the fold and the
# marker, so a crash between steers never double-delivers.
def fold_steers
  queued = @session.pending_steers
  queued.each do |steer|
    @session.append("message", "message" => steer["message"], "steer_id" => steer["id"])
  end
end
|
|
183
|
+
|
|
184
|
+
# Streams one assistant turn and persists it.
#
# transform_context, when set, reshapes what the model sees for this turn
# (reminders, redaction, windowing) without touching what the session
# stores: it receives the replay messages and returns the messages to send.
# It must keep every tool call paired with its result or providers reject
# the request.
#
# A failure that retry_turn? accepts is recorded as a retry entry, waited
# out, and the same request is sent again; only the final message is
# appended to the session, so retries stay invisible to the model. If an
# abort arrives during the backoff, the failed message is persisted and
# returned instead.
def run_turn(signal, output_schema = nil, &emit)
  context = @session.messages
  context = @transform_context.call(context) if @transform_context
  attempts = 0
  loop do
    reply = @provider.stream(messages: context, system: @system,
                             tools: @tools.map(&:spec), signal: signal,
                             output_schema: output_schema, &emit)
    attempts += 1
    if retry_turn?(reply, attempts, signal)
      delay = @retries.delay(attempts, reply.error&.dig("retry_after"))
      record_retry(reply, attempts, delay, &emit)
      wait(delay, signal)
      next unless signal&.aborted?
    end
    @session.append_message(reply)
    return reply
  end
end
|
|
212
|
+
|
|
213
|
+
# Whether a failed turn should be retried: retries must be enabled, the
# message must have stopped with an error, the run must not be aborted,
# and the retry policy must accept this error at this attempt count.
def retry_turn?(message, attempt, signal)
  retryable = @retries && message.stop_reason == StopReason::ERROR
  return false if !retryable || signal&.aborted?

  @retries.retry?(message.error, attempt)
end
|
|
219
|
+
|
|
220
|
+
# Logs a failed attempt as a "retry" session entry (attempt number, the
# error, and the delay rounded to two decimals) and, when a block is given,
# emits a :retry event carrying a human-readable note and the failed
# message.
def record_retry(message, attempt, pause, &emit)
  failure = message.error
  @session.append("retry", "attempt" => attempt, "error" => failure,
                           "delay" => pause.round(2))
  note = format("attempt %<attempt>d failed; retrying in %<pause>.1fs",
                attempt: attempt, pause: pause)
  return unless emit

  emit.call(Event.new(type: :retry, content: note, reason: StopReason::ERROR,
                      message: message))
end
|
|
228
|
+
|
|
229
|
+
# Backoff that an abort can cut short. Sleeps in slices of at most 0.1s,
# checking the signal between slices, and returns once the deadline passes
# or the signal reports an abort. The final slice is trimmed to the time
# remaining, so the wait does not run past the requested pause by up to a
# whole slice. Returns nil.
def wait(seconds, signal)
  deadline = monotonic_now + seconds
  until signal&.aborted?
    remaining = deadline - monotonic_now
    break unless remaining.positive?

    sleep([remaining, 0.1].min)
  end
end
|
|
234
|
+
|
|
235
|
+
# Answers or parks every tool call on the assistant message, leaving none
# dangling. Unless this is a genuine tool_use turn with no abort, each call
# is paired with ToolExecutor::INTERRUPTED and nothing is parked. Otherwise
# ungated calls execute, and gated calls are written as approval_request
# entries (with an :approval_needed event each) and returned so the loop
# can suspend.
def run_tools(assistant, signal, &emit)
  calls = assistant.tool_calls
  runnable = assistant.stop_reason == StopReason::TOOL_USE && !signal&.aborted?
  unless runnable
    calls.each { |call| answer(call, ToolExecutor::INTERRUPTED, &emit) }
    return []
  end

  gated, ungated = calls.partition { |call| gated?(call) }
  execute(ungated, signal, &emit)
  gated.each do |call|
    @session.append("approval_request", "call" => call.to_h)
    emit&.call(Event.new(type: :approval_needed, tool_call: call))
  end
  gated
end
|
|
254
|
+
|
|
255
|
+
# Settles decided approvals: calls whose decision has a truthy "approved"
# are executed; every other call is answered with a denial message that
# includes the decision's "note" when there is one.
def settle(open, signal, &emit)
  approved, denied = open.partition { |approval| approval[:decision]["approved"] }
  approved_calls = approved.map { |approval| approval[:call] }
  execute(approved_calls, signal, &emit)

  denied.each do |approval|
    note = approval[:decision]["note"]
    suffix = note ? ": #{note}" : "."
    answer(approval[:call], "The user denied this tool call#{suffix}", &emit)
  end
end
|
|
264
|
+
|
|
265
|
+
# Runs the given calls through ToolExecutor and answers each one with its
# result, in the order the executor returns them. Does nothing (and returns
# nil) for an empty list.
def execute(calls, signal, &emit)
  return if calls.empty?

  outcomes = ToolExecutor.call(calls, @tools_by_name, signal: signal,
                                                      max_concurrency: @max_concurrency,
                                                      session: @session, emit: emit)
  outcomes.each { |call, result| answer(call, result, &emit) }
end
|
|
273
|
+
|
|
274
|
+
# Appends the tool message that answers a call and emits :tool_result.
# A ToolResult contributes both its content and its ui channel; any other
# value is used as the content with no ui. The event carries the whole
# stored message (so hosts can read event.message.ui) plus a text preview:
# the content itself when it is a String, "[content]" otherwise.
def answer(call, result, &emit)
  if result.is_a?(ToolResult)
    content = result.content
    ui = result.ui
  else
    content = result
    ui = nil
  end

  message = @session.append_message(Message.tool(content: content, tool_call_id: call.id,
                                                 tool_name: call.name, ui: ui))
  preview = content.is_a?(String) ? content : "[content]"
  emit&.call(Event.new(type: :tool_result, tool_call: call, content: preview, message: message))
end
|
|
283
|
+
|
|
284
|
+
# Whether the call must wait for approval: asks the named tool with the
# call's arguments. A call naming an unknown tool is never gated.
def gated?(call)
  tool = @tools_by_name[call.name]
  return false unless tool

  tool.needs_approval?(call.arguments)
end
|
|
288
|
+
|
|
289
|
+
# Wraps the final message in a Result whose status mirrors the stop
# reason: aborted, budget and error map to their own statuses, anything
# else counts as :completed.
def finished(message)
  status =
    case message.stop_reason
    when StopReason::ABORTED then :aborted
    when StopReason::BUDGET then :budget
    when StopReason::ERROR then :error
    else :completed
    end
  Result.new(message: message, status: status)
end
|
|
294
|
+
|
|
295
|
+
# Result for a run that stopped to wait on approvals; pending lists the
# parked calls.
def suspended(message, parked) =
  Result.new(message: message, status: :awaiting_approval, pending: parked)
|
|
298
|
+
|
|
299
|
+
# Ends the run because a budget limit was hit. A synthetic assistant
# message naming the limit is appended to the session, an :error event
# with reason StopReason::BUDGET is emitted when a block is given, and a
# :budget Result is returned.
def stop_for_budget(reason, &emit)
  message = Message.assistant(content: "Run stopped: #{reason} budget reached.",
                              stop_reason: StopReason::BUDGET,
                              error_message: "budget_#{reason}")
  @session.append_message(message)
  if emit
    emit.call(Event.new(type: :error, reason: StopReason::BUDGET, message: message,
                        error_message: "budget_#{reason}"))
  end
  Result.new(message: message, status: :budget)
end
|
|
308
|
+
|
|
309
|
+
# Distinguishable from a parsed nil: JSON "null" is a valid value.
|
|
310
|
+
PARSE_FAILED = Object.new.freeze
|
|
311
|
+
private_constant :PARSE_FAILED
|
|
312
|
+
|
|
313
|
+
# The prompt for a task: the caller's input followed by an instruction to
# answer with JSON only, and the Schema.strict form of the schema as JSON.
def task_input(input, schema)
  strict_json = JSON.generate(Schema.strict(schema))
  "#{input}\n\nAnswer with ONLY a JSON value matching this schema:\n#{strict_json}"
end
|
|
317
|
+
|
|
318
|
+
# Parses the model's answer as JSON. Surrounding whitespace is stripped,
# and a single Markdown code fence (optionally tagged json) wrapping the
# whole answer is unwrapped first. Returns the parsed value, or the
# PARSE_FAILED sentinel when the text is not valid JSON.
def parse_output(text)
  body = text.to_s.strip
  fenced = body[/\A```(?:json)?\s*(.*?)```\z/m, 1]
  JSON.parse(fenced || body)
rescue JSON::ParserError
  PARSE_FAILED
end
|
|
325
|
+
|
|
326
|
+
# Validation problems for a parsed task answer: a single "not valid JSON"
# message for the PARSE_FAILED sentinel, otherwise whatever
# Schema.violations reports for the value.
def task_errors(value, schema)
  if value.equal?(PARSE_FAILED)
    ["the answer is not valid JSON"]
  else
    Schema.violations(value, schema)
  end
end
|
|
331
|
+
|
|
332
|
+
# The follow-up prompt sent after a failed validation: the problems as a
# bulleted list, then a request for the corrected JSON only.
def fix_prompt(errors)
  bullets = errors.map { |error| "- #{error}" }
  "Your answer did not satisfy the required output schema. Problems:\n" \
    "#{bullets.join("\n")}\nReply with ONLY the corrected JSON."
end
|
|
337
|
+
|
|
338
|
+
# Seconds on the monotonic clock; used for durations, not wall time.
def monotonic_now
  Process.clock_gettime(Process::CLOCK_MONOTONIC)
end
|
|
339
|
+
end
|
|
340
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mistri
|
|
4
|
+
# Optional per-run ceilings: turns, tokens, dollars, wall-clock seconds.
|
|
5
|
+
# Nothing is enforced unless the host sets it; an empty budget never stops a
|
|
6
|
+
# run. Pure limits with no clock of its own, so one Budget shared across
|
|
7
|
+
# agents or runs behaves identically for each: the loop measures and asks
|
|
8
|
+
# between turns, and a run always finishes the turn it is in.
|
|
9
|
+
class Budget
  # Every limit is optional; nil leaves that dimension unchecked.
  def initialize(turns: nil, tokens: nil, cost_usd: nil, wall_clock: nil)
    @turns = turns
    @tokens = tokens
    @cost_usd = cost_usd
    @wall_clock = wall_clock
  end

  # True when no limit at all has been set.
  def none?
    [@turns, @tokens, @cost_usd, @wall_clock].compact.empty?
  end

  # The reason the run should stop, or nil to continue. Limits are checked
  # in the order turns, tokens, cost, wall clock, and the first one reached
  # wins. A limit counts as reached when the measured value is greater than
  # or equal to it.
  def exceeded(turns:, usage:, elapsed: 0)
    if @turns && turns >= @turns
      :turns
    elsif @tokens && usage.total_tokens >= @tokens
      :tokens
    elsif @cost_usd && usage.cost.total >= @cost_usd
      :cost
    elsif @wall_clock && elapsed >= @wall_clock
      :wall_clock
    end
  end
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
# When a session compacts, and how much of it survives. Compaction is
|
|
7
|
+
# client-side and provider-agnostic: the session's own provider writes a
|
|
8
|
+
# visible summary, so a host can always show the user exactly what the
|
|
9
|
+
# model still remembers.
|
|
10
|
+
#
|
|
11
|
+
# The trigger measures real token accounting, not guesses: the last healthy
|
|
12
|
+
# turn's reported usage plus a character heuristic for whatever came after
|
|
13
|
+
# it.
|
|
14
|
+
class Compaction
  DEFAULT_RESERVE = 16_384
  DEFAULT_KEEP_RECENT = 20_000
  # Flat character count charged for each image block when estimating.
  IMAGE_CHARS = 4_800

  SUMMARY_PREFACE = "The earlier conversation was compacted. This summary replaces it:"

  attr_reader :reserve, :keep_recent, :window, :instructions

  # window overrides the model catalog's context window (required for
  # models the catalog does not know). instructions add a host-specific
  # focus to the summary prompt.
  def initialize(reserve: DEFAULT_RESERVE, keep_recent: DEFAULT_KEEP_RECENT,
                 window: nil, instructions: nil)
    @reserve = reserve
    @keep_recent = keep_recent
    @window = window
    @instructions = instructions
  end

  # Compact when the context has grown into the reserve headroom, i.e. when
  # tokens exceeds window minus reserve. An unknown (nil) window never
  # triggers.
  def needed?(tokens, window)
    window ? tokens > window - reserve : false
  end

  class << self
    # Context size for a replay: the last healthy turn's reported tokens
    # (prompt, cache, and output all sit in context next turn) plus an
    # estimate of every message after it. With no healthy turn, every
    # message is estimated.
    def context_tokens(messages)
      index = messages.rindex { |message| reported(message) }
      base = index ? reported(messages[index]) : 0
      messages.drop(index ? index + 1 : 0).sum(base) { |message| estimate(message) }
    end

    # Rough token count for one message: a quarter of its character count,
    # rounded up.
    def estimate(message)
      (chars(message) / 4.0).ceil
    end

    private

    # Total tokens an assistant turn reported, or nil when the message is
    # not an assistant turn, carries no usage, was aborted or errored, or
    # reported nothing. The stop reason is compared against the StopReason
    # constants rather than literal symbols, so the check follows whatever
    # representation those constants use.
    def reported(message)
      return nil unless message.assistant? && message.usage
      return nil if [StopReason::ABORTED, StopReason::ERROR].include?(message.stop_reason)

      usage = message.usage
      total = usage.input + usage.cache_read + usage.cache_write + usage.output
      total.positive? ? total : nil
    end

    # Character count of a message's content blocks. Tool calls count their
    # name plus their arguments as JSON; unrecognized blocks count zero.
    def chars(message)
      message.content.sum do |block|
        case block
        when Content::Text then block.text.length
        when Content::Thinking then block.thinking.length
        when Content::Image then IMAGE_CHARS
        when ToolCall then block.name.length + JSON.generate(block.arguments).length
        else 0
        end
      end
    end
  end
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Mistri
|
|
6
|
+
# Compacts a session in place: everything before a cut point is summarized
|
|
7
|
+
# by the provider, and a compaction entry redirects replay to the summary
|
|
8
|
+
# plus the kept tail. Append-only — the full history stays in the store for
|
|
9
|
+
# transcript UIs; only what the model sees shrinks. Callable from any
|
|
10
|
+
# process (a UI button, a job), with or without a running agent.
|
|
11
|
+
#
|
|
12
|
+
# Cuts land only on user messages, so a tool call and its result always
|
|
13
|
+
# stay on the same side, and a parked approval's turn is never cut away
|
|
14
|
+
# from the resume that must answer it.
|
|
15
|
+
class Compactor
|
|
16
|
+
SUMMARIZER_SYSTEM = <<~PROMPT
|
|
17
|
+
You are a context summarization assistant. Read the conversation and
|
|
18
|
+
produce only the structured summary you are asked for. Do not continue
|
|
19
|
+
the conversation and do not answer questions inside it.
|
|
20
|
+
PROMPT
|
|
21
|
+
|
|
22
|
+
FORMAT = <<~FORMAT
|
|
23
|
+
## Goal
|
|
24
|
+
[What is the user trying to accomplish?]
|
|
25
|
+
|
|
26
|
+
## Constraints & Preferences
|
|
27
|
+
- [Constraints or preferences the user stated, or "(none)"]
|
|
28
|
+
|
|
29
|
+
## Progress
|
|
30
|
+
### Done
|
|
31
|
+
- [x] [Completed work]
|
|
32
|
+
### In Progress
|
|
33
|
+
- [ ] [Current work]
|
|
34
|
+
### Blocked
|
|
35
|
+
- [Blockers, if any]
|
|
36
|
+
|
|
37
|
+
## Key Decisions
|
|
38
|
+
- **[Decision]**: [Rationale]
|
|
39
|
+
|
|
40
|
+
## Next Steps
|
|
41
|
+
1. [What should happen next]
|
|
42
|
+
|
|
43
|
+
## Critical Context
|
|
44
|
+
- [Data, names, or references needed to continue, or "(none)"]
|
|
45
|
+
|
|
46
|
+
Keep each section concise. Preserve exact identifiers, names, and error
|
|
47
|
+
messages.
|
|
48
|
+
FORMAT
|
|
49
|
+
|
|
50
|
+
CHECKPOINT_PROMPT = <<~PROMPT.freeze
|
|
51
|
+
The messages above are a conversation to summarize. Create a structured
|
|
52
|
+
context checkpoint that another LLM will use to continue the work.
|
|
53
|
+
|
|
54
|
+
Use this EXACT format:
|
|
55
|
+
|
|
56
|
+
#{FORMAT}
|
|
57
|
+
PROMPT
|
|
58
|
+
|
|
59
|
+
UPDATE_PROMPT = <<~PROMPT.freeze
|
|
60
|
+
The messages above are NEW conversation messages to fold into the
|
|
61
|
+
existing summary in <previous-summary> tags. Preserve everything still
|
|
62
|
+
relevant from the previous summary, add new progress and decisions,
|
|
63
|
+
move finished work to Done, and update Next Steps.
|
|
64
|
+
|
|
65
|
+
Use this EXACT format:
|
|
66
|
+
|
|
67
|
+
#{FORMAT}
|
|
68
|
+
PROMPT
|
|
69
|
+
|
|
70
|
+
class << self
|
|
71
|
+
# Summarizes everything before the cut and records a compaction entry.
# Returns {summary:, tokens_before:, tokens_after:, usage:}, or nil when
# there is no usable cut or nothing precedes it. Emits :compacting before
# the summary request and :compaction afterwards when a block is given.
def call(session:, provider:, settings: Compaction.new, &emit)
  replay = session.replay
  cut = cut_index(replay, session, settings)
  return nil unless cut

  prior_summary = session.last_compaction&.fetch("summary", nil)
  before_cut = replay.take_while { |(_, index)| index.nil? || index < cut }
  head = before_cut.map(&:first)
  # The synthetic summary message travels in <previous-summary> instead.
  head = head.drop(1) if prior_summary
  return nil if head.empty?

  emit&.call(Event.new(type: :compacting))
  tokens_before = Compaction.context_tokens(replay.map(&:first))
  reply = summarize(provider, head, prior_summary, settings.instructions)
  session.append("compaction", "summary" => reply.text,
                               "kept_from" => cut, "tokens_before" => tokens_before)
  finish(session, reply, tokens_before, &emit)
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
# Picks the entry index to cut at, or nil when no cut is worthwhile.
# Candidates are user messages that carry an index; the first one at or
# past the keep boundary wins, falling back to the last candidate. The cut
# is then clamped so open approvals stay in replay, and rejected unless it
# lies strictly after the first indexed replay entry.
def cut_index(replay, session, settings)
  boundary = keep_boundary(replay, settings.keep_recent)
  return nil unless boundary

  user_indexes = replay.filter_map { |(message, index)| index if index && message.user? }
  chosen = user_indexes.find { |index| index >= boundary } || user_indexes.last
  chosen = clamp_to_open_approvals(chosen, session)
  return nil unless chosen

  earliest = replay.find { |(_, index)| index }&.last
  return nil unless earliest && chosen > earliest

  chosen
end
|
|
106
|
+
|
|
107
|
+
# Walks back from the tail, adding up estimated tokens, and returns the
# index of the entry at which the running total reaches keep_recent (0 when
# that entry has no index). Returns nil when the whole replay is smaller
# than keep_recent. The caller snaps the cut forward to a user message, so
# replay keeps at most about keep_recent tokens of recent turns.
def keep_boundary(replay, keep_recent)
  running = 0
  reached = replay.reverse_each.find do |(message, _)|
    running += Compaction.estimate(message)
    running >= keep_recent
  end
  reached && (reached[1] || 0)
end
|
|
118
|
+
|
|
119
|
+
# Pulls the cut back so it never passes a parked approval: the approval's
# tool call must stay in replay, paired, for resume to answer. With open
# approvals, the cut moves to the start of the approval's turn when that
# start lies before the cut; otherwise the cut is returned unchanged.
def clamp_to_open_approvals(cut, session)
  return cut unless cut

  ids = session.open_approvals.map { |approval| approval[:call].id }
  return cut if ids.empty?

  turn_start = approval_turn_start(session.entries, ids)
  return cut unless turn_start && turn_start < cut

  turn_start
end
|
|
130
|
+
|
|
131
|
+
# Position in entries of the last user message before the first
# approval_request whose call id is in open_ids. Returns nil when there is
# no such request or no user message precedes it.
def approval_turn_start(entries, open_ids)
  request_at = entries.index do |entry|
    entry["type"] == "approval_request" && open_ids.include?(entry.dig("call", "id"))
  end
  return nil unless request_at

  entries.take(request_at).rindex do |entry|
    entry["type"] == "message" && entry.dig("message", "role") == "user"
  end
end
|
|
141
|
+
|
|
142
|
+
# Asks the provider for a summary of the given messages. The prompt holds
# the serialized conversation, the previous summary when there is one, the
# matching instruction block (UPDATE_PROMPT with a previous summary,
# CHECKPOINT_PROMPT without), and any host-supplied focus. Returns the
# provider's reply; raises CompactionError when the reply is not usable.
def summarize(provider, messages, previous, instructions)
  sections = ["<conversation>\n#{serialize(messages)}\n</conversation>\n\n"]
  sections << "<previous-summary>\n#{previous}\n</previous-summary>\n\n" if previous
  sections << (previous ? UPDATE_PROMPT : CHECKPOINT_PROMPT)
  sections << "\nAdditional focus: #{instructions}\n" if instructions

  reply = provider.stream(messages: [Message.user(sections.join)], system: SUMMARIZER_SYSTEM)
  return reply if usable?(reply)

  raise CompactionError, "summarization failed: #{reply.error_message}"
end
|
|
152
|
+
|
|
153
|
+
# A reply can serve as a summary when it did not stop with an error and
# its text is not blank.
def usable?(reply)
  return false if reply.stop_reason == StopReason::ERROR

  !reply.text.to_s.strip.empty?
end
|
|
156
|
+
|
|
157
|
+
# Measures the context after the compaction entry was written, emits
# :compaction with the summary text when a block is given, and builds the
# result hash returned to the caller.
def finish(session, reply, tokens_before, &emit)
  remaining = Compaction.context_tokens(session.messages)
  emit&.call(Event.new(type: :compaction, content: reply.text))
  {
    summary: reply.text,
    tokens_before: tokens_before,
    tokens_after: remaining,
    usage: reply.usage
  }
end
|
|
163
|
+
|
|
164
|
+
# Renders messages as plain text for the summarizer: each message becomes
# its upper-cased role, a colon, and the text from text_of on the next
# line, with a blank line between messages.
def serialize(messages)
  sections = messages.map do |message|
    heading = message.role.to_s.upcase
    "#{heading}:\n#{text_of(message)}"
  end
  sections.join("\n\n")
end
|
|
170
|
+
|
|
171
|
+
# Plain-text rendering of one message's content blocks, one per line:
# text blocks verbatim, images as "[image]", tool calls by name with their
# arguments as JSON. Any other block kind is left out.
def text_of(message)
  rendered = message.content.filter_map do |block|
    if block.is_a?(Content::Text)
      block.text
    elsif block.is_a?(Content::Image)
      "[image]"
    elsif block.is_a?(ToolCall)
      "[called #{block.name} with #{JSON.generate(block.arguments)}]"
    end
  end
  rendered.join("\n")
end
|
|
180
|
+
end
|
|
181
|
+
end
|
|
182
|
+
end
|