@possumtech/rummy 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.env.example +31 -5
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +389 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +13 -9
  11. package/scriptify/ask_run.js +77 -0
  12. package/scriptify/cache_probe.js +66 -0
  13. package/scriptify/cache_probe_grok.js +74 -0
  14. package/service.js +22 -11
  15. package/src/agent/AgentLoop.js +62 -157
  16. package/src/agent/ContextAssembler.js +2 -9
  17. package/src/agent/Entries.js +54 -98
  18. package/src/agent/ProjectAgent.js +4 -11
  19. package/src/agent/TurnExecutor.js +48 -83
  20. package/src/agent/XmlParser.js +247 -273
  21. package/src/agent/budget.js +5 -28
  22. package/src/agent/config.js +38 -0
  23. package/src/agent/errors.js +7 -13
  24. package/src/agent/httpStatus.js +1 -19
  25. package/src/agent/known_queries.sql +1 -1
  26. package/src/agent/known_store.sql +12 -2
  27. package/src/agent/materializeContext.js +15 -18
  28. package/src/agent/pathEncode.js +5 -0
  29. package/src/agent/rummyHome.js +9 -0
  30. package/src/agent/runs.sql +37 -0
  31. package/src/agent/tokens.js +7 -7
  32. package/src/hooks/HookRegistry.js +1 -16
  33. package/src/hooks/Hooks.js +8 -33
  34. package/src/hooks/PluginContext.js +3 -21
  35. package/src/hooks/RpcRegistry.js +1 -4
  36. package/src/hooks/RummyContext.js +6 -16
  37. package/src/hooks/ToolRegistry.js +5 -15
  38. package/src/llm/LlmProvider.js +41 -33
  39. package/src/llm/errors.js +41 -4
  40. package/src/llm/openaiStream.js +125 -0
  41. package/src/llm/retry.js +109 -0
  42. package/src/plugins/budget/budget.js +55 -76
  43. package/src/plugins/cli/README.md +87 -0
  44. package/src/plugins/cli/bin.js +61 -0
  45. package/src/plugins/cli/cli.js +120 -0
  46. package/src/plugins/env/README.md +2 -1
  47. package/src/plugins/env/env.js +4 -6
  48. package/src/plugins/env/envDoc.md +2 -2
  49. package/src/plugins/error/error.js +23 -23
  50. package/src/plugins/file/file.js +2 -22
  51. package/src/plugins/get/get.js +12 -34
  52. package/src/plugins/get/getDoc.md +8 -6
  53. package/src/plugins/hedberg/edits.js +1 -11
  54. package/src/plugins/hedberg/hedberg.js +3 -26
  55. package/src/plugins/hedberg/normalize.js +1 -5
  56. package/src/plugins/hedberg/patterns.js +4 -15
  57. package/src/plugins/hedberg/sed.js +1 -7
  58. package/src/plugins/helpers.js +28 -20
  59. package/src/plugins/index.js +25 -41
  60. package/src/plugins/instructions/README.md +18 -0
  61. package/src/plugins/instructions/instructions.js +97 -38
  62. package/src/plugins/instructions/instructions.md +24 -15
  63. package/src/plugins/instructions/instructions_104.md +5 -4
  64. package/src/plugins/instructions/instructions_105.md +29 -36
  65. package/src/plugins/instructions/instructions_106.md +22 -0
  66. package/src/plugins/instructions/instructions_107.md +17 -0
  67. package/src/plugins/instructions/instructions_108.md +0 -8
  68. package/src/plugins/known/README.md +26 -6
  69. package/src/plugins/known/known.js +37 -34
  70. package/src/plugins/log/README.md +2 -2
  71. package/src/plugins/log/log.js +27 -34
  72. package/src/plugins/ollama/ollama.js +50 -66
  73. package/src/plugins/openai/openai.js +26 -44
  74. package/src/plugins/openrouter/openrouter.js +28 -52
  75. package/src/plugins/policy/README.md +8 -2
  76. package/src/plugins/policy/policy.js +8 -21
  77. package/src/plugins/prompt/README.md +22 -0
  78. package/src/plugins/prompt/prompt.js +14 -16
  79. package/src/plugins/rm/rm.js +5 -2
  80. package/src/plugins/rm/rmDoc.md +4 -4
  81. package/src/plugins/rpc/README.md +2 -1
  82. package/src/plugins/rpc/rpc.js +62 -48
  83. package/src/plugins/set/README.md +5 -1
  84. package/src/plugins/set/set.js +23 -33
  85. package/src/plugins/set/setDoc.md +1 -1
  86. package/src/plugins/sh/README.md +2 -1
  87. package/src/plugins/sh/sh.js +5 -11
  88. package/src/plugins/sh/shDoc.md +2 -2
  89. package/src/plugins/stream/README.md +6 -5
  90. package/src/plugins/stream/stream.js +6 -35
  91. package/src/plugins/telemetry/telemetry.js +26 -19
  92. package/src/plugins/think/think.js +4 -7
  93. package/src/plugins/unknown/unknown.js +8 -13
  94. package/src/plugins/update/update.js +42 -25
  95. package/src/plugins/update/updateDoc.md +3 -3
  96. package/src/plugins/xai/xai.js +30 -20
  97. package/src/plugins/yolo/yolo.js +159 -0
  98. package/src/server/ClientConnection.js +17 -47
  99. package/src/server/SocketServer.js +14 -14
  100. package/src/server/protocol.js +1 -10
  101. package/src/sql/functions/slugify.js +5 -7
  102. package/src/sql/v_model_context.sql +4 -11
  103. package/turns/cli_1777462658211/turn_001.txt +772 -0
  104. package/turns/cli_1777462658211/turn_002.txt +606 -0
  105. package/turns/cli_1777462658211/turn_003.txt +667 -0
  106. package/turns/cli_1777462658211/turn_004.txt +297 -0
  107. package/turns/cli_1777462658211/turn_005.txt +301 -0
  108. package/turns/cli_1777462658211/turn_006.txt +262 -0
  109. package/turns/cli_1777465095132/turn_001.txt +715 -0
  110. package/turns/cli_1777465095132/turn_002.txt +236 -0
  111. package/turns/cli_1777465095132/turn_003.txt +287 -0
  112. package/turns/cli_1777465095132/turn_004.txt +694 -0
  113. package/turns/cli_1777465095132/turn_005.txt +422 -0
  114. package/turns/cli_1777465095132/turn_006.txt +365 -0
  115. package/turns/cli_1777465095132/turn_007.txt +885 -0
  116. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  117. package/turns/cli_1777465095132/turn_009.txt +736 -0
@@ -1,5 +1,6 @@
1
1
  import slugify from "../sql/functions/slugify.js";
2
2
  import { PermissionError } from "./errors.js";
3
+ import encodeSegment from "./pathEncode.js";
3
4
 
4
5
  export default class Entries {
5
6
  #db;
@@ -14,10 +15,7 @@ export default class Entries {
14
15
  this.#onChanged = onChanged;
15
16
  }
16
17
 
17
- /**
18
- * Populate the scheme cache. Can be called explicitly (e.g. at boot
19
- * after initPlugins finishes) or runs lazily on first need. Idempotent.
20
- */
18
+ // Populate the scheme cache; idempotent, lazy on first need.
21
19
  async loadSchemes(db) {
22
20
  const rows = await (db || this.#db).get_all_schemes.all();
23
21
  this.#schemes.clear();
@@ -38,6 +36,7 @@ export default class Entries {
38
36
  }
39
37
 
40
38
  static scheme(path) {
39
+ if (!path) return null;
41
40
  const idx = path.indexOf("://");
42
41
  return idx > 0 ? path.slice(0, idx) : null;
43
42
  }
@@ -50,9 +49,9 @@ export default class Entries {
50
49
  try {
51
50
  // Decode first (idempotent), then encode — but preserve slashes
52
51
  const decoded = decodeURIComponent(rest);
53
- return `${scheme}://${decoded.split("/").map(encodeURIComponent).join("/")}`;
52
+ return `${scheme}://${decoded.split("/").map(encodeSegment).join("/")}`;
54
53
  } catch {
55
- return `${scheme}://${rest.split("/").map(encodeURIComponent).join("/")}`;
54
+ return `${scheme}://${rest.split("/").map(encodeSegment).join("/")}`;
56
55
  }
57
56
  }
58
57
 
@@ -62,7 +61,7 @@ export default class Entries {
62
61
  }
63
62
 
64
63
  async dedup(runId, scheme, target, turn) {
65
- const encodedTarget = encodeURIComponent(target);
64
+ const encodedTarget = encodeSegment(target);
66
65
  const turnPrefix = turn ? `turn_${turn}/` : "";
67
66
  const candidate = `${scheme}://${turnPrefix}${encodedTarget}`;
68
67
  const existing = await this.#db.get_entry_body.get({
@@ -73,12 +72,15 @@ export default class Entries {
73
72
  return `${candidate}_${++this.#seq}`;
74
73
  }
75
74
 
76
- // Log entries share a single namespace at log://turn_N/action/slug.
77
- // The action segment is the tool/plugin name (set, get, search, update,
78
- // error, etc.). Target is URL-encoded so slashes and scheme separators
79
- // survive round-trips.
75
+ // Single namespace log://turn_N/action/slug; target URL-encoded for round-trip safety.
80
76
  async logPath(runId, turn, action, target) {
81
- const encodedTarget = encodeURIComponent(target);
77
+ // Cap target before encoding: the schema's CHECK(length(path) <= 2048)
78
+ // otherwise blows up when callers pass long error messages or other
79
+ // arbitrary text. encodeURIComponent expands ~3x for ASCII, more for
80
+ // Unicode; 150 raw chars stays comfortably under 2048 even after
81
+ // worst-case expansion. The full message belongs in body, not path.
82
+ const safeTarget = String(target).slice(0, 150);
83
+ const encodedTarget = encodeSegment(safeTarget);
82
84
  const candidate = `log://turn_${turn}/${action}/${encodedTarget}`;
83
85
  const existing = await this.#db.get_entry_body.get({
84
86
  run_id: runId,
@@ -89,9 +91,7 @@ export default class Entries {
89
91
  }
90
92
 
91
93
  async slugPath(runId, scheme, content, summary) {
92
- // Prefer summary, fall back to body content, then empty slugify
93
- // handles empty explicitly by returning "" and the caller generates
94
- // a sequence-only path.
94
+ // summary > content > empty; slugify("") yields "" and we sequence-only.
95
95
  let source = "";
96
96
  if (summary) source = summary;
97
97
  else if (content) source = content;
@@ -110,12 +110,7 @@ export default class Entries {
110
110
  return `${prefix}${base}_${++this.#seq}`;
111
111
  }
112
112
 
113
- /**
114
- * Resolve a scheme's declared scope kind + writer list + category.
115
- * Unregistered or declaration-less schemes default to run-level +
116
- * model/plugin writers so ad-hoc paths (e.g. bare filenames) still
117
- * work.
118
- */
113
+ // Scheme's scope/writers/category; bare paths default to run + model/plugin.
119
114
  async #schemeRules(scheme) {
120
115
  await this.#ensureSchemes();
121
116
  const row = scheme ? this.#schemes.get(scheme) : null;
@@ -153,17 +148,7 @@ export default class Entries {
153
148
  return `run:${runId}`;
154
149
  }
155
150
 
156
- /**
157
- * set — create or update an entry. The semantically wide primitive.
158
- *
159
- * Modes (selected by which options are present):
160
- * — write content: body given, state ∈ {proposed,streaming,resolved,failed,cancelled}
161
- * — change visibility only: visibility given, body omitted
162
- * — change state only: state given, body omitted (resolve a proposal)
163
- * — merge attributes: attributes given, body omitted
164
- * — append to body: append:true (streaming)
165
- * — pattern match: path contains wildcards or bodyFilter set
166
- */
151
+ // set — create or update an entry; see PLUGINS.md primitives.
167
152
  async set({
168
153
  runId,
169
154
  projectId = null,
@@ -184,14 +169,9 @@ export default class Entries {
184
169
  if (!runId) throw new Error("set: runId is required");
185
170
  if (!path) throw new Error("set: path is required");
186
171
 
187
- // Pattern mode is explicit (pattern: true) or implicit when a
188
- // body filter is supplied. The literal `*` character can appear
189
- // inside legitimate exact paths (e.g. rm://foo%2F* as a result
190
- // path for an rm against a pattern); we don't infer pattern mode
191
- // from the path alone.
172
+ // Pattern mode is explicit; never inferred from `*` in path.
192
173
  const isPattern = pattern === true || bodyFilter !== null;
193
174
 
194
- // Pattern mode: update matching entries (visibility / body / both).
195
175
  if (isPattern) {
196
176
  if (body != null && !append) {
197
177
  await this.#db.update_body_by_pattern.run({
@@ -278,14 +258,7 @@ export default class Entries {
278
258
  throw new PermissionError(scheme, writer, writers);
279
259
  }
280
260
  const scope = this.#resolveScope(kind, runId, projectId);
281
- // Log entries self-describe via `action` so consumers (renderer,
282
- // client UIs, tests) can read the action without parsing the
283
- // path. Only inject `action` when the caller passes attributes
284
- // — a null `attributes` means "don't touch existing" and the
285
- // SQL's COALESCE handles preservation on UPDATE. If we generated
286
- // `{action: m[1]}` for every null-attributes log write, every
287
- // body-only update to a log entry would clobber existing attrs
288
- // (command, summary, demotedCount, ...).
261
+ // Inject `action` only when caller passes attributes; null means COALESCE preserves existing.
289
262
  const effectiveAttributes = attributes ? { ...attributes } : null;
290
263
  if (scheme === "log" && effectiveAttributes) {
291
264
  const m = normalized.match(/^log:\/\/turn_\d+\/([^/]+)\//);
@@ -320,11 +293,7 @@ export default class Entries {
320
293
  }
321
294
  }
322
295
 
323
- /**
324
- * get — promote entry(ies) to visible visibility. Default visibility is
325
- * "visible"; pass visibility explicitly for a read-with-side-effect at
326
- * a different visibility (rare).
327
- */
296
+ // get — promote entry(ies); see PLUGINS.md primitives.
328
297
  async get({
329
298
  runId,
330
299
  turn = 0,
@@ -351,11 +320,7 @@ export default class Entries {
351
320
  this.#emitChanged(runId, path, "promote");
352
321
  }
353
322
 
354
- /**
355
- * rm — remove entry view(s). Matches single path or pattern; optional
356
- * bodyFilter narrows pattern matches. `filesOnly` restricts to bare
357
- * file-scheme entries (scheme IS NULL).
358
- */
323
+ // rm — remove entry view(s); see PLUGINS.md primitives.
359
324
  async rm({ runId, path, bodyFilter = null, filesOnly = false }) {
360
325
  if (!runId) throw new Error("rm: runId is required");
361
326
  if (!path) throw new Error("rm: path is required");
@@ -380,10 +345,7 @@ export default class Entries {
380
345
  this.#emitChanged(runId, path, "remove");
381
346
  }
382
347
 
383
- /**
384
- * cp — copy an entry to a new path. Source body becomes new body;
385
- * source view unchanged.
386
- */
348
+ // cp — copy an entry to a new path; see PLUGINS.md primitives.
387
349
  async cp({
388
350
  runId,
389
351
  turn = 0,
@@ -410,9 +372,7 @@ export default class Entries {
410
372
  });
411
373
  }
412
374
 
413
- /**
414
- * mv — rename an entry. Equivalent to cp + rm on source.
415
- */
375
+ // mv — rename (cp + rm).
416
376
  async mv({
417
377
  runId,
418
378
  turn = 0,
@@ -438,13 +398,7 @@ export default class Entries {
438
398
  await this.rm({ runId, path: from });
439
399
  }
440
400
 
441
- /**
442
- * update — once-per-turn lifecycle signal from the model (or plugin
443
- * speaking on its behalf). Writes to update://<slug> with body as the
444
- * content and attributes.status carrying the model's continuation code
445
- * (102 continue, 200/204 terminal, 422 can't-answer). Returns the
446
- * slug path.
447
- */
401
+ // update — once-per-turn lifecycle signal; see PLUGINS.md.
448
402
  async update({
449
403
  runId,
450
404
  turn = 0,
@@ -474,7 +428,12 @@ export default class Entries {
474
428
  runId,
475
429
  path,
476
430
  body = null,
477
- { limit = null, offset = null } = {},
431
+ {
432
+ limit = null,
433
+ offset = null,
434
+ since = null,
435
+ includeAuditSchemes = false,
436
+ } = {},
478
437
  ) {
479
438
  return this.#db.get_entries_by_pattern.all({
480
439
  run_id: runId,
@@ -482,6 +441,8 @@ export default class Entries {
482
441
  body: body ? body : null,
483
442
  limit,
484
443
  offset,
444
+ since,
445
+ include_audit_schemes: includeAuditSchemes ? 1 : null,
485
446
  });
486
447
  }
487
448
 
@@ -494,7 +455,16 @@ export default class Entries {
494
455
  }
495
456
  }
496
457
 
497
- waitForResolution(runId, path) {
458
+ async waitForResolution(runId, path) {
459
+ // Pre-check: yolo's synchronous resolver may have already flipped state, no drain will fire.
460
+ const current = await this.getState(runId, path);
461
+ if (
462
+ current &&
463
+ current.state !== "proposed" &&
464
+ current.state !== "streaming"
465
+ ) {
466
+ return;
467
+ }
498
468
  const normalized = Entries.normalizePath(path);
499
469
  const key = `${runId}:${normalized}`;
500
470
  return new Promise((resolve) => {
@@ -545,9 +515,7 @@ export default class Entries {
545
515
  return new Set(rows.map((r) => r.body));
546
516
  }
547
517
 
548
- /**
549
- * Unknown entries for a run, in DB order. Rows include path + body.
550
- */
518
+ // Unknown entries in DB order; rows include path + body.
551
519
  async getUnknowns(runId) {
552
520
  return this.#db.get_unknowns.all({ run_id: runId });
553
521
  }
@@ -559,14 +527,14 @@ export default class Entries {
559
527
  });
560
528
  }
561
529
 
562
- /**
563
- * Demote all promoted entries for a run on a given turn. Returns the
564
- * affected rows (path, tokens) so callers can summarize.
565
- *
566
- * Implemented as SELECT-then-UPDATE because SQLite's RETURNING doesn't
567
- * support the cross-table lookup needed to report content paths/tokens
568
- * from the view-layer update.
569
- */
530
+ async archivePriorPromptArtifacts(runId, currentTurn) {
531
+ await this.#db.archive_prior_prompt_artifacts.run({
532
+ run_id: runId,
533
+ current_turn: currentTurn,
534
+ });
535
+ }
536
+
537
+ // SELECT-then-UPDATE: SQLite RETURNING can't cross to the view layer.
570
538
  async demoteTurnEntries(runId, turn) {
571
539
  const targets = await this.#db.get_turn_demotion_targets.all({
572
540
  run_id: runId,
@@ -576,14 +544,7 @@ export default class Entries {
576
544
  return targets;
577
545
  }
578
546
 
579
- /**
580
- * Demote every currently-visible entry in a run. Used by budget
581
- * postDispatch as the fallback when this-turn demotion finds nothing
582
- * and the packet still overflows — left-over promotions from prior
583
- * turns the model didn't demote themselves. Returns the affected
584
- * rows (path, tokens, turn) ordered oldest promotion first so the
585
- * error body can name them.
586
- */
547
+ // Budget postDispatch fallback: demote every visible entry in the run.
587
548
  async demoteRunVisibleEntries(runId) {
588
549
  const targets = await this.#db.get_run_visible_targets.all({
589
550
  run_id: runId,
@@ -592,17 +553,12 @@ export default class Entries {
592
553
  return targets;
593
554
  }
594
555
 
595
- /**
596
- * Run metadata lookup. Exposed here so plugins don't reach into
597
- * core.db for run-scoped lookups.
598
- */
556
+ // Plugin-facing run lookup; avoids reaching into core.db.
599
557
  async getRun(runId) {
600
558
  return this.#db.get_run_by_id.get({ id: runId });
601
559
  }
602
560
 
603
- /**
604
- * Turn-level usage stats write (telemetry). Same rationale as getRun.
605
- */
561
+ // Plugin-facing turn-stats write.
606
562
  async updateTurnStats(stats) {
607
563
  return this.#db.update_turn_stats.run(stats);
608
564
  }
@@ -83,14 +83,11 @@ export default class ProjectAgent {
83
83
  return this.#agentLoop.resolve(run, resolution);
84
84
  }
85
85
 
86
- async inject(run, message, mode) {
87
- return this.#agentLoop.inject(run, message, mode);
86
+ async inject(run, message, mode, options = {}) {
87
+ return this.#agentLoop.inject(run, message, mode, options);
88
88
  }
89
89
 
90
- // Synchronously create (or fork) a run row and return the alias.
91
- // Caller is expected to follow up with a kickoff (ask/act) that
92
- // operates on the returned alias. Lets RPC respond with the real
93
- // alias before the long-running loop starts.
90
+ // Create/fork the run row synchronously; caller follows up with ask/act.
94
91
  async ensureRun(projectId, model, run, prompt, options = {}) {
95
92
  return this.#agentLoop.ensureRun(projectId, model, run, prompt, options);
96
93
  }
@@ -103,11 +100,7 @@ export default class ProjectAgent {
103
100
  this.#agentLoop.abort(runId);
104
101
  }
105
102
 
106
- /**
107
- * Abort every in-flight run and wait for them to settle. Called
108
- * from the server's close path so the Node event loop isn't held
109
- * open by detached kickoff Promises after shutdown.
110
- */
103
+ // Abort all in-flight runs and drain so the event loop can exit.
111
104
  async shutdown() {
112
105
  await this.#agentLoop.abortAll();
113
106
  }
@@ -1,5 +1,6 @@
1
1
  import RummyContext from "../hooks/RummyContext.js";
2
2
  import { ContextExceededError } from "../llm/errors.js";
3
+ import { PermissionError } from "./errors.js";
3
4
  import materializeContext from "./materializeContext.js";
4
5
  import XmlParser from "./XmlParser.js";
5
6
 
@@ -30,6 +31,7 @@ export default class TurnExecutor {
30
31
  noWeb,
31
32
  noInteraction,
32
33
  noProposals,
34
+ yolo,
33
35
  toolSet,
34
36
  contextSize,
35
37
  options,
@@ -43,7 +45,6 @@ export default class TurnExecutor {
43
45
  sequence: turn,
44
46
  });
45
47
 
46
- // Build RummyContext before turn.started so plugins can write entries
47
48
  const rummy = new RummyContext(
48
49
  {
49
50
  tag: "turn",
@@ -70,13 +71,13 @@ export default class TurnExecutor {
70
71
  noWeb,
71
72
  noInteraction,
72
73
  noProposals,
74
+ yolo,
73
75
  toolSet,
74
76
  contextSize,
75
77
  systemPrompt: null,
76
78
  loopPrompt,
77
79
  },
78
80
  );
79
- // Plugins write prompt/instructions entries
80
81
  await this.#hooks.turn.started.emit({
81
82
  rummy,
82
83
  mode,
@@ -87,12 +88,9 @@ export default class TurnExecutor {
87
88
 
88
89
  await this.#hooks.processTurn(rummy);
89
90
 
90
- // Project instructions://system through the instructions tool's projection
91
91
  const systemPrompt =
92
92
  await this.#hooks.instructions.resolveSystemPrompt(rummy);
93
93
 
94
- // Materialize turn_context: VIEW rows projected through tools
95
- const demoted = [];
96
94
  const budgetCtx = {
97
95
  runId: currentRunId,
98
96
  loopId: currentLoopId,
@@ -100,7 +98,6 @@ export default class TurnExecutor {
100
98
  systemPrompt,
101
99
  mode,
102
100
  toolSet,
103
- demoted,
104
101
  loopIteration,
105
102
  };
106
103
  const initial = await materializeContext({
@@ -116,13 +113,6 @@ export default class TurnExecutor {
116
113
  rowCount: initial.rows.length,
117
114
  });
118
115
 
119
- await this.#hooks.run.progress.emit({
120
- projectId,
121
- run: currentAlias,
122
- turn,
123
- status: "thinking",
124
- });
125
-
126
116
  const budgetResult = await this.#hooks.budget.enforce({
127
117
  contextSize,
128
118
  messages: initial.messages,
@@ -156,15 +146,19 @@ export default class TurnExecutor {
156
146
  turn,
157
147
  });
158
148
 
159
- // Call LLM. Transient-error retry + context-exceeded detection live
160
- // in LlmProvider; context-exceeded surfaces as ContextExceededError.
161
149
  await this.#hooks.llm.request.started.emit({ model: requestedModel, turn });
162
150
  let rawResult;
163
151
  try {
164
152
  rawResult = await this.#llmProvider.completion(
165
153
  filteredMessages,
166
154
  requestedModel,
167
- { temperature: options?.temperature, signal },
155
+ {
156
+ temperature: options?.temperature,
157
+ signal,
158
+ // Per-run stable identifier for provider-side prompt caching
159
+ // (xAI prompt_cache_key, OpenAI prompt_cache_key, etc.).
160
+ runAlias: runRow?.alias || `run_${currentRunId}`,
161
+ },
168
162
  );
169
163
  } catch (err) {
170
164
  if (err instanceof ContextExceededError) {
@@ -199,19 +193,8 @@ export default class TurnExecutor {
199
193
  usage: result.usage,
200
194
  });
201
195
  const responseMessage = result.choices?.[0]?.message;
202
- // A valid completion response always carries content (possibly
203
- // empty) on the message; protect against that specific case so
204
- // downstream parsers see a string.
205
196
  const content = responseMessage?.content ? responseMessage.content : "";
206
197
 
207
- await this.#hooks.run.progress.emit({
208
- projectId,
209
- run: currentAlias,
210
- turn,
211
- status: "processing",
212
- });
213
-
214
- // Parse and emit — plugins handle audit storage
215
198
  const { commands, warnings, unparsed } = XmlParser.parse(content);
216
199
  for (const w of warnings) {
217
200
  await this.#hooks.error.log.emit({
@@ -223,7 +206,7 @@ export default class TurnExecutor {
223
206
  status: 422,
224
207
  });
225
208
  }
226
- if (commands.length === 0 && !!unparsed?.trim() && warnings.length === 0) {
209
+ if (commands.length === 0 && unparsed?.trim() && warnings.length === 0) {
227
210
  await this.#hooks.error.log.emit({
228
211
  store: this.#entries,
229
212
  runId: currentRunId,
@@ -234,10 +217,7 @@ export default class TurnExecutor {
234
217
  });
235
218
  }
236
219
 
237
- // Merge reasoning contributions from subscribers (think plugin's
238
- // <think> tag, other plugin reasoning sources). Filter starts with
239
- // the API-provided reasoning_content and layers on each plugin's
240
- // contribution.
220
+ // Layer plugin reasoning contributions onto the API-provided seed.
241
221
  if (responseMessage) {
242
222
  const seed = responseMessage.reasoning_content
243
223
  ? responseMessage.reasoning_content
@@ -262,7 +242,7 @@ export default class TurnExecutor {
262
242
  userMsg: userMsg?.content,
263
243
  });
264
244
 
265
- // --- PHASE 1: RECORD ---
245
+ // PHASE 1: RECORD
266
246
  const recorded = [];
267
247
  for (const cmd of commands) {
268
248
  const entry = await this.#record(
@@ -275,14 +255,7 @@ export default class TurnExecutor {
275
255
  if (entry) recorded.push(entry);
276
256
  }
277
257
 
278
- // --- PHASE 2: DISPATCH ---
279
- // Sequential queue. Each tool completes before the next starts.
280
- // On failure: abort remaining. On proposal: notify client, await
281
- // resolution, continue.
282
- // Narration text outside tags is fine when the turn also emitted
283
- // at least one command — "OK", "Let me check:", reasoning prefixes
284
- // are natural. Parse warnings and no-tags responses already emitted
285
- // errors above; dispatch crashes and failed entries emit below.
258
+ // PHASE 2: DISPATCH — sequential; abort-after-failure; proposals notify-and-await.
286
259
  let abortAfter = null;
287
260
 
288
261
  for (const entry of recorded) {
@@ -307,6 +280,21 @@ export default class TurnExecutor {
307
280
  try {
308
281
  await this.#hooks.tools.dispatch(entry.scheme, entry, rummy);
309
282
  } catch (dispatchErr) {
283
+ // PermissionError is the model attempting a documented-forbidden
284
+ // write (e.g. <set path="prompt://1"> with body). Surface as a
285
+ // soft 403 so the model can adjust on the next turn; do not
286
+ // abort sibling entries — the rest of the turn was valid.
287
+ if (dispatchErr instanceof PermissionError) {
288
+ await this.#hooks.error.log.emit({
289
+ store: this.#entries,
290
+ runId: currentRunId,
291
+ turn,
292
+ loopId: currentLoopId,
293
+ message: dispatchErr.message,
294
+ status: 403,
295
+ });
296
+ continue;
297
+ }
310
298
  await this.#hooks.error.log.emit({
311
299
  store: this.#entries,
312
300
  runId: currentRunId,
@@ -321,53 +309,31 @@ export default class TurnExecutor {
321
309
  await this.#hooks.tool.after.emit({ entry, rummy });
322
310
  await this.#hooks.entry.created.emit(entry);
323
311
 
324
- // Plugins (e.g. set) materialize pending proposals from the
325
- // recorded entry — e.g. search/replace revisions → set:// 202.
312
+ // Plugins materialize pending proposals (e.g. set search/replace 202).
326
313
  await this.#hooks.proposal.prepare.emit({ rummy, recorded: [entry] });
327
314
 
328
- // Check for any proposals created by this entry's dispatch
329
315
  const proposed = await this.#entries.getUnresolved(currentRunId);
330
316
  for (const p of proposed) {
331
317
  await this.#hooks.proposal.pending.emit({
332
318
  projectId,
333
319
  run: currentAlias,
334
320
  proposed: [p],
321
+ rummy,
335
322
  });
336
323
  await this.#entries.waitForResolution(currentRunId, p.path);
337
324
  const resolved = await this.#entries.getState(currentRunId, p.path);
338
- if (resolved?.status >= 400) {
339
- await this.#hooks.error.log.emit({
340
- store: this.#entries,
341
- runId: currentRunId,
342
- turn,
343
- loopId: currentLoopId,
344
- message: `Proposal ${p.path} rejected: status ${resolved.status}.`,
345
- status: resolved.status,
346
- });
347
- abortAfter = entry.scheme;
348
- }
325
+ // Failure surfaces in the proposal entry itself; abort cascade
326
+ // triggers the trailing-action "Aborted — preceding <X>" body.
327
+ if (resolved?.status >= 400) abortAfter = entry.scheme;
349
328
  }
350
329
 
351
330
  if (!abortAfter) {
352
331
  const entryPath = entry.resultPath || entry.path;
353
332
  const row = await this.#entries.getState(currentRunId, entryPath);
354
- if (row?.status >= 400) {
355
- await this.#hooks.error.log.emit({
356
- store: this.#entries,
357
- runId: currentRunId,
358
- turn,
359
- loopId: currentLoopId,
360
- message: `Entry ${entryPath} failed: status ${row.status}.`,
361
- status: row.status,
362
- });
363
- abortAfter = entry.scheme;
364
- }
333
+ if (row?.status >= 400) abortAfter = entry.scheme;
365
334
  }
366
335
  }
367
336
 
368
- // Turn Demotion: budget plugin re-materializes end-of-turn context
369
- // and demotes this turn's promoted entries on overflow. Overflow
370
- // emits an error (status 413) via the unified error channel.
371
337
  await this.#hooks.budget.postDispatch({
372
338
  contextSize,
373
339
  ctx: budgetCtx,
@@ -406,21 +372,14 @@ export default class TurnExecutor {
406
372
  return turnResult;
407
373
  }
408
374
 
409
- /**
410
- * Record a parsed command as a known_entries row.
411
- * Returns the recorded entry descriptor, or null if rejected/skipped.
412
- */
375
+ // Record a parsed command; returns the entry descriptor or rejects on bad shapes.
413
376
  async #record(runId, loopId, turn, mode, cmd) {
414
377
  const scheme = cmd.name;
415
- // Each tool's XmlParser shape surfaces exactly one of these
416
- // three fields as its addressable target. Treat absent as empty
417
- // so the length/control-char validation below catches bad shapes
418
- // rather than letting an undefined slip through.
419
378
  let rawTarget = "";
420
379
  if (cmd.path) rawTarget = cmd.path;
421
380
  else if (cmd.command) rawTarget = cmd.command;
422
381
  else if (cmd.question) rawTarget = cmd.question;
423
- // Reject paths that are likely reasoning bleed too long or contain non-printing chars
382
+ // Reject likely reasoning bleed: oversize or control chars in target.
424
383
  if (rawTarget.length > 512 || /\p{Cc}/u.test(rawTarget)) {
425
384
  const rejectPath = await this.#entries.logPath(
426
385
  runId,
@@ -451,18 +410,14 @@ export default class TurnExecutor {
451
410
  const target = rawTarget;
452
411
  const resultPath = await this.#entries.logPath(runId, turn, scheme, target);
453
412
 
454
- // Pass parsed command fields through as attributes
455
413
  const { name: _, ...attributes } = cmd;
456
414
  if (cmd.path) attributes.path = target;
457
415
 
458
- // Same per-shape resolution as rawTarget; the three sources are
459
- // mutually exclusive per tool. Empty string when none set.
460
416
  let body = "";
461
417
  if (cmd.body) body = cmd.body;
462
418
  else if (cmd.command) body = cmd.command;
463
419
  else if (cmd.question) body = cmd.question;
464
420
 
465
- // Filter: plugins can validate/transform before recording
466
421
  const filtered = await this.#hooks.entry.recording.filter(
467
422
  {
468
423
  scheme,
@@ -475,7 +430,17 @@ export default class TurnExecutor {
475
430
  { store: this.#entries, runId, turn, loopId, mode },
476
431
  );
477
432
  if (filtered.state === "failed" || filtered.state === "cancelled") {
478
- return filtered;
433
+ await this.#entries.set({
434
+ runId,
435
+ turn,
436
+ loopId,
437
+ path: filtered.path,
438
+ body: filtered.body,
439
+ state: filtered.state,
440
+ outcome: filtered.outcome,
441
+ attributes: filtered.attributes,
442
+ });
443
+ return { ...filtered, resultPath: filtered.path };
479
444
  }
480
445
 
481
446
  return {