@possumtech/rummy 2.2.1 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/package.json +14 -6
  2. package/service.js +18 -10
  3. package/src/agent/AgentLoop.js +2 -11
  4. package/src/agent/ContextAssembler.js +34 -3
  5. package/src/agent/Entries.js +16 -89
  6. package/src/agent/ProjectAgent.js +1 -16
  7. package/src/agent/TurnExecutor.js +12 -52
  8. package/src/agent/XmlParser.js +30 -117
  9. package/src/agent/errors.js +3 -22
  10. package/src/agent/materializeContext.js +3 -11
  11. package/src/hooks/Hooks.js +0 -29
  12. package/src/lib/hedberg/hedberg.js +4 -14
  13. package/src/lib/hedberg/marker.js +15 -59
  14. package/src/llm/LlmProvider.js +13 -26
  15. package/src/llm/errors.js +3 -11
  16. package/src/llm/openaiStream.js +6 -46
  17. package/src/plugins/ask_user/ask_user.js +12 -17
  18. package/src/plugins/budget/README.md +46 -8
  19. package/src/plugins/budget/budget.js +23 -42
  20. package/src/plugins/cp/cp.js +28 -18
  21. package/src/plugins/env/env.js +11 -7
  22. package/src/plugins/error/error.js +8 -37
  23. package/src/plugins/get/get.js +42 -24
  24. package/src/plugins/google/google.js +23 -3
  25. package/src/plugins/helpers.js +34 -50
  26. package/src/plugins/instructions/README.md +2 -2
  27. package/src/plugins/instructions/instructions-user.md +1 -1
  28. package/src/plugins/instructions/instructions.js +19 -6
  29. package/src/plugins/known/known.js +1 -8
  30. package/src/plugins/log/log.js +15 -1
  31. package/src/plugins/mv/mv.js +29 -19
  32. package/src/plugins/persona/persona.js +4 -4
  33. package/src/plugins/prompt/README.md +1 -1
  34. package/src/plugins/prompt/prompt.js +1 -1
  35. package/src/plugins/rm/rm.js +26 -15
  36. package/src/plugins/rm/rmDoc.md +0 -2
  37. package/src/plugins/set/set.js +37 -84
  38. package/src/plugins/set/setDoc.md +16 -16
  39. package/src/plugins/sh/sh.js +10 -8
  40. package/src/plugins/skill/skillDoc.md +1 -1
  41. package/src/plugins/unknown/README.md +1 -1
  42. package/src/plugins/unknown/unknown.js +2 -6
  43. package/src/plugins/update/update.js +3 -2
  44. package/src/plugins/update/updateDoc.md +1 -1
  45. package/.env.example +0 -152
  46. package/.xai.key +0 -1
  47. package/PLUGINS.md +0 -962
  48. package/SPEC.md +0 -1897
  49. package/biome/no-fallbacks.grit +0 -50
  50. package/gemini.key +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@possumtech/rummy",
3
- "version": "2.2.1",
3
+ "version": "2.3.1",
4
4
  "description": "Relational Unknowns Memory Management Yoke",
5
5
  "keywords": [
6
6
  "llm"
@@ -28,6 +28,14 @@
28
28
  },
29
29
  "type": "module",
30
30
  "main": "service.js",
31
+ "files": [
32
+ "service.js",
33
+ "bin/",
34
+ "src/",
35
+ "migrations/",
36
+ "lang/",
37
+ "!**/*.test.js"
38
+ ],
31
39
  "scripts": {
32
40
  "postinstall": "node ./bin/postinstall.js",
33
41
  "start": "node --env-file-if-exists=.env.example --env-file-if-exists=.env service.js",
@@ -53,15 +61,15 @@
53
61
  "test:lme:clean": "rm -rf test/lme/results/*/",
54
62
  "test:swe:clean": "rm -rf test/swe/results/*/ test/swe/repos/",
55
63
  "test:tbench:setup": "bash -c 'set -a; source .env.tbench; set +a; bash test/tbench/setup.sh'",
56
- "test:tbench": "bash -c 'echo \"Specify a profile: test:tbench:xfast | :gemma | :xfast_or\" >&2 && exit 64'",
57
- "test:tbench:xfast": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.xfast test/tbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/tbench_xfast_$(date +%Y%m%dT%H%M%S).log' --",
64
+ "test:tbench": "bash -c 'echo \"Specify a profile: test:tbench:grok | :gemma\" >&2 && exit 64'",
58
65
  "test:tbench:gemma": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.gemma test/tbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/tbench_gemma_$(date +%Y%m%dT%H%M%S).log' --",
59
- "test:tbench:xfast_or": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.xfast_or test/tbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/tbench_xfast_or_$(date +%Y%m%dT%H%M%S).log' --",
60
- "test:tbench:g43": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.g43 test/tbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/tbench_g43_$(date +%Y%m%dT%H%M%S).log' --",
66
+ "test:tbench:grok": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.grok test/tbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/tbench_grok_$(date +%Y%m%dT%H%M%S).log' --",
61
67
  "test:tbench:clean": "rm -rf test/tbench/results/*/",
62
68
  "test:tbench:summary": "node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench test/tbench/summarize.js",
63
69
  "test:programbench:setup": "bash test/programbench/setup.sh",
64
- "test:programbench": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.tbench --env-file-if-exists=.env.tbench.gemma test/programbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/programbench_$(date +%Y%m%dT%H%M%S).log' --",
70
+ "test:programbench": "bash -c 'echo \"Specify a profile: test:programbench:grok | :gemma\" >&2 && exit 64'",
71
+ "test:programbench:grok": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.programbench.grok test/programbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/programbench_grok_$(date +%Y%m%dT%H%M%S).log' --",
72
+ "test:programbench:gemma": "bash -c 'set -o pipefail; mkdir -p /tmp/rummy_test_diag && node --env-file-if-exists=.env.example --env-file-if-exists=.env --env-file-if-exists=.env.programbench.gemma test/programbench/runner.js \"$@\" 2>&1 | tee /tmp/rummy_test_diag/programbench_gemma_$(date +%Y%m%dT%H%M%S).log' --",
65
73
  "test:programbench:eval": "bash -c 'cd test/programbench && . .venv/bin/activate && programbench eval \"$@\"' --",
66
74
  "test:programbench:clean": "rm -rf test/programbench/results/*/",
67
75
  "test:clear": "rm -rf /tmp/rummy_test_diag /tmp/rummy_test_*.db /tmp/rummy_test_*.db-shm /tmp/rummy_test_*.db-wal /tmp/rummy-stories-*",
package/service.js CHANGED
@@ -112,24 +112,32 @@ async function main() {
112
112
  // 6. Initialize plugins (register schemes)
113
113
  await initPlugins(db, hooks, pluginInstances);
114
114
 
115
- // 7. Bootstrap models from env vars
115
+ // 7. Reconcile models to env. The env cascade is the single source of
116
+ // truth for app configuration: every `RUMMY_MODEL_<alias>=...` becomes
117
+ // (or refreshes) a row in the `models` table, and every row whose
118
+ // alias is NOT in the current env is dropped. No accumulated cruft
119
+ // from prior sessions; no surprises at the CLI surface.
116
120
  {
117
- const modelAliases = [];
121
+ const envAliases = new Set();
118
122
  for (const key of Object.keys(process.env)) {
119
123
  if (!key.startsWith("RUMMY_MODEL_")) continue;
120
124
  const alias = key.replace("RUMMY_MODEL_", "");
121
125
  const actual = process.env[key];
122
126
  const contextEnv = process.env[`RUMMY_CONTEXT_${alias}`];
123
127
  const context_length = contextEnv ? Number.parseInt(contextEnv, 10) : null;
124
- await db.upsert_model.get({
125
- alias,
126
- actual,
127
- context_length,
128
- });
129
- modelAliases.push(alias);
128
+ await db.upsert_model.get({ alias, actual, context_length });
129
+ envAliases.add(alias);
130
130
  }
131
- if (modelAliases.length > 0) {
132
- console.log(`[RUMMY] Models: ${modelAliases.join(", ")}`);
131
+ const dbRows = await db.get_models.all({ limit: null, offset: null });
132
+ for (const row of dbRows) {
133
+ if (!envAliases.has(row.alias)) {
134
+ await db.delete_model.run({ alias: row.alias });
135
+ }
136
+ }
137
+ if (envAliases.size > 0) {
138
+ console.log(
139
+ `[RUMMY] Models: ${[...envAliases].toSorted().join(", ")}`,
140
+ );
133
141
  }
134
142
  }
135
143
 
@@ -156,11 +156,7 @@ export default class AgentLoop {
156
156
  context_limit: contextLimit,
157
157
  });
158
158
  await this.#entries.forkEntries(existingRun.id, runRow.id);
159
- // Absolute turn numbering across the lineage; SPEC
160
- // §budget_enforcement. Without this, the fork's first
161
- // dispatch lands at turn 1 while inherited run_views carry
162
- // parent-side turn values, and the budget grinder's
163
- // `current_turn − 1` rule sees nothing meaningful.
159
+ // Absolute turn numbering across the lineage; SPEC §budget_enforcement.
164
160
  await this.#entries.setNextTurn(runRow.id, existingRun.next_turn);
165
161
  await this.#writeRunEntry(runRow.id, alias, prompt, {
166
162
  projectId,
@@ -693,14 +689,9 @@ export default class AgentLoop {
693
689
 
694
690
  const nextTurn = runRow.next_turn;
695
691
 
696
- // Resolve the owning loop_id BEFORE writing the prompt entry so
697
- // it lands with correct loop scope. Active run → reuse the
698
- // running loop; otherwise enqueue the next loop and write the
699
- // prompt with the new loop's id.
692
+ // Resolve loop_id before writing the prompt entry so loop scope is correct.
700
693
  let loopId;
701
694
  if (this.#activeRuns.has(runRow.id)) {
702
- // Active runs have exactly one loop at status=102 by the
703
- // loops table invariant — trust the contract.
704
695
  const currentLoop = await this.#db.get_current_loop.get({
705
696
  run_id: runRow.id,
706
697
  });
@@ -1,4 +1,9 @@
1
- // Orchestrates assembly.system / assembly.user filter chains; plugins do all rendering.
1
+ import {
2
+ ceiling,
3
+ computePacketTokens,
4
+ substituteBudgetPlaceholders,
5
+ } from "../plugins/budget/budget.js";
6
+
2
7
  export default class ContextAssembler {
3
8
  static async assembleFromTurnContext(
4
9
  rows,
@@ -13,7 +18,6 @@ export default class ContextAssembler {
13
18
  } = {},
14
19
  hooks,
15
20
  ) {
16
- // Loop boundary from active prompt; absent on turn 1 before prompt plugin's turn.started.
17
21
  const promptEntry = rows.findLast(
18
22
  (r) => r.category === "prompt" && r.scheme === "prompt",
19
23
  );
@@ -32,7 +36,34 @@ export default class ContextAssembler {
32
36
  };
33
37
 
34
38
  const system = await hooks.assembly.system.filter(systemPrompt, ctx);
35
- const user = await hooks.assembly.user.filter("", ctx);
39
+ const userWithPlaceholders = await hooks.assembly.user.filter("", ctx);
40
+
41
+ // Iterate to a fixed point: substituted numbers are shorter than the
42
+ // placeholders, so the re-measured packet shifts slightly. Converges
43
+ // in 1-2 passes (only the digit-count varies). SPEC §token_accounting.
44
+ let tokenUsage = computePacketTokens({
45
+ system,
46
+ user: userWithPlaceholders,
47
+ });
48
+ let tokensFree = contextSize
49
+ ? Math.max(0, ceiling(contextSize) - tokenUsage)
50
+ : 0;
51
+ let user = substituteBudgetPlaceholders(userWithPlaceholders, {
52
+ tokenUsage,
53
+ tokensFree,
54
+ });
55
+ for (let i = 0; i < 5; i++) {
56
+ const measured = computePacketTokens({ system, user });
57
+ if (measured === tokenUsage) break;
58
+ tokenUsage = measured;
59
+ tokensFree = contextSize
60
+ ? Math.max(0, ceiling(contextSize) - tokenUsage)
61
+ : 0;
62
+ user = substituteBudgetPlaceholders(userWithPlaceholders, {
63
+ tokenUsage,
64
+ tokensFree,
65
+ });
66
+ }
36
67
 
37
68
  return [
38
69
  { role: "system", content: system },
@@ -2,18 +2,10 @@ import slugify from "../sql/functions/slugify.js";
2
2
  import { EntryOverflowError, PermissionError } from "./errors.js";
3
3
  import encodeSegment from "./pathEncode.js";
4
4
 
5
- // Update entry bodies are promised ≤ 80 chars to clients (run summary
6
- // payload, model-facing <log> rendering). Mirror of SUMMARY_MAX_CHARS:
7
- // the boundary chops + emits a soft error so the violation is visible
8
- // without crashing the run. Lives here because Entries.update is the
9
- // canonical persistence boundary all callers fund-route through.
10
5
  const UPDATE_BODY_MAX = 80;
11
6
 
12
- // SQLite surfaces the CHECK as either err.code === "SQLITE_CONSTRAINT_CHECK"
13
- // or an Error whose message names the failing column. Both forms appear in
14
- // the wild depending on the driver build, so we match defensively.
15
- // Caller-side contract: only invoked from a SQL try/catch, so err is always
16
- // an Error instance — err.message is a string (possibly empty), not undefined.
7
+ // SQLite surfaces the body-length CHECK as either an error code or message;
8
+ // match both because the driver build varies in the wild.
17
9
  function isBodyOverflow(err) {
18
10
  if (!err) return false;
19
11
  if (err.code === "SQLITE_CONSTRAINT_CHECK") return true;
@@ -26,16 +18,10 @@ function translateBodyOverflow(err, path, body) {
26
18
  return new EntryOverflowError(path, size);
27
19
  }
28
20
 
29
- // Already-an-error path: log://turn_N/error/<slug>. The auto-failure
30
- // hook below skips these to break the recursion (error.log.emit's
31
- // handler ALSO writes state=failed when materializing its own entry).
21
+ // Skipped by the auto-failure hook to break recursion (error.log emits its own).
32
22
  const ERROR_PATH_RE = /^log:\/\/turn_\d+\/error\//;
33
23
 
34
- // Streaming data channels for env/sh actions (env://turn_N/cmd_K,
35
- // sh://turn_N/cmd_K). Their failure is already captured by the parent
36
- // log://turn_N/<scheme>/<slug> action entry's auto-emit; emitting again
37
- // for each channel produces redundant duplicates with empty-body
38
- // fallback messages.
24
+ // Stream channels — their failure is already captured by the parent action entry.
39
25
  const CHANNEL_PATH_RE = /^(env|sh):\/\/turn_\d+\//;
40
26
 
41
27
  export default class Entries {
@@ -49,23 +35,10 @@ export default class Entries {
49
35
  #seq = 0;
50
36
  #pendingResolutions = new Map();
51
37
 
52
- // onError is the centralized site for storage-layer rejections that
53
- // should surface to the model as strikes rather than crash the run.
54
- // Today: EntryOverflowError (RUMMY_ENTRY_SIZE_MAX CHECK violations).
55
- // When onError is supplied, set() catches the typed error, dispatches
56
- // it to the callback (which emits hooks.error.log → 413 strike), and
57
- // returns silently — callers don't need to handle storage-layer
58
- // rejections at every write site. When onError is null (e.g. unit
59
- // tests with a bare Entries), the error propagates as before.
60
- //
61
- // onFailed is the universal failure-rendering enforcer: every
62
- // transition to state="failed" on a non-error path fires this
63
- // callback so a SEPARATE log://turn_N/error/<slug> entry is created
64
- // alongside the action entry. Without this, plugins that record
65
- // failure via entries.set({state: "failed", ...}) leave nothing for
66
- // the model to recognize as an error — failure encodes only as tiny
67
- // JSON metadata indistinguishable from a successful entry. The
68
- // callback wires to hooks.error.log.emit (see ProjectAgent).
38
+ // onError: catches storage-layer rejections (EntryOverflowError) and routes
39
+ // to error.log strike; callers don't handle at each write site.
40
+ // onFailed: every state="failed" on a non-error path fires this so a
41
+ // sibling log://turn_N/error/ entry materializes (model-facing).
69
42
  constructor(
70
43
  db,
71
44
  {
@@ -82,7 +55,6 @@ export default class Entries {
82
55
  this.#onSoftError = onSoftError;
83
56
  }
84
57
 
85
- // Populate the scheme cache; idempotent, lazy on first need.
86
58
  async loadSchemes(db) {
87
59
  const rows = await (db || this.#db).get_all_schemes.all();
88
60
  this.#schemes.clear();
@@ -111,11 +83,7 @@ export default class Entries {
111
83
  static normalizePath(path) {
112
84
  if (!path) return path;
113
85
  if (!path.includes("://")) {
114
- // Bare file path: strip a single leading `./` for canonical
115
- // form. `./main.go` and `main.go` must resolve to the same
116
- // entry — otherwise SEARCH/REPLACE edits on `./main.go`
117
- // land in a phantom entry while reads of `main.go` see the
118
- // original, and the model can't reconcile.
86
+ // Strip leading `./` so `./main.go` and `main.go` are one entry.
119
87
  if (path.startsWith("./")) return path.slice(2);
120
88
  return path;
121
89
  }
@@ -123,7 +91,6 @@ export default class Entries {
123
91
  const scheme = path.slice(0, sep).toLowerCase();
124
92
  const rest = path.slice(sep + 3);
125
93
  try {
126
- // Decode first (idempotent), then encode — but preserve slashes
127
94
  const decoded = decodeURIComponent(rest);
128
95
  return `${scheme}://${decoded.split("/").map(encodeSegment).join("/")}`;
129
96
  } catch {
@@ -148,12 +115,7 @@ export default class Entries {
148
115
  return `${candidate}_${++this.#seq}`;
149
116
  }
150
117
 
151
- // Single namespace log://turn_N/action/slug. slug is built via slugify
152
- // (80-char cap + integer tie-breaker on collision) — same contract as
153
- // slugPath. Plugins (including externals) can trust that any target
154
- // they pass will produce a bounded, unique log path, regardless of
155
- // the target's length or character composition. Full payload always
156
- // belongs in the entry body, not the slug.
118
+ // log://turn_N/action/slug — slugify caps + collision-suffixes.
157
119
  async logPath(runId, turn, action, target) {
158
120
  const slug = target == null ? "" : slugify(String(target));
159
121
  const base = slug
@@ -168,7 +130,7 @@ export default class Entries {
168
130
  }
169
131
 
170
132
  async slugPath(runId, scheme, content, tags) {
171
- // tags > content > empty; slugify("") yields "" and we sequence-only.
133
+ // tags > content > sequence-only.
172
134
  let source = "";
173
135
  if (tags) source = tags;
174
136
  else if (content) source = content;
@@ -187,7 +149,6 @@ export default class Entries {
187
149
  return `${prefix}${base}_${++this.#seq}`;
188
150
  }
189
151
 
190
- // Scheme's scope/writers/category; bare paths default to run + model/plugin.
191
152
  async #schemeRules(scheme) {
192
153
  await this.#ensureSchemes();
193
154
  const row = scheme ? this.#schemes.get(scheme) : null;
@@ -225,22 +186,14 @@ export default class Entries {
225
186
  return `run:${runId}`;
226
187
  }
227
188
 
228
- // set — create or update an entry; see PLUGINS.md primitives.
229
189
  async set(args) {
230
190
  if (!args.runId) throw new Error("set: runId is required");
231
191
  if (!args.path) throw new Error("set: path is required");
232
192
  try {
233
193
  return await this.#setImpl(args);
234
194
  } catch (err) {
235
- // EntryOverflowError: storage-layer CHECK fired. When the host
236
- // supplies onError (the production wiring), route the strike
237
- // to error.log and return silently — every set() caller in
238
- // the codebase becomes overflow-safe without per-site catches.
239
- // Without onError (raw unit tests), propagate as before.
195
+ // EntryOverflowError → error.log when onError is wired.
240
196
  if (err instanceof EntryOverflowError && this.#onError) {
241
- // Destructure with the same defaults as #setImpl so the
242
- // callback sees the same loopId/turn shape callers wrote
243
- // against — no `??` fallback shim, just contract alignment.
244
197
  const { runId, loopId = null, turn = 0 } = args;
245
198
  await this.#onError({
246
199
  runId,
@@ -271,7 +224,6 @@ export default class Entries {
271
224
  loopId = null,
272
225
  writer = "plugin",
273
226
  }) {
274
- // Pattern mode is explicit; never inferred from `*` in path.
275
227
  const isPattern = pattern === true || bodyFilter !== null;
276
228
 
277
229
  if (isPattern) {
@@ -315,7 +267,6 @@ export default class Entries {
315
267
  const normalized = Entries.normalizePath(path);
316
268
  const scheme = Entries.scheme(normalized);
317
269
 
318
- // Append mode: streaming body growth on an existing entry.
319
270
  if (append) {
320
271
  if (body == null) throw new Error("set: append requires body");
321
272
  try {
@@ -331,7 +282,6 @@ export default class Entries {
331
282
  return;
332
283
  }
333
284
 
334
- // Body-less state or visibility change on an existing entry.
335
285
  if (body == null) {
336
286
  if (state != null) {
337
287
  await this.#db.resolve_known_entry_view.run({
@@ -371,13 +321,11 @@ export default class Entries {
371
321
  return;
372
322
  }
373
323
 
374
- // Full write/upsert: body + state + visibility + attributes.
375
324
  const { kind, writers, category } = await this.#schemeRules(scheme);
376
325
  if (!writers.includes(writer)) {
377
326
  throw new PermissionError(scheme, writer, writers);
378
327
  }
379
328
  const scope = this.#resolveScope(kind, runId, projectId);
380
- // Inject `action` only when caller passes attributes; null means COALESCE preserves existing.
381
329
  const effectiveAttributes = attributes ? { ...attributes } : null;
382
330
  if (scheme === "log" && effectiveAttributes) {
383
331
  const m = normalized.match(/^log:\/\/turn_\d+\/([^/]+)\//);
@@ -398,11 +346,7 @@ export default class Entries {
398
346
  throw translateBodyOverflow(err, normalized, body);
399
347
  }
400
348
  const effectiveState = state === undefined ? "resolved" : state;
401
- // Visibility resolution: explicit > preserve-existing > scheme-default.
402
- // A body update without visibility= must NOT silently reset visibility
403
- // to the scheme default — that would hide content the model just
404
- // promoted (e.g. a model <get>'d file then <set> SEARCH/REPLACE
405
- // would lose its visible status). Preserve what's there.
349
+ // Visibility: explicit > preserve-existing > scheme-default.
406
350
  let effectiveVisibility;
407
351
  if (visibility !== undefined) {
408
352
  effectiveVisibility = visibility;
@@ -439,17 +383,10 @@ export default class Entries {
439
383
  }
440
384
  }
441
385
 
442
- // Fire onFailed for any state→failed transition on a non-error path.
443
- // The auto-emit creates a sibling log://turn_N/error/<slug> entry so
444
- // the failure appears in the model's <log> as a category-distinct
445
- // item, not just metadata buried in the action's own log entry.
446
386
  async #fireFailed({ runId, turn, loopId, path, body, outcome }) {
447
387
  if (!this.#onFailed) return;
448
388
  if (ERROR_PATH_RE.test(path)) return;
449
389
  if (CHANNEL_PATH_RE.test(path)) return;
450
- // Body-less state changes don't carry a message; fall back to the
451
- // outcome string (or the path itself) so the error entry has a
452
- // recognizable slug instead of an empty one.
453
390
  let message = body;
454
391
  if (!message) {
455
392
  if (outcome) message = `failed: ${outcome}`;
@@ -465,7 +402,6 @@ export default class Entries {
465
402
  });
466
403
  }
467
404
 
468
- // get — promote entry(ies); see PLUGINS.md primitives.
469
405
  async get({
470
406
  runId,
471
407
  turn = 0,
@@ -492,7 +428,6 @@ export default class Entries {
492
428
  this.#emitChanged(runId, path, "promote");
493
429
  }
494
430
 
495
- // rm — remove entry view(s); see PLUGINS.md primitives.
496
431
  async rm({ runId, path, bodyFilter = null, filesOnly = false }) {
497
432
  if (!runId) throw new Error("rm: runId is required");
498
433
  if (!path) throw new Error("rm: path is required");
@@ -517,7 +452,6 @@ export default class Entries {
517
452
  this.#emitChanged(runId, path, "remove");
518
453
  }
519
454
 
520
- // cp — copy an entry to a new path; see PLUGINS.md primitives.
521
455
  async cp({
522
456
  runId,
523
457
  turn = 0,
@@ -544,7 +478,6 @@ export default class Entries {
544
478
  });
545
479
  }
546
480
 
547
- // mv — rename (cp + rm).
548
481
  async mv({
549
482
  runId,
550
483
  turn = 0,
@@ -570,10 +503,7 @@ export default class Entries {
570
503
  await this.rm({ runId, path: from });
571
504
  }
572
505
 
573
- // update once-per-turn lifecycle signal; see PLUGINS.md.
574
- // Body chopped to UPDATE_BODY_MAX with a soft error fire so clients
575
- // always receive ≤ 80 chars and the violation is visible to the model
576
- // next turn. Applies to ALL callers — system, plugin, model.
506
+ // Inner text capped at UPDATE_BODY_MAX with soft-error emission.
577
507
  async update({
578
508
  runId,
579
509
  turn = 0,
@@ -643,7 +573,7 @@ export default class Entries {
643
573
  }
644
574
 
645
575
  async waitForResolution(runId, path) {
646
- // Pre-check: yolo's synchronous resolver may have already flipped state, no drain will fire.
576
+ // Pre-check: yolo may have already flipped state synchronously.
647
577
  const current = await this.getState(runId, path);
648
578
  if (
649
579
  current &&
@@ -702,7 +632,6 @@ export default class Entries {
702
632
  return new Set(rows.map((r) => r.body));
703
633
  }
704
634
 
705
- // Unknown entries in DB order; rows include path + body.
706
635
  async getUnknowns(runId) {
707
636
  return this.#db.get_unknowns.all({ run_id: runId });
708
637
  }
@@ -721,7 +650,7 @@ export default class Entries {
721
650
  });
722
651
  }
723
652
 
724
- // SELECT-then-UPDATE: SQLite RETURNING can't cross to the view layer.
653
+ // SELECT-then-UPDATE: RETURNING can't cross to the view layer in SQLite.
725
654
  async demoteTurnEntries(runId, turn) {
726
655
  const targets = await this.#db.get_turn_demotion_targets.all({
727
656
  run_id: runId,
@@ -731,12 +660,10 @@ export default class Entries {
731
660
  return targets;
732
661
  }
733
662
 
734
- // Plugin-facing run lookup; avoids reaching into core.db.
735
663
  async getRun(runId) {
736
664
  return this.#db.get_run_by_id.get({ id: runId });
737
665
  }
738
666
 
739
- // Plugin-facing turn-stats write.
740
667
  async updateTurnStats(stats) {
741
668
  return this.#db.update_turn_stats.run(stats);
742
669
  }
@@ -27,20 +27,7 @@ export default class ProjectAgent {
27
27
  status: 413,
28
28
  attributes: { path: error.path, size: error.size },
29
29
  }),
30
- // Universal failure-rendering: every state→failed transition on
31
- // a non-error path fires error.log.emit so a sibling
32
- // log://turn_N/error/<slug> entry is created. The error plugin's
33
- // own #onErrorLog handler also writes state=failed on the error
34
- // entry; Entries.#fireFailed skips when path matches
35
- // log://turn_*/error/* so no recursion.
36
- //
37
- // soft=true when the outcome is in SOFT_FAILURE_OUTCOMES
38
- // (not_found, conflict): the error entry still renders so the
39
- // model can read the finding, but error.log skips turnErrors++
40
- // so the strike accumulator doesn't penalize legitimate
41
- // state-discovery via the auto-emit path. Without this, soft
42
- // outcomes count as strikes on the turnErrors path even though
43
- // recordedFailed correctly excludes them.
30
+ // soft=true for SOFT_FAILURE_OUTCOMES so auto-emitted errors don't strike.
44
31
  onFailed: ({ runId, loopId, turn, sourcePath, body, outcome }) =>
45
32
  hooks.error.log.emit({
46
33
  store: this.#entries,
@@ -131,7 +118,6 @@ export default class ProjectAgent {
131
118
  return this.#agentLoop.inject(run, message, mode, options);
132
119
  }
133
120
 
134
- // Create/fork the run row synchronously; caller follows up with ask/act.
135
121
  async ensureRun(projectId, model, run, prompt, options = {}) {
136
122
  return this.#agentLoop.ensureRun(projectId, model, run, prompt, options);
137
123
  }
@@ -144,7 +130,6 @@ export default class ProjectAgent {
144
130
  this.#agentLoop.abort(runId);
145
131
  }
146
132
 
147
- // Abort all in-flight runs and drain so the event loop can exit.
148
133
  async shutdown() {
149
134
  await this.#agentLoop.abortAll();
150
135
  }
@@ -89,10 +89,6 @@ export default class TurnExecutor {
89
89
 
90
90
  await this.#hooks.processTurn(rummy);
91
91
 
92
- // Run persona feeds the assembly.system chain (persona plugin's
93
- // participant at priority 150). Loaded once per turn; the system
94
- // prompt is built directly by the chain — no resolveSystemPrompt
95
- // indirection.
96
92
  const runRow = await this.#db.get_run_by_id.get({ id: currentRunId });
97
93
 
98
94
  const budgetCtx = {
@@ -164,9 +160,10 @@ export default class TurnExecutor {
164
160
  {
165
161
  temperature: options?.temperature,
166
162
  signal,
167
- // Per-run stable identifier for provider-side prompt caching
168
- // (xAI prompt_cache_key, OpenAI prompt_cache_key, etc.).
163
+ // Stable per-run id for provider prompt caching.
169
164
  runAlias: runRow?.alias || `run_${currentRunId}`,
165
+ // Real prompt_tokens for accurate max_tokens derivation.
166
+ lastPromptTokens: initial.lastContextTokens,
170
167
  },
171
168
  );
172
169
  } catch (err) {
@@ -189,15 +186,8 @@ export default class TurnExecutor {
189
186
  contextSize,
190
187
  };
191
188
  }
192
- // LLM fetch hit its per-call ceiling (provider's
193
- // AbortSignal.timeout(FETCH_TIMEOUT) fired). Convert to a
194
- // 504 strike so the loop continues — one timed-out turn is
195
- // recoverable; MAX_STRIKES in a row abandon at 499. Without
196
- // this catch the AbortError escapes to AgentLoop's outer
197
- // catch and the run dies at status=500, losing all prior
198
- // productive turns. signal.aborted being true means OUR
199
- // controller fired (drain), not a fetch timeout — re-throw
200
- // so AgentLoop ends the run cleanly at 499.
189
+ // LLM fetch hit per-call ceiling → 504 strike (recoverable).
190
+ // signal.aborted is OUR drain — re-throw to end run at 499.
201
191
  if (err?.name === "TimeoutError" || err?.name === "AbortError") {
202
192
  if (signal?.aborted) throw err;
203
193
  await this.#hooks.error.log.emit({
@@ -234,10 +224,8 @@ export default class TurnExecutor {
234
224
  const content = responseMessage?.content ? responseMessage.content : "";
235
225
 
236
226
  const { commands, warnings, unparsed } = XmlParser.parse(content);
237
- // Parser warnings are recovered emissions — the parser already
238
- // corrected a mismatched/unclosed tag and produced commands. Log
239
- // them so the model sees what happened, but don't strike: the
240
- // turn's productive work is intact.
227
+ // Parser warnings are recovered emissions — visible to the model,
228
+ // no strike.
241
229
  for (const w of warnings) {
242
230
  await this.#hooks.error.log.emit({
243
231
  store: this.#entries,
@@ -260,28 +248,11 @@ export default class TurnExecutor {
260
248
  });
261
249
  }
262
250
 
263
- // Contract floor: a turn without <update> is malformed; refuse to
264
- // honor its side effects. Repetition loops, partial outputs, and
265
- // other broken responses commonly emit actions without closure;
266
- // dispatching them anyway lets a broken turn corrupt state. Skip
267
- // recording AND dispatching when commands are present but no
268
- // <update> closes the turn — the strike system still fires via
269
- // turnErrors, model retries cleanly next turn.
251
+ // Skip dispatch when commands but no <update> — broken turn, no side
252
+ // effects. The missing-update strike fires from update.resolve below.
270
253
  const hasUpdate = commands.some((c) => c.name === "update");
271
254
  const skipDispatch = commands.length > 0 && !hasUpdate;
272
- if (skipDispatch) {
273
- await this.#hooks.error.log.emit({
274
- store: this.#entries,
275
- runId: currentRunId,
276
- turn,
277
- loopId: currentLoopId,
278
- message:
279
- "Turn rejected: no <update> emitted. Actions are not honored unless the turn ends with an <update>.",
280
- status: 422,
281
- });
282
- }
283
255
 
284
- // Layer plugin reasoning contributions onto the API-provided seed.
285
256
  if (responseMessage) {
286
257
  const seed = responseMessage.reasoning_content
287
258
  ? responseMessage.reasoning_content
@@ -306,7 +277,6 @@ export default class TurnExecutor {
306
277
  userMsg: userMsg?.content,
307
278
  });
308
279
 
309
- // PHASE 1: RECORD (skipped when skipDispatch — broken turn, no side effects)
310
280
  const recorded = [];
311
281
  if (!skipDispatch) {
312
282
  for (const cmd of commands) {
@@ -321,7 +291,7 @@ export default class TurnExecutor {
321
291
  }
322
292
  }
323
293
 
324
- // PHASE 2: DISPATCH — sequential; abort-after-failure; proposals notify-and-await.
294
+ // Sequential dispatch; abort-after-failure; proposals notify-and-await.
325
295
  let abortAfter = null;
326
296
 
327
297
  for (const entry of recorded) {
@@ -346,10 +316,7 @@ export default class TurnExecutor {
346
316
  try {
347
317
  await this.#hooks.tools.dispatch(entry.scheme, entry, rummy);
348
318
  } catch (dispatchErr) {
349
- // PermissionError is the model attempting a documented-forbidden
350
- // write (e.g. <set path="prompt://1"> with body). Surface as a
351
- // soft 403 so the model can adjust on the next turn; do not
352
- // abort sibling entries — the rest of the turn was valid.
319
+ // PermissionError → soft 403, no sibling abort.
353
320
  if (dispatchErr instanceof PermissionError) {
354
321
  await this.#hooks.error.log.emit({
355
322
  store: this.#entries,
@@ -375,7 +342,6 @@ export default class TurnExecutor {
375
342
  await this.#hooks.tool.after.emit({ entry, rummy });
376
343
  await this.#hooks.entry.created.emit(entry);
377
344
 
378
- // Plugins materialize pending proposals (e.g. set search/replace → 202).
379
345
  await this.#hooks.proposal.prepare.emit({ rummy, recorded: [entry] });
380
346
 
381
347
  const proposed = await this.#entries.getUnresolved(currentRunId);
@@ -388,8 +354,6 @@ export default class TurnExecutor {
388
354
  });
389
355
  await this.#entries.waitForResolution(currentRunId, p.path);
390
356
  const resolved = await this.#entries.getState(currentRunId, p.path);
391
- // Failure surfaces in the proposal entry itself; abort cascade
392
- // triggers the trailing-action "Aborted — preceding <X>" body.
393
357
  if (resolved?.status >= 400) abortAfter = entry.scheme;
394
358
  }
395
359
 
@@ -438,17 +402,13 @@ export default class TurnExecutor {
438
402
  return turnResult;
439
403
  }
440
404
 
441
- // Record a parsed command; returns the entry descriptor or rejects on bad shapes.
442
405
  async #record(runId, loopId, turn, mode, cmd) {
443
406
  const scheme = cmd.name;
444
407
  let rawTarget = "";
445
408
  if (cmd.path) rawTarget = cmd.path;
446
409
  else if (cmd.command) rawTarget = cmd.command;
447
410
  else if (cmd.question) rawTarget = cmd.question;
448
- // Reject reasoning-bleed in path-shaped fields only. cmd.command
449
- // (sh/env shell scripts) and cmd.question (ask_user prose) are
450
- // content fields where newlines/tabs/length are legitimate; the
451
- // slugifier sanitizes them downstream when deriving the log path.
411
+ // Reject reasoning-bleed in path-shaped fields only.
452
412
  if (cmd.path && (cmd.path.length > 2048 || /\p{Cc}/u.test(cmd.path))) {
453
413
  const rejectPath = await this.#entries.logPath(
454
414
  runId,