@possumtech/rummy 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.env.example +31 -5
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +389 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +13 -9
  11. package/scriptify/ask_run.js +77 -0
  12. package/scriptify/cache_probe.js +66 -0
  13. package/scriptify/cache_probe_grok.js +74 -0
  14. package/service.js +22 -11
  15. package/src/agent/AgentLoop.js +62 -157
  16. package/src/agent/ContextAssembler.js +2 -9
  17. package/src/agent/Entries.js +54 -98
  18. package/src/agent/ProjectAgent.js +4 -11
  19. package/src/agent/TurnExecutor.js +48 -83
  20. package/src/agent/XmlParser.js +247 -273
  21. package/src/agent/budget.js +5 -28
  22. package/src/agent/config.js +38 -0
  23. package/src/agent/errors.js +7 -13
  24. package/src/agent/httpStatus.js +1 -19
  25. package/src/agent/known_queries.sql +1 -1
  26. package/src/agent/known_store.sql +12 -2
  27. package/src/agent/materializeContext.js +15 -18
  28. package/src/agent/pathEncode.js +5 -0
  29. package/src/agent/rummyHome.js +9 -0
  30. package/src/agent/runs.sql +37 -0
  31. package/src/agent/tokens.js +7 -7
  32. package/src/hooks/HookRegistry.js +1 -16
  33. package/src/hooks/Hooks.js +8 -33
  34. package/src/hooks/PluginContext.js +3 -21
  35. package/src/hooks/RpcRegistry.js +1 -4
  36. package/src/hooks/RummyContext.js +6 -16
  37. package/src/hooks/ToolRegistry.js +5 -15
  38. package/src/llm/LlmProvider.js +41 -33
  39. package/src/llm/errors.js +41 -4
  40. package/src/llm/openaiStream.js +125 -0
  41. package/src/llm/retry.js +109 -0
  42. package/src/plugins/budget/budget.js +55 -76
  43. package/src/plugins/cli/README.md +87 -0
  44. package/src/plugins/cli/bin.js +61 -0
  45. package/src/plugins/cli/cli.js +120 -0
  46. package/src/plugins/env/README.md +2 -1
  47. package/src/plugins/env/env.js +4 -6
  48. package/src/plugins/env/envDoc.md +2 -2
  49. package/src/plugins/error/error.js +23 -23
  50. package/src/plugins/file/file.js +2 -22
  51. package/src/plugins/get/get.js +12 -34
  52. package/src/plugins/get/getDoc.md +8 -6
  53. package/src/plugins/hedberg/edits.js +1 -11
  54. package/src/plugins/hedberg/hedberg.js +3 -26
  55. package/src/plugins/hedberg/normalize.js +1 -5
  56. package/src/plugins/hedberg/patterns.js +4 -15
  57. package/src/plugins/hedberg/sed.js +1 -7
  58. package/src/plugins/helpers.js +28 -20
  59. package/src/plugins/index.js +25 -41
  60. package/src/plugins/instructions/README.md +18 -0
  61. package/src/plugins/instructions/instructions.js +97 -38
  62. package/src/plugins/instructions/instructions.md +24 -15
  63. package/src/plugins/instructions/instructions_104.md +5 -4
  64. package/src/plugins/instructions/instructions_105.md +29 -36
  65. package/src/plugins/instructions/instructions_106.md +22 -0
  66. package/src/plugins/instructions/instructions_107.md +17 -0
  67. package/src/plugins/instructions/instructions_108.md +0 -8
  68. package/src/plugins/known/README.md +26 -6
  69. package/src/plugins/known/known.js +37 -34
  70. package/src/plugins/log/README.md +2 -2
  71. package/src/plugins/log/log.js +27 -34
  72. package/src/plugins/ollama/ollama.js +50 -66
  73. package/src/plugins/openai/openai.js +26 -44
  74. package/src/plugins/openrouter/openrouter.js +28 -52
  75. package/src/plugins/policy/README.md +8 -2
  76. package/src/plugins/policy/policy.js +8 -21
  77. package/src/plugins/prompt/README.md +22 -0
  78. package/src/plugins/prompt/prompt.js +14 -16
  79. package/src/plugins/rm/rm.js +5 -2
  80. package/src/plugins/rm/rmDoc.md +4 -4
  81. package/src/plugins/rpc/README.md +2 -1
  82. package/src/plugins/rpc/rpc.js +62 -48
  83. package/src/plugins/set/README.md +5 -1
  84. package/src/plugins/set/set.js +23 -33
  85. package/src/plugins/set/setDoc.md +1 -1
  86. package/src/plugins/sh/README.md +2 -1
  87. package/src/plugins/sh/sh.js +5 -11
  88. package/src/plugins/sh/shDoc.md +2 -2
  89. package/src/plugins/stream/README.md +6 -5
  90. package/src/plugins/stream/stream.js +6 -35
  91. package/src/plugins/telemetry/telemetry.js +26 -19
  92. package/src/plugins/think/think.js +4 -7
  93. package/src/plugins/unknown/unknown.js +8 -13
  94. package/src/plugins/update/update.js +42 -25
  95. package/src/plugins/update/updateDoc.md +3 -3
  96. package/src/plugins/xai/xai.js +30 -20
  97. package/src/plugins/yolo/yolo.js +159 -0
  98. package/src/server/ClientConnection.js +17 -47
  99. package/src/server/SocketServer.js +14 -14
  100. package/src/server/protocol.js +1 -10
  101. package/src/sql/functions/slugify.js +5 -7
  102. package/src/sql/v_model_context.sql +4 -11
  103. package/turns/cli_1777462658211/turn_001.txt +772 -0
  104. package/turns/cli_1777462658211/turn_002.txt +606 -0
  105. package/turns/cli_1777462658211/turn_003.txt +667 -0
  106. package/turns/cli_1777462658211/turn_004.txt +297 -0
  107. package/turns/cli_1777462658211/turn_005.txt +301 -0
  108. package/turns/cli_1777462658211/turn_006.txt +262 -0
  109. package/turns/cli_1777465095132/turn_001.txt +715 -0
  110. package/turns/cli_1777465095132/turn_002.txt +236 -0
  111. package/turns/cli_1777465095132/turn_003.txt +287 -0
  112. package/turns/cli_1777465095132/turn_004.txt +694 -0
  113. package/turns/cli_1777465095132/turn_005.txt +422 -0
  114. package/turns/cli_1777465095132/turn_006.txt +365 -0
  115. package/turns/cli_1777465095132/turn_007.txt +885 -0
  116. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  117. package/turns/cli_1777465095132/turn_009.txt +736 -0
@@ -15,7 +15,7 @@ SEARCH/REPLACE edits, and pattern updates.
15
15
  - **Category**: `logging`
16
16
  - **Handler**: Routes based on attributes:
17
17
  - `blocks` or `search` — SEARCH/REPLACE edit via `processEdit`.
18
- - `preview` — pattern preview (dry run).
18
+ - `manifest` — pattern manifest (lists matches without performing the set).
19
19
  - Scheme path — direct upsert at status 200.
20
20
  - File path — produces status 202 (proposed) with unified diff patch.
21
21
  - Glob/filter — bulk update via `updateBodyByPattern`.
@@ -31,3 +31,7 @@ the merge conflict block when a SEARCH/REPLACE was performed.
31
31
  - **Heuristic fallback**: On literal failure, fuzzy matching with warnings.
32
32
  - **Patch generation**: `generatePatch` produces unified diff for client display.
33
33
  - File writes are always status 202 (proposed); scheme writes resolve immediately.
34
+ - **`proposal.content` filter** — when the client accepts a proposed
35
+ set, this plugin overrides the resolved body to the body it
36
+ already staged on the audit entry (rather than whatever literal
37
+ body the client passed through `resolve`).
@@ -79,12 +79,7 @@ export default class Set {
79
79
  }
80
80
  }
81
81
  const turn = (await db.get_run_by_id.get({ id: runId })).next_turn;
82
- // Preserve the file entry's current visibility — a <get>
83
- // earlier in the run may have promoted it. Updating the
84
- // body without specifying visibility falls through to
85
- // the data-category default ("summarized") and wipes
86
- // the promotion, making the model re-get the file next
87
- // turn (then cycle-strike out).
82
+ // Preserve current visibility; default would wipe an earlier <get>'s promotion.
88
83
  const existingState = await entries.getState(runId, attrs.path);
89
84
  await entries.set({
90
85
  runId,
@@ -94,9 +89,13 @@ export default class Set {
94
89
  visibility: existingState?.visibility,
95
90
  });
96
91
  if (projectRoot) {
97
- const { writeFile } = await import("node:fs/promises");
98
- const { join } = await import("node:path");
99
- await writeFile(join(projectRoot, attrs.path), patched).catch(() => {});
92
+ const { writeFile, mkdir } = await import("node:fs/promises");
93
+ const { dirname, isAbsolute, join } = await import("node:path");
94
+ const targetPath = isAbsolute(attrs.path)
95
+ ? attrs.path
96
+ : join(projectRoot, attrs.path);
97
+ await mkdir(dirname(targetPath), { recursive: true });
98
+ await writeFile(targetPath, patched);
100
99
  }
101
100
  if (isNewFile && projectId) {
102
101
  await File.setConstraint(db, projectId, attrs.path, "active");
@@ -112,24 +111,22 @@ export default class Set {
112
111
  const rawSummary = typeof attrs.summary === "string" ? attrs.summary : null;
113
112
  const summaryText = rawSummary ? rawSummary.slice(0, 80) : null;
114
113
 
115
- // Invalid visibility value on a body-less set: reject with an
116
- // error instead of falling through to the write path. Without
117
- // this guard, a typo like visibility="promoted" (pre-migration
118
- // terminology) silently body-wiped the target — the fidelity
119
- // regression that cost us multiple demo runs.
114
+ // Reject invalid visibility on body-less set; otherwise a typo silently wipes the body.
120
115
  if (
121
116
  !entry.body &&
122
117
  attrs.path &&
123
118
  attrs.visibility !== undefined &&
124
119
  !visibilityAttr
125
120
  ) {
126
- await rummy.hooks.error.log.emit({
127
- store,
121
+ await store.set({
128
122
  runId,
129
123
  turn,
130
124
  loopId,
131
- message: `Invalid visibility "${attrs.visibility}" on <set path="${attrs.path}"/>. Use visibility="visible|summarized|archived".`,
132
- status: 400,
125
+ path: entry.resultPath,
126
+ body: `Invalid visibility "${attrs.visibility}" on <set path="${attrs.path}"/>. Use visibility="visible|summarized|archived".`,
127
+ state: "failed",
128
+ outcome: "validation",
129
+ attributes: { path: attrs.path },
133
130
  });
134
131
  return;
135
132
  }
@@ -187,8 +184,8 @@ export default class Set {
187
184
  // Edit: sed patterns or SEARCH/REPLACE blocks
188
185
  if (attrs.blocks || attrs.search != null) {
189
186
  await this.#processEdit(rummy, entry, attrs);
190
- } else if (attrs.preview && attrs.path) {
191
- // Preview
187
+ } else if (attrs.manifest && attrs.path) {
188
+ // Manifest: list paths and token costs without performing the operation.
192
189
  const matches = await store.getEntriesByPattern(
193
190
  runId,
194
191
  attrs.path,
@@ -202,7 +199,7 @@ export default class Set {
202
199
  attrs.path,
203
200
  attrs.body,
204
201
  matches,
205
- { preview: true, loopId },
202
+ { manifest: true, loopId },
206
203
  );
207
204
  return;
208
205
  } else {
@@ -262,8 +259,7 @@ export default class Set {
262
259
  { loopId },
263
260
  );
264
261
  } else {
265
- // Direct scheme write (known://, unknown://, etc.)
266
- // Same result shape as file writes — diff against existing.
262
+ // Direct scheme write; same diff-against-existing shape as file writes.
267
263
  const existing = await store.getBody(runId, target);
268
264
  const oldContent = existing === null ? "" : existing;
269
265
  const newContent = entry.body;
@@ -280,8 +276,7 @@ export default class Set {
280
276
  path: target,
281
277
  body: newContent,
282
278
  state: "resolved",
283
- // Scheme writes default to promoted — the model wrote it, so
284
- // it's material unless they explicitly demote/archive.
279
+ // Scheme writes default visible; the model wrote it.
285
280
  visibility: visibilityAttr ? visibilityAttr : "visible",
286
281
  attributes: summaryText ? { summary: summaryText } : null,
287
282
  loopId,
@@ -340,8 +335,7 @@ export default class Set {
340
335
 
341
336
  summary(entry) {
342
337
  if (!entry.body) return "";
343
- // Preserve SEARCH/REPLACE merge blocks intact — truncating them
344
- // drops the before/after the model needs to recognize its edit.
338
+ // Preserve SEARCH/REPLACE blocks intact; truncation strips before/after the model needs.
345
339
  if (/<<<<<<< SEARCH[\s\S]*>>>>>>> REPLACE/.test(entry.body)) {
346
340
  return entry.body;
347
341
  }
@@ -370,10 +364,7 @@ export default class Set {
370
364
 
371
365
  for (const match of matches) {
372
366
  if (match.scheme === null) {
373
- // Bare file path — apply the edit immediately against the
374
- // match body so the log carries a concrete before/after
375
- // merge. #materializeRevisions still runs at turn-end to
376
- // consolidate the set:// proposal for client acceptance.
367
+ // Bare file: apply edit immediately so log carries before/after merge.
377
368
  const canonicalPath = `set://${match.path}`;
378
369
  const revision = Set.#buildRevision(attrs);
379
370
  const existingAttrs = await rummy.getAttributes(canonicalPath);
@@ -533,8 +524,7 @@ export default class Set {
533
524
  }
534
525
  }
535
526
 
536
- // `replace` attr is optional in search/replace form — absence means
537
- // "delete the match"; normalize to empty string at this boundary.
527
+ // Missing `replace` = delete the match; normalize to empty string.
538
528
  static #resolveReplace(attrs) {
539
529
  return attrs.replace === undefined ? "" : attrs.replace;
540
530
  }
@@ -18,5 +18,5 @@ Example: <set path="src/config.js">s/port = 3000/port = 8080/g;s/We're almost do
18
18
  Example: <set path="example.md">Full file content here</set>
19
19
  <!-- Create: body contents are entire file. -->
20
20
 
21
- * YOU MUST NOT use <sh></sh> or <env></env> to list, create, read, or edit files — use <get></get> and <set></set>
21
+ YOU MUST NOT use <sh></sh> or <env></env> to list, create, read, or edit files — use <get></get> and <set></set>
22
22
  <!-- Reinforces at the decision point — model reading setDoc for file ops sees the prohibition here, not just buried in shDoc/envDoc which it may not be reading. -->
@@ -24,7 +24,8 @@ record, one data payload:
24
24
  - **Data channels**: `sh://turn_N/{slug}_1` (stdout), `sh://turn_N/{slug}_2`
25
25
  (stderr) — scheme=`sh`, category=`data`. Created at status=102 on
26
26
  proposal acceptance, grow via the `stream` RPC, transition to 200/500
27
- via `stream/completed`. Render inside the `<context>` block as `<sh>`.
27
+ via `stream/completed`. Render inside `<visible>` as `<sh>` when
28
+ promoted; listed in `<summarized>` otherwise.
28
29
 
29
30
  The `sh` scheme exists **only** for the data channels. The proposal/log
30
31
  entry itself is in the unified `log://` namespace along with every
@@ -1,4 +1,4 @@
1
- import { logPathToDataBase } from "../helpers.js";
1
+ import { logPathToDataBase, streamSummary } from "../helpers.js";
2
2
  import docs from "./shDoc.js";
3
3
 
4
4
  const LOG_ACTION_RE = /^log:\/\/turn_\d+\/(\w+)\//;
@@ -8,11 +8,7 @@ export default class Sh {
8
8
 
9
9
  constructor(core) {
10
10
  this.#core = core;
11
- // `sh` scheme holds the streamed stdout/stderr payload that's
12
- // data the model reads, not an audit record. The log entry at
13
- // log://turn_N/sh/{slug} (scheme=log, category=logging) is the
14
- // audit record; it lives in a separate namespace by design.
15
- // See SPEC §streaming_entries and the scheme/category invariant.
11
+ // data scheme = streamed stdout/stderr; audit lives in log://. SPEC #streaming_entries.
16
12
  core.registerScheme({ category: "data" });
17
13
  core.on("handler", this.handler.bind(this));
18
14
  core.on("visible", this.full.bind(this));
@@ -53,9 +49,7 @@ export default class Sh {
53
49
 
54
50
  async handler(entry, rummy) {
55
51
  const { entries: store, sequence: turn, runId, loopId } = rummy;
56
- // Proposal at 202 with the command as summary and empty body — the
57
- // body fills in on accept (log message about the action). Data
58
- // entries with stdout/stderr are created on accept in resolve().
52
+ // 202 with command summary, empty body; stdout/stderr entries created on accept.
59
53
  await store.set({
60
54
  runId,
61
55
  turn,
@@ -71,7 +65,7 @@ export default class Sh {
71
65
  return `# sh ${entry.attributes.command}\n${entry.body}`;
72
66
  }
73
67
 
74
- summary() {
75
- return "";
68
+ summary(entry) {
69
+ return streamSummary("sh", entry);
76
70
  }
77
71
  }
@@ -6,8 +6,8 @@ Example: <sh>npm install express</sh>
6
6
  Example: <sh>npm test</sh>
7
7
  <!-- Test execution. Another common side-effect action. -->
8
8
 
9
- * YOU MUST NOT use <sh></sh> to read, create, or edit files — use <get></get> and <set></set>
9
+ YOU MUST NOT use <sh></sh> to read, create, or edit files — use <get></get> and <set></set>
10
10
  <!-- Forces file operations through the entry system. -->
11
11
 
12
- * YOU MUST use <env></env> for commands without side effects
12
+ YOU MUST use <env></env> for commands without side effects
13
13
  <!-- Reinforces the env/sh split. Read = env, mutate = sh. -->
@@ -16,12 +16,13 @@ A streaming action lives in **two namespaces** by design:
16
16
  `{action}://turn_N/{slug}_2`, ... — scheme=`{action}` (sh, env, ...),
17
17
  category=`data`. Created at status=102 on proposal acceptance. Grow
18
18
  via `stream`; terminal via `stream/completed` / `stream/aborted` /
19
- `stream/cancel`. Render inside `<context>`.
19
+ `stream/cancel`. Render inside `<visible>` (or `<summarized>` if
20
+ demoted).
20
21
 
21
- The stream RPC `path` param is always the **log-entry path** (that's
22
- what clients receive on `run/proposal`). The server derives the data
23
- base path internally via `logPathToDataBase`. See
24
- [scheme_category_split](#scheme_category_split).
22
+ The stream RPC `path` param is always the **log-entry path** (the
23
+ `log://...` path the client discovers via `getEntries` after a
24
+ `run/changed` pulse). The server derives the data base path internally
25
+ via `logPathToDataBase`. See [scheme_category_split](#scheme_category_split).
25
26
 
26
27
  ## RPC Methods
27
28
 
@@ -1,22 +1,6 @@
1
1
  import { logPathToDataBase } from "../helpers.js";
2
2
 
3
- /**
4
- * Stream plugin — generic streaming entry infrastructure.
5
- *
6
- * Receives chunks from the client (or any producer) and appends them to
7
- * existing data entries. Producers (sh/env handlers) create the data
8
- * entries at status=102 on proposal acceptance; this plugin handles the
9
- * subsequent append + terminal-status transition via two RPC methods.
10
- *
11
- * RPC `path` param is the **log-entry path** (log://turn_N/{action}/{slug}
12
- * — that's what the client sees on `run/proposal`). Channels live under
13
- * the producer scheme ({action}://turn_N/{slug}_N) for a clean
14
- * data-vs-logging namespace split; this plugin derives the data base from
15
- * the log path on every RPC call.
16
- *
17
- * Not a model-facing tool. No scheme, no tooldoc, no dispatch handler.
18
- * Pure RPC plumbing that any streaming-producer plugin can leverage.
19
- */
3
+ // RPC plumbing that appends/terminates streaming data entries; see plugin README.
20
4
  export default class Stream {
21
5
  #core;
22
6
 
@@ -25,9 +9,7 @@ export default class Stream {
25
9
  const hooks = core.hooks;
26
10
  const r = hooks.rpc.registry;
27
11
 
28
- // stream: append a chunk to a streaming entry.
29
- // Entry path is constructed as `${path}_${channel}` per the Unix FD
30
- // convention (1=stdout, 2=stderr, higher=other producer channels).
12
+ // stream: append chunk; channel = Unix FD (1=stdout, 2=stderr).
31
13
  r.register("stream", {
32
14
  handler: async (params, ctx) => {
33
15
  if (!params.run) throw new Error("run is required");
@@ -67,8 +49,7 @@ export default class Stream {
67
49
  requiresInit: true,
68
50
  });
69
51
 
70
- // stream/completed: transition all data channels for this producer
71
- // to their terminal status and finalize the log entry body.
52
+ // stream/completed: terminal status on all channels + finalize log body.
72
53
  r.register("stream/completed", {
73
54
  handler: async (params, ctx) => {
74
55
  if (!params.run) throw new Error("run is required");
@@ -107,8 +88,7 @@ export default class Stream {
107
88
  });
108
89
  }
109
90
 
110
- // Update the log entry body with final stats. Keep it terse —
111
- // one line summarizing exit code, duration, and channel sizes.
91
+ // One-line final stats for the log entry body.
112
92
  const logEntry = await store.getAttributes(runId, params.path);
113
93
  let command = "";
114
94
  if (logEntry?.command) command = logEntry.command;
@@ -138,11 +118,7 @@ export default class Stream {
138
118
  requiresInit: true,
139
119
  });
140
120
 
141
- // stream/aborted: client-initiated cancellation. Transitions all data
142
- // channels to status 499 (Client Closed Request — the de-facto HTTP
143
- // status for client-terminated requests) and rewrites the log entry
144
- // body to note the abort. Shape mirrors stream/completed for client
145
- // symmetry: same run/path addressing, same channel sweep.
121
+ // stream/aborted: client cancellation; channels 499; mirrors stream/completed.
146
122
  r.register("stream/aborted", {
147
123
  handler: async (params, ctx) => {
148
124
  if (!params.run) throw new Error("run is required");
@@ -211,12 +187,7 @@ export default class Stream {
211
187
  requiresInit: true,
212
188
  });
213
189
 
214
- // stream/cancel: server-initiated cancellation. Any client (or
215
- // internal server code) can cancel a streaming producer — the server
216
- // transitions channels to 499 immediately and pushes a
217
- // stream/cancelled notification so connected clients can kill their
218
- // local processes. Also serves as stale 102 cleanup: if the client
219
- // died mid-stream, call stream/cancel to mark orphaned entries terminal.
190
+ // stream/cancel: server-initiated; pushes stream/cancelled notification; cleans stale 102s.
220
191
  r.register("stream/cancel", {
221
192
  handler: async (params, ctx) => {
222
193
  if (!params.run) throw new Error("run is required");
@@ -1,12 +1,16 @@
1
1
  import { mkdir, writeFile } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
3
 
4
+ // model://N is a diagnostic slice; full content is in assistant://N.
5
+ const MODEL_SNAPSHOT_BYTES = 4096;
6
+
4
7
  export default class Telemetry {
5
8
  #core;
6
9
  #starts = new Map();
7
10
  #lastRunPath = null;
8
11
  #turnsDir = null;
9
12
  #turnLog = [];
13
+ #turnStartIdx = 0;
10
14
  #currentRunAlias = null;
11
15
  #currentTurn = null;
12
16
 
@@ -31,8 +35,8 @@ export default class Telemetry {
31
35
  async #onRpcStarted({ method, id, params }) {
32
36
  this.#starts.set(id, Date.now());
33
37
  let summary = "";
34
- if (method === "ask" || method === "act") {
35
- const prompt = params?.prompt ? params.prompt : "";
38
+ if (method === "set" && params?.path?.startsWith("run://")) {
39
+ const prompt = params?.body ? params.body : "";
36
40
  summary = `prompt="${prompt.slice(0, 60)}"`;
37
41
  } else if (method === "run/abort") {
38
42
  summary = `run=${params?.run}`;
@@ -40,10 +44,6 @@ export default class Telemetry {
40
44
  summary = `run=${params?.run} action=${params?.resolution?.action}`;
41
45
  }
42
46
  console.log(`[RPC] → ${method}(${id})${summary ? ` ${summary}` : ""}`);
43
-
44
- if (method === "ask" || method === "act") {
45
- this.#turnLog = [];
46
- }
47
47
  }
48
48
 
49
49
  async #onRpcCompleted({ method, id, result }) {
@@ -136,7 +136,7 @@ export default class Telemetry {
136
136
  reasoning_content: responseMessage?.reasoning_content
137
137
  ? responseMessage.reasoning_content
138
138
  : null,
139
- content: content.slice(0, 4096),
139
+ content: content.slice(0, MODEL_SNAPSHOT_BYTES),
140
140
  usage: result.usage ? result.usage : null,
141
141
  model: result.model ? result.model : null,
142
142
  }),
@@ -161,10 +161,7 @@ export default class Telemetry {
161
161
  }
162
162
  }
163
163
 
164
- // content://N — unparsed text. 400 Bad Request because anything in
165
- // unparsed is text the parser couldn't dispatch (malformed XML, native
166
- // tool call attempts, reasoning bleed). Visible to the model so it
167
- // sees the rejection on its next turn and can correct.
164
+ // content://N — visible-rejected unparsed text so the model can correct next turn.
168
165
  if (unparsed) {
169
166
  await store.set({
170
167
  runId,
@@ -179,9 +176,7 @@ export default class Telemetry {
179
176
  });
180
177
  }
181
178
 
182
- // Commit usage stats. Providers surface token counts under
183
- // incompatible keys; walk them in priority order and fall back
184
- // to 0 only as the definitional "not reported" value.
179
+ // Per-provider key drift; walk in priority order, 0 = not reported.
185
180
  const usage = result.usage ? result.usage : {};
186
181
  const cachedSources = [
187
182
  usage.cached_tokens,
@@ -206,8 +201,7 @@ export default class Telemetry {
206
201
  reasoningTokens = v;
207
202
  break;
208
203
  }
209
- // Use LLM's actual prompt_tokens as the ground-truth context size
210
- // when available; falls back to our pre-call estimate.
204
+ // LLM's prompt_tokens is ground truth; estimator is pre-call fallback.
211
205
  let actualContextTokens = 0;
212
206
  if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
213
207
  else if (assembledTokens) actualContextTokens = assembledTokens;
@@ -223,15 +217,27 @@ export default class Telemetry {
223
217
  completion_tokens: numberOrZero(usage.completion_tokens),
224
218
  reasoning_tokens: reasoningTokens,
225
219
  total_tokens: numberOrZero(usage.total_tokens),
226
- cost: numberOrZero(usage.cost),
220
+ // usage.cost is what the relay BILLED us; it reads 0 when routed
221
+ // via BYOK (relay didn't bill — upstream charged our key directly).
222
+ // upstream_inference_cost is the true compute cost in either case.
223
+ cost:
224
+ numberOrZero(usage.cost) ||
225
+ numberOrZero(usage.cost_details?.upstream_inference_cost),
227
226
  });
228
227
  }
229
228
 
230
229
  async #logMessages(messages, context) {
231
- this.#currentRunAlias = context.runAlias
230
+ const newAlias = context.runAlias
232
231
  ? context.runAlias
233
232
  : `run_${context.runId}`;
233
+ // Reset on alias change (the semantic run boundary).
234
+ if (newAlias !== this.#currentRunAlias) {
235
+ this.#turnLog = [];
236
+ }
237
+ this.#currentRunAlias = newAlias;
234
238
  this.#currentTurn = context.turn === undefined ? null : context.turn;
239
+ // Per-turn slice index; turn_NNN.txt = this turn only, last_run.txt = cumulative.
240
+ this.#turnStartIdx = this.#turnLog.length;
235
241
  const turnLabel = this.#currentTurn === null ? "?" : this.#currentTurn;
236
242
  this.#turnLog.push(
237
243
  `\n${"=".repeat(60)}\nTURN ${turnLabel} — model=${context.model} run=${this.#currentRunAlias}\n${"=".repeat(60)}`,
@@ -272,6 +278,7 @@ export default class Telemetry {
272
278
  const runDir = join(this.#turnsDir, this.#currentRunAlias);
273
279
  await mkdir(runDir, { recursive: true });
274
280
  const fileName = `turn_${String(this.#currentTurn).padStart(3, "0")}.txt`;
275
- await writeFile(join(runDir, fileName), `${this.#turnLog.join("\n")}\n`);
281
+ const turnSlice = this.#turnLog.slice(this.#turnStartIdx);
282
+ await writeFile(join(runDir, fileName), `${turnSlice.join("\n")}\n`);
276
283
  }
277
284
  }
@@ -1,13 +1,12 @@
1
+ import config from "../../agent/config.js";
1
2
  import docs from "./thinkDoc.js";
2
3
 
3
- const THINK_ENABLED = process.env.RUMMY_THINK;
4
- if (THINK_ENABLED === undefined)
5
- throw new Error("RUMMY_THINK must be set (1 or 0)");
4
+ const { THINK } = config;
6
5
 
7
6
  export default class Think {
8
7
  constructor(core) {
9
8
  core.registerScheme({ modelVisible: 0, category: "logging" });
10
- if (THINK_ENABLED === "1") {
9
+ if (THINK === "1") {
11
10
  core.ensureTool();
12
11
  core.filter("instructions.toolDocs", async (docsMap) => {
13
12
  docsMap.think = docs;
@@ -15,9 +14,7 @@ export default class Think {
15
14
  });
16
15
  }
17
16
 
18
- // Merge <think> tag bodies into the turn's reasoning_content so
19
- // models without a dedicated reasoning channel still expose their
20
- // reasoning through the same field.
17
+ // Merge <think> bodies into reasoning_content for models without a reasoning channel.
21
18
  core.filter("llm.reasoning", (reasoning, { commands }) => {
22
19
  const thinkText = commands
23
20
  .filter((c) => c.name === "think")
@@ -1,8 +1,5 @@
1
1
  export default class Unknown {
2
- #core;
3
-
4
2
  constructor(core) {
5
- this.#core = core;
6
3
  core.ensureTool();
7
4
  core.registerScheme({
8
5
  category: "unknown",
@@ -10,28 +7,28 @@ export default class Unknown {
10
7
  core.on("handler", this.handler.bind(this));
11
8
  core.on("visible", this.full.bind(this));
12
9
  core.on("summarized", this.summary.bind(this));
13
- core.filter("assembly.user", this.assembleUnknowns.bind(this), 200);
10
+ core.filter("assembly.user", this.assembleUnknowns.bind(this), 150);
14
11
  core.markHidden();
15
12
  }
16
13
 
17
14
  async handler(entry, rummy) {
18
15
  const { entries: store, sequence: turn, runId, loopId } = rummy;
19
16
 
20
- // Deduplicate — if this exact body already exists, skip
21
17
  const existingValues = await store.getUnknownValues(runId);
22
18
  if (existingValues.has(entry.body)) {
23
- await this.#core.hooks.error.log.emit({
24
- store,
19
+ await store.set({
25
20
  runId,
26
21
  turn,
27
22
  loopId,
28
- message: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
23
+ path: entry.resultPath || entry.path,
24
+ body: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
25
+ state: "failed",
26
+ outcome: "duplicate",
29
27
  });
30
28
  return;
31
29
  }
32
30
 
33
- // Generate slug path and upsert. Summary (if provided) becomes the
34
- // path so the model can round-trip it via <get>; body is the fallback.
31
+ // summary > body for slug; lets the model round-trip via <get>.
35
32
  const unknownPath = await store.slugPath(
36
33
  runId,
37
34
  "unknown",
@@ -52,9 +49,7 @@ export default class Unknown {
52
49
  return entry.body;
53
50
  }
54
51
 
55
- // Same principle as knowns: keep the first 500 characters on
56
- // summarized unknowns so demotion doesn't erase the question,
57
- // but cap large bodies to bound the packet cost.
52
+ // First 500 chars; matches knowns/prompt summarized.
58
53
  summary(entry) {
59
54
  if (!entry.body) return "";
60
55
  if (entry.body.length <= 500) return entry.body;
@@ -32,44 +32,61 @@ export default class Update {
32
32
  }
33
33
 
34
34
  async handler(entry, rummy) {
35
+ const { entries: store, sequence: turn, runId, loopId } = rummy;
35
36
  const status = entry.attributes?.status ?? 102;
37
+ const validation = await rummy.hooks.instructions.validateNavigation(
38
+ status,
39
+ rummy,
40
+ );
41
+ if (!validation.ok) {
42
+ entry.state = "failed";
43
+ entry.outcome = "invalid_navigation";
44
+ entry.body = validation.reason;
45
+ await store.set({
46
+ runId,
47
+ turn,
48
+ loopId,
49
+ path: entry.resultPath,
50
+ body: validation.reason,
51
+ state: "failed",
52
+ outcome: "invalid_navigation",
53
+ attributes: { status },
54
+ });
55
+ return;
56
+ }
57
+ if (!isValidStatus(status)) {
58
+ entry.state = "failed";
59
+ entry.outcome = "invalid_status";
60
+ const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
61
+ entry.body = message;
62
+ await store.set({
63
+ runId,
64
+ turn,
65
+ loopId,
66
+ path: entry.resultPath,
67
+ body: message,
68
+ state: "failed",
69
+ outcome: "invalid_status",
70
+ attributes: { status },
71
+ });
72
+ return;
73
+ }
36
74
  await rummy.update(entry.body, { status });
37
75
  }
38
76
 
39
- /**
40
- * Classify this turn's update state.
41
- *
42
- * Returns { summaryText, updateText }:
43
- * - summaryText: non-null → model claimed terminal (200/204/422)
44
- * - updateText: non-null → model is continuing (1xx)
45
- *
46
- * Errors (invalid status, missing update) emit via hooks.error.log.
47
- * The "terminal + turn had errors → not actually terminal" rule
48
- * lives in the error plugin's verdict, not here.
49
- */
50
77
  async resolve({ recorded, content, runId, turn, loopId, rummy }) {
51
78
  const entry = recorded.findLast((e) => e.scheme === "update");
52
79
  const status = entry?.attributes?.status ?? 102;
53
- const isTerminal = TERMINAL_STATUSES.has(status);
80
+ const failed = entry?.state === "failed";
81
+ const isTerminal = TERMINAL_STATUSES.has(status) && !failed;
54
82
  let summaryText = null;
55
83
  let updateText = null;
56
- if (entry?.body) {
84
+ if (entry?.body && !failed) {
57
85
  if (isTerminal) summaryText = entry.body;
58
86
  else updateText = entry.body;
59
87
  }
60
88
 
61
- if (entry && !isValidStatus(status)) {
62
- await rummy.hooks.error.log.emit({
63
- store: rummy.entries,
64
- runId,
65
- turn,
66
- loopId,
67
- message: `Invalid status ${entry.attributes?.status} on update — use 1xx to continue or 200 to conclude.`,
68
- status: 422,
69
- });
70
- }
71
-
72
- if (!summaryText && !updateText) {
89
+ if (!summaryText && !updateText && !failed) {
73
90
  const empty = !content || content.trim() === "";
74
91
  await rummy.hooks.error.log.emit({
75
92
  store: rummy.entries,
@@ -1,8 +1,8 @@
1
- ## <update status="N">{brief status}</update> - Status report (exactly one per turn, at the end)
1
+ ## <update status="N">{brief status}</update> - Report turn status (exactly one per turn, at the end)
2
2
  <!-- Header defines position, frequency, and status code requirement. -->
3
3
 
4
- REQUIRED: the valid values of N are defined by your current stage instructions.
4
+ YOU MUST refer to your current stage instructions for valid values of N.
5
5
  <!-- Single source of truth for codes is the current phase instructions block, not this doc. Listing codes here leaks termination knowledge (e.g. 200) that strong models use to short-circuit the protocol. -->
6
6
 
7
- REQUIRED: YOU MUST keep <update></update> body to <= 80 characters.
7
+ YOU MUST keep <update></update> body to <= 80 characters.
8
8
  <!-- Length cap. -->