@possumtech/rummy 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.env.example +12 -7
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +305 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +6 -2
  11. package/scriptify/cache_probe.js +66 -0
  12. package/scriptify/cache_probe_grok.js +74 -0
  13. package/service.js +22 -11
  14. package/src/agent/AgentLoop.js +33 -139
  15. package/src/agent/ContextAssembler.js +2 -9
  16. package/src/agent/Entries.js +36 -101
  17. package/src/agent/ProjectAgent.js +2 -9
  18. package/src/agent/TurnExecutor.js +45 -83
  19. package/src/agent/XmlParser.js +247 -273
  20. package/src/agent/budget.js +5 -28
  21. package/src/agent/config.js +38 -0
  22. package/src/agent/errors.js +7 -13
  23. package/src/agent/httpStatus.js +1 -19
  24. package/src/agent/known_store.sql +7 -2
  25. package/src/agent/materializeContext.js +12 -17
  26. package/src/agent/pathEncode.js +5 -0
  27. package/src/agent/rummyHome.js +9 -0
  28. package/src/agent/runs.sql +18 -0
  29. package/src/agent/tokens.js +2 -8
  30. package/src/hooks/HookRegistry.js +1 -16
  31. package/src/hooks/Hooks.js +8 -33
  32. package/src/hooks/PluginContext.js +3 -21
  33. package/src/hooks/RpcRegistry.js +1 -4
  34. package/src/hooks/RummyContext.js +2 -16
  35. package/src/hooks/ToolRegistry.js +5 -15
  36. package/src/llm/LlmProvider.js +28 -23
  37. package/src/llm/errors.js +41 -4
  38. package/src/llm/openaiStream.js +125 -0
  39. package/src/llm/retry.js +61 -15
  40. package/src/plugins/budget/budget.js +14 -81
  41. package/src/plugins/cli/README.md +87 -0
  42. package/src/plugins/cli/bin.js +61 -0
  43. package/src/plugins/cli/cli.js +120 -0
  44. package/src/plugins/env/README.md +2 -1
  45. package/src/plugins/env/env.js +4 -6
  46. package/src/plugins/env/envDoc.md +2 -2
  47. package/src/plugins/error/error.js +23 -23
  48. package/src/plugins/file/file.js +2 -22
  49. package/src/plugins/get/get.js +12 -34
  50. package/src/plugins/get/getDoc.md +5 -3
  51. package/src/plugins/hedberg/edits.js +1 -11
  52. package/src/plugins/hedberg/hedberg.js +3 -26
  53. package/src/plugins/hedberg/normalize.js +1 -5
  54. package/src/plugins/hedberg/patterns.js +4 -15
  55. package/src/plugins/hedberg/sed.js +1 -7
  56. package/src/plugins/helpers.js +28 -20
  57. package/src/plugins/index.js +25 -41
  58. package/src/plugins/instructions/README.md +18 -0
  59. package/src/plugins/instructions/instructions.js +13 -76
  60. package/src/plugins/instructions/instructions.md +19 -18
  61. package/src/plugins/instructions/instructions_104.md +5 -4
  62. package/src/plugins/instructions/instructions_105.md +16 -15
  63. package/src/plugins/instructions/instructions_106.md +15 -14
  64. package/src/plugins/instructions/instructions_107.md +13 -6
  65. package/src/plugins/known/README.md +26 -6
  66. package/src/plugins/known/known.js +36 -34
  67. package/src/plugins/log/README.md +2 -2
  68. package/src/plugins/log/log.js +6 -33
  69. package/src/plugins/ollama/ollama.js +50 -66
  70. package/src/plugins/openai/openai.js +26 -44
  71. package/src/plugins/openrouter/openrouter.js +28 -52
  72. package/src/plugins/policy/README.md +8 -2
  73. package/src/plugins/policy/policy.js +8 -21
  74. package/src/plugins/prompt/README.md +22 -0
  75. package/src/plugins/prompt/prompt.js +8 -16
  76. package/src/plugins/rm/rm.js +5 -2
  77. package/src/plugins/rm/rmDoc.md +4 -4
  78. package/src/plugins/rpc/README.md +2 -1
  79. package/src/plugins/rpc/rpc.js +51 -47
  80. package/src/plugins/set/README.md +5 -1
  81. package/src/plugins/set/set.js +23 -33
  82. package/src/plugins/set/setDoc.md +1 -1
  83. package/src/plugins/sh/README.md +2 -1
  84. package/src/plugins/sh/sh.js +5 -11
  85. package/src/plugins/sh/shDoc.md +2 -2
  86. package/src/plugins/stream/README.md +6 -5
  87. package/src/plugins/stream/stream.js +6 -35
  88. package/src/plugins/telemetry/telemetry.js +26 -19
  89. package/src/plugins/think/think.js +4 -7
  90. package/src/plugins/unknown/unknown.js +8 -13
  91. package/src/plugins/update/update.js +36 -35
  92. package/src/plugins/update/updateDoc.md +3 -3
  93. package/src/plugins/xai/xai.js +30 -20
  94. package/src/plugins/yolo/yolo.js +8 -41
  95. package/src/server/ClientConnection.js +17 -47
  96. package/src/server/SocketServer.js +14 -14
  97. package/src/server/protocol.js +1 -10
  98. package/src/sql/functions/slugify.js +5 -7
  99. package/src/sql/v_model_context.sql +4 -11
  100. package/turns/cli_1777462658211/turn_001.txt +772 -0
  101. package/turns/cli_1777462658211/turn_002.txt +606 -0
  102. package/turns/cli_1777462658211/turn_003.txt +667 -0
  103. package/turns/cli_1777462658211/turn_004.txt +297 -0
  104. package/turns/cli_1777462658211/turn_005.txt +301 -0
  105. package/turns/cli_1777462658211/turn_006.txt +262 -0
  106. package/turns/cli_1777465095132/turn_001.txt +715 -0
  107. package/turns/cli_1777465095132/turn_002.txt +236 -0
  108. package/turns/cli_1777465095132/turn_003.txt +287 -0
  109. package/turns/cli_1777465095132/turn_004.txt +694 -0
  110. package/turns/cli_1777465095132/turn_005.txt +422 -0
  111. package/turns/cli_1777465095132/turn_006.txt +365 -0
  112. package/turns/cli_1777465095132/turn_007.txt +885 -0
  113. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  114. package/turns/cli_1777465095132/turn_009.txt +736 -0
@@ -1,12 +1,16 @@
1
1
  import { mkdir, writeFile } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
3
 
4
+ // model://N is a diagnostic slice; full content is in assistant://N.
5
+ const MODEL_SNAPSHOT_BYTES = 4096;
6
+
4
7
  export default class Telemetry {
5
8
  #core;
6
9
  #starts = new Map();
7
10
  #lastRunPath = null;
8
11
  #turnsDir = null;
9
12
  #turnLog = [];
13
+ #turnStartIdx = 0;
10
14
  #currentRunAlias = null;
11
15
  #currentTurn = null;
12
16
 
@@ -31,8 +35,8 @@ export default class Telemetry {
31
35
  async #onRpcStarted({ method, id, params }) {
32
36
  this.#starts.set(id, Date.now());
33
37
  let summary = "";
34
- if (method === "ask" || method === "act") {
35
- const prompt = params?.prompt ? params.prompt : "";
38
+ if (method === "set" && params?.path?.startsWith("run://")) {
39
+ const prompt = params?.body ? params.body : "";
36
40
  summary = `prompt="${prompt.slice(0, 60)}"`;
37
41
  } else if (method === "run/abort") {
38
42
  summary = `run=${params?.run}`;
@@ -40,10 +44,6 @@ export default class Telemetry {
40
44
  summary = `run=${params?.run} action=${params?.resolution?.action}`;
41
45
  }
42
46
  console.log(`[RPC] → ${method}(${id})${summary ? ` ${summary}` : ""}`);
43
-
44
- if (method === "ask" || method === "act") {
45
- this.#turnLog = [];
46
- }
47
47
  }
48
48
 
49
49
  async #onRpcCompleted({ method, id, result }) {
@@ -136,7 +136,7 @@ export default class Telemetry {
136
136
  reasoning_content: responseMessage?.reasoning_content
137
137
  ? responseMessage.reasoning_content
138
138
  : null,
139
- content: content.slice(0, 4096),
139
+ content: content.slice(0, MODEL_SNAPSHOT_BYTES),
140
140
  usage: result.usage ? result.usage : null,
141
141
  model: result.model ? result.model : null,
142
142
  }),
@@ -161,10 +161,7 @@ export default class Telemetry {
161
161
  }
162
162
  }
163
163
 
164
- // content://N — unparsed text. 400 Bad Request because anything in
165
- // unparsed is text the parser couldn't dispatch (malformed XML, native
166
- // tool call attempts, reasoning bleed). Visible to the model so it
167
- // sees the rejection on its next turn and can correct.
164
+ // content://N — visible-rejected unparsed text so the model can correct next turn.
168
165
  if (unparsed) {
169
166
  await store.set({
170
167
  runId,
@@ -179,9 +176,7 @@ export default class Telemetry {
179
176
  });
180
177
  }
181
178
 
182
- // Commit usage stats. Providers surface token counts under
183
- // incompatible keys; walk them in priority order and fall back
184
- // to 0 only as the definitional "not reported" value.
179
+ // Per-provider key drift; walk in priority order, 0 = not reported.
185
180
  const usage = result.usage ? result.usage : {};
186
181
  const cachedSources = [
187
182
  usage.cached_tokens,
@@ -206,8 +201,7 @@ export default class Telemetry {
206
201
  reasoningTokens = v;
207
202
  break;
208
203
  }
209
- // Use LLM's actual prompt_tokens as the ground-truth context size
210
- // when available; falls back to our pre-call estimate.
204
+ // LLM's prompt_tokens is ground truth; estimator is pre-call fallback.
211
205
  let actualContextTokens = 0;
212
206
  if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
213
207
  else if (assembledTokens) actualContextTokens = assembledTokens;
@@ -223,15 +217,27 @@ export default class Telemetry {
223
217
  completion_tokens: numberOrZero(usage.completion_tokens),
224
218
  reasoning_tokens: reasoningTokens,
225
219
  total_tokens: numberOrZero(usage.total_tokens),
226
- cost: numberOrZero(usage.cost),
220
+ // usage.cost is what the relay BILLED us; it reads 0 when routed
221
+ // via BYOK (relay didn't bill — upstream charged our key directly).
222
+ // upstream_inference_cost is the true compute cost in either case.
223
+ cost:
224
+ numberOrZero(usage.cost) ||
225
+ numberOrZero(usage.cost_details?.upstream_inference_cost),
227
226
  });
228
227
  }
229
228
 
230
229
  async #logMessages(messages, context) {
231
- this.#currentRunAlias = context.runAlias
230
+ const newAlias = context.runAlias
232
231
  ? context.runAlias
233
232
  : `run_${context.runId}`;
233
+ // Reset on alias change (the semantic run boundary).
234
+ if (newAlias !== this.#currentRunAlias) {
235
+ this.#turnLog = [];
236
+ }
237
+ this.#currentRunAlias = newAlias;
234
238
  this.#currentTurn = context.turn === undefined ? null : context.turn;
239
+ // Per-turn slice index; turn_NNN.txt = this turn only, last_run.txt = cumulative.
240
+ this.#turnStartIdx = this.#turnLog.length;
235
241
  const turnLabel = this.#currentTurn === null ? "?" : this.#currentTurn;
236
242
  this.#turnLog.push(
237
243
  `\n${"=".repeat(60)}\nTURN ${turnLabel} — model=${context.model} run=${this.#currentRunAlias}\n${"=".repeat(60)}`,
@@ -272,6 +278,7 @@ export default class Telemetry {
272
278
  const runDir = join(this.#turnsDir, this.#currentRunAlias);
273
279
  await mkdir(runDir, { recursive: true });
274
280
  const fileName = `turn_${String(this.#currentTurn).padStart(3, "0")}.txt`;
275
- await writeFile(join(runDir, fileName), `${this.#turnLog.join("\n")}\n`);
281
+ const turnSlice = this.#turnLog.slice(this.#turnStartIdx);
282
+ await writeFile(join(runDir, fileName), `${turnSlice.join("\n")}\n`);
276
283
  }
277
284
  }
@@ -1,13 +1,12 @@
1
+ import config from "../../agent/config.js";
1
2
  import docs from "./thinkDoc.js";
2
3
 
3
- const THINK_ENABLED = process.env.RUMMY_THINK;
4
- if (THINK_ENABLED === undefined)
5
- throw new Error("RUMMY_THINK must be set (1 or 0)");
4
+ const { THINK } = config;
6
5
 
7
6
  export default class Think {
8
7
  constructor(core) {
9
8
  core.registerScheme({ modelVisible: 0, category: "logging" });
10
- if (THINK_ENABLED === "1") {
9
+ if (THINK === "1") {
11
10
  core.ensureTool();
12
11
  core.filter("instructions.toolDocs", async (docsMap) => {
13
12
  docsMap.think = docs;
@@ -15,9 +14,7 @@ export default class Think {
15
14
  });
16
15
  }
17
16
 
18
- // Merge <think> tag bodies into the turn's reasoning_content so
19
- // models without a dedicated reasoning channel still expose their
20
- // reasoning through the same field.
17
+ // Merge <think> bodies into reasoning_content for models without a reasoning channel.
21
18
  core.filter("llm.reasoning", (reasoning, { commands }) => {
22
19
  const thinkText = commands
23
20
  .filter((c) => c.name === "think")
@@ -1,8 +1,5 @@
1
1
  export default class Unknown {
2
- #core;
3
-
4
2
  constructor(core) {
5
- this.#core = core;
6
3
  core.ensureTool();
7
4
  core.registerScheme({
8
5
  category: "unknown",
@@ -10,28 +7,28 @@ export default class Unknown {
10
7
  core.on("handler", this.handler.bind(this));
11
8
  core.on("visible", this.full.bind(this));
12
9
  core.on("summarized", this.summary.bind(this));
13
- core.filter("assembly.user", this.assembleUnknowns.bind(this), 200);
10
+ core.filter("assembly.user", this.assembleUnknowns.bind(this), 150);
14
11
  core.markHidden();
15
12
  }
16
13
 
17
14
  async handler(entry, rummy) {
18
15
  const { entries: store, sequence: turn, runId, loopId } = rummy;
19
16
 
20
- // Deduplicate — if this exact body already exists, skip
21
17
  const existingValues = await store.getUnknownValues(runId);
22
18
  if (existingValues.has(entry.body)) {
23
- await this.#core.hooks.error.log.emit({
24
- store,
19
+ await store.set({
25
20
  runId,
26
21
  turn,
27
22
  loopId,
28
- message: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
23
+ path: entry.resultPath || entry.path,
24
+ body: `Unknown deduped: "${entry.body.slice(0, 60)}"`,
25
+ state: "failed",
26
+ outcome: "duplicate",
29
27
  });
30
28
  return;
31
29
  }
32
30
 
33
- // Generate slug path and upsert. Summary (if provided) becomes the
34
- // path so the model can round-trip it via <get>; body is the fallback.
31
+ // summary > body for slug; lets the model round-trip via <get>.
35
32
  const unknownPath = await store.slugPath(
36
33
  runId,
37
34
  "unknown",
@@ -52,9 +49,7 @@ export default class Unknown {
52
49
  return entry.body;
53
50
  }
54
51
 
55
- // Same principle as knowns: keep the first 500 characters on
56
- // summarized unknowns so demotion doesn't erase the question,
57
- // but cap large bodies to bound the packet cost.
52
+ // First 500 chars; matches knowns/prompt summarized.
58
53
  summary(entry) {
59
54
  if (!entry.body) return "";
60
55
  if (entry.body.length <= 500) return entry.body;
@@ -32,60 +32,61 @@ export default class Update {
32
32
  }
33
33
 
34
34
  async handler(entry, rummy) {
35
+ const { entries: store, sequence: turn, runId, loopId } = rummy;
35
36
  const status = entry.attributes?.status ?? 102;
36
37
  const validation = await rummy.hooks.instructions.validateNavigation(
37
38
  status,
38
39
  rummy,
39
40
  );
40
- const attributes = validation.ok ? {} : { rejected: true };
41
- await rummy.update(entry.body, { status, attributes });
42
41
  if (!validation.ok) {
43
- await rummy.hooks.error.log.emit({
44
- store: rummy.entries,
45
- runId: rummy.runId,
46
- turn: rummy.sequence,
47
- loopId: rummy.loopId,
48
- message: validation.reason,
49
- status: 422,
42
+ entry.state = "failed";
43
+ entry.outcome = "invalid_navigation";
44
+ entry.body = validation.reason;
45
+ await store.set({
46
+ runId,
47
+ turn,
48
+ loopId,
49
+ path: entry.resultPath,
50
+ body: validation.reason,
51
+ state: "failed",
52
+ outcome: "invalid_navigation",
53
+ attributes: { status },
54
+ });
55
+ return;
56
+ }
57
+ if (!isValidStatus(status)) {
58
+ entry.state = "failed";
59
+ entry.outcome = "invalid_status";
60
+ const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
61
+ entry.body = message;
62
+ await store.set({
63
+ runId,
64
+ turn,
65
+ loopId,
66
+ path: entry.resultPath,
67
+ body: message,
68
+ state: "failed",
69
+ outcome: "invalid_status",
70
+ attributes: { status },
50
71
  });
72
+ return;
51
73
  }
74
+ await rummy.update(entry.body, { status });
52
75
  }
53
76
 
54
- /**
55
- * Classify this turn's update state.
56
- *
57
- * Returns { summaryText, updateText }:
58
- * - summaryText: non-null → model claimed terminal (200/204/422)
59
- * - updateText: non-null → model is continuing (1xx)
60
- *
61
- * Errors (invalid status, missing update) emit via hooks.error.log.
62
- * The "terminal + turn had errors → not actually terminal" rule
63
- * lives in the error plugin's verdict, not here.
64
- */
65
77
  async resolve({ recorded, content, runId, turn, loopId, rummy }) {
66
78
  const entry = recorded.findLast((e) => e.scheme === "update");
67
79
  const status = entry?.attributes?.status ?? 102;
68
- const rejected = entry?.attributes?.rejected === true;
69
- const isTerminal = TERMINAL_STATUSES.has(status) && !rejected;
80
+ const failed = entry?.state === "failed";
81
+ const isTerminal = TERMINAL_STATUSES.has(status) && !failed;
70
82
  let summaryText = null;
71
83
  let updateText = null;
72
- if (entry?.body) {
84
+ if (entry?.body && !failed) {
73
85
  if (isTerminal) summaryText = entry.body;
74
86
  else updateText = entry.body;
75
87
  }
76
88
 
77
- if (entry && !isValidStatus(status)) {
78
- await rummy.hooks.error.log.emit({
79
- store: rummy.entries,
80
- runId,
81
- turn,
82
- loopId,
83
- message: `Invalid status ${entry.attributes?.status} on update — use 1xx to continue or 200 to conclude.`,
84
- status: 422,
85
- });
86
- }
87
-
88
- if (!summaryText && !updateText) {
89
+ if (!summaryText && !updateText && !failed) {
89
90
  const empty = !content || content.trim() === "";
90
91
  await rummy.hooks.error.log.emit({
91
92
  store: rummy.entries,
@@ -1,8 +1,8 @@
1
- ## <update status="N">{brief status}</update> - Status report (exactly one per turn, at the end)
1
+ ## <update status="N">{brief status}</update> - Report turn status (exactly one per turn, at the end)
2
2
  <!-- Header defines position, frequency, and status code requirement. -->
3
3
 
4
- REQUIRED: the valid values of N are defined by your current stage instructions.
4
+ YOU MUST refer to your current stage instructions for valid values of N.
5
5
  <!-- Single source of truth for codes is the current phase instructions block, not this doc. Listing codes here leaks termination knowledge (e.g. 200) that strong models use to short-circuit the protocol. -->
6
6
 
7
- REQUIRED: YOU MUST keep <update></update> body to <= 80 characters.
7
+ YOU MUST keep <update></update> body to <= 80 characters.
8
8
  <!-- Length cap. -->
@@ -1,16 +1,12 @@
1
+ import config from "../../agent/config.js";
1
2
  import msg from "../../agent/messages.js";
3
+ import { parseRetryAfter } from "../../llm/errors.js";
2
4
 
3
- const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
4
- if (!FETCH_TIMEOUT) throw new Error("RUMMY_FETCH_TIMEOUT must be set");
5
+ const { FETCH_TIMEOUT } = config;
5
6
 
6
7
  const PROVIDER = "xai";
7
8
 
8
- /**
9
- * xAI (Grok) LLM provider plugin. Registers with hooks.llm.providers if
10
- * XAI_BASE_URL is set; inert otherwise. Handles model aliases of the
11
- * form `xai/{modelName}`. Normalizes xAI's distinct response shape
12
- * into the common OpenAI-shaped envelope.
13
- */
9
+ // Inert unless XAI_BASE_URL set; xai/{model} aliases; normalizes to OpenAI envelope.
14
10
  export default class Xai {
15
11
  #baseUrl;
16
12
  #apiKey;
@@ -39,6 +35,11 @@ export default class Xai {
39
35
  const body = { model, input: messages };
40
36
  if (options.temperature !== undefined)
41
37
  body.temperature = options.temperature;
38
+ // xAI auto-caches per-server; stable prompt_cache_key keeps a multi-
39
+ // turn run pinned to the same backend so the cached prefix actually
40
+ // hits. Without this, requests load-balance and cache_tokens stays
41
+ // near-zero. See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
42
+ if (options.runAlias) body.prompt_cache_key = options.runAlias;
42
43
 
43
44
  const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
44
45
  const signal = options.signal
@@ -56,15 +57,27 @@ export default class Xai {
56
57
  });
57
58
 
58
59
  if (!response.ok) {
59
- const error = await response.text();
60
+ const errorBody = await response.text();
61
+ const retryAfter = parseRetryAfter(response.headers.get("retry-after"));
60
62
  if (response.status === 401 || response.status === 403) {
61
- throw new Error(
62
- msg("error.xai_auth", { status: `${response.status} - ${error}` }),
63
+ const err = new Error(
64
+ msg("error.xai_auth", {
65
+ status: `${response.status} - ${errorBody}`,
66
+ }),
63
67
  );
68
+ err.status = response.status;
69
+ err.body = errorBody;
70
+ throw err;
64
71
  }
65
- throw new Error(
66
- msg("error.xai_api", { status: `${response.status} - ${error}` }),
72
+ const err = new Error(
73
+ msg("error.xai_api", {
74
+ status: `${response.status} - ${errorBody}`,
75
+ }),
67
76
  );
77
+ err.status = response.status;
78
+ err.body = errorBody;
79
+ err.retryAfter = retryAfter;
80
+ throw err;
68
81
  }
69
82
 
70
83
  return this.#normalize(await response.json());
@@ -133,12 +146,11 @@ export default class Xai {
133
146
  const modelsUrl = this.#baseUrl.replace(/\/responses$/, "/models");
134
147
  const res = await fetch(modelsUrl, {
135
148
  headers: { Authorization: `Bearer ${this.#apiKey}` },
136
- signal: AbortSignal.timeout(5000),
149
+ signal: AbortSignal.timeout(FETCH_TIMEOUT),
137
150
  });
138
151
  if (res.ok) {
139
152
  const data = await res.json();
140
- // xAI's /models returns either { data: [...] } or { models: [...] }
141
- // depending on the API version; accept either and crash otherwise.
153
+ // xAI /models response shape varies by API version.
142
154
  let models;
143
155
  if (data.data) models = data.data;
144
156
  else if (data.models) models = data.models;
@@ -156,12 +168,10 @@ export default class Xai {
156
168
  /\/responses$/,
157
169
  `/language-models/${model}`,
158
170
  );
159
- // Optional endpoint probe. If the network call fails (404 on older
160
- // API versions, timeout, etc.) we fall through to the next strategy
161
- // below; a terminal throw fires if no strategy resolves.
171
+ // Optional probe; failure falls through to terminal throw below.
162
172
  const langRes = await fetch(langUrl, {
163
173
  headers: { Authorization: `Bearer ${this.#apiKey}` },
164
- signal: AbortSignal.timeout(5000),
174
+ signal: AbortSignal.timeout(FETCH_TIMEOUT),
165
175
  }).catch(() => null);
166
176
  if (langRes?.ok) {
167
177
  const langData = await langRes.json();
@@ -3,35 +3,17 @@ import { logPathToDataBase } from "../helpers.js";
3
3
 
4
4
  const SH_PATH_RE = /^log:\/\/turn_\d+\/(sh|env)\//;
5
5
 
6
- /**
7
- * YOLO plugin — for runs started with `yolo: true`, auto-resolves every
8
- * proposal server-side and spawns sh/env commands locally, streaming
9
- * output to the same data-channel entries the existing `stream`/
10
- * `stream/completed` RPC contract uses.
11
- *
12
- * Pattern parallel to `noRepo`/`noWeb`/`noInteraction`/`noProposals`:
13
- * `yolo` is a run attribute plumbed via rpc.js → AgentLoop loop config →
14
- * RummyContext.yolo. This plugin reads `rummy.yolo` off the proposal
15
- * payload and engages only when set; non-yolo runs are unaffected.
16
- *
17
- * The plugin replicates AgentLoop.resolve()'s accept path inline rather
18
- * than calling an exposed projectAgent — keeps yolo logic contained in
19
- * the yolo plugin and out of backbone files.
20
- */
6
+ // Auto-resolves proposals + spawns sh/env locally for runs started with yolo:true. SPEC #yolo_mode.
21
7
  export default class Yolo {
22
8
  constructor(core) {
23
9
  this.core = core;
24
10
  core.hooks.proposal.pending.on(this.#onPending.bind(this));
25
11
  }
26
12
 
27
- async #onPending({ run, proposed, rummy }) {
13
+ async #onPending({ proposed, rummy }) {
28
14
  if (!rummy?.yolo) return;
29
15
  for (const p of proposed) {
30
- // Resolve first — that fires proposal.accepted, which lets the
31
- // sh/env plugin seed the streaming channel entries. Then spawn
32
- // into those existing channels. If we spawned first, sh.js's
33
- // post-accept channel creation would clobber the body we just
34
- // streamed (sets state=streaming, body="").
16
+ // Resolve first so sh/env's post-accept seeds channels before we stream into them.
35
17
  await this.#serverResolve(rummy, p.path);
36
18
  if (SH_PATH_RE.test(p.path)) {
37
19
  await this.#executeShellProposal(rummy, p.path);
@@ -39,11 +21,7 @@ export default class Yolo {
39
21
  }
40
22
  }
41
23
 
42
- /**
43
- * Replicate AgentLoop.resolve()'s accept path: accepting filter
44
- * (veto check), content filter (resolved body), set state="resolved",
45
- * emit proposal.accepted for plugin side effects.
46
- */
24
+ // Inline mirror of AgentLoop.resolve()'s accept path.
47
25
  async #serverResolve(rummy, path) {
48
26
  const runId = rummy.runId;
49
27
  const entries = rummy.entries;
@@ -88,13 +66,7 @@ export default class Yolo {
88
66
  await this.core.hooks.proposal.accepted.emit({ ...ctx, resolvedBody });
89
67
  }
90
68
 
91
- /**
92
- * Spawn the sh/env command locally and stream stdout/stderr into
93
- * `{dataBase}_1` and `{dataBase}_2` data entries. Mirrors the
94
- * stream/stream-completed RPC contract — same channel layout, same
95
- * terminal-state transitions on exit. Done inline (no RPC roundtrip)
96
- * so the run is fully autonomous.
97
- */
69
+ // Spawn locally and stream into {dataBase}_{1,2}; mirrors stream/stream-completed RPC.
98
70
  async #executeShellProposal(rummy, logPath) {
99
71
  const runId = rummy.runId;
100
72
  const entries = rummy.entries;
@@ -118,9 +90,7 @@ export default class Yolo {
118
90
  cwd: projectRoot,
119
91
  env: process.env,
120
92
  });
121
- // Buffer chunks synchronously and write once after exit. Avoids
122
- // the race where multiple async appends interleave with the
123
- // terminal-state transition fired on 'close'.
93
+ // Buffer + write-once-on-exit; async appends would race the terminal-state transition.
124
94
  const stdoutChunks = [];
125
95
  const stderrChunks = [];
126
96
  child.stdout.on("data", (data) => stdoutChunks.push(data.toString()));
@@ -154,10 +124,7 @@ export default class Yolo {
154
124
  const duration = `${Math.round((Date.now() - start) / 1000)}s`;
155
125
  const terminalState = exitCode === 0 ? "resolved" : "failed";
156
126
  const outcome = exitCode === 0 ? null : `exit:${exitCode}`;
157
- // Transition state without touching body — getState doesn't
158
- // return body, and entries.set with body=undefined preserves
159
- // the streamed content already in place. (`body: ""` would
160
- // wipe everything we just streamed.)
127
+ // body=undefined preserves streamed content; body="" would wipe it.
161
128
  for (const path of [stdoutPath, stderrPath]) {
162
129
  try {
163
130
  await entries.set({
@@ -175,7 +142,7 @@ export default class Yolo {
175
142
  null,
176
143
  );
177
144
  const summary = channels
178
- .map((c) => `${c.path} (${c.tokens || 0} tokens)`)
145
+ .map((c) => `${c.path} (${c.tokens} tokens)`)
179
146
  .join(", ");
180
147
  const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
181
148
  await entries.set({
@@ -23,8 +23,7 @@ export default class ClientConnection {
23
23
 
24
24
  this.#ws.on("message", (data) => this.#handleMessage(data));
25
25
  this.#ws.on("close", () => {
26
- // Fire-and-forget: the Promise is cached by `shutdown()` so
27
- // server-initiated close can await the same work.
26
+ // Fire-and-forget; shutdown() caches the Promise for server-initiated close to await.
28
27
  this.shutdown().catch((err) => {
29
28
  console.warn(`[RUMMY] shutdown on ws close failed: ${err.message}`);
30
29
  });
@@ -33,25 +32,6 @@ export default class ClientConnection {
33
32
  this.#setupNotifications();
34
33
  }
35
34
 
36
- #onProgress = (payload) => {
37
- if (payload.projectId === this.#context.projectId) {
38
- this.#sendNotification("run/progress", {
39
- run: payload.run,
40
- turn: payload.turn,
41
- status: payload.status,
42
- });
43
- }
44
- };
45
-
46
- #onProposal = (payload) => {
47
- if (payload.projectId === this.#context.projectId) {
48
- this.#sendNotification("run/proposal", {
49
- run: payload.run,
50
- proposed: payload.proposed,
51
- });
52
- }
53
- };
54
-
55
35
  #onRender = (payload) => {
56
36
  if (payload.projectId === this.#context.projectId) {
57
37
  this.#sendNotification("ui/render", {
@@ -80,44 +60,35 @@ export default class ClientConnection {
80
60
  }
81
61
  };
82
62
 
83
- #onState = (payload) => {
84
- if (payload.projectId === this.#context.projectId) {
85
- this.#sendNotification("run/state", {
86
- run: payload.run,
87
- turn: payload.turn,
88
- status: payload.status,
89
- summary: payload.summary,
90
- history: payload.history,
91
- unknowns: payload.unknowns,
92
- telemetry: payload.telemetry,
93
- });
94
- }
63
+ // Pulse: any entry write in this client's project. Content-free hint —
64
+ // client reconciles via getEntriesByPattern with `since`.
65
+ #onEntryChanged = async ({ runId, path, changeType }) => {
66
+ if (this.#context.projectId == null) return;
67
+ const run = await this.#db.get_run_by_id.get({ id: runId });
68
+ if (!run || run.project_id !== this.#context.projectId) return;
69
+ this.#sendNotification("run/changed", {
70
+ run: run.alias,
71
+ runId,
72
+ path,
73
+ changeType,
74
+ });
95
75
  };
96
76
 
97
77
  #setupNotifications() {
98
- this.#hooks.run.progress.on(this.#onProgress);
99
- this.#hooks.proposal.pending.on(this.#onProposal);
100
78
  this.#hooks.ui.render.on(this.#onRender);
101
79
  this.#hooks.ui.notify.on(this.#onNotify);
102
- this.#hooks.run.state.on(this.#onState);
103
80
  this.#hooks.stream.cancelled.on(this.#onStreamCancelled);
81
+ this.#hooks.entry.changed.on(this.#onEntryChanged);
104
82
  }
105
83
 
106
84
  #teardown() {
107
- this.#hooks.run.progress.off(this.#onProgress);
108
- this.#hooks.proposal.pending.off(this.#onProposal);
109
85
  this.#hooks.ui.render.off(this.#onRender);
110
86
  this.#hooks.ui.notify.off(this.#onNotify);
111
- this.#hooks.run.state.off(this.#onState);
112
87
  this.#hooks.stream.cancelled.off(this.#onStreamCancelled);
88
+ this.#hooks.entry.changed.off(this.#onEntryChanged);
113
89
  }
114
90
 
115
- /**
116
- * Abort in-flight runs on this connection and wait for them to
117
- * settle. Idempotent: `ws.on("close")` and server-initiated close
118
- * both call this; the cached Promise guarantees the work happens
119
- * exactly once and both callers observe the same completion.
120
- */
91
+ // Idempotent abort+drain; cached Promise lets ws.close and server.close share completion.
121
92
  shutdown() {
122
93
  if (!this.#shutdownPromise) {
123
94
  this.#shutdownPromise = (async () => {
@@ -241,8 +212,7 @@ export default class ClientConnection {
241
212
  } catch (error) {
242
213
  console.error(`[RUMMY] RPC Error: ${error.message}`);
243
214
  console.error(`[RUMMY] Stack: ${error.stack}`);
244
- // JSON-RPC: error responses for malformed requests with no id
245
- // MUST carry null per the spec.
215
+ // JSON-RPC requires null id for malformed requests with no id.
246
216
  this.#send({
247
217
  jsonrpc: "2.0",
248
218
  error: { code: -32603, message: error.message },
@@ -15,18 +15,13 @@ export default class SocketServer {
15
15
  this.#wss.on("connection", (ws, _req) => {
16
16
  const conn = new ClientConnection(ws, this.#db, this.#hooks);
17
17
  this.#connections.add(conn);
18
- // Remove from the tracking set only after the connection's
19
- // shutdown drain has fully settled — not on raw ws-close —
20
- // so server close() can still find and await an in-progress
21
- // shutdown kicked off by a client-initiated disconnect.
18
+ // Delete after drain settles so server.close() can await client-initiated shutdowns.
22
19
  ws.on("close", () => {
23
20
  conn.shutdown().finally(() => this.#connections.delete(conn));
24
21
  });
25
22
  });
26
23
 
27
- this.#wss.on("error", (_err) => {
28
- // Proxy to registry or handle locally
29
- });
24
+ this.#wss.on("error", (_err) => {});
30
25
  }
31
26
 
32
27
  address() {
@@ -38,14 +33,19 @@ export default class SocketServer {
38
33
  }
39
34
 
40
35
  async close() {
41
- // Drain in-flight runs on each connection before closing the
42
- // socket — otherwise detached kickoff Promises keep the Node
43
- // event loop alive past server shutdown.
44
- const shutdowns = [];
45
- for (const conn of this.#connections) {
46
- shutdowns.push(conn.shutdown().catch(() => {}));
36
+ // Drain in-flight runs first; otherwise detached kickoffs pin the event loop.
37
+ // Best-effort: a single connection failing to shut down cleanly should not
38
+ // prevent the others from closing, but the failure must be visible.
39
+ const results = await Promise.allSettled(
40
+ Array.from(this.#connections, (conn) => conn.shutdown()),
41
+ );
42
+ for (const r of results) {
43
+ if (r.status === "rejected") {
44
+ console.error(
45
+ `[RUMMY] Connection shutdown failed: ${r.reason?.message ?? r.reason}`,
46
+ );
47
+ }
47
48
  }
48
- await Promise.all(shutdowns);
49
49
  this.#connections.clear();
50
50
 
51
51
  await new Promise((resolve) => {