@possumtech/rummy 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.env.example +40 -15
  2. package/.xai.key +1 -0
  3. package/PLUGINS.md +169 -53
  4. package/README.md +38 -32
  5. package/SPEC.md +366 -179
  6. package/bin/digest.js +1097 -0
  7. package/biome/no-fallbacks.grit +2 -2
  8. package/gemini.key +1 -0
  9. package/lang/en.json +10 -1
  10. package/migrations/001_initial_schema.sql +9 -2
  11. package/package.json +19 -8
  12. package/service.js +1 -0
  13. package/src/agent/AgentLoop.js +76 -26
  14. package/src/agent/ContextAssembler.js +2 -0
  15. package/src/agent/Entries.js +238 -60
  16. package/src/agent/ProjectAgent.js +44 -0
  17. package/src/agent/TurnExecutor.js +99 -30
  18. package/src/agent/XmlParser.js +206 -111
  19. package/src/agent/errors.js +35 -0
  20. package/src/agent/known_queries.sql +1 -1
  21. package/src/agent/known_store.sql +3 -42
  22. package/src/agent/materializeContext.js +30 -1
  23. package/src/agent/runs.sql +8 -18
  24. package/src/agent/tokens.js +0 -1
  25. package/src/agent/turns.sql +1 -0
  26. package/src/hooks/Hooks.js +26 -0
  27. package/src/hooks/RummyContext.js +12 -1
  28. package/src/lib/hedberg/README.md +60 -0
  29. package/src/lib/hedberg/hedberg.js +60 -0
  30. package/src/lib/hedberg/marker.js +158 -0
  31. package/src/{plugins → lib}/hedberg/matcher.js +1 -2
  32. package/src/llm/LlmProvider.js +41 -3
  33. package/src/llm/openaiStream.js +17 -0
  34. package/src/plugins/ask_user/ask_user.js +12 -2
  35. package/src/plugins/ask_user/ask_userDoc.md +1 -5
  36. package/src/plugins/budget/README.md +29 -24
  37. package/src/plugins/budget/budget.js +166 -110
  38. package/src/plugins/cli/README.md +3 -4
  39. package/src/plugins/cli/cli.js +31 -5
  40. package/src/plugins/cloudflare/cloudflare.js +136 -0
  41. package/src/plugins/cp/cp.js +41 -4
  42. package/src/plugins/cp/cpDoc.md +5 -6
  43. package/src/plugins/engine/engine.sql +1 -1
  44. package/src/plugins/env/README.md +5 -4
  45. package/src/plugins/env/env.js +7 -4
  46. package/src/plugins/env/envDoc.md +7 -8
  47. package/src/plugins/error/error.js +56 -15
  48. package/src/plugins/file/README.md +12 -3
  49. package/src/plugins/file/file.js +2 -2
  50. package/src/plugins/get/get.js +59 -36
  51. package/src/plugins/get/getDoc.md +10 -34
  52. package/src/plugins/google/google.js +115 -0
  53. package/src/plugins/hedberg/hedberg.js +13 -56
  54. package/src/plugins/helpers.js +66 -12
  55. package/src/plugins/index.js +1 -2
  56. package/src/plugins/instructions/README.md +44 -47
  57. package/src/plugins/instructions/instructions-system.md +44 -0
  58. package/src/plugins/instructions/instructions-user.md +53 -0
  59. package/src/plugins/instructions/instructions.js +58 -189
  60. package/src/plugins/known/README.md +6 -7
  61. package/src/plugins/known/known.js +24 -30
  62. package/src/plugins/log/log.js +41 -32
  63. package/src/plugins/mv/mv.js +40 -1
  64. package/src/plugins/mv/mvDoc.md +1 -8
  65. package/src/plugins/ollama/ollama.js +4 -3
  66. package/src/plugins/openai/openai.js +4 -3
  67. package/src/plugins/openrouter/openrouter.js +14 -4
  68. package/src/plugins/persona/README.md +11 -13
  69. package/src/plugins/persona/default.md +29 -0
  70. package/src/plugins/persona/persona.js +10 -66
  71. package/src/plugins/policy/policy.js +23 -22
  72. package/src/plugins/prompt/README.md +37 -27
  73. package/src/plugins/prompt/prompt.js +13 -19
  74. package/src/plugins/rm/rm.js +18 -0
  75. package/src/plugins/rm/rmDoc.md +5 -6
  76. package/src/plugins/rpc/rpc.js +3 -3
  77. package/src/plugins/set/set.js +205 -323
  78. package/src/plugins/set/setDoc.md +47 -17
  79. package/src/plugins/sh/README.md +6 -5
  80. package/src/plugins/sh/sh.js +8 -5
  81. package/src/plugins/sh/shDoc.md +7 -8
  82. package/src/plugins/skill/README.md +37 -14
  83. package/src/plugins/skill/skill.js +200 -101
  84. package/src/plugins/skill/skillDoc.js +3 -0
  85. package/src/plugins/skill/skillDoc.md +9 -0
  86. package/src/plugins/stream/README.md +7 -6
  87. package/src/plugins/stream/finalize.js +100 -0
  88. package/src/plugins/stream/stream.js +13 -45
  89. package/src/plugins/telemetry/telemetry.js +27 -4
  90. package/src/plugins/think/think.js +2 -3
  91. package/src/plugins/think/thinkDoc.md +2 -4
  92. package/src/plugins/unknown/README.md +1 -1
  93. package/src/plugins/unknown/unknown.js +17 -19
  94. package/src/plugins/update/update.js +4 -51
  95. package/src/plugins/update/updateDoc.md +21 -6
  96. package/src/plugins/xai/xai.js +68 -102
  97. package/src/plugins/yolo/yolo.js +102 -75
  98. package/src/sql/functions/hedmatch.js +1 -1
  99. package/src/sql/functions/hedreplace.js +1 -1
  100. package/src/sql/functions/hedsearch.js +1 -1
  101. package/src/sql/functions/slugify.js +16 -2
  102. package/BENCH_ENVIRONMENT.md +0 -230
  103. package/CLIENT_INTERFACE.md +0 -396
  104. package/last_run.txt +0 -5617
  105. package/scriptify/ask_run.js +0 -77
  106. package/scriptify/cache_probe.js +0 -66
  107. package/scriptify/cache_probe_grok.js +0 -74
  108. package/src/agent/budget.js +0 -33
  109. package/src/agent/config.js +0 -38
  110. package/src/plugins/hedberg/README.md +0 -71
  111. package/src/plugins/hedberg/docs.md +0 -0
  112. package/src/plugins/hedberg/edits.js +0 -55
  113. package/src/plugins/hedberg/normalize.js +0 -17
  114. package/src/plugins/hedberg/sed.js +0 -49
  115. package/src/plugins/instructions/instructions.md +0 -34
  116. package/src/plugins/instructions/instructions_104.md +0 -8
  117. package/src/plugins/instructions/instructions_105.md +0 -39
  118. package/src/plugins/instructions/instructions_106.md +0 -22
  119. package/src/plugins/instructions/instructions_107.md +0 -17
  120. package/src/plugins/instructions/instructions_108.md +0 -0
  121. package/src/plugins/known/knownDoc.js +0 -3
  122. package/src/plugins/known/knownDoc.md +0 -8
  123. package/src/plugins/unknown/unknownDoc.js +0 -3
  124. package/src/plugins/unknown/unknownDoc.md +0 -11
  125. package/turns/cli_1777462658211/turn_001.txt +0 -772
  126. package/turns/cli_1777462658211/turn_002.txt +0 -606
  127. package/turns/cli_1777462658211/turn_003.txt +0 -667
  128. package/turns/cli_1777462658211/turn_004.txt +0 -297
  129. package/turns/cli_1777462658211/turn_005.txt +0 -301
  130. package/turns/cli_1777462658211/turn_006.txt +0 -262
  131. package/turns/cli_1777465095132/turn_001.txt +0 -715
  132. package/turns/cli_1777465095132/turn_002.txt +0 -236
  133. package/turns/cli_1777465095132/turn_003.txt +0 -287
  134. package/turns/cli_1777465095132/turn_004.txt +0 -694
  135. package/turns/cli_1777465095132/turn_005.txt +0 -422
  136. package/turns/cli_1777465095132/turn_006.txt +0 -365
  137. package/turns/cli_1777465095132/turn_007.txt +0 -885
  138. package/turns/cli_1777465095132/turn_008.txt +0 -1277
  139. package/turns/cli_1777465095132/turn_009.txt +0 -736
  140. /package/src/{plugins → lib}/hedberg/patterns.js +0 -0
@@ -0,0 +1,100 @@
1
+ import Entries from "../../agent/Entries.js";
2
+ import { logPathToDataBase } from "../helpers.js";
3
+
4
+ // Single termination site for streaming entries. Both stream/completed
5
+ // (external producer signaling close) and yolo's local child-spawn
6
+ // close handler funnel through here so finalization shape stays
7
+ // identical: channel terminal states, log-entry body rewrite, and
8
+ // dormant-run wake all live in one place.
9
+ //
10
+ // terminalState: "resolved" (exit_code=0), "failed" (non-zero).
11
+ // Aborts/cancellations write their own state ("cancelled") through the
12
+ // stream/aborted and stream/cancel paths and do NOT call this helper —
13
+ // explicit cancellation should not summon a follow-up turn.
14
+ export default async function finalizeStream({
15
+ db,
16
+ entries,
17
+ hooks,
18
+ runRow,
19
+ path,
20
+ exitCode = 0,
21
+ duration = null,
22
+ wake = true,
23
+ }) {
24
+ const rawBase = logPathToDataBase(path);
25
+ if (!rawBase) {
26
+ throw new Error(
27
+ `path must be a log entry (log://turn_N/...); got: ${path}`,
28
+ );
29
+ }
30
+ // The log entry path may arrive in its raw URL-encoded form (e.g.
31
+ // `%20` for spaces) but the data-channel rows are stored under the
32
+ // canonical form (`%20` → `_` via encodeSegment). Normalize the
33
+ // derived dataBase so `${dataBase}_*` matches the stored channel
34
+ // paths regardless of which form the caller passed in.
35
+ const dataBase = Entries.normalizePath(rawBase);
36
+ // Pin every state-transition write to the action's originating turn.
37
+ // Without this, entries.set's default turn=0 re-stamps the entry's
38
+ // run_view.turn to 0 — and the auto-failure hook then derives
39
+ // log://turn_0/error/... for failures that actually happened on
40
+ // turn N.
41
+ const turnMatch = path.match(/^log:\/\/turn_(\d+)\//);
42
+ const turn = turnMatch ? Number(turnMatch[1]) : 0;
43
+
44
+ const runId = runRow.id;
45
+ const terminalState = exitCode === 0 ? "resolved" : "failed";
46
+ const terminalOutcome = exitCode === 0 ? null : `exit:${exitCode}`;
47
+
48
+ const channels = await entries.getEntriesByPattern(
49
+ runId,
50
+ `${dataBase}_*`,
51
+ null,
52
+ );
53
+ for (const ch of channels) {
54
+ await entries.set({
55
+ runId,
56
+ turn,
57
+ path: ch.path,
58
+ state: terminalState,
59
+ body: ch.body,
60
+ outcome: terminalOutcome,
61
+ });
62
+ }
63
+
64
+ const logEntry = await entries.getAttributes(runId, path);
65
+ let command = "";
66
+ if (logEntry?.command) command = logEntry.command;
67
+ else if (logEntry?.summary) command = logEntry.summary;
68
+ const channelSummary = channels
69
+ .map((c) => {
70
+ const size = c.body ? `${c.tokens} tokens` : "empty";
71
+ return `${c.path} (${size})`;
72
+ })
73
+ .join(", ");
74
+ const dur = duration ? ` (${duration})` : "";
75
+ const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
76
+ const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
77
+ await entries.set({ runId, turn, path, state: "resolved", body });
78
+
79
+ if (!wake) return { channels: channels.length };
80
+
81
+ // Dormancy: any pending (100) or active (102) loop on the run blocks
82
+ // the wake — the active loop will see the new log entry on its next
83
+ // turn assembly and the producer doesn't owe it a fresh prompt.
84
+ const inflight = await db.get_pending_loops.all({ run_id: runId });
85
+ if (inflight.length > 0) return { channels: channels.length, woke: false };
86
+
87
+ // Mode for the wake loop: inherit from the latest completed loop on
88
+ // the run. Fresh runs without a completed loop don't get woken (the
89
+ // child closing before any loop terminated is a state we'd never
90
+ // reach in practice).
91
+ const latest = await db.get_latest_completed_loop.get({ run_id: runId });
92
+ if (!latest) return { channels: channels.length, woke: false };
93
+
94
+ await hooks.run.wake.emit({
95
+ runAlias: runRow.alias,
96
+ body: "Process complete",
97
+ mode: latest.mode,
98
+ });
99
+ return { channels: channels.length, woke: true };
100
+ }
@@ -1,4 +1,5 @@
1
1
  import { logPathToDataBase } from "../helpers.js";
2
+ import finalizeStream from "./finalize.js";
2
3
 
3
4
  // RPC plumbing that appends/terminates streaming data entries; see plugin README.
4
5
  export default class Stream {
@@ -59,55 +60,22 @@ export default class Stream {
59
60
  alias: params.run,
60
61
  });
61
62
  if (!runRow) throw new Error(`run not found: ${params.run}`);
62
- const runId = runRow.id;
63
63
 
64
64
  const { exit_code: exitCode = 0, duration = null } = params;
65
- const terminalState = exitCode === 0 ? "resolved" : "failed";
66
- const terminalOutcome = exitCode === 0 ? null : `exit:${exitCode}`;
67
-
68
- const dataBase = logPathToDataBase(params.path);
69
- if (!dataBase) {
70
- throw new Error(
71
- `path must be a log entry (log://turn_N/...); got: ${params.path}`,
72
- );
73
- }
74
- // Find all `{dataBase}_*` data entries (channels 1, 2, ...).
75
- const store = ctx.projectAgent.entries;
76
- const channels = await store.getEntriesByPattern(
77
- runId,
78
- `${dataBase}_*`,
79
- null,
80
- );
81
- for (const ch of channels) {
82
- await store.set({
83
- runId,
84
- path: ch.path,
85
- state: terminalState,
86
- body: ch.body,
87
- outcome: terminalOutcome,
88
- });
89
- }
90
-
91
- // One-line final stats for the log entry body.
92
- const logEntry = await store.getAttributes(runId, params.path);
93
- let command = "";
94
- if (logEntry?.command) command = logEntry.command;
95
- else if (logEntry?.summary) command = logEntry.summary;
96
- const channelSummary = channels
97
- .map((c) => {
98
- const size = c.body ? `${c.tokens} tokens` : "empty";
99
- return `${c.path} (${size})`;
100
- })
101
- .join(", ");
102
- const dur = duration ? ` (${duration})` : "";
103
- const exitLabel = exitCode === 0 ? "exit=0" : `exit=${exitCode}`;
104
- const body = `ran '${command}', ${exitLabel}${dur}. Output: ${channelSummary}`;
105
- await store.set({ runId, path: params.path, state: "resolved", body });
106
-
107
- return { ok: true, channels: channels.length };
65
+ const result = await finalizeStream({
66
+ db: ctx.db,
67
+ entries: ctx.projectAgent.entries,
68
+ hooks,
69
+ runRow,
70
+ path: params.path,
71
+ exitCode,
72
+ duration,
73
+ wake: true,
74
+ });
75
+ return { ok: true, ...result };
108
76
  },
109
77
  description:
110
- "Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise) and rewrites the log entry body with exit code, duration, and channel sizes.",
78
+ "Finalize a streaming producer. Transitions all `{path}_*` data channels to terminal status (200 on exit_code=0, 500 otherwise), rewrites the log entry body with exit code/duration/channel sizes, and wakes the run with a 'Process complete' prompt if it has gone dormant.",
111
79
  params: {
112
80
  run: "string — run alias",
113
81
  path: "string — log-entry path (log://turn_N/{action}/{slug}); server derives the data channel path",
@@ -206,6 +206,18 @@ export default class Telemetry {
206
206
  if (usage.prompt_tokens) actualContextTokens = usage.prompt_tokens;
207
207
  else if (assembledTokens) actualContextTokens = assembledTokens;
208
208
  const numberOrZero = (v) => (typeof v === "number" ? v : 0);
209
+ // Forensic metadata blob — everything the provider sent that
210
+ // isn't content/reasoning_content (those live elsewhere) or
211
+ // already-derived columns (token counts, cost). Catches
212
+ // finish_reason, system_fingerprint, response id, service_tier,
213
+ // raw usage, and any provider-specific fields that may light up
214
+ // future investigations. JSON column tolerates shape drift.
215
+ const responseMetadata = {
216
+ finish_reason: result.choices[0].finish_reason,
217
+ model_returned: result.model,
218
+ usage: result.usage,
219
+ ...result.chunkMetadata,
220
+ };
209
221
  await rummy.entries.updateTurnStats({
210
222
  id: rummy.turnId,
211
223
  context_tokens: actualContextTokens,
@@ -217,12 +229,23 @@ export default class Telemetry {
217
229
  completion_tokens: numberOrZero(usage.completion_tokens),
218
230
  reasoning_tokens: reasoningTokens,
219
231
  total_tokens: numberOrZero(usage.total_tokens),
220
- // usage.cost is what the relay BILLED us; it reads 0 when routed
221
- // via BYOK (relay didn't bill upstream charged our key directly).
222
- // upstream_inference_cost is the true compute cost in either case.
232
+ // Cost surfaces under different field names by provider:
233
+ // - OpenRouter direct: `usage.cost` (USD, what the relay billed us)
234
+ // - OpenRouter BYOK: `usage.cost_details.upstream_inference_cost` (USD,
235
+ // relay didn't bill — upstream charged our key directly, so
236
+ // `usage.cost` is 0 and the true compute cost lives here).
237
+ // - xAI direct: `usage.cost_in_usd_ticks` where 1 tick = 10⁻¹⁰
238
+ // USD (verified empirically: 11 uncached + 161 cached + 1
239
+ // output tokens → 107,500 ticks → $0.00001075 at xAI's
240
+ // $0.20/M input, $0.05/M cached, $0.50/M output rates).
241
+ // Divide by 1e10 to land in USD alongside the others.
242
+ // All three normalized to USD; downstream summaries sum them
243
+ // as comparable dollars.
223
244
  cost:
224
245
  numberOrZero(usage.cost) ||
225
- numberOrZero(usage.cost_details?.upstream_inference_cost),
246
+ numberOrZero(usage.cost_details?.upstream_inference_cost) ||
247
+ numberOrZero(usage.cost_in_usd_ticks) / 1e10,
248
+ response_metadata: JSON.stringify(responseMetadata),
226
249
  });
227
250
  }
228
251
 
@@ -1,12 +1,11 @@
1
- import config from "../../agent/config.js";
2
1
  import docs from "./thinkDoc.js";
3
2
 
4
- const { THINK } = config;
3
+ const THINK = process.env.RUMMY_THINK === "1";
5
4
 
6
5
  export default class Think {
7
6
  constructor(core) {
8
7
  core.registerScheme({ modelVisible: 0, category: "logging" });
9
- if (THINK === "1") {
8
+ if (THINK) {
10
9
  core.ensureTool();
11
10
  core.filter("instructions.toolDocs", async (docsMap) => {
12
11
  docsMap.think = docs;
@@ -1,7 +1,5 @@
1
1
  ## <think>[reasoning]</think> - Think before acting
2
2
 
3
- * Use <think></think> before any other tools to plan your approach
4
- <!-- Positioning: think first, then act. Prevents degenerate tool-call storms. -->
3
+ Example: <think>Plan: <search> for X; <get> the top-ranked result; distill into known://Y.</think>
5
4
 
6
- * Reasoning inside <think></think> is private — it does not appear in your context
7
- <!-- Frees the model to reason without consuming context budget. -->
5
+ * Reasoning inside <think></think> is private — it does not appear in your context.
@@ -9,7 +9,7 @@ The Rumsfeld mechanism. The model registers what it doesn't know before acting.
9
9
  - **Tool**: `unknown`
10
10
  - **Category**: `unknown`
11
11
  - **Handler**: None — recorded by TurnExecutor, deduplicated against existing unknowns.
12
- - **Filter**: `assembly.user` at priority 200 — renders `<unknowns>` adjacent to `<prompt>` (priority 300), after `<performed>` (priority 100). Unknowns are active work, not stable environment state; they belong in the user packet.
12
+ - **Filter**: `assembly.user` at priority 150 — renders `<unknowns>` after `<log>` (priority 100) and before `<instructions>` (priority 165) in the sandwich. Unknowns are active work, not stable environment state; they belong in the user packet.
13
13
 
14
14
  ## Projection
15
15
 
@@ -1,3 +1,5 @@
1
+ import { renderEntry, SUMMARY_MAX_CHARS } from "../helpers.js";
2
+
1
3
  export default class Unknown {
2
4
  constructor(core) {
3
5
  core.ensureTool();
@@ -7,7 +9,10 @@ export default class Unknown {
7
9
  core.on("handler", this.handler.bind(this));
8
10
  core.on("visible", this.full.bind(this));
9
11
  core.on("summarized", this.summary.bind(this));
10
- core.filter("assembly.user", this.assembleUnknowns.bind(this), 150);
12
+ core.filter("assembly.user", this.assembleUnknowns.bind(this), 175);
13
+ // Hidden from the advertised tool list — the model writes unknowns
14
+ // via <set path="unknown://..."/>. The unknown:// scheme lifecycle
15
+ // is taught in instructions-user.md, not in a separate tooldoc.
11
16
  core.markHidden();
12
17
  }
13
18
 
@@ -28,12 +33,12 @@ export default class Unknown {
28
33
  return;
29
34
  }
30
35
 
31
- // summary > body for slug; lets the model round-trip via <get>.
36
+ // tags > body for slug; lets the model round-trip via <get>.
32
37
  const unknownPath = await store.slugPath(
33
38
  runId,
34
39
  "unknown",
35
40
  entry.body,
36
- entry.attributes?.summary,
41
+ entry.attributes?.tags,
37
42
  );
38
43
  await store.set({
39
44
  runId,
@@ -49,11 +54,10 @@ export default class Unknown {
49
54
  return entry.body;
50
55
  }
51
56
 
52
- // First 500 chars; matches knowns/prompt summarized.
57
+ // First SUMMARY_MAX_CHARS of the body. Matches <known> / <prompt>.
53
58
  summary(entry) {
54
59
  if (!entry.body) return "";
55
- if (entry.body.length <= 500) return entry.body;
56
- return `${entry.body.slice(0, 500)}\n[truncated — promote to see the full question]`;
60
+ return entry.body.slice(0, SUMMARY_MAX_CHARS);
57
61
  }
58
62
 
59
63
  async assembleUnknowns(content, ctx) {
@@ -69,18 +73,12 @@ function renderUnknownTag(entry) {
69
73
  typeof entry.attributes === "string"
70
74
  ? JSON.parse(entry.attributes)
71
75
  : entry.attributes;
72
- const turn = entry.source_turn ? ` turn="${entry.source_turn}"` : "";
73
- const visibility = entry.visibility
74
- ? ` visibility="${entry.visibility}"`
75
- : "";
76
- const tokens = entry.aTokens != null ? ` tokens="${entry.aTokens}"` : "";
77
- const summary =
78
- typeof attrs?.summary === "string"
79
- ? ` summary="${attrs.summary.replace(/"/g, "'").slice(0, 80)}"`
80
- : "";
81
- const attrStr = `${turn}${summary}${visibility}${tokens}`;
82
- if (entry.body) {
83
- return `<unknown path="${entry.path}"${attrStr}>${entry.body}</unknown>`;
76
+ const meta = {};
77
+ if (entry.source_turn) meta.turn = entry.source_turn;
78
+ if (typeof attrs?.tags === "string") {
79
+ meta.tags = attrs.tags.slice(0, 80);
84
80
  }
85
- return `<unknown path="${entry.path}"${attrStr}/>`;
81
+ if (entry.visibility) meta.visibility = entry.visibility;
82
+ if (entry.aTokens != null) meta.tokens = entry.aTokens;
83
+ return renderEntry(entry.path, meta, entry.body);
86
84
  }
@@ -1,16 +1,8 @@
1
1
  import docs from "./updateDoc.js";
2
2
 
3
- const TERMINAL_STATUSES = new Set([200, 204, 422, 500]);
4
-
5
3
  const CONTRACT_REMINDER = "Missing update";
6
4
 
7
- const EMPTY_RESPONSE_REMINDER =
8
- "Response empty - Update with status 500 if unable to fulfill request.";
9
-
10
- function isValidStatus(status) {
11
- if (TERMINAL_STATUSES.has(status)) return true;
12
- return Number.isInteger(status) && status >= 100 && status < 200;
13
- }
5
+ const EMPTY_RESPONSE_REMINDER = "Response empty";
14
6
 
15
7
  export default class Update {
16
8
  #core;
@@ -32,53 +24,14 @@ export default class Update {
32
24
  }
33
25
 
34
26
  async handler(entry, rummy) {
35
- const { entries: store, sequence: turn, runId, loopId } = rummy;
36
- const status = entry.attributes?.status ?? 102;
37
- const validation = await rummy.hooks.instructions.validateNavigation(
38
- status,
39
- rummy,
40
- );
41
- if (!validation.ok) {
42
- entry.state = "failed";
43
- entry.outcome = "invalid_navigation";
44
- entry.body = validation.reason;
45
- await store.set({
46
- runId,
47
- turn,
48
- loopId,
49
- path: entry.resultPath,
50
- body: validation.reason,
51
- state: "failed",
52
- outcome: "invalid_navigation",
53
- attributes: { status },
54
- });
55
- return;
56
- }
57
- if (!isValidStatus(status)) {
58
- entry.state = "failed";
59
- entry.outcome = "invalid_status";
60
- const message = `Invalid status ${status} on update — use 1xx to continue or 200 to conclude.`;
61
- entry.body = message;
62
- await store.set({
63
- runId,
64
- turn,
65
- loopId,
66
- path: entry.resultPath,
67
- body: message,
68
- state: "failed",
69
- outcome: "invalid_status",
70
- attributes: { status },
71
- });
72
- return;
73
- }
74
- await rummy.update(entry.body, { status });
27
+ await rummy.update(entry.body, { status: entry.attributes?.status });
75
28
  }
76
29
 
77
30
  async resolve({ recorded, content, runId, turn, loopId, rummy }) {
78
31
  const entry = recorded.findLast((e) => e.scheme === "update");
79
- const status = entry?.attributes?.status ?? 102;
32
+ const status = entry?.attributes?.status;
80
33
  const failed = entry?.state === "failed";
81
- const isTerminal = TERMINAL_STATUSES.has(status) && !failed;
34
+ const isTerminal = status === 200 && !failed;
82
35
  let summaryText = null;
83
36
  let updateText = null;
84
37
  if (entry?.body && !failed) {
@@ -1,8 +1,23 @@
1
- ## <update status="N">{brief status}</update> - Report turn status (exactly one per turn, at the end)
2
- <!-- Header defines position, frequency, and status code requirement. -->
1
+ ## <update status="N">{ direct answer or one-line summary }</update> - Turn termination
3
2
 
4
- YOU MUST refer to your current stage instructions for valid values of N.
5
- <!-- Single source of truth for codes is the current phase instructions block, not this doc. Listing codes here leaks termination knowledge (e.g. 200) that strong models use to short-circuit the protocol. -->
3
+ YOU MUST conclude every turn with one (and only one) <update status="N"></update>.
4
+ YOU MUST keep the update body to <= 80 characters.
5
+ YOU MUST use status 102 for continuation and 200 for final delivery.
6
6
 
7
- YOU MUST keep <update></update> body to <= 80 characters.
8
- <!-- Length cap. -->
7
+ Example:
8
+ { demote irrelevant source entries and log entries }
9
+ <set path="known://plan"><<SEARCH
10
+ - [ ] Distill geography unknowns
11
+ SEARCH
12
+ <<REPLACE
13
+ - [x] Distill geography unknowns
14
+ REPLACE</set>
15
+ <update status="102">distilled three unknowns into known://trivia/geography/capitals</update>
16
+ Example:
17
+ <set path="known://plan"><<SEARCH
18
+ - [ ] Deliver direct answer
19
+ SEARCH
20
+ <<REPLACE
21
+ - [x] Deliver direct answer
22
+ REPLACE</set>
23
+ <update status="200">Paris</update>
@@ -1,12 +1,28 @@
1
- import config from "../../agent/config.js";
2
1
  import msg from "../../agent/messages.js";
3
- import { parseRetryAfter } from "../../llm/errors.js";
2
+ import { chatCompletionStream } from "../../llm/openaiStream.js";
4
3
 
5
- const { FETCH_TIMEOUT } = config;
4
+ const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
5
+
6
+ // reasoning_effort takes low|medium|high|none. Models that don't support
7
+ // the parameter reject the request with 400, so the env knob is opt-in:
8
+ // set it only on profiles targeting a model that accepts it.
9
+ const REASONING_EFFORT = process.env.RUMMY_REASONING_EFFORT;
6
10
 
7
11
  const PROVIDER = "xai";
8
12
 
9
- // Inert unless XAI_BASE_URL set; xai/{model} aliases; normalizes to OpenAI envelope.
13
+ // Inert unless XAI_BASE_URL set; xai/{model} aliases.
14
+ //
15
+ // XAI_BASE_URL points at xAI's v1 root (e.g. https://api.x.ai/v1).
16
+ // We POST to {base}/chat/completions and stream the response via the
17
+ // shared OpenAI-compatible client — this is the path that surfaces
18
+ // reasoning_content deltas. The /v1/responses endpoint is xAI's newer
19
+ // API but its non-streaming output drops reasoning content (we still
20
+ // pay for it via reasoning_tokens; we just never see it). Streaming on
21
+ // /v1/responses uses a different event shape that our shared stream
22
+ // client doesn't speak. So we use /v1/chat/completions: caching is
23
+ // preserved via the `x-grok-conv-id` header (xAI's chat-completions
24
+ // equivalent of the /v1/responses `prompt_cache_key` body field).
25
+ // See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
10
26
  export default class Xai {
11
27
  #baseUrl;
12
28
  #apiKey;
@@ -15,7 +31,22 @@ export default class Xai {
15
31
  constructor(core) {
16
32
  const baseUrl = process.env.XAI_BASE_URL;
17
33
  if (!baseUrl) return;
18
- this.#baseUrl = baseUrl;
34
+ this.#baseUrl = baseUrl.replace(/\/$/, "");
35
+ // Fail-fast on the legacy `/v1/responses` endpoint (used in earlier
36
+ // rummy versions before we switched to streaming /chat/completions).
37
+ // Composing `${baseUrl}/chat/completions` against a stale shell
38
+ // `XAI_BASE_URL=https://api.x.ai/v1/responses` produces a 404 route
39
+ // that escapes to AgentLoop's outer catch and 500-storms a sweep
40
+ // silently. Throwing at construction surfaces the env trap before
41
+ // any task starts (verified pathology: 2026-05-01 sweep, 31/31
42
+ // status=500). xAI's API root ends in `/v1`; anything else is wrong.
43
+ if (!/\/v1$/.test(this.#baseUrl)) {
44
+ throw new Error(
45
+ `XAI_BASE_URL must be the API root ending in /v1 (got "${this.#baseUrl}"). ` +
46
+ "Likely a stale shell env from earlier /v1/responses usage; " +
47
+ "set XAI_BASE_URL=https://api.x.ai/v1 (or the relevant proxy root).",
48
+ );
49
+ }
19
50
  this.#apiKey = process.env.XAI_API_KEY;
20
51
 
21
52
  const wireModel = (alias) => alias.split("/").slice(1).join("/");
@@ -32,119 +63,57 @@ export default class Xai {
32
63
  async #completion(messages, model, options = {}) {
33
64
  if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
34
65
 
35
- const body = { model, input: messages };
66
+ const body = { model, messages };
67
+ if (options.maxTokens !== undefined) body.max_tokens = options.maxTokens;
36
68
  if (options.temperature !== undefined)
37
69
  body.temperature = options.temperature;
38
- // xAI auto-caches per-server; stable prompt_cache_key keeps a multi-
39
- // turn run pinned to the same backend so the cached prefix actually
40
- // hits. Without this, requests load-balance and cache_tokens stays
41
- // near-zero. See https://docs.x.ai/developers/advanced-api-usage/prompt-caching.
42
- if (options.runAlias) body.prompt_cache_key = options.runAlias;
70
+ if (REASONING_EFFORT) body.reasoning_effort = REASONING_EFFORT;
43
71
 
44
72
  const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
45
73
  const signal = options.signal
46
74
  ? AbortSignal.any([options.signal, timeoutSignal])
47
75
  : timeoutSignal;
48
76
 
49
- const response = await fetch(this.#baseUrl, {
50
- method: "POST",
51
- headers: {
52
- Authorization: `Bearer ${this.#apiKey}`,
53
- "Content-Type": "application/json",
54
- },
55
- body: JSON.stringify(body),
56
- signal,
57
- });
58
-
59
- if (!response.ok) {
60
- const errorBody = await response.text();
61
- const retryAfter = parseRetryAfter(response.headers.get("retry-after"));
62
- if (response.status === 401 || response.status === 403) {
63
- const err = new Error(
77
+ const headers = {
78
+ Authorization: `Bearer ${this.#apiKey}`,
79
+ };
80
+ // Pin caching to the run alias. xAI's chat-completions cache is
81
+ // per-server; same conv-id routes to the same backend, which is
82
+ // where the cached prefix lives. Without this, requests load-
83
+ // balance across servers and cached_tokens stays near zero.
84
+ if (options.runAlias) headers["x-grok-conv-id"] = options.runAlias;
85
+
86
+ try {
87
+ return await chatCompletionStream({
88
+ url: `${this.#baseUrl}/chat/completions`,
89
+ headers,
90
+ body,
91
+ signal,
92
+ });
93
+ } catch (err) {
94
+ if (err.status === 401 || err.status === 403) {
95
+ throw new Error(
64
96
  msg("error.xai_auth", {
65
- status: `${response.status} - ${errorBody}`,
97
+ status: `${err.status} - ${err.body}`,
66
98
  }),
67
99
  );
68
- err.status = response.status;
69
- err.body = errorBody;
70
- throw err;
71
100
  }
72
- const err = new Error(
73
- msg("error.xai_api", {
74
- status: `${response.status} - ${errorBody}`,
75
- }),
76
- );
77
- err.status = response.status;
78
- err.body = errorBody;
79
- err.retryAfter = retryAfter;
80
- throw err;
81
- }
82
-
83
- return this.#normalize(await response.json());
84
- }
85
-
86
- #normalize(data) {
87
- let content = "";
88
- let reasoningContent = null;
89
-
90
- for (const item of data.output) {
91
- if (item.type === "reasoning") {
92
- const text = this.#extractText(item.content);
93
- if (text)
94
- reasoningContent = reasoningContent
95
- ? `${reasoningContent}\n${text}`
96
- : text;
97
- }
98
- if (item.type === "message") {
99
- const text = this.#extractText(item.content);
100
- if (text) content = content ? `${content}\n${text}` : text;
101
+ if (err.status) {
102
+ throw new Error(
103
+ msg("error.xai_api", {
104
+ status: `${err.status} - ${err.body}`,
105
+ }),
106
+ );
101
107
  }
108
+ throw err;
102
109
  }
103
-
104
- const { usage } = data;
105
- const inputTokens = usage.input_tokens;
106
- const outputTokens = usage.output_tokens;
107
- // Optional per xAI API; absent on providers that don't surface them.
108
- const cached = usage.input_tokens_details?.cached_tokens;
109
- const reasoningTokens = usage.output_tokens_details?.reasoning_tokens;
110
- const costTicks = usage.cost_in_usd_ticks;
111
- return {
112
- choices: [
113
- {
114
- message: {
115
- role: "assistant",
116
- content,
117
- reasoning_content: reasoningContent,
118
- },
119
- },
120
- ],
121
- usage: {
122
- prompt_tokens: inputTokens,
123
- cached_tokens: cached === undefined ? 0 : cached,
124
- completion_tokens: outputTokens,
125
- reasoning_tokens: reasoningTokens === undefined ? 0 : reasoningTokens,
126
- total_tokens: inputTokens + outputTokens,
127
- cost: costTicks === undefined ? 0 : costTicks / 10_000_000_000,
128
- },
129
- };
130
- }
131
-
132
- #extractText(content) {
133
- if (typeof content === "string") return content;
134
- if (!Array.isArray(content)) return null;
135
- const joined = content
136
- .filter((c) => c.type === "text" || c.type === "output_text")
137
- .map((c) => c.text)
138
- .join("\n");
139
- return joined ? joined : null;
140
110
  }
141
111
 
142
112
  async #getContextSize(model) {
143
113
  if (this.#contextCache.has(model)) return this.#contextCache.get(model);
144
114
  if (!this.#apiKey) throw new Error(msg("error.xai_api_key_missing"));
145
115
 
146
- const modelsUrl = this.#baseUrl.replace(/\/responses$/, "/models");
147
- const res = await fetch(modelsUrl, {
116
+ const res = await fetch(`${this.#baseUrl}/models`, {
148
117
  headers: { Authorization: `Bearer ${this.#apiKey}` },
149
118
  signal: AbortSignal.timeout(FETCH_TIMEOUT),
150
119
  });
@@ -164,10 +133,7 @@ export default class Xai {
164
133
  }
165
134
  }
166
135
 
167
- const langUrl = this.#baseUrl.replace(
168
- /\/responses$/,
169
- `/language-models/${model}`,
170
- );
136
+ const langUrl = `${this.#baseUrl}/language-models/${model}`;
171
137
  // Optional probe; failure falls through to terminal throw below.
172
138
  const langRes = await fetch(langUrl, {
173
139
  headers: { Authorization: `Bearer ${this.#apiKey}` },