@possumtech/rummy 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.env.example +12 -7
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +305 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +6 -2
  11. package/scriptify/cache_probe.js +66 -0
  12. package/scriptify/cache_probe_grok.js +74 -0
  13. package/service.js +22 -11
  14. package/src/agent/AgentLoop.js +33 -139
  15. package/src/agent/ContextAssembler.js +2 -9
  16. package/src/agent/Entries.js +36 -101
  17. package/src/agent/ProjectAgent.js +2 -9
  18. package/src/agent/TurnExecutor.js +45 -83
  19. package/src/agent/XmlParser.js +247 -273
  20. package/src/agent/budget.js +5 -28
  21. package/src/agent/config.js +38 -0
  22. package/src/agent/errors.js +7 -13
  23. package/src/agent/httpStatus.js +1 -19
  24. package/src/agent/known_store.sql +7 -2
  25. package/src/agent/materializeContext.js +12 -17
  26. package/src/agent/pathEncode.js +5 -0
  27. package/src/agent/rummyHome.js +9 -0
  28. package/src/agent/runs.sql +18 -0
  29. package/src/agent/tokens.js +2 -8
  30. package/src/hooks/HookRegistry.js +1 -16
  31. package/src/hooks/Hooks.js +8 -33
  32. package/src/hooks/PluginContext.js +3 -21
  33. package/src/hooks/RpcRegistry.js +1 -4
  34. package/src/hooks/RummyContext.js +2 -16
  35. package/src/hooks/ToolRegistry.js +5 -15
  36. package/src/llm/LlmProvider.js +28 -23
  37. package/src/llm/errors.js +41 -4
  38. package/src/llm/openaiStream.js +125 -0
  39. package/src/llm/retry.js +61 -15
  40. package/src/plugins/budget/budget.js +14 -81
  41. package/src/plugins/cli/README.md +87 -0
  42. package/src/plugins/cli/bin.js +61 -0
  43. package/src/plugins/cli/cli.js +120 -0
  44. package/src/plugins/env/README.md +2 -1
  45. package/src/plugins/env/env.js +4 -6
  46. package/src/plugins/env/envDoc.md +2 -2
  47. package/src/plugins/error/error.js +23 -23
  48. package/src/plugins/file/file.js +2 -22
  49. package/src/plugins/get/get.js +12 -34
  50. package/src/plugins/get/getDoc.md +5 -3
  51. package/src/plugins/hedberg/edits.js +1 -11
  52. package/src/plugins/hedberg/hedberg.js +3 -26
  53. package/src/plugins/hedberg/normalize.js +1 -5
  54. package/src/plugins/hedberg/patterns.js +4 -15
  55. package/src/plugins/hedberg/sed.js +1 -7
  56. package/src/plugins/helpers.js +28 -20
  57. package/src/plugins/index.js +25 -41
  58. package/src/plugins/instructions/README.md +18 -0
  59. package/src/plugins/instructions/instructions.js +13 -76
  60. package/src/plugins/instructions/instructions.md +19 -18
  61. package/src/plugins/instructions/instructions_104.md +5 -4
  62. package/src/plugins/instructions/instructions_105.md +16 -15
  63. package/src/plugins/instructions/instructions_106.md +15 -14
  64. package/src/plugins/instructions/instructions_107.md +13 -6
  65. package/src/plugins/known/README.md +26 -6
  66. package/src/plugins/known/known.js +36 -34
  67. package/src/plugins/log/README.md +2 -2
  68. package/src/plugins/log/log.js +6 -33
  69. package/src/plugins/ollama/ollama.js +50 -66
  70. package/src/plugins/openai/openai.js +26 -44
  71. package/src/plugins/openrouter/openrouter.js +28 -52
  72. package/src/plugins/policy/README.md +8 -2
  73. package/src/plugins/policy/policy.js +8 -21
  74. package/src/plugins/prompt/README.md +22 -0
  75. package/src/plugins/prompt/prompt.js +8 -16
  76. package/src/plugins/rm/rm.js +5 -2
  77. package/src/plugins/rm/rmDoc.md +4 -4
  78. package/src/plugins/rpc/README.md +2 -1
  79. package/src/plugins/rpc/rpc.js +51 -47
  80. package/src/plugins/set/README.md +5 -1
  81. package/src/plugins/set/set.js +23 -33
  82. package/src/plugins/set/setDoc.md +1 -1
  83. package/src/plugins/sh/README.md +2 -1
  84. package/src/plugins/sh/sh.js +5 -11
  85. package/src/plugins/sh/shDoc.md +2 -2
  86. package/src/plugins/stream/README.md +6 -5
  87. package/src/plugins/stream/stream.js +6 -35
  88. package/src/plugins/telemetry/telemetry.js +26 -19
  89. package/src/plugins/think/think.js +4 -7
  90. package/src/plugins/unknown/unknown.js +8 -13
  91. package/src/plugins/update/update.js +36 -35
  92. package/src/plugins/update/updateDoc.md +3 -3
  93. package/src/plugins/xai/xai.js +30 -20
  94. package/src/plugins/yolo/yolo.js +8 -41
  95. package/src/server/ClientConnection.js +17 -47
  96. package/src/server/SocketServer.js +14 -14
  97. package/src/server/protocol.js +1 -10
  98. package/src/sql/functions/slugify.js +5 -7
  99. package/src/sql/v_model_context.sql +4 -11
  100. package/turns/cli_1777462658211/turn_001.txt +772 -0
  101. package/turns/cli_1777462658211/turn_002.txt +606 -0
  102. package/turns/cli_1777462658211/turn_003.txt +667 -0
  103. package/turns/cli_1777462658211/turn_004.txt +297 -0
  104. package/turns/cli_1777462658211/turn_005.txt +301 -0
  105. package/turns/cli_1777462658211/turn_006.txt +262 -0
  106. package/turns/cli_1777465095132/turn_001.txt +715 -0
  107. package/turns/cli_1777465095132/turn_002.txt +236 -0
  108. package/turns/cli_1777465095132/turn_003.txt +287 -0
  109. package/turns/cli_1777465095132/turn_004.txt +694 -0
  110. package/turns/cli_1777465095132/turn_005.txt +422 -0
  111. package/turns/cli_1777465095132/turn_006.txt +365 -0
  112. package/turns/cli_1777465095132/turn_007.txt +885 -0
  113. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  114. package/turns/cli_1777465095132/turn_009.txt +736 -0
package/src/llm/errors.js CHANGED
@@ -13,9 +13,46 @@ export function isContextExceededMessage(message) {
13
13
  return CONTEXT_EXCEEDED_PATTERN.test(String(message));
14
14
  }
15
15
 
16
- const TRANSIENT_PATTERN =
17
- /\b(500|502|503|504|429|timeout|TimeoutError|aborted|unavailable|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed)\b/i;
16
+ const ABORT_PATTERN = /\b(aborted|AbortError|TimeoutError)\b/;
17
+ // `terminated` is undici's err.message when the underlying socket closes
18
+ // mid-fetch (TLSSocket.onHttpSocketClose → Fetch.onAborted) — same lane
19
+ // as ECONNRESET, just surfaced through a streaming-fetch path.
20
+ const GATEWAY_PATTERN =
21
+ /\b(502|504|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed|terminated)\b/i;
22
+ const RATE_LIMIT_PATTERN = /\b429\b/;
23
+ const STATUS_503_PATTERN = /\b503\b/;
24
+ const STATUS_500_PATTERN = /\b500\b/;
25
+ // llamacpp / OpenAI-compatible servers signal model-warmup with this body.
26
+ const MODEL_WARMUP_PATTERN = /\bLoading model\b/i;
18
27
 
19
- export function isTransientMessage(message) {
20
- return TRANSIENT_PATTERN.test(String(message));
28
+ // Returns "gateway" | "warmup" | "rate_limit" | "server" | null.
29
+ // null = do not retry, propagate immediately. Operator/internal aborts,
30
+ // auth failures, malformed-request errors, unknown shapes all fall here.
31
+ export function classifyTransient(err) {
32
+ if (!err || typeof err.message !== "string") return null;
33
+ const { message } = err;
34
+
35
+ if (ABORT_PATTERN.test(message)) return null;
36
+ if (GATEWAY_PATTERN.test(message)) return "gateway";
37
+ if (RATE_LIMIT_PATTERN.test(message)) return "rate_limit";
38
+ if (STATUS_503_PATTERN.test(message)) {
39
+ // 503 + explicit warmup signal → wait it out.
40
+ if (MODEL_WARMUP_PATTERN.test(message)) return "warmup";
41
+ if (typeof err.body === "string" && MODEL_WARMUP_PATTERN.test(err.body)) {
42
+ return "warmup";
43
+ }
44
+ return "server";
45
+ }
46
+ if (STATUS_500_PATTERN.test(message)) return "server";
47
+ return null;
48
+ }
49
+
50
+ // HTTP Retry-After: integer seconds (most common form). Returns
51
+ // undefined for missing, malformed, or HTTP-date forms — callers
52
+ // fall through to backoff in those cases.
53
+ export function parseRetryAfter(value) {
54
+ if (!value) return undefined;
55
+ const seconds = Number(value);
56
+ if (Number.isFinite(seconds) && seconds >= 0) return seconds;
57
+ return undefined;
21
58
  }
@@ -0,0 +1,125 @@
1
+ import { parseRetryAfter } from "./errors.js";
2
+
3
+ /**
4
+ * Shared streaming client for OpenAI-compatible /chat/completions endpoints.
5
+ *
6
+ * Provider plugins (openai, openrouter, ollama) construct the request body
7
+ * and headers; this module handles the SSE parsing, accumulates deltas into
8
+ * a non-streaming-shape response, and surfaces errors with the same ergonomics
9
+ * as the previous fetch-then-json pattern.
10
+ *
11
+ * Streaming is preferred over non-streaming for two reasons:
12
+ *
13
+ * 1. Long-running completions through CDN proxies (e.g. Cloudflare's 100s
14
+ * origin-timeout) can't survive a non-streaming hold; streaming keeps
15
+ * the connection alive byte-by-byte.
16
+ * 2. Future UI surfaces ("thinking" displays) want the deltas live; a
17
+ * streaming-first plugin layer gives them a hook.
18
+ *
19
+ * The xAI Responses API (`/v1/responses`) uses a different streaming format
20
+ * and is out of scope for this client.
21
+ */
22
+
23
+ /**
24
+ * @param {Object} args
25
+ * @param {string} args.url Full POST URL (e.g. `${baseUrl}/v1/chat/completions`).
26
+ * @param {Object} args.headers Plugin-specific headers (Authorization, etc.).
27
+ * @param {Object} args.body Request body (without `stream` — added here).
28
+ * @param {AbortSignal} [args.signal] Cancellation signal.
29
+ * @returns {Promise<Object>} Non-streaming-shape response: `{ choices, usage, model }`.
30
+ * Throws on non-2xx with `err.status` and `err.body` populated.
31
+ */
32
+ export async function chatCompletionStream({ url, headers, body, signal }) {
33
+ const requestBody = {
34
+ ...body,
35
+ stream: true,
36
+ // Tells OpenAI / OpenAI-compatible servers to emit a final usage chunk.
37
+ stream_options: { include_usage: true },
38
+ };
39
+
40
+ const response = await fetch(url, {
41
+ method: "POST",
42
+ headers: { "Content-Type": "application/json", ...headers },
43
+ body: JSON.stringify(requestBody),
44
+ signal,
45
+ });
46
+
47
+ if (!response.ok) {
48
+ const errorBody = await response.text();
49
+ const err = new Error(`${response.status} - ${errorBody}`);
50
+ err.status = response.status;
51
+ err.body = errorBody;
52
+ err.retryAfter = parseRetryAfter(response.headers.get("retry-after"));
53
+ throw err;
54
+ }
55
+
56
+ const reader = response.body.getReader();
57
+ const decoder = new TextDecoder();
58
+
59
+ let buffer = "";
60
+ let content = "";
61
+ let reasoningContent = "";
62
+ let usage = null;
63
+ let model = null;
64
+ let finishReason = null;
65
+
66
+ while (true) {
67
+ const { done, value } = await reader.read();
68
+ if (done) break;
69
+ buffer += decoder.decode(value, { stream: true });
70
+
71
+ // SSE frames are separated by blank lines; within a frame, a `data:`
72
+ // line carries the JSON payload. Process complete lines and keep any
73
+ // trailing partial-line in the buffer for the next read.
74
+ const lines = buffer.split("\n");
75
+ buffer = lines.pop();
76
+
77
+ for (const rawLine of lines) {
78
+ const line = rawLine.trim();
79
+ if (!line.startsWith("data:")) continue;
80
+ const payload = line.slice(5).trimStart();
81
+ if (payload === "[DONE]" || payload === "") continue;
82
+
83
+ let chunk;
84
+ try {
85
+ chunk = JSON.parse(payload);
86
+ } catch {
87
+ continue;
88
+ }
89
+
90
+ if (chunk.model) model = chunk.model;
91
+ if (chunk.usage) usage = chunk.usage;
92
+
93
+ const choice = chunk.choices?.[0];
94
+ if (!choice) continue;
95
+ if (choice.finish_reason) finishReason = choice.finish_reason;
96
+
97
+ const delta = choice.delta;
98
+ if (!delta) continue;
99
+ if (typeof delta.content === "string") content += delta.content;
100
+ // Different providers surface reasoning under different field names.
101
+ // Concatenate any that show up.
102
+ if (typeof delta.reasoning_content === "string")
103
+ reasoningContent += delta.reasoning_content;
104
+ if (typeof delta.reasoning === "string")
105
+ reasoningContent += delta.reasoning;
106
+ if (typeof delta.thinking === "string")
107
+ reasoningContent += delta.thinking;
108
+ }
109
+ }
110
+
111
+ return {
112
+ model,
113
+ choices: [
114
+ {
115
+ message: {
116
+ role: "assistant",
117
+ content,
118
+ reasoning_content: reasoningContent,
119
+ },
120
+ finish_reason: finishReason,
121
+ },
122
+ ],
123
+ usage,
124
+ };
125
+ }
package/src/llm/retry.js CHANGED
@@ -1,18 +1,4 @@
1
- /**
2
- * Exponential backoff with full jitter, time-bounded.
3
- *
4
- * Calls `fn` until it returns a value, the deadline elapses, or a
5
- * non-retryable error is thrown. Between attempts, sleeps for a
6
- * random duration in [0, min(maxDelayMs, baseDelayMs * 2^attempt)).
7
- * Full jitter (AWS / Google Cloud pattern) prevents thundering-herd
8
- * synchronization across concurrent clients hitting the same API.
9
- *
10
- * Time-bounded, not count-bounded: a connect-level outage that
11
- * recovers in 4 minutes is invisible to the caller, but a persistent
12
- * outage fails after deadlineMs with a clear cause chain.
13
- *
14
- * Aborts immediately if the supplied AbortSignal fires — even mid-sleep.
15
- */
1
+ // Time-bounded exponential backoff with full jitter; mid-sleep AbortSignal-aware.
16
2
  export async function retryWithBackoff(
17
3
  fn,
18
4
  {
@@ -50,6 +36,66 @@ export async function retryWithBackoff(
50
36
  }
51
37
  }
52
38
 
39
+ // Per-category retry. Each category gets its own deadline budget; a
40
+ // category transition resets prior category state — the rationale being
41
+ // that seeing a different category proves upstream is alive in some way,
42
+ // so prior gateway/server storms aren't relevant to the new attempt.
43
+ // Honors err.retryAfter (seconds) as a delay floor for rate-limit hints.
44
+ export async function retryClassified(
45
+ fn,
46
+ { signal, classify, policies, onRetry } = {},
47
+ ) {
48
+ const state = new Map(); // category → { start: ms, attempts: number }
49
+ let lastCategory = null;
50
+
51
+ while (true) {
52
+ signal?.throwIfAborted();
53
+ try {
54
+ return await fn();
55
+ } catch (err) {
56
+ const category = classify(err);
57
+ if (!category) throw err;
58
+ const policy = policies[category];
59
+ if (!policy) {
60
+ throw new Error(
61
+ `retryClassified: no policy for category "${category}"`,
62
+ { cause: err },
63
+ );
64
+ }
65
+
66
+ if (lastCategory !== category) state.clear();
67
+ if (!state.has(category)) {
68
+ state.set(category, { start: Date.now(), attempts: 0 });
69
+ }
70
+ lastCategory = category;
71
+
72
+ const s = state.get(category);
73
+ const elapsedMs = Date.now() - s.start;
74
+ const remainingMs = policy.deadlineMs - elapsedMs;
75
+ if (remainingMs <= 0) {
76
+ throw new Error(
77
+ `${category} retry exhausted after ${Math.round(elapsedMs / 1000)}s; last error: ${err.message}`,
78
+ { cause: err },
79
+ );
80
+ }
81
+
82
+ const expCap = Math.min(
83
+ policy.maxDelayMs,
84
+ policy.baseDelayMs * 2 ** s.attempts,
85
+ );
86
+ const jittered = Math.floor(Math.random() * expCap);
87
+ const delayMs =
88
+ err.retryAfter !== undefined
89
+ ? Math.min(remainingMs, Math.max(err.retryAfter * 1000, jittered))
90
+ : Math.min(remainingMs, jittered);
91
+
92
+ onRetry?.(err, category, s.attempts + 1, delayMs, remainingMs);
93
+ await sleep(delayMs, signal);
94
+ s.attempts++;
95
+ }
96
+ }
97
+ }
98
+
53
99
  function sleep(ms, signal) {
54
100
  return new Promise((resolve, reject) => {
55
101
  const t = setTimeout(resolve, ms);
@@ -2,15 +2,7 @@ import { ceiling, computeBudget, measureMessages } from "../../agent/budget.js";
2
2
  import materializeContext from "../../agent/materializeContext.js";
3
3
  import { countTokens } from "../../agent/tokens.js";
4
4
 
5
- /**
6
- * Delta-from-actual baseline. The pre-call <prompt tokenUsage> reports
7
- * the prior turn's actual API prompt_tokens; post-dispatch predicts
8
- * next turn's packet = this turn's actual tokens + tokens of new rows
9
- * written this turn. Keeps the 413 body on the same scale as the
10
- * model's <prompt> arithmetic — a 60% divergence between pre-call
11
- * (actual) and post-check (conservative estimator) makes the model
12
- * dismiss the system as janky and stop following rules.
13
- */
5
+ // Delta-from-actual; same scale as <prompt tokenUsage>. SPEC #budget_enforcement.
14
6
  function predictNextPacket(rows, currentTurn, baseline) {
15
7
  let delta = 0;
16
8
  for (const r of rows) {
@@ -19,13 +11,7 @@ function predictNextPacket(rows, currentTurn, baseline) {
19
11
  return baseline + delta;
20
12
  }
21
13
 
22
- /**
23
- * Format the 413 error body. Names each demoted path with its turn
24
- * and token count so the model can avoid re-promoting them next turn.
25
- * Exported (not private) so unit tests can assert the exact wire
26
- * format — the model reads this string, so its shape is part of the
27
- * contract.
28
- */
14
+ // 413 error body; wire format is part of the model contract.
29
15
  export function overflowBody(overflow, contextSize, demoted) {
30
16
  const cap = ceiling(contextSize);
31
17
  const size = cap + overflow;
@@ -50,35 +36,22 @@ export default class Budget {
50
36
  enforce: this.enforce.bind(this),
51
37
  postDispatch: this.postDispatch.bind(this),
52
38
  };
53
- core.filter("assembly.user", this.assembleBudget.bind(this), 275);
39
+ core.filter("assembly.user", this.assembleBudget.bind(this), 175);
54
40
  }
55
41
 
56
- /**
57
- * Render the <budget> table between <instructions> and <prompt>.
58
- * See SPEC @token_accounting for the contract: per-row tokens are
59
- * aTokens (the promotion premium = vTokens − sTokens), summarized
60
- * entries collapse into a single aggregate line, system overhead
61
- * (system prompt + tool defs) gets its own line.
62
- */
42
+ // Renders <budget> at priority 175; see SPEC #token_accounting.
63
43
  assembleBudget(content, ctx) {
64
44
  const { rows, contextSize, systemPrompt } = ctx;
65
45
  if (!contextSize) return content;
66
46
 
67
47
  const cap = ceiling(contextSize);
68
48
 
69
- // Per-scheme aggregation: counts and costs at each visibility tier
70
- // plus the savings (premium) the model would unlock by demoting
71
- // visible → summarized. All math derives from per-row vTokens
72
- // (cost as visible) / sTokens (cost as summarized) / aTokens
73
- // (= vTokens − sTokens, the promotion premium).
74
49
  const byScheme = new Map();
75
50
  let visibleCount = 0;
76
51
  let premiumTokens = 0;
77
52
  let summarizedCount = 0;
78
53
  let _summarizedTokens = 0;
79
54
  let floorTokens = 0;
80
- let knownVTokens = 0;
81
- let sourceVTokens = 0;
82
55
 
83
56
  const schemeEntry = (s) => {
84
57
  let e = byScheme.get(s);
@@ -102,36 +75,26 @@ export default class Budget {
102
75
  const entry = schemeEntry(s);
103
76
  if (r.visibility === "visible") {
104
77
  entry.vis += 1;
105
- entry.visTokens += r.vTokens || 0;
106
- entry.visIfSumTokens += r.sTokens || 0;
107
- entry.premium += r.aTokens || 0;
78
+ entry.visTokens += r.vTokens;
79
+ entry.visIfSumTokens += r.sTokens;
80
+ entry.premium += r.aTokens;
108
81
  visibleCount += 1;
109
82
  premiumTokens += r.aTokens;
110
83
  floorTokens += r.sTokens;
111
- const v = r.vTokens || 0;
112
- if (s === "known") knownVTokens += v;
113
- else if (s === "prompt") sourceVTokens += v;
114
- else if (r.category === "data") sourceVTokens += v;
115
84
  } else if (r.visibility === "summarized") {
116
85
  entry.sum += 1;
117
- entry.sumTokens += r.sTokens || 0;
86
+ entry.sumTokens += r.sTokens;
118
87
  summarizedCount += 1;
119
88
  _summarizedTokens += r.sTokens;
120
89
  floorTokens += r.sTokens;
121
90
  }
122
91
  }
123
92
 
124
- const fcrmDenom = knownVTokens + sourceVTokens;
125
- const fcrmScore =
126
- fcrmDenom > 0 ? (knownVTokens / fcrmDenom).toFixed(2) : "1.00";
127
-
128
- const systemTokens = countTokens(systemPrompt || "");
93
+ const systemTokens = countTokens(systemPrompt);
129
94
  const tokenUsage = floorTokens + premiumTokens + systemTokens;
130
95
  const tokensFree = Math.max(0, cap - tokenUsage);
131
96
 
132
- // Sort schemes by current cost descending biggest-impact rows
133
- // land at the top, so "what should I demote first?" reads
134
- // straight off the table.
97
+ // Sort by current cost desc so biggest-impact rows are top.
135
98
  const schemeRows = [...byScheme.entries()]
136
99
  .toSorted(
137
100
  ([, a], [, b]) =>
@@ -161,7 +124,7 @@ export default class Budget {
161
124
  "- premium: savings from demoting visible → summarized (cost − if-all-sum)",
162
125
  ].join("\n");
163
126
 
164
- return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}" fcrmScore="${fcrmScore}">\n${table}\n\n${legend}\n${systemLine}\n${totalLine}\n</budget>\n`;
127
+ return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}">\n${table}\n\n${legend}\n${systemLine}\n${totalLine}\n</budget>\n`;
165
128
  }
166
129
 
167
130
  #check({ contextSize, messages, rows, lastPromptTokens = 0 }) {
@@ -197,16 +160,7 @@ export default class Budget {
197
160
  });
198
161
  }
199
162
 
200
- /**
201
- * Pre-LLM budget enforcement. On first-turn overflow, demotes the
202
- * incoming prompt and re-materializes; re-checks and returns the
203
- * post-demotion result. If overflow persists after demotion (or on
204
- * later iterations), emits a 413 error (strike) and returns !ok so
205
- * TurnExecutor can skip the LLM call this turn.
206
- *
207
- * ctx = { runId, loopId, turn, systemPrompt, mode, toolSet, demoted,
208
- * loopIteration }
209
- */
163
+ // Pre-LLM enforce: SPEC #budget_enforcement.
210
164
  async enforce({
211
165
  contextSize,
212
166
  messages,
@@ -259,7 +213,6 @@ export default class Budget {
259
213
  mode: ctx.mode,
260
214
  toolSet: ctx.toolSet,
261
215
  contextSize,
262
- demoted: ctx.demoted,
263
216
  });
264
217
  const rechecked = this.#check({
265
218
  contextSize,
@@ -280,14 +233,7 @@ export default class Budget {
280
233
  return rechecked;
281
234
  }
282
235
 
283
- /**
284
- * Post-dispatch Turn Demotion. Re-materializes end-of-turn context and
285
- * checks against the ceiling. On overflow, demotes this turn's promoted
286
- * entries and emits a 413 error (strike) with the descriptive body so
287
- * the model sees it next turn via the unified error channel.
288
- *
289
- * ctx = { runId, loopId, turn, systemPrompt, mode, toolSet, demoted }
290
- */
236
+ // Post-dispatch Turn Demotion: SPEC #budget_enforcement.
291
237
  async postDispatch({ contextSize, ctx, rummy }) {
292
238
  if (!contextSize) return { failed: false };
293
239
  const postMat = await materializeContext({
@@ -300,13 +246,7 @@ export default class Budget {
300
246
  mode: ctx.mode,
301
247
  toolSet: ctx.toolSet,
302
248
  contextSize,
303
- demoted: ctx.demoted,
304
249
  });
305
- // Baseline from this turn's actual API tokens (telemetry wrote it
306
- // before post-dispatch runs). Delta from rows added this turn.
307
- // Predicted next-turn packet stays on the tokenUsage scale the
308
- // model can verify against its own arithmetic. materializeContext
309
- // guarantees a number (0 when no prior API call exists).
310
250
  const baseline = postMat.lastContextTokens;
311
251
  const predicted = predictNextPacket(postMat.rows, ctx.turn, baseline);
312
252
  const cap = ceiling(contextSize);
@@ -315,14 +255,7 @@ export default class Budget {
315
255
 
316
256
  const store = rummy.entries;
317
257
  let demotedEntries = await store.demoteTurnEntries(ctx.runId, ctx.turn);
318
- // Fallback: if this turn had nothing to demote but the packet still
319
- // overflows, the pressure is coming from prior-turn promotions the
320
- // model never demoted itself. Widen to all currently-visible
321
- // entries in the run. Without this fallback, overflow-with-nothing
322
- // strikes out runs where the base context has drifted over ceiling
323
- // through no fault of the current turn (observed: runs where 3
324
- // stale promotions from turns 12–14 saturate every subsequent
325
- // turn's budget).
258
+ // Prior-turn-pressure fallback; SPEC #budget_enforcement.
326
259
  if (demotedEntries.length === 0) {
327
260
  demotedEntries = await store.demoteRunVisibleEntries(ctx.runId);
328
261
  }
@@ -0,0 +1,87 @@
1
+ # cli
2
+
3
+ One-shot CLI client. Boots the service, runs a single `ask`/`act`,
4
+ prints the final summary to stdout, exits with code `0` on terminal
5
+ status `200` (non-zero otherwise). Server mode is unaffected — the
6
+ plugin is inert when `RUMMY_PROMPT` is unset.
7
+
8
+ ## Invocation
9
+
10
+ ```bash
11
+ rummy-cli --RUMMY_PROMPT="list files in /tmp" --RUMMY_MODEL=xfast
12
+ ```
13
+
14
+ All args are env-var-shape: `--KEY=value`, `--KEY value`, or `--KEY`
15
+ (boolean shorthand → `"1"`). Anything else is rejected with exit
16
+ code `2`. CLI flags trump every `.env*` file (Node's `loadEnvFile`
17
+ preserves existing vars).
18
+
19
+ ## Required env
20
+
21
+ | Var | Effect |
22
+ |---|---|
23
+ | `RUMMY_PROMPT` | Activates the plugin and supplies the instruction. |
24
+ | `RUMMY_MODEL` | Model alias (must match a registered `RUMMY_MODEL_<alias>`). |
25
+
26
+ ## Optional env
27
+
28
+ | Var | Default | Effect |
29
+ |---|---|---|
30
+ | `RUMMY_MODE` | `act` | `ask` or `act`. |
31
+
32
+ `RUMMY_RUN_TIMEOUT` is required at boot via `src/agent/config.js`;
33
+ default lives in `.env.example`. Watchdog exits with code `124` on
34
+ overflow.
35
+
36
+ Per-run defaults (`RUMMY_YOLO`, `RUMMY_NO_REPO`, `RUMMY_NO_WEB`,
37
+ `RUMMY_NO_INTERACTION`, `RUMMY_NO_PROPOSALS`) cascade through
38
+ `AgentLoop`'s boundary normalization — see `.env.example`.
39
+
40
+ ## Profile pattern
41
+
42
+ Layer profile-specific defaults via Node's `--env-file-if-exists`:
43
+
44
+ ```bash
45
+ node --env-file-if-exists=.env.example \
46
+ --env-file-if-exists=.env \
47
+ --env-file-if-exists=.env.tbench \
48
+ src/plugins/cli/bin.js \
49
+ --RUMMY_PROMPT="..." --RUMMY_MODEL=xfast
50
+ ```
51
+
52
+ A `.env.tbench` profile typically pins `RUMMY_YOLO=1`,
53
+ `RUMMY_NO_INTERACTION=1`, `RUMMY_NO_WEB=1`, plus model alias and
54
+ provider key. Bench harnesses call `rummy-cli` with just
55
+ `--RUMMY_PROMPT="..."` and let the profile carry the rest.
56
+
57
+ ## Exit codes
58
+
59
+ | Code | Meaning |
60
+ |---|---|
61
+ | `0` | Terminal status `200`. Model claimed success. |
62
+ | `1` | Terminal status in `{204, 413, 422, 499, 500}` or run crashed. |
63
+ | `2` | Arg parse error (invalid flag shape, missing required env). |
64
+ | `124` | Wall-clock timeout (`RUMMY_RUN_TIMEOUT` exceeded). |
65
+
66
+ External verifiers (terminal-bench, SWE-bench, etc.) decide actual
67
+ task success — the exit code only reports rummy's internal terminal
68
+ status.
69
+
70
+ ## Files
71
+
72
+ - **`cli.js`** — plugin class. Subscribes to `boot.completed`; on fire,
73
+ if `RUMMY_PROMPT` is set, constructs a `ProjectAgent`, kicks off
74
+ the run, awaits its terminal status, prints the latest update body,
75
+ exits.
76
+ - **`bin.js`** — executable. Parses env-shape args, mirrors
77
+ `bin/rummy.js`'s env-loading prelude, imports `service.js`.
78
+
79
+ ## Architectural notes
80
+
81
+ - The plugin uses the same `ProjectAgent` constructor as
82
+ `ClientConnection`. In CLI mode, `SocketServer` still starts (it's
83
+ cheap) — `process.exit()` from the plugin terminates everything.
84
+ - `core.on("boot.completed", ...)` is the plugin's only hook.
85
+ Subscribing earlier (e.g. constructor-time) would race plugin
86
+ registration order; `boot.completed` fires after all plugins are
87
+ inited and the DB is open.
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { existsSync } from "node:fs";
4
+ import { dirname, isAbsolute, join } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+ import resolveRummyHome from "../../agent/rummyHome.js";
7
+
8
+ // Env-var-shape args: --KEY=value, --KEY value, or --KEY (→ "1").
9
+ const ENV_FLAG = /^--([A-Z][A-Z0-9_]*)(?:=([\s\S]*))?$/;
10
+
11
+ function parseEnvArgs(argv) {
12
+ const args = argv.slice(2);
13
+ let i = 0;
14
+ while (i < args.length) {
15
+ const m = args[i].match(ENV_FLAG);
16
+ if (!m) {
17
+ console.error(
18
+ `rummy-cli: unknown arg ${JSON.stringify(args[i])}. ` +
19
+ "All args must be --KEY=value, --KEY value, or --KEY (env-var-shape).",
20
+ );
21
+ process.exit(2);
22
+ }
23
+ const [, name, inline] = m;
24
+ if (inline !== undefined) {
25
+ process.env[name] = inline;
26
+ i += 1;
27
+ continue;
28
+ }
29
+ const next = args[i + 1];
30
+ if (next === undefined || next.startsWith("--")) {
31
+ process.env[name] = "1";
32
+ i += 1;
33
+ continue;
34
+ }
35
+ process.env[name] = next;
36
+ i += 2;
37
+ }
38
+ }
39
+
40
+ parseEnvArgs(process.argv);
41
+
42
+ // Same env cascade as bin/rummy.js; CLI flags trump because loadEnvFile preserves existing vars.
43
+ const __dirname = dirname(fileURLToPath(import.meta.url));
44
+ const packageRoot = join(__dirname, "../../..");
45
+ const rummyHome = resolveRummyHome();
46
+
47
+ const cwd = process.cwd();
48
+ const baseDir = existsSync(join(cwd, ".env.example")) ? cwd : rummyHome;
49
+ if (existsSync(join(baseDir, ".env.example"))) {
50
+ process.loadEnvFile(join(baseDir, ".env.example"));
51
+ }
52
+ const userEnv = join(baseDir, ".env");
53
+ if (existsSync(userEnv)) process.loadEnvFile(userEnv);
54
+
55
+ process.env.RUMMY_HOME = rummyHome;
56
+ const dbPath = process.env.RUMMY_DB_PATH;
57
+ if (dbPath && !isAbsolute(dbPath)) {
58
+ process.env.RUMMY_DB_PATH = join(rummyHome, dbPath);
59
+ }
60
+
61
+ await import(join(packageRoot, "service.js"));