@possumtech/rummy 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.env.example +31 -5
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +389 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +13 -9
  11. package/scriptify/ask_run.js +77 -0
  12. package/scriptify/cache_probe.js +66 -0
  13. package/scriptify/cache_probe_grok.js +74 -0
  14. package/service.js +22 -11
  15. package/src/agent/AgentLoop.js +62 -157
  16. package/src/agent/ContextAssembler.js +2 -9
  17. package/src/agent/Entries.js +54 -98
  18. package/src/agent/ProjectAgent.js +4 -11
  19. package/src/agent/TurnExecutor.js +48 -83
  20. package/src/agent/XmlParser.js +247 -273
  21. package/src/agent/budget.js +5 -28
  22. package/src/agent/config.js +38 -0
  23. package/src/agent/errors.js +7 -13
  24. package/src/agent/httpStatus.js +1 -19
  25. package/src/agent/known_queries.sql +1 -1
  26. package/src/agent/known_store.sql +12 -2
  27. package/src/agent/materializeContext.js +15 -18
  28. package/src/agent/pathEncode.js +5 -0
  29. package/src/agent/rummyHome.js +9 -0
  30. package/src/agent/runs.sql +37 -0
  31. package/src/agent/tokens.js +7 -7
  32. package/src/hooks/HookRegistry.js +1 -16
  33. package/src/hooks/Hooks.js +8 -33
  34. package/src/hooks/PluginContext.js +3 -21
  35. package/src/hooks/RpcRegistry.js +1 -4
  36. package/src/hooks/RummyContext.js +6 -16
  37. package/src/hooks/ToolRegistry.js +5 -15
  38. package/src/llm/LlmProvider.js +41 -33
  39. package/src/llm/errors.js +41 -4
  40. package/src/llm/openaiStream.js +125 -0
  41. package/src/llm/retry.js +109 -0
  42. package/src/plugins/budget/budget.js +55 -76
  43. package/src/plugins/cli/README.md +87 -0
  44. package/src/plugins/cli/bin.js +61 -0
  45. package/src/plugins/cli/cli.js +120 -0
  46. package/src/plugins/env/README.md +2 -1
  47. package/src/plugins/env/env.js +4 -6
  48. package/src/plugins/env/envDoc.md +2 -2
  49. package/src/plugins/error/error.js +23 -23
  50. package/src/plugins/file/file.js +2 -22
  51. package/src/plugins/get/get.js +12 -34
  52. package/src/plugins/get/getDoc.md +8 -6
  53. package/src/plugins/hedberg/edits.js +1 -11
  54. package/src/plugins/hedberg/hedberg.js +3 -26
  55. package/src/plugins/hedberg/normalize.js +1 -5
  56. package/src/plugins/hedberg/patterns.js +4 -15
  57. package/src/plugins/hedberg/sed.js +1 -7
  58. package/src/plugins/helpers.js +28 -20
  59. package/src/plugins/index.js +25 -41
  60. package/src/plugins/instructions/README.md +18 -0
  61. package/src/plugins/instructions/instructions.js +97 -38
  62. package/src/plugins/instructions/instructions.md +24 -15
  63. package/src/plugins/instructions/instructions_104.md +5 -4
  64. package/src/plugins/instructions/instructions_105.md +29 -36
  65. package/src/plugins/instructions/instructions_106.md +22 -0
  66. package/src/plugins/instructions/instructions_107.md +17 -0
  67. package/src/plugins/instructions/instructions_108.md +0 -8
  68. package/src/plugins/known/README.md +26 -6
  69. package/src/plugins/known/known.js +37 -34
  70. package/src/plugins/log/README.md +2 -2
  71. package/src/plugins/log/log.js +27 -34
  72. package/src/plugins/ollama/ollama.js +50 -66
  73. package/src/plugins/openai/openai.js +26 -44
  74. package/src/plugins/openrouter/openrouter.js +28 -52
  75. package/src/plugins/policy/README.md +8 -2
  76. package/src/plugins/policy/policy.js +8 -21
  77. package/src/plugins/prompt/README.md +22 -0
  78. package/src/plugins/prompt/prompt.js +14 -16
  79. package/src/plugins/rm/rm.js +5 -2
  80. package/src/plugins/rm/rmDoc.md +4 -4
  81. package/src/plugins/rpc/README.md +2 -1
  82. package/src/plugins/rpc/rpc.js +62 -48
  83. package/src/plugins/set/README.md +5 -1
  84. package/src/plugins/set/set.js +23 -33
  85. package/src/plugins/set/setDoc.md +1 -1
  86. package/src/plugins/sh/README.md +2 -1
  87. package/src/plugins/sh/sh.js +5 -11
  88. package/src/plugins/sh/shDoc.md +2 -2
  89. package/src/plugins/stream/README.md +6 -5
  90. package/src/plugins/stream/stream.js +6 -35
  91. package/src/plugins/telemetry/telemetry.js +26 -19
  92. package/src/plugins/think/think.js +4 -7
  93. package/src/plugins/unknown/unknown.js +8 -13
  94. package/src/plugins/update/update.js +42 -25
  95. package/src/plugins/update/updateDoc.md +3 -3
  96. package/src/plugins/xai/xai.js +30 -20
  97. package/src/plugins/yolo/yolo.js +159 -0
  98. package/src/server/ClientConnection.js +17 -47
  99. package/src/server/SocketServer.js +14 -14
  100. package/src/server/protocol.js +1 -10
  101. package/src/sql/functions/slugify.js +5 -7
  102. package/src/sql/v_model_context.sql +4 -11
  103. package/turns/cli_1777462658211/turn_001.txt +772 -0
  104. package/turns/cli_1777462658211/turn_002.txt +606 -0
  105. package/turns/cli_1777462658211/turn_003.txt +667 -0
  106. package/turns/cli_1777462658211/turn_004.txt +297 -0
  107. package/turns/cli_1777462658211/turn_005.txt +301 -0
  108. package/turns/cli_1777462658211/turn_006.txt +262 -0
  109. package/turns/cli_1777465095132/turn_001.txt +715 -0
  110. package/turns/cli_1777465095132/turn_002.txt +236 -0
  111. package/turns/cli_1777465095132/turn_003.txt +287 -0
  112. package/turns/cli_1777465095132/turn_004.txt +694 -0
  113. package/turns/cli_1777465095132/turn_005.txt +422 -0
  114. package/turns/cli_1777465095132/turn_006.txt +365 -0
  115. package/turns/cli_1777465095132/turn_007.txt +885 -0
  116. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  117. package/turns/cli_1777465095132/turn_009.txt +736 -0
package/src/llm/LlmProvider.js CHANGED
@@ -1,23 +1,33 @@
+import config from "../agent/config.js";
 import msg from "../agent/messages.js";
 import {
   ContextExceededError,
+  classifyTransient,
   isContextExceededMessage,
-  isTransientMessage,
 } from "./errors.js";
+import { retryClassified } from "./retry.js";
 
-const MAX_TRANSIENT_RETRIES = 3;
+const { LLM_DEADLINE, LLM_MAX_BACKOFF } = config;
 
-/**
- * Thin dispatcher over the LLM provider registry (`hooks.llm.providers`).
- * Resolves the model alias via the DB, finds the highest-priority provider
- * whose `matches()` returns true, and delegates. Wraps the call with
- * transient-error retry and surfaces context-exceeded as a typed
- * ContextExceededError.
- *
- * Vendor-specific HTTP is owned by per-vendor plugins under
- * `src/plugins/{openai,ollama,xai,openrouter,...}/`. Adding a new vendor
- * is a matter of adding a plugin — no changes here.
- */
+// Per-category retry policies. Gateway/server are bounded short because
+// upstream-down won't recover by waiting; warmup/rate_limit get the full
+// LLM deadline because they're recoverable wait states with knowable bounds.
+const POLICIES = Object.freeze({
+  gateway: { deadlineMs: 30_000, baseDelayMs: 500, maxDelayMs: 5_000 },
+  warmup: {
+    deadlineMs: LLM_DEADLINE,
+    baseDelayMs: 2000,
+    maxDelayMs: LLM_MAX_BACKOFF,
+  },
+  rate_limit: {
+    deadlineMs: LLM_DEADLINE,
+    baseDelayMs: 1000,
+    maxDelayMs: LLM_MAX_BACKOFF,
+  },
+  server: { deadlineMs: 60_000, baseDelayMs: 1000, maxDelayMs: 10_000 },
+});
+
+// Dispatches to hooks.llm.providers; per-category transient retry; ContextExceededError surface.
 export default class LlmProvider {
   #db;
   #hooks;
@@ -55,27 +65,25 @@ export default class LlmProvider {
       );
     }
 
-    for (let attempt = 0; ; attempt++) {
-      try {
-        return await provider.completion(
-          messages,
-          resolvedModel,
-          resolvedOptions,
-        );
-      } catch (err) {
-        if (isContextExceededMessage(err.message)) {
-          throw new ContextExceededError(err.message, { cause: err });
-        }
-        if (
-          isTransientMessage(err.message) &&
-          attempt < MAX_TRANSIENT_RETRIES
-        ) {
-          const delay = 1000 * 2 ** attempt;
-          await new Promise((r) => setTimeout(r, delay));
-          continue;
-        }
-        throw err;
+    try {
+      return await retryClassified(
+        () => provider.completion(messages, resolvedModel, resolvedOptions),
+        {
+          signal: options.signal,
+          classify: classifyTransient,
+          policies: POLICIES,
+          onRetry: (err, category, attempt, delayMs, remainingMs) => {
+            console.error(
+              `[LLM] ${category} on ${provider.name} attempt ${attempt}: ${err.message}; retrying in ${delayMs}ms (${Math.round(remainingMs / 1000)}s ${category} budget remaining)`,
+            );
+          },
+        },
+      );
+    } catch (err) {
+      if (isContextExceededMessage(err.message)) {
+        throw new ContextExceededError(err.message, { cause: err });
      }
+      throw err;
    }
  }
 
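The POLICIES table only sets deadlines and delay bounds; the per-attempt sleep comes from the full-jitter backoff in retry.js (below). A minimal sketch of the schedule the gateway policy produces, illustrative only and not part of the package:

    // Pre-jitter delay caps, per retry.js:
    //   expCap = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt)
    const gateway = { baseDelayMs: 500, maxDelayMs: 5_000 };
    const caps = Array.from({ length: 6 }, (_, attempt) =>
      Math.min(gateway.maxDelayMs, gateway.baseDelayMs * 2 ** attempt),
    );
    console.log(caps); // [500, 1000, 2000, 4000, 5000, 5000]
    // Actual sleep per attempt is Math.floor(Math.random() * expCap),
    // clamped to whatever remains of the 30s gateway deadline.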
package/src/llm/errors.js CHANGED
@@ -13,9 +13,46 @@ export function isContextExceededMessage(message) {
   return CONTEXT_EXCEEDED_PATTERN.test(String(message));
 }
 
-const TRANSIENT_PATTERN =
-  /\b(503|429|timeout|ECONNREFUSED|ECONNRESET|unavailable)\b/i;
+const ABORT_PATTERN = /\b(aborted|AbortError|TimeoutError)\b/;
+// `terminated` is undici's err.message when the underlying socket closes
+// mid-fetch (TLSSocket.onHttpSocketClose → Fetch.onAborted) — same lane
+// as ECONNRESET, just surfaced through a streaming-fetch path.
+const GATEWAY_PATTERN =
+  /\b(502|504|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed|terminated)\b/i;
+const RATE_LIMIT_PATTERN = /\b429\b/;
+const STATUS_503_PATTERN = /\b503\b/;
+const STATUS_500_PATTERN = /\b500\b/;
+// llamacpp / OpenAI-compatible servers signal model-warmup with this body.
+const MODEL_WARMUP_PATTERN = /\bLoading model\b/i;
 
-export function isTransientMessage(message) {
-  return TRANSIENT_PATTERN.test(String(message));
+// Returns "gateway" | "warmup" | "rate_limit" | "server" | null.
+// null = do not retry, propagate immediately. Operator/internal aborts,
+// auth failures, malformed-request errors, unknown shapes all fall here.
+export function classifyTransient(err) {
+  if (!err || typeof err.message !== "string") return null;
+  const { message } = err;
+
+  if (ABORT_PATTERN.test(message)) return null;
+  if (GATEWAY_PATTERN.test(message)) return "gateway";
+  if (RATE_LIMIT_PATTERN.test(message)) return "rate_limit";
+  if (STATUS_503_PATTERN.test(message)) {
+    // 503 + explicit warmup signal → wait it out.
+    if (MODEL_WARMUP_PATTERN.test(message)) return "warmup";
+    if (typeof err.body === "string" && MODEL_WARMUP_PATTERN.test(err.body)) {
+      return "warmup";
+    }
+    return "server";
+  }
+  if (STATUS_500_PATTERN.test(message)) return "server";
+  return null;
+}
+
+// HTTP Retry-After: integer seconds (most common form). Returns
+// undefined for missing, malformed, or HTTP-date forms — callers
+// fall through to backoff in those cases.
+export function parseRetryAfter(value) {
+  if (!value) return undefined;
+  const seconds = Number(value);
+  if (Number.isFinite(seconds) && seconds >= 0) return seconds;
+  return undefined;
 }
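For reference, how classifyTransient buckets some representative error shapes; a sketch derived from the patterns above, checked against the regexes rather than a running install:

    import { classifyTransient } from "./errors.js";

    classifyTransient(new Error("fetch failed"));              // "gateway"
    classifyTransient(new Error("429 Too Many Requests"));     // "rate_limit"
    classifyTransient(new Error("503 - Loading model"));       // "warmup"
    classifyTransient(new Error("503 Service Unavailable"));   // "server"
    classifyTransient(new Error("401 Unauthorized"));          // null: no retry
    classifyTransient(new Error("The operation was aborted")); // null: abort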
package/src/llm/openaiStream.js ADDED
@@ -0,0 +1,125 @@
+import { parseRetryAfter } from "./errors.js";
+
+/**
+ * Shared streaming client for OpenAI-compatible /chat/completions endpoints.
+ *
+ * Provider plugins (openai, openrouter, ollama) construct the request body
+ * and headers; this module handles the SSE parsing, accumulates deltas into
+ * a non-streaming-shape response, and surfaces errors with the same ergonomics
+ * as the previous fetch-then-json pattern.
+ *
+ * Streaming is preferred over non-streaming for two reasons:
+ *
+ * 1. Long-running completions through CDN proxies (e.g. Cloudflare's 100s
+ *    origin-timeout) can't survive a non-streaming hold; streaming keeps
+ *    the connection alive byte-by-byte.
+ * 2. Future UI surfaces ("thinking" displays) want the deltas live; a
+ *    streaming-first plugin layer gives them a hook.
+ *
+ * The xAI Responses API (`/v1/responses`) uses a different streaming format
+ * and is out of scope for this client.
+ */
+
+/**
+ * @param {Object} args
+ * @param {string} args.url Full POST URL (e.g. `${baseUrl}/v1/chat/completions`).
+ * @param {Object} args.headers Plugin-specific headers (Authorization, etc.).
+ * @param {Object} args.body Request body (without `stream` — added here).
+ * @param {AbortSignal} [args.signal] Cancellation signal.
+ * @returns {Promise<Object>} Non-streaming-shape response: `{ choices, usage, model }`.
+ *   Throws on non-2xx with `err.status` and `err.body` populated.
+ */
+export async function chatCompletionStream({ url, headers, body, signal }) {
+  const requestBody = {
+    ...body,
+    stream: true,
+    // Tells OpenAI / OpenAI-compatible servers to emit a final usage chunk.
+    stream_options: { include_usage: true },
+  };
+
+  const response = await fetch(url, {
+    method: "POST",
+    headers: { "Content-Type": "application/json", ...headers },
+    body: JSON.stringify(requestBody),
+    signal,
+  });
+
+  if (!response.ok) {
+    const errorBody = await response.text();
+    const err = new Error(`${response.status} - ${errorBody}`);
+    err.status = response.status;
+    err.body = errorBody;
+    err.retryAfter = parseRetryAfter(response.headers.get("retry-after"));
+    throw err;
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+
+  let buffer = "";
+  let content = "";
+  let reasoningContent = "";
+  let usage = null;
+  let model = null;
+  let finishReason = null;
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+
+    // SSE frames are separated by blank lines; within a frame, a `data:`
+    // line carries the JSON payload. Process complete lines and keep any
+    // trailing partial-line in the buffer for the next read.
+    const lines = buffer.split("\n");
+    buffer = lines.pop();
+
+    for (const rawLine of lines) {
+      const line = rawLine.trim();
+      if (!line.startsWith("data:")) continue;
+      const payload = line.slice(5).trimStart();
+      if (payload === "[DONE]" || payload === "") continue;
+
+      let chunk;
+      try {
+        chunk = JSON.parse(payload);
+      } catch {
+        continue;
+      }
+
+      if (chunk.model) model = chunk.model;
+      if (chunk.usage) usage = chunk.usage;
+
+      const choice = chunk.choices?.[0];
+      if (!choice) continue;
+      if (choice.finish_reason) finishReason = choice.finish_reason;
+
+      const delta = choice.delta;
+      if (!delta) continue;
+      if (typeof delta.content === "string") content += delta.content;
+      // Different providers surface reasoning under different field names.
+      // Concatenate any that show up.
+      if (typeof delta.reasoning_content === "string")
+        reasoningContent += delta.reasoning_content;
+      if (typeof delta.reasoning === "string")
+        reasoningContent += delta.reasoning;
+      if (typeof delta.thinking === "string")
+        reasoningContent += delta.thinking;
+    }
+  }
+
+  return {
+    model,
+    choices: [
+      {
+        message: {
+          role: "assistant",
+          content,
+          reasoning_content: reasoningContent,
+        },
+        finish_reason: finishReason,
+      },
+    ],
+    usage,
+  };
+}
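A hypothetical caller, showing the shape a provider plugin passes in and gets back; the URL, key handling, and model name here are assumptions for illustration, not the package's actual wiring:

    import { chatCompletionStream } from "./openaiStream.js";

    const result = await chatCompletionStream({
      url: "https://api.openai.com/v1/chat/completions",
      headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` },
      body: {
        model: "gpt-4o-mini",
        messages: [{ role: "user", content: "Say hi." }],
      },
    });
    // Deltas arrive over SSE but come back accumulated in the
    // familiar non-streaming shape:
    console.log(result.choices[0].message.content);
    console.log(result.usage); // from the final stream_options usage chunk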
package/src/llm/retry.js ADDED
@@ -0,0 +1,109 @@
+// Time-bounded exponential backoff with full jitter; mid-sleep AbortSignal-aware.
+export async function retryWithBackoff(
+  fn,
+  {
+    signal,
+    deadlineMs,
+    baseDelayMs = 1000,
+    maxDelayMs = 30_000,
+    isRetryable,
+    onRetry,
+  } = {},
+) {
+  const startTime = Date.now();
+  let attempt = 0;
+  while (true) {
+    signal?.throwIfAborted();
+    try {
+      return await fn();
+    } catch (err) {
+      if (!isRetryable(err)) throw err;
+      const elapsedMs = Date.now() - startTime;
+      const remainingMs = deadlineMs - elapsedMs;
+      if (remainingMs <= 0) {
+        throw new Error(
+          `transient failures persisted ${Math.round(elapsedMs / 1000)}s past deadline; last error: ${err.message}`,
+          { cause: err },
+        );
+      }
+      const expCap = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt);
+      const jittered = Math.floor(Math.random() * expCap);
+      const delayMs = Math.min(remainingMs, jittered);
+      onRetry?.(err, attempt + 1, delayMs, remainingMs);
+      await sleep(delayMs, signal);
+      attempt++;
+    }
+  }
+}
+
+// Per-category retry. Each category gets its own deadline budget; a
+// category transition resets prior category state — the rationale being
+// that seeing a different category proves upstream is alive in some way,
+// so prior gateway/server storms aren't relevant to the new attempt.
+// Honors err.retryAfter (seconds) as a delay floor for rate-limit hints.
+export async function retryClassified(
+  fn,
+  { signal, classify, policies, onRetry } = {},
+) {
+  const state = new Map(); // category → { start: ms, attempts: number }
+  let lastCategory = null;
+
+  while (true) {
+    signal?.throwIfAborted();
+    try {
+      return await fn();
+    } catch (err) {
+      const category = classify(err);
+      if (!category) throw err;
+      const policy = policies[category];
+      if (!policy) {
+        throw new Error(
+          `retryClassified: no policy for category "${category}"`,
+          { cause: err },
+        );
+      }
+
+      if (lastCategory !== category) state.clear();
+      if (!state.has(category)) {
+        state.set(category, { start: Date.now(), attempts: 0 });
+      }
+      lastCategory = category;
+
+      const s = state.get(category);
+      const elapsedMs = Date.now() - s.start;
+      const remainingMs = policy.deadlineMs - elapsedMs;
+      if (remainingMs <= 0) {
+        throw new Error(
+          `${category} retry exhausted after ${Math.round(elapsedMs / 1000)}s; last error: ${err.message}`,
+          { cause: err },
+        );
+      }
+
+      const expCap = Math.min(
+        policy.maxDelayMs,
+        policy.baseDelayMs * 2 ** s.attempts,
+      );
+      const jittered = Math.floor(Math.random() * expCap);
+      const delayMs =
+        err.retryAfter !== undefined
+          ? Math.min(remainingMs, Math.max(err.retryAfter * 1000, jittered))
+          : Math.min(remainingMs, jittered);
+
+      onRetry?.(err, category, s.attempts + 1, delayMs, remainingMs);
+      await sleep(delayMs, signal);
+      s.attempts++;
+    }
+  }
+}
+
+function sleep(ms, signal) {
+  return new Promise((resolve, reject) => {
+    const t = setTimeout(resolve, ms);
+    if (!signal) return;
+    const onAbort = () => {
+      clearTimeout(t);
+      reject(signal.reason || new Error("aborted"));
+    };
+    signal.addEventListener("abort", onAbort, { once: true });
+  });
+}
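Nothing in this diff exercises the simpler single-deadline export, retryWithBackoff; a hedged standalone sketch (endpoint and retry predicate invented for illustration):

    import { retryWithBackoff } from "./retry.js";

    const controller = new AbortController();
    const json = await retryWithBackoff(
      async () => {
        const res = await fetch("https://example.com/api", {
          signal: controller.signal,
        });
        if (!res.ok) throw new Error(`${res.status}`);
        return res.json();
      },
      {
        signal: controller.signal,
        deadlineMs: 15_000, // total transient-failure budget
        isRetryable: (err) => /\b(502|503|504)\b/.test(err.message),
        onRetry: (err, attempt, delayMs) =>
          console.error(`attempt ${attempt}: ${err.message}; retrying in ${delayMs}ms`),
      },
    );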
package/src/plugins/budget/budget.js CHANGED
@@ -2,15 +2,7 @@ import { ceiling, computeBudget, measureMessages } from "../../agent/budget.js";
 import materializeContext from "../../agent/materializeContext.js";
 import { countTokens } from "../../agent/tokens.js";
 
-/**
- * Delta-from-actual baseline. The pre-call <prompt tokenUsage> reports
- * the prior turn's actual API prompt_tokens; post-dispatch predicts
- * next turn's packet = this turn's actual tokens + tokens of new rows
- * written this turn. Keeps the 413 body on the same scale as the
- * model's <prompt> arithmetic — a 60% divergence between pre-call
- * (actual) and post-check (conservative estimator) makes the model
- * dismiss the system as janky and stop following rules.
- */
+// Delta-from-actual; same scale as <prompt tokenUsage>. SPEC #budget_enforcement.
 function predictNextPacket(rows, currentTurn, baseline) {
   let delta = 0;
   for (const r of rows) {
@@ -19,13 +11,7 @@ function predictNextPacket(rows, currentTurn, baseline) {
   return baseline + delta;
 }
 
-/**
- * Format the 413 error body. Names each demoted path with its turn
- * and token count so the model can avoid re-promoting them next turn.
- * Exported (not private) so unit tests can assert the exact wire
- * format — the model reads this string, so its shape is part of the
- * contract.
- */
+// 413 error body; wire format is part of the model contract.
 export function overflowBody(overflow, contextSize, demoted) {
   const cap = ceiling(contextSize);
   const size = cap + overflow;
@@ -50,72 +36,95 @@ export default class Budget {
       enforce: this.enforce.bind(this),
       postDispatch: this.postDispatch.bind(this),
     };
-    core.filter("assembly.user", this.assembleBudget.bind(this), 275);
+    core.filter("assembly.user", this.assembleBudget.bind(this), 175);
   }
 
-  /**
-   * Render the <budget> table between <instructions> and <prompt>.
-   * See SPEC @token_accounting for the contract: per-row tokens are
-   * aTokens (the promotion premium = vTokens − sTokens), summarized
-   * entries collapse into a single aggregate line, system overhead
-   * (system prompt + tool defs) gets its own line.
-   */
+  // Renders <budget> at priority 275; see SPEC #token_accounting.
   assembleBudget(content, ctx) {
     const { rows, contextSize, systemPrompt } = ctx;
     if (!contextSize) return content;
 
     const cap = ceiling(contextSize);
 
-    const visibleByScheme = new Map();
+    const byScheme = new Map();
     let visibleCount = 0;
     let premiumTokens = 0;
     let summarizedCount = 0;
-    let summarizedTokens = 0;
+    let _summarizedTokens = 0;
    let floorTokens = 0;
 
+    const schemeEntry = (s) => {
+      let e = byScheme.get(s);
+      if (!e) {
+        e = {
+          vis: 0,
+          sum: 0,
+          visTokens: 0, // current cost of visible entries
+          visIfSumTokens: 0, // sTokens of visible (what they'd cost demoted)
+          sumTokens: 0, // current cost of summarized entries
+          premium: 0, // savings from demoting visible → summarized
+        };
+        byScheme.set(s, e);
+      }
+      return e;
+    };
+
     for (const r of rows) {
       if (r.aTokens == null) continue;
       const s = r.scheme || "file";
+      const entry = schemeEntry(s);
       if (r.visibility === "visible") {
-        const entry = visibleByScheme.get(s) ?? { count: 0, tokens: 0 };
-        entry.count += 1;
-        entry.tokens += r.aTokens;
-        visibleByScheme.set(s, entry);
+        entry.vis += 1;
+        entry.visTokens += r.vTokens;
+        entry.visIfSumTokens += r.sTokens;
+        entry.premium += r.aTokens;
         visibleCount += 1;
         premiumTokens += r.aTokens;
        floorTokens += r.sTokens;
       } else if (r.visibility === "summarized") {
+        entry.sum += 1;
+        entry.sumTokens += r.sTokens;
        summarizedCount += 1;
-        summarizedTokens += r.sTokens;
+        _summarizedTokens += r.sTokens;
        floorTokens += r.sTokens;
      }
    }
 
-    const systemTokens = countTokens(systemPrompt || "");
+    const systemTokens = countTokens(systemPrompt);
     const tokenUsage = floorTokens + premiumTokens + systemTokens;
     const tokensFree = Math.max(0, cap - tokenUsage);
 
-    const schemeRows = [...visibleByScheme.entries()]
-      .toSorted((a, b) => b[1].tokens - a[1].tokens)
-      .map(([scheme, v]) => {
-        const pct = Math.round((v.tokens / cap) * 100);
-        return `| ${scheme} | ${v.count} | ${v.tokens} | ${pct}% |`;
+    // Sort by current cost desc so biggest-impact rows are top.
+    const schemeRows = [...byScheme.entries()]
+      .toSorted(
+        ([, a], [, b]) =>
+          b.visTokens + b.sumTokens - (a.visTokens + a.sumTokens),
+      )
+      .map(([scheme, e]) => {
+        const cost = e.visTokens + e.sumTokens;
+        const ifAllSum = e.visIfSumTokens + e.sumTokens;
+        return `| ${scheme} | ${e.vis} | ${e.sum} | ${cost} | ${ifAllSum} | ${e.premium} |`;
      });
 
-    const summarizedPct = Math.round((summarizedTokens / cap) * 100);
-    const systemPct = Math.round((systemTokens / cap) * 100);
+    const systemPct =
+      tokenUsage > 0 ? Math.round((systemTokens / tokenUsage) * 100) : 0;
 
     const table = [
-      "| scheme | visible | tokens | % |",
-      "|---|---|---|---|",
+      "| scheme | vis | sum | cost | if-all-sum | premium |",
+      "|---|---|---|---|---|---|",
      ...schemeRows,
    ].join("\n");
 
-    const summarizedLine = `Summarized: ${summarizedCount} entries, ${summarizedTokens} tokens (${summarizedPct}% of budget).`;
     const systemLine = `System: ${systemTokens} tokens (${systemPct}% of budget).`;
     const totalLine = `Total: ${visibleCount} visible + ${summarizedCount} summarized entries; tokenUsage ${tokenUsage} / ceiling ${cap}. ${tokensFree} tokens free.`;
+    const legend = [
+      "Columns:",
+      "- cost: current cost of this scheme (vTokens for visible + sTokens for summarized)",
+      "- if-all-sum: cost if every entry of this scheme were demoted to summarized",
+      "- premium: savings from demoting visible → summarized (cost − if-all-sum)",
+    ].join("\n");
 
-    return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}">\n${table}\n\n${summarizedLine}\n${systemLine}\n${totalLine}\n</budget>\n`;
+    return `${content}<budget tokenUsage="${tokenUsage}" tokensFree="${tokensFree}">\n${table}\n\n${legend}\n${systemLine}\n${totalLine}\n</budget>\n`;
  }
 
  #check({ contextSize, messages, rows, lastPromptTokens = 0 }) {
@@ -151,16 +160,7 @@
    });
  }
 
-  /**
-   * Pre-LLM budget enforcement. On first-turn overflow, demotes the
-   * incoming prompt and re-materializes; re-checks and returns the
-   * post-demotion result. If overflow persists after demotion (or on
-   * later iterations), emits a 413 error (strike) and returns !ok so
-   * TurnExecutor can skip the LLM call this turn.
-   *
-   * ctx = { runId, loopId, turn, systemPrompt, mode, toolSet, demoted,
-   *   loopIteration }
-   */
+  // Pre-LLM enforce: SPEC #budget_enforcement.
  async enforce({
    contextSize,
    messages,
@@ -213,7 +213,6 @@
      mode: ctx.mode,
      toolSet: ctx.toolSet,
      contextSize,
-      demoted: ctx.demoted,
    });
    const rechecked = this.#check({
      contextSize,
@@ -234,14 +233,7 @@
    return rechecked;
  }
 
-  /**
-   * Post-dispatch Turn Demotion. Re-materializes end-of-turn context and
-   * checks against the ceiling. On overflow, demotes this turn's promoted
-   * entries and emits a 413 error (strike) with the descriptive body so
-   * the model sees it next turn via the unified error channel.
-   *
-   * ctx = { runId, loopId, turn, systemPrompt, mode, toolSet, demoted }
-   */
+  // Post-dispatch Turn Demotion: SPEC #budget_enforcement.
  async postDispatch({ contextSize, ctx, rummy }) {
    if (!contextSize) return { failed: false };
    const postMat = await materializeContext({
@@ -254,13 +246,7 @@
      mode: ctx.mode,
      toolSet: ctx.toolSet,
      contextSize,
-      demoted: ctx.demoted,
    });
-    // Baseline from this turn's actual API tokens (telemetry wrote it
-    // before post-dispatch runs). Delta from rows added this turn.
-    // Predicted next-turn packet stays on the tokenUsage scale the
-    // model can verify against its own arithmetic. materializeContext
-    // guarantees a number (0 when no prior API call exists).
    const baseline = postMat.lastContextTokens;
    const predicted = predictNextPacket(postMat.rows, ctx.turn, baseline);
    const cap = ceiling(contextSize);
@@ -269,14 +255,7 @@
 
    const store = rummy.entries;
    let demotedEntries = await store.demoteTurnEntries(ctx.runId, ctx.turn);
-    // Fallback: if this turn had nothing to demote but the packet still
-    // overflows, the pressure is coming from prior-turn promotions the
-    // model never demoted itself. Widen to all currently-visible
-    // entries in the run. Without this fallback, overflow-with-nothing
-    // strikes out runs where the base context has drifted over ceiling
-    // through no fault of the current turn (observed: runs where 3
-    // stale promotions from turns 12–14 saturate every subsequent
-    // turn's budget).
+    // Prior-turn-pressure fallback; SPEC #budget_enforcement.
    if (demotedEntries.length === 0) {
      demotedEntries = await store.demoteRunVisibleEntries(ctx.runId);
    }
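The new cost / if-all-sum / premium columns satisfy premium = cost − if-all-sum, since per the removed comment in this file aTokens is the promotion premium vTokens − sTokens. A worked example with invented numbers:

    // One scheme: 3 visible entries (vTokens 400/300/300, sTokens 50/40/30)
    // plus 2 summarized entries (sTokens 60/40).
    const cost = 400 + 300 + 300 + (60 + 40);             // 1100
    const ifAllSum = 50 + 40 + 30 + (60 + 40);            // 220
    const premium = (400 - 50) + (300 - 40) + (300 - 30); // 880 = cost - ifAllSum
    // Rendered row: | file | 3 | 2 | 1100 | 220 | 880 |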