@possumtech/rummy 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.env.example +40 -15
  2. package/.xai.key +1 -0
  3. package/PLUGINS.md +169 -53
  4. package/README.md +38 -32
  5. package/SPEC.md +366 -179
  6. package/bin/digest.js +1097 -0
  7. package/biome/no-fallbacks.grit +2 -2
  8. package/gemini.key +1 -0
  9. package/lang/en.json +10 -1
  10. package/migrations/001_initial_schema.sql +9 -2
  11. package/package.json +19 -8
  12. package/service.js +1 -0
  13. package/src/agent/AgentLoop.js +76 -26
  14. package/src/agent/ContextAssembler.js +2 -0
  15. package/src/agent/Entries.js +238 -60
  16. package/src/agent/ProjectAgent.js +44 -0
  17. package/src/agent/TurnExecutor.js +99 -30
  18. package/src/agent/XmlParser.js +206 -111
  19. package/src/agent/errors.js +35 -0
  20. package/src/agent/known_queries.sql +1 -1
  21. package/src/agent/known_store.sql +3 -42
  22. package/src/agent/materializeContext.js +30 -1
  23. package/src/agent/runs.sql +8 -18
  24. package/src/agent/tokens.js +0 -1
  25. package/src/agent/turns.sql +1 -0
  26. package/src/hooks/Hooks.js +26 -0
  27. package/src/hooks/RummyContext.js +12 -1
  28. package/src/lib/hedberg/README.md +60 -0
  29. package/src/lib/hedberg/hedberg.js +60 -0
  30. package/src/lib/hedberg/marker.js +158 -0
  31. package/src/{plugins → lib}/hedberg/matcher.js +1 -2
  32. package/src/llm/LlmProvider.js +41 -3
  33. package/src/llm/openaiStream.js +17 -0
  34. package/src/plugins/ask_user/ask_user.js +12 -2
  35. package/src/plugins/ask_user/ask_userDoc.md +1 -5
  36. package/src/plugins/budget/README.md +29 -24
  37. package/src/plugins/budget/budget.js +166 -110
  38. package/src/plugins/cli/README.md +3 -4
  39. package/src/plugins/cli/cli.js +31 -5
  40. package/src/plugins/cloudflare/cloudflare.js +136 -0
  41. package/src/plugins/cp/cp.js +41 -4
  42. package/src/plugins/cp/cpDoc.md +5 -6
  43. package/src/plugins/engine/engine.sql +1 -1
  44. package/src/plugins/env/README.md +5 -4
  45. package/src/plugins/env/env.js +7 -4
  46. package/src/plugins/env/envDoc.md +7 -8
  47. package/src/plugins/error/error.js +56 -15
  48. package/src/plugins/file/README.md +12 -3
  49. package/src/plugins/file/file.js +2 -2
  50. package/src/plugins/get/get.js +59 -36
  51. package/src/plugins/get/getDoc.md +10 -34
  52. package/src/plugins/google/google.js +115 -0
  53. package/src/plugins/hedberg/hedberg.js +13 -56
  54. package/src/plugins/helpers.js +66 -12
  55. package/src/plugins/index.js +1 -2
  56. package/src/plugins/instructions/README.md +44 -47
  57. package/src/plugins/instructions/instructions-system.md +44 -0
  58. package/src/plugins/instructions/instructions-user.md +53 -0
  59. package/src/plugins/instructions/instructions.js +58 -189
  60. package/src/plugins/known/README.md +6 -7
  61. package/src/plugins/known/known.js +24 -30
  62. package/src/plugins/log/log.js +41 -32
  63. package/src/plugins/mv/mv.js +40 -1
  64. package/src/plugins/mv/mvDoc.md +1 -8
  65. package/src/plugins/ollama/ollama.js +4 -3
  66. package/src/plugins/openai/openai.js +4 -3
  67. package/src/plugins/openrouter/openrouter.js +14 -4
  68. package/src/plugins/persona/README.md +11 -13
  69. package/src/plugins/persona/default.md +29 -0
  70. package/src/plugins/persona/persona.js +10 -66
  71. package/src/plugins/policy/policy.js +23 -22
  72. package/src/plugins/prompt/README.md +37 -27
  73. package/src/plugins/prompt/prompt.js +13 -19
  74. package/src/plugins/rm/rm.js +18 -0
  75. package/src/plugins/rm/rmDoc.md +5 -6
  76. package/src/plugins/rpc/rpc.js +3 -3
  77. package/src/plugins/set/set.js +205 -323
  78. package/src/plugins/set/setDoc.md +47 -17
  79. package/src/plugins/sh/README.md +6 -5
  80. package/src/plugins/sh/sh.js +8 -5
  81. package/src/plugins/sh/shDoc.md +7 -8
  82. package/src/plugins/skill/README.md +37 -14
  83. package/src/plugins/skill/skill.js +200 -101
  84. package/src/plugins/skill/skillDoc.js +3 -0
  85. package/src/plugins/skill/skillDoc.md +9 -0
  86. package/src/plugins/stream/README.md +7 -6
  87. package/src/plugins/stream/finalize.js +100 -0
  88. package/src/plugins/stream/stream.js +13 -45
  89. package/src/plugins/telemetry/telemetry.js +27 -4
  90. package/src/plugins/think/think.js +2 -3
  91. package/src/plugins/think/thinkDoc.md +2 -4
  92. package/src/plugins/unknown/README.md +1 -1
  93. package/src/plugins/unknown/unknown.js +17 -19
  94. package/src/plugins/update/update.js +4 -51
  95. package/src/plugins/update/updateDoc.md +21 -6
  96. package/src/plugins/xai/xai.js +68 -102
  97. package/src/plugins/yolo/yolo.js +102 -75
  98. package/src/sql/functions/hedmatch.js +1 -1
  99. package/src/sql/functions/hedreplace.js +1 -1
  100. package/src/sql/functions/hedsearch.js +1 -1
  101. package/src/sql/functions/slugify.js +16 -2
  102. package/BENCH_ENVIRONMENT.md +0 -230
  103. package/CLIENT_INTERFACE.md +0 -396
  104. package/last_run.txt +0 -5617
  105. package/scriptify/ask_run.js +0 -77
  106. package/scriptify/cache_probe.js +0 -66
  107. package/scriptify/cache_probe_grok.js +0 -74
  108. package/src/agent/budget.js +0 -33
  109. package/src/agent/config.js +0 -38
  110. package/src/plugins/hedberg/README.md +0 -71
  111. package/src/plugins/hedberg/docs.md +0 -0
  112. package/src/plugins/hedberg/edits.js +0 -55
  113. package/src/plugins/hedberg/normalize.js +0 -17
  114. package/src/plugins/hedberg/sed.js +0 -49
  115. package/src/plugins/instructions/instructions.md +0 -34
  116. package/src/plugins/instructions/instructions_104.md +0 -8
  117. package/src/plugins/instructions/instructions_105.md +0 -39
  118. package/src/plugins/instructions/instructions_106.md +0 -22
  119. package/src/plugins/instructions/instructions_107.md +0 -17
  120. package/src/plugins/instructions/instructions_108.md +0 -0
  121. package/src/plugins/known/knownDoc.js +0 -3
  122. package/src/plugins/known/knownDoc.md +0 -8
  123. package/src/plugins/unknown/unknownDoc.js +0 -3
  124. package/src/plugins/unknown/unknownDoc.md +0 -11
  125. package/turns/cli_1777462658211/turn_001.txt +0 -772
  126. package/turns/cli_1777462658211/turn_002.txt +0 -606
  127. package/turns/cli_1777462658211/turn_003.txt +0 -667
  128. package/turns/cli_1777462658211/turn_004.txt +0 -297
  129. package/turns/cli_1777462658211/turn_005.txt +0 -301
  130. package/turns/cli_1777462658211/turn_006.txt +0 -262
  131. package/turns/cli_1777465095132/turn_001.txt +0 -715
  132. package/turns/cli_1777465095132/turn_002.txt +0 -236
  133. package/turns/cli_1777465095132/turn_003.txt +0 -287
  134. package/turns/cli_1777465095132/turn_004.txt +0 -694
  135. package/turns/cli_1777465095132/turn_005.txt +0 -422
  136. package/turns/cli_1777465095132/turn_006.txt +0 -365
  137. package/turns/cli_1777465095132/turn_007.txt +0 -885
  138. package/turns/cli_1777465095132/turn_008.txt +0 -1277
  139. package/turns/cli_1777465095132/turn_009.txt +0 -736
  140. /package/src/{plugins → lib}/hedberg/patterns.js +0 -0
@@ -1,5 +1,13 @@
1
+ import { SUMMARY_MAX_CHARS } from "../helpers.js";
1
2
  import docs from "./ask_userDoc.js";
2
3
 
4
+ // Per-side cap for the "question → answer" summary projection. Splitting
5
+ // before the arrow preserves the structural separator the model uses to
6
+ // read the pair as a unit; a single trailing slice could lose the arrow
7
+ // entirely when either side is large.
8
+ const ARROW = " → ";
9
+ const HALF = Math.floor((SUMMARY_MAX_CHARS - ARROW.length) / 2);
10
+
3
11
  const LOG_ACTION_RE = /^log:\/\/turn_\d+\/(\w+)\//;
4
12
 
5
13
  export default class AskUser {
@@ -68,7 +76,9 @@ export default class AskUser {
68
76
 
69
77
  summary(entry) {
70
78
  const { question, answer } = entry.attributes;
71
- if (answer) return `${question} → ${answer}`;
72
- return question;
79
+ if (answer) {
80
+ return `${question.slice(0, HALF)}${ARROW}${answer.slice(0, HALF)}`;
81
+ }
82
+ return question.slice(0, SUMMARY_MAX_CHARS);
73
83
  }
74
84
  }
@@ -1,10 +1,6 @@
1
1
  ## <ask_user question="[Question?]">[option1; option2; ...]</ask_user> - Ask the user a question
2
2
 
3
- * YOU SHOULD ONLY use for decisions, preferences, or approvals the user must make
4
- <!-- Positive framing. Shows what ask_user IS for. -->
3
+ YOU SHOULD ONLY use <ask_user> for decisions, preferences, or approvals the user must make.
5
4
 
6
5
  Example: <ask_user question="Which test framework?">Mocha; Jest; Node Native</ask_user>
7
- <!-- Preference decision. Model truly cannot know this without asking. -->
8
-
9
6
  Example: <ask_user question="Deploy to staging or production?">staging; production</ask_user>
10
- <!-- Consequential action. High-stakes choice. -->
@@ -7,37 +7,42 @@ Context ceiling enforcement.
7
7
  Ceiling = `floor(contextSize × RUMMY_BUDGET_CEILING)` (default 0.9). The
8
8
  10% headroom is the system's operating room for graceful overflow
9
9
  handling. No per-write gating — tools run uninterrupted. Enforcement
10
- happens at boundaries.
10
+ happens at one boundary: the pre-LLM grinder.
11
11
 
12
12
  ## Enforcement Points
13
13
 
14
- 1. **Pre-LLM enforce** (`hooks.budget.enforce`): checks assembled context
15
- before the LLM call. If over ceiling on turn 1 → Prompt Demotion
16
- (demote the incoming prompt, re-materialize, re-check). Runs in the
17
- headroom if that fits. On non-first turns or still-over after
18
- Prompt Demotion, emits a 413 error via `hooks.error.log` so the
19
- strike system treats the overflow as a turn-level event.
14
+ 1. **Pre-LLM grinder** (`turn.beforeDispatch` filter): four-step
15
+ ladder per SPEC §budget_enforcement.
20
16
 
21
- 2. **Post-dispatch Turn Demotion** (`hooks.budget.postDispatch`): after
22
- all tools dispatch, re-materialize and check. If over ceiling
23
- demote ALL visible entries from this turn (status < 400, status
24
- preserved demotion only changes visibility). Emits a 413 error
25
- with the 50% rule directive as its message; the error entry is
26
- what the model sees next turn.
17
+ 1. Check budget. If under ceiling → proceed.
18
+ 2. Soft 413: demote `(current_turn − 1)` visible run_views to
19
+ `summarized` (all schemes, no exemption). Re-materialize, recheck.
20
+ 3. Soft 413: demote the incoming `prompt://N` to `summarized`.
21
+ Re-materialize, recheck.
22
+ 4. Hard 413: emit `error://`, set `ok=false` on the packet so
23
+ TurnExecutor short-circuits dispatch.
27
24
 
28
- 3. **LLM rejection** (`isContextExceeded` in TurnExecutor): turn-1
29
- token estimate drift causes LLM to reject. Same 413 error path as
30
- pre-LLM overflow.
25
+ Steps 2 and 3 also emit `error://` 413 entries when they fire so
26
+ the model sees what was auto-demoted next turn. The grinder never
27
+ demotes speculatively or helpfully — only in response to actual
28
+ overflow.
29
+
30
+ 2. **LLM rejection** (`isContextExceeded` in TurnExecutor): turn-1
31
+ token-estimate drift causes the LLM to reject. Same 413 error path
32
+ as the grinder's hard step.
31
33
 
32
34
  ## Files
33
35
 
34
- - **budget.js** — Plugin. Enforce + postDispatch methods exposed on
35
- `core.hooks.budget`.
36
+ - **budget.js** — Plugin. Math (`ceiling`, `measureMessages`,
37
+ `measureRows`, `computeBudget`), 413 body shaper (`overflowBody`),
38
+ and the plugin class itself.
39
+
40
+ ## Hook participation
36
41
 
37
- ## Registration
42
+ - `core.filter("turn.beforeDispatch", ...)` — pre-LLM grinder. Returns
43
+ the (possibly demoted) packet with `ok` / `overflow` flags.
44
+ - `core.filter("assembly.user", ..., 90)` — renders the `<budget>`
45
+ table into the user message.
38
46
 
39
- - **Hook**: `hooks.budget.enforce` pre-LLM ceiling check + first-turn
40
- Prompt Demotion.
41
- - **Hook**: `hooks.budget.postDispatch` — post-dispatch re-check + Turn
42
- Demotion. Emits 413 errors through the unified error channel; there
43
- is no separate `budget://` scheme.
47
+ Emits 413 errors through the unified error channel (`hooks.error.log.emit`);
48
+ there is no separate `budget://` scheme.
@@ -1,14 +1,35 @@
1
- import { ceiling, computeBudget, measureMessages } from "../../agent/budget.js";
2
- import materializeContext from "../../agent/materializeContext.js";
1
+ import ContextAssembler from "../../agent/ContextAssembler.js";
3
2
  import { countTokens } from "../../agent/tokens.js";
4
3
 
5
- // Delta-from-actual; same scale as <prompt tokenUsage>. SPEC #budget_enforcement.
6
- function predictNextPacket(rows, currentTurn, baseline) {
7
- let delta = 0;
8
- for (const r of rows) {
9
- if (r.source_turn === currentTurn) delta += countTokens(r.body);
10
- }
11
- return baseline + delta;
4
+ const CEILING_RATIO = Number(process.env.RUMMY_BUDGET_CEILING);
5
+
6
+ export function ceiling(contextSize) {
7
+ return Math.floor(contextSize * CEILING_RATIO);
8
+ }
9
+
10
+ // Sum assembled-message token counts; used by the enforce gate.
11
+ export function measureMessages(messages) {
12
+ return messages.reduce((sum, m) => sum + countTokens(m.content), 0);
13
+ }
14
+
15
+ // Sum projected row body token counts; used by prompt.js pre-assembly.
16
+ export function measureRows(rows) {
17
+ return rows.reduce((sum, r) => sum + countTokens(r.body), 0);
18
+ }
19
+
20
+ // Single source of truth for budget numbers; tokenUsage echoes totalTokens for the wire attribute.
21
+ export function computeBudget({ contextSize, totalTokens }) {
22
+ const cap = ceiling(contextSize);
23
+ const tokensFree = Math.max(0, cap - totalTokens);
24
+ const overflow = Math.max(0, totalTokens - cap);
25
+ return {
26
+ ceiling: cap,
27
+ totalTokens,
28
+ tokenUsage: totalTokens,
29
+ tokensFree,
30
+ overflow,
31
+ ok: overflow === 0,
32
+ };
12
33
  }
13
34
 
14
35
  // 413 error body; wire format is part of the model contract.
@@ -17,10 +38,10 @@ export function overflowBody(overflow, contextSize, demoted) {
17
38
  const size = cap + overflow;
18
39
  const count = demoted.length;
19
40
  const totalTokens = demoted.reduce((s, r) => s + r.tokens, 0);
20
- const head = `Token Budget overflow: packet was ${size} tokens, ceiling is ${cap}. ${count} promotion${count === 1 ? "" : "s"} (${totalTokens} tokens) demoted to fit.`;
41
+ const head = `Token Budget overflow: packet was ${size} tokens, ceiling is ${cap}. ${count} promotion${count === 1 ? "" : "s"} (${totalTokens} tokens) demoted.`;
21
42
  if (count === 0) return head;
22
43
  const lines = demoted.map((d) =>
23
- d.turn
44
+ d.turn != null
24
45
  ? `- ${d.path} (turn ${d.turn}, ${d.tokens} tokens)`
25
46
  : `- ${d.path} (${d.tokens} tokens)`,
26
47
  );
@@ -32,11 +53,23 @@ export default class Budget {
32
53
 
33
54
  constructor(core) {
34
55
  this.#core = core;
35
- core.hooks.budget = {
36
- enforce: this.enforce.bind(this),
37
- postDispatch: this.postDispatch.bind(this),
38
- };
39
- core.filter("assembly.user", this.assembleBudget.bind(this), 175);
56
+ core.filter("turn.beforeDispatch", this.#onBeforeDispatch.bind(this));
57
+ core.filter("assembly.user", this.assembleBudget.bind(this), 90);
58
+ }
59
+
60
+ // Filter participant. Receives the assembled packet; returns a
61
+ // (possibly modified) packet. The pre-LLM grinder demotes-and-
62
+ // rechecks per SPEC §budget_enforcement; if it can't fit after the
63
+ // ladder runs, sets ok=false so TurnExecutor short-circuits.
64
+ async #onBeforeDispatch(packet, ctxBag) {
65
+ return this.enforce({
66
+ contextSize: packet.contextSize,
67
+ messages: packet.messages,
68
+ rows: packet.rows,
69
+ lastPromptTokens: packet.lastPromptTokens,
70
+ ctx: ctxBag.ctx,
71
+ rummy: ctxBag.rummy,
72
+ });
40
73
  }
41
74
 
42
75
  // Renders <budget> at priority 90; see SPEC #token_accounting.
@@ -140,27 +173,47 @@ export default class Budget {
140
173
  };
141
174
  }
142
175
 
143
- async #emitOverflow({
144
- message,
145
- runId,
146
- turn,
147
- loopId,
148
- rummy,
149
- demotedCount = 0,
150
- demotedTokens = 0,
151
- }) {
176
+ async #emit({ message, ctx, rummy, demoted }) {
177
+ const totalTokens = demoted.reduce((s, r) => s + r.tokens, 0);
152
178
  await rummy.hooks.error.log.emit({
153
179
  store: rummy.entries,
154
- runId,
155
- turn,
156
- loopId,
180
+ runId: ctx.runId,
181
+ turn: ctx.turn,
182
+ loopId: ctx.loopId,
157
183
  message,
158
184
  status: 413,
159
- attributes: { demotedCount, demotedTokens },
185
+ attributes: {
186
+ demotedCount: demoted.length,
187
+ demotedTokens: totalTokens,
188
+ },
160
189
  });
161
190
  }
162
191
 
163
- // Pre-LLM enforce: SPEC #budget_enforcement.
192
+ async #reassemble({ rows, ctx, rummy, contextSize, lastPromptTokens }) {
193
+ return ContextAssembler.assembleFromTurnContext(
194
+ rows,
195
+ {
196
+ type: ctx.mode,
197
+ systemPrompt: ctx.systemPrompt,
198
+ contextSize,
199
+ toolSet: ctx.toolSet,
200
+ lastContextTokens: lastPromptTokens,
201
+ turn: ctx.turn,
202
+ },
203
+ rummy.hooks,
204
+ );
205
+ }
206
+
207
+ // Pre-LLM grinder ladder. SPEC §budget_enforcement.
208
+ //
209
+ // 1. Check budget. ok → return.
210
+ // 2. Soft 413: demote (current_turn − 1) visible. Recheck.
211
+ // 3. Soft 413: demote current prompt. Recheck.
212
+ // 4. Hard 413: emit and return ok=false.
213
+ //
214
+ // Every step that demotes anything emits a 413 error://. Soft 413s
215
+ // keep the run alive (turn proceeds to LLM); the hard 413 bubbles
216
+ // through to AgentLoop.
164
217
  async enforce({
165
218
  contextSize,
166
219
  messages,
@@ -173,6 +226,7 @@ export default class Budget {
173
226
  return { messages, rows, assembledTokens: 0, ok: true };
174
227
  }
175
228
 
229
+ // Step 1.
176
230
  const first = this.#check({
177
231
  contextSize,
178
232
  messages,
@@ -181,103 +235,105 @@ export default class Budget {
181
235
  });
182
236
  if (first.ok) return first;
183
237
 
184
- if (ctx?.loopIteration !== 1) {
185
- const cap = ceiling(contextSize);
186
- await this.#emitOverflow({
187
- message: `Token Budget overflow: packet was ${cap + first.overflow} tokens, ceiling is ${cap}.`,
188
- runId: ctx.runId,
189
- turn: ctx.turn,
190
- loopId: ctx.loopId,
238
+ const store = rummy.entries;
239
+
240
+ // Step 2: previous-turn demotion.
241
+ const prevTurn = ctx.turn - 1;
242
+ const rawTurnDemoted =
243
+ prevTurn >= 0 ? await store.demoteTurnEntries(ctx.runId, prevTurn) : [];
244
+ const turnDemoted = rawTurnDemoted.map((d) => ({ ...d, turn: prevTurn }));
245
+ if (turnDemoted.length > 0) {
246
+ for (const r of rows) {
247
+ if (r.source_turn === prevTurn && r.visibility === "visible") {
248
+ r.body = r.sBody;
249
+ r.visibility = "summarized";
250
+ }
251
+ }
252
+ const reMessages = await this.#reassemble({
253
+ rows,
254
+ ctx,
191
255
  rummy,
256
+ contextSize,
257
+ lastPromptTokens: 0,
192
258
  });
193
- return first;
259
+ const rechecked = this.#check({
260
+ contextSize,
261
+ messages: reMessages,
262
+ rows,
263
+ lastPromptTokens: 0,
264
+ });
265
+ if (rechecked.ok) {
266
+ await this.#emit({
267
+ message: overflowBody(first.overflow, contextSize, turnDemoted),
268
+ ctx,
269
+ rummy,
270
+ demoted: turnDemoted,
271
+ });
272
+ return rechecked;
273
+ }
274
+ first.overflow = rechecked.overflow;
194
275
  }
195
276
 
277
+ // Step 3: current-prompt demotion.
196
278
  const promptRow = rows.findLast(
197
279
  (r) => r.category === "prompt" && r.scheme === "prompt",
198
280
  );
199
- if (promptRow) {
200
- await rummy.entries.set({
281
+ const promptDemoted = [];
282
+ if (promptRow && promptRow.visibility === "visible") {
283
+ await store.set({
201
284
  runId: ctx.runId,
202
285
  path: promptRow.path,
203
286
  visibility: "summarized",
204
287
  });
205
- }
206
- const reMat = await materializeContext({
207
- db: rummy.db,
208
- hooks: rummy.hooks,
209
- runId: ctx.runId,
210
- loopId: ctx.loopId,
211
- turn: ctx.turn,
212
- systemPrompt: ctx.systemPrompt,
213
- mode: ctx.mode,
214
- toolSet: ctx.toolSet,
215
- contextSize,
216
- });
217
- const rechecked = this.#check({
218
- contextSize,
219
- messages: reMat.messages,
220
- rows: reMat.rows,
221
- lastPromptTokens: reMat.lastContextTokens,
222
- });
223
- if (!rechecked.ok) {
224
- const cap = ceiling(contextSize);
225
- await this.#emitOverflow({
226
- message: `Token Budget overflow: packet was ${cap + rechecked.overflow} tokens after demoting the prompt, ceiling is ${cap}.`,
227
- runId: ctx.runId,
228
- turn: ctx.turn,
229
- loopId: ctx.loopId,
288
+ promptDemoted.push({
289
+ path: promptRow.path,
290
+ turn: promptRow.source_turn,
291
+ tokens: countTokens(promptRow.body) - countTokens(promptRow.sBody),
292
+ });
293
+ promptRow.body = promptRow.sBody;
294
+ promptRow.visibility = "summarized";
295
+ const reMessages = await this.#reassemble({
296
+ rows,
297
+ ctx,
230
298
  rummy,
299
+ contextSize,
300
+ lastPromptTokens: 0,
231
301
  });
232
- }
233
- return rechecked;
234
- }
235
-
236
- // Post-dispatch Turn Demotion: SPEC #budget_enforcement.
237
- async postDispatch({ contextSize, ctx, rummy }) {
238
- if (!contextSize) return { failed: false };
239
- const postMat = await materializeContext({
240
- db: rummy.db,
241
- hooks: rummy.hooks,
242
- runId: ctx.runId,
243
- loopId: ctx.loopId,
244
- turn: ctx.turn,
245
- systemPrompt: ctx.systemPrompt,
246
- mode: ctx.mode,
247
- toolSet: ctx.toolSet,
248
- contextSize,
249
- });
250
- const baseline = postMat.lastContextTokens;
251
- const predicted = predictNextPacket(postMat.rows, ctx.turn, baseline);
252
- const cap = ceiling(contextSize);
253
- if (predicted <= cap) return { failed: false };
254
- const post = { overflow: predicted - cap };
255
-
256
- const store = rummy.entries;
257
- let demotedEntries = await store.demoteTurnEntries(ctx.runId, ctx.turn);
258
- // Prior-turn-pressure fallback; SPEC #budget_enforcement.
259
- if (demotedEntries.length === 0) {
260
- demotedEntries = await store.demoteRunVisibleEntries(ctx.runId);
261
- }
262
- const promptRow = postMat.rows.find((r) => r.scheme === "prompt");
263
- if (promptRow) {
264
- await store.set({
265
- runId: ctx.runId,
266
- path: promptRow.path,
267
- visibility: "summarized",
302
+ const rechecked = this.#check({
303
+ contextSize,
304
+ messages: reMessages,
305
+ rows,
306
+ lastPromptTokens: 0,
268
307
  });
308
+ if (rechecked.ok) {
309
+ await this.#emit({
310
+ message: overflowBody(first.overflow, contextSize, [
311
+ ...turnDemoted,
312
+ ...promptDemoted,
313
+ ]),
314
+ ctx,
315
+ rummy,
316
+ demoted: [...turnDemoted, ...promptDemoted],
317
+ });
318
+ return rechecked;
319
+ }
320
+ first.overflow = rechecked.overflow;
269
321
  }
270
322
 
271
- const totalDemoted = demotedEntries.reduce((s, r) => s + r.tokens, 0);
272
- await this.#emitOverflow({
273
- message: overflowBody(post.overflow, contextSize, demotedEntries),
274
- demotedCount: demotedEntries.length,
275
- demotedTokens: totalDemoted,
276
- runId: ctx.runId,
277
- turn: ctx.turn,
278
- loopId: ctx.loopId,
323
+ // Step 4: hard 413.
324
+ const allDemoted = [...turnDemoted, ...promptDemoted];
325
+ await this.#emit({
326
+ message: overflowBody(first.overflow, contextSize, allDemoted),
327
+ ctx,
279
328
  rummy,
329
+ demoted: allDemoted,
280
330
  });
281
- return { failed: true };
331
+ return {
332
+ messages,
333
+ rows,
334
+ assembledTokens: ceiling(contextSize) + first.overflow,
335
+ overflow: first.overflow,
336
+ ok: false,
337
+ };
282
338
  }
283
339
  }
@@ -29,9 +29,8 @@ preserves existing vars).
29
29
  |---|---|---|
30
30
  | `RUMMY_MODE` | `act` | `ask` or `act`. |
31
31
 
32
- `RUMMY_RUN_TIMEOUT` is required at boot via `src/agent/config.js`;
33
- default lives in `.env.example`. Watchdog exits with code `124` on
34
- overflow.
32
+ `RUMMY_LOOP_TIMEOUT` is declared in `.env.example` and read directly
33
+ from `process.env`. Watchdog exits with code `124` on overflow.
35
34
 
36
35
  Per-run defaults (`RUMMY_YOLO`, `RUMMY_NO_REPO`, `RUMMY_NO_WEB`,
37
36
  `RUMMY_NO_INTERACTION`, `RUMMY_NO_PROPOSALS`) cascade through
@@ -61,7 +60,7 @@ provider key. Bench harnesses call `rummy-cli` with just
61
60
  | `0` | Terminal status `200`. Model claimed success. |
62
61
  | `1` | Terminal status in `{204, 413, 422, 499, 500}` or run crashed. |
63
62
  | `2` | Arg parse error (invalid flag shape, missing required env). |
64
- | `124` | Wall-clock timeout (`RUMMY_RUN_TIMEOUT` exceeded). |
63
+ | `124` | Wall-clock timeout (`RUMMY_LOOP_TIMEOUT` exceeded). |
65
64
 
66
65
  External verifiers (terminal-bench, SWE-bench, etc.) decide actual
67
66
  task success — the exit code only reports rummy's internal terminal
@@ -1,5 +1,5 @@
1
- import config from "../../agent/config.js";
2
1
  import ProjectAgent from "../../agent/ProjectAgent.js";
2
+ import File from "../file/file.js";
3
3
 
4
4
  const TERMINAL_STATUSES = new Set([200, 204, 413, 422, 499, 500]);
5
5
 
@@ -42,10 +42,36 @@ export default class Cli {
42
42
  const projectAgent = new ProjectAgent(db, hooks);
43
43
  const { projectId } = await projectAgent.init(alias, projectRoot);
44
44
 
45
- // Watchdog; overridable via --RUMMY_RUN_TIMEOUT=<ms>.
46
- const timeoutMs = config.RUN_TIMEOUT;
47
- const timer = setTimeout(() => {
48
- console.error(`rummy-cli: timed out after ${timeoutMs}ms`);
45
+ // Operator-declared project surface (comma-separated literal paths,
46
+ // relative to project root). Files are ingested as entries with
47
+ // default visibility=archived; the model promotes specific
48
+ // entries via <get>. Decouples membership (constraint) from
49
+ // visibility (per-entry, model-controlled).
50
+ const projectFilesRaw = process.env.RUMMY_PROJECT_FILES;
51
+ if (projectFilesRaw) {
52
+ const patterns = projectFilesRaw
53
+ .split(",")
54
+ .map((s) => s.trim())
55
+ .filter(Boolean);
56
+ for (const pattern of patterns) {
57
+ await File.setConstraint(db, projectId, pattern, "add");
58
+ }
59
+ }
60
+
61
+ // Watchdog; overridable via --RUMMY_LOOP_TIMEOUT=<ms>. Drains
62
+ // the active loop before exit so SQLite, turn slices, and
63
+ // last_run.txt are durable on disk before the process dies —
64
+ // without this, harbor's outer asyncio.wait_for kills the
65
+ // docker exec mid-pipeline and the trial.log cp commands never
66
+ // run, leaving the post-mortem packet empty.
67
+ const timeoutMs = Number(process.env.RUMMY_LOOP_TIMEOUT);
68
+ const timer = setTimeout(async () => {
69
+ console.error(`rummy-cli: timed out after ${timeoutMs}ms — draining`);
70
+ try {
71
+ await projectAgent.shutdown();
72
+ } catch (err) {
73
+ console.error(`rummy-cli: drain failed: ${err.message}`);
74
+ }
49
75
  process.exit(124);
50
76
  }, timeoutMs);
51
77
  timer.unref();
@@ -0,0 +1,136 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import msg from "../../agent/messages.js";
5
+ import { chatCompletionStream } from "../../llm/openaiStream.js";
6
+
7
+ const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
8
+
9
+ const PROVIDER = "@cf";
10
+
11
+ // Repo-root-relative key file. Resolved relative to this source file so
12
+ // CWD changes during runs (programbench/tbench cd into workspaces) don't
13
+ // break the lookup. Plugin is inert if the file is missing OR if
14
+ // CLOUDFLARE_ACCOUNT_ID is unset (the API path is account-scoped).
15
+ const __dirname = dirname(fileURLToPath(import.meta.url));
16
+ function resolveKeyFile() {
17
+ return process.env.RUMMY_CLOUDFLARE_KEY_FILE
18
+ ? process.env.RUMMY_CLOUDFLARE_KEY_FILE
19
+ : join(__dirname, "..", "..", "..", "cloudflare.key");
20
+ }
21
+
22
+ // Inert unless cloudflare.key exists and CLOUDFLARE_ACCOUNT_ID is set.
23
+ // Matches model aliases starting with `@cf/` — Cloudflare Workers AI's
24
+ // own namespace, used verbatim with no prefix stripping
25
+ // (`@cf/google/gemma-4-26b-a4b-it`).
26
+ //
27
+ // Uses Cloudflare's OpenAI-compatible endpoint
28
+ // (`/v1/chat/completions`) so the streaming SSE accumulator is shared
29
+ // with the other OpenAI-shaped providers. Context-size lookups go to
30
+ // the native models-search API which exposes `properties` including
31
+ // the model's context window.
32
+ export default class Cloudflare {
33
+ #apiKey;
34
+ #accountId;
35
+ #contextCache = new Map();
36
+
37
+ constructor(core) {
38
+ const accountId = process.env.CLOUDFLARE_ACCOUNT_ID;
39
+ if (!accountId) return;
40
+ const keyFile = resolveKeyFile();
41
+ if (!existsSync(keyFile)) return;
42
+ const raw = readFileSync(keyFile, "utf8").trim();
43
+ if (!raw) return;
44
+ this.#apiKey = raw;
45
+ this.#accountId = accountId;
46
+
47
+ core.hooks.llm.providers.push({
48
+ name: "cloudflare",
49
+ matches: (model) => model.split("/")[0] === PROVIDER,
50
+ completion: (messages, model, options) =>
51
+ this.#completion(messages, model, options),
52
+ getContextSize: (model) => this.#getContextSize(model),
53
+ });
54
+ }
55
+
56
+ #baseUrl() {
57
+ return `https://api.cloudflare.com/client/v4/accounts/${this.#accountId}/ai`;
58
+ }
59
+
60
+ async #completion(messages, model, options = {}) {
61
+ const body = { model, messages };
62
+ if (options.maxTokens !== undefined) body.max_tokens = options.maxTokens;
63
+ if (options.temperature !== undefined)
64
+ body.temperature = options.temperature;
65
+
66
+ const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
67
+ const signal = options.signal
68
+ ? AbortSignal.any([options.signal, timeoutSignal])
69
+ : timeoutSignal;
70
+
71
+ const headers = { Authorization: `Bearer ${this.#apiKey}` };
72
+
73
+ try {
74
+ return await chatCompletionStream({
75
+ url: `${this.#baseUrl()}/v1/chat/completions`,
76
+ headers,
77
+ body,
78
+ signal,
79
+ });
80
+ } catch (err) {
81
+ if (err.status === 401 || err.status === 403) {
82
+ throw new Error(
83
+ msg("error.cloudflare_auth", {
84
+ status: `${err.status} - ${err.body}`,
85
+ }),
86
+ );
87
+ }
88
+ if (err.status) {
89
+ throw new Error(
90
+ msg("error.cloudflare_api", {
91
+ status: `${err.status} - ${err.body}`,
92
+ }),
93
+ );
94
+ }
95
+ throw err;
96
+ }
97
+ }
98
+
99
+ async #getContextSize(model) {
100
+ if (this.#contextCache.has(model)) return this.#contextCache.get(model);
101
+
102
+ // Cloudflare's models-search returns model metadata including
103
+ // `properties` (an array with `property_id` / `value` pairs).
104
+ // `context_window` (or `max_input_tokens` on some entries) is
105
+ // the field we want.
106
+ const url = `${this.#baseUrl()}/models/search?search=${encodeURIComponent(model)}`;
107
+ const res = await fetch(url, {
108
+ headers: { Authorization: `Bearer ${this.#apiKey}` },
109
+ signal: AbortSignal.timeout(FETCH_TIMEOUT),
110
+ });
111
+ if (!res.ok) {
112
+ throw new Error(
113
+ msg("error.cloudflare_models_failed", { model, status: res.status }),
114
+ );
115
+ }
116
+ const data = await res.json();
117
+ const entry = data.result.find((m) => m.name === model);
118
+ if (!entry) {
119
+ throw new Error(msg("error.cloudflare_model_not_found", { model }));
120
+ }
121
+ const props = entry.properties;
122
+ // Prefer `context_window` (full prompt+output combined) over
123
+ // `max_input_tokens` (input-only). Some Cloudflare entries have
124
+ // both, some only one. Picking the larger one is wrong (would
125
+ // pick input cap when context is what we want); explicit priority.
126
+ const ctxProp =
127
+ props.find((p) => p.property_id === "context_window") ??
128
+ props.find((p) => p.property_id === "max_input_tokens");
129
+ const ctx = ctxProp ? Number(ctxProp.value) : null;
130
+ if (!ctx) {
131
+ throw new Error(msg("error.cloudflare_no_context_length", { model }));
132
+ }
133
+ this.#contextCache.set(model, ctx);
134
+ return ctx;
135
+ }
136
+ }