@possumtech/rummy 2.1.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +40 -15
- package/.xai.key +1 -0
- package/PLUGINS.md +169 -53
- package/README.md +38 -32
- package/SPEC.md +366 -179
- package/bin/digest.js +1097 -0
- package/biome/no-fallbacks.grit +2 -2
- package/gemini.key +1 -0
- package/lang/en.json +10 -1
- package/migrations/001_initial_schema.sql +9 -2
- package/package.json +19 -8
- package/service.js +1 -0
- package/src/agent/AgentLoop.js +76 -26
- package/src/agent/ContextAssembler.js +2 -0
- package/src/agent/Entries.js +238 -60
- package/src/agent/ProjectAgent.js +44 -0
- package/src/agent/TurnExecutor.js +99 -30
- package/src/agent/XmlParser.js +206 -111
- package/src/agent/errors.js +35 -0
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +3 -42
- package/src/agent/materializeContext.js +30 -1
- package/src/agent/runs.sql +8 -18
- package/src/agent/tokens.js +0 -1
- package/src/agent/turns.sql +1 -0
- package/src/hooks/Hooks.js +26 -0
- package/src/hooks/RummyContext.js +12 -1
- package/src/lib/hedberg/README.md +60 -0
- package/src/lib/hedberg/hedberg.js +60 -0
- package/src/lib/hedberg/marker.js +158 -0
- package/src/{plugins → lib}/hedberg/matcher.js +1 -2
- package/src/llm/LlmProvider.js +41 -3
- package/src/llm/openaiStream.js +17 -0
- package/src/plugins/ask_user/ask_user.js +12 -2
- package/src/plugins/ask_user/ask_userDoc.md +1 -5
- package/src/plugins/budget/README.md +29 -24
- package/src/plugins/budget/budget.js +166 -110
- package/src/plugins/cli/README.md +3 -4
- package/src/plugins/cli/cli.js +31 -5
- package/src/plugins/cloudflare/cloudflare.js +136 -0
- package/src/plugins/cp/cp.js +41 -4
- package/src/plugins/cp/cpDoc.md +5 -6
- package/src/plugins/engine/engine.sql +1 -1
- package/src/plugins/env/README.md +5 -4
- package/src/plugins/env/env.js +7 -4
- package/src/plugins/env/envDoc.md +7 -8
- package/src/plugins/error/error.js +56 -15
- package/src/plugins/file/README.md +12 -3
- package/src/plugins/file/file.js +2 -2
- package/src/plugins/get/get.js +59 -36
- package/src/plugins/get/getDoc.md +10 -34
- package/src/plugins/google/google.js +115 -0
- package/src/plugins/hedberg/hedberg.js +13 -56
- package/src/plugins/helpers.js +66 -12
- package/src/plugins/index.js +1 -2
- package/src/plugins/instructions/README.md +44 -47
- package/src/plugins/instructions/instructions-system.md +44 -0
- package/src/plugins/instructions/instructions-user.md +53 -0
- package/src/plugins/instructions/instructions.js +58 -189
- package/src/plugins/known/README.md +6 -7
- package/src/plugins/known/known.js +24 -30
- package/src/plugins/log/log.js +41 -32
- package/src/plugins/mv/mv.js +40 -1
- package/src/plugins/mv/mvDoc.md +1 -8
- package/src/plugins/ollama/ollama.js +4 -3
- package/src/plugins/openai/openai.js +4 -3
- package/src/plugins/openrouter/openrouter.js +14 -4
- package/src/plugins/persona/README.md +11 -13
- package/src/plugins/persona/default.md +29 -0
- package/src/plugins/persona/persona.js +10 -66
- package/src/plugins/policy/policy.js +23 -22
- package/src/plugins/prompt/README.md +37 -27
- package/src/plugins/prompt/prompt.js +13 -19
- package/src/plugins/rm/rm.js +18 -0
- package/src/plugins/rm/rmDoc.md +5 -6
- package/src/plugins/rpc/rpc.js +3 -3
- package/src/plugins/set/set.js +205 -323
- package/src/plugins/set/setDoc.md +47 -17
- package/src/plugins/sh/README.md +6 -5
- package/src/plugins/sh/sh.js +8 -5
- package/src/plugins/sh/shDoc.md +7 -8
- package/src/plugins/skill/README.md +37 -14
- package/src/plugins/skill/skill.js +200 -101
- package/src/plugins/skill/skillDoc.js +3 -0
- package/src/plugins/skill/skillDoc.md +9 -0
- package/src/plugins/stream/README.md +7 -6
- package/src/plugins/stream/finalize.js +100 -0
- package/src/plugins/stream/stream.js +13 -45
- package/src/plugins/telemetry/telemetry.js +27 -4
- package/src/plugins/think/think.js +2 -3
- package/src/plugins/think/thinkDoc.md +2 -4
- package/src/plugins/unknown/README.md +1 -1
- package/src/plugins/unknown/unknown.js +17 -19
- package/src/plugins/update/update.js +4 -51
- package/src/plugins/update/updateDoc.md +21 -6
- package/src/plugins/xai/xai.js +68 -102
- package/src/plugins/yolo/yolo.js +102 -75
- package/src/sql/functions/hedmatch.js +1 -1
- package/src/sql/functions/hedreplace.js +1 -1
- package/src/sql/functions/hedsearch.js +1 -1
- package/src/sql/functions/slugify.js +16 -2
- package/BENCH_ENVIRONMENT.md +0 -230
- package/CLIENT_INTERFACE.md +0 -396
- package/last_run.txt +0 -5617
- package/scriptify/ask_run.js +0 -77
- package/scriptify/cache_probe.js +0 -66
- package/scriptify/cache_probe_grok.js +0 -74
- package/src/agent/budget.js +0 -33
- package/src/agent/config.js +0 -38
- package/src/plugins/hedberg/README.md +0 -71
- package/src/plugins/hedberg/docs.md +0 -0
- package/src/plugins/hedberg/edits.js +0 -55
- package/src/plugins/hedberg/normalize.js +0 -17
- package/src/plugins/hedberg/sed.js +0 -49
- package/src/plugins/instructions/instructions.md +0 -34
- package/src/plugins/instructions/instructions_104.md +0 -8
- package/src/plugins/instructions/instructions_105.md +0 -39
- package/src/plugins/instructions/instructions_106.md +0 -22
- package/src/plugins/instructions/instructions_107.md +0 -17
- package/src/plugins/instructions/instructions_108.md +0 -0
- package/src/plugins/known/knownDoc.js +0 -3
- package/src/plugins/known/knownDoc.md +0 -8
- package/src/plugins/unknown/unknownDoc.js +0 -3
- package/src/plugins/unknown/unknownDoc.md +0 -11
- package/turns/cli_1777462658211/turn_001.txt +0 -772
- package/turns/cli_1777462658211/turn_002.txt +0 -606
- package/turns/cli_1777462658211/turn_003.txt +0 -667
- package/turns/cli_1777462658211/turn_004.txt +0 -297
- package/turns/cli_1777462658211/turn_005.txt +0 -301
- package/turns/cli_1777462658211/turn_006.txt +0 -262
- package/turns/cli_1777465095132/turn_001.txt +0 -715
- package/turns/cli_1777465095132/turn_002.txt +0 -236
- package/turns/cli_1777465095132/turn_003.txt +0 -287
- package/turns/cli_1777465095132/turn_004.txt +0 -694
- package/turns/cli_1777465095132/turn_005.txt +0 -422
- package/turns/cli_1777465095132/turn_006.txt +0 -365
- package/turns/cli_1777465095132/turn_007.txt +0 -885
- package/turns/cli_1777465095132/turn_008.txt +0 -1277
- package/turns/cli_1777465095132/turn_009.txt +0 -736
- /package/src/{plugins → lib}/hedberg/patterns.js +0 -0
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
import { SUMMARY_MAX_CHARS } from "../helpers.js";
|
|
1
2
|
import docs from "./ask_userDoc.js";
|
|
2
3
|
|
|
4
|
+
// Per-side cap for the "question → answer" summary projection. Splitting
|
|
5
|
+
// before the arrow preserves the structural separator the model uses to
|
|
6
|
+
// read the pair as a unit; a single trailing slice could lose the arrow
|
|
7
|
+
// entirely when either side is large.
|
|
8
|
+
const ARROW = " → ";
|
|
9
|
+
const HALF = Math.floor((SUMMARY_MAX_CHARS - ARROW.length) / 2);
|
|
10
|
+
|
|
3
11
|
const LOG_ACTION_RE = /^log:\/\/turn_\d+\/(\w+)\//;
|
|
4
12
|
|
|
5
13
|
export default class AskUser {
|
|
@@ -68,7 +76,9 @@ export default class AskUser {
|
|
|
68
76
|
|
|
69
77
|
summary(entry) {
|
|
70
78
|
const { question, answer } = entry.attributes;
|
|
71
|
-
if (answer)
|
|
72
|
-
|
|
79
|
+
if (answer) {
|
|
80
|
+
return `${question.slice(0, HALF)}${ARROW}${answer.slice(0, HALF)}`;
|
|
81
|
+
}
|
|
82
|
+
return question.slice(0, SUMMARY_MAX_CHARS);
|
|
73
83
|
}
|
|
74
84
|
}
|
|
@@ -1,10 +1,6 @@
|
|
|
1
1
|
## <ask_user question="[Question?]">[option1; option2; ...]</ask_user> - Ask the user a question
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
<!-- Positive framing. Shows what ask_user IS for. -->
|
|
3
|
+
YOU SHOULD ONLY use <ask_user> for decisions, preferences, or approvals the user must make.
|
|
5
4
|
|
|
6
5
|
Example: <ask_user question="Which test framework?">Mocha; Jest; Node Native</ask_user>
|
|
7
|
-
<!-- Preference decision. Model truly cannot know this without asking. -->
|
|
8
|
-
|
|
9
6
|
Example: <ask_user question="Deploy to staging or production?">staging; production</ask_user>
|
|
10
|
-
<!-- Consequential action. High-stakes choice. -->
|
|
@@ -7,37 +7,42 @@ Context ceiling enforcement.
|
|
|
7
7
|
Ceiling = `floor(contextSize × RUMMY_BUDGET_CEILING)` (default 0.9). The
|
|
8
8
|
10% headroom is the system's operating room for graceful overflow
|
|
9
9
|
handling. No per-write gating — tools run uninterrupted. Enforcement
|
|
10
|
-
happens at
|
|
10
|
+
happens at one boundary: the pre-LLM grinder.
|
|
11
11
|
|
|
12
12
|
## Enforcement Points
|
|
13
13
|
|
|
14
|
-
1. **Pre-LLM
|
|
15
|
-
|
|
16
|
-
(demote the incoming prompt, re-materialize, re-check). Runs in the
|
|
17
|
-
headroom if that fits. On non-first turns or still-over after
|
|
18
|
-
Prompt Demotion, emits a 413 error via `hooks.error.log` so the
|
|
19
|
-
strike system treats the overflow as a turn-level event.
|
|
14
|
+
1. **Pre-LLM grinder** (`turn.beforeDispatch` filter): four-step
|
|
15
|
+
ladder per SPEC §budget_enforcement.
|
|
20
16
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
17
|
+
1. Check budget. If under ceiling → proceed.
|
|
18
|
+
2. Soft 413: demote `(current_turn − 1)` visible run_views to
|
|
19
|
+
`summarized` (all schemes, no exemption). Re-materialize, recheck.
|
|
20
|
+
3. Soft 413: demote the incoming `prompt://N` to `summarized`.
|
|
21
|
+
Re-materialize, recheck.
|
|
22
|
+
4. Hard 413: emit `error://`, set `ok=false` on the packet so
|
|
23
|
+
TurnExecutor short-circuits dispatch.
|
|
27
24
|
|
|
28
|
-
3
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
Steps 2 and 3 also emit `error://` 413 entries when they fire so
|
|
26
|
+
the model sees what was auto-demoted next turn. The grinder never
|
|
27
|
+
demotes speculatively or helpfully — only in response to actual
|
|
28
|
+
overflow.
|
|
29
|
+
|
|
30
|
+
2. **LLM rejection** (`isContextExceeded` in TurnExecutor): turn-1
|
|
31
|
+
token-estimate drift causes the LLM to reject. Same 413 error path
|
|
32
|
+
as the grinder's hard step.
|
|
31
33
|
|
|
32
34
|
## Files
|
|
33
35
|
|
|
34
|
-
- **budget.js** — Plugin.
|
|
35
|
-
`
|
|
36
|
+
- **budget.js** — Plugin. Math (`ceiling`, `measureMessages`,
|
|
37
|
+
`measureRows`, `computeBudget`), 413 body shaper (`overflowBody`),
|
|
38
|
+
and the plugin class itself.
|
|
39
|
+
|
|
40
|
+
## Hook participation
|
|
36
41
|
|
|
37
|
-
|
|
42
|
+
- `core.filter("turn.beforeDispatch", ...)` — pre-LLM grinder. Returns
|
|
43
|
+
the (possibly demoted) packet with `ok` / `overflow` flags.
|
|
44
|
+
- `core.filter("assembly.user", ..., 90)` — renders the `<budget>`
|
|
45
|
+
table into the user message.
|
|
38
46
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
- **Hook**: `hooks.budget.postDispatch` — post-dispatch re-check + Turn
|
|
42
|
-
Demotion. Emits 413 errors through the unified error channel; there
|
|
43
|
-
is no separate `budget://` scheme.
|
|
47
|
+
Emits 413 errors through the unified error channel (`hooks.error.log.emit`);
|
|
48
|
+
there is no separate `budget://` scheme.
|
|
@@ -1,14 +1,35 @@
|
|
|
1
|
-
import
|
|
2
|
-
import materializeContext from "../../agent/materializeContext.js";
|
|
1
|
+
import ContextAssembler from "../../agent/ContextAssembler.js";
|
|
3
2
|
import { countTokens } from "../../agent/tokens.js";
|
|
4
3
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
4
|
+
const CEILING_RATIO = Number(process.env.RUMMY_BUDGET_CEILING);
|
|
5
|
+
|
|
6
|
+
export function ceiling(contextSize) {
|
|
7
|
+
return Math.floor(contextSize * CEILING_RATIO);
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
// Sum assembled-message token counts; used by the enforce gate.
|
|
11
|
+
export function measureMessages(messages) {
|
|
12
|
+
return messages.reduce((sum, m) => sum + countTokens(m.content), 0);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// Sum projected row body token counts; used by prompt.js pre-assembly.
|
|
16
|
+
export function measureRows(rows) {
|
|
17
|
+
return rows.reduce((sum, r) => sum + countTokens(r.body), 0);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Single source of truth for budget numbers; tokenUsage echoes totalTokens for the wire attribute.
|
|
21
|
+
export function computeBudget({ contextSize, totalTokens }) {
|
|
22
|
+
const cap = ceiling(contextSize);
|
|
23
|
+
const tokensFree = Math.max(0, cap - totalTokens);
|
|
24
|
+
const overflow = Math.max(0, totalTokens - cap);
|
|
25
|
+
return {
|
|
26
|
+
ceiling: cap,
|
|
27
|
+
totalTokens,
|
|
28
|
+
tokenUsage: totalTokens,
|
|
29
|
+
tokensFree,
|
|
30
|
+
overflow,
|
|
31
|
+
ok: overflow === 0,
|
|
32
|
+
};
|
|
12
33
|
}
|
|
13
34
|
|
|
14
35
|
// 413 error body; wire format is part of the model contract.
|
|
@@ -17,10 +38,10 @@ export function overflowBody(overflow, contextSize, demoted) {
|
|
|
17
38
|
const size = cap + overflow;
|
|
18
39
|
const count = demoted.length;
|
|
19
40
|
const totalTokens = demoted.reduce((s, r) => s + r.tokens, 0);
|
|
20
|
-
const head = `Token Budget overflow: packet was ${size} tokens, ceiling is ${cap}. ${count} promotion${count === 1 ? "" : "s"} (${totalTokens} tokens) demoted
|
|
41
|
+
const head = `Token Budget overflow: packet was ${size} tokens, ceiling is ${cap}. ${count} promotion${count === 1 ? "" : "s"} (${totalTokens} tokens) demoted.`;
|
|
21
42
|
if (count === 0) return head;
|
|
22
43
|
const lines = demoted.map((d) =>
|
|
23
|
-
d.turn
|
|
44
|
+
d.turn != null
|
|
24
45
|
? `- ${d.path} (turn ${d.turn}, ${d.tokens} tokens)`
|
|
25
46
|
: `- ${d.path} (${d.tokens} tokens)`,
|
|
26
47
|
);
|
|
@@ -32,11 +53,23 @@ export default class Budget {
|
|
|
32
53
|
|
|
33
54
|
constructor(core) {
|
|
34
55
|
this.#core = core;
|
|
35
|
-
core.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
56
|
+
core.filter("turn.beforeDispatch", this.#onBeforeDispatch.bind(this));
|
|
57
|
+
core.filter("assembly.user", this.assembleBudget.bind(this), 90);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Filter participant. Receives the assembled packet; returns a
|
|
61
|
+
// (possibly modified) packet. The pre-LLM grinder demotes-and-
|
|
62
|
+
// rechecks per SPEC §budget_enforcement; if it can't fit after the
|
|
63
|
+
// ladder runs, sets ok=false so TurnExecutor short-circuits.
|
|
64
|
+
async #onBeforeDispatch(packet, ctxBag) {
|
|
65
|
+
return this.enforce({
|
|
66
|
+
contextSize: packet.contextSize,
|
|
67
|
+
messages: packet.messages,
|
|
68
|
+
rows: packet.rows,
|
|
69
|
+
lastPromptTokens: packet.lastPromptTokens,
|
|
70
|
+
ctx: ctxBag.ctx,
|
|
71
|
+
rummy: ctxBag.rummy,
|
|
72
|
+
});
|
|
40
73
|
}
|
|
41
74
|
|
|
42
75
|
// Renders <budget> at priority 90; see SPEC #token_accounting.
|
|
@@ -140,27 +173,47 @@ export default class Budget {
|
|
|
140
173
|
};
|
|
141
174
|
}
|
|
142
175
|
|
|
143
|
-
async #
|
|
144
|
-
|
|
145
|
-
runId,
|
|
146
|
-
turn,
|
|
147
|
-
loopId,
|
|
148
|
-
rummy,
|
|
149
|
-
demotedCount = 0,
|
|
150
|
-
demotedTokens = 0,
|
|
151
|
-
}) {
|
|
176
|
+
async #emit({ message, ctx, rummy, demoted }) {
|
|
177
|
+
const totalTokens = demoted.reduce((s, r) => s + r.tokens, 0);
|
|
152
178
|
await rummy.hooks.error.log.emit({
|
|
153
179
|
store: rummy.entries,
|
|
154
|
-
runId,
|
|
155
|
-
turn,
|
|
156
|
-
loopId,
|
|
180
|
+
runId: ctx.runId,
|
|
181
|
+
turn: ctx.turn,
|
|
182
|
+
loopId: ctx.loopId,
|
|
157
183
|
message,
|
|
158
184
|
status: 413,
|
|
159
|
-
attributes: {
|
|
185
|
+
attributes: {
|
|
186
|
+
demotedCount: demoted.length,
|
|
187
|
+
demotedTokens: totalTokens,
|
|
188
|
+
},
|
|
160
189
|
});
|
|
161
190
|
}
|
|
162
191
|
|
|
163
|
-
|
|
192
|
+
async #reassemble({ rows, ctx, rummy, contextSize, lastPromptTokens }) {
|
|
193
|
+
return ContextAssembler.assembleFromTurnContext(
|
|
194
|
+
rows,
|
|
195
|
+
{
|
|
196
|
+
type: ctx.mode,
|
|
197
|
+
systemPrompt: ctx.systemPrompt,
|
|
198
|
+
contextSize,
|
|
199
|
+
toolSet: ctx.toolSet,
|
|
200
|
+
lastContextTokens: lastPromptTokens,
|
|
201
|
+
turn: ctx.turn,
|
|
202
|
+
},
|
|
203
|
+
rummy.hooks,
|
|
204
|
+
);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// Pre-LLM grinder ladder. SPEC §budget_enforcement.
|
|
208
|
+
//
|
|
209
|
+
// 1. Check budget. ok → return.
|
|
210
|
+
// 2. Soft 413: demote (current_turn − 1) visible. Recheck.
|
|
211
|
+
// 3. Soft 413: demote current prompt. Recheck.
|
|
212
|
+
// 4. Hard 413: emit and return ok=false.
|
|
213
|
+
//
|
|
214
|
+
// Every step that demotes anything emits a 413 error://. Soft 413s
|
|
215
|
+
// keep the run alive (turn proceeds to LLM); the hard 413 bubbles
|
|
216
|
+
// through to AgentLoop.
|
|
164
217
|
async enforce({
|
|
165
218
|
contextSize,
|
|
166
219
|
messages,
|
|
@@ -173,6 +226,7 @@ export default class Budget {
|
|
|
173
226
|
return { messages, rows, assembledTokens: 0, ok: true };
|
|
174
227
|
}
|
|
175
228
|
|
|
229
|
+
// Step 1.
|
|
176
230
|
const first = this.#check({
|
|
177
231
|
contextSize,
|
|
178
232
|
messages,
|
|
@@ -181,103 +235,105 @@ export default class Budget {
|
|
|
181
235
|
});
|
|
182
236
|
if (first.ok) return first;
|
|
183
237
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
238
|
+
const store = rummy.entries;
|
|
239
|
+
|
|
240
|
+
// Step 2: previous-turn demotion.
|
|
241
|
+
const prevTurn = ctx.turn - 1;
|
|
242
|
+
const rawTurnDemoted =
|
|
243
|
+
prevTurn >= 0 ? await store.demoteTurnEntries(ctx.runId, prevTurn) : [];
|
|
244
|
+
const turnDemoted = rawTurnDemoted.map((d) => ({ ...d, turn: prevTurn }));
|
|
245
|
+
if (turnDemoted.length > 0) {
|
|
246
|
+
for (const r of rows) {
|
|
247
|
+
if (r.source_turn === prevTurn && r.visibility === "visible") {
|
|
248
|
+
r.body = r.sBody;
|
|
249
|
+
r.visibility = "summarized";
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
const reMessages = await this.#reassemble({
|
|
253
|
+
rows,
|
|
254
|
+
ctx,
|
|
191
255
|
rummy,
|
|
256
|
+
contextSize,
|
|
257
|
+
lastPromptTokens: 0,
|
|
192
258
|
});
|
|
193
|
-
|
|
259
|
+
const rechecked = this.#check({
|
|
260
|
+
contextSize,
|
|
261
|
+
messages: reMessages,
|
|
262
|
+
rows,
|
|
263
|
+
lastPromptTokens: 0,
|
|
264
|
+
});
|
|
265
|
+
if (rechecked.ok) {
|
|
266
|
+
await this.#emit({
|
|
267
|
+
message: overflowBody(first.overflow, contextSize, turnDemoted),
|
|
268
|
+
ctx,
|
|
269
|
+
rummy,
|
|
270
|
+
demoted: turnDemoted,
|
|
271
|
+
});
|
|
272
|
+
return rechecked;
|
|
273
|
+
}
|
|
274
|
+
first.overflow = rechecked.overflow;
|
|
194
275
|
}
|
|
195
276
|
|
|
277
|
+
// Step 3: current-prompt demotion.
|
|
196
278
|
const promptRow = rows.findLast(
|
|
197
279
|
(r) => r.category === "prompt" && r.scheme === "prompt",
|
|
198
280
|
);
|
|
199
|
-
|
|
200
|
-
|
|
281
|
+
const promptDemoted = [];
|
|
282
|
+
if (promptRow && promptRow.visibility === "visible") {
|
|
283
|
+
await store.set({
|
|
201
284
|
runId: ctx.runId,
|
|
202
285
|
path: promptRow.path,
|
|
203
286
|
visibility: "summarized",
|
|
204
287
|
});
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
contextSize,
|
|
216
|
-
});
|
|
217
|
-
const rechecked = this.#check({
|
|
218
|
-
contextSize,
|
|
219
|
-
messages: reMat.messages,
|
|
220
|
-
rows: reMat.rows,
|
|
221
|
-
lastPromptTokens: reMat.lastContextTokens,
|
|
222
|
-
});
|
|
223
|
-
if (!rechecked.ok) {
|
|
224
|
-
const cap = ceiling(contextSize);
|
|
225
|
-
await this.#emitOverflow({
|
|
226
|
-
message: `Token Budget overflow: packet was ${cap + rechecked.overflow} tokens after demoting the prompt, ceiling is ${cap}.`,
|
|
227
|
-
runId: ctx.runId,
|
|
228
|
-
turn: ctx.turn,
|
|
229
|
-
loopId: ctx.loopId,
|
|
288
|
+
promptDemoted.push({
|
|
289
|
+
path: promptRow.path,
|
|
290
|
+
turn: promptRow.source_turn,
|
|
291
|
+
tokens: countTokens(promptRow.body) - countTokens(promptRow.sBody),
|
|
292
|
+
});
|
|
293
|
+
promptRow.body = promptRow.sBody;
|
|
294
|
+
promptRow.visibility = "summarized";
|
|
295
|
+
const reMessages = await this.#reassemble({
|
|
296
|
+
rows,
|
|
297
|
+
ctx,
|
|
230
298
|
rummy,
|
|
299
|
+
contextSize,
|
|
300
|
+
lastPromptTokens: 0,
|
|
231
301
|
});
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
async postDispatch({ contextSize, ctx, rummy }) {
|
|
238
|
-
if (!contextSize) return { failed: false };
|
|
239
|
-
const postMat = await materializeContext({
|
|
240
|
-
db: rummy.db,
|
|
241
|
-
hooks: rummy.hooks,
|
|
242
|
-
runId: ctx.runId,
|
|
243
|
-
loopId: ctx.loopId,
|
|
244
|
-
turn: ctx.turn,
|
|
245
|
-
systemPrompt: ctx.systemPrompt,
|
|
246
|
-
mode: ctx.mode,
|
|
247
|
-
toolSet: ctx.toolSet,
|
|
248
|
-
contextSize,
|
|
249
|
-
});
|
|
250
|
-
const baseline = postMat.lastContextTokens;
|
|
251
|
-
const predicted = predictNextPacket(postMat.rows, ctx.turn, baseline);
|
|
252
|
-
const cap = ceiling(contextSize);
|
|
253
|
-
if (predicted <= cap) return { failed: false };
|
|
254
|
-
const post = { overflow: predicted - cap };
|
|
255
|
-
|
|
256
|
-
const store = rummy.entries;
|
|
257
|
-
let demotedEntries = await store.demoteTurnEntries(ctx.runId, ctx.turn);
|
|
258
|
-
// Prior-turn-pressure fallback; SPEC #budget_enforcement.
|
|
259
|
-
if (demotedEntries.length === 0) {
|
|
260
|
-
demotedEntries = await store.demoteRunVisibleEntries(ctx.runId);
|
|
261
|
-
}
|
|
262
|
-
const promptRow = postMat.rows.find((r) => r.scheme === "prompt");
|
|
263
|
-
if (promptRow) {
|
|
264
|
-
await store.set({
|
|
265
|
-
runId: ctx.runId,
|
|
266
|
-
path: promptRow.path,
|
|
267
|
-
visibility: "summarized",
|
|
302
|
+
const rechecked = this.#check({
|
|
303
|
+
contextSize,
|
|
304
|
+
messages: reMessages,
|
|
305
|
+
rows,
|
|
306
|
+
lastPromptTokens: 0,
|
|
268
307
|
});
|
|
308
|
+
if (rechecked.ok) {
|
|
309
|
+
await this.#emit({
|
|
310
|
+
message: overflowBody(first.overflow, contextSize, [
|
|
311
|
+
...turnDemoted,
|
|
312
|
+
...promptDemoted,
|
|
313
|
+
]),
|
|
314
|
+
ctx,
|
|
315
|
+
rummy,
|
|
316
|
+
demoted: [...turnDemoted, ...promptDemoted],
|
|
317
|
+
});
|
|
318
|
+
return rechecked;
|
|
319
|
+
}
|
|
320
|
+
first.overflow = rechecked.overflow;
|
|
269
321
|
}
|
|
270
322
|
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
runId: ctx.runId,
|
|
277
|
-
turn: ctx.turn,
|
|
278
|
-
loopId: ctx.loopId,
|
|
323
|
+
// Step 4: hard 413.
|
|
324
|
+
const allDemoted = [...turnDemoted, ...promptDemoted];
|
|
325
|
+
await this.#emit({
|
|
326
|
+
message: overflowBody(first.overflow, contextSize, allDemoted),
|
|
327
|
+
ctx,
|
|
279
328
|
rummy,
|
|
329
|
+
demoted: allDemoted,
|
|
280
330
|
});
|
|
281
|
-
return {
|
|
331
|
+
return {
|
|
332
|
+
messages,
|
|
333
|
+
rows,
|
|
334
|
+
assembledTokens: ceiling(contextSize) + first.overflow,
|
|
335
|
+
overflow: first.overflow,
|
|
336
|
+
ok: false,
|
|
337
|
+
};
|
|
282
338
|
}
|
|
283
339
|
}
|
|
@@ -29,9 +29,8 @@ preserves existing vars).
|
|
|
29
29
|
|---|---|---|
|
|
30
30
|
| `RUMMY_MODE` | `act` | `ask` or `act`. |
|
|
31
31
|
|
|
32
|
-
`
|
|
33
|
-
|
|
34
|
-
overflow.
|
|
32
|
+
`RUMMY_LOOP_TIMEOUT` is declared in `.env.example` and read directly
|
|
33
|
+
from `process.env`. Watchdog exits with code `124` on timeout.
|
|
35
34
|
|
|
36
35
|
Per-run defaults (`RUMMY_YOLO`, `RUMMY_NO_REPO`, `RUMMY_NO_WEB`,
|
|
37
36
|
`RUMMY_NO_INTERACTION`, `RUMMY_NO_PROPOSALS`) cascade through
|
|
@@ -61,7 +60,7 @@ provider key. Bench harnesses call `rummy-cli` with just
|
|
|
61
60
|
| `0` | Terminal status `200`. Model claimed success. |
|
|
62
61
|
| `1` | Terminal status in `{204, 413, 422, 499, 500}` or run crashed. |
|
|
63
62
|
| `2` | Arg parse error (invalid flag shape, missing required env). |
|
|
64
|
-
| `124` | Wall-clock timeout (`
|
|
63
|
+
| `124` | Wall-clock timeout (`RUMMY_LOOP_TIMEOUT` exceeded). |
|
|
65
64
|
|
|
66
65
|
External verifiers (terminal-bench, SWE-bench, etc.) decide actual
|
|
67
66
|
task success — the exit code only reports rummy's internal terminal
|
package/src/plugins/cli/cli.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import config from "../../agent/config.js";
|
|
2
1
|
import ProjectAgent from "../../agent/ProjectAgent.js";
|
|
2
|
+
import File from "../file/file.js";
|
|
3
3
|
|
|
4
4
|
const TERMINAL_STATUSES = new Set([200, 204, 413, 422, 499, 500]);
|
|
5
5
|
|
|
@@ -42,10 +42,36 @@ export default class Cli {
|
|
|
42
42
|
const projectAgent = new ProjectAgent(db, hooks);
|
|
43
43
|
const { projectId } = await projectAgent.init(alias, projectRoot);
|
|
44
44
|
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
// Operator-declared project surface (comma-separated literal paths,
|
|
46
|
+
// relative to project root). Files are ingested as entries with
|
|
47
|
+
// default visibility=archived; the model promotes specific
|
|
48
|
+
// entries via <get>. Decouples membership (constraint) from
|
|
49
|
+
// visibility (per-entry, model-controlled).
|
|
50
|
+
const projectFilesRaw = process.env.RUMMY_PROJECT_FILES;
|
|
51
|
+
if (projectFilesRaw) {
|
|
52
|
+
const patterns = projectFilesRaw
|
|
53
|
+
.split(",")
|
|
54
|
+
.map((s) => s.trim())
|
|
55
|
+
.filter(Boolean);
|
|
56
|
+
for (const pattern of patterns) {
|
|
57
|
+
await File.setConstraint(db, projectId, pattern, "add");
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Watchdog; overridable via --RUMMY_LOOP_TIMEOUT=<ms>. Drains
|
|
62
|
+
// the active loop before exit so SQLite, turn slices, and
|
|
63
|
+
// last_run.txt are durable on disk before the process dies —
|
|
64
|
+
// without this, harbor's outer asyncio.wait_for kills the
|
|
65
|
+
// docker exec mid-pipeline and the trial.log cp commands never
|
|
66
|
+
// run, leaving the post-mortem packet empty.
|
|
67
|
+
const timeoutMs = Number(process.env.RUMMY_LOOP_TIMEOUT);
|
|
68
|
+
const timer = setTimeout(async () => {
|
|
69
|
+
console.error(`rummy-cli: timed out after ${timeoutMs}ms — draining`);
|
|
70
|
+
try {
|
|
71
|
+
await projectAgent.shutdown();
|
|
72
|
+
} catch (err) {
|
|
73
|
+
console.error(`rummy-cli: drain failed: ${err.message}`);
|
|
74
|
+
}
|
|
49
75
|
process.exit(124);
|
|
50
76
|
}, timeoutMs);
|
|
51
77
|
timer.unref();
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import msg from "../../agent/messages.js";
|
|
5
|
+
import { chatCompletionStream } from "../../llm/openaiStream.js";
|
|
6
|
+
|
|
7
|
+
const FETCH_TIMEOUT = Number(process.env.RUMMY_FETCH_TIMEOUT);
|
|
8
|
+
|
|
9
|
+
const PROVIDER = "@cf";
|
|
10
|
+
|
|
11
|
+
// Repo-root-relative key file. Resolved relative to this source file so
|
|
12
|
+
// CWD changes during runs (programbench/tbench cd into workspaces) don't
|
|
13
|
+
// break the lookup. Plugin is inert if the file is missing or empty, OR if
|
|
14
|
+
// CLOUDFLARE_ACCOUNT_ID is unset (the API path is account-scoped).
|
|
15
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
function resolveKeyFile() {
|
|
17
|
+
return process.env.RUMMY_CLOUDFLARE_KEY_FILE
|
|
18
|
+
? process.env.RUMMY_CLOUDFLARE_KEY_FILE
|
|
19
|
+
: join(__dirname, "..", "..", "..", "cloudflare.key");
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// Inert unless the key file exists, is non-empty, and CLOUDFLARE_ACCOUNT_ID is set.
|
|
23
|
+
// Matches model aliases starting with `@cf/` — Cloudflare Workers AI's
|
|
24
|
+
// own namespace, used verbatim with no prefix stripping
|
|
25
|
+
// (`@cf/google/gemma-4-26b-a4b-it`).
|
|
26
|
+
//
|
|
27
|
+
// Uses Cloudflare's OpenAI-compatible endpoint
|
|
28
|
+
// (`/v1/chat/completions`) so the streaming SSE accumulator is shared
|
|
29
|
+
// with the other OpenAI-shaped providers. Context-size lookups go to
|
|
30
|
+
// the native models-search API which exposes `properties` including
|
|
31
|
+
// the model's context window.
|
|
32
|
+
export default class Cloudflare {
|
|
33
|
+
#apiKey;
|
|
34
|
+
#accountId;
|
|
35
|
+
#contextCache = new Map();
|
|
36
|
+
|
|
37
|
+
constructor(core) {
|
|
38
|
+
const accountId = process.env.CLOUDFLARE_ACCOUNT_ID;
|
|
39
|
+
if (!accountId) return;
|
|
40
|
+
const keyFile = resolveKeyFile();
|
|
41
|
+
if (!existsSync(keyFile)) return;
|
|
42
|
+
const raw = readFileSync(keyFile, "utf8").trim();
|
|
43
|
+
if (!raw) return;
|
|
44
|
+
this.#apiKey = raw;
|
|
45
|
+
this.#accountId = accountId;
|
|
46
|
+
|
|
47
|
+
core.hooks.llm.providers.push({
|
|
48
|
+
name: "cloudflare",
|
|
49
|
+
matches: (model) => model.split("/")[0] === PROVIDER,
|
|
50
|
+
completion: (messages, model, options) =>
|
|
51
|
+
this.#completion(messages, model, options),
|
|
52
|
+
getContextSize: (model) => this.#getContextSize(model),
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
#baseUrl() {
|
|
57
|
+
return `https://api.cloudflare.com/client/v4/accounts/${this.#accountId}/ai`;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
async #completion(messages, model, options = {}) {
|
|
61
|
+
const body = { model, messages };
|
|
62
|
+
if (options.maxTokens !== undefined) body.max_tokens = options.maxTokens;
|
|
63
|
+
if (options.temperature !== undefined)
|
|
64
|
+
body.temperature = options.temperature;
|
|
65
|
+
|
|
66
|
+
const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT);
|
|
67
|
+
const signal = options.signal
|
|
68
|
+
? AbortSignal.any([options.signal, timeoutSignal])
|
|
69
|
+
: timeoutSignal;
|
|
70
|
+
|
|
71
|
+
const headers = { Authorization: `Bearer ${this.#apiKey}` };
|
|
72
|
+
|
|
73
|
+
try {
|
|
74
|
+
return await chatCompletionStream({
|
|
75
|
+
url: `${this.#baseUrl()}/v1/chat/completions`,
|
|
76
|
+
headers,
|
|
77
|
+
body,
|
|
78
|
+
signal,
|
|
79
|
+
});
|
|
80
|
+
} catch (err) {
|
|
81
|
+
if (err.status === 401 || err.status === 403) {
|
|
82
|
+
throw new Error(
|
|
83
|
+
msg("error.cloudflare_auth", {
|
|
84
|
+
status: `${err.status} - ${err.body}`,
|
|
85
|
+
}),
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
if (err.status) {
|
|
89
|
+
throw new Error(
|
|
90
|
+
msg("error.cloudflare_api", {
|
|
91
|
+
status: `${err.status} - ${err.body}`,
|
|
92
|
+
}),
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
throw err;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
async #getContextSize(model) {
|
|
100
|
+
if (this.#contextCache.has(model)) return this.#contextCache.get(model);
|
|
101
|
+
|
|
102
|
+
// Cloudflare's models-search returns model metadata including
|
|
103
|
+
// `properties` (an array with `property_id` / `value` pairs).
|
|
104
|
+
// `context_window` (or `max_input_tokens` on some entries) is
|
|
105
|
+
// the field we want.
|
|
106
|
+
const url = `${this.#baseUrl()}/models/search?search=${encodeURIComponent(model)}`;
|
|
107
|
+
const res = await fetch(url, {
|
|
108
|
+
headers: { Authorization: `Bearer ${this.#apiKey}` },
|
|
109
|
+
signal: AbortSignal.timeout(FETCH_TIMEOUT),
|
|
110
|
+
});
|
|
111
|
+
if (!res.ok) {
|
|
112
|
+
throw new Error(
|
|
113
|
+
msg("error.cloudflare_models_failed", { model, status: res.status }),
|
|
114
|
+
);
|
|
115
|
+
}
|
|
116
|
+
const data = await res.json();
|
|
117
|
+
const entry = data.result.find((m) => m.name === model);
|
|
118
|
+
if (!entry) {
|
|
119
|
+
throw new Error(msg("error.cloudflare_model_not_found", { model }));
|
|
120
|
+
}
|
|
121
|
+
const props = entry.properties;
|
|
122
|
+
// Prefer `context_window` (full prompt+output combined) over
|
|
123
|
+
// `max_input_tokens` (input-only). Some Cloudflare entries have
|
|
124
|
+
// both, some only one. Picking the larger one is wrong (would
|
|
125
|
+
// pick input cap when context is what we want); explicit priority.
|
|
126
|
+
const ctxProp =
|
|
127
|
+
props.find((p) => p.property_id === "context_window") ??
|
|
128
|
+
props.find((p) => p.property_id === "max_input_tokens");
|
|
129
|
+
const ctx = ctxProp ? Number(ctxProp.value) : null;
|
|
130
|
+
if (!ctx) {
|
|
131
|
+
throw new Error(msg("error.cloudflare_no_context_length", { model }));
|
|
132
|
+
}
|
|
133
|
+
this.#contextCache.set(model, ctx);
|
|
134
|
+
return ctx;
|
|
135
|
+
}
|
|
136
|
+
}
|