@possumtech/rummy 2.2.1 → 2.3.2
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
- package/package.json +14 -6
- package/service.js +18 -10
- package/src/agent/AgentLoop.js +2 -11
- package/src/agent/ContextAssembler.js +34 -3
- package/src/agent/Entries.js +16 -89
- package/src/agent/ProjectAgent.js +1 -16
- package/src/agent/TurnExecutor.js +12 -52
- package/src/agent/XmlParser.js +30 -117
- package/src/agent/errors.js +3 -22
- package/src/agent/materializeContext.js +3 -11
- package/src/hooks/Hooks.js +0 -29
- package/src/hooks/PluginContext.js +15 -0
- package/src/lib/hedberg/hedberg.js +4 -14
- package/src/lib/hedberg/marker.js +15 -59
- package/src/llm/LlmProvider.js +13 -26
- package/src/llm/errors.js +3 -11
- package/src/llm/openaiStream.js +6 -46
- package/src/plugins/ask_user/ask_user.js +12 -17
- package/src/plugins/budget/README.md +46 -8
- package/src/plugins/budget/budget.js +23 -42
- package/src/plugins/cp/cp.js +28 -18
- package/src/plugins/env/env.js +11 -7
- package/src/plugins/error/error.js +8 -37
- package/src/plugins/get/get.js +42 -24
- package/src/plugins/google/google.js +23 -3
- package/src/plugins/helpers.js +34 -50
- package/src/plugins/instructions/README.md +2 -2
- package/src/plugins/instructions/instructions-user.md +1 -1
- package/src/plugins/instructions/instructions.js +19 -6
- package/src/plugins/known/known.js +1 -8
- package/src/plugins/log/log.js +15 -1
- package/src/plugins/mv/mv.js +29 -19
- package/src/plugins/persona/persona.js +4 -4
- package/src/plugins/prompt/README.md +1 -1
- package/src/plugins/prompt/prompt.js +1 -1
- package/src/plugins/rm/rm.js +26 -15
- package/src/plugins/rm/rmDoc.md +0 -2
- package/src/plugins/set/set.js +37 -84
- package/src/plugins/set/setDoc.md +16 -16
- package/src/plugins/sh/sh.js +10 -8
- package/src/plugins/skill/skillDoc.md +1 -1
- package/src/plugins/unknown/README.md +1 -1
- package/src/plugins/unknown/unknown.js +2 -6
- package/src/plugins/update/update.js +3 -2
- package/src/plugins/update/updateDoc.md +1 -1
- package/.env.example +0 -152
- package/.xai.key +0 -1
- package/PLUGINS.md +0 -962
- package/SPEC.md +0 -1897
- package/biome/no-fallbacks.grit +0 -50
- package/gemini.key +0 -1
package/src/llm/errors.js
CHANGED
|
@@ -14,20 +14,15 @@ export function isContextExceededMessage(message) {
|
|
|
14
14
|
}
|
|
15
15
|
|
|
16
16
|
const ABORT_PATTERN = /\b(aborted|AbortError|TimeoutError)\b/;
|
|
17
|
-
// `terminated`
|
|
18
|
-
// mid-fetch (TLSSocket.onHttpSocketClose → Fetch.onAborted) — same lane
|
|
19
|
-
// as ECONNRESET, just surfaced through a streaming-fetch path.
|
|
17
|
+
// `terminated` = undici socket close mid-fetch (same lane as ECONNRESET).
|
|
20
18
|
const GATEWAY_PATTERN =
|
|
21
19
|
/\b(502|504|ECONNREFUSED|ECONNRESET|ENOTFOUND|EHOSTUNREACH|ETIMEDOUT|EPIPE|ECONNABORTED|fetch failed|terminated)\b/i;
|
|
22
20
|
const RATE_LIMIT_PATTERN = /\b429\b/;
|
|
23
21
|
const STATUS_503_PATTERN = /\b503\b/;
|
|
24
22
|
const STATUS_500_PATTERN = /\b500\b/;
|
|
25
|
-
// llamacpp / OpenAI-compatible servers signal model-warmup with this body.
|
|
26
23
|
const MODEL_WARMUP_PATTERN = /\bLoading model\b/i;
|
|
27
24
|
|
|
28
|
-
// Returns "gateway" | "warmup" | "rate_limit" | "server" | null.
|
|
29
|
-
// null = do not retry, propagate immediately. Operator/internal aborts,
|
|
30
|
-
// auth failures, malformed-request errors, unknown shapes all fall here.
|
|
25
|
+
// Returns "gateway" | "warmup" | "rate_limit" | "server" | null (don't retry).
|
|
31
26
|
export function classifyTransient(err) {
|
|
32
27
|
if (!err || typeof err.message !== "string") return null;
|
|
33
28
|
const { message } = err;
|
|
@@ -36,7 +31,6 @@ export function classifyTransient(err) {
|
|
|
36
31
|
if (GATEWAY_PATTERN.test(message)) return "gateway";
|
|
37
32
|
if (RATE_LIMIT_PATTERN.test(message)) return "rate_limit";
|
|
38
33
|
if (STATUS_503_PATTERN.test(message)) {
|
|
39
|
-
// 503 + explicit warmup signal → wait it out.
|
|
40
34
|
if (MODEL_WARMUP_PATTERN.test(message)) return "warmup";
|
|
41
35
|
if (typeof err.body === "string" && MODEL_WARMUP_PATTERN.test(err.body)) {
|
|
42
36
|
return "warmup";
|
|
@@ -47,9 +41,7 @@ export function classifyTransient(err) {
|
|
|
47
41
|
return null;
|
|
48
42
|
}
|
|
49
43
|
|
|
50
|
-
// HTTP Retry-After
|
|
51
|
-
// undefined for missing, malformed, or HTTP-date forms — callers
|
|
52
|
-
// fall through to backoff in those cases.
|
|
44
|
+
// HTTP Retry-After in seconds; undefined for missing/malformed/HTTP-date.
|
|
53
45
|
export function parseRetryAfter(value) {
|
|
54
46
|
if (!value) return undefined;
|
|
55
47
|
const seconds = Number(value);
|
package/src/llm/openaiStream.js
CHANGED
|
@@ -1,39 +1,13 @@
|
|
|
1
1
|
import { parseRetryAfter } from "./errors.js";
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
* and headers; this module handles the SSE parsing, accumulates deltas into
|
|
8
|
-
* a non-streaming-shape response, and surfaces errors with the same ergonomics
|
|
9
|
-
* as the previous fetch-then-json pattern.
|
|
10
|
-
*
|
|
11
|
-
* Streaming is preferred over non-streaming for two reasons:
|
|
12
|
-
*
|
|
13
|
-
* 1. Long-running completions through CDN proxies (e.g. Cloudflare's 100s
|
|
14
|
-
* origin-timeout) can't survive a non-streaming hold; streaming keeps
|
|
15
|
-
* the connection alive byte-by-byte.
|
|
16
|
-
* 2. Future UI surfaces ("thinking" displays) want the deltas live; a
|
|
17
|
-
* streaming-first plugin layer gives them a hook.
|
|
18
|
-
*
|
|
19
|
-
* The xAI Responses API (`/v1/responses`) uses a different streaming format
|
|
20
|
-
* and is out of scope for this client.
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* @param {Object} args
|
|
25
|
-
* @param {string} args.url Full POST URL (e.g. `${baseUrl}/v1/chat/completions`).
|
|
26
|
-
* @param {Object} args.headers Plugin-specific headers (Authorization, etc.).
|
|
27
|
-
* @param {Object} args.body Request body (without `stream` — added here).
|
|
28
|
-
* @param {AbortSignal} [args.signal] Cancellation signal.
|
|
29
|
-
* @returns {Promise<Object>} Non-streaming-shape response: `{ choices, usage, model }`.
|
|
30
|
-
* Throws on non-2xx with `err.status` and `err.body` populated.
|
|
31
|
-
*/
|
|
3
|
+
// SSE client for OpenAI-compatible /chat/completions. Streaming keeps
|
|
4
|
+
// long completions alive through CDN proxies (Cloudflare's 100s timeout).
|
|
5
|
+
// Returns non-streaming shape { choices, usage, model, chunkMetadata };
|
|
6
|
+
// throws on non-2xx with err.status / err.body / err.retryAfter.
|
|
32
7
|
export async function chatCompletionStream({ url, headers, body, signal }) {
|
|
33
8
|
const requestBody = {
|
|
34
9
|
...body,
|
|
35
10
|
stream: true,
|
|
36
|
-
// Tells OpenAI / OpenAI-compatible servers to emit a final usage chunk.
|
|
37
11
|
stream_options: { include_usage: true },
|
|
38
12
|
};
|
|
39
13
|
|
|
@@ -62,21 +36,13 @@ export async function chatCompletionStream({ url, headers, body, signal }) {
|
|
|
62
36
|
let usage = null;
|
|
63
37
|
let model = null;
|
|
64
38
|
let finishReason = null;
|
|
65
|
-
//
|
|
66
|
-
// id, system_fingerprint, service_tier, created, object, plus any
|
|
67
|
-
// provider-specific fields. The last-seen wins (these are typically
|
|
68
|
-
// stable across chunks; xAI/OpenAI repeat them, some land only on the
|
|
69
|
-
// final chunk).
|
|
39
|
+
// Last-seen wins for catch-all chunk fields (id, system_fingerprint, etc).
|
|
70
40
|
const chunkMetadata = {};
|
|
71
41
|
|
|
72
42
|
while (true) {
|
|
73
43
|
const { done, value } = await reader.read();
|
|
74
44
|
if (done) break;
|
|
75
45
|
buffer += decoder.decode(value, { stream: true });
|
|
76
|
-
|
|
77
|
-
// SSE frames are separated by blank lines; within a frame, a `data:`
|
|
78
|
-
// line carries the JSON payload. Process complete lines and keep any
|
|
79
|
-
// trailing partial-line in the buffer for the next read.
|
|
80
46
|
const lines = buffer.split("\n");
|
|
81
47
|
buffer = lines.pop();
|
|
82
48
|
|
|
@@ -96,11 +62,6 @@ export async function chatCompletionStream({ url, headers, body, signal }) {
|
|
|
96
62
|
if (chunk.model) model = chunk.model;
|
|
97
63
|
if (chunk.usage) usage = chunk.usage;
|
|
98
64
|
|
|
99
|
-
// Capture every non-content field the provider sends. We strip
|
|
100
|
-
// `choices` (handled below) and `usage` (already extracted) and
|
|
101
|
-
// keep the rest verbatim. Fields seen in a later chunk overwrite
|
|
102
|
-
// earlier ones — providers re-emit stable fields, and final-chunk
|
|
103
|
-
// fields (system_fingerprint on some, service_tier on others) win.
|
|
104
65
|
for (const [k, v] of Object.entries(chunk)) {
|
|
105
66
|
if (k === "choices" || k === "usage") continue;
|
|
106
67
|
chunkMetadata[k] = v;
|
|
@@ -113,8 +74,7 @@ export async function chatCompletionStream({ url, headers, body, signal }) {
|
|
|
113
74
|
const delta = choice.delta;
|
|
114
75
|
if (!delta) continue;
|
|
115
76
|
if (typeof delta.content === "string") content += delta.content;
|
|
116
|
-
//
|
|
117
|
-
// Concatenate any that show up.
|
|
77
|
+
// Reasoning surfaces under different field names per provider.
|
|
118
78
|
if (typeof delta.reasoning_content === "string")
|
|
119
79
|
reasoningContent += delta.reasoning_content;
|
|
120
80
|
if (typeof delta.reasoning === "string")
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
projectEmission,
|
|
3
|
+
SUMMARY_MAX_CHARS,
|
|
4
|
+
summarizeEmission,
|
|
5
|
+
} from "../helpers.js";
|
|
2
6
|
import docs from "./ask_userDoc.js";
|
|
3
7
|
|
|
4
|
-
// Per-side cap
|
|
5
|
-
// before the arrow preserves the structural separator the model uses to
|
|
6
|
-
// read the pair as a unit; a single trailing slice could lose the arrow
|
|
7
|
-
// entirely when either side is large.
|
|
8
|
+
// Per-side cap so summary preserves the arrow separator on long Q/A.
|
|
8
9
|
const ARROW = " → ";
|
|
9
10
|
const HALF = Math.floor((SUMMARY_MAX_CHARS - ARROW.length) / 2);
|
|
10
11
|
|
|
@@ -36,14 +37,12 @@ export default class AskUser {
|
|
|
36
37
|
runId: ctx.runId,
|
|
37
38
|
turn,
|
|
38
39
|
path: ctx.path,
|
|
39
|
-
body: ctx.resolvedBody,
|
|
40
40
|
attributes: { ...ctx.attrs, answer: ctx.output },
|
|
41
41
|
});
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
async handler(entry, rummy) {
|
|
45
45
|
const { entries: store, sequence: turn, runId, loopId } = rummy;
|
|
46
|
-
// XmlParser resolved question/options from attr-or-body already.
|
|
47
46
|
const { question, options: rawOptions } = entry.attributes;
|
|
48
47
|
|
|
49
48
|
let options = [];
|
|
@@ -59,7 +58,7 @@ export default class AskUser {
|
|
|
59
58
|
runId,
|
|
60
59
|
turn,
|
|
61
60
|
path: entry.resultPath,
|
|
62
|
-
body:
|
|
61
|
+
body: "",
|
|
63
62
|
state: "proposed",
|
|
64
63
|
attributes: { question, options },
|
|
65
64
|
loopId,
|
|
@@ -67,18 +66,14 @@ export default class AskUser {
|
|
|
67
66
|
}
|
|
68
67
|
|
|
69
68
|
full(entry) {
|
|
70
|
-
|
|
71
|
-
const lines = ["# ask_user"];
|
|
72
|
-
if (question) lines.push(`# Question: ${question}`);
|
|
73
|
-
if (answer) lines.push(`# Answer: ${answer}`);
|
|
74
|
-
return lines.join("\n");
|
|
69
|
+
return projectEmission(entry.body);
|
|
75
70
|
}
|
|
76
71
|
|
|
77
72
|
summary(entry) {
|
|
78
73
|
const { question, answer } = entry.attributes;
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
return
|
|
74
|
+
const text = answer
|
|
75
|
+
? `${question.slice(0, HALF)}${ARROW}${answer.slice(0, HALF)}`
|
|
76
|
+
: question.slice(0, SUMMARY_MAX_CHARS);
|
|
77
|
+
return summarizeEmission(text);
|
|
83
78
|
}
|
|
84
79
|
}
|
|
@@ -9,6 +9,44 @@ Ceiling = `floor(contextSize × RUMMY_BUDGET_CEILING)` (default 0.9). The
|
|
|
9
9
|
handling. No per-write gating — tools run uninterrupted. Enforcement
|
|
10
10
|
happens at one boundary: the pre-LLM grinder.
|
|
11
11
|
|
|
12
|
+
## Single source of truth
|
|
13
|
+
|
|
14
|
+
`tokenUsage` and `tokensFree` are both derived from one number, computed by one helper:
|
|
15
|
+
|
|
16
|
+
```js
|
|
17
|
+
computePacketTokens({ system, user })
|
|
18
|
+
→ tokenUsage = countTokens(system) + countTokens(user)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
This is what the **`<budget>`** tag shows the model AND what the
|
|
22
|
+
`turn.beforeDispatch` enforce gate measures (when no prior-turn
|
|
23
|
+
`prompt_tokens` is available; otherwise enforce uses that real API
|
|
24
|
+
count). The two never diverge — they reach for the same function
|
|
25
|
+
against the same assembled bytes.
|
|
26
|
+
|
|
27
|
+
## How `<budget>` is rendered
|
|
28
|
+
|
|
29
|
+
`assembleBudget` runs at `assembly.user` priority 90. It emits the
|
|
30
|
+
breakdown table (per-scheme `aTokens` sums) and writes the
|
|
31
|
+
**placeholder** `<budget>` tag:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
<budget tokenUsage="{{tokenUsage}}" tokensFree="{{tokensFree}}">
|
|
35
|
+
… per-scheme breakdown table …
|
|
36
|
+
System: {{systemTokens}} tokens.
|
|
37
|
+
Total: …
|
|
38
|
+
</budget>
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
`ContextAssembler.assembleFromTurnContext` then assembles both
|
|
42
|
+
messages, calls `computePacketTokens`, and substitutes the placeholders
|
|
43
|
+
in-place. Single pass: assemble → measure → substitute → return.
|
|
44
|
+
|
|
45
|
+
The breakdown table values (per-scheme `aTokens`, summarized aggregate,
|
|
46
|
+
visible/summarized counts) are independent of the headline math. They
|
|
47
|
+
come from row-level measurements done at materialization time. The
|
|
48
|
+
headline is wire truth; the table is the action map.
|
|
49
|
+
|
|
12
50
|
## Enforcement Points
|
|
13
51
|
|
|
14
52
|
1. **Pre-LLM grinder** (`turn.beforeDispatch` filter): four-step
|
|
@@ -34,15 +72,15 @@ happens at one boundary: the pre-LLM grinder.
|
|
|
34
72
|
## Files
|
|
35
73
|
|
|
36
74
|
- **budget.js** — Plugin. Math (`ceiling`, `measureMessages`,
|
|
37
|
-
`measureRows`, `computeBudget`), 413 body shaper (`overflowBody`),
|
|
38
|
-
and the plugin class itself.
|
|
75
|
+
`measureRows`, `computeBudget`, **`computePacketTokens`**), 413
|
|
76
|
+
body shaper (`overflowBody`), and the plugin class itself.
|
|
39
77
|
|
|
40
78
|
## Hook participation
|
|
41
79
|
|
|
42
|
-
- `core.filter("turn.beforeDispatch", ...)` — pre-LLM grinder.
|
|
43
|
-
Returns the (possibly demoted) packet with `ok` / `overflow` flags.
|
|
44
|
-
- `core.filter("assembly.user", ..., 90)` — renders the `<budget>`
|
|
45
|
-
table into the user message.
|
|
80
|
+
- `core.filter("turn.beforeDispatch", ...)` — pre-LLM grinder.
|
|
81
|
+
Returns the (possibly demoted) packet with `ok` / `overflow` flags.
|
|
82
|
+
- `core.filter("assembly.user", ..., 90)` — renders the `<budget>`
|
|
83
|
+
table + placeholder tag into the user message.
|
|
46
84
|
|
|
47
|
-
Emits 413 errors through the unified error channel (`hooks.error.log.emit`);
|
|
48
|
-
there is no separate `budget://` scheme.
|
|
85
|
+
Emits 413 errors through the unified error channel
|
|
86
|
+
(`hooks.error.log.emit`); there is no separate `budget://` scheme.
|
|
@@ -3,21 +3,26 @@ import { countTokens } from "../../agent/tokens.js";
|
|
|
3
3
|
|
|
4
4
|
const CEILING_RATIO = Number(process.env.RUMMY_BUDGET_CEILING);
|
|
5
5
|
|
|
6
|
+
// Substituted post-assembly by ContextAssembler with the headline numbers.
|
|
7
|
+
export const TOKEN_USAGE_PLACEHOLDER = "{{tokenUsage}}";
|
|
8
|
+
export const TOKENS_FREE_PLACEHOLDER = "{{tokensFree}}";
|
|
9
|
+
|
|
6
10
|
export function ceiling(contextSize) {
|
|
7
11
|
return Math.floor(contextSize * CEILING_RATIO);
|
|
8
12
|
}
|
|
9
13
|
|
|
10
|
-
// Sum assembled-message token counts; used by the enforce gate.
|
|
11
14
|
export function measureMessages(messages) {
|
|
12
15
|
return messages.reduce((sum, m) => sum + countTokens(m.content), 0);
|
|
13
16
|
}
|
|
14
17
|
|
|
15
|
-
// Sum projected row body token counts; used by prompt.js pre-assembly.
|
|
16
18
|
export function measureRows(rows) {
|
|
17
19
|
return rows.reduce((sum, r) => sum + countTokens(r.body), 0);
|
|
18
20
|
}
|
|
19
21
|
|
|
20
|
-
|
|
22
|
+
export function computePacketTokens({ system = "", user = "" } = {}) {
|
|
23
|
+
return countTokens(system) + countTokens(user);
|
|
24
|
+
}
|
|
25
|
+
|
|
21
26
|
export function computeBudget({ contextSize, totalTokens }) {
|
|
22
27
|
const cap = ceiling(contextSize);
|
|
23
28
|
const tokensFree = Math.max(0, cap - totalTokens);
|
|
@@ -32,7 +37,12 @@ export function computeBudget({ contextSize, totalTokens }) {
|
|
|
32
37
|
};
|
|
33
38
|
}
|
|
34
39
|
|
|
35
|
-
|
|
40
|
+
export function substituteBudgetPlaceholders(text, { tokenUsage, tokensFree }) {
|
|
41
|
+
return text
|
|
42
|
+
.replaceAll(TOKEN_USAGE_PLACEHOLDER, String(tokenUsage))
|
|
43
|
+
.replaceAll(TOKENS_FREE_PLACEHOLDER, String(tokensFree));
|
|
44
|
+
}
|
|
45
|
+
|
|
36
46
|
export function overflowBody(overflow, contextSize, demoted) {
|
|
37
47
|
const cap = ceiling(contextSize);
|
|
38
48
|
const size = cap + overflow;
|
|
@@ -57,10 +67,6 @@ export default class Budget {
|
|
|
57
67
|
core.filter("assembly.user", this.assembleBudget.bind(this), 90);
|
|
58
68
|
}
|
|
59
69
|
|
|
60
|
-
// Filter participant. Receives the assembled packet; returns a
|
|
61
|
-
// (possibly modified) packet. The pre-LLM grinder demotes-and-
|
|
62
|
-
// rechecks per SPEC §budget_enforcement; if it can't fit after the
|
|
63
|
-
// ladder runs, sets ok=false so TurnExecutor short-circuits.
|
|
64
70
|
async #onBeforeDispatch(packet, ctxBag) {
|
|
65
71
|
return this.enforce({
|
|
66
72
|
contextSize: packet.contextSize,
|
|
@@ -72,19 +78,17 @@ export default class Budget {
|
|
|
72
78
|
});
|
|
73
79
|
}
|
|
74
80
|
|
|
75
|
-
// Renders <budget>
|
|
81
|
+
// Renders <budget> with placeholder headline numbers; ContextAssembler
|
|
82
|
+
// post-substitutes them after measuring the assembled packet.
|
|
76
83
|
assembleBudget(content, ctx) {
|
|
77
|
-
const { rows, contextSize
|
|
84
|
+
const { rows, contextSize } = ctx;
|
|
78
85
|
if (!contextSize) return content;
|
|
79
86
|
|
|
80
87
|
const cap = ceiling(contextSize);
|
|
81
88
|
|
|
82
89
|
const byScheme = new Map();
|
|
83
90
|
let visibleCount = 0;
|
|
84
|
-
let premiumTokens = 0;
|
|
85
91
|
let summarizedCount = 0;
|
|
86
|
-
let _summarizedTokens = 0;
|
|
87
|
-
let floorTokens = 0;
|
|
88
92
|
|
|
89
93
|
const schemeEntry = (s) => {
|
|
90
94
|
let e = byScheme.get(s);
|
|
@@ -112,22 +116,13 @@ export default class Budget {
|
|
|
112
116
|
entry.visIfSumTokens += r.sTokens;
|
|
113
117
|
entry.premium += r.aTokens;
|
|
114
118
|
visibleCount += 1;
|
|
115
|
-
premiumTokens += r.aTokens;
|
|
116
|
-
floorTokens += r.sTokens;
|
|
117
119
|
} else if (r.visibility === "summarized") {
|
|
118
120
|
entry.sum += 1;
|
|
119
121
|
entry.sumTokens += r.sTokens;
|
|
120
122
|
summarizedCount += 1;
|
|
121
|
-
_summarizedTokens += r.sTokens;
|
|
122
|
-
floorTokens += r.sTokens;
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
const systemTokens = countTokens(systemPrompt);
|
|
127
|
-
const tokenUsage = floorTokens + premiumTokens + systemTokens;
|
|
128
|
-
const tokensFree = Math.max(0, cap - tokenUsage);
|
|
129
|
-
|
|
130
|
-
// Sort by current cost desc so biggest-impact rows are top.
|
|
131
126
|
const schemeRows = [...byScheme.entries()]
|
|
132
127
|
.toSorted(
|
|
133
128
|
([, a], [, b]) =>
|
|
@@ -139,17 +134,13 @@ export default class Budget {
|
|
|
139
134
|
return `| ${scheme} | ${e.vis} | ${e.sum} | ${cost} | ${ifAllSum} | ${e.premium} |`;
|
|
140
135
|
});
|
|
141
136
|
|
|
142
|
-
const systemPct =
|
|
143
|
-
tokenUsage > 0 ? Math.round((systemTokens / tokenUsage) * 100) : 0;
|
|
144
|
-
|
|
145
137
|
const table = [
|
|
146
138
|
"| scheme | vis | sum | cost | if-all-sum | premium |",
|
|
147
139
|
"|---|---|---|---|---|---|",
|
|
148
140
|
...schemeRows,
|
|
149
141
|
].join("\n");
|
|
150
142
|
|
|
151
|
-
const
|
|
152
|
-
const totalLine = `Total: ${visibleCount} visible + ${summarizedCount} summarized entries; tokenUsage ${tokenUsage} / ceiling ${cap}. ${tokensFree} tokens free.`;
|
|
143
|
+
const totalLine = `Total: ${visibleCount} visible + ${summarizedCount} summarized entries; tokenUsage ${TOKEN_USAGE_PLACEHOLDER} / ceiling ${cap}. ${TOKENS_FREE_PLACEHOLDER} tokens free.`;
|
|
153
144
|
const legend = [
|
|
154
145
|
"Columns:",
|
|
155
146
|
"- cost: current cost of this scheme (vTokens for visible + sTokens for summarized)",
|
|
@@ -157,7 +148,7 @@ export default class Budget {
|
|
|
157
148
|
"- premium: savings from demoting visible → summarized (cost − if-all-sum)",
|
|
158
149
|
].join("\n");
|
|
159
150
|
|
|
160
|
-
return `${content}<budget tokenUsage="${
|
|
151
|
+
return `${content}<budget tokenUsage="${TOKEN_USAGE_PLACEHOLDER}" tokensFree="${TOKENS_FREE_PLACEHOLDER}">\n${table}\n\n${legend}\n${totalLine}\n</budget>\n`;
|
|
161
152
|
}
|
|
162
153
|
|
|
163
154
|
#check({ contextSize, messages, rows, lastPromptTokens = 0 }) {
|
|
@@ -204,16 +195,7 @@ export default class Budget {
|
|
|
204
195
|
);
|
|
205
196
|
}
|
|
206
197
|
|
|
207
|
-
// Pre-LLM grinder ladder
|
|
208
|
-
//
|
|
209
|
-
// 1. Check budget. ok → return.
|
|
210
|
-
// 2. Soft 413: demote (current_turn − 1) visible. Recheck.
|
|
211
|
-
// 3. Soft 413: demote current prompt. Recheck.
|
|
212
|
-
// 4. Hard 413: emit and return ok=false.
|
|
213
|
-
//
|
|
214
|
-
// Every step that demotes anything emits a 413 error://. Soft 413s
|
|
215
|
-
// keep the run alive (turn proceeds to LLM); the hard 413 bubbles
|
|
216
|
-
// through to AgentLoop.
|
|
198
|
+
// Pre-LLM grinder ladder; SPEC §budget_enforcement.
|
|
217
199
|
async enforce({
|
|
218
200
|
contextSize,
|
|
219
201
|
messages,
|
|
@@ -226,7 +208,6 @@ export default class Budget {
|
|
|
226
208
|
return { messages, rows, assembledTokens: 0, ok: true };
|
|
227
209
|
}
|
|
228
210
|
|
|
229
|
-
// Step 1.
|
|
230
211
|
const first = this.#check({
|
|
231
212
|
contextSize,
|
|
232
213
|
messages,
|
|
@@ -237,7 +218,7 @@ export default class Budget {
|
|
|
237
218
|
|
|
238
219
|
const store = rummy.entries;
|
|
239
220
|
|
|
240
|
-
// Step
|
|
221
|
+
// Step 1: previous-turn demotion.
|
|
241
222
|
const prevTurn = ctx.turn - 1;
|
|
242
223
|
const rawTurnDemoted =
|
|
243
224
|
prevTurn >= 0 ? await store.demoteTurnEntries(ctx.runId, prevTurn) : [];
|
|
@@ -274,7 +255,7 @@ export default class Budget {
|
|
|
274
255
|
first.overflow = rechecked.overflow;
|
|
275
256
|
}
|
|
276
257
|
|
|
277
|
-
// Step
|
|
258
|
+
// Step 2: current-prompt demotion.
|
|
278
259
|
const promptRow = rows.findLast(
|
|
279
260
|
(r) => r.category === "prompt" && r.scheme === "prompt",
|
|
280
261
|
);
|
|
@@ -320,7 +301,7 @@ export default class Budget {
|
|
|
320
301
|
first.overflow = rechecked.overflow;
|
|
321
302
|
}
|
|
322
303
|
|
|
323
|
-
//
|
|
304
|
+
// Hard 413.
|
|
324
305
|
const allDemoted = [...turnDemoted, ...promptDemoted];
|
|
325
306
|
await this.#emit({
|
|
326
307
|
message: overflowBody(first.overflow, contextSize, allDemoted),
|
package/src/plugins/cp/cp.js
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import Entries from "../../agent/Entries.js";
|
|
2
|
-
import {
|
|
2
|
+
import { countTokens } from "../../agent/tokens.js";
|
|
3
|
+
import {
|
|
4
|
+
projectEmission,
|
|
5
|
+
storePatternResult,
|
|
6
|
+
summarizeEmission,
|
|
7
|
+
} from "../helpers.js";
|
|
3
8
|
import docs from "./cpDoc.js";
|
|
4
9
|
|
|
5
10
|
export default class Cp {
|
|
@@ -25,7 +30,6 @@ export default class Cp {
|
|
|
25
30
|
? entry.attributes.visibility
|
|
26
31
|
: undefined;
|
|
27
32
|
|
|
28
|
-
// Manifest: list what would be copied without performing the cp.
|
|
29
33
|
if (entry.attributes.manifest !== undefined) {
|
|
30
34
|
const matches = await store.getEntriesByPattern(runId, path);
|
|
31
35
|
await storePatternResult(store, runId, turn, "cp", path, null, matches, {
|
|
@@ -38,9 +42,7 @@ export default class Cp {
|
|
|
38
42
|
|
|
39
43
|
const source = await store.getBody(runId, path);
|
|
40
44
|
if (source === null) return;
|
|
41
|
-
// Tags
|
|
42
|
-
// destination inherits the source entry's tags. Same shape as
|
|
43
|
-
// visibility — explicit attr overrides, default inherits.
|
|
45
|
+
// Tags: explicit attr wins; otherwise destination inherits source's.
|
|
44
46
|
let destTags = null;
|
|
45
47
|
if (typeof entry.attributes.tags === "string") {
|
|
46
48
|
destTags = entry.attributes.tags;
|
|
@@ -56,19 +58,18 @@ export default class Cp {
|
|
|
56
58
|
const warning =
|
|
57
59
|
existing !== null ? `Overwrote existing entry at ${to}` : null;
|
|
58
60
|
|
|
59
|
-
const
|
|
61
|
+
const sourceTokens = countTokens(source);
|
|
62
|
+
const destOldTokens = existing !== null ? countTokens(existing) : 0;
|
|
63
|
+
const beforeTokens = sourceTokens + destOldTokens;
|
|
64
|
+
const afterTokens = sourceTokens * 2;
|
|
65
|
+
|
|
60
66
|
if (destScheme === null) {
|
|
61
|
-
// Bare-file
|
|
62
|
-
// #materializeFile, gated on attrs.path + attrs.patched) the
|
|
63
|
-
// authoritative new body so it writes the source content to
|
|
64
|
-
// disk on accept. Without this the proposal accepted but no
|
|
65
|
-
// file landed — the model's "<cp src dest> then <set dest>
|
|
66
|
-
// SEARCH/REPLACE" sequence silently no-op'd at materialize.
|
|
67
|
+
// Bare-file: hand the shared set.js materializer attrs.patched.
|
|
67
68
|
await store.set({
|
|
68
69
|
runId,
|
|
69
70
|
turn,
|
|
70
71
|
path: entry.resultPath,
|
|
71
|
-
body,
|
|
72
|
+
body: "",
|
|
72
73
|
state: "proposed",
|
|
73
74
|
attributes: {
|
|
74
75
|
from: path,
|
|
@@ -78,6 +79,8 @@ export default class Cp {
|
|
|
78
79
|
path: to,
|
|
79
80
|
patched: source,
|
|
80
81
|
visibility,
|
|
82
|
+
beforeActionTokens: beforeTokens,
|
|
83
|
+
afterActionTokens: afterTokens,
|
|
81
84
|
},
|
|
82
85
|
loopId,
|
|
83
86
|
});
|
|
@@ -96,19 +99,26 @@ export default class Cp {
|
|
|
96
99
|
runId,
|
|
97
100
|
turn,
|
|
98
101
|
path: entry.resultPath,
|
|
99
|
-
body,
|
|
102
|
+
body: "",
|
|
100
103
|
state: "resolved",
|
|
101
|
-
attributes: {
|
|
104
|
+
attributes: {
|
|
105
|
+
from: path,
|
|
106
|
+
to,
|
|
107
|
+
isMove: false,
|
|
108
|
+
warning,
|
|
109
|
+
beforeActionTokens: beforeTokens,
|
|
110
|
+
afterActionTokens: afterTokens,
|
|
111
|
+
},
|
|
102
112
|
loopId,
|
|
103
113
|
});
|
|
104
114
|
}
|
|
105
115
|
}
|
|
106
116
|
|
|
107
117
|
full(entry) {
|
|
108
|
-
return
|
|
118
|
+
return projectEmission(entry.body);
|
|
109
119
|
}
|
|
110
120
|
|
|
111
|
-
summary() {
|
|
112
|
-
return
|
|
121
|
+
summary(entry) {
|
|
122
|
+
return summarizeEmission(entry.body);
|
|
113
123
|
}
|
|
114
124
|
}
|
package/src/plugins/env/env.js
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
logPathToDataBase,
|
|
3
|
+
projectEmission,
|
|
4
|
+
streamSummary,
|
|
5
|
+
summarizeEmission,
|
|
6
|
+
} from "../helpers.js";
|
|
2
7
|
import docs from "./envDoc.js";
|
|
3
8
|
|
|
4
9
|
const LOG_ACTION_RE = /^log:\/\/turn_\d+\/(\w+)\//;
|
|
@@ -8,10 +13,7 @@ export default class Env {
|
|
|
8
13
|
|
|
9
14
|
constructor(core) {
|
|
10
15
|
this.#core = core;
|
|
11
|
-
// env
|
|
12
|
-
// Streaming stdout/stderr is time-indexed activity output, not
|
|
13
|
-
// topic-indexed state — category="logging" so it renders in <log>
|
|
14
|
-
// adjacent to its action entry, not in <summary>/<visible>.
|
|
16
|
+
// env is read-only (allowed in ask-mode); see plugin README.
|
|
15
17
|
core.registerScheme({ category: "logging" });
|
|
16
18
|
core.on("handler", this.handler.bind(this));
|
|
17
19
|
core.on("visible", this.full.bind(this));
|
|
@@ -46,7 +48,6 @@ export default class Env {
|
|
|
46
48
|
runId: ctx.runId,
|
|
47
49
|
path: ctx.path,
|
|
48
50
|
state: "resolved",
|
|
49
|
-
body: `ran '${command}' (in progress). Output: ${dataBase}_1, ${dataBase}_2`,
|
|
50
51
|
});
|
|
51
52
|
}
|
|
52
53
|
|
|
@@ -63,11 +64,14 @@ export default class Env {
|
|
|
63
64
|
});
|
|
64
65
|
}
|
|
65
66
|
|
|
67
|
+
// log:// entries: emission, tab-indented. env:// entries: stream bytes verbatim.
|
|
66
68
|
full(entry) {
|
|
67
|
-
|
|
69
|
+
if (entry.path.startsWith("log://")) return projectEmission(entry.body);
|
|
70
|
+
return entry.body;
|
|
68
71
|
}
|
|
69
72
|
|
|
70
73
|
summary(entry) {
|
|
74
|
+
if (entry.path.startsWith("log://")) return summarizeEmission(entry.body);
|
|
71
75
|
return streamSummary("env", entry);
|
|
72
76
|
}
|
|
73
77
|
}
|