@possumtech/rummy 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -0
- package/FIDELITY_CONTRACT.md +172 -0
- package/migrations/001_initial_schema.sql +3 -3
- package/package.json +1 -1
- package/src/agent/AgentLoop.js +1 -2
- package/src/agent/ContextAssembler.js +2 -0
- package/src/agent/KnownStore.js +1 -2
- package/src/agent/ResponseHealer.js +54 -1
- package/src/agent/TurnExecutor.js +51 -6
- package/src/agent/XmlParser.js +150 -41
- package/src/agent/known_store.sql +18 -11
- package/src/hooks/PluginContext.js +8 -2
- package/src/hooks/RummyContext.js +6 -3
- package/src/hooks/ToolRegistry.js +23 -27
- package/src/plugins/ask_user/ask_user.js +2 -2
- package/src/plugins/ask_user/ask_userDoc.js +4 -2
- package/src/plugins/budget/README.md +6 -4
- package/src/plugins/budget/budget.js +29 -9
- package/src/plugins/cp/cp.js +5 -5
- package/src/plugins/cp/cpDoc.js +0 -8
- package/src/plugins/engine/engine.sql +1 -1
- package/src/plugins/env/env.js +4 -4
- package/src/plugins/env/envDoc.js +2 -2
- package/src/plugins/file/file.js +2 -7
- package/src/plugins/get/get.js +31 -10
- package/src/plugins/get/getDoc.js +26 -37
- package/src/plugins/helpers.js +2 -2
- package/src/plugins/instructions/instructions.js +6 -5
- package/src/plugins/instructions/preamble.md +41 -33
- package/src/plugins/known/known.js +17 -16
- package/src/plugins/known/knownDoc.js +1 -13
- package/src/plugins/mv/mv.js +6 -6
- package/src/plugins/mv/mvDoc.js +2 -13
- package/src/plugins/previous/previous.js +10 -14
- package/src/plugins/progress/progress.js +22 -5
- package/src/plugins/prompt/prompt.js +14 -11
- package/src/plugins/rm/rm.js +4 -4
- package/src/plugins/rm/rmDoc.js +4 -8
- package/src/plugins/rpc/rpc.js +1 -1
- package/src/plugins/set/set.js +10 -12
- package/src/plugins/set/setDoc.js +4 -4
- package/src/plugins/sh/sh.js +4 -4
- package/src/plugins/sh/shDoc.js +2 -2
- package/src/plugins/skill/skill.js +2 -1
- package/src/plugins/summarize/summarize.js +2 -2
- package/src/plugins/summarize/summarizeDoc.js +9 -10
- package/src/plugins/telemetry/telemetry.js +36 -11
- package/src/plugins/think/think.js +2 -1
- package/src/plugins/think/thinkDoc.js +3 -5
- package/src/plugins/unknown/unknown.js +21 -14
- package/src/plugins/unknown/unknownDoc.js +2 -6
- package/src/plugins/update/update.js +2 -2
- package/src/plugins/update/updateDoc.js +9 -6
- package/src/sql/functions/slugify.js +13 -1
- package/src/sql/v_model_context.sql +3 -3
package/.env.example
CHANGED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Fidelity Contract — Observed State vs Intended
|
|
2
|
+
|
|
3
|
+
## Observed Behavior (traced from test/mab/results/2026-04-14T15-13-55-950Z/last_run.txt, turn 24)
|
|
4
|
+
|
|
5
|
+
### Flow
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Model emits a tool call
|
|
9
|
+
↓
|
|
10
|
+
Tool handler stores body in known_entries.body (raw, as model wrote it)
|
|
11
|
+
↓
|
|
12
|
+
Next turn: TurnExecutor materializes context
|
|
13
|
+
↓
|
|
14
|
+
For each row: hooks.tools.view(scheme, entry) → plugin's view hook returns projected body
|
|
15
|
+
↓
|
|
16
|
+
Projected body stored in turn_context.body with fidelity-projected token count
|
|
17
|
+
↓
|
|
18
|
+
Assembly phase: section renderers (knowns, unknowns, previous, performed) pull from ctx.rows (which has projected body) and render tags
|
|
19
|
+
↓
|
|
20
|
+
Model sees the assembled <knowns>, <previous>, etc. sections in the system prompt
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Fidelity Values (from code)
|
|
24
|
+
|
|
25
|
+
- **full**: the plugin's `full()` projection of the body is shown
|
|
26
|
+
- **summary**: a "compact" projection is shown — but what "compact" means varies per plugin
|
|
27
|
+
- **archive**: excluded by `v_model_context` SQL before reaching any renderer (clean)
|
|
28
|
+
|
|
29
|
+
## Three Breaks in the Intended Contract
|
|
30
|
+
|
|
31
|
+
### Break 1 — Plugins disagree on what summary means
|
|
32
|
+
|
|
33
|
+
Every plugin that registers view hooks decides what body to project per fidelity. Observed:
|
|
34
|
+
|
|
35
|
+
| Plugin | full() | summary() |
|
|
36
|
+
|--------|--------|-----------|
|
|
37
|
+
| known | `# known ${path}\n${body}` | **same as full** (wrong) |
|
|
38
|
+
| prompt | `body` | **500-char truncation + marker** (correct) |
|
|
39
|
+
| budget | `body` | `body` (ok — budget is naturally short) |
|
|
40
|
+
| skill | `body` | `body` (inherited default) |
|
|
41
|
+
| unknown | varies — needs audit | needs audit |
|
|
42
|
+
| others | needs audit | needs audit |
|
|
43
|
+
|
|
44
|
+
The `known` plugin's `summary()` returning the full body is a direct contract violation. The summary view should return a compact representation of the entry, not the same full body.
|
|
45
|
+
|
|
46
|
+
### Break 2 — Renderers re-apply fidelity logic
|
|
47
|
+
|
|
48
|
+
Two renderers currently re-check entry fidelity and override the plugin's projection:
|
|
49
|
+
|
|
50
|
+
**`known.js` `renderKnownTag`** (lines 111-115):
|
|
51
|
+
```js
|
|
52
|
+
if (entry.fidelity === "archive") return "";
|
|
53
|
+
if (entry.fidelity === "summary") {
|
|
54
|
+
return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}/>`;
|
|
55
|
+
}
|
|
56
|
+
return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}>${entry.body}</${tag}>`;
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
At summary fidelity this ignores `entry.body` and renders a self-closing tag. It is a workaround for `known.summary()` returning the wrong content — a belt worn over broken suspenders.
|
|
60
|
+
|
|
61
|
+
**`previous.js` `renderToolTag`** (my edit this session):
|
|
62
|
+
```js
|
|
63
|
+
if (entry.fidelity === "full") {
|
|
64
|
+
return `<${entry.scheme} ${attrs}>${body}</${entry.scheme}>`;
|
|
65
|
+
}
|
|
66
|
+
// summary: self-closing with summary attr
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
I added this fidelity re-check when I should have trusted the plugin's projected body. It is the same mistake as in `known.js`, and it was introduced today.
|
|
70
|
+
|
|
71
|
+
### Break 3 — Model writes scheme headers into body
|
|
72
|
+
|
|
73
|
+
Every known/update/unknown entry in the DB has a body that starts with `# known known://path\n`, `# update\n`, or `# unknown\n`. The model writes this because the examples in the system prompt render tags with the body prefixed by `# ${scheme} ${path}\n`.
|
|
74
|
+
|
|
75
|
+
Then the plugin's `full()` hook prepends ANOTHER `# ${scheme} ${path}\n` when projecting. Result: duplicate headers in the rendered output.
|
|
76
|
+
|
|
77
|
+
Observed in turn 16 update body: `"# update\n# update\nDocuments 20-22 indexed and archived."`
|
|
78
|
+
|
|
79
|
+
The same prefix also leaks into `unknown` paths: slug generation for pathless unknowns uses the body including the `# unknown\n` prefix, resulting in URL-encoded paths like:
|
|
80
|
+
```
|
|
81
|
+
unknown://%23%20unknown%0ADocument%2023%20is%20missing%20from%20the%20prompt.
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## The Intended Contract
|
|
85
|
+
|
|
86
|
+
Based on the user's stated philosophy ("surface problems, don't solve them; plugin decides, renderer renders"):
|
|
87
|
+
|
|
88
|
+
### Layer 1 — Plugin decides per fidelity
|
|
89
|
+
|
|
90
|
+
Each plugin registers view hooks that return the body content for each fidelity value:
|
|
91
|
+
|
|
92
|
+
```js
|
|
93
|
+
core.hooks.tools.onView("known", (entry) => entry.body, "full");
|
|
94
|
+
core.hooks.tools.onView("known", (entry) => "", "summary");
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
At archive, no view hook is called (v_model_context excludes them).
|
|
98
|
+
|
|
99
|
+
### Layer 2 — Renderer shows the projected body
|
|
100
|
+
|
|
101
|
+
Renderers take the projected body from `ctx.rows[].body`:
|
|
102
|
+
- If non-empty, wrap in tag with body
|
|
103
|
+
- If empty, render self-closing tag
|
|
104
|
+
|
|
105
|
+
Renderers do NOT re-check entry.fidelity. They trust the plugin's projection.
|
|
106
|
+
|
|
107
|
+
### Layer 3 — Tag attributes always present
|
|
108
|
+
|
|
109
|
+
Tag attributes visible in both full and summary rendering:
|
|
110
|
+
- `path` — always
|
|
111
|
+
- `summary` — if present in entry.attributes.summary
|
|
112
|
+
- `turn` — if source_turn is set
|
|
113
|
+
- `status` — if status is set
|
|
114
|
+
- `fidelity` — always (the value itself)
|
|
115
|
+
- `tokens` — always (full-cost value, unchanged by fidelity per `set_fidelity` SQL)
|
|
116
|
+
|
|
117
|
+
### Per-plugin view decisions (revised)
|
|
118
|
+
|
|
119
|
+
| Plugin | Category | Full body | Summary body | Notes |
|
|
120
|
+
|--------|----------|-----------|--------------|-------|
|
|
121
|
+
| known | data | `entry.body` (no `# known` prefix) | `""` | Tag's summary attr carries the keywords |
|
|
122
|
+
| unknown | unknown | `entry.body` | `""` | Same pattern as known/skill — summary attr carries the label |
|
|
123
|
+
| prompt | prompt | `entry.body` | 500-char truncation with `[truncated...]` | Current behavior is correct |
|
|
124
|
+
| budget | logging | `entry.body` | `entry.body` | Feedback signal — always full |
|
|
125
|
+
| update | logging | `entry.body` | `entry.body` | Already 80-char capped |
|
|
126
|
+
| summarize | logging | `entry.body` | `entry.body` | Already 80-char capped |
|
|
127
|
+
| get | logging | result body | `""` | Just the action tag at summary |
|
|
128
|
+
| set, rm, cp, mv | logging | result body | `""` | Just the action tag at summary |
|
|
129
|
+
| env, sh | logging | output | `""` | Just the action tag at summary |
|
|
130
|
+
| search | logging | results | `""` | Just the action tag at summary |
|
|
131
|
+
| skill | data | `entry.body` | `""` | Same as known |
|
|
132
|
+
| file | data | `entry.body` | `""` | Same as known |
|
|
133
|
+
| http, https | data | — | — | **Move to rummy.web plugin** — not in core |
|
|
134
|
+
|
|
135
|
+
## The Body-Header Problem
|
|
136
|
+
|
|
137
|
+
Separate from fidelity: the model writes `# scheme path` into the body because examples show that shape. Plugin view hooks then prepend another header.
|
|
138
|
+
|
|
139
|
+
**Rule**: `# scheme` prefix belongs only in **logging** scheme outputs (tool execution results where the prefix identifies the log entry type). Non-logging schemes (known, unknown, prompt, data entries) should have no body prefix — tag attributes identify the entry.
|
|
140
|
+
|
|
141
|
+
**What to remove**:
|
|
142
|
+
- `known.js` `full()`: remove `# known ${entry.path}\n` prefix — just return `entry.body`
|
|
143
|
+
- `unknown.js` `full()`: remove any `# unknown\n` prefix
|
|
144
|
+
- Tooldoc examples for known/unknown that show bodies starting with `# scheme path` — remove so model stops copying
|
|
145
|
+
|
|
146
|
+
**What to keep**:
|
|
147
|
+
- Logging plugins (update, summarize, budget, get, set, etc.) may keep `# scheme` prefixes if present — they're describing tool execution results.
|
|
148
|
+
|
|
149
|
+
## Test Plan
|
|
150
|
+
|
|
151
|
+
To enforce the contract:
|
|
152
|
+
|
|
153
|
+
1. **Per-plugin unit tests**: Each plugin with fidelity-sensitive views tests `full(entry)` and `summary(entry)` return the expected content.
|
|
154
|
+
2. **Renderer tests**: Each section renderer (knowns, previous, performed, unknowns) tests that it trusts `entry.body` without re-checking fidelity.
|
|
155
|
+
3. **Integration test**: Load a DB with entries at each fidelity, assemble context, verify:
|
|
156
|
+
- Archive entries absent from any section
|
|
157
|
+
- Summary entries visible as compact tags
|
|
158
|
+
- Full entries visible with body
|
|
159
|
+
- No double headers in bodies
|
|
160
|
+
4. **Contract lint**: Grep for `entry.fidelity ===` in renderer files — should have zero matches.
|
|
161
|
+
|
|
162
|
+
## Deliverable Order
|
|
163
|
+
|
|
164
|
+
Before touching code, this document should be reviewed. Once we are aligned on the contract, the fixes should be made in this order:
|
|
165
|
+
|
|
166
|
+
1. Fix plugin view hooks to return correct body per fidelity
|
|
167
|
+
2. Remove fidelity re-checks from renderers
|
|
168
|
+
3. Remove the `# scheme path` header prepending (plugin-side) and examples (tooldoc-side)
|
|
169
|
+
4. Write tests per the plan above
|
|
170
|
+
5. Regenerate a sample context packet to confirm clean output
|
|
171
|
+
|
|
172
|
+
No silent interventions. No belt-and-suspenders logic. Plugin projects, renderer renders, model sees honest representation.
|
|
@@ -124,8 +124,8 @@ CREATE TABLE IF NOT EXISTS known_entries (
|
|
|
124
124
|
, body TEXT NOT NULL DEFAULT ''
|
|
125
125
|
, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
|
|
126
126
|
, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
|
|
127
|
-
, fidelity TEXT NOT NULL DEFAULT '
|
|
128
|
-
fidelity IN ('
|
|
127
|
+
, fidelity TEXT NOT NULL DEFAULT 'promoted' CHECK (
|
|
128
|
+
fidelity IN ('promoted', 'demoted', 'archived')
|
|
129
129
|
)
|
|
130
130
|
, hash TEXT
|
|
131
131
|
, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
|
|
@@ -166,7 +166,7 @@ CREATE TABLE IF NOT EXISTS turn_context (
|
|
|
166
166
|
, path TEXT NOT NULL
|
|
167
167
|
, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
|
|
168
168
|
, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
|
|
169
|
-
, fidelity TEXT NOT NULL CHECK (fidelity IN ('
|
|
169
|
+
, fidelity TEXT NOT NULL CHECK (fidelity IN ('promoted', 'demoted'))
|
|
170
170
|
, body TEXT NOT NULL DEFAULT ''
|
|
171
171
|
, tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
|
|
172
172
|
, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
|
package/package.json
CHANGED
package/src/agent/AgentLoop.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { advanceRecovery } from "../plugins/budget/recovery.js";
|
|
2
|
-
import KnownStore from "./KnownStore.js";
|
|
3
2
|
import msg from "./messages.js";
|
|
4
3
|
import ResponseHealer from "./ResponseHealer.js";
|
|
5
4
|
|
|
@@ -365,7 +364,7 @@ export default class AgentLoop {
|
|
|
365
364
|
await this.#knownStore.setFidelity(
|
|
366
365
|
currentRunId,
|
|
367
366
|
ra.promptPath,
|
|
368
|
-
"
|
|
367
|
+
"promoted",
|
|
369
368
|
);
|
|
370
369
|
}
|
|
371
370
|
if (ra.action === "hard413") {
|
|
@@ -14,6 +14,7 @@ export default class ContextAssembler {
|
|
|
14
14
|
toolSet = null,
|
|
15
15
|
lastContextTokens = 0,
|
|
16
16
|
turn = 1,
|
|
17
|
+
baselineTokens = 0,
|
|
17
18
|
} = {},
|
|
18
19
|
hooks,
|
|
19
20
|
) {
|
|
@@ -32,6 +33,7 @@ export default class ContextAssembler {
|
|
|
32
33
|
demoted,
|
|
33
34
|
toolSet,
|
|
34
35
|
turn,
|
|
36
|
+
baselineTokens,
|
|
35
37
|
};
|
|
36
38
|
|
|
37
39
|
const system = await hooks.assembly.system.filter(systemPrompt, ctx);
|
package/src/agent/KnownStore.js
CHANGED
|
@@ -84,7 +84,7 @@ export default class KnownStore {
|
|
|
84
84
|
body,
|
|
85
85
|
status,
|
|
86
86
|
{
|
|
87
|
-
fidelity = "
|
|
87
|
+
fidelity = "promoted",
|
|
88
88
|
attributes = null,
|
|
89
89
|
hash = null,
|
|
90
90
|
updatedAt = null,
|
|
@@ -247,7 +247,6 @@ export default class KnownStore {
|
|
|
247
247
|
this.#emitChanged(runId, "prompt://batch", "fidelity");
|
|
248
248
|
}
|
|
249
249
|
|
|
250
|
-
|
|
251
250
|
async getLog(runId) {
|
|
252
251
|
return this.#db.get_results.all({ run_id: runId });
|
|
253
252
|
}
|
|
@@ -2,6 +2,8 @@ const MAX_STALLS = Number(process.env.RUMMY_MAX_STALLS) || 3;
|
|
|
2
2
|
const MIN_CYCLES = Number(process.env.RUMMY_MIN_CYCLES) || 3;
|
|
3
3
|
const MAX_CYCLE_PERIOD = Number(process.env.RUMMY_MAX_CYCLE_PERIOD) || 4;
|
|
4
4
|
const MAX_UPDATE_REPEATS = Number(process.env.RUMMY_MAX_UPDATE_REPEATS) || 3;
|
|
5
|
+
const MAX_PATH_STAGNATION =
|
|
6
|
+
Number(process.env.RUMMY_MAX_PATH_STAGNATION) || 5;
|
|
5
7
|
|
|
6
8
|
/**
|
|
7
9
|
* Build a stable fingerprint for a single recorded entry.
|
|
@@ -47,11 +49,28 @@ function detectCycle(history) {
|
|
|
47
49
|
return { detected: false };
|
|
48
50
|
}
|
|
49
51
|
|
|
52
|
+
/**
|
|
53
|
+
* Extract the target paths a command touches for stagnation detection.
|
|
54
|
+
* Same target logic as cmdFingerprint but returns the raw path for set
|
|
55
|
+
* comparison across turns.
|
|
56
|
+
*/
|
|
57
|
+
function cmdPaths(entry) {
|
|
58
|
+
const attrs = entry.attributes ?? {};
|
|
59
|
+
const paths = [];
|
|
60
|
+
if (attrs.path) paths.push(attrs.path);
|
|
61
|
+
if (attrs.to) paths.push(attrs.to);
|
|
62
|
+
if (attrs.command) paths.push(attrs.command);
|
|
63
|
+
if (attrs.query) paths.push(attrs.query);
|
|
64
|
+
if (attrs.question) paths.push(attrs.question);
|
|
65
|
+
return paths;
|
|
66
|
+
}
|
|
67
|
+
|
|
50
68
|
export default class ResponseHealer {
|
|
51
69
|
#stallCount = 0;
|
|
52
70
|
#turnHistory = [];
|
|
53
71
|
#lastUpdateText = null;
|
|
54
72
|
#updateRepeatCount = 0;
|
|
73
|
+
#pathRuns = new Map(); // path → consecutive turns touched
|
|
55
74
|
|
|
56
75
|
/**
|
|
57
76
|
* Heal a missing status tag. Called when the model emits
|
|
@@ -67,8 +86,15 @@ export default class ResponseHealer {
|
|
|
67
86
|
static healStatus(content, commands) {
|
|
68
87
|
const trimmed = content.trim();
|
|
69
88
|
|
|
89
|
+
// Detect malformed-glitch content — model attempted a tool invocation
|
|
90
|
+
// (native call, malformed XML, etc.) that the parser couldn't dispatch.
|
|
91
|
+
// This is NOT an answer; it's a glitch that deserves the 3-strikes
|
|
92
|
+
// stall path so the model can recover. Without this check, the model
|
|
93
|
+
// emits one malformed call and the run terminates after a single turn.
|
|
94
|
+
const looksGlitched = /<\|tool_call>|<tool_call\|>/.test(trimmed);
|
|
95
|
+
|
|
70
96
|
// No commands + plain text = answered. Treat as summary.
|
|
71
|
-
if (commands.length === 0 && trimmed) {
|
|
97
|
+
if (commands.length === 0 && trimmed && !looksGlitched) {
|
|
72
98
|
console.warn("[RUMMY] Healed: plain text response treated as summary");
|
|
73
99
|
return { summaryText: trimmed.slice(0, 500), updateText: null };
|
|
74
100
|
}
|
|
@@ -120,6 +146,32 @@ export default class ResponseHealer {
|
|
|
120
146
|
return { continue: false, reason };
|
|
121
147
|
}
|
|
122
148
|
|
|
149
|
+
// Distinct-paths stagnation: the model might vary commands turn-to-turn
|
|
150
|
+
// (avoiding exact-cycle detection) but still churn on a single path.
|
|
151
|
+
// Track per-path consecutive touches; flag if any path is touched in
|
|
152
|
+
// MAX_PATH_STAGNATION consecutive turns. Catches semantic stagnation
|
|
153
|
+
// where the fingerprints differ in micro-detail but the work is stuck
|
|
154
|
+
// on one entry (e.g. endlessly re-setting/re-getting the same plan).
|
|
155
|
+
const touchedPaths = new Set();
|
|
156
|
+
for (const cmd of commands) {
|
|
157
|
+
for (const p of cmdPaths(cmd)) touchedPaths.add(p);
|
|
158
|
+
}
|
|
159
|
+
// Paths not touched this turn — run broken, remove from map.
|
|
160
|
+
for (const path of [...this.#pathRuns.keys()]) {
|
|
161
|
+
if (!touchedPaths.has(path)) this.#pathRuns.delete(path);
|
|
162
|
+
}
|
|
163
|
+
// Paths touched this turn — increment run.
|
|
164
|
+
for (const path of touchedPaths) {
|
|
165
|
+
this.#pathRuns.set(path, (this.#pathRuns.get(path) || 0) + 1);
|
|
166
|
+
}
|
|
167
|
+
for (const [path, run] of this.#pathRuns) {
|
|
168
|
+
if (run >= MAX_PATH_STAGNATION) {
|
|
169
|
+
const reason = `Path stagnation: ${path} touched ${run} consecutive turns`;
|
|
170
|
+
console.warn(`[RUMMY] ${reason}. Force-completing.`);
|
|
171
|
+
return { continue: false, reason };
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
123
175
|
return { continue: true };
|
|
124
176
|
}
|
|
125
177
|
|
|
@@ -184,5 +236,6 @@ export default class ResponseHealer {
|
|
|
184
236
|
this.#turnHistory = [];
|
|
185
237
|
this.#lastUpdateText = null;
|
|
186
238
|
this.#updateRepeatCount = 0;
|
|
239
|
+
this.#pathRuns = new Map();
|
|
187
240
|
}
|
|
188
241
|
}
|
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
import RummyContext from "../hooks/RummyContext.js";
|
|
2
2
|
import ContextAssembler from "./ContextAssembler.js";
|
|
3
|
-
import KnownStore from "./KnownStore.js";
|
|
4
|
-
import msg from "./messages.js";
|
|
5
3
|
import ResponseHealer from "./ResponseHealer.js";
|
|
6
4
|
import { countTokens } from "./tokens.js";
|
|
7
5
|
import XmlParser from "./XmlParser.js";
|
|
8
6
|
|
|
9
|
-
const ACTION_SCHEMES = new Set([
|
|
7
|
+
const ACTION_SCHEMES = new Set([
|
|
8
|
+
"get",
|
|
9
|
+
"set",
|
|
10
|
+
"rm",
|
|
11
|
+
"mv",
|
|
12
|
+
"cp",
|
|
13
|
+
"sh",
|
|
14
|
+
"env",
|
|
15
|
+
"search",
|
|
16
|
+
]);
|
|
10
17
|
const MUTATION_SCHEMES = new Set(["set", "rm", "sh", "mv", "cp"]);
|
|
11
18
|
const READ_SCHEMES = new Set(["get", "env", "search"]);
|
|
12
19
|
|
|
@@ -58,7 +65,12 @@ export default class TurnExecutor {
|
|
|
58
65
|
fidelity: row.fidelity,
|
|
59
66
|
status: row.status,
|
|
60
67
|
body: projectedBody ?? "",
|
|
61
|
-
|
|
68
|
+
// Full-body token count, not projected. This is the cost to
|
|
69
|
+
// promote the entry — the number the model needs to do Token
|
|
70
|
+
// Budget math. Projecting the demoted symbol-preview (145
|
|
71
|
+
// tokens for a 2108-token file) was misleading the model into
|
|
72
|
+
// promotes that blew the Token Budget by 10-30× per entry.
|
|
73
|
+
tokens: countTokens(row.body ?? ""),
|
|
62
74
|
attributes: row.attributes,
|
|
63
75
|
category: row.category,
|
|
64
76
|
source_turn: row.turn,
|
|
@@ -69,6 +81,35 @@ export default class TurnExecutor {
|
|
|
69
81
|
run_id: runId,
|
|
70
82
|
});
|
|
71
83
|
const lastContextTokens = lastCtx?.context_tokens ?? 0;
|
|
84
|
+
|
|
85
|
+
// Baseline materialization — assemble with model's promoted spending
|
|
86
|
+
// removed (promoted data, promoted logging). The resulting size is the
|
|
87
|
+
// fixed overhead the model can't reduce without further demotion.
|
|
88
|
+
const baselineRows = rows.filter(
|
|
89
|
+
(r) =>
|
|
90
|
+
!(
|
|
91
|
+
(r.category === "data" || r.category === "logging") &&
|
|
92
|
+
r.fidelity === "promoted"
|
|
93
|
+
),
|
|
94
|
+
);
|
|
95
|
+
const baselineMessages = await ContextAssembler.assembleFromTurnContext(
|
|
96
|
+
baselineRows,
|
|
97
|
+
{
|
|
98
|
+
type: mode,
|
|
99
|
+
systemPrompt,
|
|
100
|
+
contextSize,
|
|
101
|
+
demoted,
|
|
102
|
+
toolSet,
|
|
103
|
+
lastContextTokens,
|
|
104
|
+
turn,
|
|
105
|
+
},
|
|
106
|
+
this.#hooks,
|
|
107
|
+
);
|
|
108
|
+
const baselineTokens = baselineMessages.reduce(
|
|
109
|
+
(sum, m) => sum + countTokens(m.content),
|
|
110
|
+
0,
|
|
111
|
+
);
|
|
112
|
+
|
|
72
113
|
const messages = await ContextAssembler.assembleFromTurnContext(
|
|
73
114
|
rows,
|
|
74
115
|
{
|
|
@@ -79,6 +120,7 @@ export default class TurnExecutor {
|
|
|
79
120
|
toolSet,
|
|
80
121
|
lastContextTokens,
|
|
81
122
|
turn,
|
|
123
|
+
baselineTokens,
|
|
82
124
|
},
|
|
83
125
|
this.#hooks,
|
|
84
126
|
);
|
|
@@ -179,7 +221,7 @@ export default class TurnExecutor {
|
|
|
179
221
|
scheme: "instructions",
|
|
180
222
|
body: instrEntry[0]?.body || "",
|
|
181
223
|
attributes: instrAttrs,
|
|
182
|
-
fidelity: "
|
|
224
|
+
fidelity: "promoted",
|
|
183
225
|
category: "system",
|
|
184
226
|
});
|
|
185
227
|
|
|
@@ -232,7 +274,7 @@ export default class TurnExecutor {
|
|
|
232
274
|
await this.#knownStore.setFidelity(
|
|
233
275
|
currentRunId,
|
|
234
276
|
promptRow.path,
|
|
235
|
-
"
|
|
277
|
+
"demoted",
|
|
236
278
|
);
|
|
237
279
|
}
|
|
238
280
|
const reMat = await this.#materializeTurnContext({
|
|
@@ -281,10 +323,13 @@ export default class TurnExecutor {
|
|
|
281
323
|
}
|
|
282
324
|
}
|
|
283
325
|
|
|
326
|
+
const runRow = await this.#db.get_run_by_id.get({ id: currentRunId });
|
|
284
327
|
const filteredMessages = await this.#hooks.llm.messages.filter(messages, {
|
|
285
328
|
model: requestedModel,
|
|
286
329
|
projectId,
|
|
287
330
|
runId: currentRunId,
|
|
331
|
+
runAlias: runRow?.alias || `run_${currentRunId}`,
|
|
332
|
+
turn,
|
|
288
333
|
});
|
|
289
334
|
|
|
290
335
|
// Call LLM
|