@possumtech/rummy 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -0
- package/FIDELITY_CONTRACT.md +172 -0
- package/README.md +5 -1
- package/SPEC.md +31 -17
- package/migrations/001_initial_schema.sql +3 -4
- package/package.json +1 -1
- package/src/agent/AgentLoop.js +51 -153
- package/src/agent/ContextAssembler.js +2 -0
- package/src/agent/KnownStore.js +16 -9
- package/src/agent/ResponseHealer.js +54 -1
- package/src/agent/TurnExecutor.js +125 -323
- package/src/agent/XmlParser.js +172 -42
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +29 -72
- package/src/agent/runs.sql +2 -2
- package/src/hooks/Hooks.js +1 -0
- package/src/hooks/PluginContext.js +8 -2
- package/src/hooks/RummyContext.js +6 -3
- package/src/hooks/ToolRegistry.js +29 -32
- package/src/plugins/ask_user/ask_user.js +2 -2
- package/src/plugins/ask_user/ask_userDoc.js +7 -10
- package/src/plugins/budget/README.md +28 -18
- package/src/plugins/budget/budget.js +80 -3
- package/src/plugins/budget/recovery.js +47 -0
- package/src/plugins/cp/cp.js +5 -5
- package/src/plugins/cp/cpDoc.js +1 -14
- package/src/plugins/engine/engine.sql +1 -1
- package/src/plugins/env/env.js +4 -4
- package/src/plugins/env/envDoc.js +4 -9
- package/src/plugins/file/file.js +2 -7
- package/src/plugins/get/get.js +32 -13
- package/src/plugins/get/getDoc.js +26 -44
- package/src/plugins/helpers.js +4 -4
- package/src/plugins/instructions/instructions.js +9 -7
- package/src/plugins/instructions/preamble.md +45 -26
- package/src/plugins/known/known.js +71 -15
- package/src/plugins/known/knownDoc.js +4 -20
- package/src/plugins/mv/mv.js +6 -6
- package/src/plugins/mv/mvDoc.js +4 -30
- package/src/plugins/policy/policy.js +47 -0
- package/src/plugins/previous/previous.js +10 -14
- package/src/plugins/progress/progress.js +29 -48
- package/src/plugins/prompt/prompt.js +18 -6
- package/src/plugins/rm/rm.js +4 -4
- package/src/plugins/rm/rmDoc.js +5 -14
- package/src/plugins/rpc/rpc.js +4 -2
- package/src/plugins/set/set.js +86 -91
- package/src/plugins/set/setDoc.js +28 -41
- package/src/plugins/sh/sh.js +4 -4
- package/src/plugins/sh/shDoc.js +4 -9
- package/src/plugins/skill/skill.js +2 -1
- package/src/plugins/summarize/summarize.js +9 -2
- package/src/plugins/summarize/summarizeDoc.js +10 -16
- package/src/plugins/telemetry/telemetry.js +36 -11
- package/src/plugins/think/think.js +13 -0
- package/src/plugins/think/thinkDoc.js +16 -0
- package/src/plugins/unknown/unknown.js +37 -9
- package/src/plugins/unknown/unknownDoc.js +7 -16
- package/src/plugins/update/update.js +9 -2
- package/src/plugins/update/updateDoc.js +12 -14
- package/src/server/ClientConnection.js +11 -1
- package/src/sql/functions/slugify.js +13 -1
- package/src/sql/v_model_context.sql +6 -6
package/.env.example
CHANGED
|
@@ -17,11 +17,13 @@ RUMMY_MMAP_MB=0
|
|
|
17
17
|
|
|
18
18
|
# Agent Loop Limits
|
|
19
19
|
RUMMY_MAX_TURNS=99
|
|
20
|
+
RUMMY_MAX_COMMANDS=15
|
|
20
21
|
RUMMY_MAX_UNKNOWN_WARNINGS=3
|
|
21
22
|
RUMMY_MAX_STALLS=3
|
|
22
23
|
RUMMY_MIN_CYCLES=3
|
|
23
24
|
RUMMY_MAX_CYCLE_PERIOD=4
|
|
24
25
|
RUMMY_MAX_UPDATE_REPEATS=3
|
|
26
|
+
RUMMY_MAX_PATH_STAGNATION=5
|
|
25
27
|
|
|
26
28
|
# Hygiene
|
|
27
29
|
# Days to keep completed/aborted runs before purging
|
|
@@ -34,6 +36,16 @@ RUMMY_FETCH_TIMEOUT=300000
|
|
|
34
36
|
# Debug
|
|
35
37
|
# RUMMY_DEBUG=true
|
|
36
38
|
|
|
39
|
+
# Think tag: 1 = model uses <think> tags for reasoning (default)
|
|
40
|
+
# 0 = disabled, model reasons via API reasoning_content field only
|
|
41
|
+
RUMMY_THINK=1
|
|
42
|
+
|
|
43
|
+
# Budget
|
|
44
|
+
# Fraction of context window used as ceiling. 0.9 = 90%, 10% reserved as headroom.
|
|
45
|
+
RUMMY_BUDGET_CEILING=0.9
|
|
46
|
+
# Maximum tokens per known entry. Entries exceeding this are rejected with 413.
|
|
47
|
+
RUMMY_MAX_ENTRY_TOKENS=512
|
|
48
|
+
|
|
37
49
|
# Token Estimation
|
|
38
50
|
# Characters per token. Lower = more conservative (fewer characters per token, so higher token estimates).
|
|
39
51
|
# Default 2. Set to 1 for worst-case (1 token per character).
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Fidelity Contract — Observed State vs Intended
|
|
2
|
+
|
|
3
|
+
## Observed Behavior (traced from test/mab/results/2026-04-14T15-13-55-950Z/last_run.txt, turn 24)
|
|
4
|
+
|
|
5
|
+
### Flow
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Model emits tool
|
|
9
|
+
↓
|
|
10
|
+
Tool handler stores body in known_entries.body (raw, as model wrote it)
|
|
11
|
+
↓
|
|
12
|
+
Next turn: TurnExecutor materializes context
|
|
13
|
+
↓
|
|
14
|
+
For each row: hooks.tools.view(scheme, entry) → plugin's view hook returns projected body
|
|
15
|
+
↓
|
|
16
|
+
Projected body stored in turn_context.body with fidelity-projected token count
|
|
17
|
+
↓
|
|
18
|
+
Assembly phase: section renderers (knowns, unknowns, previous, performed) pull from ctx.rows (which has projected body) and render tags
|
|
19
|
+
↓
|
|
20
|
+
Model sees the assembled <knowns>, <previous>, etc. sections in the system prompt
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Fidelity Values (from code)
|
|
24
|
+
|
|
25
|
+
- **full**: fully shown
|
|
26
|
+
- **summary**: "compact" shown — but WHAT "compact" means varies per plugin
|
|
27
|
+
- **archive**: excluded by `v_model_context` SQL before reaching any renderer (clean)
|
|
28
|
+
|
|
29
|
+
## Three Breaks in the Intended Contract
|
|
30
|
+
|
|
31
|
+
### Break 1 — Plugins disagree on what summary means
|
|
32
|
+
|
|
33
|
+
Every plugin that registers view hooks decides what body to project per fidelity. Observed:
|
|
34
|
+
|
|
35
|
+
| Plugin | full() | summary() |
|
|
36
|
+
|--------|--------|-----------|
|
|
37
|
+
| known | `# known ${path}\n${body}` | **same as full** (wrong) |
|
|
38
|
+
| prompt | `body` | **500-char truncation + marker** (correct) |
|
|
39
|
+
| budget | `body` | `body` (ok — budget is naturally short) |
|
|
40
|
+
| skill | `body` | `body` (inherited default) |
|
|
41
|
+
| unknown | varies — needs audit | needs audit |
|
|
42
|
+
| others | needs audit | needs audit |
|
|
43
|
+
|
|
44
|
+
The `known` plugin's `summary()` returning the full body is a direct contract violation. The summary view should return a compact representation of the entry, not the same full body.
|
|
45
|
+
|
|
46
|
+
### Break 2 — Renderers re-apply fidelity logic
|
|
47
|
+
|
|
48
|
+
Two renderers currently re-check entry fidelity and override the plugin's projection:
|
|
49
|
+
|
|
50
|
+
**`known.js` `renderKnownTag`** (lines 111-115):
|
|
51
|
+
```js
|
|
52
|
+
if (entry.fidelity === "archive") return "";
|
|
53
|
+
if (entry.fidelity === "summary") {
|
|
54
|
+
return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}/>`;
|
|
55
|
+
}
|
|
56
|
+
return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}>${entry.body}</${tag}>`;
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
This ignores entry.body at summary fidelity and renders self-closing. It's a workaround for known.summary() returning the wrong content. Belt over broken suspenders.
|
|
60
|
+
|
|
61
|
+
**`previous.js` `renderToolTag`** (my edit this session):
|
|
62
|
+
```js
|
|
63
|
+
if (entry.fidelity === "full") {
|
|
64
|
+
return `<${entry.scheme} ${attrs}>${body}</${entry.scheme}>`;
|
|
65
|
+
}
|
|
66
|
+
// summary: self-closing with summary attr
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
I added this fidelity re-check when I should have trusted the plugin's projected body. Same mistake as known, added today.
|
|
70
|
+
|
|
71
|
+
### Break 3 — Model writes scheme headers into body
|
|
72
|
+
|
|
73
|
+
Every known/update/unknown entry in the DB has a body that starts with `# known known://path\n`, `# update\n`, or `# unknown\n`. The model writes this because the examples in the system prompt render tags with the body prefixed by `# ${scheme} ${path}\n`.
|
|
74
|
+
|
|
75
|
+
Then the plugin's `full()` hook prepends ANOTHER `# ${scheme} ${path}\n` when projecting. Result: duplicate headers in the rendered output.
|
|
76
|
+
|
|
77
|
+
Observed in turn 16 update body: `"# update\n# update\nDocuments 20-22 indexed and archived."`
|
|
78
|
+
|
|
79
|
+
And in unknown paths: the slug-generation for pathless unknowns takes the body including the `# unknown\n` prefix, resulting in URL-encoded paths like:
|
|
80
|
+
```
|
|
81
|
+
unknown://%23%20unknown%0ADocument%2023%20is%20missing%20from%20the%20prompt.
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## The Intended Contract
|
|
85
|
+
|
|
86
|
+
Based on the user's stated philosophy ("surface problems, don't solve them; plugin decides, renderer renders"):
|
|
87
|
+
|
|
88
|
+
### Layer 1 — Plugin decides per fidelity
|
|
89
|
+
|
|
90
|
+
Each plugin registers view hooks that return the body content for each fidelity value:
|
|
91
|
+
|
|
92
|
+
```js
|
|
93
|
+
core.hooks.tools.onView("known", (entry) => entry.body, "full");
|
|
94
|
+
core.hooks.tools.onView("known", (entry) => "", "summary");
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
At archive, no view hook is called (v_model_context excludes them).
|
|
98
|
+
|
|
99
|
+
### Layer 2 — Renderer shows the projected body
|
|
100
|
+
|
|
101
|
+
Renderers take the projected body from `ctx.rows[].body`:
|
|
102
|
+
- If non-empty, wrap in tag with body
|
|
103
|
+
- If empty, render self-closing tag
|
|
104
|
+
|
|
105
|
+
Renderers do NOT re-check entry.fidelity. They trust the plugin's projection.
|
|
106
|
+
|
|
107
|
+
### Layer 3 — Tag attributes always present
|
|
108
|
+
|
|
109
|
+
Tag attributes visible in both full and summary rendering:
|
|
110
|
+
- `path` — always
|
|
111
|
+
- `summary` — if present in entry.attributes.summary
|
|
112
|
+
- `turn` — if source_turn is set
|
|
113
|
+
- `status` — if status is set
|
|
114
|
+
- `fidelity` — always (the value itself)
|
|
115
|
+
- `tokens` — always (full-cost value, unchanged by fidelity per `set_fidelity` SQL)
|
|
116
|
+
|
|
117
|
+
### Per-plugin view decisions (revised)
|
|
118
|
+
|
|
119
|
+
| Plugin | Category | Full body | Summary body | Notes |
|
|
120
|
+
|--------|----------|-----------|--------------|-------|
|
|
121
|
+
| known | data | `entry.body` (no `# known` prefix) | `""` | Tag's summary attr carries the keywords |
|
|
122
|
+
| unknown | unknown | `entry.body` | `""` | Same pattern as known/skill — summary attr carries the label |
|
|
123
|
+
| prompt | prompt | `entry.body` | 500-char truncation with `[truncated...]` | Current behavior is correct |
|
|
124
|
+
| budget | logging | `entry.body` | `entry.body` | Feedback signal — always full |
|
|
125
|
+
| update | logging | `entry.body` | `entry.body` | Already 80-char capped |
|
|
126
|
+
| summarize | logging | `entry.body` | `entry.body` | Already 80-char capped |
|
|
127
|
+
| get | logging | result body | `""` | Just the action tag at summary |
|
|
128
|
+
| set, rm, cp, mv | logging | result body | `""` | Just the action tag at summary |
|
|
129
|
+
| env, sh | logging | output | `""` | Just the action tag at summary |
|
|
130
|
+
| search | logging | results | `""` | Just the action tag at summary |
|
|
131
|
+
| skill | data | `entry.body` | `""` | Same as known |
|
|
132
|
+
| file | data | `entry.body` | `""` | Same as known |
|
|
133
|
+
| http, https | data | — | — | **Move to rummy.web plugin** — not in core |
|
|
134
|
+
|
|
135
|
+
## The Body-Header Problem
|
|
136
|
+
|
|
137
|
+
Separate from fidelity: the model writes `# scheme path` into the body because examples show that shape. Plugin view hooks then prepend another header.
|
|
138
|
+
|
|
139
|
+
**Rule**: `# scheme` prefix belongs only in **logging** scheme outputs (tool execution results where the prefix identifies the log entry type). Non-logging schemes (known, unknown, prompt, data entries) should have no body prefix — tag attributes identify the entry.
|
|
140
|
+
|
|
141
|
+
**What to remove**:
|
|
142
|
+
- `known.js` `full()`: remove `# known ${entry.path}\n` prefix — just return `entry.body`
|
|
143
|
+
- `unknown.js` `full()`: remove any `# unknown\n` prefix
|
|
144
|
+
- Tooldoc examples for known/unknown that show bodies starting with `# scheme path` — remove so model stops copying
|
|
145
|
+
|
|
146
|
+
**What to keep**:
|
|
147
|
+
- Logging plugins (update, summarize, budget, get, set, etc.) may keep `# scheme` prefixes if present — they're describing tool execution results.
|
|
148
|
+
|
|
149
|
+
## Test Plan
|
|
150
|
+
|
|
151
|
+
To enforce the contract:
|
|
152
|
+
|
|
153
|
+
1. **Per-plugin unit tests**: Each plugin with fidelity-sensitive views tests that `full(entry)` and `summary(entry)` return the expected content.
|
|
154
|
+
2. **Renderer tests**: Each section renderer (knowns, previous, performed, unknowns) tests that it trusts `entry.body` without re-checking fidelity.
|
|
155
|
+
3. **Integration test**: Load a DB with entries at each fidelity, assemble context, verify:
|
|
156
|
+
- Archive entries absent from any section
|
|
157
|
+
- Summary entries visible as compact tags
|
|
158
|
+
- Full entries visible with body
|
|
159
|
+
- No double headers in bodies
|
|
160
|
+
4. **Contract lint**: Grep for `entry.fidelity ===` in renderer files — should have zero matches.
|
|
161
|
+
|
|
162
|
+
## Deliverable Order
|
|
163
|
+
|
|
164
|
+
Before touching code, this document should be reviewed. Once aligned, the fix order would be:
|
|
165
|
+
|
|
166
|
+
1. Fix plugin view hooks to return correct body per fidelity
|
|
167
|
+
2. Remove fidelity re-checks from renderers
|
|
168
|
+
3. Remove the `# scheme path` header prepending (plugin-side) and examples (tooldoc-side)
|
|
169
|
+
4. Write tests per the plan above
|
|
170
|
+
5. Regenerate a sample context packet to confirm clean output
|
|
171
|
+
|
|
172
|
+
No silent interventions. No belt-and-suspenders logic. Plugin projects, renderer renders, model sees honest representation.
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# RUMMY: Relational Unknowns Memory Management Yoke
|
|
2
2
|
|
|
3
|
-
Rummy is the only LLM agent service inspired by and dedicated to the memory of former Secretary of
|
|
3
|
+
Rummy is the only LLM agent service inspired by and dedicated to the memory of former Secretary of Defense Donald "Rummy" Rumsfeld. Our unique fusion of apophatic and hedbergian engineering strategies yields more accurate and efficient results than any other agent. Our client/server and plugin architecture integrates it into more workflows than any other agent. It's also more flexible and lean than any other agent. Our dynamic cache management, model hot-swapping, and flexible router interface make it more affordable than any other agent.
|
|
4
4
|
|
|
5
5
|
## Key Features
|
|
6
6
|
|
|
@@ -10,6 +10,10 @@ Rummy is the only LLM agent service inspired by and dedicated to the memory of f
|
|
|
10
10
|
|
|
11
11
|
- **Hedberg:** The interpretation boundary between stochastic model output and deterministic system operations. Models speak in whatever syntax they were trained on — sed regex, SEARCH/REPLACE blocks, escaped characters. Hedberg normalizes all of it. Available to all plugins via `core.hooks.hedberg`.
|
|
12
12
|
|
|
13
|
+
- **Folksonomic Memory:** The model organizes its own knowledge into navigable path hierarchies with searchable summary tags. Not RAG — the model builds and curates its own taxonomy using `<known>` entries with paths like `known://project/architecture`.
|
|
14
|
+
|
|
15
|
+
- **Fidelity System:** Every entry has a visibility level: full, summary, or archive. The model manages its own context by promoting what it needs and demoting what it doesn't. Budget enforcement catches overflow post-dispatch — tools run uninterrupted, demotion happens after.
|
|
16
|
+
|
|
13
17
|
- **Plugin Architecture:** Every `<tag>` the model sees is a plugin. Every scheme is registered by its owner. The prompt itself is assembled from plugins. Drop a directory into `~/.rummy/plugins/` or install via npm. See [PLUGINS.md](PLUGINS.md) for the complete plugin API.
|
|
14
18
|
|
|
15
19
|
- **Symbols Done Right:** Designed with universal language support in mind. Powered by [@possumtech/antlrmap](https://github.com/possumtech/antlrmap).
|
package/SPEC.md
CHANGED
|
@@ -44,7 +44,7 @@ body, attributes, and state.
|
|
|
44
44
|
known_entries (
|
|
45
45
|
id, run_id, loop_id, turn, path, body, scheme,
|
|
46
46
|
status INTEGER, fidelity TEXT, hash,
|
|
47
|
-
attributes, tokens,
|
|
47
|
+
attributes, tokens, refs, write_count,
|
|
48
48
|
created_at, updated_at
|
|
49
49
|
)
|
|
50
50
|
```
|
|
@@ -56,10 +56,9 @@ known_entries (
|
|
|
56
56
|
| `attributes` | Tag attributes as JSON. Handler-private workspace. `CHECK (json_valid)` |
|
|
57
57
|
| `scheme` | Generated from path via `schemeOf()`. Drives dispatch and view routing |
|
|
58
58
|
| `status` | HTTP status code (200, 202, 400, 413, etc.) |
|
|
59
|
-
| `fidelity` | Visibility level: full, summary,
|
|
59
|
+
| `fidelity` | Visibility level: full, summary, archive |
|
|
60
60
|
| `hash` | SHA-256 for file change detection |
|
|
61
|
-
| `tokens` |
|
|
62
|
-
| `tokens_full` | Cost of raw body at full fidelity |
|
|
61
|
+
| `tokens` | Full-body token cost. Never changes on demotion/promotion. |
|
|
63
62
|
| `turn` | Freshness — when was this entry last touched |
|
|
64
63
|
|
|
65
64
|
### 1.2 Schemes, Status & Fidelity
|
|
@@ -211,8 +210,8 @@ object is the same shape at every tier.
|
|
|
211
210
|
Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
|
|
212
211
|
Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
|
|
213
212
|
`noWeb` excludes `search`, `noProposals` excludes `ask_user`/`env`/`sh`.
|
|
214
|
-
|
|
215
|
-
|
|
213
|
+
14 model tools: think, unknown, known, get, set, env, sh, rm, cp, mv,
|
|
214
|
+
ask_user, update, summarize, search.
|
|
216
215
|
Client tier requires project init. Plugin tier has no restrictions.
|
|
217
216
|
|
|
218
217
|
### 3.2 Dispatch Path
|
|
@@ -225,13 +224,28 @@ Client: JSON-RPC → { method, params } → #record() → dispatch(scheme, en
|
|
|
225
224
|
Plugin: rummy.rm({ path }) → #record() → dispatch(scheme, entry, rummy)
|
|
226
225
|
```
|
|
227
226
|
|
|
228
|
-
**
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
227
|
+
**Tool dispatch:** Commands are dispatched sequentially in the order
|
|
228
|
+
the model emitted them. Each tool either succeeds (200), fails (400+),
|
|
229
|
+
or proposes (202). On failure, all remaining tools are aborted. On
|
|
230
|
+
proposal, dispatch pauses, a notification is pushed to the client
|
|
231
|
+
(same WebSocket push pattern as `run/progress`), the client resolves
|
|
232
|
+
(accept/reject), and dispatch resumes — the proposal becomes 200 or
|
|
233
|
+
400+ like any other tool. The `ask`/`act` RPC response is only sent
|
|
234
|
+
when all tools have completed. Proposals are NOT batched — each is
|
|
235
|
+
sent and resolved inline during dispatch. The model controls tool
|
|
236
|
+
ordering; the system respects it.
|
|
237
|
+
|
|
238
|
+
If the model sends `<summarize>` but a preceding action in the same
|
|
239
|
+
turn failed, the summarize is overridden to an update (the model's
|
|
240
|
+
assertion that it's done is false). Both `<summarize>` and `<update>`
|
|
241
|
+
present → last signal wins.
|
|
242
|
+
|
|
243
|
+
**Post-dispatch budget check:** After all tools dispatch, the system
|
|
244
|
+
materializes context and checks the budget ceiling. If context exceeds
|
|
245
|
+
the ceiling, Turn Demotion fires — all entries from this turn are
|
|
246
|
+
demoted to summary and a `budget://` entry is written. This is a
|
|
247
|
+
system housekeeping step independent of tool success/failure. The
|
|
248
|
+
tools already ran; their outcomes are settled.
|
|
235
249
|
|
|
236
250
|
### 3.3 Plugin Convention
|
|
237
251
|
|
|
@@ -293,7 +307,7 @@ Two messages per turn. System = stable truth. User = active task.
|
|
|
293
307
|
[skills/]
|
|
294
308
|
[/instructions]
|
|
295
309
|
<knowns>
|
|
296
|
-
...entries sorted by fidelity (
|
|
310
|
+
...entries sorted by fidelity (summary, full), then by scheme
|
|
297
311
|
</knowns>
|
|
298
312
|
<previous>
|
|
299
313
|
(pre-loop entries, each with turn, status, summary, fidelity, tokens)
|
|
@@ -531,7 +545,7 @@ ask_user. `noRepo: true` — no file scanning during panic.
|
|
|
531
545
|
`budget.panicPrompt()`: the assembled token count, the target, and
|
|
532
546
|
the exact number of tokens to free. Turn 2+ receives a continuation
|
|
533
547
|
prompt. The model uses `<set fidelity="archive">`, `<mv
|
|
534
|
-
fidelity="
|
|
548
|
+
fidelity="summary">`, and similar fidelity operations to free space,
|
|
535
549
|
concluding with `<summarize>` when done or `<update>` while working.
|
|
536
550
|
|
|
537
551
|
---
|
|
@@ -660,7 +674,7 @@ simple to powerful — weak models learn from examples 1-2, strong models
|
|
|
660
674
|
pick up the pattern from example 3.
|
|
661
675
|
|
|
662
676
|
**Lifecycle continuity.** Examples weave stories across tools. The get
|
|
663
|
-
docs end with `<set path="..." fidelity="
|
|
677
|
+
docs end with `<set path="..." fidelity="summary"/>`. The known docs
|
|
664
678
|
reference `<get path="known://*">keyword</get>` for recall and
|
|
665
679
|
`<set path="known://..." archive/>` for archiving. The unknown docs
|
|
666
680
|
reference `<get/>` for investigation and `<rm/>` for cleanup. A model
|
|
@@ -746,7 +760,7 @@ Termination protocol:
|
|
|
746
760
|
- `<summarize>` → run terminates
|
|
747
761
|
- `<summarize>` + failed actions → overridden to `<update>` (continue)
|
|
748
762
|
- `<update>` → run continues
|
|
749
|
-
- Both →
|
|
763
|
+
- Both → last signal wins (respects the model's final intent)
|
|
750
764
|
- Neither + investigation tools → stall counter (RUMMY_MAX_STALLS)
|
|
751
765
|
- Neither + action-only tools → healed to summarize
|
|
752
766
|
- Neither + plain text → healed to summarize
|
|
@@ -124,13 +124,12 @@ CREATE TABLE IF NOT EXISTS known_entries (
|
|
|
124
124
|
, body TEXT NOT NULL DEFAULT ''
|
|
125
125
|
, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
|
|
126
126
|
, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
|
|
127
|
-
, fidelity TEXT NOT NULL DEFAULT '
|
|
128
|
-
fidelity IN ('
|
|
127
|
+
, fidelity TEXT NOT NULL DEFAULT 'promoted' CHECK (
|
|
128
|
+
fidelity IN ('promoted', 'demoted', 'archived')
|
|
129
129
|
)
|
|
130
130
|
, hash TEXT
|
|
131
131
|
, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
|
|
132
132
|
, tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
|
|
133
|
-
, tokens_full INTEGER NOT NULL DEFAULT 0 CHECK (tokens_full >= 0)
|
|
134
133
|
, refs INTEGER NOT NULL DEFAULT 0 CHECK (refs >= 0)
|
|
135
134
|
, write_count INTEGER NOT NULL DEFAULT 1 CHECK (write_count >= 1)
|
|
136
135
|
, created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
@@ -167,7 +166,7 @@ CREATE TABLE IF NOT EXISTS turn_context (
|
|
|
167
166
|
, path TEXT NOT NULL
|
|
168
167
|
, scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
|
|
169
168
|
, status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
|
|
170
|
-
, fidelity TEXT NOT NULL CHECK (fidelity IN ('
|
|
169
|
+
, fidelity TEXT NOT NULL CHECK (fidelity IN ('promoted', 'demoted'))
|
|
171
170
|
, body TEXT NOT NULL DEFAULT ''
|
|
172
171
|
, tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
|
|
173
172
|
, attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
|
package/package.json
CHANGED
package/src/agent/AgentLoop.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { advanceRecovery } from "../plugins/budget/recovery.js";
|
|
2
2
|
import msg from "./messages.js";
|
|
3
3
|
import ResponseHealer from "./ResponseHealer.js";
|
|
4
4
|
|
|
@@ -70,14 +70,15 @@ export default class AgentLoop {
|
|
|
70
70
|
const existing = this.#activeRuns.get(existingRun.id);
|
|
71
71
|
if (existing) existing.abort();
|
|
72
72
|
|
|
73
|
+
// Clean up stale proposals from interrupted runs
|
|
73
74
|
const unresolved = await this.#knownStore.getUnresolved(existingRun.id);
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
75
|
+
for (const u of unresolved) {
|
|
76
|
+
await this.#knownStore.resolve(
|
|
77
|
+
existingRun.id,
|
|
78
|
+
u.path,
|
|
79
|
+
499,
|
|
80
|
+
"Stale proposal from interrupted run",
|
|
81
|
+
);
|
|
81
82
|
}
|
|
82
83
|
return { runId: existingRun.id, alias: existingRun.alias };
|
|
83
84
|
}
|
|
@@ -125,15 +126,6 @@ export default class AgentLoop {
|
|
|
125
126
|
const requestedModel = model;
|
|
126
127
|
|
|
127
128
|
const runInfo = await this.#ensureRun(projectId, model, run, options);
|
|
128
|
-
if (runInfo.blocked) {
|
|
129
|
-
return {
|
|
130
|
-
run: runInfo.alias,
|
|
131
|
-
status: 202,
|
|
132
|
-
remainingCount: runInfo.proposed.length,
|
|
133
|
-
proposed: runInfo.proposed,
|
|
134
|
-
};
|
|
135
|
-
}
|
|
136
|
-
|
|
137
129
|
const { runId: currentRunId, alias: currentAlias } = runInfo;
|
|
138
130
|
|
|
139
131
|
const loopSeq = await this.#db.next_loop.get({ run_id: currentRunId });
|
|
@@ -222,11 +214,9 @@ export default class AgentLoop {
|
|
|
222
214
|
|
|
223
215
|
await this.#db.complete_loop.run({
|
|
224
216
|
id: loop.id,
|
|
225
|
-
status: result.status
|
|
217
|
+
status: result.status,
|
|
226
218
|
result: JSON.stringify(result),
|
|
227
219
|
});
|
|
228
|
-
|
|
229
|
-
if (result.status === 202) return result;
|
|
230
220
|
}
|
|
231
221
|
|
|
232
222
|
const runRow = await this.#db.get_run_by_alias.get({
|
|
@@ -282,12 +272,9 @@ export default class AgentLoop {
|
|
|
282
272
|
let _lastAssembledTokens = 0;
|
|
283
273
|
let recovery = null; // { target, promptPath, strikes, lastTokens }
|
|
284
274
|
|
|
285
|
-
//
|
|
286
|
-
//
|
|
287
|
-
|
|
288
|
-
currentRunId,
|
|
289
|
-
currentLoopId,
|
|
290
|
-
);
|
|
275
|
+
// Previous loop entries stay at full fidelity — the model is
|
|
276
|
+
// instructed to summarize and demote them. Budget enforcement
|
|
277
|
+
// catches overflow if the model fails to manage context.
|
|
291
278
|
|
|
292
279
|
// Restore any prompt entries left at summary fidelity by a recovery
|
|
293
280
|
// phase that was interrupted (server crash, restart). If the full
|
|
@@ -347,7 +334,16 @@ export default class AgentLoop {
|
|
|
347
334
|
});
|
|
348
335
|
|
|
349
336
|
if (result.status === 413) {
|
|
350
|
-
|
|
337
|
+
await this.#db.complete_loop.run({
|
|
338
|
+
id: currentLoopId,
|
|
339
|
+
status: 413,
|
|
340
|
+
result: null,
|
|
341
|
+
});
|
|
342
|
+
await this.#db.update_run_status.run({
|
|
343
|
+
id: currentRunId,
|
|
344
|
+
status: 200,
|
|
345
|
+
});
|
|
346
|
+
const out = {
|
|
351
347
|
run: currentAlias,
|
|
352
348
|
status: 413,
|
|
353
349
|
overflow: result.overflow,
|
|
@@ -355,6 +351,8 @@ export default class AgentLoop {
|
|
|
355
351
|
contextSize: result.contextSize,
|
|
356
352
|
turn: result.turn,
|
|
357
353
|
};
|
|
354
|
+
await hook.completed.emit({ projectId, ...out });
|
|
355
|
+
return out;
|
|
358
356
|
}
|
|
359
357
|
|
|
360
358
|
_lastAssembledTokens = result.assembledTokens;
|
|
@@ -366,7 +364,7 @@ export default class AgentLoop {
|
|
|
366
364
|
await this.#knownStore.setFidelity(
|
|
367
365
|
currentRunId,
|
|
368
366
|
ra.promptPath,
|
|
369
|
-
"
|
|
367
|
+
"promoted",
|
|
370
368
|
);
|
|
371
369
|
}
|
|
372
370
|
if (ra.action === "hard413") {
|
|
@@ -390,8 +388,6 @@ export default class AgentLoop {
|
|
|
390
388
|
const unknowns = await this.#db.get_unknowns.all({
|
|
391
389
|
run_id: currentRunId,
|
|
392
390
|
});
|
|
393
|
-
const unresolved = await this.#knownStore.getUnresolved(currentRunId);
|
|
394
|
-
|
|
395
391
|
const latestSummary = history
|
|
396
392
|
.filter((e) => e.status === 200 && e.path?.startsWith("summarize://"))
|
|
397
393
|
.at(-1);
|
|
@@ -400,15 +396,10 @@ export default class AgentLoop {
|
|
|
400
396
|
projectId,
|
|
401
397
|
run: currentAlias,
|
|
402
398
|
turn: result.turn,
|
|
403
|
-
status:
|
|
399
|
+
status: 102,
|
|
404
400
|
summary: latestSummary?.body || "",
|
|
405
401
|
history,
|
|
406
402
|
unknowns: unknowns.map((u) => ({ path: u.path, body: u.body })),
|
|
407
|
-
proposed: unresolved.map((p) => ({
|
|
408
|
-
path: p.path,
|
|
409
|
-
type: KnownStore.toolFromPath(p.path) || "unknown",
|
|
410
|
-
attributes: p.attributes ? JSON.parse(p.attributes) : null,
|
|
411
|
-
})),
|
|
412
403
|
telemetry: {
|
|
413
404
|
modelAlias: result.modelAlias,
|
|
414
405
|
model: result.model,
|
|
@@ -433,21 +424,6 @@ export default class AgentLoop {
|
|
|
433
424
|
}),
|
|
434
425
|
},
|
|
435
426
|
});
|
|
436
|
-
if (unresolved.length > 0) {
|
|
437
|
-
await this.#db.update_run_status.run({
|
|
438
|
-
id: currentRunId,
|
|
439
|
-
status: 202,
|
|
440
|
-
});
|
|
441
|
-
const out = {
|
|
442
|
-
run: currentAlias,
|
|
443
|
-
status: 202,
|
|
444
|
-
turn: result.turn,
|
|
445
|
-
proposed: unresolved,
|
|
446
|
-
};
|
|
447
|
-
await hook.completed.emit({ projectId, ...out });
|
|
448
|
-
return out;
|
|
449
|
-
}
|
|
450
|
-
|
|
451
427
|
await this.#hooks.run.step.completed.emit({
|
|
452
428
|
projectId,
|
|
453
429
|
run: currentAlias,
|
|
@@ -574,6 +550,12 @@ export default class AgentLoop {
|
|
|
574
550
|
}
|
|
575
551
|
|
|
576
552
|
if (action === "accept") {
|
|
553
|
+
const projectId = runRow.project_id;
|
|
554
|
+
const project = await this.#db.get_project_by_id.get({
|
|
555
|
+
id: projectId,
|
|
556
|
+
});
|
|
557
|
+
const projectRoot = project?.project_root;
|
|
558
|
+
|
|
577
559
|
if (path.startsWith("set://") && attrs?.file && attrs?.merge) {
|
|
578
560
|
const fileBody = await this.#knownStore.getBody(runId, attrs.file);
|
|
579
561
|
if (fileBody != null) {
|
|
@@ -594,12 +576,25 @@ export default class AgentLoop {
|
|
|
594
576
|
patched,
|
|
595
577
|
200,
|
|
596
578
|
);
|
|
579
|
+
// Write patched content to disk
|
|
580
|
+
if (projectRoot) {
|
|
581
|
+
const { writeFile } = await import("node:fs/promises");
|
|
582
|
+
const { join } = await import("node:path");
|
|
583
|
+
await writeFile(join(projectRoot, attrs.file), patched).catch(
|
|
584
|
+
() => {},
|
|
585
|
+
);
|
|
586
|
+
}
|
|
597
587
|
}
|
|
598
588
|
}
|
|
599
589
|
|
|
600
590
|
if (path.startsWith("rm://")) {
|
|
601
591
|
if (attrs?.path) {
|
|
602
592
|
await this.#knownStore.remove(runId, attrs.path);
|
|
593
|
+
if (projectRoot) {
|
|
594
|
+
const { unlink } = await import("node:fs/promises");
|
|
595
|
+
const { join } = await import("node:path");
|
|
596
|
+
await unlink(join(projectRoot, attrs.path)).catch(() => {});
|
|
597
|
+
}
|
|
603
598
|
}
|
|
604
599
|
}
|
|
605
600
|
|
|
@@ -615,68 +610,9 @@ export default class AgentLoop {
|
|
|
615
610
|
throw new Error(msg("error.resolution_invalid", { action }));
|
|
616
611
|
}
|
|
617
612
|
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
run: runAlias,
|
|
622
|
-
status: 202,
|
|
623
|
-
remainingCount: unresolved.length,
|
|
624
|
-
proposed: unresolved,
|
|
625
|
-
};
|
|
626
|
-
}
|
|
627
|
-
|
|
628
|
-
// Scope completion checks to the current loop
|
|
629
|
-
const currentLoop = await this.#db.get_current_loop.get({ run_id: runId });
|
|
630
|
-
const loopId = currentLoop?.id ?? null;
|
|
631
|
-
|
|
632
|
-
if (await this.#knownStore.hasRejections(runId, loopId)) {
|
|
633
|
-
if (currentLoop)
|
|
634
|
-
await this.#db.complete_loop.run({
|
|
635
|
-
id: loopId,
|
|
636
|
-
status: 200,
|
|
637
|
-
result: null,
|
|
638
|
-
});
|
|
639
|
-
await this.#db.update_run_status.run({ id: runId, status: 200 });
|
|
640
|
-
return { run: runAlias, status: 200 };
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
const hasSummary = await this.#db.get_latest_summary.get({
|
|
644
|
-
run_id: runId,
|
|
645
|
-
loop_id: loopId,
|
|
646
|
-
});
|
|
647
|
-
if (hasSummary?.body) {
|
|
648
|
-
if (currentLoop)
|
|
649
|
-
await this.#db.complete_loop.run({
|
|
650
|
-
id: loopId,
|
|
651
|
-
status: 200,
|
|
652
|
-
result: null,
|
|
653
|
-
});
|
|
654
|
-
await this.#db.update_run_status.run({ id: runId, status: 200 });
|
|
655
|
-
return { run: runAlias, status: 200 };
|
|
656
|
-
}
|
|
657
|
-
|
|
658
|
-
// No summary and no rejections in this loop — resume it
|
|
659
|
-
const projectId = runRow.project_id;
|
|
660
|
-
const project = await this.#db.get_project_by_id.get({ id: projectId });
|
|
661
|
-
|
|
662
|
-
const latestPrompt = await this.#db.get_latest_prompt.get({
|
|
663
|
-
run_id: runId,
|
|
664
|
-
});
|
|
665
|
-
const resumeMode = latestPrompt?.attributes
|
|
666
|
-
? JSON.parse(latestPrompt.attributes).mode
|
|
667
|
-
: "ask";
|
|
668
|
-
|
|
669
|
-
// Re-enqueue the current loop's prompt to continue it
|
|
670
|
-
const loopSeq = await this.#db.next_loop.get({ run_id: runId });
|
|
671
|
-
await this.#db.enqueue_loop.get({
|
|
672
|
-
run_id: runId,
|
|
673
|
-
sequence: loopSeq.sequence,
|
|
674
|
-
mode: resumeMode,
|
|
675
|
-
model: runRow.model,
|
|
676
|
-
prompt: "",
|
|
677
|
-
config: currentLoop?.config || "{}",
|
|
678
|
-
});
|
|
679
|
-
return this.#drainQueue(runId, runAlias, projectId, project, {});
|
|
613
|
+
// The dispatch loop is awaiting resolution. This unblocks it.
|
|
614
|
+
// Dispatch continuation is handled by the loop, not here.
|
|
615
|
+
return { run: runAlias, status: 200 };
|
|
680
616
|
}
|
|
681
617
|
|
|
682
618
|
async #composeResolvedContent(runId, path, _attrs, output) {
|
|
@@ -741,43 +677,5 @@ export default class AgentLoop {
|
|
|
741
677
|
* @param {{ assembledTokens: number, budgetRecovery?: { target: number, promptPath: string|null } }} result
|
|
742
678
|
* @returns {{ next: object|null, action: null|'restore'|'hard413', promptPath: string|null }}
|
|
743
679
|
*/
|
|
744
|
-
export
|
|
745
|
-
|
|
746
|
-
if (result.budgetRecovery) {
|
|
747
|
-
if (!recovery) {
|
|
748
|
-
recovery = {
|
|
749
|
-
target: result.budgetRecovery.target,
|
|
750
|
-
promptPath: result.budgetRecovery.promptPath,
|
|
751
|
-
strikes: 0,
|
|
752
|
-
lastTokens: result.assembledTokens,
|
|
753
|
-
};
|
|
754
|
-
} else {
|
|
755
|
-
// Re-overflow during recovery: tighten target, don't count as strike.
|
|
756
|
-
recovery = {
|
|
757
|
-
...recovery,
|
|
758
|
-
target: Math.min(recovery.target, result.budgetRecovery.target),
|
|
759
|
-
};
|
|
760
|
-
}
|
|
761
|
-
}
|
|
762
|
-
|
|
763
|
-
if (recovery === null) return { next: null, action: null, promptPath: null };
|
|
764
|
-
|
|
765
|
-
const current = result.assembledTokens;
|
|
766
|
-
|
|
767
|
-
if (current <= recovery.target) {
|
|
768
|
-
return { next: null, action: "restore", promptPath: recovery.promptPath };
|
|
769
|
-
}
|
|
770
|
-
|
|
771
|
-
const noProgress = current >= recovery.lastTokens && !result.budgetRecovery;
|
|
772
|
-
const strikes = noProgress ? recovery.strikes + 1 : 0;
|
|
773
|
-
|
|
774
|
-
if (strikes >= 3) {
|
|
775
|
-
return { next: null, action: "hard413", promptPath: null };
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
return {
|
|
779
|
-
next: { ...recovery, strikes, lastTokens: current },
|
|
780
|
-
action: null,
|
|
781
|
-
promptPath: null,
|
|
782
|
-
};
|
|
783
|
-
}
|
|
680
|
+
// Re-export for backward compatibility with tests
|
|
681
|
+
export { advanceRecovery } from "../plugins/budget/recovery.js";
|