@possumtech/rummy 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/.env.example +12 -0
  2. package/FIDELITY_CONTRACT.md +172 -0
  3. package/README.md +5 -1
  4. package/SPEC.md +31 -17
  5. package/migrations/001_initial_schema.sql +3 -4
  6. package/package.json +1 -1
  7. package/src/agent/AgentLoop.js +51 -153
  8. package/src/agent/ContextAssembler.js +2 -0
  9. package/src/agent/KnownStore.js +16 -9
  10. package/src/agent/ResponseHealer.js +54 -1
  11. package/src/agent/TurnExecutor.js +125 -323
  12. package/src/agent/XmlParser.js +172 -42
  13. package/src/agent/known_queries.sql +1 -1
  14. package/src/agent/known_store.sql +29 -72
  15. package/src/agent/runs.sql +2 -2
  16. package/src/hooks/Hooks.js +1 -0
  17. package/src/hooks/PluginContext.js +8 -2
  18. package/src/hooks/RummyContext.js +6 -3
  19. package/src/hooks/ToolRegistry.js +29 -32
  20. package/src/plugins/ask_user/ask_user.js +2 -2
  21. package/src/plugins/ask_user/ask_userDoc.js +7 -10
  22. package/src/plugins/budget/README.md +28 -18
  23. package/src/plugins/budget/budget.js +80 -3
  24. package/src/plugins/budget/recovery.js +47 -0
  25. package/src/plugins/cp/cp.js +5 -5
  26. package/src/plugins/cp/cpDoc.js +1 -14
  27. package/src/plugins/engine/engine.sql +1 -1
  28. package/src/plugins/env/env.js +4 -4
  29. package/src/plugins/env/envDoc.js +4 -9
  30. package/src/plugins/file/file.js +2 -7
  31. package/src/plugins/get/get.js +32 -13
  32. package/src/plugins/get/getDoc.js +26 -44
  33. package/src/plugins/helpers.js +4 -4
  34. package/src/plugins/instructions/instructions.js +9 -7
  35. package/src/plugins/instructions/preamble.md +45 -26
  36. package/src/plugins/known/known.js +71 -15
  37. package/src/plugins/known/knownDoc.js +4 -20
  38. package/src/plugins/mv/mv.js +6 -6
  39. package/src/plugins/mv/mvDoc.js +4 -30
  40. package/src/plugins/policy/policy.js +47 -0
  41. package/src/plugins/previous/previous.js +10 -14
  42. package/src/plugins/progress/progress.js +29 -48
  43. package/src/plugins/prompt/prompt.js +18 -6
  44. package/src/plugins/rm/rm.js +4 -4
  45. package/src/plugins/rm/rmDoc.js +5 -14
  46. package/src/plugins/rpc/rpc.js +4 -2
  47. package/src/plugins/set/set.js +86 -91
  48. package/src/plugins/set/setDoc.js +28 -41
  49. package/src/plugins/sh/sh.js +4 -4
  50. package/src/plugins/sh/shDoc.js +4 -9
  51. package/src/plugins/skill/skill.js +2 -1
  52. package/src/plugins/summarize/summarize.js +9 -2
  53. package/src/plugins/summarize/summarizeDoc.js +10 -16
  54. package/src/plugins/telemetry/telemetry.js +36 -11
  55. package/src/plugins/think/think.js +13 -0
  56. package/src/plugins/think/thinkDoc.js +16 -0
  57. package/src/plugins/unknown/unknown.js +37 -9
  58. package/src/plugins/unknown/unknownDoc.js +7 -16
  59. package/src/plugins/update/update.js +9 -2
  60. package/src/plugins/update/updateDoc.js +12 -14
  61. package/src/server/ClientConnection.js +11 -1
  62. package/src/sql/functions/slugify.js +13 -1
  63. package/src/sql/v_model_context.sql +6 -6
package/.env.example CHANGED
@@ -17,11 +17,13 @@ RUMMY_MMAP_MB=0
17
17
 
18
18
  # Agent Loop Limits
19
19
  RUMMY_MAX_TURNS=99
20
+ RUMMY_MAX_COMMANDS=15
20
21
  RUMMY_MAX_UNKNOWN_WARNINGS=3
21
22
  RUMMY_MAX_STALLS=3
22
23
  RUMMY_MIN_CYCLES=3
23
24
  RUMMY_MAX_CYCLE_PERIOD=4
24
25
  RUMMY_MAX_UPDATE_REPEATS=3
26
+ RUMMY_MAX_PATH_STAGNATION=5
25
27
 
26
28
  # Hygiene
27
29
  # Days to keep completed/aborted runs before purging
@@ -34,6 +36,16 @@ RUMMY_FETCH_TIMEOUT=300000
34
36
  # Debug
35
37
  # RUMMY_DEBUG=true
36
38
 
39
+ # Think tag: 1 = model uses <think> tags for reasoning (default)
40
+ # 0 = disabled, model reasons via API reasoning_content field only
41
+ RUMMY_THINK=1
42
+
43
+ # Budget
44
+ # Fraction of context window used as ceiling. 0.9 = 90%, 10% reserved as headroom.
45
+ RUMMY_BUDGET_CEILING=0.9
46
+ # Maximum tokens per known entry. Entries exceeding this are rejected with 413.
47
+ RUMMY_MAX_ENTRY_TOKENS=512
48
+
37
49
  # Token Estimation
38
50
  # Characters per token. Lower = more conservative (more tokens per character).
39
51
  # Default 2. Set to 1 for worst-case (1 token per character).
@@ -0,0 +1,172 @@
1
+ # Fidelity Contract — Observed State vs Intended
2
+
3
+ ## Observed Behavior (traced from test/mab/results/2026-04-14T15-13-55-950Z/last_run.txt, turn 24)
4
+
5
+ ### Flow
6
+
7
+ ```
8
+ Model emits tool
9
+
10
+ Tool handler stores body in known_entries.body (raw, as model wrote it)
11
+
12
+ Next turn: TurnExecutor materializes context
13
+
14
+ For each row: hooks.tools.view(scheme, entry) → plugin's view hook returns projected body
15
+
16
+ Projected body stored in turn_context.body with fidelity-projected token count
17
+
18
+ Assembly phase: section renderers (knowns, unknowns, previous, performed) pull from ctx.rows (which has projected body) and render tags
19
+
20
+ Model sees the assembled <knowns>, <previous>, etc. sections in the system prompt
21
+ ```
22
+
23
+ ### Fidelity Values (from code)
24
+
25
+ - **full**: fully shown
26
+ - **summary**: "compact" shown — but WHAT "compact" means varies per plugin
27
+ - **archive**: excluded by `v_model_context` SQL before reaching any renderer (clean)
28
+
29
+ ## Three Breaks in the Intended Contract
30
+
31
+ ### Break 1 — Plugins disagree on what summary means
32
+
33
+ Every plugin that registers view hooks decides what body to project per fidelity. Observed:
34
+
35
+ | Plugin | full() | summary() |
36
+ |--------|--------|-----------|
37
+ | known | `# known ${path}\n${body}` | **same as full** (wrong) |
38
+ | prompt | `body` | **500-char truncation + marker** (correct) |
39
+ | budget | `body` | `body` (ok — budget is naturally short) |
40
+ | skill | `body` | `body` (inherited default) |
41
+ | unknown | varies — needs audit | needs audit |
42
+ | others | needs audit | needs audit |
43
+
44
+ The `known` plugin's `summary()` returning the full body is a direct contract violation. The summary view should return a compact representation of the entry, not the same full body.
45
+
46
+ ### Break 2 — Renderers re-apply fidelity logic
47
+
48
+ Two renderers currently re-check entry fidelity and override the plugin's projection:
49
+
50
+ **`known.js` `renderKnownTag`** (lines 111-115):
51
+ ```js
52
+ if (entry.fidelity === "archive") return "";
53
+ if (entry.fidelity === "summary") {
54
+ return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}/>`;
55
+ }
56
+ return `<${tag} path="${entry.path}"...${summary}${fidelity}${tokens}${flag}>${entry.body}</${tag}>`;
57
+ ```
58
+
59
+ This ignores entry.body at summary fidelity and renders self-closing. It's a workaround for known.summary() returning the wrong content. Belt over broken suspenders.
60
+
61
+ **`previous.js` `renderToolTag`** (my edit this session):
62
+ ```js
63
+ if (entry.fidelity === "full") {
64
+ return `<${entry.scheme} ${attrs}>${body}</${entry.scheme}>`;
65
+ }
66
+ // summary: self-closing with summary attr
67
+ ```
68
+
69
+ I added this fidelity re-check when I should have trusted the plugin's projected body. Same mistake as known, added today.
70
+
71
+ ### Break 3 — Model writes scheme headers into body
72
+
73
+ Every known/update/unknown entry in the DB has a body that starts with `# known known://path\n`, `# update\n`, or `# unknown\n`. The model writes this because the examples in the system prompt render tags with the body prefixed by `# ${scheme} ${path}\n`.
74
+
75
+ Then the plugin's `full()` hook prepends ANOTHER `# ${scheme} ${path}\n` when projecting. Result: duplicate headers in the rendered output.
76
+
77
+ Observed in turn 16 update body: `"# update\n# update\nDocuments 20-22 indexed and archived."`
78
+
79
+ And in unknown paths: the slug-generation for pathless unknowns takes the body including the `# unknown\n` prefix, resulting in URL-encoded paths like:
80
+ ```
81
+ unknown://%23%20unknown%0ADocument%2023%20is%20missing%20from%20the%20prompt.
82
+ ```
83
+
84
+ ## The Intended Contract
85
+
86
+ Based on the user's stated philosophy ("surface problems, don't solve them; plugin decides, renderer renders"):
87
+
88
+ ### Layer 1 — Plugin decides per fidelity
89
+
90
+ Each plugin registers view hooks that return the body content for each fidelity value:
91
+
92
+ ```js
93
+ core.hooks.tools.onView("known", (entry) => entry.body, "full");
94
+ core.hooks.tools.onView("known", (entry) => "", "summary");
95
+ ```
96
+
97
+ At archive, no view hook is called (v_model_context excludes them).
98
+
99
+ ### Layer 2 — Renderer shows the projected body
100
+
101
+ Renderers take the projected body from `ctx.rows[].body`:
102
+ - If non-empty, wrap in tag with body
103
+ - If empty, render self-closing tag
104
+
105
+ Renderers do NOT re-check entry.fidelity. They trust the plugin's projection.
106
+
107
+ ### Layer 3 — Tag attributes always present
108
+
109
+ Tag attributes visible in both full and summary rendering:
110
+ - `path` — always
111
+ - `summary` — if present in entry.attributes.summary
112
+ - `turn` — if source_turn is set
113
+ - `status` — if status is set
114
+ - `fidelity` — always (the value itself)
115
+ - `tokens` — always (full-cost value, unchanged by fidelity per `set_fidelity` SQL)
116
+
117
+ ### Per-plugin view decisions (revised)
118
+
119
+ | Plugin | Category | Full body | Summary body | Notes |
120
+ |--------|----------|-----------|--------------|-------|
121
+ | known | data | `entry.body` (no `# known` prefix) | `""` | Tag's summary attr carries the keywords |
122
+ | unknown | unknown | `entry.body` | `""` | Same pattern as known/skill — summary attr carries the label |
123
+ | prompt | prompt | `entry.body` | 500-char truncation with `[truncated...]` | Current behavior is correct |
124
+ | budget | logging | `entry.body` | `entry.body` | Feedback signal — always full |
125
+ | update | logging | `entry.body` | `entry.body` | Already 80-char capped |
126
+ | summarize | logging | `entry.body` | `entry.body` | Already 80-char capped |
127
+ | get | logging | result body | `""` | Just the action tag at summary |
128
+ | set, rm, cp, mv | logging | result body | `""` | Just the action tag at summary |
129
+ | env, sh | logging | output | `""` | Just the action tag at summary |
130
+ | search | logging | results | `""` | Just the action tag at summary |
131
+ | skill | data | `entry.body` | `""` | Same as known |
132
+ | file | data | `entry.body` | `""` | Same as known |
133
+ | http, https | data | — | — | **Move to rummy.web plugin** — not in core |
134
+
135
+ ## The Body-Header Problem
136
+
137
+ Separate from fidelity: the model writes `# scheme path` into the body because examples show that shape. Plugin view hooks then prepend another header.
138
+
139
+ **Rule**: `# scheme` prefix belongs only in **logging** scheme outputs (tool execution results where the prefix identifies the log entry type). Non-logging schemes (known, unknown, prompt, data entries) should have no body prefix — tag attributes identify the entry.
140
+
141
+ **What to remove**:
142
+ - `known.js` `full()`: remove `# known ${entry.path}\n` prefix — just return `entry.body`
143
+ - `unknown.js` `full()`: remove any `# unknown\n` prefix
144
+ - Tooldoc examples for known/unknown that show bodies starting with `# scheme path` — remove so model stops copying
145
+
146
+ **What to keep**:
147
+ - Logging plugins (update, summarize, budget, get, set, etc.) may keep `# scheme` prefixes if present — they're describing tool execution results.
148
+
149
+ ## Test Plan
150
+
151
+ To enforce the contract:
152
+
153
+ 1. **Per-plugin unit tests**: Each plugin with fidelity-sensitive views tests `full(entry)` and `summary(entry)` return the expected content.
154
+ 2. **Renderer tests**: Each section renderer (knowns, previous, performed, unknowns) tests that it trusts `entry.body` without re-checking fidelity.
155
+ 3. **Integration test**: Load a DB with entries at each fidelity, assemble context, verify:
156
+ - Archive entries absent from any section
157
+ - Summary entries visible as compact tags
158
+ - Full entries visible with body
159
+ - No double headers in bodies
160
+ 4. **Contract lint**: Grep for `entry.fidelity ===` in renderer files — should have zero matches.
161
+
162
+ ## Deliverable Order
163
+
164
+ Before touching code, this document should be reviewed. Once aligned, the fix order would be:
165
+
166
+ 1. Fix plugin view hooks to return correct body per fidelity
167
+ 2. Remove fidelity re-checks from renderers
168
+ 3. Remove the `# scheme path` header prepending (plugin-side) and examples (tooldoc-side)
169
+ 4. Write tests per the plan above
170
+ 5. Regenerate a sample context packet to confirm clean output
171
+
172
+ No silent interventions. No belt-and-suspenders logic. Plugin projects, renderer renders, model sees honest representation.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # RUMMY: Relational Unknowns Memory Management Yoke
2
2
 
3
- Rummy is the only LLM agent service inspired by and dedicated to the memory of former Secretary of State Donald "Rummy" Rumsfeld. Our unique fusion of apophatic and hedbergian engineering strategies yields more accurate and efficient results than any other agent. Our client/server and plugin architecture integrates it into more workflows than any other agent. It's also more flexible and lean than any other agent. Our dynamic cache management, model hot-swapping, and flexible router interface make it more affordable than any other agent.
3
+ Rummy is the only LLM agent service inspired by and dedicated to the memory of former Secretary of Defense Donald "Rummy" Rumsfeld. Our unique fusion of apophatic and hedbergian engineering strategies yields more accurate and efficient results than any other agent. Our client/server and plugin architecture integrates it into more workflows than any other agent. It's also more flexible and lean than any other agent. Our dynamic cache management, model hot-swapping, and flexible router interface make it more affordable than any other agent.
4
4
 
5
5
  ## Key Features
6
6
 
@@ -10,6 +10,10 @@ Rummy is the only LLM agent service inspired by and dedicated to the memory of f
10
10
 
11
11
  - **Hedberg:** The interpretation boundary between stochastic model output and deterministic system operations. Models speak in whatever syntax they were trained on — sed regex, SEARCH/REPLACE blocks, escaped characters. Hedberg normalizes all of it. Available to all plugins via `core.hooks.hedberg`.
12
12
 
13
+ - **Folksonomic Memory:** The model organizes its own knowledge into navigable path hierarchies with searchable summary tags. Not RAG — the model builds and curates its own taxonomy using `<known>` entries with paths like `known://project/architecture`.
14
+
15
+ - **Fidelity System:** Every entry has a visibility level: full, summary, or archive. The model manages its own context by promoting what it needs and demoting what it doesn't. Budget enforcement catches overflow post-dispatch — tools run uninterrupted, demotion happens after.
16
+
13
17
  - **Plugin Architecture:** Every `<tag>` the model sees is a plugin. Every scheme is registered by its owner. The prompt itself is assembled from plugins. Drop a directory into `~/.rummy/plugins/` or install via npm. See [PLUGINS.md](PLUGINS.md) for the complete plugin API.
14
18
 
15
19
  - **Symbols Done Right:** Designed with universal language support in mind. Powered by [@possumtech/antlrmap](https://github.com/possumtech/antlrmap).
package/SPEC.md CHANGED
@@ -44,7 +44,7 @@ body, attributes, and state.
44
44
  known_entries (
45
45
  id, run_id, loop_id, turn, path, body, scheme,
46
46
  status INTEGER, fidelity TEXT, hash,
47
- attributes, tokens, tokens_full, refs, write_count,
47
+ attributes, tokens, refs, write_count,
48
48
  created_at, updated_at
49
49
  )
50
50
  ```
@@ -56,10 +56,9 @@ known_entries (
56
56
  | `attributes` | Tag attributes as JSON. Handler-private workspace. `CHECK (json_valid)` |
57
57
  | `scheme` | Generated from path via `schemeOf()`. Drives dispatch and view routing |
58
58
  | `status` | HTTP status code (200, 202, 400, 413, etc.) |
59
- | `fidelity` | Visibility level: full, summary, index, archive |
59
+ | `fidelity` | Visibility level: full, summary, archive |
60
60
  | `hash` | SHA-256 for file change detection |
61
- | `tokens` | Display-only token count at current fidelity. NEVER used for budget. |
62
- | `tokens_full` | Cost of raw body at full fidelity |
61
+ | `tokens` | Full-body token cost. Never changes on demotion/promotion. |
63
62
  | `turn` | Freshness — when was this entry last touched |
64
63
 
65
64
  ### 1.2 Schemes, Status & Fidelity
@@ -211,8 +210,8 @@ object is the same shape at every tier.
211
210
  Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
212
211
  Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
213
212
  `noWeb` excludes `search`, `noProposals` excludes `ask_user`/`env`/`sh`.
214
- 13 model tools: get, set, known, unknown, env, sh, rm, cp, mv, search,
215
- summarize, update, ask_user.
213
+ 14 model tools: think, unknown, known, get, set, env, sh, rm, cp, mv,
214
+ ask_user, update, summarize, search.
216
215
  Client tier requires project init. Plugin tier has no restrictions.
217
216
 
218
217
  ### 3.2 Dispatch Path
@@ -225,13 +224,28 @@ Client: JSON-RPC → { method, params } → #record() → dispatch(scheme, en
225
224
  Plugin: rummy.rm({ path }) → #record() → dispatch(scheme, entry, rummy)
226
225
  ```
227
226
 
228
- **Lifecycle/action split:** Commands are classified as lifecycle signals
229
- (`summarize`, `update`, `unknown`, `known`) or action commands (everything
230
- else). Lifecycle signals always dispatch — they are state declarations that
231
- cannot be 409'd by sequential dispatch. Action commands dispatch sequentially;
232
- a 202 proposal or error aborts subsequent actions. If the model sends
233
- `<summarize>` but actions in the same turn failed, the summarize is
234
- overridden to an update (the model's assertion that it's done is false).
227
+ **Tool dispatch:** Commands are dispatched sequentially in the order
228
+ the model emitted them. Each tool either succeeds (200), fails (400+),
229
+ or proposes (202). On failure, all remaining tools are aborted. On
230
+ proposal, dispatch pauses, a notification is pushed to the client
231
+ (same WebSocket push pattern as `run/progress`), the client resolves
232
+ (accept/reject), and dispatch resumes — the proposal becomes 200 or
233
+ 400+ like any other tool. The `ask`/`act` RPC response is only sent
234
+ when all tools have completed. Proposals are NOT batched — each is
235
+ sent and resolved inline during dispatch. The model controls tool
236
+ ordering; the system respects it.
237
+
238
+ If the model sends `<summarize>` but a preceding action in the same
239
+ turn failed, the summarize is overridden to an update (the model's
240
+ assertion that it's done is false). Both `<summarize>` and `<update>`
241
+ present → last signal wins.
242
+
243
+ **Post-dispatch budget check:** After all tools dispatch, the system
244
+ materializes context and checks the budget ceiling. If context exceeds
245
+ the ceiling, Turn Demotion fires — all entries from this turn are
246
+ demoted to summary and a `budget://` entry is written. This is a
247
+ system housekeeping step independent of tool success/failure. The
248
+ tools already ran; their outcomes are settled.
235
249
 
236
250
  ### 3.3 Plugin Convention
237
251
 
@@ -293,7 +307,7 @@ Two messages per turn. System = stable truth. User = active task.
293
307
  [skills/]
294
308
  [/instructions]
295
309
  <knowns>
296
- ...entries sorted by fidelity (index, summary, full), then by scheme
310
+ ...entries sorted by fidelity (summary, full), then by scheme
297
311
  </knowns>
298
312
  <previous>
299
313
  (pre-loop entries, each with turn, status, summary, fidelity, tokens)
@@ -531,7 +545,7 @@ ask_user. `noRepo: true` — no file scanning during panic.
531
545
  `budget.panicPrompt()`: the assembled token count, the target, and
532
546
  the exact number of tokens to free. Turn 2+ receives a continuation
533
547
  prompt. The model uses `<set fidelity="archive">`, `<mv
534
- fidelity="index">`, and similar fidelity operations to free space,
548
+ fidelity="summary">`, and similar fidelity operations to free space,
535
549
  concluding with `<summarize>` when done or `<update>` while working.
536
550
 
537
551
  ---
@@ -660,7 +674,7 @@ simple to powerful — weak models learn from examples 1-2, strong models
660
674
  pick up the pattern from example 3.
661
675
 
662
676
  **Lifecycle continuity.** Examples weave stories across tools. The get
663
- docs end with `<set path="..." fidelity="index"/>`. The known docs
677
+ docs end with `<set path="..." fidelity="summary"/>`. The known docs
664
678
  reference `<get path="known://*">keyword</get>` for recall and
665
679
  `<set path="known://..." archive/>` for archiving. The unknown docs
666
680
  reference `<get/>` for investigation and `<rm/>` for cleanup. A model
@@ -746,7 +760,7 @@ Termination protocol:
746
760
  - `<summarize>` → run terminates
747
761
  - `<summarize>` + failed actions → overridden to `<update>` (continue)
748
762
  - `<update>` → run continues
749
- - Both → update wins (if the model can't decide, it's not done)
763
+ - Both → last signal wins (respects the model's final intent)
750
764
  - Neither + investigation tools → stall counter (RUMMY_MAX_STALLS)
751
765
  - Neither + action-only tools → healed to summarize
752
766
  - Neither + plain text → healed to summarize
@@ -124,13 +124,12 @@ CREATE TABLE IF NOT EXISTS known_entries (
124
124
  , body TEXT NOT NULL DEFAULT ''
125
125
  , scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
126
126
  , status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
127
- , fidelity TEXT NOT NULL DEFAULT 'full' CHECK (
128
- fidelity IN ('full', 'summary', 'index', 'archive')
127
+ , fidelity TEXT NOT NULL DEFAULT 'promoted' CHECK (
128
+ fidelity IN ('promoted', 'demoted', 'archived')
129
129
  )
130
130
  , hash TEXT
131
131
  , attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
132
132
  , tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
133
- , tokens_full INTEGER NOT NULL DEFAULT 0 CHECK (tokens_full >= 0)
134
133
  , refs INTEGER NOT NULL DEFAULT 0 CHECK (refs >= 0)
135
134
  , write_count INTEGER NOT NULL DEFAULT 1 CHECK (write_count >= 1)
136
135
  , created_at DATETIME DEFAULT CURRENT_TIMESTAMP
@@ -167,7 +166,7 @@ CREATE TABLE IF NOT EXISTS turn_context (
167
166
  , path TEXT NOT NULL
168
167
  , scheme TEXT GENERATED ALWAYS AS (schemeOf(path)) STORED
169
168
  , status INTEGER NOT NULL DEFAULT 200 CHECK (status BETWEEN 100 AND 599)
170
- , fidelity TEXT NOT NULL CHECK (fidelity IN ('full', 'summary', 'index'))
169
+ , fidelity TEXT NOT NULL CHECK (fidelity IN ('promoted', 'demoted'))
171
170
  , body TEXT NOT NULL DEFAULT ''
172
171
  , tokens INTEGER NOT NULL DEFAULT 0 CHECK (tokens >= 0)
173
172
  , attributes JSON NOT NULL DEFAULT '{}' CHECK (json_valid(attributes))
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@possumtech/rummy",
3
- "version": "0.3.1",
3
+ "version": "0.5.0",
4
4
  "description": "Relational Unknowns Memory Management Yoke",
5
5
  "keywords": [
6
6
  "llm"
@@ -1,4 +1,4 @@
1
- import KnownStore from "./KnownStore.js";
1
+ import { advanceRecovery } from "../plugins/budget/recovery.js";
2
2
  import msg from "./messages.js";
3
3
  import ResponseHealer from "./ResponseHealer.js";
4
4
 
@@ -70,14 +70,15 @@ export default class AgentLoop {
70
70
  const existing = this.#activeRuns.get(existingRun.id);
71
71
  if (existing) existing.abort();
72
72
 
73
+ // Clean up stale proposals from interrupted runs
73
74
  const unresolved = await this.#knownStore.getUnresolved(existingRun.id);
74
- if (unresolved.length > 0) {
75
- return {
76
- runId: existingRun.id,
77
- alias: existingRun.alias,
78
- blocked: true,
79
- proposed: unresolved,
80
- };
75
+ for (const u of unresolved) {
76
+ await this.#knownStore.resolve(
77
+ existingRun.id,
78
+ u.path,
79
+ 499,
80
+ "Stale proposal from interrupted run",
81
+ );
81
82
  }
82
83
  return { runId: existingRun.id, alias: existingRun.alias };
83
84
  }
@@ -125,15 +126,6 @@ export default class AgentLoop {
125
126
  const requestedModel = model;
126
127
 
127
128
  const runInfo = await this.#ensureRun(projectId, model, run, options);
128
- if (runInfo.blocked) {
129
- return {
130
- run: runInfo.alias,
131
- status: 202,
132
- remainingCount: runInfo.proposed.length,
133
- proposed: runInfo.proposed,
134
- };
135
- }
136
-
137
129
  const { runId: currentRunId, alias: currentAlias } = runInfo;
138
130
 
139
131
  const loopSeq = await this.#db.next_loop.get({ run_id: currentRunId });
@@ -222,11 +214,9 @@ export default class AgentLoop {
222
214
 
223
215
  await this.#db.complete_loop.run({
224
216
  id: loop.id,
225
- status: result.status === 202 ? 202 : result.status,
217
+ status: result.status,
226
218
  result: JSON.stringify(result),
227
219
  });
228
-
229
- if (result.status === 202) return result;
230
220
  }
231
221
 
232
222
  const runRow = await this.#db.get_run_by_alias.get({
@@ -282,12 +272,9 @@ export default class AgentLoop {
282
272
  let _lastAssembledTokens = 0;
283
273
  let recovery = null; // { target, promptPath, strikes, lastTokens }
284
274
 
285
- // Demote full logging entries from previous loops to summary before
286
- // they appear in <previous>. General policy: keep <previous> compact.
287
- await this.#knownStore.demotePreviousLoopLogging(
288
- currentRunId,
289
- currentLoopId,
290
- );
275
+ // Previous loop entries stay at full fidelity — the model is
276
+ // instructed to summarize and demote them. Budget enforcement
277
+ // catches overflow if the model fails to manage context.
291
278
 
292
279
  // Restore any prompt entries left at summary fidelity by a recovery
293
280
  // phase that was interrupted (server crash, restart). If the full
@@ -347,7 +334,16 @@ export default class AgentLoop {
347
334
  });
348
335
 
349
336
  if (result.status === 413) {
350
- return {
337
+ await this.#db.complete_loop.run({
338
+ id: currentLoopId,
339
+ status: 413,
340
+ result: null,
341
+ });
342
+ await this.#db.update_run_status.run({
343
+ id: currentRunId,
344
+ status: 200,
345
+ });
346
+ const out = {
351
347
  run: currentAlias,
352
348
  status: 413,
353
349
  overflow: result.overflow,
@@ -355,6 +351,8 @@ export default class AgentLoop {
355
351
  contextSize: result.contextSize,
356
352
  turn: result.turn,
357
353
  };
354
+ await hook.completed.emit({ projectId, ...out });
355
+ return out;
358
356
  }
359
357
 
360
358
  _lastAssembledTokens = result.assembledTokens;
@@ -366,7 +364,7 @@ export default class AgentLoop {
366
364
  await this.#knownStore.setFidelity(
367
365
  currentRunId,
368
366
  ra.promptPath,
369
- "full",
367
+ "promoted",
370
368
  );
371
369
  }
372
370
  if (ra.action === "hard413") {
@@ -390,8 +388,6 @@ export default class AgentLoop {
390
388
  const unknowns = await this.#db.get_unknowns.all({
391
389
  run_id: currentRunId,
392
390
  });
393
- const unresolved = await this.#knownStore.getUnresolved(currentRunId);
394
-
395
391
  const latestSummary = history
396
392
  .filter((e) => e.status === 200 && e.path?.startsWith("summarize://"))
397
393
  .at(-1);
@@ -400,15 +396,10 @@ export default class AgentLoop {
400
396
  projectId,
401
397
  run: currentAlias,
402
398
  turn: result.turn,
403
- status: unresolved.length > 0 ? 202 : 102,
399
+ status: 102,
404
400
  summary: latestSummary?.body || "",
405
401
  history,
406
402
  unknowns: unknowns.map((u) => ({ path: u.path, body: u.body })),
407
- proposed: unresolved.map((p) => ({
408
- path: p.path,
409
- type: KnownStore.toolFromPath(p.path) || "unknown",
410
- attributes: p.attributes ? JSON.parse(p.attributes) : null,
411
- })),
412
403
  telemetry: {
413
404
  modelAlias: result.modelAlias,
414
405
  model: result.model,
@@ -433,21 +424,6 @@ export default class AgentLoop {
433
424
  }),
434
425
  },
435
426
  });
436
- if (unresolved.length > 0) {
437
- await this.#db.update_run_status.run({
438
- id: currentRunId,
439
- status: 202,
440
- });
441
- const out = {
442
- run: currentAlias,
443
- status: 202,
444
- turn: result.turn,
445
- proposed: unresolved,
446
- };
447
- await hook.completed.emit({ projectId, ...out });
448
- return out;
449
- }
450
-
451
427
  await this.#hooks.run.step.completed.emit({
452
428
  projectId,
453
429
  run: currentAlias,
@@ -574,6 +550,12 @@ export default class AgentLoop {
574
550
  }
575
551
 
576
552
  if (action === "accept") {
553
+ const projectId = runRow.project_id;
554
+ const project = await this.#db.get_project_by_id.get({
555
+ id: projectId,
556
+ });
557
+ const projectRoot = project?.project_root;
558
+
577
559
  if (path.startsWith("set://") && attrs?.file && attrs?.merge) {
578
560
  const fileBody = await this.#knownStore.getBody(runId, attrs.file);
579
561
  if (fileBody != null) {
@@ -594,12 +576,25 @@ export default class AgentLoop {
594
576
  patched,
595
577
  200,
596
578
  );
579
+ // Write patched content to disk
580
+ if (projectRoot) {
581
+ const { writeFile } = await import("node:fs/promises");
582
+ const { join } = await import("node:path");
583
+ await writeFile(join(projectRoot, attrs.file), patched).catch(
584
+ () => {},
585
+ );
586
+ }
597
587
  }
598
588
  }
599
589
 
600
590
  if (path.startsWith("rm://")) {
601
591
  if (attrs?.path) {
602
592
  await this.#knownStore.remove(runId, attrs.path);
593
+ if (projectRoot) {
594
+ const { unlink } = await import("node:fs/promises");
595
+ const { join } = await import("node:path");
596
+ await unlink(join(projectRoot, attrs.path)).catch(() => {});
597
+ }
603
598
  }
604
599
  }
605
600
 
@@ -615,68 +610,9 @@ export default class AgentLoop {
615
610
  throw new Error(msg("error.resolution_invalid", { action }));
616
611
  }
617
612
 
618
- const unresolved = await this.#knownStore.getUnresolved(runId);
619
- if (unresolved.length > 0) {
620
- return {
621
- run: runAlias,
622
- status: 202,
623
- remainingCount: unresolved.length,
624
- proposed: unresolved,
625
- };
626
- }
627
-
628
- // Scope completion checks to the current loop
629
- const currentLoop = await this.#db.get_current_loop.get({ run_id: runId });
630
- const loopId = currentLoop?.id ?? null;
631
-
632
- if (await this.#knownStore.hasRejections(runId, loopId)) {
633
- if (currentLoop)
634
- await this.#db.complete_loop.run({
635
- id: loopId,
636
- status: 200,
637
- result: null,
638
- });
639
- await this.#db.update_run_status.run({ id: runId, status: 200 });
640
- return { run: runAlias, status: 200 };
641
- }
642
-
643
- const hasSummary = await this.#db.get_latest_summary.get({
644
- run_id: runId,
645
- loop_id: loopId,
646
- });
647
- if (hasSummary?.body) {
648
- if (currentLoop)
649
- await this.#db.complete_loop.run({
650
- id: loopId,
651
- status: 200,
652
- result: null,
653
- });
654
- await this.#db.update_run_status.run({ id: runId, status: 200 });
655
- return { run: runAlias, status: 200 };
656
- }
657
-
658
- // No summary and no rejections in this loop — resume it
659
- const projectId = runRow.project_id;
660
- const project = await this.#db.get_project_by_id.get({ id: projectId });
661
-
662
- const latestPrompt = await this.#db.get_latest_prompt.get({
663
- run_id: runId,
664
- });
665
- const resumeMode = latestPrompt?.attributes
666
- ? JSON.parse(latestPrompt.attributes).mode
667
- : "ask";
668
-
669
- // Re-enqueue the current loop's prompt to continue it
670
- const loopSeq = await this.#db.next_loop.get({ run_id: runId });
671
- await this.#db.enqueue_loop.get({
672
- run_id: runId,
673
- sequence: loopSeq.sequence,
674
- mode: resumeMode,
675
- model: runRow.model,
676
- prompt: "",
677
- config: currentLoop?.config || "{}",
678
- });
679
- return this.#drainQueue(runId, runAlias, projectId, project, {});
613
+ // The dispatch loop is awaiting resolution. This unblocks it.
614
+ // Dispatch continuation is handled by the loop, not here.
615
+ return { run: runAlias, status: 200 };
680
616
  }
681
617
 
682
618
  async #composeResolvedContent(runId, path, _attrs, output) {
@@ -741,43 +677,5 @@ export default class AgentLoop {
741
677
  * @param {{ assembledTokens: number, budgetRecovery?: { target: number, promptPath: string|null } }} result
742
678
  * @returns {{ next: object|null, action: null|'restore'|'hard413', promptPath: string|null }}
743
679
  */
744
- export function advanceRecovery(recovery, result) {
745
- // Initialise or update recovery state from a new Turn Demotion event.
746
- if (result.budgetRecovery) {
747
- if (!recovery) {
748
- recovery = {
749
- target: result.budgetRecovery.target,
750
- promptPath: result.budgetRecovery.promptPath,
751
- strikes: 0,
752
- lastTokens: result.assembledTokens,
753
- };
754
- } else {
755
- // Re-overflow during recovery: tighten target, don't count as strike.
756
- recovery = {
757
- ...recovery,
758
- target: Math.min(recovery.target, result.budgetRecovery.target),
759
- };
760
- }
761
- }
762
-
763
- if (recovery === null) return { next: null, action: null, promptPath: null };
764
-
765
- const current = result.assembledTokens;
766
-
767
- if (current <= recovery.target) {
768
- return { next: null, action: "restore", promptPath: recovery.promptPath };
769
- }
770
-
771
- const noProgress = current >= recovery.lastTokens && !result.budgetRecovery;
772
- const strikes = noProgress ? recovery.strikes + 1 : 0;
773
-
774
- if (strikes >= 3) {
775
- return { next: null, action: "hard413", promptPath: null };
776
- }
777
-
778
- return {
779
- next: { ...recovery, strikes, lastTokens: current },
780
- action: null,
781
- promptPath: null,
782
- };
783
- }
680
+ // Re-export for backward compatibility with tests
681
+ export { advanceRecovery } from "../plugins/budget/recovery.js";