@possumtech/rummy 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/.env.example +12 -3
  2. package/EXCEPTIONS.md +46 -0
  3. package/PLUGINS.md +454 -197
  4. package/SPEC.md +284 -93
  5. package/migrations/001_initial_schema.sql +57 -70
  6. package/package.json +16 -10
  7. package/service.js +1 -1
  8. package/src/agent/AgentLoop.js +254 -70
  9. package/src/agent/ContextAssembler.js +18 -4
  10. package/src/agent/KnownStore.js +156 -23
  11. package/src/agent/ProjectAgent.js +5 -4
  12. package/src/agent/ResponseHealer.js +21 -1
  13. package/src/agent/TurnExecutor.js +393 -115
  14. package/src/agent/XmlParser.js +92 -39
  15. package/src/agent/known_checks.sql +5 -4
  16. package/src/agent/known_queries.sql +4 -3
  17. package/src/agent/known_store.sql +45 -15
  18. package/src/agent/loops.sql +63 -0
  19. package/src/agent/runs.sql +7 -7
  20. package/src/agent/schemes.sql +5 -2
  21. package/src/agent/tokens.js +6 -21
  22. package/src/agent/turns.sql +13 -4
  23. package/src/hooks/Hooks.js +18 -0
  24. package/src/hooks/PluginContext.js +14 -10
  25. package/src/hooks/RummyContext.js +30 -10
  26. package/src/hooks/ToolRegistry.js +83 -19
  27. package/src/llm/LlmProvider.js +27 -8
  28. package/src/llm/OpenAiClient.js +20 -0
  29. package/src/llm/OpenRouterClient.js +24 -2
  30. package/src/llm/XaiClient.js +47 -2
  31. package/src/plugins/ask_user/README.md +4 -4
  32. package/src/plugins/ask_user/ask_user.js +8 -7
  33. package/src/plugins/ask_user/ask_userDoc.js +29 -0
  34. package/src/plugins/budget/BudgetGuard.js +74 -0
  35. package/src/plugins/budget/README.md +43 -0
  36. package/src/plugins/budget/budget.js +79 -0
  37. package/src/plugins/cp/README.md +5 -4
  38. package/src/plugins/cp/cp.js +16 -12
  39. package/src/plugins/cp/cpDoc.js +29 -0
  40. package/src/plugins/current/README.md +4 -4
  41. package/src/plugins/current/current.js +12 -10
  42. package/src/plugins/engine/engine.sql +5 -10
  43. package/src/plugins/engine/turn_context.sql +13 -13
  44. package/src/plugins/env/README.md +3 -4
  45. package/src/plugins/env/env.js +8 -7
  46. package/src/plugins/env/envDoc.js +29 -0
  47. package/src/plugins/file/README.md +9 -12
  48. package/src/plugins/file/file.js +34 -45
  49. package/src/plugins/get/README.md +2 -2
  50. package/src/plugins/get/get.js +28 -11
  51. package/src/plugins/get/getDoc.js +41 -0
  52. package/src/plugins/hedberg/docs.md +0 -9
  53. package/src/plugins/hedberg/hedberg.js +4 -6
  54. package/src/plugins/hedberg/matcher.js +1 -1
  55. package/src/plugins/hedberg/normalize.js +28 -0
  56. package/src/plugins/hedberg/patterns.js +31 -33
  57. package/src/plugins/hedberg/sed.js +17 -10
  58. package/src/plugins/helpers.js +2 -2
  59. package/src/plugins/index.js +93 -28
  60. package/src/plugins/instructions/README.md +6 -2
  61. package/src/plugins/instructions/instructions.js +21 -5
  62. package/src/plugins/instructions/preamble.md +9 -5
  63. package/src/plugins/known/README.md +10 -7
  64. package/src/plugins/known/known.js +33 -23
  65. package/src/plugins/known/knownDoc.js +33 -0
  66. package/src/plugins/mv/README.md +5 -4
  67. package/src/plugins/mv/mv.js +16 -12
  68. package/src/plugins/mv/mvDoc.js +31 -0
  69. package/src/plugins/persona/persona.js +78 -0
  70. package/src/plugins/previous/README.md +2 -2
  71. package/src/plugins/previous/previous.js +12 -8
  72. package/src/plugins/progress/progress.js +44 -12
  73. package/src/plugins/prompt/README.md +5 -5
  74. package/src/plugins/prompt/prompt.js +23 -19
  75. package/src/plugins/rm/README.md +4 -4
  76. package/src/plugins/rm/rm.js +29 -12
  77. package/src/plugins/rm/rmDoc.js +30 -0
  78. package/src/plugins/rpc/README.md +15 -28
  79. package/src/plugins/rpc/rpc.js +63 -107
  80. package/src/plugins/set/README.md +13 -12
  81. package/src/plugins/set/set.js +82 -21
  82. package/src/plugins/set/setDoc.js +45 -0
  83. package/src/plugins/sh/README.md +4 -4
  84. package/src/plugins/sh/sh.js +8 -7
  85. package/src/plugins/sh/shDoc.js +29 -0
  86. package/src/plugins/{skills/skills.js → skill/skill.js} +12 -54
  87. package/src/plugins/summarize/README.md +6 -5
  88. package/src/plugins/summarize/summarize.js +7 -6
  89. package/src/plugins/summarize/summarizeDoc.js +33 -0
  90. package/src/plugins/telemetry/telemetry.js +20 -8
  91. package/src/plugins/think/README.md +20 -0
  92. package/src/plugins/think/think.js +5 -0
  93. package/src/plugins/unknown/README.md +5 -5
  94. package/src/plugins/unknown/unknown.js +11 -8
  95. package/src/plugins/unknown/unknownDoc.js +31 -0
  96. package/src/plugins/update/README.md +3 -8
  97. package/src/plugins/update/update.js +7 -6
  98. package/src/plugins/update/updateDoc.js +33 -0
  99. package/src/server/ClientConnection.js +3 -5
  100. package/src/server/RpcRegistry.js +52 -4
  101. package/src/sql/v_model_context.sql +31 -39
  102. package/src/sql/v_run_log.sql +3 -3
  103. package/src/agent/prompt_queue.sql +0 -39
  104. package/src/plugins/ask_user/docs.md +0 -2
  105. package/src/plugins/cp/docs.md +0 -2
  106. package/src/plugins/env/docs.md +0 -2
  107. package/src/plugins/get/docs.md +0 -6
  108. package/src/plugins/known/docs.md +0 -3
  109. package/src/plugins/mv/docs.md +0 -2
  110. package/src/plugins/rm/docs.md +0 -4
  111. package/src/plugins/set/docs.md +0 -4
  112. package/src/plugins/sh/docs.md +0 -2
  113. package/src/plugins/skills/README.md +0 -25
  114. package/src/plugins/store/README.md +0 -20
  115. package/src/plugins/store/docs.md +0 -5
  116. package/src/plugins/store/store.js +0 -52
  117. package/src/plugins/summarize/docs.md +0 -4
  118. package/src/plugins/unknown/docs.md +0 -5
  119. package/src/plugins/update/docs.md +0 -4
package/SPEC.md CHANGED
@@ -15,8 +15,8 @@ that thread a value through subscribers in priority order).
15
15
 
16
16
  **Every `<tag>` the model sees is a plugin.** The `<known>` section
17
17
  of the system message is rendered by the known plugin. The `<progress>`
18
- section is rendered by the progress plugin. The `<ask>` tag is rendered
19
- by the prompt plugin. No monolithic assembler decides what goes where.
18
+ section is rendered by the progress plugin. The `<prompt>` tag is
19
+ rendered by the prompt plugin. No monolithic assembler decides what goes where.
20
20
  Each plugin filters for its own data from the shared row set, renders
21
21
  its section, and returns.
22
22
 
@@ -42,7 +42,8 @@ body, attributes, and state.
42
42
 
43
43
  ```sql
44
44
  known_entries (
45
- id, run_id, turn, path, body, scheme, state, hash,
45
+ id, run_id, loop_id, turn, path, body, scheme,
46
+ status INTEGER, fidelity TEXT, hash,
46
47
  attributes, tokens, tokens_full, refs, write_count,
47
48
  created_at, updated_at
48
49
  )
@@ -50,58 +51,63 @@ known_entries (
50
51
 
51
52
  | Column | Purpose |
52
53
  |--------|---------|
53
- | `path` | Entry identity. Bare paths (`src/app.js`) or URIs (`known://auth`) |
54
+ | `path` | Entry identity. Bare paths (`src/app.js`) or URIs (`known://auth`). Max 2048 chars. |
54
55
  | `body` | Tag body text. File content, tool output, skill docs. |
55
56
  | `attributes` | Tag attributes as JSON. Handler-private workspace. `CHECK (json_valid)` |
56
57
  | `scheme` | Generated from path via `schemeOf()`. Drives dispatch and view routing |
57
- | `state` | Lifecycle stage. Determines model visibility |
58
+ | `status` | HTTP status code (200, 202, 400, 413, etc.) |
59
+ | `fidelity` | Visibility level: full, summary, index, archive |
58
60
  | `hash` | SHA-256 for file change detection |
59
- | `tokens` | Context cost at current state |
61
+ | `tokens` | Display-only token count at current fidelity. NEVER used for budget. |
60
62
  | `tokens_full` | Cost of raw body at full fidelity |
61
63
  | `turn` | Freshness — when was this entry last touched |
62
64
 
63
- ### 1.2 Schemes & States
65
+ ### 1.2 Schemes, Status & Fidelity
64
66
 
65
- Paths use URI scheme syntax. Bare paths (no `://`) are files.
66
-
67
- **Files** (`scheme IS NULL`):
68
-
69
- | State | Model sees |
70
- |-------|-----------|
71
- | `full` | File content in code fence |
72
- | `index` | Path listed in File Index |
73
- | `stored` | Invisible, retrievable via `<get>` |
74
-
75
- **Knowledge** (`known://`, `unknown://`):
76
-
77
- | State | Model sees |
78
- |-------|-----------|
79
- | `full` | Key — value in bullet list |
80
- | `stored` | Key listed, no value |
81
-
82
- **Tool results** (`set://`, `sh://`, `env://`, `rm://`, `ask_user://`,
83
- `mv://`, `cp://`, `search://`, `get://`, `store://`):
84
-
85
- All start at `full` state when recorded. Handlers set the final state:
86
- `proposed`, `pass`, `rejected`, `error`, `pattern`, `read`, `stored`, `info`.
67
+ Every entry has two independent dimensions: **status** (HTTP integer)
68
+ and **fidelity** (visibility level). These are separate concerns.
87
69
 
88
- **Skills** (`skill://`): `full` or `stored`. Rendered in system message.
70
+ **Status** (lifecycle): 200 (OK), 202 (proposed), 400 (bad request),
71
+ 404 (not found), 409 (conflict), 413 (too large), 499 (aborted),
72
+ 500 (error).
89
73
 
90
- **Tools** (`tool://`): `full`, `model_visible = 0`. Internal plugin metadata.
74
+ **Fidelity** (visibility): `full` (body visible), `summary`
75
+ (model-authored summary), `index` (path only), `archive` (invisible,
76
+ retrievable via `<get>`).
91
77
 
92
- **URLs** (`http://`, `https://`): `full`, `summary`, `stored`.
93
-
94
- **Structural** (`summarize://`, `update://`): Status signals.
95
-
96
- **Audit** (`system://`, `prompt://`, `ask://`, `act://`, `progress://`,
97
- `reasoning://`, `model://`, `error://`, `user://`, `assistant://`,
98
- `content://`): `info` state, `model_visible = 0` (hidden from model).
99
-
100
- ### 1.3 State Validation
78
+ Paths use URI scheme syntax. Bare paths (no `://`) are files.
101
79
 
102
- The `schemes` table is a bootstrap registry — 30 rows of static config.
103
- INSERT/UPDATE triggers validate state against `schemes.valid_states`.
104
- Plugins cannot bypass this (circular dependency prevents schemes as entries).
80
+ Every model-visible entry plays one of four roles (`audit` entries are hidden from the model):
81
+
82
+ | Role | Category | Section | Description |
83
+ |------|----------|---------|-------------|
84
+ | **Data** | `data` | `<knowns>` | Entries the model works with — persistent state |
85
+ | **Logging** | `logging` | `<current>`/`<previous>` | Records of what happened — tool results, lifecycle signals |
86
+ | **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
87
+ | **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
88
+
89
+ `logging` is the default category. Plugins opt into `data` explicitly.
90
+
91
+ | Scheme | Category | Description |
92
+ |--------|----------|-------------|
93
+ | `NULL` (bare path) | data | File content. JOINs via `COALESCE(scheme, 'file')`. `file://` prefix stripped by hedberg. |
94
+ | `known://` | data | Model-registered knowledge. One fact per entry. |
95
+ | `skill://` | data | Skill docs. Rendered in system message. |
96
+ | `http://`, `https://` | data | Web content. |
97
+ | `unknown://` | unknown | Unresolved questions. |
98
+ | `prompt://` | prompt | User prompt with `mode` attribute (`ask`/`act`). |
99
+ | `progress://` | prompt | Continuation prompt. |
100
+ | `set://`, `get://`, `sh://`, `env://`, `rm://`, `mv://`, `cp://`, `ask_user://`, `search://` | logging | Tool result entries. |
101
+ | `summarize://`, `update://` | logging | Lifecycle signals. |
102
+ | `tool://` | audit | Internal plugin metadata. `model_visible = 0`. |
103
+ | `system://`, `reasoning://`, `model://`, `error://`, `user://`, `assistant://`, `content://` | audit | Audit entries. `model_visible = 0`. |
104
+
105
+ ### 1.3 Scheme Registry
106
+
107
+ The `schemes` table is a bootstrap registry — static rows of
108
+ `(name, model_visible, category)`. Plugins register their scheme
109
+ via `core.registerScheme()` in the constructor. The `model_visible`
110
+ flag controls whether entries appear in `v_model_context`.
105
111
 
106
112
  ### 1.4 UPSERT Semantics
107
113
 
@@ -117,13 +123,21 @@ The K/V store is the memory. Relational tables are the skeleton.
117
123
  ```sql
118
124
  projects (id, name UNIQUE, project_root, config_path, created_at)
119
125
  models (id, alias UNIQUE, actual, context_length, created_at)
120
- runs (id, project_id, parent_run_id, model, alias UNIQUE, status,
121
- temperature, persona, context_limit, next_turn, created_at)
122
- turns (id, run_id, sequence, prompt_tokens, completion_tokens,
123
- total_tokens, cost, created_at)
126
+ runs (id, project_id, parent_run_id, model, alias UNIQUE,
127
+ status INTEGER, temperature, persona, context_limit,
128
+ next_turn, next_loop, created_at)
129
+ loops (id, run_id, sequence, mode, model, prompt, status INTEGER,
130
+ config JSON, result JSON, created_at)
131
+ turns (id, run_id, loop_id, sequence, context_tokens,
132
+ reasoning_content, prompt_tokens, cached_tokens,
133
+ completion_tokens, reasoning_tokens, total_tokens, cost,
134
+ created_at)
124
135
 
125
136
  file_constraints (id, project_id, pattern, visibility, created_at)
126
- prompt_queue (id, run_id, mode, model, prompt, config, status, result)
137
+ -- Project-level config. NOT tool dispatch. See §2.3.
138
+ turn_context (id, run_id, loop_id, turn, ordinal, path, scheme,
139
+ status, fidelity, body, tokens, attributes,
140
+ category, source_turn)
127
141
  rpc_log (id, project_id, method, rpc_id, params, result, error)
128
142
  ```
129
143
 
@@ -136,19 +150,39 @@ client picks for every run.
136
150
 
137
151
  ### 2.1 Run State Machine
138
152
 
153
+ All status fields are HTTP integer codes:
154
+
139
155
  ```
140
- queued → running → proposed → running → completed
141
- → completed
142
- → failed → running
143
- → aborted → running
156
+ 100 (queued) → 200 (running) → 202 (proposed) → 200 (running) → 200 (completed)
157
+ → 200 (completed)
158
+ → 500 (failed) → 200 (running)
159
+ → 499 (aborted) → 200 (running)
144
160
  ```
145
161
 
146
162
  All terminal states allow transition back to `running`. Runs are long-lived.
147
163
 
148
- ### 2.2 Prompt Queue
164
+ ### 2.2 Loops Table
165
+
166
+ The loops table IS the prompt queue. Each `ask`/`act` creates a loop.
167
+ FIFO per run (ordered by sequence). One active at a time. Abort stops
168
+ the current loop; pending loops survive. Projects > runs > loops > turns.
149
169
 
150
- All prompts flow through `prompt_queue`. FIFO per run. One active at a time.
151
- Abort stops the current prompt; pending prompts survive.
170
+ ### 2.3 File Constraints
171
+
172
+ The `file_constraints` table is project-level configuration — it
173
+ defines which files a project cares about. This is backbone, not tool
174
+ dispatch. Constraints have three visibilities: `active` (promoted to
175
+ full), `readonly` (promoted but not editable), `ignore` (demoted).
176
+
177
+ **Boundary:** Setting a constraint (`File.setConstraint`) is a
178
+ project-config write. Promoting/demoting the matching entries is tool
179
+ dispatch that goes through the handler chain with budget enforcement.
180
+ These are separate operations: the constraint persists across runs, while entry
181
+ promotion is scoped to a run and subject to the same budget rules as
182
+ a model `<get>`.
183
+
184
+ `store` RPC manages constraints directly — it is not a model tool.
185
+ `get` RPC with `persist` sets the constraint AND dispatches promotion.
152
186
 
153
187
  ---
154
188
 
@@ -169,13 +203,17 @@ object is the same shape at every tier.
169
203
 
170
204
  | Method | Model | Client | Plugin |
171
205
  |--------|-------|--------|--------|
172
- | `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `store` | ✓ | ✓ | ✓ |
206
+ | `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `search` | ✓ | ✓ | ✓ |
173
207
  | `known`, `unknown`, `ask_user`, `summarize`, `update` | ✓ | ✓ | ✓ |
174
208
  | `ask`, `act`, `resolve`, `abort`, `startRun` | — | ✓ | ✓ |
175
209
  | `getRuns`, `getModels`, `getEntries` | — | ✓ | ✓ |
176
210
  | `on()`, `filter()`, db/store access | — | — | ✓ |
177
211
 
178
- Model tier restrictions enforced by mode (ask removes act-only tools).
212
+ Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
213
+ Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
214
+ `noWeb` excludes `search`, `noBench` excludes `ask_user`/`env`/`sh`.
215
+ 13 model tools: get, set, known, unknown, env, sh, rm, cp, mv, search,
216
+ summarize, update, ask_user.
179
217
  Client tier requires project init. Plugin tier has no restrictions.
180
218
 
181
219
  ### 3.2 Dispatch Path
@@ -188,6 +226,14 @@ Client: JSON-RPC → { method, params } → #record() → dispatch(scheme, en
188
226
  Plugin: rummy.rm({ path }) → #record() → dispatch(scheme, entry, rummy)
189
227
  ```
190
228
 
229
+ **Lifecycle/action split:** Commands are classified as lifecycle signals
230
+ (`summarize`, `update`, `unknown`, `known`) or action commands (everything
231
+ else). Lifecycle signals always dispatch — they are state declarations that
232
+ cannot be 409'd by sequential dispatch. Action commands dispatch sequentially;
233
+ a 202 proposal or error aborts subsequent actions. If the model sends
234
+ `<summarize>` but actions in the same turn failed, the summarize is
235
+ overridden to an update (the model's assertion that it's done is false).
236
+
191
237
  ### 3.3 Plugin Convention
192
238
 
193
239
  A plugin is an instantiated class. The class name matches the file name.
@@ -247,9 +293,9 @@ Two messages per turn. System = stable truth. User = active task.
247
293
  [persona/]
248
294
  [skills/]
249
295
  [/instructions]
250
- <knowledge>
296
+ <knowns>
251
297
  ...entries sorted by fidelity (index, summary, full), then by scheme
252
- </knowledge>
298
+ </knowns>
253
299
  <previous>
254
300
  (pre-loop user prompt, model responses, agent warnings, and tools used, in order)
255
301
  </previous>
@@ -260,16 +306,14 @@ Two messages per turn. System = stable truth. User = active task.
260
306
  (current loop model responses, agent warnings, and tools used, in order)
261
307
  </current>
262
308
  <progress>the above actions have been performed on this user prompt:</progress>
263
- <ask tools="..." warn="...">user prompt</ask>
264
- — OR —
265
- <act tools="...">user prompt</act>
309
+ <prompt mode="ask|act" tools="...">user prompt</prompt>
266
310
  [/user]
267
311
  ```
268
312
 
269
313
  **System** contains everything the model needs to know.
270
314
  **User** contains everything the model needs to do.
271
315
 
272
- The `<ask>`/`<act>` tag is present on every turn — first turn and
316
+ The `<prompt>` tag is present on every turn — first turn and
273
317
  continuations alike. The model always sees its task. The active prompt
274
318
  is extracted from its chronological position and placed last for maximum
275
319
  recency. `<progress>` bridges the gap, narrating the causal relationship
@@ -287,7 +331,7 @@ first turn of the first loop.
287
331
 
288
332
  **Current** = the active loop's work so far. Model responses, tool
289
333
  results, agent warnings — in order. Does NOT include the user prompt
290
- (one per loop, extracted to `<ask>`/`<act>`). Lives in the user
334
+ (one per loop, extracted to `<prompt>`). Lives in the user
291
335
  message as immediate context. Empty on the first turn of a loop.
292
336
 
293
337
  When a new prompt arrives on an existing run, the prior loop's
@@ -313,7 +357,7 @@ text from body + attributes.
313
357
  Each turn:
314
358
 
315
359
  1. Write `instructions://system` (empty body, attributes = { persona })
316
- 2. Run plugin hooks (`onTurn`) — plugins modify entries before the model sees them
360
+ 2. Emit `turn.started` — plugins write prompt/progress/instructions entries
317
361
  3. Project `instructions://system` → instructions text
318
362
  4. Query `v_model_context` VIEW → visible entries
319
363
  5. Project each entry through its tool's `full`/`summary` projection
@@ -325,21 +369,76 @@ Each turn:
325
369
  8. Invoke `assembly.user` filter chain (empty string as base):
326
370
  - Current plugin (priority 100) → `<current>` section
327
371
  - Progress plugin (priority 200) → `<progress>` section
328
- - Prompt plugin (priority 300) → `<ask>`/`<act>` section
372
+ - Prompt plugin (priority 300) → `<prompt>` section
329
373
  9. Store as `system://N` and `user://N` audit entries
330
374
 
331
- The VIEW determines visibility. State IS fidelity:
375
+ The VIEW determines visibility from `fidelity` and `status`:
332
376
  - `full` → body visible
333
- - `summary` → body visible
377
+ - `summary` → summary visible (model-authored `summary` attribute if set)
334
378
  - `index` → path listed, no content
335
- - `stored` → invisible
336
- - `proposed` → invisible (pending client)
379
+ - `archive` → invisible (retrievable via `<get>`)
380
+ - `status = 202` → invisible (proposed, pending client)
337
381
  - `model_visible = 0` → invisible (audit, tool, instructions)
338
382
 
339
- ### 4.5 progress:// as Entry
383
+ Model controls fidelity via `<set>` attributes: `archive`, `summary`,
384
+ `index`, `full`. The `summary="..."` attribute attaches a description
385
+ (<= 80 chars) that persists across fidelity changes.
386
+
387
+ ### 4.5 Budget Enforcement
388
+
389
+ The model owns its context. The system enforces a hard ceiling and
390
+ provides advisory warnings — it does not automatically manage entries.
391
+
392
+ **Pre-LLM check:** The budget plugin measures `countTokens()` on the
393
+ assembled messages. If assembled tokens exceed `contextSize`, the turn
394
+ returns 413 without calling the LLM. This triggers panic mode (see
395
+ §4.6).
396
+
397
+ **Write-layer gate:** BudgetGuard on KnownStore gates every write
398
+ during dispatch. `upsert()`, `promoteByPattern()`, and
399
+ `updateBodyByPattern()` check token delta against remaining headroom.
400
+ Exceeding the budget throws `BudgetExceeded` — the tool 413s, the
401
+ guard trips, and all subsequent tools in the turn fail.
402
+
403
+ **Exemptions:** `status >= 400` entries (error results), `model_visible
404
+ = 0` entries (audit), `fidelity = "archive"` entries (not in context).
405
+
406
+ **Size gate:** Known entries exceeding 500 tokens are rejected with
407
+ 413, forcing atomic entries.
408
+
409
+ **Advisory warnings** (progress plugin):
410
+ - 50%: "You may free space by lowering the fidelity of entries"
411
+ - 75%: "YOU MUST free space... or the run will fail"
412
+
413
+ **Token math:** `Math.ceil(text.length / RUMMY_TOKEN_DIVISOR)`. One
414
+ formula, one file (`src/agent/tokens.js`), env-configurable. No
415
+ external dependencies. `contextSize` is the ceiling. Over = 413.
416
+ Under = 200. No margins.
340
417
 
341
- The continuation prompt is a `progress://N` entry. Plugins can modify its
342
- body before materialization.
418
+ ### 4.6 Panic Mode
419
+
420
+ When a new prompt arrives and the assembled context exceeds
421
+ `contextSize`, the system enters panic mode instead of failing to
422
+ the client.
423
+
424
+ 1. The failed loop is completed with 413 (audit trail)
425
+ 2. A panic loop is enqueued (`mode = "panic"`, `noRepo = true`)
426
+ 3. The original loop is re-enqueued to retry after panic
427
+ 4. The model receives a prompt with the exact shortfall in tokens
428
+ 5. Tools: get, set, known, unknown, rm, mv, cp, summarize, update
429
+ 6. Excluded: sh, env, search, ask_user
430
+
431
+ **Strike system:** Each turn without context reduction = 1 strike.
432
+ Any reduction resets the counter. 3 consecutive strikes = hard 413
433
+ to client. Unlimited turns as long as the model makes progress.
434
+
435
+ One panic attempt per drain cycle. If the retried original loop also
436
+ 413s, hard-fail to the client.
437
+
438
+ **`ToolRegistry.view()`** prepends `attributes.summary` above the
439
+ plugin's summary view output at summary fidelity. The model authors
440
+ summaries (<= 80 chars) via `<set summary="...">`. Summaries persist
441
+ across fidelity changes.
343
442
 
344
443
  ---
345
444
 
@@ -369,22 +468,25 @@ JSON-RPC 2.0 over WebSocket. `discover` returns the live catalog.
369
468
 
370
469
  | Method | Params |
371
470
  |--------|--------|
372
- | `read` | `{ path, run?, persist?, readonly? }` |
471
+ | `get` | `{ path, run, persist?, readonly? }` |
472
+ | `set` | `{ run, path, body?, attributes? }` |
473
+ | `rm` | `{ run, path }` |
474
+ | `mv` | `{ run, path, to }` |
475
+ | `cp` | `{ run, path, to }` |
373
476
  | `store` | `{ path, run?, persist?, ignore?, clear? }` |
374
- | `write` | `{ run, path, body?, state?, attributes? }` |
375
- | `delete` | `{ run, path }` |
376
477
  | `getEntries` | `{ pattern?, body?, run?, limit?, offset? }` |
377
478
 
378
- `persist` creates a project-level file constraint (operator privilege).
379
- Without `persist`, operations dispatch through the handler chain.
479
+ All entry operations dispatch through the handler chain. `persist`
480
+ on `get` also sets a project-level file constraint (operator privilege).
481
+ `store` manages file constraints — not a model tool.
380
482
 
381
483
  #### Runs
382
484
 
383
485
  | Method | Params |
384
486
  |--------|--------|
385
487
  | `startRun` | `{ model, temperature?, persona?, contextLimit? }` |
386
- | `ask` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noContext?, fork? }` |
387
- | `act` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noContext?, fork? }` |
488
+ | `ask` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, fork? }` |
489
+ | `act` | `{ prompt, model, run?, temperature?, persona?, contextLimit?, noRepo?, noInteraction?, noWeb?, fork? }` |
388
490
  | `run/resolve` | `{ run, resolution: { path, action, output? } }` |
389
491
  | `run/abort` | `{ run }` |
390
492
  | `run/rename` | `{ run, name }` |
@@ -392,6 +494,10 @@ Without `persist`, operations dispatch through the handler chain.
392
494
  | `run/config` | `{ run, temperature?, persona?, contextLimit?, model? }` |
393
495
 
394
496
  `model` is required on `ask`, `act`, and `startRun`. No default.
497
+ `noRepo` disables default project/repo file scanning (files can still
498
+ be added explicitly by the client).
499
+ `noInteraction` removes `ask_user` from the tool list.
500
+ `noWeb` removes `search` from the tool list.
395
501
 
396
502
  #### Queries
397
503
 
@@ -445,7 +551,80 @@ Each plugin has its own README at `src/plugins/{name}/README.md`.
445
551
 
446
552
  ---
447
553
 
448
- ## 7. Hedberg Editing Syntax
554
+ ## 7. Tool Documentation Design
555
+
556
+ Tool docs are the most carefully designed text in rummy. Every line
557
+ simultaneously teaches syntax, implies workflow priority, demonstrates
558
+ pattern capabilities, and constrains misuse. Each letter earns its place.
559
+
560
+ ### Principles
561
+
562
+ **Show, don't tell.** Examples ARE the documentation. A model learns
563
+ `<get path="known://*">auth</get>` from seeing it, not from being told
564
+ "you can filter known entries by keyword." Examples are ordered from
565
+ simple to powerful — weak models learn from examples 1-2, strong models
566
+ pick up the pattern from example 3.
567
+
568
+ **Lifecycle continuity.** Examples weave stories across tools. The get
569
+ docs end with `<set path="..." fidelity="index"/>`. The known docs
570
+ reference `<get path="known://*">keyword</get>` for recall and
571
+ `<set path="known://..." archive/>` for archiving. The unknown docs
572
+ reference `<get/>` for investigation and `<rm/>` for cleanup. A model
573
+ reading the full tool docs encounters a coherent workflow:
574
+ discover → load → reason → edit → archive → recall.
575
+
576
+ **RFC 2119 semantics.** Constraint bullets use YOU MUST, YOU MUST NOT,
577
+ YOU SHOULD, YOU MAY from RFC 2119. Every LLM has extensive pretraining
578
+ on RFC documents where these keywords carry precise semantic weight.
579
+ MUST is absolute. SHOULD is strong advisory. MAY is permissive. This
580
+ is not decorative — it's leveraging the model's existing understanding
581
+ of requirement levels.
582
+
583
+ **Consistent structure.** Every tool doc follows: header (syntax), 2+
584
+ examples, 2+ constraint bullets. Inconsistent formatting reads as
585
+ inconsistent importance. A tool with 5 examples and dense bullets feels
586
+ complex; a tool with 1 line feels disposable. Both are wrong — every
587
+ tool is equally real, and each doc is proportional to the tool's surface area.
588
+
589
+ ### Format
590
+
591
+ Tool docs live in `*Doc.js` files as annotated line arrays:
592
+
593
+ ```js
594
+ const LINES = [
595
+ ["* Body text filters results by content match",
596
+ "Generalizes examples 2-3. Body = filter, not just path."],
597
+ ];
598
+ export default LINES.map(([text]) => text).join("\n");
599
+ ```
600
+
601
+ The first element is the model-facing text. The second is the rationale —
602
+ visible only in source. Changing any line requires reading all rationales
603
+ first. This prevents well-intentioned edits from breaking subtle behavioral
604
+ guarantees that adjacent lines depend on.
605
+
606
+ ### Tool Display Order
607
+
608
+ Tools are presented gather → reason → act → communicate. Position in
609
+ the list implies priority. `get` is first. `ask_user` is last. The
610
+ order is defined in `ToolRegistry.TOOL_ORDER` and applied by
611
+ `resolveForLoop()`. The same method handles all tool exclusions —
612
+ mode restrictions, `noInteraction`, `noWeb`, `noBench` — through
613
+ one unified mechanism.
614
+
615
+ ### Pattern Distribution
616
+
617
+ Hedbergian pattern matching (globs, body filters, preview) is taught
618
+ across multiple tools, not concentrated in one. `get` shows content
619
+ filtering. `cp` shows glob batch operations. `rm` shows preview safety.
620
+ Each tool reinforces the pattern vocabulary from a different angle.
621
+ A model that sees `path="known://*"` in get, `path="known://plan_*"` in
622
+ cp, and `path="known://temp_*" preview` in rm learns that patterns
623
+ are universal — not a feature of any single tool.
624
+
625
+ ---
626
+
627
+ ## 8. Hedberg Editing Syntax
449
628
 
450
629
  The model picks its preferred edit format. The parser understands all of them:
451
630
 
@@ -460,26 +639,36 @@ The model picks its preferred edit format. The parser understands all of them:
460
639
 
461
640
  ---
462
641
 
463
- ## 8. Response Healing
642
+ ## 9. Response Healing
464
643
 
465
- The server never throws on model output. Recovery order:
644
+ The server never throws on model output. "Model behavior" is never an
645
+ acceptable explanation. Recovery order:
466
646
 
467
647
  1. Can we recover? Extract the data and continue.
468
648
  2. Can we warn? Log structured warnings.
469
649
  3. Did our structure cause this? Check formatting, prompts.
470
- 4. Model drift is the LAST answer.
471
650
 
472
651
  Termination protocol:
473
652
  - `<summarize>` → run terminates
653
+ - `<summarize>` + failed actions → overridden to `<update>` (continue)
474
654
  - `<update>` → run continues
475
- - Both → summarize wins
476
- - Neither + tools → stall counter
655
+ - Both → update wins (if the model can't decide, it's not done)
656
+ - Neither + investigation tools → stall counter (RUMMY_MAX_STALLS)
657
+ - Neither + action-only tools → healed to summarize
477
658
  - Neither + plain text → healed to summarize
478
- - Repeated commands → loop detection
659
+ - Repeated commands → loop detection (RUMMY_MAX_REPETITIONS)
660
+ - Repeated update text → stall (RUMMY_MAX_UPDATE_REPEATS)
661
+
662
+ Format normalization:
663
+ - Gemma `` ```tool_code `` fences → stripped before parsing
664
+ - Qwen `<|tool_call>` format → normalized to XML
665
+ - OpenAI function_call JSON → normalized to XML
666
+ - Mistral `[TOOL_CALLS]` → normalized to XML
667
+ - Sed alternate delimiters (`s|old|new|`) → parsed like `s/old/new/`
479
668
 
480
669
  ---
481
670
 
482
- ## 9. Testing
671
+ ## 10. Testing
483
672
 
484
673
  | Tier | Location | LLM? |
485
674
  |------|----------|------|
@@ -493,12 +682,12 @@ E2E tests must NEVER mock the LLM. Environment cascade:
493
682
 
494
683
  ---
495
684
 
496
- ## 10. SQL Functions
685
+ ## 11. SQL Functions
497
686
 
498
687
  | Function | Purpose |
499
688
  |----------|---------|
500
689
  | `schemeOf(path)` | Extract URI scheme |
501
- | `countTokens(text)` | Token count (tiktoken o200k_base, `ceil(len/4)` fallback) |
690
+ | `countTokens(text)` | Token count (`ceil(len / RUMMY_TOKEN_DIVISOR)`) |
502
691
  | `hedmatch(pattern, string)` | Full-string pattern match (paths, equality) |
503
692
  | `hedsearch(pattern, string)` | Substring pattern search (content filtering) |
504
693
  | `hedreplace(pattern, replacement, string)` | Pattern-based replacement |
@@ -508,15 +697,17 @@ See [PLUGINS.md](PLUGINS.md) for the hedberg pattern type reference.
508
697
 
509
698
  ---
510
699
 
511
- ## 11. Configuration
700
+ ## 12. Configuration
512
701
 
513
702
  ```env
514
703
  RUMMY_HOME=~/.rummy
515
- RUMMY_MAX_TURNS=15
704
+ RUMMY_TOKEN_DIVISOR=2
705
+ RUMMY_MAX_TURNS=99
516
706
  RUMMY_MAX_STALLS=3
517
707
  RUMMY_MAX_REPETITIONS=3
708
+ RUMMY_MAX_UPDATE_REPEATS=3
518
709
  RUMMY_RETENTION_DAYS=31
519
- RUMMY_TEMPERATURE=0.7
710
+ RUMMY_TEMPERATURE=0.5
520
711
  RUMMY_DEBUG=false
521
712
  ```
522
713