@possumtech/rummy 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +12 -7
- package/BENCH_ENVIRONMENT.md +230 -0
- package/CLIENT_INTERFACE.md +396 -0
- package/PLUGINS.md +93 -1
- package/SPEC.md +305 -28
- package/bin/postinstall.js +2 -2
- package/bin/rummy.js +2 -2
- package/last_run.txt +5617 -0
- package/migrations/001_initial_schema.sql +2 -1
- package/package.json +6 -2
- package/scriptify/cache_probe.js +66 -0
- package/scriptify/cache_probe_grok.js +74 -0
- package/service.js +22 -11
- package/src/agent/AgentLoop.js +33 -139
- package/src/agent/ContextAssembler.js +2 -9
- package/src/agent/Entries.js +36 -101
- package/src/agent/ProjectAgent.js +2 -9
- package/src/agent/TurnExecutor.js +45 -83
- package/src/agent/XmlParser.js +247 -273
- package/src/agent/budget.js +5 -28
- package/src/agent/config.js +38 -0
- package/src/agent/errors.js +7 -13
- package/src/agent/httpStatus.js +1 -19
- package/src/agent/known_store.sql +7 -2
- package/src/agent/materializeContext.js +12 -17
- package/src/agent/pathEncode.js +5 -0
- package/src/agent/rummyHome.js +9 -0
- package/src/agent/runs.sql +18 -0
- package/src/agent/tokens.js +2 -8
- package/src/hooks/HookRegistry.js +1 -16
- package/src/hooks/Hooks.js +8 -33
- package/src/hooks/PluginContext.js +3 -21
- package/src/hooks/RpcRegistry.js +1 -4
- package/src/hooks/RummyContext.js +2 -16
- package/src/hooks/ToolRegistry.js +5 -15
- package/src/llm/LlmProvider.js +28 -23
- package/src/llm/errors.js +41 -4
- package/src/llm/openaiStream.js +125 -0
- package/src/llm/retry.js +61 -15
- package/src/plugins/budget/budget.js +14 -81
- package/src/plugins/cli/README.md +87 -0
- package/src/plugins/cli/bin.js +61 -0
- package/src/plugins/cli/cli.js +120 -0
- package/src/plugins/env/README.md +2 -1
- package/src/plugins/env/env.js +4 -6
- package/src/plugins/env/envDoc.md +2 -2
- package/src/plugins/error/error.js +23 -23
- package/src/plugins/file/file.js +2 -22
- package/src/plugins/get/get.js +12 -34
- package/src/plugins/get/getDoc.md +5 -3
- package/src/plugins/hedberg/edits.js +1 -11
- package/src/plugins/hedberg/hedberg.js +3 -26
- package/src/plugins/hedberg/normalize.js +1 -5
- package/src/plugins/hedberg/patterns.js +4 -15
- package/src/plugins/hedberg/sed.js +1 -7
- package/src/plugins/helpers.js +28 -20
- package/src/plugins/index.js +25 -41
- package/src/plugins/instructions/README.md +18 -0
- package/src/plugins/instructions/instructions.js +13 -76
- package/src/plugins/instructions/instructions.md +19 -18
- package/src/plugins/instructions/instructions_104.md +5 -4
- package/src/plugins/instructions/instructions_105.md +16 -15
- package/src/plugins/instructions/instructions_106.md +15 -14
- package/src/plugins/instructions/instructions_107.md +13 -6
- package/src/plugins/known/README.md +26 -6
- package/src/plugins/known/known.js +36 -34
- package/src/plugins/log/README.md +2 -2
- package/src/plugins/log/log.js +6 -33
- package/src/plugins/ollama/ollama.js +50 -66
- package/src/plugins/openai/openai.js +26 -44
- package/src/plugins/openrouter/openrouter.js +28 -52
- package/src/plugins/policy/README.md +8 -2
- package/src/plugins/policy/policy.js +8 -21
- package/src/plugins/prompt/README.md +22 -0
- package/src/plugins/prompt/prompt.js +8 -16
- package/src/plugins/rm/rm.js +5 -2
- package/src/plugins/rm/rmDoc.md +4 -4
- package/src/plugins/rpc/README.md +2 -1
- package/src/plugins/rpc/rpc.js +51 -47
- package/src/plugins/set/README.md +5 -1
- package/src/plugins/set/set.js +23 -33
- package/src/plugins/set/setDoc.md +1 -1
- package/src/plugins/sh/README.md +2 -1
- package/src/plugins/sh/sh.js +5 -11
- package/src/plugins/sh/shDoc.md +2 -2
- package/src/plugins/stream/README.md +6 -5
- package/src/plugins/stream/stream.js +6 -35
- package/src/plugins/telemetry/telemetry.js +26 -19
- package/src/plugins/think/think.js +4 -7
- package/src/plugins/unknown/unknown.js +8 -13
- package/src/plugins/update/update.js +36 -35
- package/src/plugins/update/updateDoc.md +3 -3
- package/src/plugins/xai/xai.js +30 -20
- package/src/plugins/yolo/yolo.js +8 -41
- package/src/server/ClientConnection.js +17 -47
- package/src/server/SocketServer.js +14 -14
- package/src/server/protocol.js +1 -10
- package/src/sql/functions/slugify.js +5 -7
- package/src/sql/v_model_context.sql +4 -11
- package/turns/cli_1777462658211/turn_001.txt +772 -0
- package/turns/cli_1777462658211/turn_002.txt +606 -0
- package/turns/cli_1777462658211/turn_003.txt +667 -0
- package/turns/cli_1777462658211/turn_004.txt +297 -0
- package/turns/cli_1777462658211/turn_005.txt +301 -0
- package/turns/cli_1777462658211/turn_006.txt +262 -0
- package/turns/cli_1777465095132/turn_001.txt +715 -0
- package/turns/cli_1777465095132/turn_002.txt +236 -0
- package/turns/cli_1777465095132/turn_003.txt +287 -0
- package/turns/cli_1777465095132/turn_004.txt +694 -0
- package/turns/cli_1777465095132/turn_005.txt +422 -0
- package/turns/cli_1777465095132/turn_006.txt +365 -0
- package/turns/cli_1777465095132/turn_007.txt +885 -0
- package/turns/cli_1777465095132/turn_008.txt +1277 -0
- package/turns/cli_1777465095132/turn_009.txt +736 -0
package/PLUGINS.md
CHANGED
|
@@ -241,11 +241,49 @@ ctx = {
|
|
|
241
241
|
toolSet, // Set<string> of active tool names for this loop
|
|
242
242
|
contextSize, // Model context window size
|
|
243
243
|
lastContextTokens, // Actual API tokens from the prior turn (0 on turn 1)
|
|
244
|
-
demoted, // Mutable array — plugins push paths they summarized
|
|
245
244
|
turn, // Current turn number
|
|
246
245
|
}
|
|
247
246
|
```
|
|
248
247
|
|
|
248
|
+
#### Filter Priority Bands {#plugins_filter_bands}
|
|
249
|
+
|
|
250
|
+
Filters run in ascending priority order. The packet renders in
|
|
251
|
+
top-to-bottom order matching that — lower priority appears earlier in
|
|
252
|
+
the message. Current `assembly.user` registrations:
|
|
253
|
+
|
|
254
|
+
| Priority | Block | Plugin | Mutates per turn? |
|
|
255
|
+
|---|---|---|---|
|
|
256
|
+
| 50 | `<summarized>` | `known.js` | Slow — only on new entry |
|
|
257
|
+
| 75 | `<visible>` | `known.js` | Fast — on every promote/demote |
|
|
258
|
+
| 100 | `<log>` | `log.js` | Always — appends per action |
|
|
259
|
+
| 200 | `<unknowns>` | `unknown.js` | On unknown lifecycle |
|
|
260
|
+
| 250 | `<instructions>` | `instructions.js` | On phase transition |
|
|
261
|
+
| 275 | `<budget>` | `budget.js` | Every turn (live) |
|
|
262
|
+
| 300 | `<prompt>` | `prompt.js` | Stable within a loop |
|
|
263
|
+
|
|
264
|
+
**Recommended ranges for new plugins** (for cache-friendly placement
|
|
265
|
+
and predictable rendering position):
|
|
266
|
+
|
|
267
|
+
| Range | Position | Use for |
|
|
268
|
+
|---|---|---|
|
|
269
|
+
| `0–49` | Top of user | Reserved (stable identity-tier blocks above `<summarized>`) |
|
|
270
|
+
| `50–99` | Codebase data surface | Don't add here — owned by `known.js` |
|
|
271
|
+
| `100–149` | History tier | Action history, timeline-style content |
|
|
272
|
+
| `150–199` | Open slot | Inter-history blocks (e.g. recent-decisions, tracked progress) |
|
|
273
|
+
| `200–249` | State tier | Model state (open questions, work-in-progress) |
|
|
274
|
+
| `250–299` | Phase + budget | Avoid; current phase / budget arithmetic owned here |
|
|
275
|
+
| `300–349` | Task | Reserved for prompt-tier content |
|
|
276
|
+
| `350–999` | Bottom | Append-after-prompt content (rare; usually wrong) |
|
|
277
|
+
|
|
278
|
+
Within a band, lower priority = renders higher. Pick the smallest
|
|
279
|
+
priority that lands you in the right band and leaves room above and
|
|
280
|
+
below.
|
|
281
|
+
|
|
282
|
+
`assembly.system` currently has no registrations — system message is
|
|
283
|
+
the static identity surface (instructions base + tool docs). Adding
|
|
284
|
+
to `assembly.system` invalidates the system-prefix cache on whatever
|
|
285
|
+
provider you target; reserve for content that's truly stable per-run.
|
|
286
|
+
|
|
249
287
|
### Tool Docs {#plugins_tool_docs}
|
|
250
288
|
|
|
251
289
|
Each tool plugin has a `*Doc.js` file with annotated line arrays.
|
|
@@ -282,6 +320,60 @@ entry = {
|
|
|
282
320
|
Multiple handlers per scheme. Lower priority runs first. Return
|
|
283
321
|
`false` to stop the chain.
|
|
284
322
|
|
|
323
|
+
#### Reporting outcomes {#plugins_handler_outcomes}
|
|
324
|
+
|
|
325
|
+
**The action entry IS its outcome.** Your handler finalizes the action's
|
|
326
|
+
own log entry at `entry.resultPath`. Success and failure are two values
|
|
327
|
+
of the same shape — body, state, outcome. The model sees both through
|
|
328
|
+
the same channel under your tool's scheme:
|
|
329
|
+
|
|
330
|
+
```js
|
|
331
|
+
async handler(entry, rummy) {
|
|
332
|
+
const { entries: store, runId, turn, loopId } = rummy;
|
|
333
|
+
const result = await runMyTool(entry.attributes);
|
|
334
|
+
|
|
335
|
+
if (result.failed) {
|
|
336
|
+
await store.set({
|
|
337
|
+
runId, turn, loopId,
|
|
338
|
+
path: entry.resultPath,
|
|
339
|
+
body: result.failureMessage,
|
|
340
|
+
state: "failed",
|
|
341
|
+
outcome: result.label, // "not_found", "validation", etc.
|
|
342
|
+
});
|
|
343
|
+
return;
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
await store.set({
|
|
347
|
+
runId, turn, loopId,
|
|
348
|
+
path: entry.resultPath,
|
|
349
|
+
body: result.output,
|
|
350
|
+
state: "resolved",
|
|
351
|
+
});
|
|
352
|
+
}
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
That's the whole failure-reporting surface. Body is the result on
|
|
356
|
+
success, the failure message on failure. State labels the verdict
|
|
357
|
+
(`resolved` / `failed`). Outcome is a short machine-readable label.
|
|
358
|
+
|
|
359
|
+
The framework reads the post-handler state of every recorded entry
|
|
360
|
+
each turn; any `state="failed"` result counts as a strike toward
|
|
361
|
+
`MAX_STRIKES`. You don't need to do anything else to make the strike
|
|
362
|
+
fire — write the entry's outcome and the framework follows.
|
|
363
|
+
|
|
364
|
+
You do **not** call `hooks.error.log.emit` from a tool handler. That
|
|
365
|
+
hook is reserved for the framework's actionless-failure cases (parser
|
|
366
|
+
warnings, dispatch crashes, runtime watchdog, budget overflow) — none
|
|
367
|
+
of which a third-party plugin should be writing.
|
|
368
|
+
|
|
369
|
+
If your handler throws, the framework catches and emits a status-500
|
|
370
|
+
error entry on your behalf. That's the one case where the framework
|
|
371
|
+
writes for you. Throw with intent; don't try-catch your own handler
|
|
372
|
+
just to avoid a stack trace.
|
|
373
|
+
|
|
374
|
+
See SPEC [failure_reporting](SPEC.md#failure_reporting) for the
|
|
375
|
+
full contract and the rationale.
|
|
376
|
+
|
|
285
377
|
### full(entry) / summary(entry) {#plugins_views}
|
|
286
378
|
|
|
287
379
|
Returns the string the model sees for this tool's entries at the
|
package/SPEC.md
CHANGED
|
@@ -7,6 +7,31 @@ everything else.
|
|
|
7
7
|
|
|
8
8
|
---
|
|
9
9
|
|
|
10
|
+
## Glossary {#glossary}
|
|
11
|
+
|
|
12
|
+
Canonical meanings. When a doc, comment, test name, or commit message
|
|
13
|
+
uses one of these words, it should mean exactly what's written here.
|
|
14
|
+
|
|
15
|
+
| Term | Meaning |
|
|
16
|
+
|---|---|
|
|
17
|
+
| **run** | The alias-keyed lifetime of one project-agent invocation. Begins on `set run://{alias}` with a prompt; ends at terminal status (200/204/422/499/500). One run per alias; aliases are unique per project. |
|
|
18
|
+
| **loop** | One `ask` or `act` invocation and all its continuation turns until terminal `<update>`, abandonment, or abort. A run can contain multiple loops if a fresh prompt arrives on an existing run. |
|
|
19
|
+
| **turn** | One round-trip with the LLM: one assembled prompt sent, one response parsed. A loop is a sequence of turns. |
|
|
20
|
+
| **mode** | `ask` (read-only — no proposals, no `<sh>`, no edits) or `act` (full tool surface). Per loop, set at the entry point. |
|
|
21
|
+
| **phase** | (Primary, FCRM sense.) One of five FCRM states selected by `<update status="1XY">`: 104=Definition, 105=Discovery, 106=Demotion, 107=Deployment, 108=Verification. Maps to `instructions_10N.md` rendered in `<instructions>`. **The model-facing instructions call these "stages"** — same concept, dual vocabulary kept for the model's surface stability. Two non-FCRM uses of "phase" coexist in the codebase and AGENTS.md: (1) "two-phase turn execution" refers to RECORD→DISPATCH within a single turn; (2) AGENTS.md "Phase 1 / Phase 2 / ..." entries refer to project-development milestones (Schema, Primitives, etc.) — neither is the FCRM phase. Context disambiguates; if it doesn't, it's a doc bug. |
|
|
22
|
+
| **stage** | Model-facing synonym for **phase**. Lives in `instructions_*.md` and tooldocs. |
|
|
23
|
+
| **proposal** | A tool-call entry at status 202 awaiting client resolution (accept/reject). Side-effecting actions (`<sh>`, `<env>`, file `<set>`, file `<rm>`/`<mv>`/`<cp>`, `<ask_user>`) emit proposals. YOLO mode auto-accepts. |
|
|
24
|
+
| **verdict** | The end-of-turn ruling from `hooks.error.verdict` (owned by the error plugin). Returns `{continue, status, reason}`. Decides whether the loop continues to another turn or terminates. |
|
|
25
|
+
| **strike** | A turn whose verdict counts toward `MAX_STRIKES`. A strike fires when `turnErrors > 0` (any `error.log` entry that turn), when any recorded action entry finishes the turn with `state="failed"`, or when cycle detection trips silently. The streak counter resets on a clean turn (no errors, no failed actions, no cycle); when it reaches `MAX_STRIKES`, the loop abandons at 499. |
|
|
26
|
+
| **resolution** | Client's accept/reject of a proposal via `run/resolve` RPC. |
|
|
27
|
+
| **dispatch** | The DISPATCH phase of a turn — actually executing recorded action entries. |
|
|
28
|
+
|
|
29
|
+
**Hierarchy:** project ⊃ run ⊃ loop ⊃ turn. A turn is the smallest
|
|
30
|
+
unit of model interaction. A strike is a per-turn property that
|
|
31
|
+
accumulates across turns within a loop.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
10
35
|
## The Contract
|
|
11
36
|
|
|
12
37
|
Rummy has one contract. Every actor speaks it.
|
|
@@ -227,7 +252,7 @@ Every entry plays one of four roles:
|
|
|
227
252
|
|
|
228
253
|
| Role | Category | Section | Description |
|
|
229
254
|
|------|----------|---------|-------------|
|
|
230
|
-
| **Data** | `data` | `<
|
|
255
|
+
| **Data** | `data` | `<summarized>` + `<visible>` | Entries the model works with — persistent state and captured payload. Summary line in `<summarized>` for visible+summarized tiers; full body in `<visible>` only when promoted. |
|
|
231
256
|
| **Logging** | `logging` | `<log>` | Records of what happened — tool results, lifecycle signals |
|
|
232
257
|
| **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
|
|
233
258
|
| **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
|
|
@@ -262,10 +287,11 @@ across two namespaces as a direct consequence:
|
|
|
262
287
|
scheme=`log`, category=`logging`. Renders in `<log>`.
|
|
263
288
|
- **Payload channels** live in `{action}://turn_N/{slug}_N` —
|
|
264
289
|
scheme=`{action}` (registered as `category: "data"`). Render in
|
|
265
|
-
`<
|
|
290
|
+
`<summarized>` (always, while tracked) and `<visible>` (when
|
|
291
|
+
promoted).
|
|
266
292
|
|
|
267
293
|
This keeps `<log>` a terse audit trail (what happened, exit code,
|
|
268
|
-
paths) while `<
|
|
294
|
+
paths) while `<visible>` carries the actual streamed bytes the model
|
|
269
295
|
reads. Conflating the two — e.g., writing channels under `log://...` —
|
|
270
296
|
mislabels payload as audit and pollutes the logging section with
|
|
271
297
|
multi-line command output. See [streaming_entries](#streaming_entries).
|
|
@@ -434,6 +460,32 @@ policy filtering, abort cascade). Plugin-tier convenience verbs
|
|
|
434
460
|
don't invoke the handler chain. Plugin code that wants full handler
|
|
435
461
|
semantics calls `hooks.tools.dispatch` directly.
|
|
436
462
|
|
|
463
|
+
**Two-phase turn execution.** Model output flows through
|
|
464
|
+
`TurnExecutor.execute` in strict order:
|
|
465
|
+
|
|
466
|
+
1. **RECORD** — every parsed command is materialized as a
|
|
467
|
+
`log://turn_N/{action}/{slug}` audit entry via `#record()`. Each
|
|
468
|
+
tool's parser shape surfaces exactly one of `path` / `command` /
|
|
469
|
+
`question` as its addressable target; absent fields are treated
|
|
470
|
+
as empty so the validation gate catches bad shapes rather than
|
|
471
|
+
letting `undefined` propagate. Targets longer than 512 chars or
|
|
472
|
+
containing control characters are rejected as likely reasoning
|
|
473
|
+
bleed (the model's chain-of-thought leaking into a tool path).
|
|
474
|
+
Plugins can validate or transform via the `entry.recording`
|
|
475
|
+
filter before the row is committed.
|
|
476
|
+
2. **DISPATCH** — recorded entries fire sequentially via
|
|
477
|
+
`hooks.tools.dispatch`. Each tool runs to completion before the
|
|
478
|
+
next starts. A failed entry sets `abortAfter`; subsequent
|
|
479
|
+
entries record as `outcome="aborted"`. Crashes inside dispatch
|
|
480
|
+
route through `hooks.error.log` at status 500 and trigger the
|
|
481
|
+
same abort cascade. After each entry, `proposal.prepare` lets
|
|
482
|
+
plugins materialize pending 202 proposals (e.g. `set`'s
|
|
483
|
+
search/replace revisions) from the just-recorded entry.
|
|
484
|
+
|
|
485
|
+
Narration outside tags is fine when the turn also emitted at least
|
|
486
|
+
one command — "OK", "Let me check:", reasoning prefixes are natural
|
|
487
|
+
and don't trigger the no-actionable-tags error path.
|
|
488
|
+
|
|
437
489
|
**Tool dispatch:** Commands are dispatched sequentially in the order
|
|
438
490
|
the model emitted them. Each tool either succeeds (200), fails (400+),
|
|
439
491
|
or proposes (202). On failure, all remaining tools are aborted. On
|
|
@@ -508,6 +560,68 @@ export default class Rm {
|
|
|
508
560
|
|
|
509
561
|
A plugin can be multiple types. Known is a tool AND an assembly plugin.
|
|
510
562
|
|
|
563
|
+
### Failure Reporting {#failure_reporting}
|
|
564
|
+
|
|
565
|
+
**The action entry IS its outcome.** Every action plugin's handler
|
|
566
|
+
finalizes the action's own log entry (`log://turn_N/{action}/{slug}`)
|
|
567
|
+
with body, state, and outcome. Success and failure are two values of
|
|
568
|
+
the same shape — only the field values change. The model sees both
|
|
569
|
+
through the same channel, rendered under the action's scheme.
|
|
570
|
+
|
|
571
|
+
```
|
|
572
|
+
<get path="src/x.js" state="resolved">…file body…</get> # success
|
|
573
|
+
<get path="src/x.js" state="failed" outcome="not_found"> # failure
|
|
574
|
+
src/x.js not found
|
|
575
|
+
</get>
|
|
576
|
+
```
|
|
577
|
+
|
|
578
|
+
State + outcome label how the action ended; body is the result — file content
|
|
579
|
+
on success, failure message on failure. No separate error entry is
|
|
580
|
+
written for action-level failures; the model finds the failure exactly
|
|
581
|
+
where it would find the success: at the action's own log path.
|
|
582
|
+
|
|
583
|
+
**Strike attribution.** `error.js#verdict` looks up the post-handler
|
|
584
|
+
state of every recorded entry on each turn. Any `state="failed"`
|
|
585
|
+
result counts as a strike. Plugin authors write their action entry
|
|
586
|
+
once with the right state; the strike machinery follows. They never
|
|
587
|
+
call `error.log.emit` for action-level failures.
|
|
588
|
+
|
|
589
|
+
**`error.log.emit` is for actionless failures** — failures that have
|
|
590
|
+
no corresponding action entry to attach to:
|
|
591
|
+
|
|
592
|
+
- Dispatch crash — the framework caught an exception thrown from inside
|
|
593
|
+
a handler before the handler had a chance to write its own entry.
|
|
594
|
+
- Parser-level failures — malformed XML warnings, no-actionable-tags
|
|
595
|
+
responses, fired before any action entry could be recorded.
|
|
596
|
+
- Runtime watchdog firings — `ContextExceededError`, RPC timeout,
|
|
597
|
+
stream timeout — not bound to a specific action.
|
|
598
|
+
- Budget overflow — pre-dispatch rejection.
|
|
599
|
+
|
|
600
|
+
`error.log.emit` writes a `log://turn_N/error/<slug>` entry and
|
|
601
|
+
increments `state.turnErrors`, which also feeds strike accumulation.
|
|
602
|
+
Both channels (action-entry state=failed and `error.log.emit`)
|
|
603
|
+
contribute to the strike streak; either path advances it.
|
|
604
|
+
|
|
605
|
+
**Recording-filter rejection.** Plugins on the `entry.recording` filter
|
|
606
|
+
chain (e.g. `policy`) can return an entry with `state="failed"`. The
|
|
607
|
+
framework writes that entry to the store before returning from
|
|
608
|
+
`#record`, and dispatch skips it. The model sees the rejection at the
|
|
609
|
+
action's own log path, exactly like any other action-level failure.
|
|
610
|
+
|
|
611
|
+
Cycle detection is **silent** — it does not call `error.log.emit`.
|
|
612
|
+
The strike accumulates internally via `state.turnErrors++`; on
|
|
613
|
+
`MAX_STRIKES` the run abandons at 499 with a telemetry-side reason.
|
|
614
|
+
The model sees no special signal, because telling the model "you're
|
|
615
|
+
looping" invites superficial evasion (vary an attribute to bust the
|
|
616
|
+
fingerprint) without addressing the underlying confusion.
|
|
617
|
+
|
|
618
|
+
**Plugin author contract.** Your handler does one job: finalize the
|
|
619
|
+
action's own log entry with the right body/state/outcome. That's the
|
|
620
|
+
whole API for failure reporting. You do not call `error.log.emit`.
|
|
621
|
+
If your handler throws, the framework catches and routes through
|
|
622
|
+
`error.log.emit` at status 500 — that's the only situation where the
|
|
623
|
+
framework writes on your behalf.
|
|
624
|
+
|
|
511
625
|
### Mode Enforcement {#mode_enforcement}
|
|
512
626
|
|
|
513
627
|
Two mechanisms, operating at different layers:
|
|
@@ -520,9 +634,11 @@ Two mechanisms, operating at different layers:
|
|
|
520
634
|
2. **Per-invocation filtering** — the `policy` plugin subscribes to
|
|
521
635
|
`entry.recording` and inspects individual emissions for ask-mode
|
|
522
636
|
violations that the tool-list alone can't catch (file-scheme `<set>`
|
|
523
|
-
edits, file `<rm>`, file-destination `<mv>`/`<cp>`). Rejects
|
|
524
|
-
|
|
525
|
-
|
|
637
|
+
edits, file `<rm>`, file-destination `<mv>`/`<cp>`). Rejects by
|
|
638
|
+
marking the action entry `state="failed"`, `outcome="permission"`
|
|
639
|
+
with a body describing the rejection, per the failure-reporting
|
|
640
|
+
contract — see [failure_reporting](#failure_reporting). The tool
|
|
641
|
+
remains advertised; the specific invocation is blocked.
|
|
526
642
|
|
|
527
643
|
### YOLO Mode {#yolo_mode}
|
|
528
644
|
|
|
@@ -634,11 +750,13 @@ log://turn_N/{action}/{slug} scheme=log category=logging status=202
|
|
|
634
750
|
{action}://turn_N/{slug}_1 scheme={action} category=data status=102 → 200/500
|
|
635
751
|
body: primary stream (stdout for shell)
|
|
636
752
|
summary="{command}" visibility=summarized
|
|
637
|
-
(
|
|
753
|
+
(line in <summarized>; full body in
|
|
754
|
+
<visible> when promoted)
|
|
638
755
|
|
|
639
756
|
{action}://turn_N/{slug}_2 scheme={action} category=data status=102 → 200/500
|
|
640
757
|
body: alt stream (stderr for shell)
|
|
641
|
-
(
|
|
758
|
+
(line in <summarized>; full body in
|
|
759
|
+
<visible> when promoted, often empty)
|
|
642
760
|
```
|
|
643
761
|
|
|
644
762
|
`{action}` is the producer plugin's name (`sh`, `env`, future: `search`,
|
|
@@ -653,6 +771,16 @@ channels. Non-process producers (search, fetch) map their streams onto
|
|
|
653
771
|
the same numeric space: `_1` for the primary data stream, `_2` for
|
|
654
772
|
anomalies/errors, `_3`+ for auxiliary streams.
|
|
655
773
|
|
|
774
|
+
**Search prefetch.** The `search` producer (provided by `rummy.web`
|
|
775
|
+
when wired) may prefetch its result URLs as separate `<https>` data
|
|
776
|
+
entries before the model emits any `<get>`. The model sees those
|
|
777
|
+
pages as already-summarized data without having explicitly loaded
|
|
778
|
+
them. Auditors reading dumps should be aware: the absence of a
|
|
779
|
+
corresponding `log://turn_N/get/` for a URL does **not** mean the
|
|
780
|
+
URL wasn't loaded — it may have arrived via search prefetch. The
|
|
781
|
+
prefetch policy is the search plugin's implementation detail; the
|
|
782
|
+
data entries themselves obey the streaming-producer shape above.
|
|
783
|
+
|
|
656
784
|
**Status 102 ("Processing") marks an entry in mid-stream:** body is
|
|
657
785
|
partial, will change; tokens grow as chunks arrive. Agents reading a
|
|
658
786
|
102 entry use `<get>` with `line`/`limit` (including negative `line`
|
|
@@ -684,11 +812,26 @@ Two messages per turn. System = stable truth. User = active task.
|
|
|
684
812
|
instructions text
|
|
685
813
|
(instructions.md base template + tool docs injected via
|
|
686
814
|
instructions.toolDocs filter; optional persona appended)
|
|
687
|
-
<context>
|
|
688
|
-
all category=data entries (knowns, files, http/https),
|
|
689
|
-
wrapped by known.js on assembly.system at priority 100
|
|
690
|
-
</context>
|
|
691
815
|
[user message]
|
|
816
|
+
<summarized>
|
|
817
|
+
one entry per category=data entry whose visibility is visible
|
|
818
|
+
or summarized; plus the named carve-out (archived prompts pass
|
|
819
|
+
through with visibility="archived" so the model can <get> the
|
|
820
|
+
active prompt back). Each entry renders under its scheme tag
|
|
821
|
+
with its summarized projection as the tag body — this is the
|
|
822
|
+
compact-but-informative view produced by the plugin's summary()
|
|
823
|
+
hook (e.g. truncated knowns, code symbols for files, page
|
|
824
|
+
abstracts for URLs). Identity-keyed, slow-mutating: only grows
|
|
825
|
+
when a new entry lands. (known.js, assembly.user priority 50)
|
|
826
|
+
</summarized>
|
|
827
|
+
<visible>
|
|
828
|
+
each category=data entry whose visibility is visible, rendered
|
|
829
|
+
under its scheme tag with its visible projection as the tag
|
|
830
|
+
body (full body per the plugin's visible() hook). Working-set:
|
|
831
|
+
append on promote, remove on demote. A visible entry exists in
|
|
832
|
+
BOTH blocks — summary projection up top, full body below.
|
|
833
|
+
(known.js, assembly.user priority 75)
|
|
834
|
+
</visible>
|
|
692
835
|
<log>
|
|
693
836
|
action history — log:// entries + pre-latest prompts
|
|
694
837
|
(log.js, assembly.user priority 100)
|
|
@@ -705,13 +848,23 @@ Two messages per turn. System = stable truth. User = active task.
|
|
|
705
848
|
```
|
|
706
849
|
|
|
707
850
|
**System** = stable world state the model operates within (identity,
|
|
708
|
-
tools, tool docs
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
phase, and current prompt.
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
851
|
+
tools, tool docs). Stable across turns within a run, which keeps
|
|
852
|
+
prompt caching intact. **User** = active work (what the model is
|
|
853
|
+
doing right now): the project's data surface, history, open questions,
|
|
854
|
+
current phase, and current prompt. Both phase-specific
|
|
855
|
+
`<instructions>` and the codebase blocks (`<summarized>` / `<visible>`)
|
|
856
|
+
live in the user message because they change turn-to-turn — putting
|
|
857
|
+
mutable state in system would invalidate the cache on every promote
|
|
858
|
+
or phase transition.
|
|
859
|
+
|
|
860
|
+
**Why two blocks instead of one `<context>`.** Promote/demote is the
|
|
861
|
+
dominant intra-phase operation. A single-block `<context>` render
|
|
862
|
+
invalidates the entire data surface every time. With the split,
|
|
863
|
+
`<summarized>` mutates only when a new entry lands (slow); `<visible>`
|
|
864
|
+
mutates on every promote/demote (fast). Ordering slow-above-fast
|
|
865
|
+
preserves the prefix cache for `<summarized>` across the common case.
|
|
866
|
+
Cognitively: `<summarized>` is "what I know exists" (identity);
|
|
867
|
+
`<visible>` is "what I'm reading right now" (working memory).
|
|
715
868
|
|
|
716
869
|
The `<prompt>` tag is present on every turn — first turn and
|
|
717
870
|
continuations alike. The model always sees its task. The active prompt
|
|
@@ -896,6 +1049,22 @@ and emits a 413 error via `hooks.error.log.emit` with the descriptive
|
|
|
896
1049
|
body (what was demoted, the 50% rule for the next turn). The model
|
|
897
1050
|
sees the `error://` entry next turn and adjusts.
|
|
898
1051
|
|
|
1052
|
+
**Delta-from-actual prediction.** Post-dispatch uses
|
|
1053
|
+
`predictNextPacket = lastContextTokens + Σ countTokens(body) for rows added this turn`,
|
|
1054
|
+
not the conservative measureMessages estimator. Reason: a 60%+
|
|
1055
|
+
divergence between the pre-call `<prompt tokenUsage>` (real API
|
|
1056
|
+
prompt_tokens) and the post-check estimator made the model dismiss
|
|
1057
|
+
the budget as janky and stop following demote rules. The two numbers
|
|
1058
|
+
must live on the same scale.
|
|
1059
|
+
|
|
1060
|
+
**Prior-turn-pressure fallback.** If post-dispatch finds nothing to
|
|
1061
|
+
demote in the current turn but the packet still overflows, the
|
|
1062
|
+
pressure is coming from prior-turn promotions the model never demoted
|
|
1063
|
+
itself. Demotion widens to all currently-visible entries in the run
|
|
1064
|
+
and the prompt is also demoted. Without this fallback, observed
|
|
1065
|
+
behavior was strikes accumulating on runs whose base context had
|
|
1066
|
+
drifted over ceiling through no fault of the current turn.
|
|
1067
|
+
|
|
899
1068
|
**LLM-reported context exceeded.** If the LLM rejects the request
|
|
900
1069
|
with a "context too long" error (detected via the regex in
|
|
901
1070
|
`src/llm/errors.js`), the LlmProvider raises `ContextExceededError`
|
|
@@ -1075,6 +1244,16 @@ is raw JSON; parse client-side. Mid-turn emissions have `telemetry:
|
|
|
1075
1244
|
null`; the final emission of each turn includes the full telemetry
|
|
1076
1245
|
block (token usage, context distribution, cost).
|
|
1077
1246
|
|
|
1247
|
+
**Telemetry completeness guarantee.** Every `run/state` emission
|
|
1248
|
+
computes a real budget from real numbers — never undefined, never
|
|
1249
|
+
synthesized. When no fresh turn result is available
|
|
1250
|
+
(abort/max-turns/crash paths fire before any turn executed, or after
|
|
1251
|
+
a turn that produced no tokens), `AgentLoop.#emitRunState` reads the
|
|
1252
|
+
last turn's `context_tokens` from the DB. Absent means no turn ran
|
|
1253
|
+
yet; zero is the truth, not a fallback. The shape and the math are
|
|
1254
|
+
the same on every code path so the client's renderer never needs to
|
|
1255
|
+
discriminate by emission cause.
|
|
1256
|
+
|
|
1078
1257
|
`stream/cancelled` payload: `{ run, path, reason }`. Server has
|
|
1079
1258
|
already transitioned the entries to 499 (`Client Closed Request`);
|
|
1080
1259
|
client should stop sending `stream` chunks for that path.
|
|
@@ -1089,6 +1268,30 @@ client should stop sending `stream` chunks for that path.
|
|
|
1089
1268
|
| accept | neither | `running` — healer decides |
|
|
1090
1269
|
| error | any | `running` — error state, model retries |
|
|
1091
1270
|
|
|
1271
|
+
**RPC ack vs run terminal status.** `resolve` and `inject` return the
|
|
1272
|
+
*current* run status (typically 102 mid-run), not 200. The client's
|
|
1273
|
+
dispatch handler must distinguish the synchronous RPC ack from the
|
|
1274
|
+
asynchronous `run/state` notification that carries real terminal
|
|
1275
|
+
state at end-of-turn — otherwise an HTTP-style 200 ack on a
|
|
1276
|
+
successful resolve would prematurely close the document.
|
|
1277
|
+
|
|
1278
|
+
**Proposal hook chain.** Resolution flows through three filter/event
|
|
1279
|
+
hooks plugins can subscribe to:
|
|
1280
|
+
|
|
1281
|
+
- `proposal.accepting` (filter) — first plugin to return
|
|
1282
|
+
`{ allow: false, outcome, body }` vetoes acceptance. The entry
|
|
1283
|
+
resolves to `state="failed"` with the plugin-supplied outcome and
|
|
1284
|
+
body. Used by `policy` for read-only enforcement and similar
|
|
1285
|
+
guards. First veto wins; later filters don't run.
|
|
1286
|
+
- `proposal.content` (filter) — when acceptance proceeds, plugins
|
|
1287
|
+
override the resolved body. Default is `output ?? ""`. The `set`
|
|
1288
|
+
plugin uses this to prefer the proposed body it already staged
|
|
1289
|
+
on the audit entry over whatever literal body the client passed
|
|
1290
|
+
through `resolve`.
|
|
1291
|
+
- `proposal.accepted` / `proposal.rejected` (events) — fired after
|
|
1292
|
+
the resolution is committed; plugins side-effect on either
|
|
1293
|
+
outcome.
|
|
1294
|
+
|
|
1092
1295
|
---
|
|
1093
1296
|
|
|
1094
1297
|
## Plugin System {#plugin_system}
|
|
@@ -1165,12 +1368,12 @@ one unified mechanism.
|
|
|
1165
1368
|
|
|
1166
1369
|
### Pattern Distribution
|
|
1167
1370
|
|
|
1168
|
-
Hedbergian pattern matching (globs, body filters,
|
|
1371
|
+
Hedbergian pattern matching (globs, body filters, manifest) is taught
|
|
1169
1372
|
across multiple tools, not concentrated in one. `get` shows content
|
|
1170
|
-
filtering. `cp` shows glob batch operations. `rm` shows
|
|
1373
|
+
filtering. `cp` shows glob batch operations. `rm` shows manifest safety.
|
|
1171
1374
|
Each tool reinforces the pattern vocabulary from a different angle.
|
|
1172
1375
|
A model that sees `path="known://*"` in get, `path="known://plan_*"` in
|
|
1173
|
-
cp, and `path="known://temp_*"
|
|
1376
|
+
cp, and `path="known://temp_*" manifest` in rm learns that patterns
|
|
1174
1377
|
are universal — not a feature of any single tool.
|
|
1175
1378
|
|
|
1176
1379
|
---
|
|
@@ -1205,11 +1408,14 @@ Termination protocol:
|
|
|
1205
1408
|
(the claim of doneness is refuted by the failures)
|
|
1206
1409
|
- `<update status="102">` → run continues
|
|
1207
1410
|
- Multiple `<update>` → last one wins
|
|
1208
|
-
- No `<update>` + investigation tools → stall counter (RUMMY_MAX_STALLS)
|
|
1209
1411
|
- No `<update>` + action-only tools → healer infers terminal from body
|
|
1210
1412
|
- No `<update>` + plain text → healer infers terminal from body
|
|
1211
|
-
- Repeated commands
|
|
1212
|
-
|
|
1413
|
+
- Repeated turn fingerprints (commands, attributes, or empty turns) →
|
|
1414
|
+
cycle detection (`RUMMY_MIN_CYCLES`, `RUMMY_MAX_CYCLE_PERIOD`); after
|
|
1415
|
+
detection, strikes accumulate up to `RUMMY_MAX_STRIKES` then close 499.
|
|
1416
|
+
- Hard ceiling: `RUMMY_MAX_LOOP_TURNS` caps turns within a single loop,
|
|
1417
|
+
regardless of any other guard. There is no per-run cap; a run may
|
|
1418
|
+
comprise many loops.
|
|
1213
1419
|
|
|
1214
1420
|
Format normalization:
|
|
1215
1421
|
- Gemma `` ```tool_code `` fences → stripped before parsing
|
|
@@ -1218,6 +1424,78 @@ Format normalization:
|
|
|
1218
1424
|
- Mistral `[TOOL_CALLS]` → normalized to XML
|
|
1219
1425
|
- Sed alternate delimiters (`s|old|new|`) → parsed like `s/old/new/`
|
|
1220
1426
|
|
|
1427
|
+
### XML Parser {#xml_parser}
|
|
1428
|
+
|
|
1429
|
+
`src/agent/XmlParser.js` is the syntax layer between raw model output
|
|
1430
|
+
and the dispatch pipeline. Models routinely emit malformed XML —
|
|
1431
|
+
unclosed tags, missing slashes, mismatched closes, unterminated
|
|
1432
|
+
attribute values, embedded code-fences, training-format tool calls.
|
|
1433
|
+
The parser's contract is: never throw, never silently drop a tool
|
|
1434
|
+
call, surface every recovery as a warning so error.log can route it.
|
|
1435
|
+
|
|
1436
|
+
**Pre-flight repair pipeline** (order is load-bearing):
|
|
1437
|
+
|
|
1438
|
+
1. `#normalizeToolCalls` — translate native training formats (gemma
|
|
1439
|
+
`` ```tool_code\n<xml>\n``` ``, Qwen `<|tool_call>call:NAME{...}`,
|
|
1440
|
+
OpenAI `{"name":"...","arguments":{...}}`, Anthropic
|
|
1441
|
+
`<tool_use><name>...</name><input>{...}</input></tool_use>`,
|
|
1442
|
+
Mistral `[TOOL_CALLS] [{...}]`, harmony role/channel pseudo-tags
|
|
1443
|
+
`<|channel>` / `<channel|>`). Catch-all malformed `<|tool_call>`
|
|
1444
|
+
tokens become `<error>` blocks (in prose — never with literal
|
|
1445
|
+
`<get>`/`<set>`/etc. tags, which would re-enter the parser as
|
|
1446
|
+
phantom tool calls).
|
|
1447
|
+
2. `#neutralizeCodeSpans` — entity-encode tag brackets inside
|
|
1448
|
+
backtick spans (`` `<get/>` `` → `` `&lt;get/&gt;` ``). Models
|
|
1449
|
+
quote instructions; quoted tool names must not parse.
|
|
1450
|
+
3. `#correctMismatchedCloses` — at outer tool depth (stack=1),
|
|
1451
|
+
rewrite `</WRONG>` to `</RIGHT>`. htmlparser2 silently drops
|
|
1452
|
+
unmatched closes, which would make the explicit recovery path
|
|
1453
|
+
unreachable and absorb every sibling command as body text.
|
|
1454
|
+
Conservative: only outermost depth; nested mismatches inside
|
|
1455
|
+
tool bodies are left alone (bodies are opaque, see below).
|
|
1456
|
+
4. `#balanceAttrQuotes` — close `ATTR="..."` values that never
|
|
1457
|
+
quote-close before the next tag. Without this repair,
|
|
1458
|
+
htmlparser2 consumes the rest of input as one giant attribute
|
|
1459
|
+
value and silently drops every subsequent tool call. Triggers
|
|
1460
|
+
only when the value contains no quote, no `>`, and is followed
|
|
1461
|
+
by another tag opening or close.
|
|
1462
|
+
|
|
1463
|
+
**Body opacity.** Tool bodies are opaque text, not nested XML. The
|
|
1464
|
+
model writing a plan with `<get/>` examples in it, SEARCH/REPLACE
|
|
1465
|
+
markers in `<set>`, or XML samples in `<known>` all need to survive
|
|
1466
|
+
intact. Nested tag opens push onto a per-tool stack; matching closes
|
|
1467
|
+
pop. Orphan closes that don't match the stack but match a known tool
|
|
1468
|
+
name are treated as recovery (likely typo); unknown orphan closes
|
|
1469
|
+
are kept as body text.
|
|
1470
|
+
|
|
1471
|
+
**Empty-body recovery.** A new tool tag opens while the current tool
|
|
1472
|
+
has no body content yet — the model meant the current tag to
|
|
1473
|
+
self-close but typed it paired, or emitted a mismatched close that
|
|
1474
|
+
htmlparser2 dropped. Close current, open new, emit recovery warning.
|
|
1475
|
+
|
|
1476
|
+
**Per-tool attr-vs-body resolution** (`resolveCommand`). Tools accept
|
|
1477
|
+
attributes on the open tag *and* body text inside the tag. If the
|
|
1478
|
+
canonical attribute is missing, the body silently fills it. The
|
|
1479
|
+
shape per tool:
|
|
1480
|
+
- `set` — structured edit detection (merge-conflict markers, udiff,
|
|
1481
|
+
Claude `<old_text>` XML, JSON `{search,replace}`, sed `s/.../.../`,
|
|
1482
|
+
attribute-mode `search=`/`replace=`, body-as-search-when-`body=`
|
|
1483
|
+
attr-set, plain write).
|
|
1484
|
+
- `update` — body fills `body`, status defaults to 102 if absent.
|
|
1485
|
+
- `get` / `rm` — attr `path` or body fills target. Spread `a` so
|
|
1486
|
+
`line` / `limit` / `visibility` / future attrs reach the handler.
|
|
1487
|
+
- `search` — attr `path` or body fills target; `results` numeric.
|
|
1488
|
+
- `mv` / `cp` — attr `path` (source); attr `to` or body fills dest.
|
|
1489
|
+
Spread `a` so `visibility` reaches the handler for batch
|
|
1490
|
+
visibility changes.
|
|
1491
|
+
- `sh` / `env` — attr `command` or body fills the command.
|
|
1492
|
+
- `ask_user` — attr `question`; attr `options` or body for options.
|
|
1493
|
+
|
|
1494
|
+
**Tool-call cap.** `RUMMY_MAX_COMMANDS` caps the number of tool
|
|
1495
|
+
calls per turn. When hit, remaining commands drop with a warning;
|
|
1496
|
+
the model sees one structured error so it can adjust on the next
|
|
1497
|
+
turn rather than rediscovering silent truncation.
|
|
1498
|
+
|
|
1221
1499
|
---
|
|
1222
1500
|
|
|
1223
1501
|
## Testing
|
|
@@ -1381,10 +1659,9 @@ Full reference is `.env.example` — these are the load-bearing vars.
|
|
|
1381
1659
|
|
|
1382
1660
|
| Var | Default | Purpose |
|
|
1383
1661
|
|-----|---------|---------|
|
|
1384
|
-
| `
|
|
1662
|
+
| `RUMMY_MAX_LOOP_TURNS` | 99 | Per-loop turn cap (no per-run cap) |
|
|
1385
1663
|
| `RUMMY_MAX_COMMANDS` | 99 | Max parsed tool calls per turn |
|
|
1386
|
-
| `
|
|
1387
|
-
| `RUMMY_MAX_UPDATE_REPEATS` | 3 | Same-text repeat threshold without progress |
|
|
1664
|
+
| `RUMMY_MAX_STRIKES` | 3 | Strikes (errors or detected cycles) before close at 499 |
|
|
1388
1665
|
| `RUMMY_MIN_CYCLES` | 3 | Consecutive repetitions to trigger cycle detection |
|
|
1389
1666
|
| `RUMMY_MAX_CYCLE_PERIOD` | 4 | Max cycle period checked by healer |
|
|
1390
1667
|
| `RUMMY_RETENTION_DAYS` | 31 | Days of completed/aborted runs kept |
|
package/bin/postinstall.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, copyFileSync } from "node:fs";
|
|
2
2
|
import { join, dirname } from "node:path";
|
|
3
3
|
import { fileURLToPath } from "node:url";
|
|
4
|
-
import
|
|
4
|
+
import resolveRummyHome from "../src/agent/rummyHome.js";
|
|
5
5
|
|
|
6
6
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
const packageRoot = join(__dirname, "..");
|
|
8
8
|
const envExample = join(packageRoot, ".env.example");
|
|
9
9
|
|
|
10
|
-
const rummyHome =
|
|
10
|
+
const rummyHome = resolveRummyHome();
|
|
11
11
|
|
|
12
12
|
if (!existsSync(rummyHome)) {
|
|
13
13
|
mkdirSync(rummyHome, { recursive: true });
|
package/bin/rummy.js
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
import { existsSync } from "node:fs";
|
|
4
4
|
import { isAbsolute, join, dirname } from "node:path";
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import
|
|
6
|
+
import resolveRummyHome from "../src/agent/rummyHome.js";
|
|
7
7
|
|
|
8
8
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
9
9
|
const packageRoot = join(__dirname, "..");
|
|
10
10
|
|
|
11
|
-
const rummyHome =
|
|
11
|
+
const rummyHome = resolveRummyHome();
|
|
12
12
|
|
|
13
13
|
// Base dir for env files: cwd if it has .env.example, else $RUMMY_HOME.
|
|
14
14
|
// The package's own .env.example is never consulted — silent package-
|