@possumtech/rummy 0.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +21 -4
- package/PLUGINS.md +389 -194
- package/README.md +25 -8
- package/SPEC.md +850 -373
- package/bin/demo.js +166 -0
- package/bin/rummy.js +9 -3
- package/biome/no-fallbacks.grit +50 -0
- package/lang/en.json +2 -2
- package/migrations/001_initial_schema.sql +88 -37
- package/package.json +6 -4
- package/service.js +50 -9
- package/src/agent/AgentLoop.js +460 -331
- package/src/agent/ContextAssembler.js +4 -2
- package/src/agent/Entries.js +655 -0
- package/src/agent/ProjectAgent.js +30 -18
- package/src/agent/TurnExecutor.js +232 -379
- package/src/agent/XmlParser.js +242 -67
- package/src/agent/budget.js +56 -0
- package/src/agent/errors.js +22 -0
- package/src/agent/httpStatus.js +39 -0
- package/src/agent/known_checks.sql +8 -4
- package/src/agent/known_queries.sql +9 -13
- package/src/agent/known_store.sql +275 -118
- package/src/agent/materializeContext.js +102 -0
- package/src/agent/runs.sql +10 -7
- package/src/agent/schemes.sql +14 -3
- package/src/agent/turns.sql +9 -9
- package/src/hooks/HookRegistry.js +6 -5
- package/src/hooks/Hooks.js +44 -3
- package/src/hooks/PluginContext.js +35 -21
- package/src/{server → hooks}/RpcRegistry.js +2 -1
- package/src/hooks/RummyContext.js +140 -37
- package/src/hooks/ToolRegistry.js +36 -35
- package/src/llm/LlmProvider.js +64 -90
- package/src/llm/errors.js +21 -0
- package/src/plugins/ask_user/README.md +1 -1
- package/src/plugins/ask_user/ask_user.js +37 -12
- package/src/plugins/ask_user/ask_userDoc.js +2 -23
- package/src/plugins/ask_user/ask_userDoc.md +10 -0
- package/src/plugins/budget/README.md +27 -23
- package/src/plugins/budget/budget.js +261 -69
- package/src/plugins/cp/README.md +2 -2
- package/src/plugins/cp/cp.js +31 -13
- package/src/plugins/cp/cpDoc.js +2 -23
- package/src/plugins/cp/cpDoc.md +7 -0
- package/src/plugins/engine/README.md +2 -2
- package/src/plugins/engine/engine.sql +4 -4
- package/src/plugins/engine/turn_context.sql +10 -10
- package/src/plugins/env/README.md +20 -5
- package/src/plugins/env/env.js +47 -8
- package/src/plugins/env/envDoc.js +2 -23
- package/src/plugins/env/envDoc.md +13 -0
- package/src/plugins/error/README.md +16 -0
- package/src/plugins/error/error.js +151 -0
- package/src/plugins/file/README.md +6 -6
- package/src/plugins/file/file.js +15 -7
- package/src/plugins/get/README.md +1 -1
- package/src/plugins/get/get.js +125 -49
- package/src/plugins/get/getDoc.js +2 -43
- package/src/plugins/get/getDoc.md +36 -0
- package/src/plugins/hedberg/README.md +1 -2
- package/src/plugins/hedberg/hedberg.js +8 -4
- package/src/plugins/hedberg/matcher.js +16 -17
- package/src/plugins/hedberg/normalize.js +0 -48
- package/src/plugins/helpers.js +43 -3
- package/src/plugins/index.js +146 -123
- package/src/plugins/instructions/README.md +35 -9
- package/src/plugins/instructions/instructions.js +126 -12
- package/src/plugins/instructions/instructions.md +25 -0
- package/src/plugins/instructions/instructions_104.md +7 -0
- package/src/plugins/instructions/instructions_105.md +46 -0
- package/src/plugins/instructions/instructions_106.md +0 -0
- package/src/plugins/instructions/instructions_107.md +0 -0
- package/src/plugins/instructions/instructions_108.md +8 -0
- package/src/plugins/instructions/protocol.js +12 -0
- package/src/plugins/known/README.md +2 -2
- package/src/plugins/known/known.js +77 -45
- package/src/plugins/known/knownDoc.js +2 -29
- package/src/plugins/known/knownDoc.md +8 -0
- package/src/plugins/log/README.md +48 -0
- package/src/plugins/log/log.js +109 -0
- package/src/plugins/mv/README.md +2 -2
- package/src/plugins/mv/mv.js +57 -24
- package/src/plugins/mv/mvDoc.js +2 -29
- package/src/plugins/mv/mvDoc.md +10 -0
- package/src/plugins/ollama/README.md +15 -0
- package/src/{llm/OllamaClient.js → plugins/ollama/ollama.js} +40 -18
- package/src/plugins/openai/README.md +17 -0
- package/src/plugins/openai/openai.js +120 -0
- package/src/plugins/openrouter/README.md +27 -0
- package/src/plugins/openrouter/openrouter.js +121 -0
- package/src/plugins/persona/README.md +20 -0
- package/src/plugins/persona/persona.js +9 -16
- package/src/plugins/policy/README.md +21 -0
- package/src/plugins/policy/policy.js +29 -14
- package/src/plugins/prompt/README.md +1 -1
- package/src/plugins/prompt/prompt.js +63 -18
- package/src/plugins/rm/README.md +1 -1
- package/src/plugins/rm/rm.js +58 -14
- package/src/plugins/rm/rmDoc.js +2 -24
- package/src/plugins/rm/rmDoc.md +13 -0
- package/src/plugins/rpc/README.md +2 -2
- package/src/plugins/rpc/rpc.js +515 -296
- package/src/plugins/set/README.md +1 -1
- package/src/plugins/set/set.js +318 -77
- package/src/plugins/set/setDoc.js +2 -35
- package/src/plugins/set/setDoc.md +22 -0
- package/src/plugins/sh/README.md +28 -5
- package/src/plugins/sh/sh.js +52 -8
- package/src/plugins/sh/shDoc.js +2 -23
- package/src/plugins/sh/shDoc.md +13 -0
- package/src/plugins/skill/README.md +23 -0
- package/src/plugins/skill/skill.js +14 -17
- package/src/plugins/stream/README.md +101 -0
- package/src/plugins/stream/stream.js +290 -0
- package/src/plugins/telemetry/README.md +1 -1
- package/src/plugins/telemetry/telemetry.js +148 -74
- package/src/plugins/think/README.md +1 -1
- package/src/plugins/think/think.js +14 -1
- package/src/plugins/think/thinkDoc.js +2 -17
- package/src/plugins/think/thinkDoc.md +7 -0
- package/src/plugins/unknown/README.md +3 -3
- package/src/plugins/unknown/unknown.js +56 -21
- package/src/plugins/unknown/unknownDoc.js +2 -25
- package/src/plugins/unknown/unknownDoc.md +11 -0
- package/src/plugins/update/README.md +1 -1
- package/src/plugins/update/update.js +67 -5
- package/src/plugins/update/updateDoc.js +2 -27
- package/src/plugins/update/updateDoc.md +8 -0
- package/src/plugins/xai/README.md +23 -0
- package/src/{llm/XaiClient.js → plugins/xai/xai.js} +58 -37
- package/src/server/ClientConnection.js +64 -37
- package/src/server/SocketServer.js +23 -10
- package/src/server/protocol.js +11 -0
- package/src/sql/functions/slugify.js +13 -1
- package/src/sql/v_model_context.sql +27 -31
- package/src/sql/v_run_log.sql +9 -14
- package/EXCEPTIONS.md +0 -46
- package/src/agent/KnownStore.js +0 -338
- package/src/agent/ResponseHealer.js +0 -188
- package/src/llm/OpenAiClient.js +0 -100
- package/src/llm/OpenRouterClient.js +0 -100
- package/src/plugins/budget/recovery.js +0 -47
- package/src/plugins/instructions/preamble.md +0 -37
- package/src/plugins/performed/README.md +0 -15
- package/src/plugins/performed/performed.js +0 -45
- package/src/plugins/previous/README.md +0 -16
- package/src/plugins/previous/previous.js +0 -60
- package/src/plugins/progress/README.md +0 -16
- package/src/plugins/progress/progress.js +0 -26
- package/src/plugins/summarize/README.md +0 -19
- package/src/plugins/summarize/summarize.js +0 -32
- package/src/plugins/summarize/summarizeDoc.js +0 -28
package/SPEC.md
CHANGED
|
@@ -1,120 +1,311 @@
|
|
|
1
1
|
# RUMMY: Architecture Specification
|
|
2
2
|
|
|
3
|
-
The authoritative reference for Rummy's design. The
|
|
4
|
-
|
|
5
|
-
model-facing behavior. This document defines
|
|
3
|
+
The authoritative reference for Rummy's design. The instructions
|
|
4
|
+
plugin (`instructions.md` + phase-specific `instructions_10N.md` +
|
|
5
|
+
tool docs) defines model-facing behavior. This document defines
|
|
6
|
+
everything else.
|
|
6
7
|
|
|
7
8
|
---
|
|
8
9
|
|
|
9
|
-
##
|
|
10
|
+
## The Contract
|
|
10
11
|
|
|
11
|
-
Rummy
|
|
12
|
-
pipeline is a hookable checkpoint. Plugins subscribe to events
|
|
13
|
-
(fire-and-forget side effects) and filters (transformation chains
|
|
14
|
-
that thread a value through subscribers in priority order).
|
|
12
|
+
Rummy has one contract. Every actor speaks it.
|
|
15
13
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
14
|
+
### Entries {#entries}
|
|
15
|
+
|
|
16
|
+
An entry is the sole unit of state the contract names. Every entry
|
|
17
|
+
carries:
|
|
18
|
+
|
|
19
|
+
| Field | Meaning |
|
|
20
|
+
|-------|---------|
|
|
21
|
+
| **path** | Identity. `scheme://locator` or bare filepath. |
|
|
22
|
+
| **body** | Content (text). |
|
|
23
|
+
| **attributes** | JSON bag of structured metadata. |
|
|
24
|
+
| **visibility** | `visible \| summarized \| archived`. What the model sees of this entry next turn. |
|
|
25
|
+
| **state** | `proposed \| streaming \| resolved \| failed \| cancelled`. Where the entry is in its lifecycle. |
|
|
26
|
+
| **outcome** | Short reason string when state ∈ {failed, cancelled}. Opaque to most callers; a few plugins parse it. |
|
|
27
|
+
| **writer** | Which tier wrote it last. |
|
|
28
|
+
| **scope** | `run:N \| project:N \| global`. Determines namespace and readership. |
|
|
29
|
+
|
|
30
|
+
Visibility and state are independent axes. An entry can be `state=resolved,
|
|
31
|
+
visibility=archived` (complete and hidden) or `state=streaming,
|
|
32
|
+
visibility=summarized` (in-flight, shown as summary) or `state=proposed,
|
|
33
|
+
visibility=visible` (visible, awaiting resolution).
|
|
34
|
+
|
|
35
|
+
### Six Primitives {#primitives}
|
|
36
|
+
|
|
37
|
+
The entire grammar for changing entries:
|
|
38
|
+
|
|
39
|
+
| Verb | Effect |
|
|
40
|
+
|------|--------|
|
|
41
|
+
| **set** | Create or update an entry. Writes content, state, visibility, attributes. |
|
|
42
|
+
| **get** | Promote an entry to `visibility=visible`. The read-with-side-effect. |
|
|
43
|
+
| **rm** | Remove an entry from the caller's view (or delete it when scope permits). |
|
|
44
|
+
| **cp** | Copy an entry to a new path. |
|
|
45
|
+
| **mv** | Rename an entry to a new path. |
|
|
46
|
+
| **update** | Record a turn's continuation or terminal signal. |
|
|
47
|
+
|
|
48
|
+
Every tool in rummy (`<sh>`, `<ask_user>`, `<search>`, `<env>`, `<think>`,
|
|
49
|
+
`<known>`, `<unknown>`, …) is a **plugin that composes the six
|
|
50
|
+
primitives**. A `<sh>` invocation becomes a `set` that creates a
|
|
51
|
+
proposed entry; on user accept, a stream plugin drives body appends
|
|
52
|
+
via `set` and eventually a state transition to `resolved`. The
|
|
53
|
+
primitives are the atoms; tools are the molecules.
|
|
54
|
+
|
|
55
|
+
### Three Surfaces, One Grammar {#surfaces}
|
|
56
|
+
|
|
57
|
+
| Actor | Syntax |
|
|
58
|
+
|-------|--------|
|
|
59
|
+
| **Model** | XML tags: `<set path="..." />` |
|
|
60
|
+
| **Plugin** | RummyContext methods: `rummy.set({...})` |
|
|
61
|
+
| **Client** | JSON-RPC: `{"method":"set","params":{...}}` |
|
|
62
|
+
|
|
63
|
+
Syntactic skins over the same semantics. A plugin calling
|
|
64
|
+
`rummy.set(...)`, a client sending `{"method":"set",...}`, and a model
|
|
65
|
+
emitting `<set/>` are the same event at the store layer, authorized by
|
|
66
|
+
the respective writer identity against the scheme's permissions.
|
|
67
|
+
|
|
68
|
+
### Four Writer Tiers {#writer_tiers}
|
|
69
|
+
|
|
70
|
+
A strict hierarchy of writer identities. Each tier is a superset of
|
|
71
|
+
what's below it:
|
|
72
|
+
|
|
73
|
+
| Tier | Access |
|
|
74
|
+
|------|--------|
|
|
75
|
+
| **system** | Internal plumbing (TurnExecutor, AgentLoop audit writes — `instructions://`, `reasoning://`, message schemes). |
|
|
76
|
+
| **plugin** | Declares schemes, registers hooks and filters, calls store methods directly. Everything below plus plugin-scope infrastructure. |
|
|
77
|
+
| **client** | RPC surface. Writes to client-writable schemes (`run://`, proposed-entry state transitions, config) and reads via subscribed notifications. |
|
|
78
|
+
| **model** | XML-tag surface. Writes to model-writable schemes (`known://`, `unknown://`, `update://`, tool-result schemes) as restricted by the active run's capability set. |
|
|
79
|
+
|
|
80
|
+
Every scheme declares `writable_by` as a subset of `{system, plugin,
|
|
81
|
+
client, model}`. A write from an identity outside that subset is rejected
|
|
82
|
+
with state=failed, outcome="permission:403".
|
|
83
|
+
|
|
84
|
+
### Runs Are Entries {#runs_are_entries}
|
|
85
|
+
|
|
86
|
+
Starting a run is not a separate API — it is a `set` to
|
|
87
|
+
`run://{alias}` with a prompt body and attributes carrying model,
|
|
88
|
+
restrictions, and resolution strategy. A run plugin observes `run://`
|
|
89
|
+
entry writes and starts the turn loop. Cancelling is a state
|
|
90
|
+
transition to `cancelled` on the same path. Resolving a proposed entry
|
|
91
|
+
is a state transition on that entry's path.
|
|
92
|
+
|
|
93
|
+
The lifecycle API is the entry grammar. No parallel verb set.
|
|
94
|
+
|
|
95
|
+
### Events & Filters {#events_and_filters}
|
|
96
|
+
|
|
97
|
+
Between the primitive-write layer and the actual work, rummy is a
|
|
98
|
+
hooks-and-filters system. Plugins subscribe to events (fire-and-forget
|
|
99
|
+
side effects) and filters (transformation chains that thread a value
|
|
100
|
+
through subscribers in priority order).
|
|
101
|
+
|
|
102
|
+
**Every `<tag>` the model sees is a plugin.** `<knowns>` → known
|
|
103
|
+
plugin. `<unknowns>` → unknown plugin. `<performed>` → performed
|
|
104
|
+
plugin. `<previous>` → previous plugin. `<prompt>` → prompt plugin.
|
|
105
|
+
No monolithic assembler decides what goes where. Each plugin filters
|
|
106
|
+
for its own data from the shared row set, renders its section, returns.
|
|
22
107
|
|
|
23
108
|
**Plugins compose, they don't coordinate.** A plugin subscribes to a
|
|
24
|
-
filter at a priority
|
|
25
|
-
contribution,
|
|
109
|
+
filter at a priority, receives the accumulator value, appends its
|
|
110
|
+
contribution, returns. It doesn't know what other plugins exist.
|
|
26
111
|
Priority determines ordering. Lower numbers run first.
|
|
27
112
|
|
|
28
|
-
**The core is a filter chain invocation.**
|
|
29
|
-
`loopStartTurn`
|
|
30
|
-
`assembly.system.filter(
|
|
113
|
+
**The core is a filter chain invocation.** `ContextAssembler` computes
|
|
114
|
+
`loopStartTurn` from the latest prompt entry's `source_turn`, then
|
|
115
|
+
calls `assembly.system.filter(systemPrompt, ctx)` and
|
|
31
116
|
`assembly.user.filter("", ctx)`. Everything else is plugins.
|
|
32
117
|
|
|
118
|
+
### Physical Layout
|
|
119
|
+
|
|
120
|
+
The contract is realized across two tables plus a compat view:
|
|
121
|
+
|
|
122
|
+
- **`entries`** — content layer. `(scope, path)` unique. Body,
|
|
123
|
+
attributes, hash.
|
|
124
|
+
- **`run_views`** — per-run projection. Visibility, state, outcome,
|
|
125
|
+
turn, loop. A run sees an entry only if it has a view row.
|
|
126
|
+
- **`known_entries`** — compatibility VIEW joining the two for legacy
|
|
127
|
+
SELECT queries. Not writable.
|
|
128
|
+
|
|
129
|
+
Server-side bookkeeping (runs, loops, turns, projects, models,
|
|
130
|
+
schemes, file_constraints, turn_context, rpc_log) exists to support
|
|
131
|
+
the contract; the contract's actors never address these tables
|
|
132
|
+
directly.
|
|
133
|
+
|
|
33
134
|
---
|
|
34
135
|
|
|
35
|
-
##
|
|
136
|
+
## The Known Store {#known_store}
|
|
137
|
+
|
|
138
|
+
All model-facing state is stored across two tables joined via the
|
|
139
|
+
`known_entries` compatibility VIEW. Files, knowledge, tool results,
|
|
140
|
+
skills, audit — everything is a keyed entry with a URI path, body,
|
|
141
|
+
attributes, per-run status, and per-run visibility.
|
|
36
142
|
|
|
37
|
-
|
|
38
|
-
results, skills, audit — everything is a keyed entry with a URI scheme,
|
|
39
|
-
body, attributes, and state.
|
|
143
|
+
### Schema {#schema}
|
|
40
144
|
|
|
41
|
-
|
|
145
|
+
**Content layer** — `entries` (shared, scope-owned):
|
|
42
146
|
|
|
43
147
|
```sql
|
|
44
|
-
|
|
45
|
-
id,
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
created_at, updated_at
|
|
148
|
+
entries (
|
|
149
|
+
id, scope, path, scheme, body, attributes,
|
|
150
|
+
hash, created_at, updated_at,
|
|
151
|
+
UNIQUE (scope, path)
|
|
49
152
|
)
|
|
50
153
|
```
|
|
51
154
|
|
|
52
155
|
| Column | Purpose |
|
|
53
156
|
|--------|---------|
|
|
54
|
-
| `
|
|
55
|
-
| `
|
|
56
|
-
| `
|
|
57
|
-
| `
|
|
58
|
-
| `
|
|
59
|
-
| `
|
|
60
|
-
| `hash` | SHA-256 for file change detection |
|
|
61
|
-
| `tokens` | Full-body token cost. Never changes on demotion/promotion. |
|
|
62
|
-
| `turn` | Freshness — when was this entry last touched |
|
|
157
|
+
| `scope` | `global`, `project:N`, or `run:N`. Determines who can read; per-scheme `writable_by` determines who can write. |
|
|
158
|
+
| `path` | Entry identity within scope. Bare paths (`src/app.js`) or URIs (`known://auth`). Max 2048 chars. |
|
|
159
|
+
| `scheme` | GENERATED from `schemeOf(path)`. Drives dispatch and view routing. |
|
|
160
|
+
| `body` | Content. File text, tool output, skill docs. |
|
|
161
|
+
| `attributes` | Tag attributes as JSON. `CHECK (json_valid)`. |
|
|
162
|
+
| `hash` | SHA-256 for file change detection. |
|
|
63
163
|
|
|
64
|
-
|
|
164
|
+
Tokens are not stored on entries. See [token_accounting](#token_accounting) — token cost is a property of the materialized packet, computed during assembly, never persisted.
|
|
65
165
|
|
|
66
|
-
|
|
67
|
-
and **fidelity** (visibility level). These are separate concerns.
|
|
166
|
+
**View layer** — `run_views` (per-run projection):
|
|
68
167
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
(
|
|
75
|
-
|
|
168
|
+
```sql
|
|
169
|
+
run_views (
|
|
170
|
+
id, run_id, entry_id, loop_id, turn,
|
|
171
|
+
status INTEGER, visibility TEXT,
|
|
172
|
+
write_count, refs, created_at, updated_at,
|
|
173
|
+
UNIQUE (run_id, entry_id)
|
|
174
|
+
)
|
|
175
|
+
```
|
|
76
176
|
|
|
77
|
-
|
|
177
|
+
| Column | Purpose |
|
|
178
|
+
|--------|---------|
|
|
179
|
+
| `run_id`, `entry_id` | (run, entry) unique pair. Absent view = not in context. |
|
|
180
|
+
| `loop_id`, `turn` | Freshness — when this run last touched the entry. |
|
|
181
|
+
| `status` | HTTP status code — outcome of the run's last operation on this entry. |
|
|
182
|
+
| `visibility` | `visible` \| `summarized` \| `archived`. The run's relationship to the entry. |
|
|
183
|
+
| `write_count` | How many times this run has written this entry. |
|
|
184
|
+
|
|
185
|
+
**Compatibility view** — `known_entries` joins the two tables so
|
|
186
|
+
legacy SELECT queries keep working. Not writable; new write code must
|
|
187
|
+
target `entries` + `run_views` directly (see [upsert_semantics](#upsert_semantics)).
|
|
188
|
+
|
|
189
|
+
**No shadowing.** A run cannot override a global (or project-scoped)
|
|
190
|
+
entry with a run-scoped copy of the same path. Scope is resolved from
|
|
191
|
+
the scheme's declared `default_scope` at write time; if the writer's
|
|
192
|
+
permission doesn't allow the target scope, the write is rejected
|
|
193
|
+
(403 + `error://`). Paths are unique within a scope, but different
|
|
194
|
+
scopes use independent namespaces — `known://plan` is always
|
|
195
|
+
run-scoped; `wiki://...` (hypothetical) would always be global. The
|
|
196
|
+
scheme plugin owns the decision; the model doesn't juggle scopes.
|
|
197
|
+
|
|
198
|
+
**Forks copy views, not content.** `store.forkEntries(parent, child)`
|
|
199
|
+
inserts new `run_views` rows referencing the parent's `entries`
|
|
200
|
+
rows — no body copies, O(row-count) rather than O(body-bytes).
|
|
201
|
+
A forked child's subsequent writes diverge by creating new entries
|
|
202
|
+
at the child's scope; the parent's entries stay untouched.
|
|
203
|
+
|
|
204
|
+
### Schemes, Status & Visibility {#schemes_status_visibility}
|
|
205
|
+
|
|
206
|
+
Every entry has two independent dimensions: **status** (HTTP integer —
|
|
207
|
+
view-side) and **visibility** (what the model sees — view-side). These
|
|
208
|
+
are separate concerns.
|
|
209
|
+
|
|
210
|
+
**Status** (operation outcome): 200 (OK), 202 (proposed), 400 (bad
|
|
211
|
+
request), 403 (permission denied), 404 (not found), 409 (conflict),
|
|
212
|
+
413 (too large), 499 (aborted), 500 (error).
|
|
213
|
+
|
|
214
|
+
**Visibility** (the model's view in the run's context): `visible` (body
|
|
215
|
+
shown), `summarized` (path + attrs shown, body hidden or condensed;
|
|
216
|
+
promote via `<get>`), `archived` (invisible; retrievable via pattern
|
|
217
|
+
search).
|
|
218
|
+
|
|
219
|
+
Lifecycle events (budget Turn Demotion, fork copy) change `visibility`
|
|
220
|
+
but never `status` — status stays truthful about the last body
|
|
221
|
+
operation. See `demote_turn_entries` in `known_store.sql`.
|
|
222
|
+
|
|
223
|
+
Paths use URI scheme syntax. Bare paths (no `://`) are files, stored
|
|
224
|
+
with `scheme IS NULL` (JOINs treat NULL as `'file'` via COALESCE).
|
|
78
225
|
|
|
79
226
|
Every entry plays one of four roles:
|
|
80
227
|
|
|
81
228
|
| Role | Category | Section | Description |
|
|
82
229
|
|------|----------|---------|-------------|
|
|
83
|
-
| **Data** | `data` | `<
|
|
84
|
-
| **Logging** | `logging` | `<
|
|
230
|
+
| **Data** | `data` | `<context>` | Entries the model works with — persistent state and captured payload |
|
|
231
|
+
| **Logging** | `logging` | `<log>` | Records of what happened — tool results, lifecycle signals |
|
|
85
232
|
| **Unknowns** | `unknown` | `<unknowns>` | Open questions the model is tracking |
|
|
86
233
|
| **Prompt** | `prompt` | `<prompt>` | The task driving the loop |
|
|
87
234
|
|
|
88
235
|
`logging` is the default category. Plugins opt into `data` explicitly.
|
|
89
236
|
|
|
90
|
-
| Scheme | Category | Description |
|
|
91
|
-
|
|
92
|
-
| `NULL` (bare path) | data | File content. JOINs via `COALESCE(scheme, 'file')`.
|
|
93
|
-
| `known://` | data | Model-registered knowledge. One fact per entry. |
|
|
94
|
-
| `skill://` | data | Skill docs. Rendered in system message. |
|
|
95
|
-
| `http://`, `https://` | data | Web content. |
|
|
96
|
-
| `
|
|
97
|
-
| `
|
|
98
|
-
| `
|
|
99
|
-
| `
|
|
100
|
-
| `
|
|
101
|
-
| `
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
237
|
+
| Scheme | Category | `writable_by` | Description |
|
|
238
|
+
|--------|----------|---------------|-------------|
|
|
239
|
+
| `NULL` (bare path) | data | `model, plugin` | File content. JOINs via `COALESCE(scheme, 'file')`. |
|
|
240
|
+
| `known://` | data | `model, plugin` | Model-registered knowledge. One fact per entry. |
|
|
241
|
+
| `skill://` | data | `model, plugin` | Skill docs. Rendered in system message. |
|
|
242
|
+
| `http://`, `https://` | data | `model, plugin` | Web content. |
|
|
243
|
+
| `sh://`, `env://` | data | `model, plugin` | Streaming-producer payload — stdout/stderr channel entries from shell/env commands. **Channels only**; the action audit record lives in `log://`. See [scheme_category_split](#scheme_category_split). |
|
|
244
|
+
| `unknown://` | unknown | `model, plugin` | Unresolved questions. |
|
|
245
|
+
| `prompt://` | prompt | `plugin` | User prompt with `mode` attribute. Written by prompt plugin, never by model. |
|
|
246
|
+
| `log://` | logging | `system, plugin, model` | Unified audit record namespace for all tool actions. One entry per action at `log://turn_N/{action}/{slug}`. |
|
|
247
|
+
| `update://` | logging | `model, plugin` | Lifecycle signal. Status attr classifies terminal (200/204/422) vs continuation (102). |
|
|
248
|
+
| `error://` | logging | `model, plugin` | Runtime errors — policy rejection, budget overflow (status 413), dispatch crashes, protocol violations. Unified channel via `hooks.error.log.emit`. |
|
|
249
|
+
| `tool://` | audit | `system` | Internal plugin metadata. `model_visible = 0`. |
|
|
250
|
+
| `instructions://`, `system://`, `reasoning://`, `model://`, `user://`, `assistant://`, `content://` | audit | `system` | Audit entries. `model_visible = 0`. Written only by server-level code. |
|
|
251
|
+
|
|
252
|
+
### Scheme / Category Split {#scheme_category_split}
|
|
253
|
+
|
|
254
|
+
**Scheme determines category.** Every entry's category is looked up
|
|
255
|
+
from its scheme registration; entries of the same scheme always share a
|
|
256
|
+
category. Data and logging never share a scheme.
|
|
257
|
+
|
|
258
|
+
Streaming producers (sh, env, and future fetch/search/tail/watch) split
|
|
259
|
+
across two namespaces as a direct consequence:
|
|
260
|
+
|
|
261
|
+
- **Action audit record** lives in `log://turn_N/{action}/{slug}` —
|
|
262
|
+
scheme=`log`, category=`logging`. Renders in `<log>`.
|
|
263
|
+
- **Payload channels** live in `{action}://turn_N/{slug}_N` —
|
|
264
|
+
scheme=`{action}` (registered as `category: "data"`). Render in
|
|
265
|
+
`<context>`.
|
|
266
|
+
|
|
267
|
+
This keeps `<log>` a terse audit trail (what happened, exit code,
|
|
268
|
+
paths) while `<context>` carries the actual streamed bytes the model
|
|
269
|
+
reads. Conflating the two — e.g., writing channels under `log://...` —
|
|
270
|
+
mislabels payload as audit and pollutes the logging section with
|
|
271
|
+
multi-line command output. See [streaming_entries](#streaming_entries).
|
|
272
|
+
|
|
273
|
+
### Scheme Registry {#scheme_registry}
|
|
274
|
+
|
|
275
|
+
The `schemes` table is a bootstrap registry — rows of
|
|
276
|
+
`(name, model_visible, category, default_scope, writable_by)`.
|
|
277
|
+
Plugins register their scheme via `core.registerScheme({name, category,
|
|
278
|
+
scope, writableBy})` in the constructor. Defaults:
|
|
279
|
+
`scope = "run"`, `writableBy = ["model", "plugin"]`.
|
|
280
|
+
|
|
281
|
+
- `model_visible` — whether entries appear in `v_model_context` (`0`
|
|
282
|
+
hides audit schemes from the model).
|
|
283
|
+
- `default_scope` — `run` \| `project` \| `global`. Resolved to a
|
|
284
|
+
concrete scope string at write time (`run:N`, `project:N`, `global`).
|
|
285
|
+
Project-scoped writes require `projectId` on the call; `Entries.set`
|
|
286
|
+
throws if it's missing.
|
|
287
|
+
- `writable_by` — JSON array of allowed writer types
|
|
288
|
+
(`model` \| `plugin` \| `system` \| `client`). `Entries.set` throws
|
|
289
|
+
`PermissionError` when the caller's writer isn't in the list.
|
|
290
|
+
|
|
291
|
+
### UPSERT Semantics {#upsert_semantics}
|
|
292
|
+
|
|
293
|
+
Writes go through `Entries.set({runId, path, body, state?, visibility?,
|
|
294
|
+
attributes?, outcome?, turn?, loopId?, writer?, projectId?, ...})`
|
|
295
|
+
— two-prep flow:
|
|
296
|
+
|
|
297
|
+
1. `upsert_entry` — INSERT OR UPDATE on `(scope, path)`. Scope comes
|
|
298
|
+
from scheme's `default_scope`. Returns the `entry_id`.
|
|
299
|
+
2. `upsert_run_view` — INSERT OR UPDATE on `(run_id, entry_id)`.
|
|
300
|
+
Increments `write_count` on conflict.
|
|
301
|
+
|
|
302
|
+
Blank body is valid. Deletion uses `<rm>`, which removes the
|
|
303
|
+
`run_views` row; the shared `entries` row is left for now (GC is a
|
|
304
|
+
future concern).
|
|
114
305
|
|
|
115
306
|
---
|
|
116
307
|
|
|
117
|
-
##
|
|
308
|
+
## Relational Tables
|
|
118
309
|
|
|
119
310
|
The K/V store is the memory. Relational tables are the skeleton.
|
|
120
311
|
|
|
@@ -132,9 +323,9 @@ turns (id, run_id, loop_id, sequence, context_tokens,
|
|
|
132
323
|
created_at)
|
|
133
324
|
|
|
134
325
|
file_constraints (id, project_id, pattern, visibility, created_at)
|
|
135
|
-
-- Project-level config. NOT tool dispatch. See
|
|
326
|
+
-- Project-level config. NOT tool dispatch. See [file_constraints](#file_constraints).
|
|
136
327
|
turn_context (id, run_id, loop_id, turn, ordinal, path, scheme,
|
|
137
|
-
status,
|
|
328
|
+
status, visibility, body, tokens, attributes,
|
|
138
329
|
category, source_turn)
|
|
139
330
|
rpc_log (id, project_id, method, rpc_id, params, result, error)
|
|
140
331
|
```
|
|
@@ -146,37 +337,44 @@ name can access any run. Temperature, persona, and context_limit are per-run.
|
|
|
146
337
|
Clients can add/remove models at runtime via RPC. No default model — the
|
|
147
338
|
client picks for every run.
|
|
148
339
|
|
|
149
|
-
###
|
|
340
|
+
### Run State Machine {#run_state_machine}
|
|
150
341
|
|
|
151
|
-
All status fields are HTTP integer codes
|
|
342
|
+
All status fields are HTTP integer codes. `runs.status` transitions
|
|
343
|
+
are enforced by `trg_run_state_transition` (see initial migration):
|
|
152
344
|
|
|
153
345
|
```
|
|
154
|
-
100
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
346
|
+
100 queued → 102 running, 499 aborted
|
|
347
|
+
102 running → 200 completed, 202 proposed, 500 failed, 499 aborted
|
|
348
|
+
202 proposed → 102 running, 200 completed, 499 aborted
|
|
349
|
+
200 completed → 102 running, 499 aborted
|
|
350
|
+
500 failed → 102 running, 499 aborted
|
|
351
|
+
499 aborted → 102 running
|
|
158
352
|
```
|
|
159
353
|
|
|
160
|
-
All terminal states allow transition back to
|
|
354
|
+
All terminal states (200/500/499) allow transition back to running.
|
|
355
|
+
Runs are long-lived.
|
|
161
356
|
|
|
162
|
-
###
|
|
357
|
+
### Loops Table {#loops_table}
|
|
163
358
|
|
|
164
359
|
The loops table IS the prompt queue. Each `ask`/`act` creates a loop.
|
|
165
360
|
FIFO per run (ordered by sequence). One active at a time. Abort stops
|
|
166
361
|
the current loop; pending loops survive. Projects > runs > loops > turns.
|
|
167
362
|
|
|
168
|
-
###
|
|
363
|
+
### File Constraints {#file_constraints}
|
|
169
364
|
|
|
170
365
|
The `file_constraints` table is project-level configuration — it
|
|
171
366
|
defines which files a project cares about. This is backbone, not tool
|
|
172
|
-
dispatch. Constraints have three visibilities:
|
|
173
|
-
|
|
367
|
+
dispatch. Constraints have three visibilities:
|
|
368
|
+
|
|
369
|
+
- `active` — matching files are promoted into the run's context
|
|
370
|
+
- `readonly` — promoted but not editable by the model
|
|
371
|
+
- `ignore` — demoted (excluded from context)
|
|
174
372
|
|
|
175
373
|
**Boundary:** Setting a constraint (`File.setConstraint`) is a
|
|
176
374
|
project-config write. Promoting/demoting the matching entries is tool
|
|
177
375
|
dispatch that goes through the handler chain with budget enforcement.
|
|
178
376
|
These are separate operations: constraint persists across runs, entry
|
|
179
|
-
|
|
377
|
+
visibility is scoped to a run and subject to the same budget rules as
|
|
180
378
|
a model `<get>`.
|
|
181
379
|
|
|
182
380
|
`store` RPC manages constraints directly — it is not a model tool.
|
|
@@ -184,25 +382,23 @@ a model `<get>`.
|
|
|
184
382
|
|
|
185
383
|
---
|
|
186
384
|
|
|
187
|
-
##
|
|
385
|
+
## Entry-Driven Dispatch
|
|
188
386
|
|
|
189
|
-
###
|
|
387
|
+
### Unified API {#unified_api}
|
|
190
388
|
|
|
191
|
-
Three callers
|
|
389
|
+
Three callers share a tool vocabulary. The invocation shape is
|
|
390
|
+
per-tier; params shape is not uniform across tiers.
|
|
192
391
|
|
|
193
|
-
| Tier | Transport | Invocation
|
|
194
|
-
|
|
195
|
-
| Model | XML tags |
|
|
392
|
+
| Tier | Transport | Invocation |
|
|
393
|
+
|------|-----------|-----------|
|
|
394
|
+
| Model | XML tags | `<rm path="file.txt"/>` |
|
|
196
395
|
| Client | JSON-RPC | `{ method: "rm", params: { path: "file.txt" } }` |
|
|
197
|
-
| Plugin |
|
|
198
|
-
|
|
199
|
-
`name` (model) = `method` (client) = method name (plugin). The params
|
|
200
|
-
object is the same shape at every tier.
|
|
396
|
+
| Plugin | RummyContext verbs | `rummy.rm("file.txt")` (each verb takes what's natural — see `src/hooks/RummyContext.js`) |
|
|
201
397
|
|
|
202
398
|
| Method | Model | Client | Plugin |
|
|
203
399
|
|--------|-------|--------|--------|
|
|
204
|
-
| `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `search` | ✓ | ✓ | ✓ |
|
|
205
|
-
| `
|
|
400
|
+
| `think`, `get`, `set`, `rm`, `mv`, `cp`, `sh`, `env`, `search` | ✓ | ✓ | ✓ |
|
|
401
|
+
| `ask_user`, `update` | ✓ | ✓ | ✓ |
|
|
206
402
|
| `ask`, `act`, `resolve`, `abort`, `startRun` | — | ✓ | ✓ |
|
|
207
403
|
| `getRuns`, `getModels`, `getEntries` | — | ✓ | ✓ |
|
|
208
404
|
| `on()`, `filter()`, db/store access | — | — | ✓ |
|
|
@@ -210,44 +406,60 @@ object is the same shape at every tier.
|
|
|
210
406
|
Model tier restrictions enforced by unified `resolveForLoop(mode, flags)`.
|
|
211
407
|
Ask mode excludes `sh`. Flags: `noInteraction` excludes `ask_user`,
|
|
212
408
|
`noWeb` excludes `search`, `noProposals` excludes `ask_user`/`env`/`sh`.
|
|
213
|
-
|
|
214
|
-
|
|
409
|
+
11 model tools: think, get, set, env, sh, rm, cp, mv, ask_user, update,
|
|
410
|
+
search. The model writes `known` and `unknown` entries via
|
|
411
|
+
`<set path="known://...">` and `<set path="unknown://...">`; those
|
|
412
|
+
plugins don't advertise their own tag name — they render and filter.
|
|
215
413
|
Client tier requires project init. Plugin tier has no restrictions.
|
|
216
414
|
|
|
217
|
-
###
|
|
415
|
+
### Dispatch Path {#dispatch_path}
|
|
218
416
|
|
|
219
|
-
|
|
417
|
+
Each tier feeds into the shared tool handler chain, but through a
|
|
418
|
+
different entry point:
|
|
220
419
|
|
|
221
420
|
```
|
|
222
|
-
Model: XmlParser → { name, path, ... } →
|
|
223
|
-
|
|
224
|
-
|
|
421
|
+
Model: XmlParser → { name, path, ... } → TurnExecutor.#record()
|
|
422
|
+
→ hooks.tools.dispatch(scheme, entry, rummy)
|
|
423
|
+
Client: JSON-RPC → rpc.js dispatchTool(hooks, rummy, scheme, ...)
|
|
424
|
+
→ hooks.tools.dispatch(scheme, entry, rummy)
|
|
425
|
+
Plugin: rummy.set({path, body, ...}) / rummy.rm(path) / etc.
|
|
426
|
+
→ direct entries.* store calls (bypasses the handler chain)
|
|
225
427
|
```
|
|
226
428
|
|
|
429
|
+
Model and client tiers both land in `hooks.tools.dispatch`, which
|
|
430
|
+
invokes the scheme's registered handler. Model-tier additionally
|
|
431
|
+
passes through `TurnExecutor.#record()` (adds turn-scoped recording,
|
|
432
|
+
policy filtering, abort cascade). Plugin-tier convenience verbs
|
|
433
|
+
(`rummy.rm`, `rummy.set`, ...) are thin wrappers over the store — they
|
|
434
|
+
don't invoke the handler chain. Plugin code that wants full handler
|
|
435
|
+
semantics calls `hooks.tools.dispatch` directly.
|
|
436
|
+
|
|
227
437
|
**Tool dispatch:** Commands are dispatched sequentially in the order
|
|
228
438
|
the model emitted them. Each tool either succeeds (200), fails (400+),
|
|
229
439
|
or proposes (202). On failure, all remaining tools are aborted. On
|
|
230
440
|
proposal, dispatch pauses, a notification is pushed to the client
|
|
231
|
-
(same WebSocket push pattern as `run/
|
|
441
|
+
(same WebSocket push pattern as `run/state`), the client resolves
|
|
232
442
|
(accept/reject), and dispatch resumes — the proposal becomes 200 or
|
|
233
443
|
400+ like any other tool. The `ask`/`act` RPC response is only sent
|
|
234
444
|
when all tools have completed. Proposals are NOT batched — each is
|
|
235
445
|
sent and resolved inline during dispatch. The model controls tool
|
|
236
446
|
ordering; the system respects it.
|
|
237
447
|
|
|
238
|
-
If the model sends `<
|
|
239
|
-
turn failed, the
|
|
240
|
-
|
|
241
|
-
|
|
448
|
+
If the model sends `<update status="200">` (terminal) but a preceding
|
|
449
|
+
action in the same turn failed, the terminal assertion is overridden
|
|
450
|
+
to a continuation (the model's claim of doneness is false); the update
|
|
451
|
+
plugin resolves the update entry to 409 and surfaces it to the next
|
|
452
|
+
turn as a continuation. Multiple `<update>` tags → last signal wins.
|
|
242
453
|
|
|
243
|
-
**Post-dispatch budget check:** After all tools dispatch, the
|
|
244
|
-
materializes context and checks the
|
|
245
|
-
the ceiling, Turn
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
454
|
+
**Post-dispatch budget check:** After all tools dispatch, the budget
|
|
455
|
+
plugin re-materializes context and checks the ceiling
|
|
456
|
+
(`hooks.budget.postDispatch`). If context exceeds the ceiling, Turn
|
|
457
|
+
Demotion fires — all `visible` `run_views` rows for the current turn
|
|
458
|
+
have their `visibility` flipped to `summarized`, and an `error://` entry at status 413 is
|
|
459
|
+
written. Status is NOT touched (see [schemes_status_visibility](#schemes_status_visibility)). The tools already ran;
|
|
460
|
+
their outcomes are settled.
|
|
249
461
|
|
|
250
|
-
###
|
|
462
|
+
### Plugin Convention {#plugin_convention}
|
|
251
463
|
|
|
252
464
|
A plugin is an instantiated class. The class name matches the file name.
|
|
253
465
|
The constructor receives `core` (a PluginContext) — the plugin's
|
|
@@ -259,85 +471,174 @@ export default class Rm {
|
|
|
259
471
|
|
|
260
472
|
constructor(core) {
|
|
261
473
|
this.#core = core;
|
|
474
|
+
core.ensureTool();
|
|
475
|
+
core.registerScheme({ category: "logging" });
|
|
262
476
|
core.on("handler", this.handler.bind(this));
|
|
263
|
-
core.on("
|
|
477
|
+
core.on("visible", this.full.bind(this));
|
|
478
|
+
core.on("summarized", this.summary.bind(this));
|
|
264
479
|
}
|
|
265
480
|
|
|
266
481
|
async handler(entry, rummy) {
|
|
267
482
|
// rummy here is per-turn RummyContext (not the startup PluginContext)
|
|
268
483
|
}
|
|
269
484
|
|
|
270
|
-
full(entry) {
|
|
271
|
-
|
|
272
|
-
}
|
|
485
|
+
full(entry) { return `# rm ${entry.attributes.path}`; }
|
|
486
|
+
summary(entry) { return ""; }
|
|
273
487
|
}
|
|
274
488
|
```
|
|
275
489
|
|
|
490
|
+
**Registration verbs on PluginContext:**
|
|
491
|
+
- `"handler"` — tool handler (dispatches when a matching entry is recorded).
|
|
492
|
+
- `"visible"` / `"summarized"` — visibility view projections. Return the
|
|
493
|
+
projected body string for the given visibility level.
|
|
494
|
+
- Any hook name (e.g. `"turn.started"`, `"entry.created"`) — subscribes
|
|
495
|
+
to that event.
|
|
496
|
+
- `core.filter(name, callback, priority)` — subscribes to a filter chain.
|
|
497
|
+
|
|
276
498
|
**Two objects:**
|
|
277
499
|
- `this.#core` — PluginContext (startup). For registration: `on()`, `filter()`.
|
|
278
500
|
- `rummy` argument — RummyContext (per-turn). For runtime: tool verbs, queries.
|
|
279
501
|
|
|
280
502
|
**Plugin types:**
|
|
281
|
-
- **Tool plugins**: register `handler` + `
|
|
282
|
-
- **Assembly plugins**: register `core.filter("assembly.system", ...)`. Own a packet tag.
|
|
283
|
-
- **Infrastructure plugins**:
|
|
503
|
+
- **Tool plugins**: register `handler` + `visible`/`summarized`. Model-invokable.
|
|
504
|
+
- **Assembly plugins**: register `core.filter("assembly.system"|"assembly.user", ...)`. Own a packet tag.
|
|
505
|
+
- **Infrastructure plugins**: subscribe to lifecycle events
|
|
506
|
+
(`turn.started`, `turn.response`, `turn.completed`, `entry.created`,
|
|
507
|
+
`loop.started`, etc.). Background work.
|
|
284
508
|
|
|
285
509
|
A plugin can be multiple types. Known is a tool AND an assembly plugin.
|
|
286
510
|
|
|
287
|
-
###
|
|
511
|
+
### Mode Enforcement {#mode_enforcement}
|
|
512
|
+
|
|
513
|
+
Two mechanisms, operating at different layers:
|
|
514
|
+
|
|
515
|
+
1. **Tool-list exclusion** — `hooks.tools.resolveForLoop(mode, flags)`
|
|
516
|
+
computes the active tool set at loop start. Ask mode excludes `sh`.
|
|
517
|
+
Flag-driven exclusions: `noInteraction` removes `ask_user`; `noWeb`
|
|
518
|
+
removes `search`; `noProposals` removes `ask_user`/`env`/`sh`. The
|
|
519
|
+
excluded tools don't appear in the system prompt's tool list.
|
|
520
|
+
2. **Per-invocation filtering** — the `policy` plugin subscribes to
|
|
521
|
+
`entry.recording` and inspects individual emissions for ask-mode
|
|
522
|
+
violations that the tool-list alone can't catch (file-scheme `<set>`
|
|
523
|
+
edits, file `<rm>`, file-destination `<mv>`/`<cp>`). Rejects with
|
|
524
|
+
status 403 and emits `error://`. The tool remains advertised; the
|
|
525
|
+
specific invocation is blocked.
|
|
526
|
+
|
|
527
|
+
### Streaming Entries {#streaming_entries}
|
|
528
|
+
|
|
529
|
+
Producers that generate output over time (shell commands, web fetches,
|
|
530
|
+
log tails, file watches) use the streaming-entry pattern. Entry
|
|
531
|
+
lifecycle extends beyond the synchronous 202→200/400+ flow.
|
|
288
532
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
533
|
+
**Lifecycle:**
|
|
534
|
+
|
|
535
|
+
```
|
|
536
|
+
202 Proposal (user decision pending)
|
|
537
|
+
→ accept → 200 (log entry: action complete) + 102 data entries
|
|
538
|
+
→ reject → 403
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
**Entry shape for a streaming producer** — two namespaces per
|
|
542
|
+
invocation, one for the audit record, one for the payload (see
|
|
543
|
+
[scheme_category_split](#scheme_category_split)):
|
|
544
|
+
|
|
545
|
+
```
|
|
546
|
+
log://turn_N/{action}/{slug} scheme=log category=logging status=202→200
|
|
547
|
+
body: "ran 'command', exit=0, Output: {paths}"
|
|
548
|
+
(renders in <log>)
|
|
549
|
+
|
|
550
|
+
{action}://turn_N/{slug}_1 scheme={action} category=data status=102 → 200/500
|
|
551
|
+
body: primary stream (stdout for shell)
|
|
552
|
+
summary="{command}" visibility=summarized
|
|
553
|
+
(renders in <context>)
|
|
554
|
+
|
|
555
|
+
{action}://turn_N/{slug}_2 scheme={action} category=data status=102 → 200/500
|
|
556
|
+
body: alt stream (stderr for shell)
|
|
557
|
+
(renders in <context>, often empty)
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
`{action}` is the producer plugin's name (`sh`, `env`, future: `search`,
|
|
561
|
+
`fetch`, ...). The stream RPC accepts the **log-entry path** and derives
|
|
562
|
+
the data base internally via `logPathToDataBase` — see
|
|
563
|
+
[stream_plugin](#stream_plugin).
|
|
564
|
+
|
|
565
|
+
**Channel numbering follows Unix file descriptor convention.** Channel
|
|
566
|
+
1 is primary output (stdout for shell); channel 2 is alternate/error
|
|
567
|
+
output (stderr); higher numbers for additional producer-specific
|
|
568
|
+
channels. Non-process producers (search, fetch) map their streams onto
|
|
569
|
+
the same numeric space: `_1` for the primary data stream, `_2` for
|
|
570
|
+
anomalies/errors, `_3`+ for auxiliary streams.
|
|
571
|
+
|
|
572
|
+
**Status 102 ("Processing") marks an entry in mid-stream:** body is
|
|
573
|
+
partial, will change; tokens grow as chunks arrive. Agents reading a
|
|
574
|
+
102 entry use `<get>` with `line`/`limit` (including negative `line`
|
|
575
|
+
for tail) to sample without promoting full body.
|
|
576
|
+
|
|
577
|
+
**Status transition on completion** is terminal: 200 (exit_code=0 or
|
|
578
|
+
N/A for non-process producers), 500 (non-zero exit), or 499 (client
|
|
579
|
+
aborted via `stream/aborted`). The log entry is rewritten with final
|
|
580
|
+
stats (exit code, duration, channel sizes, or abort reason).
|
|
581
|
+
|
|
582
|
+
**Budget demotion preserves status.** A 102 entry demoted by Turn
|
|
583
|
+
Demotion stays at 102 — status reflects operation outcome, visibility
|
|
584
|
+
reflects how much of the entry the model sees. See [schemes_status_visibility](#schemes_status_visibility) for the status-vs-visibility separation.
|
|
585
|
+
|
|
586
|
+
**Stream plugin ([plugin_system](#plugin_system)) owns the append and completion RPCs.** Producer
|
|
587
|
+
plugins (sh, env) create the proposal and data entries; the stream
|
|
588
|
+
plugin handles the subsequent growth and terminal transitions.
|
|
292
589
|
|
|
293
590
|
---
|
|
294
591
|
|
|
295
|
-
##
|
|
592
|
+
## Message Structure {#message_structure}
|
|
296
593
|
|
|
297
594
|
Two messages per turn. System = stable truth. User = active task.
|
|
298
595
|
|
|
299
|
-
###
|
|
596
|
+
### Packet Structure {#packet_structure}
|
|
300
597
|
|
|
301
598
|
```
|
|
302
|
-
[system]
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
(
|
|
314
|
-
</
|
|
599
|
+
[system message]
|
|
600
|
+
instructions text
|
|
601
|
+
(instructions.md base template + tool docs injected via
|
|
602
|
+
instructions.toolDocs filter; optional persona appended)
|
|
603
|
+
<context>
|
|
604
|
+
all category=data entries (knowns, files, http/https),
|
|
605
|
+
wrapped by known.js on assembly.system at priority 100
|
|
606
|
+
</context>
|
|
607
|
+
[user message]
|
|
608
|
+
<log>
|
|
609
|
+
action history — log:// entries + pre-latest prompts
|
|
610
|
+
(log.js, assembly.user priority 100)
|
|
611
|
+
</log>
|
|
315
612
|
<unknowns>
|
|
316
|
-
(open questions
|
|
613
|
+
(open questions at category=unknown, unknown.js priority 200)
|
|
317
614
|
</unknowns>
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
(
|
|
322
|
-
</
|
|
323
|
-
<
|
|
324
|
-
<prompt mode="ask|act" tools="...">user prompt</prompt>
|
|
325
|
-
[/user]
|
|
615
|
+
<instructions>
|
|
616
|
+
current phase directive — one of instructions_104.md …
|
|
617
|
+
instructions_108.md, selected by the latest <update status="1XY">
|
|
618
|
+
emission (instructions.js, assembly.user priority 250)
|
|
619
|
+
</instructions>
|
|
620
|
+
<prompt mode="ask|act" tokenUsage="N" tokensFree="M">user prompt</prompt>
|
|
326
621
|
```
|
|
327
622
|
|
|
328
|
-
**System**
|
|
329
|
-
|
|
623
|
+
**System** = stable world state the model operates within (identity,
|
|
624
|
+
tools, tool docs, reference context). Stable across turns within a
|
|
625
|
+
run, which keeps prompt caching intact. **User** = active work (what
|
|
626
|
+
the model is doing right now): history, open questions, current
|
|
627
|
+
phase, and current prompt. The phase-specific `<instructions>` block
|
|
628
|
+
lives in the user message precisely *because* it changes between
|
|
629
|
+
turns — putting it in system would invalidate the cache on every
|
|
630
|
+
phase transition.
|
|
330
631
|
|
|
331
632
|
The `<prompt>` tag is present on every turn — first turn and
|
|
332
633
|
continuations alike. The model always sees its task. The active prompt
|
|
333
634
|
is extracted from its chronological position and placed last for maximum
|
|
334
|
-
recency. `<
|
|
335
|
-
|
|
635
|
+
recency. The `<prompt>` element carries `tokenUsage` / `tokensFree`
|
|
636
|
+
attributes so the model can do budget arithmetic in-line with the cause.
|
|
336
637
|
|
|
337
|
-
###
|
|
638
|
+
### Loops, Previous, and Performed {#loops_previous_performed}
|
|
338
639
|
|
|
339
640
|
A **loop** is one `ask` or `act` invocation and all its continuation
|
|
340
|
-
turns until
|
|
641
|
+
turns until `<update status="200">`, failure, or abort.
|
|
341
642
|
|
|
342
643
|
**Previous** = all completed loops on this run. The user prompt, model
|
|
343
644
|
responses, tool results, agent warnings — the full chronicle in order.
|
|
@@ -353,11 +654,11 @@ When a new prompt arrives on an existing run, the prior loop's
|
|
|
353
654
|
`<performed>` content plus its prompt move to `<previous>`. When a loop
|
|
354
655
|
continues (next turn), new results append to `<performed>`.
|
|
355
656
|
|
|
356
|
-
###
|
|
657
|
+
### Key Entries {#key_entries}
|
|
357
658
|
|
|
358
659
|
| Path | Lifetime | Body | Attributes |
|
|
359
660
|
|------|----------|------|-----------|
|
|
360
|
-
| `instructions://system` | One per run (mutable) | Empty (projection builds from
|
|
661
|
+
| `instructions://system` | One per run (mutable) | Empty (projection builds from `instructions.md` + tool docs + optional persona) | `{ persona, toolSet }` |
|
|
361
662
|
| `system://N` | Audit, one per turn | Full assembled system message | — |
|
|
362
663
|
| `user://N` | Audit, one per turn | Full assembled user message | — |
|
|
363
664
|
| `assistant://N` | Audit, one per turn | Model's raw response | — |
|
|
@@ -367,97 +668,176 @@ framework auto-populates `toolDescriptions` from tool registrations
|
|
|
367
668
|
that include `docs`. The instructions projection assembles the final
|
|
368
669
|
text from body + attributes.
|
|
369
670
|
|
|
370
|
-
###
|
|
671
|
+
### Materialization {#materialization}
|
|
371
672
|
|
|
372
673
|
Each turn:
|
|
373
674
|
|
|
374
|
-
1. Write `instructions://system` (empty body, attributes = { persona })
|
|
675
|
+
1. Write `instructions://system` (empty body, attributes = { persona, toolSet })
|
|
375
676
|
2. Emit `turn.started` — plugins write prompt/instructions entries
|
|
376
|
-
3.
|
|
377
|
-
4. Query `v_model_context` VIEW → visible entries
|
|
378
|
-
|
|
677
|
+
3. Resolve the instructions system prompt (`hooks.instructions.resolveSystemPrompt`)
|
|
678
|
+
4. Query `v_model_context` VIEW → visible entries (joined from
|
|
679
|
+
`run_views` + `entries` + `schemes`)
|
|
680
|
+
5. Project each entry through its scheme's `visible`/`summarized` projection
|
|
379
681
|
6. Insert projected rows into `turn_context`
|
|
380
682
|
7. Invoke `assembly.system` filter chain (instructions text as base):
|
|
381
|
-
- Known plugin (priority 100) → `<
|
|
683
|
+
- Known plugin (priority 100) → `<knowns>` section
|
|
382
684
|
- Previous plugin (priority 200) → `<previous>` section
|
|
383
|
-
- Unknown plugin (priority 300) → `<unknowns>` section
|
|
384
685
|
8. Invoke `assembly.user` filter chain (empty string as base):
|
|
385
686
|
- Performed plugin (priority 100) → `<performed>` section
|
|
386
|
-
-
|
|
387
|
-
- Prompt plugin (priority 300) → `<prompt>`
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
- `
|
|
393
|
-
- `
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
687
|
+
- Unknown plugin (priority 200) → `<unknowns>` section
|
|
688
|
+
- Prompt plugin (priority 300) → `<prompt>` element (carries
|
|
689
|
+
`tokenUsage` / `tokensFree` attrs when `contextSize` is set)
|
|
690
|
+
9. Store as `system://N` and `user://N` audit entries (telemetry plugin)
|
|
691
|
+
|
|
692
|
+
The VIEW determines what the model sees from `visibility`, `status`, and `model_visible`:
|
|
693
|
+
- `visibility = 'visible'` → full body visible in `<knowns>` / `<performed>`.
|
|
694
|
+
- `visibility = 'summarized'` → summarized projection visible (typically path +
|
|
695
|
+
summary attr). Promote with `<get>` to expand.
|
|
696
|
+
- `visibility = 'archived'` → invisible. Discoverable via pattern search
|
|
697
|
+
(`<get path="known://*">keyword</get>`); promote to bring back into view.
|
|
698
|
+
- `status = 202` → invisible (proposed, pending client resolution).
|
|
699
|
+
- `model_visible = 0` → invisible (audit schemes: instructions, system,
|
|
700
|
+
reasoning, model, user, assistant, content, tool).
|
|
701
|
+
|
|
702
|
+
**Partial read:** `<get path="..." line="N" limit="M"/>` returns lines N
|
|
703
|
+
through N+M−1 of the entry body as the log item without changing
|
|
704
|
+
visibility or promoting the entry to context. Use after reading a
|
|
705
|
+
demoted entry (which shows path + summary) to target a specific slice.
|
|
706
|
+
Single-path only — glob or body filter with `line`/`limit` is a 400 error.
|
|
707
|
+
|
|
708
|
+
Model controls visibility via `<set>` attributes:
|
|
709
|
+
`visibility="archived|summarized|visible"`. The `summary="..."` attribute
|
|
710
|
+
attaches a description (≤ 80 chars) that persists across visibility
|
|
711
|
+
changes.
|
|
712
|
+
|
|
713
|
+
### Token Accounting {#token_accounting}
|
|
714
|
+
|
|
715
|
+
Tokens are a property of the materialized packet, not of stored entries.
|
|
716
|
+
They are computed during assembly, exposed on the materialization records,
|
|
717
|
+
and consumed by the budget plugin for the model-facing `<budget>` table.
|
|
718
|
+
Nothing else in the system has its own opinion of "what an entry costs."
|
|
719
|
+
|
|
720
|
+
**Per-entry materialization records** carry three token measures:
|
|
721
|
+
|
|
722
|
+
| Field | Meaning |
|
|
723
|
+
|---|---|
|
|
724
|
+
| `vTokens` | Wire cost when the entry is fully visible. The body rendered through the scheme's `visible` view, wrapped in its envelope tag, tokenized. |
|
|
725
|
+
| `sTokens` | Wire cost when the entry is summarized. The body rendered through the scheme's `summarized` view (typically a projection or 500-char preview), wrapped in its envelope tag, tokenized. |
|
|
726
|
+
| `aTokens` | `vTokens − sTokens`. The promotion premium — the marginal cost of the entry being visible rather than summarized. The only token measure exposed to the model on per-entry tags. |
|
|
727
|
+
|
|
728
|
+
The model sees `tokens="N"` on each entry tag. That `N` is `aTokens`. It
|
|
729
|
+
means: *demoting this entry frees `N` tokens; promoting this entry from
|
|
730
|
+
summarized to visible costs `N` tokens.* The number is a pure lever — no
|
|
731
|
+
body-vs-wire ambiguity, no envelope overhead surprise.
|
|
732
|
+
|
|
733
|
+
**Floor and premium.** A run's packet decomposes into:
|
|
734
|
+
|
|
735
|
+
- **Summarized floor** = sum of `sTokens` for all non-archived entries.
|
|
736
|
+
Paid regardless of any visibility decision the model can make. Includes
|
|
737
|
+
the per-entry projection cost for every entry that's either `visible`
|
|
738
|
+
(since visible entries also pay their projection-cost-equivalent within
|
|
739
|
+
vTokens) or `summarized`.
|
|
740
|
+
- **Visibility premium** = sum of `aTokens` for currently-visible entries.
|
|
741
|
+
The active cost of visibility decisions. The model's lever.
|
|
742
|
+
- **System overhead** = system prompt + tool definition tokens. Constant
|
|
743
|
+
per turn, not addressable by the model.
|
|
744
|
+
|
|
745
|
+
`tokenUsage = floor + premium + system`. `tokensFree = ceiling − tokenUsage`.
|
|
746
|
+
|
|
747
|
+
**`<budget>` rendered shape** (between `<instructions>` and `<prompt>`,
|
|
748
|
+
priority 275):
|
|
409
749
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
§4.6).
|
|
417
|
-
|
|
418
|
-
**Write-layer gate:** BudgetGuard on KnownStore gates every write
|
|
419
|
-
during dispatch. `upsert()`, `promoteByPattern()`, and
|
|
420
|
-
`updateBodyByPattern()` check token delta against remaining headroom.
|
|
421
|
-
Exceeding the budget throws `BudgetExceeded` — the tool 413s, the
|
|
422
|
-
guard trips, and all subsequent tools in the turn fail.
|
|
423
|
-
|
|
424
|
-
BudgetGuard ceiling = `floor(contextSize × 0.9) − 500`. The 500-token
|
|
425
|
-
buffer below the enforce ceiling absorbs two sources of overhead that
|
|
426
|
-
BudgetGuard cannot see: (a) `#record()`-phase writes that bypass the
|
|
427
|
-
guard (~15 tokens per command), and (b) loop transition overhead —
|
|
428
|
-
when a loop completes and a new one starts, entries shift from
|
|
429
|
-
`<performed>` to `<previous>` format, adding ~200–300 tokens to the
|
|
430
|
-
next assembly. Without this buffer, the base context can accumulate
|
|
431
|
-
to exactly the enforce ceiling, making it impossible for the panic
|
|
432
|
-
loop to start (panic prompt + loop overhead > ceiling).
|
|
433
|
-
|
|
434
|
-
**Exemptions:** `status >= 400` entries (error results), `model_visible
|
|
435
|
-
= 0` entries (audit), `fidelity = "archive"` entries (not in context).
|
|
436
|
-
|
|
437
|
-
**Size gate:** Known entries exceeding 500 tokens are rejected with
|
|
438
|
-
413, forcing atomic entries.
|
|
439
|
-
|
|
440
|
-
**Advisory warnings** (progress plugin):
|
|
441
|
-
- 50%: "You may free space by lowering the fidelity of entries"
|
|
442
|
-
- 75%: "YOU MUST free space... or the run will fail"
|
|
750
|
+
```
|
|
751
|
+
<budget tokenUsage="N" tokensFree="M">
|
|
752
|
+
| scheme | visible | tokens | % |
|
|
753
|
+
|---|---|---|---|
|
|
754
|
+
| <scheme> | <count> | <sum-of-aTokens> | <%-of-ceiling> |
|
|
755
|
+
... rows for visible-scheme breakdown, sorted desc by tokens ...
|
|
443
756
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
757
|
+
Summarized: <count> entries, <sum-of-sTokens> tokens (<%>% of budget).
|
|
758
|
+
System: <token-count> tokens (<%>% of budget).
|
|
759
|
+
Total: <visible-count> visible + <summarized-count> summarized entries; tokenUsage <N> / ceiling <C>. <M> tokens free.
|
|
760
|
+
</budget>
|
|
761
|
+
```
|
|
448
762
|
|
|
449
|
-
**
|
|
763
|
+
**Why the table only contains visible scheme rows.** The `tokens` column
|
|
764
|
+
in the table is `aTokens` — the action lever. A per-entry breakdown of
|
|
765
|
+
summarized entries is intentionally not surfaced; surgical pruning of
|
|
766
|
+
individual high-signal summaries is the wrong action shape. The
|
|
767
|
+
summarized aggregate line below the table is the only signal for that
|
|
768
|
+
class — actionable via glob (`<set path="known://oldsession/*"
|
|
769
|
+
visibility="archived"/>`), not per-entry.
|
|
770
|
+
|
|
771
|
+
**Where the math is computed.** Materialization (the assembly path
|
|
772
|
+
through `materializeContext.js` and `ContextAssembler.js` plus per-scheme
|
|
773
|
+
view handlers) renders each entry's visible and summarized projections,
|
|
774
|
+
wraps them in their envelope, and tokenizes both. The resulting per-entry
|
|
775
|
+
record carries `vTokens`/`sTokens`/`aTokens` alongside the projected
|
|
776
|
+
text. The budget plugin's `assembleBudget` filter consumes this; no other
|
|
777
|
+
caller measures tokens.
|
|
778
|
+
|
|
779
|
+
**Body-size gates** (e.g. `known.js` MAX_ENTRY_TOKENS) compute
|
|
780
|
+
`countTokens(body)` inline at write time. They check intrinsic body
|
|
781
|
+
size, not wire cost — the materialization record doesn't yet exist when
|
|
782
|
+
an entry is being written.
|
|
783
|
+
|
|
784
|
+
### Budget Enforcement {#budget_enforcement}
|
|
450
785
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
786
|
+
The model owns its context. The system enforces a hard ceiling and
|
|
787
|
+
surfaces the numbers — it does not automatically manage entries.
|
|
788
|
+
|
|
789
|
+
**Ceiling.** `ceiling = floor(contextSize × RUMMY_BUDGET_CEILING)`
|
|
790
|
+
(default `RUMMY_BUDGET_CEILING = 0.9`, i.e. 10% headroom). All budget
|
|
791
|
+
decisions compare `assembledTokens` against `ceiling`, never against
|
|
792
|
+
`contextSize` directly.
|
|
793
|
+
|
|
794
|
+
**Pre-LLM enforce** (`hooks.budget.enforce`, in TurnExecutor before
|
|
795
|
+
the LLM call). Measures the assembled messages (using
|
|
796
|
+
`turns.context_tokens` from the prior turn when available,
|
|
797
|
+
otherwise `countTokens(messages)` as a first-turn estimate).
|
|
798
|
+
|
|
799
|
+
- `assembledTokens ≤ ceiling` → return 200, proceed to LLM.
|
|
800
|
+
- `assembledTokens > ceiling` on the first turn of a loop → **Prompt
|
|
801
|
+
Demotion**: demote the incoming `prompt://N` entry to `visibility =
|
|
802
|
+
summarized`, re-materialize, re-check. If the retry fits, proceed.
|
|
803
|
+
- `assembledTokens > ceiling` on a non-first turn, or still over after
|
|
804
|
+
Prompt Demotion → return 413. AgentLoop exits the loop with 413.
|
|
805
|
+
|
|
806
|
+
**Post-dispatch Turn Demotion** (`hooks.budget.postDispatch`, after
|
|
807
|
+
all tool dispatches complete). Re-materializes end-of-turn context
|
|
808
|
+
and re-checks. If still over the ceiling, flips every `run_views` row
|
|
809
|
+
for this turn from `visibility = visible` to `visibility = summarized`
|
|
810
|
+
(status preserved — see [schemes_status_visibility](#schemes_status_visibility))
|
|
811
|
+
and emits a 413 error via `hooks.error.log.emit` with the descriptive
|
|
812
|
+
body (what was demoted, the 50% rule for the next turn). The model
|
|
813
|
+
sees the `error://` entry next turn and adjusts.
|
|
814
|
+
|
|
815
|
+
**LLM-reported context exceeded.** If the LLM rejects the request
|
|
816
|
+
with a "context too long" error (detected via the regex in
|
|
817
|
+
`src/llm/errors.js`), the LlmProvider raises `ContextExceededError`
|
|
818
|
+
which TurnExecutor catches and emits a 413 error through the same
|
|
819
|
+
channel.
|
|
820
|
+
|
|
821
|
+
**Known-scheme size gate** (in the `known` plugin). Writes to
|
|
822
|
+
`known://` entries exceeding `RUMMY_MAX_ENTRY_TOKENS` (default 512)
|
|
823
|
+
are rejected at the handler with an instructive error message. Forces
|
|
824
|
+
atomic entries instead of dumping transcripts into a single `known://`.
|
|
825
|
+
|
|
826
|
+
**Advisory feedback.** The model reads `tokensFree` / `tokenUsage`
|
|
827
|
+
attributes on `<budget>` every turn and self-regulates. The full
|
|
828
|
+
breakdown (per-scheme visible cost, summarized aggregate, system
|
|
829
|
+
overhead) lives in the same tag — see [token_accounting](#token_accounting)
|
|
830
|
+
for the rendered shape and the contract for what each number means.
|
|
831
|
+
No threshold-based warnings. When the ceiling is actually breached the
|
|
832
|
+
413 `error://` entry is the feedback.
|
|
456
833
|
|
|
457
|
-
`
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
834
|
+
**Token math:** `Math.ceil(text.length / RUMMY_TOKEN_DIVISOR)`. One
|
|
835
|
+
formula, one file (`src/agent/tokens.js`), env-configurable. No
|
|
836
|
+
external dependencies. All costs surfaced to the model and the budget
|
|
837
|
+
guard come through materialization (see [token_accounting](#token_accounting));
|
|
838
|
+
the budget guard's pre-LLM check uses the actual API tokens
|
|
839
|
+
(`turns.context_tokens` from the prior turn) when available, falling
|
|
840
|
+
back to the materialized packet estimate on turn 1.
|
|
461
841
|
|
|
462
842
|
**`context_tokens` vs `prompt_tokens` in step telemetry:**
|
|
463
843
|
- `context_tokens` in the step JSON = `turns.context_tokens` for that turn =
|
|
@@ -469,92 +849,14 @@ These two will diverge rapidly on any multi-turn run. A run at turn 50 might sho
|
|
|
469
849
|
`context_tokens: 8000` (context under control) and `prompt_tokens: 400000`
|
|
470
850
|
(total input tokens billed across the whole run). They are measuring orthogonal things.
|
|
471
851
|
|
|
472
|
-
### 4.6 Panic Mode
|
|
473
|
-
|
|
474
|
-
**The invariant.** A panic is only ever triggered because the
|
|
475
|
-
assembled context was under the ceiling — and the new prompt pushed
|
|
476
|
-
it over. The existing context fit; the incoming prompt did not.
|
|
477
|
-
Panic mode replaces that too-large incoming prompt with a small
|
|
478
|
-
panic prompt on the same context. Therefore: the first turn of a
|
|
479
|
-
panic loop cannot 413. If it does, it is a bug.
|
|
480
|
-
|
|
481
|
-
**Trigger.** `TurnExecutor.execute()` assembles the full packet
|
|
482
|
-
(context + incoming prompt) before calling the LLM. If
|
|
483
|
-
`assembledTokens > contextSize`, it returns 413 without calling
|
|
484
|
-
the LLM. `#drainQueue` intercepts this and enters panic mode.
|
|
485
|
-
|
|
486
|
-
**Flow.**
|
|
487
|
-
1. Complete the failed loop with status 413 (audit trail).
|
|
488
|
-
2. Enqueue a panic loop (`mode = "panic"`, `noRepo = true`,
|
|
489
|
-
`prompt = panicPrompt`, `panicTarget` in config).
|
|
490
|
-
3. Re-enqueue the original loop with `panicAttempted: true` in
|
|
491
|
-
its config JSON. This flag persists across drain cycles.
|
|
492
|
-
4. `continue` — the drain loop claims the panic loop next.
|
|
493
|
-
|
|
494
|
-
After panic completes (model freed enough space), the retry loop
|
|
495
|
-
runs. If the retry also 413s, hard-fail to client. One panic
|
|
496
|
-
attempt per drain cycle — `panicAttempted` is checked both as a
|
|
497
|
-
local variable and on the re-enqueued loop's config.
|
|
498
|
-
|
|
499
|
-
**Panic target.** The model must compress context to below:
|
|
500
|
-
|
|
501
|
-
```
|
|
502
|
-
panicTarget = MIN(contextSize × 0.75, contextSize − incomingTokens) − cushion
|
|
503
|
-
```
|
|
504
|
-
|
|
505
|
-
`incomingTokens` is the raw token count of the original prompt.
|
|
506
|
-
`cushion` is a small safety margin (500 tokens) to absorb
|
|
507
|
-
materialization overhead. The target is expressed in materialized
|
|
508
|
-
token units — the same unit the system uses to measure completion
|
|
509
|
-
(see Token Math below).
|
|
510
|
-
|
|
511
|
-
**Two token contexts.**
|
|
512
|
-
|
|
513
|
-
The model reasons in *per-entry SQL tokens* — the token counts
|
|
514
|
-
visible in `<knowns>` entries. These are the granular unit the model
|
|
515
|
-
uses to decide which entries to target: "this entry is 200 tokens;
|
|
516
|
-
if I archive it, I save 200 tokens."
|
|
517
|
-
|
|
518
|
-
The system makes decisions using *actual API tokens* —
|
|
519
|
-
`turns.context_tokens` back-filled from `usage.input_tokens` after
|
|
520
|
-
each LLM call. SQL token sums do not equal actual API counts because
|
|
521
|
-
projections, assembly overhead, and fidelity transforms alter the
|
|
522
|
-
output; and the SQL estimate (`ceil(chars / DIVISOR)`) can be 3–7×
|
|
523
|
-
off for structured content. **Never use SQL token sums for ceiling or
|
|
524
|
-
budget decisions.** See §4.5 Token Measures for the full breakdown.
|
|
525
|
-
|
|
526
|
-
**Strike system.** After each panic turn, compare
|
|
527
|
-
`result.assembledTokens` (materialized) with `_lastPanicTokens`
|
|
528
|
-
(previous turn's materialized total):
|
|
529
|
-
- Decreased → reset strike counter to 0.
|
|
530
|
-
- Same or increased → increment strikes.
|
|
531
|
-
- 3 consecutive strikes → return 413 to `#drainQueue` → hard-fail.
|
|
532
|
-
|
|
533
|
-
Progress (any reduction) resets the counter. The model has
|
|
534
|
-
unlimited turns as long as it makes progress.
|
|
535
|
-
|
|
536
|
-
**Panic success.** After each turn, if `result.assembledTokens
|
|
537
|
-
<= panicTarget`, the panic loop exits with 200. The retry loop
|
|
538
|
-
then runs with the original prompt on the now-compressed context.
|
|
539
|
-
|
|
540
|
-
**Tool set.** `resolveForLoop("panic")` includes: get, set, known,
|
|
541
|
-
unknown, rm, mv, cp, summarize, update. Excludes: sh, env, search,
|
|
542
|
-
ask_user. `noRepo: true` — no file scanning during panic.
|
|
543
|
-
|
|
544
|
-
**What the model sees.** Turn 1 receives the panic prompt from
|
|
545
|
-
`budget.panicPrompt()`: the assembled token count, the target, and
|
|
546
|
-
the exact number of tokens to free. Turn 2+ receives a continuation
|
|
547
|
-
prompt. The model uses `<set fidelity="archive">`, `<mv
|
|
548
|
-
fidelity="summary">`, and similar fidelity operations to free space,
|
|
549
|
-
concluding with `<summarize>` when done or `<update>` while working.
|
|
550
852
|
|
|
551
853
|
---
|
|
552
854
|
|
|
553
|
-
##
|
|
855
|
+
## RPC Protocol
|
|
554
856
|
|
|
555
857
|
JSON-RPC 2.0 over WebSocket. `discover` returns the live catalog.
|
|
556
858
|
|
|
557
|
-
###
|
|
859
|
+
### Methods {#rpc_methods}
|
|
558
860
|
|
|
559
861
|
#### Protocol
|
|
560
862
|
|
|
@@ -607,6 +909,25 @@ be added explicitly by the client).
|
|
|
607
909
|
`noInteraction` removes `ask_user` from the tool list.
|
|
608
910
|
`noWeb` removes `search` from the tool list.
|
|
609
911
|
|
|
912
|
+
#### Streaming (see [streaming_entries](#streaming_entries))
|
|
913
|
+
|
|
914
|
+
| Method | Params |
|
|
915
|
+
|--------|--------|
|
|
916
|
+
| `stream` | `{ run, path, channel, chunk }` |
|
|
917
|
+
| `stream/completed` | `{ run, path, exit_code?, duration? }` |
|
|
918
|
+
| `stream/aborted` | `{ run, path, reason?, duration? }` |
|
|
919
|
+
| `stream/cancel` | `{ run, path, reason? }` |
|
|
920
|
+
|
|
921
|
+
Producer-agnostic RPC for streaming output into data entries created by
|
|
922
|
+
any plugin (sh/env today; search/fetch/watch as future consumers). The
|
|
923
|
+
`stream` method appends `chunk` to `{path}_{channel}`; `stream/completed`
|
|
924
|
+
transitions all `{path}_*` channels to terminal status (200/500) and
|
|
925
|
+
finalizes the log entry body; `stream/aborted` is the client-initiated
|
|
926
|
+
cancellation counterpart, transitioning channels to **499** (Client
|
|
927
|
+
Closed Request); `stream/cancel` is the server-initiated counterpart
|
|
928
|
+
(transitions to 499 and pushes `stream/cancelled` notification to
|
|
929
|
+
connected clients). `stream/cancel` also handles stale 102 cleanup.
|
|
930
|
+
|
|
610
931
|
#### Queries
|
|
611
932
|
|
|
612
933
|
| Method | Params |
|
|
@@ -628,28 +949,65 @@ be added explicitly by the client).
|
|
|
628
949
|
Skills loaded from `RUMMY_HOME/skills/{name}.md`. Personas from
|
|
629
950
|
`RUMMY_HOME/personas/{name}.md`.
|
|
630
951
|
|
|
631
|
-
###
|
|
952
|
+
### Notifications {#notifications}
|
|
953
|
+
|
|
954
|
+
| Notification | Scoped by | Purpose |
|
|
955
|
+
|-------------|-----------|---------|
|
|
956
|
+
| `rummy/hello` | connection | Server greeting on client connect. Carries `rummyVersion` (semver). Clients check MAJOR and refuse on mismatch. |
|
|
957
|
+
| `run/state` | projectId | Turn state snapshot (status, history, unknowns, telemetry). Fires per command dispatch (incremental 102), at turn conclusion (verdict status), and at terminal run close. |
|
|
958
|
+
| `run/progress` | projectId | Transient turn activity (`thinking` / `processing` / `retrying`). |
|
|
959
|
+
| `run/proposal` | projectId | A 202 entry is awaiting resolution. |
|
|
960
|
+
| `stream/cancelled` | projectId | Server-initiated streaming cancellation. |
|
|
961
|
+
| `ui/render` | projectId | Streaming UI output (e.g. tool progress). |
|
|
962
|
+
| `ui/notify` | projectId | Toast notification. |
|
|
963
|
+
|
|
964
|
+
**`run/state` payload shape** — the unified contract for both the
|
|
965
|
+
notification and `getRun` RPC:
|
|
966
|
+
|
|
967
|
+
```jsonc
|
|
968
|
+
{
|
|
969
|
+
"run": "gemma_1234567890",
|
|
970
|
+
"turn": 4,
|
|
971
|
+
"status": 102, // numeric HTTP status
|
|
972
|
+
"summary": "…", // latest <update status="200"> body, or ""
|
|
973
|
+
"history": [ // chronological per-entry log
|
|
974
|
+
{
|
|
975
|
+
"tool": "set",
|
|
976
|
+
"path": "known://president/current",
|
|
977
|
+
"status": 200,
|
|
978
|
+
"body": "Donald Trump is the 47th president…",
|
|
979
|
+
"turn": 4,
|
|
980
|
+
"attributes": "{\"summary\":\"president,current,trump\",\"visibility\":\"visible\"}"
|
|
981
|
+
}
|
|
982
|
+
],
|
|
983
|
+
"unknowns": [{ "path": "unknown://…", "body": "…" }],
|
|
984
|
+
"telemetry": null | { /* final end-of-turn usage; null on mid-turn emissions */ }
|
|
985
|
+
}
|
|
986
|
+
```
|
|
987
|
+
|
|
988
|
+
`history` includes every entry the model has touched this run in
|
|
989
|
+
timeline order — prompt entries, unknowns, tool results. `attributes`
|
|
990
|
+
is a raw JSON string; parse it client-side. Mid-turn emissions have `telemetry:
|
|
991
|
+
null`; the final emission of each turn includes the full telemetry
|
|
992
|
+
block (token usage, context distribution, cost).
|
|
632
993
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
| `run/progress` | projectId |
|
|
637
|
-
| `ui/render` | projectId |
|
|
638
|
-
| `ui/notify` | projectId |
|
|
994
|
+
`stream/cancelled` payload: `{ run, path, reason }`. Server has
|
|
995
|
+
already transitioned the entries to 499 (`Client Closed Request`);
|
|
996
|
+
client should stop sending `stream` chunks for that path.
|
|
639
997
|
|
|
640
|
-
###
|
|
998
|
+
### Resolution {#resolution}
|
|
641
999
|
|
|
642
1000
|
| Resolution | Model signal | Outcome |
|
|
643
1001
|
|-----------|-------------|---------|
|
|
644
1002
|
| reject | any | `completed` — rejection stops the bus |
|
|
645
|
-
| accept | `<update>` | `running` — model has more work |
|
|
646
|
-
| accept | `<
|
|
1003
|
+
| accept | `<update status="102">` | `running` — model has more work |
|
|
1004
|
+
| accept | `<update status="200\|204\|422">` | `completed` — terminal |
|
|
647
1005
|
| accept | neither | `running` — healer decides |
|
|
648
1006
|
| error | any | `running` — error state, model retries |
|
|
649
1007
|
|
|
650
1008
|
---
|
|
651
1009
|
|
|
652
|
-
##
|
|
1010
|
+
## Plugin System {#plugin_system}
|
|
653
1011
|
|
|
654
1012
|
See [PLUGINS.md](PLUGINS.md) for the full plugin development guide,
|
|
655
1013
|
including the RummyContext API, tool registration, handler chains,
|
|
@@ -659,7 +1017,7 @@ Each plugin has its own README at `src/plugins/{name}/README.md`.
|
|
|
659
1017
|
|
|
660
1018
|
---
|
|
661
1019
|
|
|
662
|
-
##
|
|
1020
|
+
## Tool Documentation Design {#tool_documentation}
|
|
663
1021
|
|
|
664
1022
|
Tool docs are the most carefully designed text in rummy. Every line
|
|
665
1023
|
simultaneously teaches syntax, implies workflow priority, demonstrates
|
|
@@ -674,12 +1032,13 @@ simple to powerful — weak models learn from examples 1-2, strong models
|
|
|
674
1032
|
pick up the pattern from example 3.
|
|
675
1033
|
|
|
676
1034
|
**Lifecycle continuity.** Examples weave stories across tools. The get
|
|
677
|
-
docs
|
|
678
|
-
|
|
679
|
-
`<
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
1035
|
+
docs demonstrate `<get path="known://*">keyword</get>` for pattern recall
|
|
1036
|
+
and `<get path="..." line="N" limit="M"/>` for partial reads that don't
|
|
1037
|
+
promote. The known docs reference `<get path="known://*">keyword</get>`
|
|
1038
|
+
for recall. The unknown docs reference `<set path="unknown://..."
|
|
1039
|
+
visibility="archived"/>` for retiring resolved questions, `<get/>` for
|
|
1040
|
+
investigation. A model reading the full tool docs encounters a coherent
|
|
1041
|
+
workflow: discover → load → reason → edit → archive → recall.
|
|
683
1042
|
|
|
684
1043
|
**RFC 2119 semantics.** Constraint bullets use YOU MUST, YOU MUST NOT,
|
|
685
1044
|
YOU SHOULD, YOU MAY from RFC 2119. Every LLM has extensive pretraining
|
|
@@ -732,7 +1091,7 @@ are universal — not a feature of any single tool.
|
|
|
732
1091
|
|
|
733
1092
|
---
|
|
734
1093
|
|
|
735
|
-
##
|
|
1094
|
+
## Hedberg Editing Syntax {#hedberg}
|
|
736
1095
|
|
|
737
1096
|
The model picks its preferred edit format. The parser understands all of them:
|
|
738
1097
|
|
|
@@ -747,7 +1106,7 @@ The model picks its preferred edit format. The parser understands all of them:
|
|
|
747
1106
|
|
|
748
1107
|
---
|
|
749
1108
|
|
|
750
|
-
##
|
|
1109
|
+
## Response Healing {#response_healing}
|
|
751
1110
|
|
|
752
1111
|
The server never throws on model output. "Model behavior" is never an
|
|
753
1112
|
acceptable explanation. Recovery order:
|
|
@@ -757,15 +1116,16 @@ acceptable explanation. Recovery order:
|
|
|
757
1116
|
3. Did our structure cause this? Check formatting, prompts.
|
|
758
1117
|
|
|
759
1118
|
Termination protocol:
|
|
760
|
-
- `<
|
|
761
|
-
- `<
|
|
762
|
-
|
|
763
|
-
-
|
|
764
|
-
-
|
|
765
|
-
-
|
|
766
|
-
-
|
|
1119
|
+
- `<update status="200|204|422">` → run terminates
|
|
1120
|
+
- `<update status="200">` + failed actions → overridden to continuation
|
|
1121
|
+
(the claim of doneness is refuted by the failures)
|
|
1122
|
+
- `<update status="102">` → run continues
|
|
1123
|
+
- Multiple `<update>` → last one wins
|
|
1124
|
+
- No `<update>` + investigation tools → stall counter (RUMMY_MAX_STALLS)
|
|
1125
|
+
- No `<update>` + action-only tools → healer infers terminal from body
|
|
1126
|
+
- No `<update>` + plain text → healer infers terminal from body
|
|
767
1127
|
- Repeated commands → cycle detection (RUMMY_MIN_CYCLES, RUMMY_MAX_CYCLE_PERIOD)
|
|
768
|
-
- Repeated update text → stall (RUMMY_MAX_UPDATE_REPEATS)
|
|
1128
|
+
- Repeated update text without non-update work → stall (RUMMY_MAX_UPDATE_REPEATS)
|
|
769
1129
|
|
|
770
1130
|
Format normalization:
|
|
771
1131
|
- Gemma `` ```tool_code `` fences → stripped before parsing
|
|
@@ -776,7 +1136,7 @@ Format normalization:
|
|
|
776
1136
|
|
|
777
1137
|
---
|
|
778
1138
|
|
|
779
|
-
##
|
|
1139
|
+
## Testing
|
|
780
1140
|
|
|
781
1141
|
| Tier | Location | LLM? |
|
|
782
1142
|
|------|----------|------|
|
|
@@ -788,9 +1148,66 @@ Format normalization:
|
|
|
788
1148
|
E2E tests must NEVER mock the LLM. Environment cascade:
|
|
789
1149
|
`.env.example` → `.env` → `.env.test`. Always use `npm run test:*`.
|
|
790
1150
|
|
|
1151
|
+
### Spec-Anchored Testing
|
|
1152
|
+
|
|
1153
|
+
Integration and e2e tests MUST be anchored to SPEC.md's snake_case
|
|
1154
|
+
anchor system. The rule is bidirectional:
|
|
1155
|
+
|
|
1156
|
+
1. **Every SPEC.md heading with a `{#snake_case_id}` anchor has at
|
|
1157
|
+
least one integration or e2e test that references it.** The
|
|
1158
|
+
reference is literal: an `@snake_case_id` token appearing in the
|
|
1159
|
+
test file (suite name, test name, or comment). A heading without
|
|
1160
|
+
a test reference is a spec with no verified guarantee.
|
|
1161
|
+
2. **Every integration or e2e test is attributed to at least one
|
|
1162
|
+
`@`-reference.** A test describing behavior that isn't in SPEC
|
|
1163
|
+
must either add that behavior to SPEC or be moved out of the integration
|
|
1164
|
+
/ e2e tiers.
|
|
1165
|
+
|
|
1166
|
+
Enforcement: `npm run test:spec` parses SPEC.md's `{#id}` anchors
|
|
1167
|
+
and greps `test/integration/` + `test/e2e/` for `@id` references.
|
|
1168
|
+
Missing references fail the script. The check runs in CI and blocks
|
|
1169
|
+
merges.
|
|
1170
|
+
|
|
1171
|
+
Unit tests (`src/**/*.test.js`) are exempt — they verify
|
|
1172
|
+
implementation details, not spec guarantees.
|
|
1173
|
+
|
|
1174
|
+
**Why snake_case, not numeric `§X.Y`:** slugs are stable identifiers
|
|
1175
|
+
independent of section ordering. Numbering required a rewrite of
|
|
1176
|
+
every test reference whenever SPEC.md was reorganized. Slugs never
|
|
1177
|
+
churn — rename a section's text, leave the anchor, no tests break.
|
|
1178
|
+
|
|
1179
|
+
**Anchor naming rules:**
|
|
1180
|
+
- Lowercase `[a-z0-9_]`, underscores for word separation.
|
|
1181
|
+
- Unique across the whole document.
|
|
1182
|
+
- Stable once published: treat as a permanent identifier; renames
|
|
1183
|
+
are a breaking change requiring a test sweep.
|
|
1184
|
+
- Short and semantic (`entries`, not `section_0_1_the_entry_contract`).
|
|
1185
|
+
|
|
1186
|
+
**When a section doesn't get an anchor:** umbrella sections (parents
|
|
1187
|
+
of testable subsections, like "The Contract" or "RPC Protocol") and
|
|
1188
|
+
pure-documentation sections (env var listings, debugging procedures,
|
|
1189
|
+
this section itself) stay as plain headings. The anchor *implies
|
|
1190
|
+
testability* — if there's nothing observable to verify, adding an
|
|
1191
|
+
anchor creates a permanent false obligation.
|
|
1192
|
+
|
|
1193
|
+
**PLUGINS.md and `src/plugins/*/README.md`** participate in the
|
|
1194
|
+
same coverage gate as SPEC.md. `npm run test:spec` scans all three
|
|
1195
|
+
sources for `{#snake_case_id}` anchors and requires each one to
|
|
1196
|
+
have an integration or e2e test that references it. Anchors must
|
|
1197
|
+
be unique across the whole doc set — the script errors on
|
|
1198
|
+
collision. Conventional prefixes keep namespaces clean: SPEC uses
|
|
1199
|
+
bare slugs (`entries`, `primitives`), PLUGINS uses `plugins_*`,
|
|
1200
|
+
plugin READMEs use `<plugin>_plugin`.
|
|
1201
|
+
|
|
1202
|
+
**Untestable plugin docs (LLM providers, quickstart tutorials,
|
|
1203
|
+
loader-level behavior verified only in `test/live/`)** stay as
|
|
1204
|
+
plain headings without anchors. Anchors are a commitment to
|
|
1205
|
+
verification; skipping the anchor is the honest declaration that
|
|
1206
|
+
no integration test exists or is feasible.
|
|
1207
|
+
|
|
791
1208
|
---
|
|
792
1209
|
|
|
793
|
-
##
|
|
1210
|
+
## SQL Functions {#sql_functions}
|
|
794
1211
|
|
|
795
1212
|
| Function | Purpose |
|
|
796
1213
|
|----------|---------|
|
|
@@ -805,7 +1222,7 @@ See [PLUGINS.md](PLUGINS.md) for the hedberg pattern type reference.
|
|
|
805
1222
|
|
|
806
1223
|
---
|
|
807
1224
|
|
|
808
|
-
##
|
|
1225
|
+
## Debugging: E2E and Benchmark Results
|
|
809
1226
|
|
|
810
1227
|
### E2E test failures
|
|
811
1228
|
|
|
@@ -826,10 +1243,11 @@ The dump format is: `scheme:state path {attributes}\n body (120 chars)` grouped
|
|
|
826
1243
|
|
|
827
1244
|
Key things to look for in a dump:
|
|
828
1245
|
- **202**: unresolved proposals — model issued `<sh>`, `<rm>`, or `<mv>` that needs approval
|
|
829
|
-
- **413**: budget overflow — assembled context exceeded ceiling
|
|
830
|
-
- **
|
|
831
|
-
-
|
|
832
|
-
-
|
|
1246
|
+
- **413**: budget overflow — assembled context exceeded ceiling (see [budget_enforcement](#budget_enforcement))
|
|
1247
|
+
- **403**: policy rejection (ask-mode file writes) or permission denial (writer ∉ `writable_by`)
|
|
1248
|
+
- **`error://` entries at status 413**: Turn Demotion fired — model received a directive to demote promotions next turn
|
|
1249
|
+
- **`error://` entries at other statuses**: runtime errors (422 parser warnings, 429 cycle detection, 403 policy rejections, 500 dispatch crashes)
|
|
1250
|
+
- **`<sh>` in ask mode**: the policy plugin rejected it; check for the corresponding `error://` entry
|
|
833
1251
|
|
|
834
1252
|
### MAB benchmark
|
|
835
1253
|
|
|
@@ -853,20 +1271,79 @@ Run with: `npm run test:lme`
|
|
|
853
1271
|
|
|
854
1272
|
---
|
|
855
1273
|
|
|
856
|
-
##
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
RUMMY_DEBUG
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
1274
|
+
## Configuration
|
|
1275
|
+
|
|
1276
|
+
Full reference is `.env.example` — these are the load-bearing vars.
|
|
1277
|
+
|
|
1278
|
+
**Runtime:**
|
|
1279
|
+
|
|
1280
|
+
| Var | Default | Purpose |
|
|
1281
|
+
|-----|---------|---------|
|
|
1282
|
+
| `PORT` | 3044 | WebSocket port |
|
|
1283
|
+
| `RUMMY_HOME` | `~/.rummy` | Skills, personas, local config |
|
|
1284
|
+
| `RUMMY_DB_PATH` | `rummy.db` | SQLite path |
|
|
1285
|
+
| `RUMMY_MMAP_MB` | 0 | SQLite mmap hint (MB; 0 disables) |
|
|
1286
|
+
| `RUMMY_DEBUG` | false | Verbose logging |
|
|
1287
|
+
|
|
1288
|
+
**Budget & token math:**
|
|
1289
|
+
|
|
1290
|
+
| Var | Default | Purpose |
|
|
1291
|
+
|-----|---------|---------|
|
|
1292
|
+
| `RUMMY_BUDGET_CEILING` | 0.9 | Fraction of `contextSize` used as ceiling |
|
|
1293
|
+
| `RUMMY_MAX_ENTRY_TOKENS` | 512 | `known://` write rejection threshold |
|
|
1294
|
+
| `RUMMY_TOKEN_DIVISOR` | 2 | `ceil(chars/N)` token estimate divisor |
|
|
1295
|
+
|
|
1296
|
+
**Loop controls:**
|
|
1297
|
+
|
|
1298
|
+
| Var | Default | Purpose |
|
|
1299
|
+
|-----|---------|---------|
|
|
1300
|
+
| `RUMMY_MAX_TURNS` | 15 | Hard loop iteration cap |
|
|
1301
|
+
| `RUMMY_MAX_COMMANDS` | 99 | Max parsed tool calls per turn |
|
|
1302
|
+
| `RUMMY_MAX_STALLS` | 3 | Turns without `<update>` before force-complete |
|
|
1303
|
+
| `RUMMY_MAX_UPDATE_REPEATS` | 3 | Same-text repeat threshold without progress |
|
|
1304
|
+
| `RUMMY_MIN_CYCLES` | 3 | Consecutive repetitions to trigger cycle detection |
|
|
1305
|
+
| `RUMMY_MAX_CYCLE_PERIOD` | 4 | Max cycle period checked by healer |
|
|
1306
|
+
| `RUMMY_RETENTION_DAYS` | 31 | Days of completed/aborted runs kept |
|
|
1307
|
+
| `RUMMY_THINK` | 1 | Enable `<think>` tag reasoning |
|
|
1308
|
+
| `RUMMY_TEMPERATURE` | 0.5 | Default LLM temperature |
|
|
1309
|
+
| `RUMMY_RPC_TIMEOUT` | 30000 | RPC timeout (ms) |
|
|
1310
|
+
| `RUMMY_FETCH_TIMEOUT` | 300000 | LLM HTTP timeout (ms) |
|
|
1311
|
+
|
|
1312
|
+
**LLM providers** (plugin-scoped; a provider with no config is inert):
|
|
1313
|
+
|
|
1314
|
+
| Var | Purpose |
|
|
1315
|
+
|-----|---------|
|
|
1316
|
+
| `OPENROUTER_BASE_URL` / `OPENROUTER_API_KEY` | OpenRouter |
|
|
1317
|
+
| `OPENAI_BASE_URL` / `OPENAI_API_KEY` | OpenAI-compatible (llama.cpp, OpenAI, etc.) |
|
|
1318
|
+
| `OLLAMA_BASE_URL` | Ollama |
|
|
1319
|
+
| `XAI_BASE_URL` / `XAI_API_KEY` | xAI |
|
|
1320
|
+
| `RUMMY_HTTP_REFERER` / `RUMMY_X_TITLE` | OpenRouter attribution headers |
|
|
1321
|
+
|
|
1322
|
+
**Model aliases:**
|
|
1323
|
+
|
|
1324
|
+
`RUMMY_MODEL_{alias}={provider/model}` or `{provider/publisher/model}` —
|
|
1325
|
+
seeded into `models` table at startup. First path segment picks the
|
|
1326
|
+
provider plugin; the rest is the provider's own model identifier. E.g.
|
|
1327
|
+
`RUMMY_MODEL_gpt4=openai/gpt-4`, `RUMMY_MODEL_claude=openrouter/anthropic/claude-3-opus`.
|
|
1328
|
+
Optional companion: `RUMMY_CONTEXT_{alias}={tokens}` overrides the
|
|
1329
|
+
auto-discovered context length.
|
|
1330
|
+
|
|
1331
|
+
**External plugins:**
|
|
1332
|
+
|
|
1333
|
+
`RUMMY_PLUGIN_{name}={path or npm package}` loads an external plugin
|
|
1334
|
+
at startup. Absolute path or published package name (resolved via
|
|
1335
|
+
local `node_modules` then global).
|
|
1336
|
+
|
|
1337
|
+
**Search:**
|
|
1338
|
+
|
|
1339
|
+
| Var | Purpose |
|
|
1340
|
+
|-----|---------|
|
|
1341
|
+
| `RUMMY_SEARCH` | `brave` \| `searxng` |
|
|
1342
|
+
| `BRAVE_API_KEY` | Brave Search API key |
|
|
1343
|
+
| `RUMMY_SEARXNG_URL` | SearXNG instance URL |
|
|
1344
|
+
|
|
1345
|
+
**Testing:**
|
|
1346
|
+
|
|
1347
|
+
| Var | Purpose |
|
|
1348
|
+
|-----|---------|
|
|
1349
|
+
| `RUMMY_TEST_MODEL` | Model alias used by test/live/e2e runners |
|