@possumtech/rummy 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.env.example +12 -7
  2. package/BENCH_ENVIRONMENT.md +230 -0
  3. package/CLIENT_INTERFACE.md +396 -0
  4. package/PLUGINS.md +93 -1
  5. package/SPEC.md +305 -28
  6. package/bin/postinstall.js +2 -2
  7. package/bin/rummy.js +2 -2
  8. package/last_run.txt +5617 -0
  9. package/migrations/001_initial_schema.sql +2 -1
  10. package/package.json +6 -2
  11. package/scriptify/cache_probe.js +66 -0
  12. package/scriptify/cache_probe_grok.js +74 -0
  13. package/service.js +22 -11
  14. package/src/agent/AgentLoop.js +33 -139
  15. package/src/agent/ContextAssembler.js +2 -9
  16. package/src/agent/Entries.js +36 -101
  17. package/src/agent/ProjectAgent.js +2 -9
  18. package/src/agent/TurnExecutor.js +45 -83
  19. package/src/agent/XmlParser.js +247 -273
  20. package/src/agent/budget.js +5 -28
  21. package/src/agent/config.js +38 -0
  22. package/src/agent/errors.js +7 -13
  23. package/src/agent/httpStatus.js +1 -19
  24. package/src/agent/known_store.sql +7 -2
  25. package/src/agent/materializeContext.js +12 -17
  26. package/src/agent/pathEncode.js +5 -0
  27. package/src/agent/rummyHome.js +9 -0
  28. package/src/agent/runs.sql +18 -0
  29. package/src/agent/tokens.js +2 -8
  30. package/src/hooks/HookRegistry.js +1 -16
  31. package/src/hooks/Hooks.js +8 -33
  32. package/src/hooks/PluginContext.js +3 -21
  33. package/src/hooks/RpcRegistry.js +1 -4
  34. package/src/hooks/RummyContext.js +2 -16
  35. package/src/hooks/ToolRegistry.js +5 -15
  36. package/src/llm/LlmProvider.js +28 -23
  37. package/src/llm/errors.js +41 -4
  38. package/src/llm/openaiStream.js +125 -0
  39. package/src/llm/retry.js +61 -15
  40. package/src/plugins/budget/budget.js +14 -81
  41. package/src/plugins/cli/README.md +87 -0
  42. package/src/plugins/cli/bin.js +61 -0
  43. package/src/plugins/cli/cli.js +120 -0
  44. package/src/plugins/env/README.md +2 -1
  45. package/src/plugins/env/env.js +4 -6
  46. package/src/plugins/env/envDoc.md +2 -2
  47. package/src/plugins/error/error.js +23 -23
  48. package/src/plugins/file/file.js +2 -22
  49. package/src/plugins/get/get.js +12 -34
  50. package/src/plugins/get/getDoc.md +5 -3
  51. package/src/plugins/hedberg/edits.js +1 -11
  52. package/src/plugins/hedberg/hedberg.js +3 -26
  53. package/src/plugins/hedberg/normalize.js +1 -5
  54. package/src/plugins/hedberg/patterns.js +4 -15
  55. package/src/plugins/hedberg/sed.js +1 -7
  56. package/src/plugins/helpers.js +28 -20
  57. package/src/plugins/index.js +25 -41
  58. package/src/plugins/instructions/README.md +18 -0
  59. package/src/plugins/instructions/instructions.js +13 -76
  60. package/src/plugins/instructions/instructions.md +19 -18
  61. package/src/plugins/instructions/instructions_104.md +5 -4
  62. package/src/plugins/instructions/instructions_105.md +16 -15
  63. package/src/plugins/instructions/instructions_106.md +15 -14
  64. package/src/plugins/instructions/instructions_107.md +13 -6
  65. package/src/plugins/known/README.md +26 -6
  66. package/src/plugins/known/known.js +36 -34
  67. package/src/plugins/log/README.md +2 -2
  68. package/src/plugins/log/log.js +6 -33
  69. package/src/plugins/ollama/ollama.js +50 -66
  70. package/src/plugins/openai/openai.js +26 -44
  71. package/src/plugins/openrouter/openrouter.js +28 -52
  72. package/src/plugins/policy/README.md +8 -2
  73. package/src/plugins/policy/policy.js +8 -21
  74. package/src/plugins/prompt/README.md +22 -0
  75. package/src/plugins/prompt/prompt.js +8 -16
  76. package/src/plugins/rm/rm.js +5 -2
  77. package/src/plugins/rm/rmDoc.md +4 -4
  78. package/src/plugins/rpc/README.md +2 -1
  79. package/src/plugins/rpc/rpc.js +51 -47
  80. package/src/plugins/set/README.md +5 -1
  81. package/src/plugins/set/set.js +23 -33
  82. package/src/plugins/set/setDoc.md +1 -1
  83. package/src/plugins/sh/README.md +2 -1
  84. package/src/plugins/sh/sh.js +5 -11
  85. package/src/plugins/sh/shDoc.md +2 -2
  86. package/src/plugins/stream/README.md +6 -5
  87. package/src/plugins/stream/stream.js +6 -35
  88. package/src/plugins/telemetry/telemetry.js +26 -19
  89. package/src/plugins/think/think.js +4 -7
  90. package/src/plugins/unknown/unknown.js +8 -13
  91. package/src/plugins/update/update.js +36 -35
  92. package/src/plugins/update/updateDoc.md +3 -3
  93. package/src/plugins/xai/xai.js +30 -20
  94. package/src/plugins/yolo/yolo.js +8 -41
  95. package/src/server/ClientConnection.js +17 -47
  96. package/src/server/SocketServer.js +14 -14
  97. package/src/server/protocol.js +1 -10
  98. package/src/sql/functions/slugify.js +5 -7
  99. package/src/sql/v_model_context.sql +4 -11
  100. package/turns/cli_1777462658211/turn_001.txt +772 -0
  101. package/turns/cli_1777462658211/turn_002.txt +606 -0
  102. package/turns/cli_1777462658211/turn_003.txt +667 -0
  103. package/turns/cli_1777462658211/turn_004.txt +297 -0
  104. package/turns/cli_1777462658211/turn_005.txt +301 -0
  105. package/turns/cli_1777462658211/turn_006.txt +262 -0
  106. package/turns/cli_1777465095132/turn_001.txt +715 -0
  107. package/turns/cli_1777465095132/turn_002.txt +236 -0
  108. package/turns/cli_1777465095132/turn_003.txt +287 -0
  109. package/turns/cli_1777465095132/turn_004.txt +694 -0
  110. package/turns/cli_1777465095132/turn_005.txt +422 -0
  111. package/turns/cli_1777465095132/turn_006.txt +365 -0
  112. package/turns/cli_1777465095132/turn_007.txt +885 -0
  113. package/turns/cli_1777465095132/turn_008.txt +1277 -0
  114. package/turns/cli_1777465095132/turn_009.txt +736 -0
package/.env.example CHANGED
@@ -6,7 +6,7 @@
6
6
  # (environment) > ~/.rummy/.env > ~/.rummy/.env.example
7
7
 
8
8
  # Service Configuration
9
- PORT=3044
9
+ RUMMY_PORT=3044
10
10
 
11
11
  # Absolute path, no ~
12
12
  # RUMMY_HOME=/home/ubuntu/.rummy
@@ -15,19 +15,18 @@ RUMMY_DB_PATH=rummy.db
15
15
  # SQLite mmap size in MB
16
16
  RUMMY_MMAP_MB=0
17
17
 
18
- # Agent Loop Limits
19
- RUMMY_MAX_TURNS=99
18
+ # Agent Loop Limits — per-loop cap (turns within a single loop).
19
+ # No per-run cap; a run can comprise many loops.
20
+ RUMMY_MAX_LOOP_TURNS=99
20
21
  # Hard cap on commands per turn — high by design. The real cost
21
22
  # ceiling is the Token Budget; per-tool rate limits (e.g.
22
23
  # RUMMY_WEB_SEARCH_MAX) bound the expensive tools individually.
23
24
  RUMMY_MAX_COMMANDS=99
24
25
  # Per-turn cap on <search>. Refusals strike via 429.
25
26
  RUMMY_WEB_SEARCH_MAX=1
26
- RUMMY_MAX_STALLS=3
27
27
  RUMMY_MAX_STRIKES=3
28
28
  RUMMY_MIN_CYCLES=3
29
29
  RUMMY_MAX_CYCLE_PERIOD=4
30
- RUMMY_MAX_UPDATE_REPEATS=3
31
30
 
32
31
  # Hygiene
33
32
  # Days to keep completed/aborted runs before purging
@@ -39,13 +38,19 @@ RUMMY_FETCH_TIMEOUT=300000
39
38
  # Test harness — how long AuditClient waits for a single ask/act to reach
40
39
  # terminal status. Sized for full-context ingest on large-window models.
41
40
  RUMMY_TEST_RUN_TIMEOUT=3600000
41
+ # rummy-cli watchdog — wall-clock budget for a one-shot CLI invocation.
42
+ # Overridable per invocation via --RUMMY_RUN_TIMEOUT=<ms>.
43
+ RUMMY_RUN_TIMEOUT=3600000
44
+
45
+ # Plugin module load watchdog.
46
+ RUMMY_PLUGINS_LOAD_TIMEOUT=10000
42
47
 
43
48
  # LLM retry policy: time-bounded exponential backoff with full jitter.
44
49
  # DEADLINE is total wall-clock budget for an LLM call across all retries.
45
50
  # MAX_BACKOFF caps each inter-attempt sleep so a long deadline doesn't
46
51
  # yield 10-minute waits between attempts.
47
- RUMMY_LLM_DEADLINE_MS=600000
48
- RUMMY_LLM_MAX_BACKOFF_MS=30000
52
+ RUMMY_LLM_DEADLINE=600000
53
+ RUMMY_LLM_MAX_BACKOFF=30000
49
54
 
50
55
  # Debug
51
56
  # RUMMY_DEBUG=true
@@ -0,0 +1,230 @@
1
+ # Bench Environment
2
+
3
+ Hardware and software inventory for local-model rummy runs. Captured
4
+ from live system probes; values to cite verbatim in any benchmark
5
+ writeup. **Do not paraphrase.** Re-probe before publishing if the
6
+ machine has been touched.
7
+
8
+ Last verified: 2026-04-30.
9
+
10
+ ---
11
+
12
+ ## Hardware
13
+
14
+ | | |
15
+ |---|---|
16
+ | GPU | **NVIDIA GeForce RTX 5070 Ti** (16 GB VRAM, GB203 Blackwell die) |
17
+ | GPU driver | 595.71.05 (kernel module + userspace, matched as of 2026-04-29 module reload) |
18
+ | Integrated GPU | Intel Arrow Lake-S iGPU (not used for inference) |
19
+ | CPU | Intel Core Ultra 9 285 |
20
+ | Cores | 24 (logical) |
21
+ | RAM | 32 GB |
22
+
23
+ **Source of truth:** `lspci | grep -iE "vga|3d"`, `cat /proc/driver/nvidia/version`,
24
+ `grep model /proc/cpuinfo`, `nproc`, `grep MemTotal /proc/meminfo`.
25
+
26
+ ---
27
+
28
+ ## OS / kernel
29
+
30
+ | | |
31
+ |---|---|
32
+ | Distro | Debian 13 (trixie) |
33
+ | Kernel | 6.12.74+deb13+1-amd64 |
34
+ | GCC | 14.2.0 |
35
+
36
+ ---
37
+
38
+ ## Inference engine
39
+
40
+ | | |
41
+ |---|---|
42
+ | Server | `llama-server` (llama.cpp) |
43
+ | Build | `b199-82209ef` (per `/props.build_info`) |
44
+ | Local endpoint | `http://127.0.0.1:11435` (OpenAI-compatible) |
45
+ | Public endpoint | `https://gemma.possumtech.com` (OpenAI-compatible; Cloudflare-fronted, SSL terminated on hawkbit AWS box, proxied via SSH reverse tunnel `hawkbit:5172 → hyzen:11435`. Toggleable via `systemctl --user disable --now gemma.service` on hyzen.) |
46
+ | n_ctx | **32768** (runtime; model supports up to 262144) |
47
+ | Binary | `/home/hyzen/repo/llama-mainline/build-fast/bin/llama-server` (custom rebuild — see "Build flags" below) |
48
+ | Slots | 1 |
49
+ | Default sampler | temperature 0.0, top_k 64, top_p 1.0, min_p 0.05 |
50
+ | `n_predict` default | -1 (unbounded — fills remaining context) |
51
+ | `reasoning_format` | none (model treated as content-only) |
52
+
53
+ ---
54
+
55
+ ## Loaded model
56
+
57
+ | | |
58
+ |---|---|
59
+ | Filename | `macher.gguf` (local rename) |
60
+ | Path | `/home/hyzen/repo/turbo/models/gemma/macher.gguf` |
61
+ | File size | 13917726528 bytes (12.95 GiB / 13.92 GB) |
62
+ | `general.name` | **Gemma 4 26B A4B It** |
63
+ | `general.architecture` | gemma4 |
64
+ | `general.basename` | gemma-4 |
65
+ | `general.size_label` | 26B-A4B |
66
+ | `general.finetune` | it (instruction-tuned) |
67
+ | `general.license` | apache-2.0 |
68
+ | `general.file_type` | 30 → **IQ4_XS** at 4.41 BPW (confirmed by `llama-server` load output: `print_info: file type = IQ4_XS - 4.25 bpw`; not Q3_K_XL despite filename hints in older `.env` templates) |
69
+ | `general.quantization_version` | 2 |
70
+ | Architecture: `expert_count` | 128 |
71
+ | Architecture: `expert_used_count` | 8 (MoE — ~4B params active per token despite 26B total) |
72
+ | Architecture: `block_count` | 30 |
73
+ | Architecture: `embedding_length` | 2816 |
74
+ | Architecture: `attention.head_count` | 16 |
75
+ | Architecture: `feed_forward_length` | 2112 |
76
+ | Native `context_length` | 262144 (256K) |
77
+
78
+ **Note on quantization:** `general.file_type=30` maps to **IQ4_XS** in
79
+ current llama.cpp (the file is mradermacher's imatrix quant of
80
+ `google/gemma-4-26B-A4B-it`). The load output confirms this directly:
81
+ `print_info: file type = IQ4_XS - 4.25 bpw`, `file size = 12.95 GiB
82
+ (4.41 BPW)`. The earlier `Q3_K_XL` reference in `.env` templates was
83
+ for a different file (`gemma-4-26B-A4B-it-UD-Q3_K_XL.gguf`) that has
84
+ since been deleted from disk. Tensor breakdown from load: 392× F32,
85
+ 1× Q6_K, 60× IQ4_NL, 205× IQ4_XS.
86
+
87
+ **Note on chat template:** the GGUF was rewritten in-place on
88
+ 2026-04-29 via `gguf_new_metadata.py` to embed the Apr-28 upstream
89
+ official Google chat template (commit `4c55b528` of
90
+ `google/gemma-4-26B-A4B-it`), which fixes SI / tool-call handling.
91
+ Tensor data is byte-identical to the original mradermacher download;
92
+ only `tokenizer.chat_template` changed (12045 → 16934 bytes), so
93
+ file size grew by 4889 bytes. The `--chat-template-file` runtime
94
+ flag is no longer needed and has been removed from ExecStart.
95
+
96
+ ---
97
+
98
+ ## Sampling parameters used by rummy
99
+
100
+ Rummy's `openai` plugin (`src/plugins/openai/openai.js`) constructs
101
+ its request body as `{ model, messages, think: true }`, optionally
102
+ adding `temperature` if the caller passed one. **No `max_tokens`,
103
+ no `stop`** — server defaults apply.
104
+
105
+ The plugin then sends the request through the shared streaming
106
+ client at `src/llm/openaiStream.js`, which spreads that body and
107
+ adds `stream: true` and `stream_options: { include_usage: true }`.
108
+ So the actual wire body is:
109
+ `{ model, messages, think: true, [temperature], stream: true, stream_options: {include_usage:true} }`.
110
+
111
+ Streaming is required, not optional: a non-streaming hold can
112
+ exceed the Cloudflare-fronted edge's idle-timeout when the model
113
+ spends seconds on extended reasoning before emitting visible
114
+ content. The streaming wrapper exists specifically to keep bytes
115
+ flowing through the proxy.
116
+
117
+ `n_predict: -1` is in force (server default), so output can still
118
+ grow until it hits the context limit and gets truncated. Under
119
+ streaming, that truncation now manifests as a stalled / late-EOS
120
+ stream rather than the all-at-once mid-token cutoff observed in
121
+ the regex-log gemma run on 2026-04-29.
122
+
123
+ ---
124
+
125
+ ## Build flags (custom llama.cpp rebuild)
126
+
127
+ The `llama-server` binary is a local rebuild with non-default flags
128
+ that materially affect performance on Blackwell sm_120. Stock builds
129
+ will produce slower numbers — readers reproducing should match these
130
+ flags or note their stock-build numbers as such.
131
+
132
+ | Flag | Setting | Why it matters |
133
+ |---|---|---|
134
+ | `CMAKE_CUDA_ARCHITECTURES` | `120` | Blackwell-targeted kernels |
135
+ | `GGML_CUDA_FORCE_MMQ` | `ON` | Forces MMQ kernels for low-bit quants (default OFF) |
136
+ | `GGML_CUDA_FA_ALL_QUANTS` | `ON` | Enables Flash Attention path for q8_0 KV cache (default OFF; without it, q8 KV falls back to a slow generic path) |
137
+ | `GGML_CUDA_F16` | `ON` | fp16 intermediates |
138
+ | `GGML_NATIVE` | `ON` | Native CPU arch tuning |
139
+ | `CMAKE_BUILD_TYPE` | `Release` | |
140
+
141
+ Source tree: `/home/hyzen/repo/llama-mainline` at commit `82209ef`.
142
+ Build dir: `build-fast/`. Binary RUNPATH is baked to that absolute
143
+ path; do not rename the directory.
144
+
145
+ ---
146
+
147
+ ## Service-level operational settings
148
+
149
+ | | |
150
+ |---|---|
151
+ | systemd unit | `/etc/systemd/system/llama.service` |
152
+ | `MemoryHigh` | 12 GB (host RAM soft cap) |
153
+ | `MemoryMax` | 16 GB (host RAM hard cap; OOM-kill if exceeded) |
154
+ | `MemorySwapMax` | 0 (process is forbidden from touching swap) |
155
+ | `Restart` | `always`, `RestartSec=3` |
156
+ | Daily restart timer | `llama-restart.timer` at 04:00 EDT ±30 min via `systemctl try-restart` |
157
+
158
+ KV cache quantization is q8_0 (both K and V). Flash Attention is
159
+ enabled. Gemma 4 sliding-window attention keeps KV at ~500 MiB
160
+ even at 32k context (5 of 30 layers full-context, 25 SWA-capped;
161
+ SWA window = 1024 tokens; pattern is full-context every 6th layer
162
+ per `gemma4.attention.sliding_window_pattern`).
163
+
164
+ Full ExecStart (for cite-verbatim purposes):
165
+
166
+ ```
167
+ /home/hyzen/repo/llama-mainline/build-fast/bin/llama-server \
168
+ --model /home/hyzen/repo/turbo/models/gemma/macher.gguf \
169
+ --ctx-size 32768 --parallel 1 \
170
+ -fa on -ctk q8_0 -ctv q8_0 \
171
+ -ngl 999 -b 1024 -ub 512 \
172
+ -t 12 -tb 24 \
173
+ --host 127.0.0.1 --port 11435 \
174
+ --jinja --reasoning-budget 4096 \
175
+ --cache-ram 4096 --cache-reuse 256 \
176
+ --temp 0 --top-p 1.0 --repeat-penalty 1.0
177
+ ```
178
+
179
+ `--cache-ram 4096 --cache-reuse 256` enables the 4 GiB host-RAM
180
+ prompt cache; first-token latency on warm cache is ~10× faster
181
+ than cold. `--reasoning-budget 4096` caps the thinking phase at
182
+ 4096 tokens before forcing the model into the answer phase.
183
+
184
+ ---
185
+
186
+ ## Measured single-stream baseline (this config)
187
+
188
+ Captured from steady-state probes on 2026-04-29 (after warmup).
189
+ Carried forward as of 2026-04-30: the subsequent changes
190
+ (`--reasoning-budget`, `--cache-ram`, `--cache-reuse` added; chat
191
+ template metadata rewritten in-place) do not touch tensor data,
192
+ attention path, or sampler chain, so generation throughput is
193
+ unaffected. Re-probe before publishing.
194
+
195
+ | Metric | Value |
196
+ |---|---|
197
+ | Generation throughput | **~168 tokens/sec** at 32k ctx (~187 t/s at 16k ctx with FP16 KV) |
198
+ | Per-token latency | ~5.95 ms/token |
199
+ | Prompt eval, small prompts (warm cache) | ~900 t/s |
200
+ | Prompt eval, large prompts (10k+ tokens) | ~5,600 t/s |
201
+ | Time-to-first-token, 10k-token prompt | ~1.9 s |
202
+ | VRAM at idle after model load | ~14.6 / 15.84 GB |
203
+ | Theoretical bandwidth ceiling | 421 t/s (4B active × 4.25 bpw / 896 GB/s) |
204
+ | Observed efficiency vs ceiling | ~40% |
205
+
206
+ Sampling is deterministic at temp 0; numbers above are reproducible
207
+ to <0.5% across trials within the same llama-server lifetime.
208
+
209
+ ---
210
+
211
+ ## How to re-probe
212
+
213
+ ```bash
214
+ # llama-server runtime
215
+ curl -s http://127.0.0.1:11435/props | python3 -m json.tool | head -60
216
+
217
+ # GGUF metadata
218
+ # (parser script in this repo; or use `gguf-dump` if installed)
219
+
220
+ # GPU
221
+ lspci | grep -iE "vga|3d|display"
222
+ cat /proc/driver/nvidia/version
223
+
224
+ # CPU / RAM / OS
225
+ grep -m1 "model name" /proc/cpuinfo
226
+ nproc
227
+ grep MemTotal /proc/meminfo
228
+ grep PRETTY_NAME /etc/os-release
229
+ uname -a
230
+ ```
@@ -0,0 +1,396 @@
1
+ # CLIENT_INTERFACE
2
+
3
+ Wire-protocol contract for any client that drives a rummy server (nvim,
4
+ CLI, tbench harness, future GUIs). Pulse + query model: the server
5
+ emits a content-free `run/changed` notification when entries land;
6
+ clients reconcile against the entry store on demand.
7
+
8
+ The entry store is the only source of truth for run progress. The
9
+ server tells the client *that* something changed — never *what*.
10
+
11
+ ---
12
+
13
+ ## TL;DR
14
+
15
+ 1. Connect a JSON-RPC websocket; `rummy/hello` to register the project.
16
+ 2. `set run://...` (or omit alias) to start a run; you receive `{ alias }`.
17
+ 3. Subscribe to **`run/changed`** pulses (notification from server).
18
+ 4. On each pulse, call **`getEntries(run, { since, pattern })`** to fetch
19
+ what's new. The reply is a flat list of insertion-ordered entries.
20
+ Pass `withBody: true` if you want the body inline (otherwise omit and
21
+ pull bodies via `getRun` or per-row).
22
+ 5. Track the highest `id` you've seen per run; pass it as `since` next pulse.
23
+ 6. Resolve any `state: "proposed"` entries by writing back via `set`
24
+ with `state: "resolved" | "cancelled" | "failed"`.
25
+ 7. Drive UI from the entry stream + `runs.status`; the run is complete
26
+ when its row reaches a terminal status (200/204/413/422/499/500).
27
+
28
+ No typed payloads. No "render this widget" hints. The store is the
29
+ narrative; the client decides what to show.
30
+
31
+ ---
32
+
33
+ ## 1. Connection & handshake
34
+
35
+ WebSocket JSON-RPC 2.0. Default port `3044`. Send:
36
+
37
+ ```json
38
+ { "jsonrpc": "2.0", "method": "rummy/hello", "params": {
39
+ "name": "my-client", "projectRoot": "/abs/path",
40
+ "clientVersion": "2.0.0"
41
+ }, "id": 1 }
42
+ ```
43
+
44
+ Reply: `{ rummyVersion, projectId, projectRoot }`. The server enforces
45
+ **MAJOR-version match** between client and server protocol versions and
46
+ rejects on mismatch.
47
+
48
+ After `rummy/hello`, every subsequent RPC carries the project context
49
+ implicitly — the server knows which project this socket belongs to.
50
+
51
+ ---
52
+
53
+ ## 2. Starting a run
54
+
55
+ ```json
56
+ { "method": "set", "params": {
57
+ "path": "run://",
58
+ "body": "Write a brief OC_RIVERS.md ...",
59
+ "attributes": { "model": "fast", "mode": "act", "yolo": false }
60
+ } }
61
+ ```
62
+
63
+ The server returns `{ alias }` immediately and kicks off the run async.
64
+ `mode` is `"ask"` or `"act"`. `yolo: true` opts out of client proposal
65
+ resolution (server auto-accepts everything and materializes file edits
66
+ to disk).
67
+
68
+ Aliases are formatted `<modelAlias>_<unixMs>` (e.g.
69
+ `gfast_1777422716094`). The format is **not a stable public contract**
70
+ — treat the alias as an opaque string and don't parse it. To recover
71
+ the model, read `runs.model` via `getRun`.
72
+
73
+ To **cancel**:
74
+ ```json
75
+ { "method": "set", "params": {
76
+ "path": "run://gfast_1777422716094", "state": "cancelled"
77
+ } }
78
+ ```
79
+
80
+ To **inject a continuation prompt** into an existing run, write to its
81
+ `run://` path with a `body` and `attributes.mode`. To **fork** a run,
82
+ include `attributes.fork: true`.
83
+
84
+ ---
85
+
86
+ ## 3. The `run/changed` pulse
87
+
88
+ The server emits this notification any time an entry write occurs in
89
+ the project. Payload is intentionally minimal:
90
+
91
+ ```json
92
+ { "method": "run/changed", "params": {
93
+ "run": "gfast_1777422716094",
94
+ "runId": 42,
95
+ "path": "log://turn_3/set/notes.md",
96
+ "changeType": "insert"
97
+ } }
98
+ ```
99
+
100
+ The pulse is **content-free** — it does not carry the entry body, the
101
+ run status, telemetry, or render hints. It is only a hint that the
102
+ store has moved. Treat it as a debounce signal.
103
+
104
+ **Identifiers.** Both `run` (string alias, what you pass to other
105
+ RPCs) and `runId` (integer, the SQLite primary key) are included.
106
+ Clients should key UI state by `run`. `runId` is informational and may
107
+ be useful when multi-tenancy / cross-project bookkeeping requires a
108
+ globally unique key, but you never pass it back to the server.
109
+
110
+ **Delivery.** Pulses are best-effort and may be coalesced server-side
111
+ during burst writes. Use `since` (§4) for catch-up — a missed pulse is
112
+ recovered on the next reliable pulse by `getEntries(run, { since })`
113
+ returning every entry that landed in the gap. Do not assume one pulse
114
+ per write.
115
+
116
+ ---
117
+
118
+ ## 4. Reconciling via `getEntries`
119
+
120
+ After receiving (or coalescing) one or more pulses for a given run,
121
+ query for the diff:
122
+
123
+ ```json
124
+ { "method": "getEntries", "params": {
125
+ "run": "gfast_1777422716094",
126
+ "pattern": "**",
127
+ "since": 1234,
128
+ "limit": 200,
129
+ "withBody": false
130
+ } }
131
+ ```
132
+
133
+ **Parameters:**
134
+
135
+ - **`run`** — alias from `rummy/hello`-then-`set run://`.
136
+ - **`pattern`** — glob over entry path. Default `"*"`. Use `"**"` to
137
+ mirror everything, `"log://**"` for the audit trail, etc.
138
+ - **`since`** — the highest `id` you've already processed for this run
139
+ (or `0` / omit on first call). Server returns only entries with
140
+ `id > since`, ordered by `id` ASC (insertion order).
141
+ - **`limit`** — cap result count; chunk catch-up by re-querying with
142
+ the new high-water mark.
143
+ - **`scheme`**, **`state`**, **`visibility`** — exact-match filters.
144
+ - **`bodyFilter`** — substring/glob match against entry body content;
145
+ filters which **rows** are returned by their body. *Not* a body-
146
+ inclusion knob — for that, see `withBody`.
147
+ - **`withBody`** — when `true`, each returned row carries `body`
148
+ inline. Default `false` to keep pulse-reconcile traffic lean.
149
+
150
+ **Returned row shape:**
151
+
152
+ | Field | Type | Notes |
153
+ |--------------|--------------------------------|-------------------------------------------------|
154
+ | `id` | integer | Monotonic insertion id; the `since` cursor key. |
155
+ | `path` | string | URI-encoded; e.g. `log://turn_1/update/done`. |
156
+ | `scheme` | string \| null | URI scheme of `path`, or `null` for bare files.|
157
+ | `state` | string | `proposed` / `resolved` / `cancelled` / `failed` / `streaming`. |
158
+ | `outcome` | string \| null | Free-form outcome label (e.g. `not_found`). |
159
+ | `visibility` | string | `visible` / `summarized` / `archived`. |
160
+ | `turn` | integer | Turn this entry was written in. |
161
+ | `tokens` | integer | `countTokens(body)` — included even when body isn't. |
162
+ | `attributes` | object | Always parsed JSON object (never a string). |
163
+ | `body` | string (only with `withBody`) | Full entry body. Omitted by default. |
164
+
165
+ **Important:** when `since` is set, results are ordered by `id` ASC
166
+ (insertion order — what catch-up streams want). When `since` is
167
+ omitted, results are ordered by `path` ASC (browse mode — what
168
+ inventory walks want). Pick the mode that matches your use case.
169
+
170
+ ---
171
+
172
+ ## 5. Resolving proposals
173
+
174
+ Some entries land in `state: "proposed"` — the run is parked waiting
175
+ for the client to decide. Examples: file edits (`log://turn_N/set/...`),
176
+ shell commands (`log://turn_N/sh/...`), `ask_user` prompts.
177
+
178
+ To accept, reject, or fail a proposal, write back through `set`:
179
+
180
+ ```json
181
+ { "method": "set", "params": {
182
+ "run": "gfast_1777422716094",
183
+ "path": "log://turn_3/set/notes.md",
184
+ "state": "resolved",
185
+ "body": ""
186
+ } }
187
+ ```
188
+
189
+ | Resolution | `state` | Meaning |
190
+ |------------|--------------|--------------------------------------|
191
+ | accept | `resolved` | Apply the proposal; server materializes side effects |
192
+ | reject | `cancelled` | Drop the proposal; the run continues |
193
+ | error | `failed` | The proposal couldn't be applied; the run aborts |
194
+
195
+ For `ask_user` proposals, put the user's answer in `body`.
196
+
197
+ The server's response carries `{ status }` reflecting the run's
198
+ **current** status (102 mid-run, terminal at completion). Do **not**
199
+ treat `status >= 200` from a resolve response as terminal — the run may
200
+ still be active. Use the run row's status (via `getRun` or by tracking
201
+ pulses) as authoritative.
202
+
203
+ If a run was started with `attributes.yolo: true`, you do not need to
204
+ register a resolver — the server auto-accepts every proposal
205
+ server-side and materializes file edits to disk under `projectRoot`.
206
+ For `ask_user` under yolo the server cannot supply a meaningful answer
207
+ on the user's behalf; yolo runs that emit `ask_user` proposals park
208
+ indefinitely (or until cancelled). Treat `ask_user` + yolo as a client
209
+ configuration error.
210
+
211
+ ---
212
+
213
+ ## 6. Reading bodies and run state
214
+
215
+ `getEntries` returns metadata only by default. Two ways to get bodies:
216
+
217
+ 1. **`getEntries` with `withBody: true`** — bodies for matched rows
218
+ inline. Bandwidth scales with what you query for; bound it with
219
+ `pattern` / `limit`.
220
+ 2. **`getRun(run)`** — full structured snapshot of one run with
221
+ bodies, telemetry, history, latest prompt and summary. Use on
222
+ initial open of a run document or after long disconnects.
223
+
224
+ ```json
225
+ { "method": "getRun", "params": { "run": "gfast_1777422716094" } }
226
+ ```
227
+
228
+ **`getRun` response shape (pinned):**
229
+
230
+ ```json
231
+ {
232
+ "run": "gfast_1777422716094",
233
+ "turn": 4,
234
+ "status": 200,
235
+ "model": "gfast",
236
+ "temperature": null,
237
+ "persona": null,
238
+ "context_limit": null,
239
+ "context": {
240
+ "telemetry": {
241
+ "prompt_tokens": 1928,
242
+ "completion_tokens": 75,
243
+ "total_tokens": 2003,
244
+ "cost": 0
245
+ },
246
+ "reasoning": [ { "path": "reasoning://N", "body": "...", "turn": N } ],
247
+ "content": [ { "path": "content://N", "body": "...", "turn": N } ],
248
+ "history": [ {
249
+ "tool": "set",
250
+ "path": "log://turn_N/set/notes.md",
251
+ "status": 200,
252
+ "body": "...",
253
+ "attributes": { "action": "set", "status": 200, "...": "..." },
254
+ "turn": N
255
+ } ]
256
+ },
257
+ "last_user_prompt": "...",
258
+ "last_summary": "..."
259
+ }
260
+ ```
261
+
262
+ - `attributes` on `history` rows is always a parsed object (never a
263
+ JSON string).
264
+ - `context.reasoning` and `context.content` carry the model's per-turn
265
+ reasoning and assistant content respectively. Empty arrays for runs
266
+ whose model didn't surface those channels.
267
+ - `last_summary` is the body of the most recent `log://turn_N/update/*`
268
+ entry. `last_user_prompt` is the body of the most recent `prompt://*`
269
+ entry (the active user prompt for this run).
270
+
271
+ For incremental updates inside a session, prefer the pulse +
272
+ `getEntries` flow over polling `getRun`.
273
+
274
+ ---
275
+
276
+ ## 7. Terminal detection & telemetry
277
+
278
+ A run's `status` field on its row is authoritative. Terminal statuses:
279
+ `200, 204, 413, 422, 499, 500`. Any other value is in-flight (typically
280
+ `102`).
281
+
282
+ **Status updates land at `log://turn_N/update/<slug>` with:**
283
+ - `attributes.action = "update"`
284
+ - `attributes.status = <int>` — the integer status code (e.g. `145`,
285
+ `156`, `167`, `200`)
286
+ - `body` — the human-readable summary text
287
+
288
+ Latest `update` entry's body is the latest summary. Terminal status is
289
+ detected when:
290
+ 1. The `runs.status` row reaches a terminal value (read via `getRun`),
291
+ **or**
292
+ 2. The latest `log://turn_N/update/*` entry's `attributes.status` is in
293
+ the terminal set.
294
+
295
+ (1) is the authoritative read; (2) is a convenience for UIs that are
296
+ already watching the entry stream.
297
+
298
+ **Errors land at `log://turn_N/error/<slug>` with:**
299
+ - `attributes.action = "error"`
300
+ - Body carrying the error detail; `outcome` may carry a short label
301
+ (e.g. `not_found`, `validation`).
302
+
303
+ There is no separate `update://` or `error://` URI scheme — these are
304
+ log channels under the audit trail. Filter via `pattern: "log://**/update/**"`
305
+ or `pattern: "log://**/error/**"` if you only want one channel.
306
+
307
+ **Per-turn telemetry** (token counts, model alias, cached tokens, etc.)
308
+ is in the `turns` table. Surface it by:
309
+ - Calling `getRun(run)` and reading `context.telemetry` (aggregated
310
+ across all turns of the run), **or**
311
+ - Querying the SQLite store directly if your client runs alongside the
312
+ server (private optimization, not a wire contract).
313
+
314
+ Per-turn breakdowns (rather than aggregated) require a direct DB read
315
+ for now; we may add a wire RPC if a need surfaces.
316
+
317
+ A common UI pattern:
318
+
319
+ - Maintain a `Map<runAlias, lastSeenId>`.
320
+ - On `run/changed`: `getEntries(run, { since })`, update `lastSeenId`,
321
+ render new entries inline.
322
+ - For runs in your foreground UI, periodically (every ~2s, or on
323
+ pulse) fetch bodies for newly-arrived rows via `getEntries` with
324
+ `withBody: true` filtered by the new `id` range.
325
+ - Detect terminal by watching for `attributes.action === "update"`
326
+ with `attributes.status` in the terminal set, or by polling
327
+ `runs.status` via `getRun` on a low cadence.
328
+
329
+ ---
330
+
331
+ ## 8. Other notifications
332
+
333
+ Beyond `run/changed`, the server emits:
334
+
335
+ | Notification | Purpose |
336
+ |---------------------|--------------------------------------------------------|
337
+ | `ui/render` | **Advisory only.** Streaming model output for live thinking displays. Payload shape and cadence are not part of the wire contract; clients may ignore. The entry stream + bodies is the durable record. |
338
+ | `ui/notify` | Toast-level operator messages. `params: { message, level }`. |
339
+ | `stream/cancelled` | Server-initiated stream abort; client should kill its local process if it owned the stream. |
340
+
341
+ A minimal client can ignore all three and still function — the entry
342
+ store carries the durable record.
343
+
344
+ ---
345
+
346
+ ## 9. Migrating from the typed-notification protocol
347
+
348
+ The legacy protocol shipped three typed notifications:
349
+
350
+ - `run/state` — fired after each turn with status, summary, history,
351
+ unknowns, telemetry.
352
+ - `run/progress` — turn-status pings ("thinking", "processing").
353
+ - `run/proposal` — pending proposal payload + metadata.
354
+
355
+ All three are **gone**. Their information is fully derivable from the
356
+ entry store:
357
+
358
+ | Old surface | New equivalent |
359
+ |-------------------------|------------------------------------------------------------------|
360
+ | `run/state.status` | `runs.status` row field (via `getRun`), or latest `log://turn_N/update/*` with `attributes.status` in terminal set |
361
+ | `run/state.summary` | latest `log://turn_N/update/*` entry body, or `getRun.last_summary` |
362
+ | `run/state.history` | `getEntries(run, { pattern: "log://**" })` |
363
+ | `run/state.unknowns` | `getEntries(run, { pattern: "unknown://**" })` |
364
+ | `run/state.telemetry` | `getRun(run).context.telemetry` (aggregated) |
365
+ | `run/progress` | (drop — pulse cadence is sufficient) |
366
+ | `run/proposal.proposed` | `getEntries(run, { state: "proposed", since })` |
367
+
368
+ If your client previously closed a document the moment a `run/state`
369
+ arrived with `status >= 200`: do **not** apply the same logic to a
370
+ resolve-RPC response. Track `runs.status` instead.
371
+
372
+ ---
373
+
374
+ ## 10. Multi-client semantics
375
+
376
+ Multiple clients may connect to the same server simultaneously.
377
+ Conflict resolution is **last-write-wins** at the entry-store level —
378
+ two clients resolving the same `(run, path)` proposal will both
379
+ succeed; the second resolution's `state` and `body` overwrite the
380
+ first. The server does not lock or arbitrate.
381
+
382
+ For UI safety: implement optimistic local state on resolve, but
383
+ re-render from `getEntries` on pulse to absorb any concurrent client's
384
+ write. The pulse will always reach you eventually; don't pessimistically
385
+ lock.
386
+
387
+ ---
388
+
389
+ ## 11. Reference
390
+
391
+ - Server source of truth: `src/server/ClientConnection.js`,
392
+ `src/plugins/rpc/rpc.js`.
393
+ - Entry store schema: `migrations/001_initial_schema.sql`.
394
+ - Pulse emission point: `hooks.entry.changed` → `run/changed`.
395
+ - Protocol version constant: `src/server/protocol.js`
396
+ (`RUMMY_PROTOCOL_VERSION`).