@possumtech/rummy 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +31 -5
- package/BENCH_ENVIRONMENT.md +230 -0
- package/CLIENT_INTERFACE.md +396 -0
- package/PLUGINS.md +93 -1
- package/SPEC.md +389 -28
- package/bin/postinstall.js +2 -2
- package/bin/rummy.js +2 -2
- package/last_run.txt +5617 -0
- package/migrations/001_initial_schema.sql +2 -1
- package/package.json +13 -9
- package/scriptify/ask_run.js +77 -0
- package/scriptify/cache_probe.js +66 -0
- package/scriptify/cache_probe_grok.js +74 -0
- package/service.js +22 -11
- package/src/agent/AgentLoop.js +62 -157
- package/src/agent/ContextAssembler.js +2 -9
- package/src/agent/Entries.js +54 -98
- package/src/agent/ProjectAgent.js +4 -11
- package/src/agent/TurnExecutor.js +48 -83
- package/src/agent/XmlParser.js +247 -273
- package/src/agent/budget.js +5 -28
- package/src/agent/config.js +38 -0
- package/src/agent/errors.js +7 -13
- package/src/agent/httpStatus.js +1 -19
- package/src/agent/known_queries.sql +1 -1
- package/src/agent/known_store.sql +12 -2
- package/src/agent/materializeContext.js +15 -18
- package/src/agent/pathEncode.js +5 -0
- package/src/agent/rummyHome.js +9 -0
- package/src/agent/runs.sql +37 -0
- package/src/agent/tokens.js +7 -7
- package/src/hooks/HookRegistry.js +1 -16
- package/src/hooks/Hooks.js +8 -33
- package/src/hooks/PluginContext.js +3 -21
- package/src/hooks/RpcRegistry.js +1 -4
- package/src/hooks/RummyContext.js +6 -16
- package/src/hooks/ToolRegistry.js +5 -15
- package/src/llm/LlmProvider.js +41 -33
- package/src/llm/errors.js +41 -4
- package/src/llm/openaiStream.js +125 -0
- package/src/llm/retry.js +109 -0
- package/src/plugins/budget/budget.js +55 -76
- package/src/plugins/cli/README.md +87 -0
- package/src/plugins/cli/bin.js +61 -0
- package/src/plugins/cli/cli.js +120 -0
- package/src/plugins/env/README.md +2 -1
- package/src/plugins/env/env.js +4 -6
- package/src/plugins/env/envDoc.md +2 -2
- package/src/plugins/error/error.js +23 -23
- package/src/plugins/file/file.js +2 -22
- package/src/plugins/get/get.js +12 -34
- package/src/plugins/get/getDoc.md +8 -6
- package/src/plugins/hedberg/edits.js +1 -11
- package/src/plugins/hedberg/hedberg.js +3 -26
- package/src/plugins/hedberg/normalize.js +1 -5
- package/src/plugins/hedberg/patterns.js +4 -15
- package/src/plugins/hedberg/sed.js +1 -7
- package/src/plugins/helpers.js +28 -20
- package/src/plugins/index.js +25 -41
- package/src/plugins/instructions/README.md +18 -0
- package/src/plugins/instructions/instructions.js +97 -38
- package/src/plugins/instructions/instructions.md +24 -15
- package/src/plugins/instructions/instructions_104.md +5 -4
- package/src/plugins/instructions/instructions_105.md +29 -36
- package/src/plugins/instructions/instructions_106.md +22 -0
- package/src/plugins/instructions/instructions_107.md +17 -0
- package/src/plugins/instructions/instructions_108.md +0 -8
- package/src/plugins/known/README.md +26 -6
- package/src/plugins/known/known.js +37 -34
- package/src/plugins/log/README.md +2 -2
- package/src/plugins/log/log.js +27 -34
- package/src/plugins/ollama/ollama.js +50 -66
- package/src/plugins/openai/openai.js +26 -44
- package/src/plugins/openrouter/openrouter.js +28 -52
- package/src/plugins/policy/README.md +8 -2
- package/src/plugins/policy/policy.js +8 -21
- package/src/plugins/prompt/README.md +22 -0
- package/src/plugins/prompt/prompt.js +14 -16
- package/src/plugins/rm/rm.js +5 -2
- package/src/plugins/rm/rmDoc.md +4 -4
- package/src/plugins/rpc/README.md +2 -1
- package/src/plugins/rpc/rpc.js +62 -48
- package/src/plugins/set/README.md +5 -1
- package/src/plugins/set/set.js +23 -33
- package/src/plugins/set/setDoc.md +1 -1
- package/src/plugins/sh/README.md +2 -1
- package/src/plugins/sh/sh.js +5 -11
- package/src/plugins/sh/shDoc.md +2 -2
- package/src/plugins/stream/README.md +6 -5
- package/src/plugins/stream/stream.js +6 -35
- package/src/plugins/telemetry/telemetry.js +26 -19
- package/src/plugins/think/think.js +4 -7
- package/src/plugins/unknown/unknown.js +8 -13
- package/src/plugins/update/update.js +42 -25
- package/src/plugins/update/updateDoc.md +3 -3
- package/src/plugins/xai/xai.js +30 -20
- package/src/plugins/yolo/yolo.js +159 -0
- package/src/server/ClientConnection.js +17 -47
- package/src/server/SocketServer.js +14 -14
- package/src/server/protocol.js +1 -10
- package/src/sql/functions/slugify.js +5 -7
- package/src/sql/v_model_context.sql +4 -11
- package/turns/cli_1777462658211/turn_001.txt +772 -0
- package/turns/cli_1777462658211/turn_002.txt +606 -0
- package/turns/cli_1777462658211/turn_003.txt +667 -0
- package/turns/cli_1777462658211/turn_004.txt +297 -0
- package/turns/cli_1777462658211/turn_005.txt +301 -0
- package/turns/cli_1777462658211/turn_006.txt +262 -0
- package/turns/cli_1777465095132/turn_001.txt +715 -0
- package/turns/cli_1777465095132/turn_002.txt +236 -0
- package/turns/cli_1777465095132/turn_003.txt +287 -0
- package/turns/cli_1777465095132/turn_004.txt +694 -0
- package/turns/cli_1777465095132/turn_005.txt +422 -0
- package/turns/cli_1777465095132/turn_006.txt +365 -0
- package/turns/cli_1777465095132/turn_007.txt +885 -0
- package/turns/cli_1777465095132/turn_008.txt +1277 -0
- package/turns/cli_1777465095132/turn_009.txt +736 -0
package/.env.example
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
# (environment) > ~/.rummy/.env > ~/.rummy/.env.example
|
|
7
7
|
|
|
8
8
|
# Service Configuration
|
|
9
|
-
|
|
9
|
+
RUMMY_PORT=3044
|
|
10
10
|
|
|
11
11
|
# Absolute path, no ~
|
|
12
12
|
# RUMMY_HOME=/home/ubuntu/.rummy
|
|
@@ -15,19 +15,18 @@ RUMMY_DB_PATH=rummy.db
|
|
|
15
15
|
# SQLite mmap size in MB
|
|
16
16
|
RUMMY_MMAP_MB=0
|
|
17
17
|
|
|
18
|
-
# Agent Loop Limits
|
|
19
|
-
|
|
18
|
+
# Agent Loop Limits — per-loop cap (turns within a single loop).
|
|
19
|
+
# No per-run cap; a run can comprise many loops.
|
|
20
|
+
RUMMY_MAX_LOOP_TURNS=99
|
|
20
21
|
# Hard cap on commands per turn — high by design. The real cost
|
|
21
22
|
# ceiling is the Token Budget; per-tool rate limits (e.g.
|
|
22
23
|
# RUMMY_WEB_SEARCH_MAX) bound the expensive tools individually.
|
|
23
24
|
RUMMY_MAX_COMMANDS=99
|
|
24
25
|
# Per-turn cap on <search>. Refusals strike via 429.
|
|
25
26
|
RUMMY_WEB_SEARCH_MAX=1
|
|
26
|
-
RUMMY_MAX_STALLS=3
|
|
27
27
|
RUMMY_MAX_STRIKES=3
|
|
28
28
|
RUMMY_MIN_CYCLES=3
|
|
29
29
|
RUMMY_MAX_CYCLE_PERIOD=4
|
|
30
|
-
RUMMY_MAX_UPDATE_REPEATS=3
|
|
31
30
|
|
|
32
31
|
# Hygiene
|
|
33
32
|
# Days to keep completed/aborted runs before purging
|
|
@@ -36,6 +35,22 @@ RUMMY_RETENTION_DAYS=31
|
|
|
36
35
|
# Timeouts (ms)
|
|
37
36
|
RUMMY_RPC_TIMEOUT=30000
|
|
38
37
|
RUMMY_FETCH_TIMEOUT=300000
|
|
38
|
+
# Test harness — how long AuditClient waits for a single ask/act to reach
|
|
39
|
+
# terminal status. Sized for full-context ingest on large-window models.
|
|
40
|
+
RUMMY_TEST_RUN_TIMEOUT=3600000
|
|
41
|
+
# rummy-cli watchdog — wall-clock budget for a one-shot CLI invocation.
|
|
42
|
+
# Overridable per invocation via --RUMMY_RUN_TIMEOUT=<ms>.
|
|
43
|
+
RUMMY_RUN_TIMEOUT=3600000
|
|
44
|
+
|
|
45
|
+
# Plugin module load watchdog.
|
|
46
|
+
RUMMY_PLUGINS_LOAD_TIMEOUT=10000
|
|
47
|
+
|
|
48
|
+
# LLM retry policy: time-bounded exponential backoff with full jitter.
|
|
49
|
+
# DEADLINE is total wall-clock budget for an LLM call across all retries.
|
|
50
|
+
# MAX_BACKOFF caps each inter-attempt sleep so a long deadline doesn't
|
|
51
|
+
# yield 10-minute waits between attempts.
|
|
52
|
+
RUMMY_LLM_DEADLINE=600000
|
|
53
|
+
RUMMY_LLM_MAX_BACKOFF=30000
|
|
39
54
|
|
|
40
55
|
# Debug
|
|
41
56
|
# RUMMY_DEBUG=true
|
|
@@ -59,6 +74,17 @@ RUMMY_TOKEN_DIVISOR=2
|
|
|
59
74
|
# LLM temperature (0 = deterministic, 0.7 = creative). Client can override per-request.
|
|
60
75
|
RUMMY_TEMPERATURE=0.5
|
|
61
76
|
|
|
77
|
+
# Run Attribute Defaults
|
|
78
|
+
# Per-run attributes (passed in the run-creation set call) trump these.
|
|
79
|
+
# Strict "1" enables; unset / "0" / "" disables. Useful in profile env
|
|
80
|
+
# files (e.g. .env.tbench) layered via --env-file-if-exists.
|
|
81
|
+
#
|
|
82
|
+
# RUMMY_YOLO=1 # auto-accept every proposal (headless / CI / bench)
|
|
83
|
+
# RUMMY_NO_INTERACTION=1 # exclude <ask_user> from the tool list
|
|
84
|
+
# RUMMY_NO_WEB=1 # exclude <search> from the tool list
|
|
85
|
+
# RUMMY_NO_PROPOSALS=1 # exclude <ask_user>/<env>/<sh>
|
|
86
|
+
# RUMMY_NO_REPO=1 # skip rummy.repo scanning (file scan + overview)
|
|
87
|
+
|
|
62
88
|
# Provider Configuration
|
|
63
89
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
|
64
90
|
# OPENROUTER_API_KEY=
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# Bench Environment
|
|
2
|
+
|
|
3
|
+
Hardware and software inventory for local-model rummy runs. Captured
|
|
4
|
+
from live system probes; values to cite verbatim in any benchmark
|
|
5
|
+
writeup. **Do not paraphrase.** Re-probe before publishing if the
|
|
6
|
+
machine has been touched.
|
|
7
|
+
|
|
8
|
+
Last verified: 2026-04-30.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Hardware
|
|
13
|
+
|
|
14
|
+
| | |
|
|
15
|
+
|---|---|
|
|
16
|
+
| GPU | **NVIDIA GeForce RTX 5070 Ti** (16 GB VRAM, GB203 Blackwell die) |
|
|
17
|
+
| GPU driver | 595.71.05 (kernel module + userspace, matched as of 2026-04-29 module reload) |
|
|
18
|
+
| Integrated GPU | Intel Arrow Lake-S iGPU (not used for inference) |
|
|
19
|
+
| CPU | Intel Core Ultra 9 285 |
|
|
20
|
+
| Cores | 24 (logical) |
|
|
21
|
+
| RAM | 32 GB |
|
|
22
|
+
|
|
23
|
+
**Source of truth:** `lspci | grep -E "vga|3d"`, `cat /proc/driver/nvidia/version`,
|
|
24
|
+
`grep model /proc/cpuinfo`, `nproc`, `grep MemTotal /proc/meminfo`.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## OS / kernel
|
|
29
|
+
|
|
30
|
+
| | |
|
|
31
|
+
|---|---|
|
|
32
|
+
| Distro | Debian 13 (trixie) |
|
|
33
|
+
| Kernel | 6.12.74+deb13+1-amd64 |
|
|
34
|
+
| GCC | 14.2.0 |
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Inference engine
|
|
39
|
+
|
|
40
|
+
| | |
|
|
41
|
+
|---|---|
|
|
42
|
+
| Server | `llama-server` (llama.cpp) |
|
|
43
|
+
| Build | `b199-82209ef` (per `/props.build_info`) |
|
|
44
|
+
| Local endpoint | `http://127.0.0.1:11435` (OpenAI-compatible) |
|
|
45
|
+
| Public endpoint | `https://gemma.possumtech.com` (OpenAI-compatible; Cloudflare-fronted, SSL terminated on hawkbit AWS box, proxied via SSH reverse tunnel `hawkbit:5172 → hyzen:11435`. Toggleable via `systemctl --user disable --now gemma.service` on hyzen.) |
|
|
46
|
+
| n_ctx | **32768** (runtime; model supports up to 262144) |
|
|
47
|
+
| Binary | `/home/hyzen/repo/llama-mainline/build-fast/bin/llama-server` (custom rebuild — see "Build flags" below) |
|
|
48
|
+
| Slots | 1 |
|
|
49
|
+
| Default sampler | temperature 0.0, top_k 64, top_p 1.0, min_p 0.05 |
|
|
50
|
+
| `n_predict` default | -1 (unbounded — fills remaining context) |
|
|
51
|
+
| `reasoning_format` | none (model treated as content-only) |
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Loaded model
|
|
56
|
+
|
|
57
|
+
| | |
|
|
58
|
+
|---|---|
|
|
59
|
+
| Filename | `macher.gguf` (local rename) |
|
|
60
|
+
| Path | `/home/hyzen/repo/turbo/models/gemma/macher.gguf` |
|
|
61
|
+
| File size | 13917726528 bytes (12.95 GiB / 13.92 GB) |
|
|
62
|
+
| `general.name` | **Gemma 4 26B A4B It** |
|
|
63
|
+
| `general.architecture` | gemma4 |
|
|
64
|
+
| `general.basename` | gemma-4 |
|
|
65
|
+
| `general.size_label` | 26B-A4B |
|
|
66
|
+
| `general.finetune` | it (instruction-tuned) |
|
|
67
|
+
| `general.license` | apache-2.0 |
|
|
68
|
+
| `general.file_type` | 30 → **IQ4_XS** at 4.41 BPW (confirmed by `llama-server` load output: `print_info: file type = IQ4_XS - 4.25 bpw`; not Q3_K_XL despite filename hints in older `.env` templates) |
|
|
69
|
+
| `general.quantization_version` | 2 |
|
|
70
|
+
| Architecture: `expert_count` | 128 |
|
|
71
|
+
| Architecture: `expert_used_count` | 8 (MoE — ~4B params active per token despite 26B total) |
|
|
72
|
+
| Architecture: `block_count` | 30 |
|
|
73
|
+
| Architecture: `embedding_length` | 2816 |
|
|
74
|
+
| Architecture: `attention.head_count` | 16 |
|
|
75
|
+
| Architecture: `feed_forward_length` | 2112 |
|
|
76
|
+
| Native `context_length` | 262144 (256K) |
|
|
77
|
+
|
|
78
|
+
**Note on quantization:** `general.file_type=30` maps to **IQ4_XS** in
|
|
79
|
+
current llama.cpp (the file is mradermacher's imatrix quant of
|
|
80
|
+
`google/gemma-4-26B-A4B-it`). The load output confirms this directly:
|
|
81
|
+
`print_info: file type = IQ4_XS - 4.25 bpw`, `file size = 12.95 GiB
|
|
82
|
+
(4.41 BPW)`. The earlier `Q3_K_XL` reference in `.env` templates was
|
|
83
|
+
for a different file (`gemma-4-26B-A4B-it-UD-Q3_K_XL.gguf`) that has
|
|
84
|
+
since been deleted from disk. Tensor breakdown from load: 392× F32,
|
|
85
|
+
1× Q6_K, 60× IQ4_NL, 205× IQ4_XS.
|
|
86
|
+
|
|
87
|
+
**Note on chat template:** the GGUF was rewritten in-place on
|
|
88
|
+
2026-04-29 via `gguf_new_metadata.py` to embed the Apr-28 upstream
|
|
89
|
+
official Google chat template (commit `4c55b528` of
|
|
90
|
+
`google/gemma-4-26B-A4B-it`), which fixes SI / tool-call handling.
|
|
91
|
+
Tensor data is byte-identical to the original mradermacher download;
|
|
92
|
+
only `tokenizer.chat_template` changed (12045 → 16934 bytes), so
|
|
93
|
+
file size grew by 4864 bytes. The `--chat-template-file` runtime
|
|
94
|
+
flag is no longer needed and has been removed from ExecStart.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Sampling parameters used by rummy
|
|
99
|
+
|
|
100
|
+
Rummy's `openai` plugin (`src/plugins/openai/openai.js`) constructs
|
|
101
|
+
its request body as `{ model, messages, think: true }`, optionally
|
|
102
|
+
adding `temperature` if the caller passed one. **No `max_tokens`,
|
|
103
|
+
no `stop`** — server defaults apply.
|
|
104
|
+
|
|
105
|
+
The plugin then sends the request through the shared streaming
|
|
106
|
+
client at `src/llm/openaiStream.js`, which spreads that body and
|
|
107
|
+
adds `stream: true` and `stream_options: { include_usage: true }`.
|
|
108
|
+
So the actual wire body is:
|
|
109
|
+
`{ model, messages, think: true, [temperature], stream: true, stream_options: {include_usage:true} }`.
|
|
110
|
+
|
|
111
|
+
Streaming is required, not optional: a non-streaming hold can
|
|
112
|
+
exceed the Cloudflare-fronted edge's idle-timeout when the model
|
|
113
|
+
spends seconds on extended reasoning before emitting visible
|
|
114
|
+
content. The streaming wrapper exists specifically to keep bytes
|
|
115
|
+
flowing through the proxy.
|
|
116
|
+
|
|
117
|
+
`n_predict: -1` is in force (server default), so output can still
|
|
118
|
+
grow until it hits the context limit and gets truncated. Under
|
|
119
|
+
streaming, that truncation now manifests as a stalled / late-EOS
|
|
120
|
+
stream rather than the all-at-once mid-token cutoff observed in
|
|
121
|
+
the regex-log gemma run on 2026-04-29.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Build flags (custom llama.cpp rebuild)
|
|
126
|
+
|
|
127
|
+
The `llama-server` binary is a local rebuild with non-default flags
|
|
128
|
+
that materially affect performance on Blackwell sm_120. Stock builds
|
|
129
|
+
will produce slower numbers — readers reproducing should match these
|
|
130
|
+
flags or note their stock-build numbers as such.
|
|
131
|
+
|
|
132
|
+
| Flag | Setting | Why it matters |
|
|
133
|
+
|---|---|---|
|
|
134
|
+
| `CMAKE_CUDA_ARCHITECTURES` | `120` | Blackwell-targeted kernels |
|
|
135
|
+
| `GGML_CUDA_FORCE_MMQ` | `ON` | Forces MMQ kernels for low-bit quants (default OFF) |
|
|
136
|
+
| `GGML_CUDA_FA_ALL_QUANTS` | `ON` | Enables Flash Attention path for q8_0 KV cache (default OFF; without it, q8 KV falls back to a slow generic path) |
|
|
137
|
+
| `GGML_CUDA_F16` | `ON` | fp16 intermediates |
|
|
138
|
+
| `GGML_NATIVE` | `ON` | Native CPU arch tuning |
|
|
139
|
+
| `CMAKE_BUILD_TYPE` | `Release` | |
|
|
140
|
+
|
|
141
|
+
Source tree: `/home/hyzen/repo/llama-mainline` at commit `82209ef`.
|
|
142
|
+
Build dir: `build-fast/`. Binary RUNPATH is baked to that absolute
|
|
143
|
+
path; do not rename the directory.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Service-level operational settings
|
|
148
|
+
|
|
149
|
+
| | |
|
|
150
|
+
|---|---|
|
|
151
|
+
| systemd unit | `/etc/systemd/system/llama.service` |
|
|
152
|
+
| `MemoryHigh` | 12 GB (host RAM soft cap) |
|
|
153
|
+
| `MemoryMax` | 16 GB (host RAM hard cap; OOM-kill if exceeded) |
|
|
154
|
+
| `MemorySwapMax` | 0 (process is forbidden from touching swap) |
|
|
155
|
+
| `Restart` | `always`, `RestartSec=3` |
|
|
156
|
+
| Daily restart timer | `llama-restart.timer` at 04:00 EDT ±30 min via `systemctl try-restart` |
|
|
157
|
+
|
|
158
|
+
KV cache quantization is q8_0 (both K and V). Flash Attention is
|
|
159
|
+
enabled. Gemma 4 sliding-window attention keeps KV at ~500 MiB
|
|
160
|
+
even at 32k context (5 of 30 layers full-context, 25 SWA-capped;
|
|
161
|
+
SWA window = 1024 tokens; pattern is full-context every 6th layer
|
|
162
|
+
per `gemma4.attention.sliding_window_pattern`).
|
|
163
|
+
|
|
164
|
+
Full ExecStart (for cite-verbatim purposes):
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
/home/hyzen/repo/llama-mainline/build-fast/bin/llama-server \
|
|
168
|
+
--model /home/hyzen/repo/turbo/models/gemma/macher.gguf \
|
|
169
|
+
--ctx-size 32768 --parallel 1 \
|
|
170
|
+
-fa on -ctk q8_0 -ctv q8_0 \
|
|
171
|
+
-ngl 999 -b 1024 -ub 512 \
|
|
172
|
+
-t 12 -tb 24 \
|
|
173
|
+
--host 127.0.0.1 --port 11435 \
|
|
174
|
+
--jinja --reasoning-budget 4096 \
|
|
175
|
+
--cache-ram 4096 --cache-reuse 256 \
|
|
176
|
+
--temp 0 --top-p 1.0 --repeat-penalty 1.0
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
`--cache-ram 4096 --cache-reuse 256` enables the 4 GiB host-RAM
|
|
180
|
+
prompt cache; first-token latency on warm cache is ~10× faster
|
|
181
|
+
than cold. `--reasoning-budget 4096` caps the thinking phase at
|
|
182
|
+
4096 tokens before forcing the model into the answer phase.
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Measured single-stream baseline (this config)
|
|
187
|
+
|
|
188
|
+
Captured from steady-state probes on 2026-04-29 (after warmup).
|
|
189
|
+
Carried forward as of 2026-04-30: the subsequent changes
|
|
190
|
+
(`--reasoning-budget`, `--cache-ram`, `--cache-reuse` added; chat
|
|
191
|
+
template metadata rewritten in-place) do not touch tensor data,
|
|
192
|
+
attention path, or sampler chain, so generation throughput is
|
|
193
|
+
unaffected. Re-probe before publishing.
|
|
194
|
+
|
|
195
|
+
| Metric | Value |
|
|
196
|
+
|---|---|
|
|
197
|
+
| Generation throughput | **~168 tokens/sec** at 32k ctx (~187 t/s at 16k ctx with FP16 KV) |
|
|
198
|
+
| Per-token latency | ~5.95 ms/token |
|
|
199
|
+
| Prompt eval, small prompts (warm cache) | ~900 t/s |
|
|
200
|
+
| Prompt eval, large prompts (10k+ tokens) | ~5,600 t/s |
|
|
201
|
+
| Time-to-first-token, 10k-token prompt | ~1.9 s |
|
|
202
|
+
| VRAM at idle after model load | ~14.6 / 15.84 GB |
|
|
203
|
+
| Theoretical bandwidth ceiling | 421 t/s (4B active × 4.25 bpw / 896 GB/s) |
|
|
204
|
+
| Observed efficiency vs ceiling | ~40% |
|
|
205
|
+
|
|
206
|
+
Sampling is deterministic at temp 0; numbers above are reproducible
|
|
207
|
+
to <0.5% across trials within the same llama-server lifetime.
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## How to re-probe
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
# llama-server runtime
|
|
215
|
+
curl -s http://127.0.0.1:11435/props | python3 -m json.tool | head -60
|
|
216
|
+
|
|
217
|
+
# GGUF metadata
|
|
218
|
+
# (parser script in this repo; or use `gguf-dump` if installed)
|
|
219
|
+
|
|
220
|
+
# GPU
|
|
221
|
+
lspci | grep -iE "vga|3d|display"
|
|
222
|
+
cat /proc/driver/nvidia/version
|
|
223
|
+
|
|
224
|
+
# CPU / RAM / OS
|
|
225
|
+
grep -m1 "model name" /proc/cpuinfo
|
|
226
|
+
nproc
|
|
227
|
+
grep MemTotal /proc/meminfo
|
|
228
|
+
grep PRETTY_NAME /etc/os-release
|
|
229
|
+
uname -a
|
|
230
|
+
```
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
# CLIENT_INTERFACE
|
|
2
|
+
|
|
3
|
+
Wire-protocol contract for any client that drives a rummy server (nvim,
|
|
4
|
+
CLI, tbench harness, future GUIs). Pulse + query model: the server
|
|
5
|
+
emits a content-free `run/changed` notification when entries land;
|
|
6
|
+
clients reconcile against the entry store on demand.
|
|
7
|
+
|
|
8
|
+
The entry store is the only source of truth for run progress. The
|
|
9
|
+
server tells the client *that* something changed — never *what*.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## TL;DR
|
|
14
|
+
|
|
15
|
+
1. Connect a JSON-RPC websocket; `rummy/hello` to register the project.
|
|
16
|
+
2. `set run://...` (or omit alias) to start a run; you receive `{ alias }`.
|
|
17
|
+
3. Subscribe to **`run/changed`** pulses (notification from server).
|
|
18
|
+
4. On each pulse, call **`getEntries(run, { since, pattern })`** to fetch
|
|
19
|
+
what's new. The reply is a flat list of insertion-ordered entries.
|
|
20
|
+
Pass `withBody: true` if you want the body inline (otherwise omit and
|
|
21
|
+
pull bodies via `getRun` or per-row).
|
|
22
|
+
5. Track the highest `id` you've seen per run; pass it as `since` next pulse.
|
|
23
|
+
6. Resolve any `state: "proposed"` entries by writing back via `set`
|
|
24
|
+
with `state: "resolved" | "cancelled" | "failed"`.
|
|
25
|
+
7. Drive UI from the entry stream + `runs.status`; the run is complete
|
|
26
|
+
when its row reaches a terminal status (200/204/413/422/499/500).
|
|
27
|
+
|
|
28
|
+
No typed payloads. No "render this widget" hints. The store is the
|
|
29
|
+
narrative; the client decides what to show.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## 1. Connection & handshake
|
|
34
|
+
|
|
35
|
+
WebSocket JSON-RPC 2.0. Default port `3044`. Send:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{ "jsonrpc": "2.0", "method": "rummy/hello", "params": {
|
|
39
|
+
"name": "my-client", "projectRoot": "/abs/path",
|
|
40
|
+
"clientVersion": "2.0.0"
|
|
41
|
+
}, "id": 1 }
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Reply: `{ rummyVersion, projectId, projectRoot }`. The server enforces
|
|
45
|
+
**MAJOR-version match** between client and server protocol versions and
|
|
46
|
+
rejects on mismatch.
|
|
47
|
+
|
|
48
|
+
After `rummy/hello`, every subsequent RPC carries the project context
|
|
49
|
+
implicitly — the server knows which project this socket belongs to.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## 2. Starting a run
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{ "method": "set", "params": {
|
|
57
|
+
"path": "run://",
|
|
58
|
+
"body": "Write a brief OC_RIVERS.md ...",
|
|
59
|
+
"attributes": { "model": "fast", "mode": "act", "yolo": false }
|
|
60
|
+
} }
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The server returns `{ alias }` immediately and kicks off the run async.
|
|
64
|
+
`mode` is `"ask"` or `"act"`. `yolo: true` opts out of client proposal
|
|
65
|
+
resolution (server auto-accepts everything and materializes file edits
|
|
66
|
+
to disk).
|
|
67
|
+
|
|
68
|
+
Aliases are formatted `<modelAlias>_<unixMs>` (e.g.
|
|
69
|
+
`gfast_1777422716094`). The format is **not a stable public contract**
|
|
70
|
+
— treat the alias as an opaque string and don't parse it. To recover
|
|
71
|
+
the model, read `runs.model` via `getRun`.
|
|
72
|
+
|
|
73
|
+
To **cancel**:
|
|
74
|
+
```json
|
|
75
|
+
{ "method": "set", "params": {
|
|
76
|
+
"path": "run://gfast_1777422716094", "state": "cancelled"
|
|
77
|
+
} }
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
To **inject a continuation prompt** into an existing run, write to its
|
|
81
|
+
`run://` path with a `body` and `attributes.mode`. To **fork** a run,
|
|
82
|
+
include `attributes.fork: true`.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## 3. The `run/changed` pulse
|
|
87
|
+
|
|
88
|
+
The server emits this notification any time an entry write occurs in
|
|
89
|
+
the project. Payload is intentionally minimal:
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{ "method": "run/changed", "params": {
|
|
93
|
+
"run": "gfast_1777422716094",
|
|
94
|
+
"runId": 42,
|
|
95
|
+
"path": "log://turn_3/set/notes.md",
|
|
96
|
+
"changeType": "insert"
|
|
97
|
+
} }
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
The pulse is **content-free** — it does not carry the entry body, the
|
|
101
|
+
run status, telemetry, or render hints. It is only a hint that the
|
|
102
|
+
store has moved. Treat it as a debounce signal.
|
|
103
|
+
|
|
104
|
+
**Identifiers.** Both `run` (string alias, what you pass to other
|
|
105
|
+
RPCs) and `runId` (integer, the SQLite primary key) are included.
|
|
106
|
+
Clients should key UI state by `run`. `runId` is informational and may
|
|
107
|
+
be useful when multi-tenancy / cross-project bookkeeping requires a
|
|
108
|
+
globally unique key, but you never pass it back to the server.
|
|
109
|
+
|
|
110
|
+
**Delivery.** Pulses are best-effort and may be coalesced server-side
|
|
111
|
+
during burst writes. Use `since` (§4) for catch-up — a missed pulse is
|
|
112
|
+
recovered on the next reliable pulse by `getEntries(run, { since })`
|
|
113
|
+
returning every entry that landed in the gap. Do not assume one pulse
|
|
114
|
+
per write.
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## 4. Reconciling via `getEntries`
|
|
119
|
+
|
|
120
|
+
After receiving (or coalescing) one or more pulses for a given run,
|
|
121
|
+
query for the diff:
|
|
122
|
+
|
|
123
|
+
```json
|
|
124
|
+
{ "method": "getEntries", "params": {
|
|
125
|
+
"run": "gfast_1777422716094",
|
|
126
|
+
"pattern": "**",
|
|
127
|
+
"since": 1234,
|
|
128
|
+
"limit": 200,
|
|
129
|
+
"withBody": false
|
|
130
|
+
} }
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**Parameters:**
|
|
134
|
+
|
|
135
|
+
- **`run`** — alias from `rummy/hello`-then-`set run://`.
|
|
136
|
+
- **`pattern`** — glob over entry path. Default `"*"`. Use `"**"` to
|
|
137
|
+
mirror everything, `"log://**"` for the audit trail, etc.
|
|
138
|
+
- **`since`** — the highest `id` you've already processed for this run
|
|
139
|
+
(or `0` / omit on first call). Server returns only entries with
|
|
140
|
+
`id > since`, ordered by `id` ASC (insertion order).
|
|
141
|
+
- **`limit`** — cap result count; chunk catch-up by re-querying with
|
|
142
|
+
the new high-water mark.
|
|
143
|
+
- **`scheme`**, **`state`**, **`visibility`** — exact-match filters.
|
|
144
|
+
- **`bodyFilter`** — substring/glob match against entry body content;
|
|
145
|
+
filters which **rows** are returned by their body. *Not* a body-
|
|
146
|
+
inclusion knob — for that, see `withBody`.
|
|
147
|
+
- **`withBody`** — when `true`, each returned row carries `body`
|
|
148
|
+
inline. Default `false` to keep pulse-reconcile traffic lean.
|
|
149
|
+
|
|
150
|
+
**Returned row shape:**
|
|
151
|
+
|
|
152
|
+
| Field | Type | Notes |
|
|
153
|
+
|--------------|--------------------------------|-------------------------------------------------|
|
|
154
|
+
| `id` | integer | Monotonic insertion id; the `since` cursor key. |
|
|
155
|
+
| `path` | string | URI-encoded; e.g. `log://turn_1/update/done`. |
|
|
156
|
+
| `scheme` | string \| null | URI scheme of `path`, or `null` for bare files.|
|
|
157
|
+
| `state` | string | `proposed` / `resolved` / `cancelled` / `failed` / `streaming`. |
|
|
158
|
+
| `outcome` | string \| null | Free-form outcome label (e.g. `not_found`). |
|
|
159
|
+
| `visibility` | string | `visible` / `summarized` / `archived`. |
|
|
160
|
+
| `turn` | integer | Turn this entry was written in. |
|
|
161
|
+
| `tokens` | integer | `countTokens(body)` — included even when body isn't. |
|
|
162
|
+
| `attributes` | object | Always parsed JSON object (never a string). |
|
|
163
|
+
| `body` | string (only with `withBody`) | Full entry body. Omitted by default. |
|
|
164
|
+
|
|
165
|
+
**Important:** when `since` is set, results are ordered by `id` ASC
|
|
166
|
+
(insertion order — what catch-up streams want). When `since` is
|
|
167
|
+
omitted, results are ordered by `path` ASC (browse mode — what
|
|
168
|
+
inventory walks want). Pick the mode that matches your use case.
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## 5. Resolving proposals
|
|
173
|
+
|
|
174
|
+
Some entries land in `state: "proposed"` — the run is parked waiting
|
|
175
|
+
for the client to decide. Examples: file edits (`log://turn_N/set/...`),
|
|
176
|
+
shell commands (`log://turn_N/sh/...`), `ask_user` prompts.
|
|
177
|
+
|
|
178
|
+
To accept, reject, or fail a proposal, write back through `set`:
|
|
179
|
+
|
|
180
|
+
```json
|
|
181
|
+
{ "method": "set", "params": {
|
|
182
|
+
"run": "gfast_1777422716094",
|
|
183
|
+
"path": "log://turn_3/set/notes.md",
|
|
184
|
+
"state": "resolved",
|
|
185
|
+
"body": ""
|
|
186
|
+
} }
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
| Resolution | `state` | Meaning |
|
|
190
|
+
|------------|--------------|--------------------------------------|
|
|
191
|
+
| accept | `resolved` | Apply the proposal; server materializes side effects |
|
|
192
|
+
| reject | `cancelled` | Drop the proposal; the run continues |
|
|
193
|
+
| error | `failed` | The proposal couldn't be applied; the run aborts |
|
|
194
|
+
|
|
195
|
+
For `ask_user` proposals, put the user's answer in `body`.
|
|
196
|
+
|
|
197
|
+
The server's response carries `{ status }` reflecting the run's
|
|
198
|
+
**current** status (102 mid-run, terminal at completion). Do **not**
|
|
199
|
+
treat `status >= 200` from a resolve response as terminal — the run may
|
|
200
|
+
still be active. Use the run row's status (via `getRun` or by tracking
|
|
201
|
+
pulses) as authoritative.
|
|
202
|
+
|
|
203
|
+
If a run was started with `attributes.yolo: true`, you do not need to
|
|
204
|
+
register a resolver — the server auto-accepts every proposal
|
|
205
|
+
server-side and materializes file edits to disk under `projectRoot`.
|
|
206
|
+
For `ask_user` under yolo the server cannot supply a meaningful answer
|
|
207
|
+
on the user's behalf; yolo runs that emit `ask_user` proposals park
|
|
208
|
+
indefinitely (or until cancelled). Treat `ask_user` + yolo as a client
|
|
209
|
+
configuration error.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## 6. Reading bodies and run state
|
|
214
|
+
|
|
215
|
+
`getEntries` returns metadata only by default. Two ways to get bodies:
|
|
216
|
+
|
|
217
|
+
1. **`getEntries` with `withBody: true`** — bodies for matched rows
|
|
218
|
+
inline. Bandwidth scales with what you query for; bound it with
|
|
219
|
+
`pattern` / `limit`.
|
|
220
|
+
2. **`getRun(run)`** — full structured snapshot of one run with
|
|
221
|
+
bodies, telemetry, history, latest prompt and summary. Use on
|
|
222
|
+
initial open of a run document or after long disconnects.
|
|
223
|
+
|
|
224
|
+
```json
|
|
225
|
+
{ "method": "getRun", "params": { "run": "gfast_1777422716094" } }
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**`getRun` response shape (pinned):**
|
|
229
|
+
|
|
230
|
+
```json
|
|
231
|
+
{
|
|
232
|
+
"run": "gfast_1777422716094",
|
|
233
|
+
"turn": 4,
|
|
234
|
+
"status": 200,
|
|
235
|
+
"model": "gfast",
|
|
236
|
+
"temperature": null,
|
|
237
|
+
"persona": null,
|
|
238
|
+
"context_limit": null,
|
|
239
|
+
"context": {
|
|
240
|
+
"telemetry": {
|
|
241
|
+
"prompt_tokens": 1928,
|
|
242
|
+
"completion_tokens": 75,
|
|
243
|
+
"total_tokens": 2003,
|
|
244
|
+
"cost": 0
|
|
245
|
+
},
|
|
246
|
+
"reasoning": [ { "path": "reasoning://N", "body": "...", "turn": N } ],
|
|
247
|
+
"content": [ { "path": "content://N", "body": "...", "turn": N } ],
|
|
248
|
+
"history": [ {
|
|
249
|
+
"tool": "set",
|
|
250
|
+
"path": "log://turn_N/set/notes.md",
|
|
251
|
+
"status": 200,
|
|
252
|
+
"body": "...",
|
|
253
|
+
"attributes": { "action": "set", "status": 200, "...": "..." },
|
|
254
|
+
"turn": N
|
|
255
|
+
} ]
|
|
256
|
+
},
|
|
257
|
+
"last_user_prompt": "...",
|
|
258
|
+
"last_summary": "..."
|
|
259
|
+
}
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
- `attributes` on `history` rows is always a parsed object (never a
|
|
263
|
+
JSON string).
|
|
264
|
+
- `context.reasoning` and `context.content` carry the model's per-turn
|
|
265
|
+
reasoning and assistant content respectively. Empty arrays for runs
|
|
266
|
+
whose model didn't surface those channels.
|
|
267
|
+
- `last_summary` is the body of the most recent `log://turn_N/update/*`
|
|
268
|
+
entry. `last_user_prompt` is the body of the most recent `prompt://*`
|
|
269
|
+
entry (the active user prompt for this run).
|
|
270
|
+
|
|
271
|
+
For incremental updates inside a session, prefer the pulse +
|
|
272
|
+
`getEntries` flow over polling `getRun`.
|
|
273
|
+
|
|
274
|
+
---
|
|
275
|
+
|
|
276
|
+
## 7. Terminal detection & telemetry
|
|
277
|
+
|
|
278
|
+
A run's `status` field on its row is authoritative. Terminal statuses:
|
|
279
|
+
`200, 204, 413, 422, 499, 500`. Any other value is in-flight (typically
|
|
280
|
+
`102`).
|
|
281
|
+
|
|
282
|
+
**Status updates land at `log://turn_N/update/<slug>` with:**
|
|
283
|
+
- `attributes.action = "update"`
|
|
284
|
+
- `attributes.status = <int>` — the integer status code (e.g. `145`,
|
|
285
|
+
`156`, `167`, `200`)
|
|
286
|
+
- `body` — the human-readable summary text
|
|
287
|
+
|
|
288
|
+
Latest `update` entry's body is the latest summary. Terminal status is
|
|
289
|
+
detected when:
|
|
290
|
+
1. The `runs.status` row reaches a terminal value (read via `getRun`),
|
|
291
|
+
**or**
|
|
292
|
+
2. The latest `log://turn_N/update/*` entry's `attributes.status` is in
|
|
293
|
+
the terminal set.
|
|
294
|
+
|
|
295
|
+
(1) is the authoritative read; (2) is a convenience for UIs that are
|
|
296
|
+
already watching the entry stream.
|
|
297
|
+
|
|
298
|
+
**Errors land at `log://turn_N/error/<slug>` with:**
|
|
299
|
+
- `attributes.action = "error"`
|
|
300
|
+
- Body carrying the error detail; `outcome` may carry a short label
|
|
301
|
+
(e.g. `not_found`, `validation`).
|
|
302
|
+
|
|
303
|
+
There is no separate `update://` or `error://` URI scheme — these are
|
|
304
|
+
log channels under the audit trail. Filter via `pattern: "log://**/update/**"`
|
|
305
|
+
or `pattern: "log://**/error/**"` if you only want one channel.
|
|
306
|
+
|
|
307
|
+
**Per-turn telemetry** (token counts, model alias, cached tokens, etc.)
|
|
308
|
+
is in the `turns` table. Surface it by:
|
|
309
|
+
- Calling `getRun(run)` and reading `context.telemetry` (aggregated
|
|
310
|
+
across all turns of the run), **or**
|
|
311
|
+
- Querying the SQLite store directly if your client runs alongside the
|
|
312
|
+
server (private optimization, not a wire contract).
|
|
313
|
+
|
|
314
|
+
Per-turn breakdowns (rather than aggregated) require a direct DB read
|
|
315
|
+
for now; we may add a wire RPC if a need surfaces.
|
|
316
|
+
|
|
317
|
+
A common UI pattern:
|
|
318
|
+
|
|
319
|
+
- Maintain a `Map<runAlias, lastSeenId>`.
|
|
320
|
+
- On `run/changed`: `getEntries(run, { since })`, update `lastSeenId`,
|
|
321
|
+
render new entries inline.
|
|
322
|
+
- For runs in your foreground UI, periodically (every ~2s, or on
|
|
323
|
+
pulse) fetch bodies for newly-arrived rows via `getEntries` with
|
|
324
|
+
`withBody: true` filtered by the new `id` range.
|
|
325
|
+
- Detect terminal by watching for `attributes.action === "update"`
|
|
326
|
+
with `attributes.status` in the terminal set, or by polling
|
|
327
|
+
`runs.status` via `getRun` on a low cadence.
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## 8. Other notifications
|
|
332
|
+
|
|
333
|
+
Beyond `run/changed`, the server emits:
|
|
334
|
+
|
|
335
|
+
| Notification | Purpose |
|
|
336
|
+
|---------------------|--------------------------------------------------------|
|
|
337
|
+
| `ui/render` | **Advisory only.** Streaming model output for live thinking displays. Payload shape and cadence are not part of the wire contract; clients may ignore. The entry stream + bodies is the durable record. |
|
|
338
|
+
| `ui/notify` | Toast-level operator messages. `params: { message, level }`. |
|
|
339
|
+
| `stream/cancelled` | Server-initiated stream abort; client should kill its local process if it owned the stream. |
|
|
340
|
+
|
|
341
|
+
A minimal client can ignore all three and still function — the entry
|
|
342
|
+
store carries the durable record.
|
|
343
|
+
|
|
344
|
+
---
|
|
345
|
+
|
|
346
|
+
## 9. Migrating from the typed-notification protocol
|
|
347
|
+
|
|
348
|
+
The legacy protocol shipped three typed notifications:
|
|
349
|
+
|
|
350
|
+
- `run/state` — fired after each turn with status, summary, history,
|
|
351
|
+
unknowns, telemetry.
|
|
352
|
+
- `run/progress` — turn-status pings ("thinking", "processing").
|
|
353
|
+
- `run/proposal` — pending proposal payload + metadata.
|
|
354
|
+
|
|
355
|
+
All three are **gone**. Their information is fully derivable from the
|
|
356
|
+
entry store:
|
|
357
|
+
|
|
358
|
+
| Old surface | New equivalent |
|
|
359
|
+
|-------------------------|------------------------------------------------------------------|
|
|
360
|
+
| `run/state.status` | `runs.status` row field (via `getRun`), or latest `log://turn_N/update/*` with `attributes.status` in terminal set |
|
|
361
|
+
| `run/state.summary` | latest `log://turn_N/update/*` entry body, or `getRun.last_summary` |
|
|
362
|
+
| `run/state.history` | `getEntries(run, { pattern: "log://**" })` |
|
|
363
|
+
| `run/state.unknowns` | `getEntries(run, { pattern: "unknown://**" })` |
|
|
364
|
+
| `run/state.telemetry` | `getRun(run).context.telemetry` (aggregated) |
|
|
365
|
+
| `run/progress` | (drop — pulse cadence is sufficient) |
|
|
366
|
+
| `run/proposal.proposed` | `getEntries(run, { state: "proposed", since })` |
|
|
367
|
+
|
|
368
|
+
If your client previously closed a document the moment a `run/state`
|
|
369
|
+
arrived with `status >= 200`: do **not** apply the same logic to a
|
|
370
|
+
resolve-RPC response. Track `runs.status` instead.
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
374
|
+
## 10. Multi-client semantics
|
|
375
|
+
|
|
376
|
+
Multiple clients may connect to the same server simultaneously.
|
|
377
|
+
Conflict resolution is **last-write-wins** at the entry-store level —
|
|
378
|
+
two clients resolving the same `(run, path)` proposal will both
|
|
379
|
+
succeed; the second resolution's `state` and `body` overwrite the
|
|
380
|
+
first. The server does not lock or arbitrate.
|
|
381
|
+
|
|
382
|
+
For UI safety: implement optimistic local state on resolve, but
|
|
383
|
+
re-render from `getEntries` on pulse to absorb any concurrent client's
|
|
384
|
+
write. The pulse will always reach you eventually; don't pessimistically
|
|
385
|
+
lock.
|
|
386
|
+
|
|
387
|
+
---
|
|
388
|
+
|
|
389
|
+
## 11. Reference
|
|
390
|
+
|
|
391
|
+
- Server source of truth: `src/server/ClientConnection.js`,
|
|
392
|
+
`src/plugins/rpc/rpc.js`.
|
|
393
|
+
- Entry store schema: `migrations/001_initial_schema.sql`.
|
|
394
|
+
- Pulse emission point: `hooks.entry.changed` → `run/changed`.
|
|
395
|
+
- Protocol version constant: `src/server/protocol.js`
|
|
396
|
+
(`RUMMY_PROTOCOL_VERSION`).
|