typeclaw 0.36.8 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/README.md +2 -2
  2. package/package.json +3 -2
  3. package/src/agent/index.ts +31 -11
  4. package/src/agent/live-sessions.ts +12 -0
  5. package/src/agent/model-fallback.ts +17 -15
  6. package/src/agent/model-overrides.ts +2 -2
  7. package/src/agent/session-meta.ts +10 -0
  8. package/src/agent/subagents.ts +11 -2
  9. package/src/agent/system-prompt.ts +9 -3
  10. package/src/agent/todo/continuation-policy.ts +6 -3
  11. package/src/agent/todo/continuation-wiring.ts +4 -2
  12. package/src/agent/todo/continuation.ts +3 -3
  13. package/src/agent/tools/todo/index.ts +27 -4
  14. package/src/bundled-plugins/agent-browser/index.ts +33 -108
  15. package/src/bundled-plugins/agent-browser/shim.ts +3 -94
  16. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
  17. package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
  19. package/src/bundled-plugins/memory/README.md +80 -23
  20. package/src/bundled-plugins/memory/append-tool.ts +74 -53
  21. package/src/bundled-plugins/memory/citation-superset.ts +4 -0
  22. package/src/bundled-plugins/memory/citations.ts +54 -0
  23. package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
  24. package/src/bundled-plugins/memory/dreaming.ts +444 -21
  25. package/src/bundled-plugins/memory/index.ts +544 -400
  26. package/src/bundled-plugins/memory/load-memory.ts +87 -10
  27. package/src/bundled-plugins/memory/load-shards.ts +48 -22
  28. package/src/bundled-plugins/memory/memory-logger.ts +95 -106
  29. package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
  30. package/src/bundled-plugins/memory/parent-link.ts +33 -0
  31. package/src/bundled-plugins/memory/paths.ts +12 -0
  32. package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
  33. package/src/bundled-plugins/memory/references/load-references.ts +212 -0
  34. package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +282 -45
  36. package/src/bundled-plugins/memory/stream-events.ts +1 -0
  37. package/src/bundled-plugins/memory/stream-io.ts +28 -3
  38. package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
  39. package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
  40. package/src/bundled-plugins/memory/vector/config.ts +28 -0
  41. package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
  42. package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
  43. package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
  44. package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
  45. package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
  46. package/src/bundled-plugins/memory/vector/passages.ts +125 -0
  47. package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
  48. package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
  49. package/src/bundled-plugins/memory/vector/startup.ts +71 -0
  50. package/src/bundled-plugins/memory/vector/store.ts +203 -0
  51. package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
  52. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
  53. package/src/channels/router.ts +239 -40
  54. package/src/cli/incomplete-init.ts +57 -0
  55. package/src/cli/init.ts +143 -12
  56. package/src/cli/inspect.ts +11 -5
  57. package/src/cli/model.ts +112 -34
  58. package/src/cli/restart.ts +24 -0
  59. package/src/cli/start.ts +24 -0
  60. package/src/cli/tunnel.ts +53 -8
  61. package/src/config/config.ts +110 -19
  62. package/src/config/index.ts +5 -1
  63. package/src/config/models-mutation.ts +29 -11
  64. package/src/config/providers-mutation.ts +2 -2
  65. package/src/config/providers.ts +146 -12
  66. package/src/container/shared.ts +9 -0
  67. package/src/container/start.ts +87 -4
  68. package/src/cron/consumer.ts +13 -7
  69. package/src/hostd/models.ts +64 -0
  70. package/src/hostd/paths.ts +6 -0
  71. package/src/hostd/portbroker-manager.ts +2 -2
  72. package/src/init/checkpoint.ts +201 -0
  73. package/src/init/dockerfile.ts +121 -34
  74. package/src/init/gitignore.ts +7 -7
  75. package/src/init/index.ts +41 -9
  76. package/src/init/models-dev.ts +96 -21
  77. package/src/init/oauth-login.ts +3 -3
  78. package/src/init/progress.ts +29 -0
  79. package/src/init/validate-api-key.ts +4 -0
  80. package/src/inspect/index.ts +13 -6
  81. package/src/inspect/item-list.ts +11 -2
  82. package/src/inspect/live-list.ts +65 -0
  83. package/src/inspect/open-item.ts +22 -1
  84. package/src/inspect/session-list.ts +29 -0
  85. package/src/models/embedding-model.ts +114 -0
  86. package/src/models/transformers-version.ts +55 -0
  87. package/src/plugin/types.ts +3 -0
  88. package/src/portbroker/container-server.ts +23 -0
  89. package/src/portbroker/forward-request-bus.ts +35 -0
  90. package/src/portbroker/forward-result-bus.ts +2 -3
  91. package/src/portbroker/hostd-client.ts +182 -36
  92. package/src/portbroker/index.ts +6 -1
  93. package/src/portbroker/protocol.ts +9 -2
  94. package/src/run/channel-session-factory.ts +11 -1
  95. package/src/run/index.ts +41 -7
  96. package/src/server/command-runner.ts +24 -1
  97. package/src/server/index.ts +42 -8
  98. package/src/shared/index.ts +2 -0
  99. package/src/shared/protocol.ts +31 -0
  100. package/src/skills/typeclaw-channels/SKILL.md +4 -4
  101. package/src/skills/typeclaw-config/SKILL.md +2 -2
  102. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  103. package/src/skills/typeclaw-permissions/SKILL.md +3 -3
  104. package/src/skills/typeclaw-skills/SKILL.md +1 -1
  105. package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
  106. package/src/tunnels/providers/cloudflare-quick.ts +65 -7
  107. package/src/tunnels/upstream-probe.ts +25 -0
  108. package/typeclaw.schema.json +156 -67
  109. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
  110. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
  111. package/src/portbroker/bind-with-forward.ts +0 -102
@@ -13,39 +13,64 @@ Auto-loaded by every TypeClaw agent. No `plugins[]` entry to add and no opt-out.
13
13
  "bufferBytes": 500000,
14
14
  "injectionBudgetBytes": 16384,
15
15
  "minIdleDeltaLines": 3,
16
- "dreaming": { "schedule": "*/30 * * * *" }
16
+ "dreaming": { "schedule": "*/30 * * * *" },
17
+ "vector": { "enabled": false }
17
18
  }
18
19
  }
19
20
  ```
20
21
 
21
- | Field | Default | Effect |
22
- | ----------------------------- | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
23
- | `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. |
24
- | `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run. `0` disables. Minimum `10000` when non-zero. |
25
- | `memory.injectionBudgetBytes` | `16384` | Total shard-body budget for direct-mode memory injection. Above this, `loadMemory` switches to index-mode (headings + metadata only) and the agent must call `memory_search` to fetch specific topics or recent stream events. Minimum `4096`. |
26
- | `memory.minIdleDeltaLines` | `3` | Minimum JSONL line growth since the last `memory-logger` run required to fire an idle spawn. Below this, the idle timer ticks but no spawn fires. `0` disables (legacy always-fire-on-idle behavior). Independent of `bufferBytes`. |
27
- | `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression for the dreaming subagent. |
22
+ | Field | Default | Effect |
23
+ | ----------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
24
+ | `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. |
25
+ | `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run. `0` disables. Minimum `10000` when non-zero. |
26
+ | `memory.injectionBudgetBytes` | `16384` | Total shard-body budget for direct-mode memory injection. Above this, `loadMemory` switches to index-mode (headings + metadata only) and the agent must call `memory_search` to fetch specific topics or recent stream events. Minimum `4096`. |
27
+ | `memory.minIdleDeltaLines` | `3` | Minimum JSONL line growth since the last `memory-logger` run required to fire an idle spawn. Below this, the idle timer ticks but no spawn fires. `0` disables (legacy always-fire-on-idle behavior). Independent of `bufferBytes`. |
28
+ | `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression for the dreaming subagent. |
29
+ | `memory.vector.enabled` | `false` | Master switch for per-turn vector memory. When `true`, the `# Memory` system-prompt section is suppressed and memory is injected per turn into the user prompt via `hybridSearch` (over budget) or direct shards (under budget); the host downloads the embedding model and mounts it into the container. When `false`, memory lives in the system prompt and no model is downloaded. |
28
30
 
29
- All fields are **restart-required** — the plugin reads them once at boot.
31
+ All fields are **restart-required** — the plugin reads them once at boot. The `memory` block is plugin-owned config that passes through core's schema (`.catchall`), so it is **outside the core `FIELD_EFFECTS` reload fence**: changing any `memory.*` field and running `typeclaw reload` silently no-ops with no "restart-required" warning. Restart the container to apply.
30
32
 
31
33
  ## What it contributes
32
34
 
33
- | Kind | Name | Notes |
34
- | -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
35
- | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
36
- | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
37
- | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
38
- | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Plain queries are phrase-first: the whole query is tried as one substring, and if that finds nothing the query is split on whitespace and the distinct words are OR-matched, ranked by how many words each hit contains (regex queries never fall back). Results are discriminated by `source: "topic" \| "stream"`; exact-phrase (and regex) results list topics first, then streams newest-first, while word-fallback results are ranked by matched-word count with that order as the tiebreak (so a higher-scoring stream can precede a lower-scoring topic). |
39
- | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
40
- | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
41
- | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
42
- | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
43
- | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
35
+ | Kind | Name | Notes |
36
+ | -------- | -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
37
+ | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
38
+ | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
39
+ | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. Declares `profile: 'fast'` (retrieval is "≤3 keyword searches + 1 write", no reasoning required) and `timeoutMs: 30_000` so a wedged provider call releases the coalescing key instead of poisoning the cache for every subsequent turn. |
40
+ | Tool | `memory_search` | Main-agent tool. Pass `query` to search, or `topic` to look up one shard by exact slug (not both). Search is substring/regex across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Plain queries are phrase-first: the whole query is tried as one substring, and if that finds nothing the query is split on whitespace and the distinct words are OR-matched, ranked by how many words each hit contains (regex queries never fall back). Results are discriminated by `source: "topic" \| "stream"`; exact-phrase (and regex) results list topics first, then streams newest-first, while word-fallback results are ranked by matched-word count with that order as the tiebreak (so a higher-scoring stream can precede a lower-scoring topic). The `topic` mode reads `memory/topics/<slug>.md` directly via `loadShard` and returns that one shard with its full body — no fuzzy search — so a channel agent can open a topic whose slug the per-turn injection already showed it, instead of re-searching for a body the retrieval layer already located. When no topic shard matches the slug it falls back to a reference of the same slug (`memory/references/<slug>.md`), because the per-turn injection renders topic and reference entries with the same `slug:` line and the same `memory_search({ topic: "<slug>" })` recovery hint — so a reference slug surfaced in channel memory must resolve here, not dead-end. Bad slugs (path traversal) come back as a structured error; a slug matching neither a topic nor a reference is an empty match set. |
41
+ | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
42
+ | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
43
+ | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
44
+ | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
45
+ | Hook | `session.turn.start` | **Vector on:** renders the per-turn user-prompt memory block into `event.retrievalContext.results` (direct-mode shards under budget, de-duplicated across turns per session; top-K `hybridSearch` over budget; channel origins always index/headings-only); runs for every origin that supplies a `retrievalContext`, including subagents. **Vector off:** when `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question. Fire-and-forget; failures route through the plugin logger. |
44
46
 
45
47
  ## Memory injection (two-tier, topic shards only)
46
48
 
47
49
  Default budget is 16 KB. Direct mode when shard bytes sum ≤ budget: all shard bodies are injected verbatim. Index mode when sum > budget: only heading + `cites=N, days=N, lastReinforced=YYYY-MM-DD` per shard, plus a directive for the agent to call `memory_search` to fetch specific topics or recent stream events.
48
50
 
51
+ **Where the section is injected depends on `memory.vector.enabled`:**
52
+
53
+ - **Vector off (default): system prompt.** `loadMemory` renders the `# Memory` section once at session creation; it lives in the cacheable system-prompt prefix (ordered last, since it's the most volatile block). The two-tier direct/index split above applies, and the index-mode `memory-retrieval` subagent (below) covers the over-budget case lag-by-one.
54
+ - **Vector on: user prompt, per turn.** The system-prompt `# Memory` section is suppressed entirely (`createSession`'s `suppressSystemMemory`, derived once at boot in `src/run/index.ts`). The `session.turn.start` hook instead renders memory into `event.retrievalContext.results`, which the turn-drivers append to the user message. Under budget it injects shard bodies in **direct mode**, **de-duplicated across turns** (see below); over budget it runs `hybridSearch` and injects the **top-K** most relevant shards/fragments for the current message. The direct path renders via `renderDedupedMemorySection` and the index path via `renderRetrievedMemorySection` (`load-memory.ts`). This keeps the system-prompt cache prefix stable across a session and lets retrieval track the live topic instead of a stale session-start snapshot.
55
+
56
+ **Cross-turn dedup (direct mode, non-channel).** Re-sending every shard body verbatim on every turn made a long conversation pay for the same memory block dozens of times. The plugin keeps a per-session `slug → body-hash` record (`turn-dedup.ts`, `partitionDirectShards`) and, after a shard's full body has been injected once in this session, replaces it on later turns with a one-line `slug:` reference plus a `memory_search({ topic: "<slug>" })` recovery hint. A shard whose body **changed** (a dreaming pass rewrote it) re-injects in full so the agent never reasons over a stale body. The "nothing the agent always had vanishes on an off-topic turn" guarantee is preserved by **availability, not literal presence**: every topic is still named (heading + slug) every turn, and the full body is one `memory_search` away. The per-session record is cleared on `session.end` (before the subagent-origin early-return, so subagent sessions don't leak it), so a resurrected session re-injects bodies in full rather than dangling a reference to text the rebuilt context no longer holds.
57
+
58
+ For **channel** origins, the per-turn render injects topic **headings only** — the excerpt body is stripped and the agent fetches it on demand via `memory_search`. A channel **direct-mode** turn is force-indexed via `forceIndexForChannel` in `renderVectorTurnMemory` to a headings/slugs section over **every** shard (not run through `hybridSearch`), so a full body can never reach a channel turn AND no heading is silently dropped by relevance-filtering or a stale vector index. Only channel turns that are genuinely **over budget** fall to `hybridSearch` top-K, where headings-only injection is the established channel policy regardless. This mirrors `forceIndexForChannel`'s direct-path policy that channel sessions never carry bodies: a topic heading is a self-contained belief sentence (the dreaming subagent writes exactly one), so the heading alone lets the model decide whether the topic is worth opening, while the ~10-shard excerpt block — which dominated the per-turn injection and was usually abandoned unread — no longer rides in the (uncacheable) user turn on every message. Non-channel origins (TUI/direct) keep the full excerpt on first injection, where the extra retrieval round-trip isn't worth it.
59
+
60
+ Each stripped topic is injected with its `slug:` line so the agent can re-fetch the exact shard with `memory_search({ topic: "<slug>" })` — a direct slug lookup, not a second fuzzy search over the heading text the retrieval layer already matched. Undreamed `source: "stream"` items have no topic shard yet (no slug to look up), so they instead carry a `memory_search({ query: ... })` recovery hint; the channel directive names both paths so every stripped entry — topic or stream — has an explicit way back to its full text.
61
+
62
+ The invariant `suppressSystemMemory === memory.vector.enabled` is load-bearing: a session must never carry memory in both the system prompt and the user turn.
63
+
64
+ ### Vector retrieval is parent-child, not a flat pool
65
+
66
+ When `memory.vector.enabled` is true, `hybridSearch` does **not** rank topic vectors and stream-fragment vectors together in one flat cosine pool. It uses a parent-child collapse so the result set is always topic-level current truth:
67
+
68
+ - **Match surface.** The query matches against topic-belief vectors AND fragment vectors (both lanes, fused by RRF). Fragments are the fine-grained retrieval hooks; topics are the returned unit. The keyword lane is **phrase-first with a token-OR fallback** (mirroring `memory_search`): since `hybridSearch`'s query is the whole user prompt — which never appears verbatim in a shard — a phrase-only lane would return nothing every turn and silently degrade RRF to vector-only. The fallback (`searchAllRanked`) also ranks by matched-token-count and truncates after ranking, giving RRF a real second ranker instead of alphabetical topic order.
69
+ - **Collapse to parent.** A matched fragment resolves to the topic that cites it (via the shard's `fragments:` list — the citation IS the parent foreign key) and contributes its score to that topic. It never appears as a standalone result. An **undreamed** fragment (no topic cites it yet) resolves to itself, preserving the ~30-min freshness window before dreaming consolidates it.
70
+ - **MAX-child ranking.** A collapsed topic takes the **maximum** of its members' RRF scores, never the sum. Sum would over-rank often-revised topics purely for accumulating more citations to match (the PARADE result: max beats sum when relevance is concentrated).
71
+ - **Dreamed-and-cited fragment vectors are pruned every dreaming pass.** Once a fragment is folded into a topic (dreamed AND cited), its standalone `stream:*` row is redundant — any match on it collapses to the parent topic, whose `topic:*` row is already a candidate. But the redundant row still consumes one of `store.query`'s finite `topK * 2` pre-fusion slots by raw cosine, so a topic cited on many days would otherwise hoard proportionally more slots and displace distinct topics — silently defeating the MAX-child anti-popularity design over a long container uptime. `deleteRedundantDreamedCitedStreamVectors` (`dreaming.ts`) removes these per pass; it is the same deletion that `pruneStaleRows` performs at startup (a dreamed-and-cited fragment leaves the undreamed passage set), advanced from per-restart to per-pass. Undreamed fragment rows are kept — they resolve to themselves and ARE the freshness window.
72
+ - **Superseded fragments are excluded from the match surface.** When dreaming overturns a belief on contradiction, the old fragment id moves from `fragments:` to a `superseded:` list (see below). Superseded ids stay cited — so the citation-superset GC invariant keeps the fragment alive and history is auditable — but `passages.ts` does not embed them, so a stale "uses bun" fragment can never resurface as a hook for the current "uses pnpm" belief. `parseCitations` stays section-blind so GC and frontmatter recompute still see both sections; `splitCitationsBySection` is the status-aware view the retrieval layer reads (`parent-link.ts`).
73
+
49
74
  **Undreamed daily-stream events are NOT injected into the system prompt.** They are reachable only via `memory_search`, which discriminates results by `source: "topic" | "stream"`. The agent now decides per-query whether recent observations are relevant, instead of carrying every undreamed fragment in the cached prompt prefix. Three reasons this is the right shape:
50
75
 
51
76
  1. PR #314 made `memory_search` cover the stream surface, so the duplicate copy in the system prompt no longer earns its bytes.
@@ -70,6 +95,18 @@ The subagent uses these signals to:
70
95
 
71
96
  There is no `## Historical observations` bucket. Demoted topics live as their own shards; injection-time filtering (the index/direct split) handles the prompt-budget pressure.
72
97
 
98
+ ## Muscle memory (three forms)
99
+
100
+ While reading streams, the dreaming subagent watches for **repeated multi-step procedures** the user has guided the main agent through, and codifies them. There are three forms, picked smallest-that-fits (top to bottom, stop at the first match):
101
+
102
+ 1. **Form C — plugin suggestion.** The procedure needs a runtime hook (`session.prompt` / `tool.before` / etc.), a custom tool, a cron job, or a subagent — things only a plugin can express. Recorded as a topic shard with a `proposal: plugin packages/<name>` line.
103
+ 2. **Form B — CLI suggestion.** The procedure boils down to "run this small script with these args." Recorded as a topic shard with a `proposal: cli packages/<name>` line.
104
+ 3. **Form A — skill.** The default, and where most procedures land. The procedure can be done with the tools the agent already has. Written directly to `memory/skills/<name>/SKILL.md` and auto-loaded as a first-class skill by the next session.
105
+
106
+ The split exists because the forms have different write boundaries. The dreaming subagent can write skills directly (Form A) — `memory/skills/` is inside its sandbox — but it can only **suggest** CLIs and plugins (Forms B and C), because those live under `packages/`, outside its write sandbox. A suggestion is a passive recommendation: the topic shard sits in long-term memory, and the main agent scaffolds the package only when a current user request asks for the matching procedure. CLI/plugin proposal shards are the one exception to the one-sentence belief format (see [Memory saturation](#memory-saturation)) — they keep a rationale paragraph plus the `proposal:` line that names the form and package, and carry the same fragment citations as any other shard.
107
+
108
+ The bar for codifying is identical across all three forms: the procedure is multi-step, has recurred (≥2 distinct fragments, ideally across different days), has a clearly statable trigger, and generalizes beyond the specific instances. No speculative skills or suggestions — anything the main agent never reaches for is dead weight in the prompt budget it reads on every prompt. A given CLI or plugin is suggested **once**; future dreaming runs leave the existing proposal shard alone unless new fragments show the procedure has shifted shape (e.g. a CLI that now needs a runtime hook, upgrading the proposal from `cli` to `plugin`).
109
+
73
110
  ## Citation-superset safety net
74
111
 
75
112
  `checkCitationSupersetAcrossShards` checks that the union of fragment ids cited in NEW shards is a superset of the union cited in OLD shards. Violation triggers:
@@ -83,13 +120,31 @@ A `[dreaming] citation-superset violation: …` warning logs the dropped ids and
83
120
 
84
121
  ## Files on disk
85
122
 
86
- - **`memory/topics/<slug>.md`** — per-topic shards with YAML frontmatter (`heading`, `cites`, `days`, `lastReinforced`, `tags?`) + body markdown. Runtime owns the frontmatter (recomputed after every dreaming run from the body's citations); dreaming subagent writes body only.
123
+ - **`memory/topics/<slug>.md`** — per-topic shards with YAML frontmatter (`heading`, `cites`, `days`, `lastReinforced`, `tags?`) + body markdown. Runtime owns the frontmatter (recomputed after every dreaming run from the body's citations); dreaming subagent writes body only. The body is a **compact belief record**: one belief sentence stating the current truth (subject + predicate + essential scope qualifier; the strength wording — "mentioned" / "consistently" / "always" — is calibrated from `days`), followed by the `fragments:` list (active evidence behind the belief) and an optional `superseded:` list (evidence overturned by a later contradiction — kept cited for GC/history but excluded from vector retrieval). One sentence (not a paragraph) keeps bodies small so more topics stay under the direct-injection budget; the citation lists are the only load-bearing part of the body, so the prose shape is free to be terse. Existing verbose shards are rewritten into this form by the next dreaming run (no migration).
87
124
  - **`memory/streams/yyyy-MM-dd.jsonl`** — daily fragment streams. One event per line, discriminated union of `fragment | watermark | legacy_prose`. Force-committed alongside the shards.
88
125
  - **`memory/MEMORY.md.pre-shard.bak`** — legacy pre-shard backup left by older TypeClaw versions. Safe to delete after verifying.
89
- - **`memory/skills/<name>/SKILL.md`** — muscle memory. Skills the dreaming subagent distills from repeated procedures. Auto-loaded as first-class skills.
126
+ - **`memory/skills/<name>/SKILL.md`** — muscle memory (Form A). Skills the dreaming subagent distills from repeated procedures. Auto-loaded as first-class skills. CLI/plugin suggestions (Forms B and C) are recorded as topic shards instead — see [Muscle memory](#muscle-memory-three-forms).
127
+ - **`memory/references/<slug>.md`** — verbatim reference artifacts (gated behind `memory.references.enabled`). Written by `store_reference`. When vector is enabled, a freshly stored reference is embedded into the vector index immediately via the `store_reference` on-write hook (`vector/reference-index-on-write.ts`), mirroring the stream-fragment on-write hook — so it is vector-retrievable within the same container uptime instead of only after the next startup index build. Chunks are derived by the same `referencePassagesForOne` the startup build uses, and re-storing a shorter body prunes the stale higher-index chunk rows.
90
128
  - **`memory/.dreaming-state.json`** — per-day dreamed-id sets.
91
129
  - **`memory/.retrieval-cache/<sessionId>.md`** — ephemeral retrieval summaries. Written by `memory-retrieval`, read by `loadMemory` on the next prompt of the same session, unlinked on `session.end`.
92
130
 
131
+ ## Observability
132
+
133
+ The plugin emits structured `[plugin:memory]` log lines (no separate metrics infra). The load-bearing per-run signals:
134
+
135
+ - **`[memory-logger] <session> done fragments_written=N elapsed_ms=…`** — how many fragments a logger run captured (delta of fragment events in today's stream).
136
+ - **`[dreaming] done topics_created=N topics_removed=N superseded_new=N fragments_dropped=N over_budget=N elapsed_ms=…`** — consolidation activity per run: new/removed topic shards (by snapshot path diff), net citations moved into `superseded:`, fragments GC'd, and `over_budget` (the count of topic shards whose embeddable text exceeds the model token budget — see [Embedding token budget](#embedding-token-budget)). `topics_created` + reinforcement is the "is memory getting sharper" signal; `superseded_new` is contradiction-edit volume; a persistently non-zero `over_budget` means the subagent isn't compacting the flagged shards. Derived by `computeDreamingMetrics` from the pre/post shard snapshots.
137
+ - **`[vector-retrieval] mode=index topic_results=N stream_results=N reference_results=N elapsed_ms=… [suppressed=1]`** (or `mode=direct topics=N`) — per-turn retrieval breakdown. `stream_results` counts undreamed-fragment hits that self-resolved (no citing topic yet) — the freshness-window usage signal that informs whether the undreamed surface earns its keep. `reference_results` counts verbatim-reference hits (only non-zero when `references.enabled`), broken out from `stream_results` so the two surfaces are distinguishable. `elapsed_ms` times the full `hybridSearch` (embed + cosine scan + keyword lane + RRF fusion) — the on-critical-path retrieval cost. `suppressed=1` appears when the query matched nothing after the relevance gate, so an empty memory block is distinguishable from a missing one.
138
+
139
+ These log lines are intentionally verbose for now so behavior is observable in logs; trim them once the useful subset is known.
140
+
141
+ ## Embedding token budget
142
+
143
+ The embedding model (`Xenova/multilingual-e5-base`) caps inputs at **512 tokens**; the transformers.js tokenizer truncates anything longer by default, silently dropping the tail. Canonical compact shards (heading + one belief sentence first) sit well under the cap, but legacy verbose shards, legacy prose migration events, long fragments, and very long user queries can exceed it. Handled in two layers (`vector/truncation.ts`):
144
+
145
+ 1. **Deterministic bound at embed time (primary, always on).** `embed()` calls `boundEmbeddableText` on every input before the tokenizer sees it, trimming to a character budget derived from the same conservative per-script token estimate (CJK counted 1:1, Latin ~3.5 chars/token; the tokenizer is never loaded, preserving the embedder's lazy-load). The cut is now explicit and owned, not a hidden tokenizer side effect, and the leading heading/belief sentence — the load-bearing retrieval signal — always survives because it comes first. This guarantees no silent loss even for inputs nothing ever rewrites (raw legacy prose, long queries).
146
+ 2. **Compaction routed to dreaming (remediation, vector-on only).** Bounding keeps retrieval correct but the bounded tail is still not embedded. The real fix is to make the shard fit. On each run the dreaming handler computes which topic shards are over budget (measuring `topicPassage(...).text` — the same citation-stripped string the embedder bounds — so the flag matches what is actually truncated) and renders an "Over the embedding budget" table into the subagent's prompt, alongside the strength-signal table. Dreaming rule 8 instructs it to compact those shards into the one-belief-sentence form **while preserving every `fragments:`/`superseded:` citation** — shrinking prose, never dropping a citation (a dropped citation would trip the citation-superset revert, leaving the shard both reverted and still over budget). The signal is gated on the vector index (`memory/.vectors/index.db`) existing: with vector off, nothing embeds these shards, so the budget doesn't apply and the table is suppressed. The `over_budget=N` count rides the `[dreaming] done` line.
147
+
93
148
  ## How `session.idle` works
94
149
 
95
150
  Core fires `session.idle` immediately after every `session.prompt()` completion. The plugin owns the debounce: a `Map<sessionId, Timeout>` reset on every event. When the timer fires, the plugin spawns `memory-logger` for that session — unless the min-delta gate suppresses the spawn (see below).
@@ -114,7 +169,9 @@ Each `memory-logger` spawn captures the line count of `memory/streams/<today>.js
114
169
 
115
170
  ## Tests
116
171
 
117
- Test files in this directory (kebab-case, `.test.ts` neighbors): `paths`, `slug`, `frontmatter`, `topics`, `shard-snapshot`, `delete-tool`, `citations`, `citation-superset`, `load-shards`, `load-memory`, `injection-plan`, `search-tool`, `memory-retrieval`, `memory-logger`, `dreaming`, `index`, `integration`. Plus guard policies in `../guard/policies/`: `memory-topics-delete`, `memory-topics-write`, `memory-retrieval-cache-write`.
172
+ Tests live next to the code they cover as `.test.ts` neighbors; `vector/*.test.ts` covers the vector subsystem (store, embedder, hybrid search, relevance gate, startup/on-write indexing, doctor/inspect). The core surfaces are `paths`, `slug`, `frontmatter`, `topics`, `shard-snapshot`, `delete-tool`, `citations`, `citation-superset`, `load-shards`, `load-memory`, `injection-plan`, `turn-dedup`, `search-tool`, `memory-retrieval`, `memory-logger`, `dreaming`, `index`, `index-vector`, `index-vector-retrieval`, `integration` (not exhaustive — `git ls-files 'src/bundled-plugins/memory/**/*.test.ts'` is the source of truth). Plus guard policies in `../guard/policies/`: `memory-topics-delete`, `memory-topics-write`, `memory-retrieval-cache-write`.
173
+
174
+ Both vector hook tests build the plugin through `createMemoryPluginForTests({ hybridSearch?, queryEmbedFn? })` — a per-instance dependency seam (the factory defaults to the real implementations) so a fake is scoped to one boot and never leaks across the suite. `index-vector` injects a fake `hybridSearch` to cover the budget-gating, channel force-index, and cross-turn dedup branches of the `session.turn.start` hook. `index-vector-retrieval` injects only a synthetic `queryEmbedFn` (so the ~279 MB model never loads) and runs the REAL `hybridSearch` → `VectorStore.query` → RRF → `renderRetrievedMemorySection` pipeline through the same hook against a real seeded vector store; the query token appears nowhere in the corpus, so a rendered topic proves the vector lane matched rather than the substring `memory_search` lane. Relevance-gate suppression is covered separately in `vector/hybrid` and `vector/relevance-gate`.
118
175
 
119
176
  ## Notes from before the plugin existed
120
177
 
@@ -11,63 +11,84 @@ import { streamFilePath } from './paths'
11
11
  import { detectSecrets } from './secret-detector'
12
12
  import { newEventId, timestampFromId } from './stream-events'
13
13
  import type { FragmentEvent, WatermarkEvent } from './stream-events'
14
- import { appendEvents, readEvents } from './stream-io'
14
+ import { appendEvents, readEvents, type FragmentsAppendedContext } from './stream-io'
15
15
 
16
- export const appendTool = defineTool({
17
- description:
18
- "Append a memory fragment to today's JSONL daily stream and advance the watermark. The runtime serializes your call into a JSON line and chooses the filename — do not emit raw JSON and do not pass a path. `topic`/`body` are the fragment's substance; `source` is the parent session id; `entry` is the transcript-entry-id this fragment anchors to; `latestEntryId` is the latest transcript-entry-id you evaluated in this run (advances the watermark, may equal `entry` or be later). Refuses content with recognized credential patterns and refuses byte-equivalent topic+body within the same daily stream.",
19
- parameters: z.object({
20
- topic: z.string().min(1),
21
- body: z.string().min(1),
22
- source: z.string().min(1),
23
- entry: z.string().min(1),
24
- latestEntryId: z.string().min(1),
25
- }),
26
- async execute({ topic, body, source, entry, latestEntryId }, ctx) {
27
- const streamPath = dailyStreamPath(ctx.agentDir)
28
- assertNoSecrets(`${topic}\n${body}`)
29
-
30
- const hash = fragmentContentHash({ topic, body })
31
- const events = await readEvents(streamPath)
32
- const duplicate = events
33
- .filter((event) => event.type === 'fragment')
34
- .find((event) => fragmentContentHash(event) === hash)
35
- if (duplicate !== undefined) {
36
- throw new Error(
37
- `Refusing to append: fragment "${duplicate.topic}" already exists in ${streamPath} with byte-equivalent content. ` +
38
- `The dreaming subagent will see the existing fragment; do not write it again. If the new occurrence ` +
39
- `is genuinely informative, write a fragment that says so explicitly rather than restating the original.`,
40
- )
41
- }
16
+ export type FragmentsAppendedHook = (fragments: FragmentEvent[], context: FragmentsAppendedContext) => Promise<void>
42
17
 
43
- const fragmentId = newEventId()
44
- const watermarkId = newEventId()
45
- const fragment: FragmentEvent = {
46
- type: 'fragment',
47
- id: fragmentId,
48
- ts: timestampFromId(fragmentId),
49
- source,
50
- entry,
51
- topic,
52
- body,
53
- }
54
- const watermark: WatermarkEvent = {
55
- type: 'watermark',
56
- id: watermarkId,
57
- ts: timestampFromId(watermarkId),
58
- source,
59
- entry: latestEntryId,
60
- }
18
+ export function createAppendTool(onFragmentsAppended?: FragmentsAppendedHook) {
19
+ return defineTool({
20
+ description:
21
+ "Append a memory fragment to today's JSONL daily stream and advance the watermark. The runtime serializes your call into a JSON line and chooses the filename — do not emit raw JSON and do not pass a path. `topic`/`body` are the fragment's substance; `source` is the parent session id; `entry` is the transcript-entry-id this fragment anchors to; `latestEntryId` is the latest transcript-entry-id you evaluated in this run (advances the watermark, may equal `entry` or be later). Refuses content with recognized credential patterns and refuses byte-equivalent topic+body within the same daily stream.",
22
+ parameters: z.object({
23
+ topic: z.string().min(1),
24
+ body: z.string().min(1),
25
+ source: z.string().min(1),
26
+ entry: z.string().min(1),
27
+ latestEntryId: z.string().min(1),
28
+ references: z.array(z.string()).optional(),
29
+ }),
30
+ async execute({ topic, body, source, entry, latestEntryId, references }, ctx) {
31
+ const streamPath = dailyStreamPath(ctx.agentDir)
32
+ assertNoSecrets(`${topic}\n${body}`)
61
33
 
62
- await mkdir(dirname(streamPath), { recursive: true })
63
- await appendEvents(streamPath, [fragment, watermark])
34
+ const hash = fragmentContentHash({ topic, body })
35
+ const events = await readEvents(streamPath)
36
+ const duplicate = events
37
+ .filter((event) => event.type === 'fragment')
38
+ .find((event) => fragmentContentHash(event) === hash)
39
+ if (duplicate !== undefined) {
40
+ throw new Error(
41
+ `Refusing to append: fragment "${duplicate.topic}" already exists in ${streamPath} with byte-equivalent content. ` +
42
+ `The dreaming subagent will see the existing fragment; do not write it again. If the new occurrence ` +
43
+ `is genuinely informative, write a fragment that says so explicitly rather than restating the original.`,
44
+ )
45
+ }
64
46
 
65
- return {
66
- content: [{ type: 'text' as const, text: `Appended memory fragment and watermark to ${streamPath}` }],
67
- details: { path: streamPath, fragmentId: fragment.id, watermarkId: watermark.id },
68
- }
69
- },
70
- })
47
+ const fragmentId = newEventId()
48
+ const watermarkId = newEventId()
49
+ const fragment: FragmentEvent = {
50
+ type: 'fragment',
51
+ id: fragmentId,
52
+ ts: timestampFromId(fragmentId),
53
+ source,
54
+ entry,
55
+ topic,
56
+ body,
57
+ }
58
+ if (references !== undefined && references.length > 0) {
59
+ fragment.references = references
60
+ }
61
+ const watermark: WatermarkEvent = {
62
+ type: 'watermark',
63
+ id: watermarkId,
64
+ ts: timestampFromId(watermarkId),
65
+ source,
66
+ entry: latestEntryId,
67
+ }
68
+
69
+ await mkdir(dirname(streamPath), { recursive: true })
70
+ await appendEvents(
71
+ streamPath,
72
+ [fragment, watermark],
73
+ onFragmentsAppended,
74
+ onFragmentsAppended
75
+ ? (err) => {
76
+ ctx.logger?.warn(
77
+ `[memory] post-append vector hook failed: ${err instanceof Error ? err.message : String(err)}`,
78
+ )
79
+ }
80
+ : undefined,
81
+ )
82
+
83
+ return {
84
+ content: [{ type: 'text' as const, text: `Appended memory fragment and watermark to ${streamPath}` }],
85
+ details: { path: streamPath, fragmentId: fragment.id, watermarkId: watermark.id },
86
+ }
87
+ },
88
+ })
89
+ }
90
+
91
+ export const appendTool = createAppendTool()
71
92
 
72
93
  export const advanceWatermarkTool = defineTool({
73
94
  description:
@@ -20,6 +20,10 @@
20
20
  // to honor that — especially across hundreds of runs over months — so the
21
21
  // mechanical check is the safety floor.
22
22
  //
23
+ // Reference citations (`references/<slug>`) use a different format and are
24
+ // intentionally excluded from this check — they are not fragment ids and are
25
+ // not subject to the citation-superset GC invariant.
26
+ //
23
27
  // Detection only. The handler decides what to do with the verdict (revert
24
28
  // memory/topics/ to its pre-run bytes, skip daily-stream compaction, still
25
29
  // advance the dreamed-id set so we do not loop on the same fragments).
@@ -54,3 +54,57 @@ export function parseCitations(text: string): Map<string, Set<string>> {
54
54
  export function isCitationLine(line: string): boolean {
55
55
  return CITATION_LINE.test(line)
56
56
  }
57
+
58
/**
 * Returns only the belief prose of a shard body, for use as embedding input.
 *
 * Citation lines and the `fragments:` / `superseded:` section headings are
 * removed, runs of blank lines are collapsed, and the result is trimmed.
 * Why: a body is one belief sentence followed by many `streams/<date>#<uuidv7>`
 * lines; mean-pooling all of it dilutes the belief and drags every topic vector
 * toward the shared citation-list structure. The on-disk body is untouched —
 * the citations remain there as the load-bearing parent-child links.
 *
 * @param body - Shard body markdown.
 * @returns The body with citation lines and section headings stripped.
 */
export function stripCitationLines(body: string): string {
  const proseLines: string[] = []
  for (const line of body.split('\n')) {
    // Same evaluation order as a `!citation && !heading` filter.
    if (isCitationLine(line) || SECTION_HEADING.test(line)) continue
    proseLines.push(line)
  }
  return collapseBlankRuns(proseLines).join('\n').trim()
}
68
+
69
/**
 * Collapses each run of consecutive blank lines down to its first line.
 *
 * A line is blank when it is empty after trimming. The first blank line of a
 * run is kept verbatim (whitespace included); the blank lines that follow it
 * are dropped. Non-blank lines are never altered.
 *
 * @param lines - Input lines, in order.
 * @returns A new array; the input is not mutated.
 */
function collapseBlankRuns(lines: string[]): string[] {
  const isBlank = (text: string): boolean => text.trim() === ''
  return lines.filter((line, index) => {
    if (!isBlank(line)) return true
    const previous = index > 0 ? lines[index - 1] : undefined
    // Drop a blank line only when the line right before it was blank too.
    return previous === undefined || !isBlank(previous)
  })
}
80
+
81
// Status-aware view of a shard's citations. Superseded citations remain cited
// (so the citation-superset GC invariant never drops them) yet have to be kept
// out of retrieval: a superseded "uses bun" fragment must not surface as a hook
// for the current "uses pnpm" belief. `parseCitations` stays section-blind for
// GC purposes. Legacy shards had no section headings, so citations that appear
// before any heading are treated as active.
const SECTION_HEADING = /^[\s-]*(fragments|superseded)\s*:\s*$/i

/** Citation ids of one shard body, partitioned by the section they appear under. */
export type SectionedCitations = { active: Set<string>; superseded: Set<string> }

/**
 * Partitions the citations in a shard body into active and superseded sets.
 *
 * Lines are scanned top to bottom. A `fragments:` / `superseded:` heading
 * switches the current section; any other line that matches `CITATION_LINE`
 * contributes its third capture group (presumably the fragment id — the
 * pattern is defined outside this view) to the current section's set.
 *
 * @param body - Shard body markdown.
 * @returns Both sets; an id found under both sections is reported as active only.
 */
export function splitCitationsBySection(body: string): SectionedCitations {
  const buckets: SectionedCitations = { active: new Set<string>(), superseded: new Set<string>() }
  let section: keyof SectionedCitations = 'active'

  for (const line of body.split('\n')) {
    const headingMatch = SECTION_HEADING.exec(line)
    if (headingMatch !== null) {
      section = headingMatch[1]!.toLowerCase() === 'superseded' ? 'superseded' : 'active'
      continue
    }

    const citationMatch = CITATION_LINE.exec(line)
    if (citationMatch !== null) buckets[section].add(citationMatch[3]!)
  }

  // A re-affirmed fact can end up in both sections across edits: active wins.
  for (const id of buckets.active) buckets.superseded.delete(id)

  return buckets
}
@@ -0,0 +1,30 @@
1
+ import { splitCitationsBySection } from './citations'
2
+
3
/** Per-run consolidation counters derived from pre/post shard snapshots. */
export type DreamingMetrics = {
  /** Shard paths present after the run but not before it. */
  topicsCreated: number
  /** Shard paths present before the run but not after it. */
  topicsRemoved: number
  /** Net change in citations listed under `superseded:` across all shards; may be negative. */
  supersededDelta: number
  /** NOTE(review): `computeDreamingMetrics` always reports 0 here — presumably set by another caller; confirm. */
  referencesDemoted: number
  /** NOTE(review): `computeDreamingMetrics` always reports 0 here — presumably set by another caller; confirm. */
  referencesEvicted: number
}
10
+
11
/**
 * Diffs two shard snapshots into per-run dreaming metrics.
 *
 * Snapshots map an absolute shard path to that file's bytes
 * (see `captureShardSnapshot`). Created/removed topics are a pure key diff;
 * `supersededDelta` is the net change in the number of citations sitting under
 * `superseded:` across all shards, i.e. how many fragments were overturned by
 * this run. The two reference counters are always returned as 0 from here.
 *
 * @param before - Snapshot taken before the run.
 * @param after - Snapshot taken after the run.
 * @returns The computed metrics.
 */
export function computeDreamingMetrics(before: Map<string, Buffer>, after: Map<string, Buffer>): DreamingMetrics {
  const topicsCreated = Array.from(after.keys()).filter((path) => !before.has(path)).length
  const topicsRemoved = Array.from(before.keys()).filter((path) => !after.has(path)).length

  return {
    topicsCreated,
    topicsRemoved,
    supersededDelta: countSuperseded(after) - countSuperseded(before),
    referencesDemoted: 0,
    referencesEvicted: 0,
  }
}
25
+
26
/**
 * Sums, over every shard in the snapshot, the number of citations that
 * `splitCitationsBySection` classifies as superseded. Each shard is decoded as
 * UTF-8 and counted independently, so an id superseded in two shards counts twice.
 *
 * @param snapshot - Shard path → file bytes.
 * @returns Total superseded citation count across the snapshot.
 */
function countSuperseded(snapshot: Map<string, Buffer>): number {
  return Array.from(snapshot.values()).reduce(
    (sum, bytes) => sum + splitCitationsBySection(bytes.toString('utf8')).superseded.size,
    0,
  )
}