typeclaw 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/README.md +20 -15
  2. package/auth.schema.json +113 -0
  3. package/package.json +1 -1
  4. package/secrets.schema.json +113 -0
  5. package/src/agent/auth.ts +4 -2
  6. package/src/agent/index.ts +16 -28
  7. package/src/agent/model-fallback.ts +127 -0
  8. package/src/agent/session-meta.ts +1 -1
  9. package/src/agent/session-origin.ts +3 -2
  10. package/src/agent/tools/curl-impersonate.ts +300 -0
  11. package/src/agent/tools/ddg.ts +13 -88
  12. package/src/agent/tools/webfetch/fetch.ts +105 -2
  13. package/src/agent/tools/webfetch/tool.ts +4 -0
  14. package/src/bundled-plugins/agent-browser/shim.ts +47 -0
  15. package/src/bundled-plugins/backup/subagents.ts +2 -0
  16. package/src/bundled-plugins/memory/README.md +49 -12
  17. package/src/bundled-plugins/memory/citation-superset.ts +63 -0
  18. package/src/bundled-plugins/memory/dreaming.ts +105 -17
  19. package/src/bundled-plugins/memory/index.ts +2 -2
  20. package/src/bundled-plugins/memory/memory-logger.ts +45 -26
  21. package/src/bundled-plugins/memory/strength.ts +127 -0
  22. package/src/bundled-plugins/memory/topics.ts +75 -0
  23. package/src/bundled-plugins/security/index.ts +88 -43
  24. package/src/bundled-plugins/security/permissions.ts +36 -0
  25. package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
  26. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
  27. package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
  28. package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
  29. package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
  30. package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
  31. package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
  32. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
  33. package/src/channels/adapters/github/auth-app.ts +120 -0
  34. package/src/channels/adapters/github/auth-pat.ts +50 -0
  35. package/src/channels/adapters/github/auth.ts +33 -0
  36. package/src/channels/adapters/github/channel-resolver.ts +30 -0
  37. package/src/channels/adapters/github/dedup.ts +26 -0
  38. package/src/channels/adapters/github/event-allowlist.ts +8 -0
  39. package/src/channels/adapters/github/fetch-attachment.ts +5 -0
  40. package/src/channels/adapters/github/history.ts +63 -0
  41. package/src/channels/adapters/github/inbound.ts +286 -0
  42. package/src/channels/adapters/github/index.ts +370 -0
  43. package/src/channels/adapters/github/managed-path.ts +54 -0
  44. package/src/channels/adapters/github/membership.ts +35 -0
  45. package/src/channels/adapters/github/outbound.ts +145 -0
  46. package/src/channels/adapters/github/webhook-register.ts +349 -0
  47. package/src/channels/manager.ts +94 -9
  48. package/src/channels/router.ts +194 -28
  49. package/src/channels/schema.ts +31 -1
  50. package/src/channels/tunnel-bridge.ts +51 -0
  51. package/src/channels/types.ts +3 -1
  52. package/src/cli/builtins.ts +28 -0
  53. package/src/cli/channel.ts +511 -25
  54. package/src/cli/container-command-client.ts +244 -0
  55. package/src/cli/cron.ts +173 -0
  56. package/src/cli/host-command-runner.ts +150 -0
  57. package/src/cli/index.ts +42 -1
  58. package/src/cli/init.ts +400 -67
  59. package/src/cli/model.ts +14 -4
  60. package/src/cli/oauth-callbacks.ts +49 -0
  61. package/src/cli/plugin-command-help.ts +49 -0
  62. package/src/cli/plugin-commands-dispatch.ts +112 -0
  63. package/src/cli/plugin-commands.ts +118 -0
  64. package/src/cli/provider.ts +3 -20
  65. package/src/cli/tui.ts +10 -2
  66. package/src/cli/tunnel.ts +533 -0
  67. package/src/cli/ui.ts +8 -3
  68. package/src/config/config.ts +134 -24
  69. package/src/config/models-mutation.ts +42 -8
  70. package/src/config/providers-mutation.ts +12 -8
  71. package/src/container/start.ts +48 -4
  72. package/src/cron/bridge.ts +136 -0
  73. package/src/cron/consumer.ts +174 -48
  74. package/src/cron/index.ts +19 -2
  75. package/src/cron/list.ts +105 -0
  76. package/src/cron/scheduler.ts +12 -3
  77. package/src/cron/schema.ts +11 -3
  78. package/src/doctor/checks.ts +0 -50
  79. package/src/init/dockerfile.ts +165 -13
  80. package/src/init/ensure-deps.ts +15 -4
  81. package/src/init/github-webhook-install.ts +109 -0
  82. package/src/init/hatching.ts +2 -2
  83. package/src/init/index.ts +519 -12
  84. package/src/init/oauth-login.ts +17 -3
  85. package/src/init/run-bun-install.ts +17 -3
  86. package/src/init/run-owner-claim.ts +11 -2
  87. package/src/permissions/builtins.ts +29 -2
  88. package/src/permissions/match-rule.ts +24 -2
  89. package/src/permissions/permissions.ts +24 -7
  90. package/src/permissions/resolve.ts +1 -0
  91. package/src/plugin/define.ts +44 -1
  92. package/src/plugin/index.ts +18 -3
  93. package/src/plugin/manager.ts +16 -0
  94. package/src/plugin/registry.ts +85 -3
  95. package/src/plugin/types.ts +144 -1
  96. package/src/plugin/zod-introspect.ts +100 -0
  97. package/src/role-claim/match-rule.ts +2 -1
  98. package/src/run/index.ts +112 -4
  99. package/src/secrets/index.ts +1 -1
  100. package/src/secrets/schema.ts +21 -0
  101. package/src/server/command-runner.ts +476 -0
  102. package/src/server/index.ts +388 -5
  103. package/src/shared/index.ts +8 -0
  104. package/src/shared/protocol.ts +80 -1
  105. package/src/skills/typeclaw-channel-github/SKILL.md +24 -0
  106. package/src/skills/typeclaw-config/SKILL.md +27 -26
  107. package/src/skills/typeclaw-cron/SKILL.md +234 -3
  108. package/src/skills/typeclaw-memory/SKILL.md +25 -15
  109. package/src/skills/typeclaw-monorepo/SKILL.md +2 -2
  110. package/src/skills/typeclaw-permissions/SKILL.md +35 -16
  111. package/src/skills/typeclaw-plugins/SKILL.md +251 -5
  112. package/src/skills/typeclaw-tunnels/SKILL.md +111 -0
  113. package/src/test-helpers/wait-for.ts +50 -0
  114. package/src/tui/index.ts +70 -7
  115. package/src/tunnels/__fixtures__/cloudflared-quick-stderr.txt +11 -0
  116. package/src/tunnels/events.ts +14 -0
  117. package/src/tunnels/index.ts +12 -0
  118. package/src/tunnels/log-ring.ts +54 -0
  119. package/src/tunnels/manager.ts +139 -0
  120. package/src/tunnels/providers/cloudflare-quick.ts +189 -0
  121. package/src/tunnels/providers/external.ts +53 -0
  122. package/src/tunnels/quick-url-parser.ts +5 -0
  123. package/src/tunnels/types.ts +43 -0
  124. package/src/usage/report.ts +15 -12
  125. package/typeclaw.schema.json +311 -26
@@ -9,8 +9,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
9
9
  ```json
10
10
  {
11
11
  "memory": {
12
- "idleMs": 10000,
13
- "bufferBytes": 100000,
12
+ "idleMs": 60000,
13
+ "bufferBytes": 500000,
14
14
  "dreaming": { "schedule": "*/30 * * * *" }
15
15
  }
16
16
  }
@@ -18,8 +18,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
18
18
 
19
19
  | Field | Default | Effect |
20
20
  | -------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
21
- | `memory.idleMs` | `10000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. |
22
- | `memory.bufferBytes` | `100000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero. |
21
+ | `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. Default bumped from `10000` to `60000` to reduce spawn churn during conversational sessions where the agent goes idle for short periods between rapid back-and-forth turns. |
22
+ | `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero. Default bumped from `100000` to `500000` so a single conversational session stays within one memory-logger run unless it grows past ~half a megabyte of transcript. |
23
23
  | `memory.dreaming` | `{}` (cron job on) | Dreaming cron job is always registered. Override `schedule` to change when it fires. |
24
24
  | `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression. Defaults to every 30 minutes; fires short-circuit with zero LLM cost when nothing sits past the watermark, so frequent no-op fires are cheap and let sporadic agents still consolidate while alive (`src/cron/scheduler.ts` has no catchup for missed fires). Second-level schedules are rejected to avoid noisy no-op dreaming loops. |
25
25
 
@@ -27,18 +27,52 @@ All fields are **restart-required** — the plugin reads them once at boot.
27
27
 
28
28
  ## What it contributes
29
29
 
30
- | Kind | Name | Notes |
31
- | -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
32
- | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file. |
33
- | Subagent | `dreaming` | Reads `MEMORY.md` plus undreamed daily-stream events, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. |
34
- | Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
35
- | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`. |
36
- | Hook | `session.end` | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away). |
30
+ | Kind | Name | Notes |
31
+ | -------- | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
32
+ | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file. |
33
+ | Subagent | `dreaming` | Reads `MEMORY.md` plus undreamed daily-stream events, **rebalances** the existing topics using per-topic strength signals (citation count, distinct days, recency) injected into its user prompt, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. The runtime enforces a **citation-superset invariant** on every rewrite: a new MEMORY.md that drops any previously-cited fragment id is reverted to its pre-run bytes (dreamed-ids still advance so the run is not retried in a loop). |
34
+ | Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
35
+ | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`. |
36
+ | Hook | `session.end` | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away). |
37
37
 
38
38
  ## Memory injection
39
39
 
40
40
  The rendered `# Memory` section (MEMORY.md + undreamed daily-stream tails) is injected into every session's system prompt by core (`src/agent/index.ts` `createResourceLoader` → `loadMemory`), **not** by a plugin hook. It is appended as the last block of the system prompt, after `gitNudge`, so the most-volatile content (daily streams that grow after every memory-logger fire) sits at the bottom of the cache-suffix region. This way a memory change only invalidates the memory section itself rather than everything downstream of it.
41
41
 
42
+ ## Memory saturation (LTP/LTD analogue)
43
+
44
+ MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Without a saturation policy it grows monotonically — every consolidated topic survives forever and citations accumulate across days. The dreaming subagent therefore treats MEMORY.md like human long-term memory: **repetition strengthens, lack of repetition saturates**.
45
+
46
+ ### How
47
+
48
+ On every run the runtime computes per-topic strength signals from MEMORY.md's existing citations — `cites` (total), `days` (distinct calendar days those citations span), `last reinforced` (most recent citation date), `age (d)` (whole days since `last reinforced`). The numbers are derived by `src/bundled-plugins/memory/strength.ts` and rendered as a table at the top of the dreaming subagent's user prompt. There is no sidecar file, no schema version, no migration — strength is recomputed on every run from MEMORY.md alone.
49
+
50
+ The subagent uses these numbers to:
51
+
52
+ 1. **Promote strong topics.** `days = 1` → tentative ("the user mentioned"). `days = 2` → still tentative ("observed" — could be a recurring quirk, could be coincidence). `days >= 3` → confident ("the user consistently"). `days >= 7` → declarative ("the user always"). Promotion is gated on distinct days, not raw citation count — five citations on one day is one debugging session, five citations across five days is a recurring pattern.
53
+ 2. **Merge near-duplicates.** Topics that overlap in subject matter get folded into one, with the merged topic's `fragments:` list as the **union** of the source topics' fragment ids.
54
+ 3. **Demote decayed topics.** A topic with `cites = 1, days = 1, age >= 30` (or `cites <= 3, days <= 2, age >= 60`) routes into a `## Historical observations` bucket as a one-line bullet. The fact is preserved in the summary, the citation is preserved (so daily-stream GC keeps the underlying fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Strong topics (`days >= 3`) are never demoted.
55
+
56
+ **There is no hard-deletion path** in this iteration. The historical bucket grows monotonically; the subagent is explicitly told not to attempt quarter-summary collapses because the safety net (below) would revert them. If the bucket becomes inconveniently long in practice, a future runtime change will provide a structured drop mechanism — until then every demoted citation stays alive forever via its one-line bullet.
57
+
58
+ ### The citation-superset safety net
59
+
60
+ After every dreaming run that rewrote MEMORY.md, `src/bundled-plugins/memory/citation-superset.ts` checks that the union of fragment ids cited in the NEW file is a superset of the union cited in the OLD file. If any previously-cited id is missing from the rewrite, the runtime:
61
+
62
+ 1. Restores MEMORY.md to its pre-run bytes via `writeFile(memoryFilePath, memoryTextBefore)`. The pre-run bytes are captured **before** `runSession` so the revert always has a clean source.
63
+ 2. Skips daily-stream fragment GC for this run (no fragments are dropped).
64
+ 3. Advances the dreamed-id set anyway. This is the **conscious anti-loop tradeoff**: the run's NEW undreamed fragments are orphaned (they survive in the daily JSONL forever, force-committed, but will not be re-shown to a future dreaming run and therefore never make it into MEMORY.md). The alternative (not advancing) would loop forever if the LLM keeps making the same mistake on the same inputs. The orphaned fragments are recoverable from git history (`git log memory/`) by a human operator.
65
+ 4. Logs a `[dreaming] citation-superset violation: …` warning naming the dropped ids and explicitly stating the orphaning tradeoff.
66
+
67
+ **Revert-write failure path.** If the `writeFile` in step 1 itself throws (disk full, EACCES, MEMORY.md replaced by a directory by a buggy subagent, etc.), MEMORY.md is in an unknown state. The runtime then:
68
+
69
+ - Skips the dreamed-id advance (so the next run gets a second chance at the same input).
70
+ - Skips compaction (so no fragments are GC'd against an ambiguous citation set).
71
+ - Skips the commit (so a known-bad on-disk state is not snapshotted).
72
+ - Logs a `[dreaming] citation-superset violation AND revert failed: …` ERROR with the recovery command (`git checkout -- MEMORY.md && typeclaw restart`).
73
+
74
+ The check exists because the daily-stream GC in `compactDailyStreams` drops any fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are the only thing that keeps a fragment alive past its first dreaming run — an omitted id means the underlying fragment would be permanently deleted on the next compaction.
75
+
42
76
  ## Files on disk
43
77
 
44
78
  - **`MEMORY.md`** — long-term memory. Created by the dreaming subagent on first run if absent. Force-committed by the runtime; `skip-worktree` flag is set so the human's `git status` stays clean.
@@ -66,9 +100,12 @@ In channel sessions, the agent rarely goes idle long enough to trip the timer be
66
100
 
67
101
  - `index.test.ts` — composition tests (config schema, hook wiring, debounce semantics, MEMORY.md auto-create).
68
102
  - `memory-logger.test.ts` — system prompt invariants, watermark handling.
69
- - `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants.
103
+ - `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants, citation-superset safety net (revert on dropped id, dreamed-ids still advance, no-revert on legitimate merge, no-revert on first-ever run), saturation-prompt invariants (rebalance-every-run, promotion ladder, historical bucket, demotion thresholds, bucket overflow synthesis).
70
104
  - `dreaming-state.test.ts` — fail-open semantics on malformed state.
71
105
  - `watermark.test.ts` — marker parsing.
72
106
  - `append-tool.test.ts` — append-only semantics.
73
107
  - `src/bundled-plugins/guard/policies/skill-authoring.test.ts` — runtime skill authoring guard: path sandboxing, name validation, YAML frontmatter, and write/edit final-content validation.
74
108
  - `load-memory.test.ts` — memory section rendering, undreamed-tail filtering, watermark stripping.
109
+ - `topics.test.ts` — citation-attributing parser (per-topic citation grouping for strength signals).
110
+ - `strength.test.ts` — per-topic strength computation (distinct days, recency, age clamping) and markdown table rendering.
111
+ - `citation-superset.test.ts` — the safety-net check (superset semantics, missing-id reporting, summary truncation).
@@ -0,0 +1,63 @@
1
+ // Citation-superset safety net for the dreaming subagent's MEMORY.md
2
+ // rewrite. After every dreaming run that touched MEMORY.md, we check that
3
+ // the union of fragment ids cited in the NEW file is a superset of the
4
+ // union cited in the OLD file. If any previously-cited id is missing from
5
+ // the rewrite, the rewrite is rejected.
6
+ //
7
+ // Why this exists: the daily-stream GC in compactDailyStreams drops any
8
+ // fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are
9
+ // the only thing that keeps a fragment alive past its first dreaming run.
10
+ // If the subagent rewrites MEMORY.md and accidentally omits a citation —
11
+ // either by garbling a merged topic's fragments: list or by dropping a
12
+ // topic entirely — the next compaction call permanently deletes the
13
+ // underlying fragment from the daily JSONL. There is no recovery beyond
14
+ // `git revert` of the snapshot commit, and even that loses anything the
15
+ // agent wrote since.
16
+ //
17
+ // The subagent's new rule 5 explicitly allows merging topics and rewriting
18
+ // conclusion paragraphs, with the requirement that the merged topic's
19
+ // `fragments:` list is the union of its source topics'. The LLM can fail
20
+ // to honor that — especially across hundreds of runs over months — so the
21
+ // mechanical check is the safety floor.
22
+ //
23
+ // Detection only. The handler decides what to do with the verdict (revert
24
+ // MEMORY.md to its pre-run bytes, skip daily-stream compaction, still
25
+ // advance the dreamed-id set so we do not loop on the same fragments).
26
+
27
+ import { parseCitations } from './citations'
28
+
29
+ export type CitationSupersetVerdict = { ok: true } | { ok: false; missing: Array<{ date: string; fragmentId: string }> }
30
+
31
+ // Compare the OLD MEMORY.md to the NEW MEMORY.md and report any
32
+ // fragment id that the OLD cited and the NEW does not. Empty old text
33
+ // (first-ever dreaming run, prior file missing) is treated as the empty
34
+ // citation set — any new file passes by construction.
35
+ export function checkCitationSuperset(oldText: string, newText: string): CitationSupersetVerdict {
36
+ const oldCitations = parseCitations(oldText)
37
+ if (oldCitations.size === 0) return { ok: true }
38
+
39
+ const newCitations = parseCitations(newText)
40
+ const missing: Array<{ date: string; fragmentId: string }> = []
41
+
42
+ const dates = [...oldCitations.keys()].sort()
43
+ for (const date of dates) {
44
+ const oldIds = oldCitations.get(date) ?? new Set<string>()
45
+ const newIds = newCitations.get(date) ?? new Set<string>()
46
+ const oldIdList = [...oldIds].sort()
47
+ for (const id of oldIdList) {
48
+ if (!newIds.has(id)) missing.push({ date, fragmentId: id })
49
+ }
50
+ }
51
+
52
+ return missing.length === 0 ? { ok: true } : { ok: false, missing }
53
+ }
54
+
55
+ // Pretty-print the verdict's missing ids for log output. Keeps the line
56
+ // short by reporting count + first N ids; the full list is reconstructable
57
+ // from MEMORY.md's git history if forensics are ever needed.
58
+ export function summarizeMissingCitations(missing: ReadonlyArray<{ date: string; fragmentId: string }>): string {
59
+ const total = missing.length
60
+ const sample = missing.slice(0, 3).map((m) => `${m.date}#${m.fragmentId}`)
61
+ if (total <= 3) return sample.join(', ')
62
+ return `${sample.join(', ')} (+${total - 3} more)`
63
+ }
@@ -1,4 +1,3 @@
1
- import { createHash } from 'node:crypto'
2
1
  import { existsSync } from 'node:fs'
3
2
  import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
4
3
  import { dirname, join } from 'node:path'
@@ -8,6 +7,7 @@ import { z } from 'zod'
8
7
  import { lsTool, readTool, type Subagent, writeTool } from '@/plugin'
9
8
  import { formatLocalDate, formatLocalDateTime } from '@/shared'
10
9
 
10
+ import { checkCitationSuperset, summarizeMissingCitations } from './citation-superset'
11
11
  import { parseCitations } from './citations'
12
12
  import {
13
13
  addDreamedIds,
@@ -19,6 +19,7 @@ import {
19
19
  } from './dreaming-state'
20
20
  import type { StreamEvent } from './stream-events'
21
21
  import { readEvents, writeEventsAtomic } from './stream-io'
22
+ import { computeTopicStrengths, renderTopicStrengthsTable, type TopicStrength } from './strength'
22
23
 
23
24
  const STREAM_FILE_PATTERN = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
24
25
 
@@ -207,12 +208,11 @@ async function loadCitedIds(agentDir: string): Promise<ReadonlyMap<string, Reado
207
208
  }
208
209
  }
209
210
 
210
- async function safeContentHash(path: string): Promise<string | null> {
211
+ async function safeReadText(path: string): Promise<string> {
211
212
  try {
212
- const raw = await readFile(path)
213
- return createHash('sha256').update(raw).digest('hex')
213
+ return await readFile(path, 'utf8')
214
214
  } catch {
215
- return null
215
+ return ''
216
216
  }
217
217
  }
218
218
 
@@ -501,11 +501,11 @@ fragments:
501
501
 
502
502
  The date in the prefix is the same as the filename you read the fragment from; the id after \`#\` is the full UUIDv7 from the event's \`id\` field. Do not abbreviate the id. Do not use line numbers — citations are id-based, not line-based, so daily streams can be compacted between dreaming runs without breaking your references.
503
503
 
504
- A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments. Never cite a fragment id you did not see in the undreamed tail you actually read.
504
+ A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments. When you add a NEW citation, never cite a fragment id you did not see in the undreamed tail you actually read. EXISTING citations that are already in MEMORY.md (from prior dreaming runs, whose source fragments are no longer in the undreamed tail) must be preserved per rule 5 — they reference fragments still alive in already-consolidated daily streams.
505
505
 
506
506
  **4. Inherit the memory-logger's standards.** The memory-logger already filtered fragments using strict certainty rules (explicit / deductive / inductive). Your job is consolidation, not loosening the bar. If two fragments contradict, prefer the more recent. If a fragment is ambiguous in isolation but clarified by a later fragment, merge them under one topic. Never promote a single fragment from one day into a stable claim unless its certainty was already \`explicit\` or \`deductive\`.
507
507
 
508
- **5. Preserve existing MEMORY.md content.** MEMORY.md may already contain entries from prior dreaming runs. Fold new fragments into existing topics where they fit, or add new topics. Do not silently drop existing entries. If a new fragment contradicts an existing entry, replace the entry and update its fragment list. Existing fragment citations may reference dates whose streams are now fully consolidated; that is normal leave them in place.
508
+ **5. Rebalance every run. Preserve every fact and every cited fragment id.** MEMORY.md is a saturated surface (a fixed prompt-budget), not an append-only log — every run is consolidation, not just the runs that get new fragments. You may merge near-duplicate topics into one, fold weakly-reinforced topics into a parent or into the historical-observations bucket (see "Memory saturation" below), and rewrite verbose conclusion paragraphs more tightly. What you must NOT do: drop a fragment id. The merged topic's \`fragments:\` list is the **union** of its source topics' fragment ids. The daily-stream GC depends on MEMORY.md citations to keep evidence alive; an omitted id means the underlying fragment is permanently deleted on the next compaction. If two topics genuinely cover different facts, leave them separate — premature merging loses signal. If a new fragment contradicts an existing entry, replace the entry's conclusion paragraph and keep BOTH the old and new fragment ids in the citations list (the contradiction itself is evidence). The runtime cross-checks your rewrite against the prior MEMORY.md's citation set; a rewrite that drops a previously-cited id will be reverted and your run wasted.
509
509
 
510
510
  **6. Be concise.** Each topic conclusion is one short paragraph. No lists of preferences ("the user likes X, Y, Z"). One topic per concept. If a topic only earned one fragment and the fragment was already small, you may copy its conclusion verbatim — do not pad.
511
511
 
@@ -532,7 +532,40 @@ fragments:
532
532
 
533
533
  The first line is always \`# Memory\`. Topics are level-2 headings. No other top-level structure.
534
534
 
535
- # Muscle memory (skills, CLIs, plugins)
535
+ # Memory saturation
536
+
537
+ MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Treat it like human long-term memory: **repetition strengthens, lack of repetition saturates**. The runtime gives you per-topic strength signals at the top of the user prompt — a table with \`cites\` (total citation count), \`days\` (distinct calendar days those citations span), \`last reinforced\`, and \`age (d)\`. Use these numbers to decide what to do with each existing topic on this run. \`days\` is the load-bearing signal: five citations all on one day means a single debugging session that mentioned the same thing five times (a transient burst); five citations across five days means a recurring fact the user keeps coming back to (a stable signal).
538
+
539
+ ## Strength tiers and promotion ladder
540
+
541
+ Pick the wording in each conclusion paragraph from the topic's \`days\` count:
542
+
543
+ - **\`days = 1\` — "mentioned":** the topic was observed in one session. Conclusion uses tentative language ("the user mentioned X in the context of Y"). Single-fragment one-day topics that are not reinforced on subsequent runs are demotion candidates (see below).
544
+ - **\`days = 2\` — "observed":** seen twice, on different days. Still tentative — could be a recurring quirk, could be coincidence.
545
+ - **\`days >= 3\` — "consistently":** the topic has been reinforced across at least three distinct days. Conclusion uses confident language ("the user consistently prefers X", "the user's pattern is Y"). Strong enough to anchor near the top of MEMORY.md.
546
+ - **\`days >= 7\` — "always":** seen across at least seven distinct days. Conclusion uses declarative language ("the user always X", "Y is the user's standard"). These are the load-bearing topics; protect them from accidental merges.
547
+
548
+ Promotion is gated on \`days\`, not on \`cites\`. A topic with \`cites = 12, days = 1\` is still "mentioned" — twelve citations in one debugging session is one event, not twelve. Order MEMORY.md so the strongest topics come first; weaker topics drift toward the bottom.
549
+
550
+ ## Demotion and the historical-observations bucket
551
+
552
+ When a topic's \`days\` count is low AND \`age (d)\` is high (the user has not come back to it in weeks), it is decayed. Do not delete — **demote**. The bucket is a single topic, always last in MEMORY.md, with this exact shape:
553
+
554
+ \`\`\`
555
+ ## Historical observations
556
+ - yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
557
+ - yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
558
+ \`\`\`
559
+
560
+ Each former topic becomes one bullet. The fact is preserved (in the summary), the citation is preserved (so daily-stream GC keeps the fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Demotion candidates: a topic with \`cites = 1, days = 1, age >= 30\`, OR a topic with \`cites <= 3, days <= 2, age >= 60\`. Strong topics (\`days >= 3\`) are not demoted regardless of age — they stayed reinforced when they were active, so they earned their place.
561
+
562
+ When you demote a topic, take its conclusion paragraph and compress it into one short summary sentence for the bullet. Keep the citation date prefix (\`yyyy-MM-dd:\`) so the bullet stays sortable and grep-able. The summary is your last chance to write a useful sentence about this fact — the next time the agent reads MEMORY.md, this bullet is all there is.
563
+
564
+ The bucket grows monotonically: there is **no hard-deletion path**, no quarter-level synthesis, no removal of old bullets. Every demoted citation stays alive forever via its one-line bullet. The runtime safety net rejects any rewrite that drops a previously-cited fragment id, so attempting to collapse old bullets into a summary will be reverted and your run wasted. If the bucket becomes inconveniently long, that is a problem for a future runtime change to address — not something you can resolve from inside a dreaming run.
565
+
566
+ ## When MEMORY.md has no strength table
567
+
568
+ A first-ever run sees no existing topics, so the strength table is omitted. In that case the saturation rules above do not apply yet — just consolidate the new fragments into fresh topics. The strength signals start appearing on the second run.
536
569
 
537
570
  While you read the streams, watch for **repeated multi-step procedures** the user has guided the main agent through. When you have evidence (across multiple fragments, ideally across multiple days) that the same procedure keeps happening the same way, you have three response shapes available — pick the smallest one that fits.
538
571
 
@@ -620,8 +653,8 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
620
653
 
621
654
  1. \`read\` MEMORY.md (it may not exist — that is fine, you start from empty).
622
655
  2. For each JSONL daily stream undreamed-tail entry the user message lists, \`read\` the file with \`offset\` set to the first undreamed line. Read every undreamed tail before you start writing, then focus on fragment events' \`topic\` + \`body\` fields.
623
- 3. Reason about what to consolidate. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable.
624
- 4. \`write\` the full new contents of MEMORY.md in one call (only if anything changed). \`write\` overwrites; that is the point — MEMORY.md is the single canonical artifact you produce.
656
+ 3. Reason about what to consolidate AND about how to rebalance existing topics using the strength signals at the top of the user prompt. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable. Most existing topics will keep their shape; a few merge candidates and a few demotion candidates will surface every run.
657
+ 4. \`write\` the full new contents of MEMORY.md in one call. Even if no new fragments earned promotion, a rebalance pass (merging two near-duplicates, demoting a single weak old topic) is still a productive run. \`write\` overwrites; that is the point — MEMORY.md is the single canonical artifact you produce. Remember: every fragment id cited in the previous MEMORY.md must still appear somewhere in the new file (in its same topic, in a merged topic, OR in the historical-observations bucket). The runtime enforces this mechanically and will revert your rewrite if you drop an id.
625
658
  5. Decide whether any procedure in the new fragments meets the muscle-memory bar above, and which of the three forms fits.
626
659
  - **Form A (skill):** \`ls\` \`memory/skills/\` to see what already exists, \`read\` any candidate's existing \`SKILL.md\` if you might be refining it, then \`write\` the new or refined skill at \`memory/skills/<name>/SKILL.md\` with the frontmatter shape shown above.
627
660
  - **Form B (CLI suggestion) or Form C (plugin suggestion):** add a topic to MEMORY.md with the \`proposal:\` line shown above. The CLI/plugin itself is the main agent's responsibility — you do not write under \`packages/\`. Before adding the topic, check the existing MEMORY.md you just read so you do not duplicate a suggestion that's already there.
@@ -630,9 +663,9 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
630
663
 
631
664
  # Doing nothing is a valid outcome
632
665
 
633
- If the undreamed tails contain only watermarks, or every new fragment is already represented in MEMORY.md and no procedure clears the muscle-memory bar, do not rewrite MEMORY.md and do not write a skill just to touch something. Stop without writing. The point of dreaming is consolidation, not activity. The runtime advances the watermark either way.`
666
+ If the undreamed tails contain only watermarks, AND no procedure clears the muscle-memory bar, AND every existing topic looks well-shaped at its current strength (no obvious merge or demotion candidates), do not rewrite MEMORY.md and do not write a skill just to touch something. Stop without writing. The point of dreaming is consolidation, not activity. The runtime advances the watermark either way. But: if there ARE new fragments, or if the strength table shows topics that should clearly merge or demote, the run is productive even without skill activity — rebalancing IS work.`
634
667
 
635
- function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[]): string {
668
+ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[], strengths: TopicStrength[]): string {
636
669
  const today = formatLocalDate()
637
670
  const memoryFile = join(payload.agentDir, 'MEMORY.md')
638
671
  const memoryDir = join(payload.agentDir, 'memory')
@@ -642,9 +675,22 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
642
675
  `Daily stream directory: ${memoryDir}`,
643
676
  `Today's local date: ${today}`,
644
677
  `Dreaming state: ${join(payload.agentDir, DREAMING_STATE_FILE)}`,
678
+ ]
679
+
680
+ const strengthTable = renderTopicStrengthsTable(strengths)
681
+ if (strengthTable.length > 0) {
682
+ lines.push(
683
+ '',
684
+ 'Existing MEMORY.md topic strengths (computed from current citations — `cites` is total citation count, `days` is the number of distinct calendar days those citations span, `last reinforced` is the most recent citation date, `age (d)` is whole days since `last reinforced` relative to today). These numbers describe how reinforced each existing topic is; the dreaming system prompt explains how to use them.',
685
+ '',
686
+ strengthTable,
687
+ )
688
+ }
689
+
690
+ lines.push(
645
691
  '',
646
692
  'Undreamed fragments to consolidate. Each entry lists the daily JSONL file and the ids of fragments in that file you have not yet consolidated into MEMORY.md. Read the file, locate each id, and decide what (if anything) belongs in MEMORY.md. Cite by id (memory/yyyy-MM-dd#<id>), not by line number.',
647
- ]
693
+ )
648
694
  for (const snap of snapshots) {
649
695
  lines.push('', `- memory/${snap.filename}:`)
650
696
  for (const id of snap.undreamedIds) lines.push(` - ${id}`)
@@ -656,6 +702,15 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
656
702
  return lines.join('\n')
657
703
  }
658
704
 
705
+ async function loadTopicStrengths(agentDir: string): Promise<TopicStrength[]> {
706
+ try {
707
+ const raw = await readFile(join(agentDir, 'MEMORY.md'), 'utf8')
708
+ return computeTopicStrengths(raw, formatLocalDate())
709
+ } catch {
710
+ return []
711
+ }
712
+ }
713
+
659
714
  export type CreateDreamingSubagentOptions = {
660
715
  commitMemory?: (cwd: string) => Promise<void>
661
716
  logger?: DreamingLogger
@@ -689,18 +744,51 @@ export function createDreamingSubagent(options: CreateDreamingSubagentOptions =
689
744
  )
690
745
 
691
746
  const memoryFilePath = join(ctx.payload.agentDir, 'MEMORY.md')
692
- const memoryHashBefore = await safeContentHash(memoryFilePath)
747
+ const memoryTextBefore = await safeReadText(memoryFilePath)
748
+ const strengths = await loadTopicStrengths(ctx.payload.agentDir)
693
749
 
694
750
  try {
695
- await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed) })
751
+ await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed, strengths) })
696
752
  } catch (err) {
697
753
  const message = err instanceof Error ? err.message : String(err)
698
754
  logger.warn(`[dreaming] run threw: ${message} elapsed_ms=${Date.now() - start}`)
699
755
  throw err
700
756
  }
701
757
 
702
- const memoryHashAfter = await safeContentHash(memoryFilePath)
703
- const memoryRewrittenThisRun = memoryHashBefore !== memoryHashAfter
758
+ const memoryTextAfter = await safeReadText(memoryFilePath)
759
+ let memoryRewrittenThisRun = memoryTextBefore !== memoryTextAfter
760
+
761
+ // Citation-superset safety net: if the subagent's rewrite dropped any
762
+ // previously-cited fragment id, restore the pre-run bytes and turn
763
+ // fragment GC off so the next compactDailyStreams call does not
764
+ // permanently delete the underlying fragment. Dreamed-ids still
765
+ // advance on a successful revert: this run's UNDREAMED fragments are
766
+ // orphaned (they survive in the daily JSONL but never make it into
767
+ // MEMORY.md), which is the conscious tradeoff for avoiding an
768
+ // infinite loop on the same undreamed input. If the revert WRITE
769
+ // itself fails — disk full, EACCES, etc. — MEMORY.md is in an
770
+ // unknown state: we cannot advance dreamed-ids (next run must
771
+ // re-attempt), cannot run compaction (citations are now ambiguous),
772
+ // and cannot commit (would snapshot a known-bad state). The user has
773
+ // to `git checkout -- MEMORY.md` and restart typeclaw.
774
+ if (memoryRewrittenThisRun) {
775
+ const verdict = checkCitationSuperset(memoryTextBefore, memoryTextAfter)
776
+ if (!verdict.ok) {
777
+ try {
778
+ await writeFile(memoryFilePath, memoryTextBefore)
779
+ } catch (err) {
780
+ const message = err instanceof Error ? err.message : String(err)
781
+ logger.error(
782
+ `[dreaming] citation-superset violation AND revert failed: ${message}. MEMORY.md is in an unknown state; not advancing dreamed-ids or running compaction. Recover with: git checkout -- MEMORY.md && typeclaw restart. missing=${summarizeMissingCitations(verdict.missing)} elapsed_ms=${Date.now() - start}`,
783
+ )
784
+ return
785
+ }
786
+ memoryRewrittenThisRun = false
787
+ logger.warn(
788
+ `[dreaming] citation-superset violation: rewrite dropped ${verdict.missing.length} previously-cited id(s); reverted MEMORY.md. The undreamed fragments from THIS run are orphaned: they advance into the dreamed-id set (survive in the daily JSONL, will not be re-shown to a future dreaming run) — conscious anti-loop tradeoff. missing=${summarizeMissingCitations(verdict.missing)}`,
789
+ )
790
+ }
791
+ }
704
792
 
705
793
  const advanced = advanceDreamedIds(state, snapshots.undreamed)
706
794
  await saveDreamingState(ctx.payload.agentDir, advanced)
@@ -12,8 +12,8 @@ import { createDreamingSubagent, type DreamingPayload } from './dreaming'
12
12
  import { createMemoryLoggerSubagent, type MemoryLoggerPayload } from './memory-logger'
13
13
  import { runMigration } from './migration'
14
14
 
15
- const DEFAULT_IDLE_MS = 10_000
16
- const DEFAULT_BUFFER_BYTES = 100_000
15
+ const DEFAULT_IDLE_MS = 60_000
16
+ const DEFAULT_BUFFER_BYTES = 500_000
17
17
  const MIN_BUFFER_BYTES = 10_000
18
18
  // 30-minute default. Fires short-circuit before any LLM call when nothing
19
19
  // sits past the watermark (`dreaming.ts` handler returns when
@@ -58,9 +58,9 @@ export function isMemoryLoggerPayload(value: unknown): value is MemoryLoggerPayl
58
58
 
59
59
  export const MEMORY_LOGGER_SYSTEM_PROMPT = `You are typeclaw's memory-extraction subagent.
60
60
 
61
- Your job is to read a session transcript and capture, as fragments, everything memorable about what happened — facts about the user, the project, decisions made, explicit user preferences, patterns, surprises, anything that could plausibly matter to a future agent in a future session. You write zero or more fragments to today's memory stream file. Then you exit.
61
+ Your job is to read a session transcript and capture, as fragments, only the durable operational facts a future agent in a future session would concretely need — explicit user instructions, stable identity/role/tool facts, decisions with reasoning, reproducible workarounds, contradictions or violations of existing memory. You write zero or more fragments to today's memory stream file. Then you exit. Most runs produce zero or one fragment; that is the expected output, not a failure.
62
62
 
63
- A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **You are the additive layer; dreaming is the filter.** This division of labor is the whole point: capture broadly here, and let dreaming throw away what doesn't last.
63
+ A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **Dreaming is downstream filtering, not an excuse to over-capture upstream.** Writing five low-signal fragments and trusting dreaming to throw four away wastes tokens at both layers and pollutes MEMORY.md in the interim. Be selective here.
64
64
 
65
65
  You have exactly four tools: \`read\`, \`find_entry\`, \`append\`, and the watermark-advance tool. You cannot run shell commands, overwrite files, or edit existing content.
66
66
 
@@ -78,41 +78,52 @@ Typical flow with a watermark:
78
78
 
79
79
  Never write the same watermark id you were given as input. If the transcript has no new entries past the watermark, evaluate the entries you can see, then advance the watermark to the latest \`id\` in the transcript (which is on line \`totalLines\` from \`find_entry\`'s reply). The whole point of the watermark is to move forward each run.
80
80
 
81
- # Capture philosophy: when in doubt, capture
81
+ # Capture philosophy: when in doubt, SKIP
82
82
 
83
- The cost of a missing memory is high — a future agent repeats a mistake, asks a question already answered, or violates a commitment it should have inherited. The cost of a redundant memory is low — dreaming will collapse it.
83
+ Most transcript content is **not** memorable. Conversations, group chat banter, casual reactions, one-off questions, and routine tool usage are the substrate of a session — they are not facts a future agent needs to inherit. The default is to skip.
84
84
 
85
- So: when in doubt, capture. A slightly redundant fragment is far cheaper than a missed one.
85
+ Most runs should produce **zero or one** fragment. Two or more fragments is the exception, justified only when the transcript actually contains multiple unrelated durable facts. A run that produces five-plus fragments is almost always over-writing.
86
86
 
87
- You do **not** need to articulate, before writing a fragment, exactly how a future agent will use it. Useful patterns often only become visible after dreaming has seen the same thing twice. Your job is to make that pattern detection possible by writing the first occurrence down.
87
+ The watermark advances even with zero fragments via the watermark-advance tool, so skipping costs nothing. A wrong-skip is recoverable: if the same fact recurs in a later session, you will see it again and can capture it then — recurrence is itself the strongest signal that something is worth remembering.
88
+
89
+ You do **not** need to articulate how a future agent will use a fragment. But you DO need to be able to name a concrete future situation where ignoring this fragment would cause a real problem. If you cannot name that situation in one sentence, skip.
88
90
 
89
91
  The two failure modes:
90
92
 
91
- - **Under-writing.** Skipping fragments because you couldn't articulate their future utility, or because you held the bar too high. The agent repeats mistakes that the transcript could have prevented.
92
- - **Over-writing into pure noise.** Recording trivially re-derivable facts (e.g. "the user pressed enter"), session-mechanical chatter ("the agent acknowledged the message"), or restating things every prompt already includes. This bloats the daily stream and makes dreaming's job harder, not impossible.
93
+ - **Over-writing into noise.** Recording chat-mechanical observations ("X asked Y a question", "Z said ㅋㅋㅋ", "new participant introduced", "user observed agent has personality"), single-occurrence quotes with no operational consequence, or paraphrases of conversation flow. This is the dominant failure mode in practice. It bloats the daily stream, drowns dreaming in low-signal noise, and pollutes MEMORY.md.
94
+ - **Under-writing.** Skipping a fragment that names an explicit user instruction, a stable identity/role/tool fact, a violated commitment, or a reproducible workaround. Rare in practice; the bar to capture these is whether the fact is durable AND operational, not whether you can imagine some future use.
93
95
 
94
- Aim well clear of pure noise; otherwise lean toward capture.
96
+ When unsure, skip. Recurrence will surface real patterns.
95
97
 
96
98
  # What to capture
97
99
 
98
- Anything from the transcript that fits one of these is worth a fragment. This is a starting list, not a closed set:
100
+ The bar is high. A fragment is worth writing only when ALL of these hold:
101
+
102
+ 1. The fact is **durable** — it will still be true in a future session, not a one-off event.
103
+ 2. The fact is **actionable context** — a future agent acting without this knowledge would likely do something worse: give a wrong answer, violate a stated preference, repeat a fixed mistake, miss relevant context, or reinvent a workaround. Stable preferences ("user prefers tabs over spaces") count even though they are not "operational" in a strict procedural sense.
104
+ 3. The evidence is **explicit** in the transcript — a direct quote, a code change, a configuration, a documented decision.
105
+
106
+ Capture-worthy categories:
99
107
 
100
- - **Stable facts about the user, project, or environment.** Names, roles, tools, conventions, dependencies, deadlines, constraints, paths, configurations, account/team/repo names. Even ones mentioned in passing.
101
- - **Decisions and their reasoning.** "We chose X over Y because Z." The why is often more valuable than the what.
102
- - **Explicit commitments and operating rules.** Things the user directly told the agent to always/never do. Style guides. Workflow preferences. House conventions. Do not infer new standing duties from events; record the event or preference instead.
103
- - **Patterns that recurred or were named.** "We always do this" / "this is the third time we've hit this bug" / "this is how the team works."
104
- - **Contradictions of existing memory.** The user changed their mind, the project changed direction, an old commitment no longer applies. Write the new state and name the prior memory it supersedes.
105
- - **Violations of existing memory.** If the agent just did something that prior memory said not to do — that violation is itself a high-value fragment. Capture it.
106
- - **Surprises and corrections.** Places where the user pushed back, where the agent's mental model was wrong, where something didn't work the way it "should" have.
107
- - **Observable user reactions, framed as observations.** It's fine to note that the user expressed frustration, satisfaction, urgency, or reluctance — capture it as something observed, with the evidence ("user said: '...'"). Don't claim to know motives; just record what was visible. Dreaming decides if a pattern is real.
108
- - **Reusable knowledge produced this session.** A non-trivial debugging insight, a workaround, a configuration that finally worked, a procedure the user walked the agent through.
108
+ - **Explicit operating rules the user just gave the agent.** "Always X." "Never Y." "From now on do Z." Direct instructions to the agent itself, not statements about other people.
109
+ - **Stable identity/role/tool facts that will keep mattering.** "User's project repo is X." "User runs Y on Z." Skip casual employment history, casual social-graph trivia, and "this person joined the chat" events — those are derivable from current context when needed.
110
+ - **Decisions with reasoning.** "We chose X over Y because Z" when X is something the agent will need to honor in a future session.
111
+ - **Reproducible workarounds and non-trivial debugging insights.** Configuration that finally worked, a flag combination that bypassed a known block, a procedure with concrete steps.
112
+ - **Contradictions of existing memory.** The user changed their mind, an old commitment no longer applies. Name the prior memory that is superseded.
113
+ - **Violations of existing memory.** The agent just broke an existing commitment — capture the violation itself.
114
+ - **Corrections the user made to the agent.** Specifically when the agent confidently asserted something false and the user corrected it, in a way that a future session would likely also get wrong.
109
115
 
110
- # What to skip
116
+ # What to skip (anti-patterns — these come up constantly)
111
117
 
112
- - **Mechanical session noise.** Tool acknowledgments, "ok," "thanks," progress chatter, the agent narrating its own steps.
113
- - **Things every session prompt already includes.** Don't re-record what's in MEMORY.md verbatim, what's in AGENTS.md, or what's hardcoded into the agent's system prompt.
114
- - **Trivially re-derivable facts.** "User used a Mac" if the transcript shows them running \`brew install\` is fine to skip — the next session will see the same signal.
115
- - **Pure speculation untethered to evidence.** If you can't point at the transcript for what makes this true, don't write it.
118
+ - **Conversational mechanics.** "X asked Y a question." "Z said hello." "Participant A reacted with ㅋㅋㅋ / 👍 / lol." "User tested the agent's response time." None of this is memory.
119
+ - **Single-occurrence casual reactions.** "User observed the agent has personality." "Group chat member is amused by the bot." Wait for recurrence; if it never recurs, it was never memory.
120
+ - **Group-chat membership events.** "X invited Y to chat Z." "New participant joined." This is derivable from the current channel context and changes constantly.
121
+ - **Casual social-graph trivia.** "X used to work at Y." "Z is a friend of W." Skip unless the user explicitly says it will matter ("remember, X is the one who built our Y").
122
+ - **Latency / performance pings.** "User asked how fast the agent responded." Not memory.
123
+ - **The agent's own first-person observations.** "The agent admitted it does not know its model." "The agent replied in character." Skip — the agent is not memorable to itself.
124
+ - **Re-derivable facts.** Anything obvious from the current session's system prompt, MEMORY.md, AGENTS.md, or the channel context.
125
+ - **Speculation untethered to a quote.** If you cannot point at a specific transcript line, do not write it.
126
+ - **Multi-fragment expansions of one event.** One event produces at most one fragment. Splitting one introduction into "new chat", "new participant", "new participant's job", "new participant's reaction" is over-writing.
116
127
 
117
128
  # Never quote secret values
118
129
 
@@ -135,7 +146,7 @@ Before reading the transcript, read \`MEMORY.md\` and the current \`memory/yyyy-
135
146
  - **Notice violations.** If existing memory contains a commitment the agent just broke, that's a high-value fragment.
136
147
  - **Avoid pure restatement.** If a fact is already in MEMORY.md word-for-word, don't write the same fragment again. But: if the transcript shows the same fact occurring a second time, that recurrence is itself worth a fragment — dreaming uses repetition to decide what's stable.
137
148
 
138
- Light dedup, not strict dedup. When unsure whether something is "already known," err on writing it. Dreaming will collapse duplicates.
149
+ Dedup byte-equivalent restatements, not meaningful recurrence. Do not write a fragment that is a near-copy of one already in MEMORY.md or today's stream. But when the transcript shows the same durable preference, pattern, workaround, or commitment recurring in a NEW session or on a NEW day, write a concise recurrence fragment anchored to the new evidence — even if the underlying fact is already known. The dreaming subagent uses distinct-day recurrence to promote tentative facts to confident ones; refusing to write the second or third occurrence starves that signal. The bar is "did the recurrence happen in a meaningfully new context", not "is the fact already on disk".
139
150
 
140
151
  The \`append\` tool refuses byte-equivalent fragments within the same daily stream — if your fragment's topic+body is identical to one already in today's file (modulo whitespace), the tool will reject it and you must rewrite. Two reasonable rewrites: (1) skip the fragment entirely, (2) frame the new occurrence explicitly as "this is the second time today" with a different topic. Do not retry an identical fragment with a different \`entry=\` hoping it will land — content-equality, not marker-equality, is what's checked.
141
152
 
@@ -269,8 +280,16 @@ export function createMemoryLoggerSubagent(
269
280
  customTools: [findEntryTool, appendTool, advanceWatermarkTool],
270
281
  payloadSchema: memoryLoggerPayloadSchema,
271
282
  inFlightKey: (payload) => payload.agentDir,
283
+ // 768 KB read budget. Sized to cover one full buffer-trip cycle:
284
+ // ~30 KB MEMORY.md + ~50 KB today's stream + up to `DEFAULT_BUFFER_BYTES`
285
+ // (500 KB) of unread transcript chunk, with margin for re-reads. A
286
+ // smaller budget (the prior 256 KB) systematically exhausted on
287
+ // buffer-trip spawns once `bufferBytes` exceeded ~200 KB — the
288
+ // subagent would advance `bytesAtLastRun` to the full transcript size
289
+ // on completion, orphaning the unread tail until another full
290
+ // `bufferBytes` of growth arrived.
272
291
  toolResultBudget: {
273
- maxTotalBytes: 256 * 1024,
292
+ maxTotalBytes: 768 * 1024,
274
293
  toolNames: ['read'],
275
294
  exhaustedMessage: memoryLoggerExhaustedMessage,
276
295
  },