typeclaw 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -15
- package/auth.schema.json +113 -0
- package/package.json +1 -1
- package/secrets.schema.json +113 -0
- package/src/agent/auth.ts +4 -2
- package/src/agent/index.ts +16 -28
- package/src/agent/model-fallback.ts +127 -0
- package/src/agent/session-meta.ts +1 -1
- package/src/agent/session-origin.ts +3 -2
- package/src/agent/tools/curl-impersonate.ts +300 -0
- package/src/agent/tools/ddg.ts +13 -88
- package/src/agent/tools/webfetch/fetch.ts +105 -2
- package/src/agent/tools/webfetch/tool.ts +4 -0
- package/src/bundled-plugins/agent-browser/shim.ts +47 -0
- package/src/bundled-plugins/backup/subagents.ts +2 -0
- package/src/bundled-plugins/memory/README.md +49 -12
- package/src/bundled-plugins/memory/citation-superset.ts +63 -0
- package/src/bundled-plugins/memory/dreaming.ts +105 -17
- package/src/bundled-plugins/memory/index.ts +2 -2
- package/src/bundled-plugins/memory/memory-logger.ts +45 -26
- package/src/bundled-plugins/memory/strength.ts +127 -0
- package/src/bundled-plugins/memory/topics.ts +75 -0
- package/src/bundled-plugins/security/index.ts +88 -43
- package/src/bundled-plugins/security/permissions.ts +36 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
- package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
- package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
- package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
- package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
- package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
- package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
- package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
- package/src/channels/adapters/github/auth-app.ts +120 -0
- package/src/channels/adapters/github/auth-pat.ts +50 -0
- package/src/channels/adapters/github/auth.ts +33 -0
- package/src/channels/adapters/github/channel-resolver.ts +30 -0
- package/src/channels/adapters/github/dedup.ts +26 -0
- package/src/channels/adapters/github/event-allowlist.ts +8 -0
- package/src/channels/adapters/github/fetch-attachment.ts +5 -0
- package/src/channels/adapters/github/history.ts +63 -0
- package/src/channels/adapters/github/inbound.ts +286 -0
- package/src/channels/adapters/github/index.ts +370 -0
- package/src/channels/adapters/github/managed-path.ts +54 -0
- package/src/channels/adapters/github/membership.ts +35 -0
- package/src/channels/adapters/github/outbound.ts +145 -0
- package/src/channels/adapters/github/webhook-register.ts +349 -0
- package/src/channels/manager.ts +94 -9
- package/src/channels/router.ts +194 -28
- package/src/channels/schema.ts +31 -1
- package/src/channels/tunnel-bridge.ts +51 -0
- package/src/channels/types.ts +3 -1
- package/src/cli/builtins.ts +28 -0
- package/src/cli/channel.ts +511 -25
- package/src/cli/container-command-client.ts +244 -0
- package/src/cli/cron.ts +173 -0
- package/src/cli/host-command-runner.ts +150 -0
- package/src/cli/index.ts +42 -1
- package/src/cli/init.ts +400 -67
- package/src/cli/model.ts +14 -4
- package/src/cli/oauth-callbacks.ts +49 -0
- package/src/cli/plugin-command-help.ts +49 -0
- package/src/cli/plugin-commands-dispatch.ts +112 -0
- package/src/cli/plugin-commands.ts +118 -0
- package/src/cli/provider.ts +3 -20
- package/src/cli/tui.ts +10 -2
- package/src/cli/tunnel.ts +533 -0
- package/src/cli/ui.ts +8 -3
- package/src/config/config.ts +134 -24
- package/src/config/models-mutation.ts +42 -8
- package/src/config/providers-mutation.ts +12 -8
- package/src/container/start.ts +48 -4
- package/src/cron/bridge.ts +136 -0
- package/src/cron/consumer.ts +174 -48
- package/src/cron/index.ts +19 -2
- package/src/cron/list.ts +105 -0
- package/src/cron/scheduler.ts +12 -3
- package/src/cron/schema.ts +11 -3
- package/src/doctor/checks.ts +0 -50
- package/src/init/dockerfile.ts +165 -13
- package/src/init/ensure-deps.ts +15 -4
- package/src/init/github-webhook-install.ts +109 -0
- package/src/init/hatching.ts +2 -2
- package/src/init/index.ts +519 -12
- package/src/init/oauth-login.ts +17 -3
- package/src/init/run-bun-install.ts +17 -3
- package/src/init/run-owner-claim.ts +11 -2
- package/src/permissions/builtins.ts +29 -2
- package/src/permissions/match-rule.ts +24 -2
- package/src/permissions/permissions.ts +24 -7
- package/src/permissions/resolve.ts +1 -0
- package/src/plugin/define.ts +44 -1
- package/src/plugin/index.ts +18 -3
- package/src/plugin/manager.ts +16 -0
- package/src/plugin/registry.ts +85 -3
- package/src/plugin/types.ts +144 -1
- package/src/plugin/zod-introspect.ts +100 -0
- package/src/role-claim/match-rule.ts +2 -1
- package/src/run/index.ts +112 -4
- package/src/secrets/index.ts +1 -1
- package/src/secrets/schema.ts +21 -0
- package/src/server/command-runner.ts +476 -0
- package/src/server/index.ts +388 -5
- package/src/shared/index.ts +8 -0
- package/src/shared/protocol.ts +80 -1
- package/src/skills/typeclaw-channel-github/SKILL.md +24 -0
- package/src/skills/typeclaw-config/SKILL.md +27 -26
- package/src/skills/typeclaw-cron/SKILL.md +234 -3
- package/src/skills/typeclaw-memory/SKILL.md +25 -15
- package/src/skills/typeclaw-monorepo/SKILL.md +2 -2
- package/src/skills/typeclaw-permissions/SKILL.md +35 -16
- package/src/skills/typeclaw-plugins/SKILL.md +251 -5
- package/src/skills/typeclaw-tunnels/SKILL.md +111 -0
- package/src/test-helpers/wait-for.ts +50 -0
- package/src/tui/index.ts +70 -7
- package/src/tunnels/__fixtures__/cloudflared-quick-stderr.txt +11 -0
- package/src/tunnels/events.ts +14 -0
- package/src/tunnels/index.ts +12 -0
- package/src/tunnels/log-ring.ts +54 -0
- package/src/tunnels/manager.ts +139 -0
- package/src/tunnels/providers/cloudflare-quick.ts +189 -0
- package/src/tunnels/providers/external.ts +53 -0
- package/src/tunnels/quick-url-parser.ts +5 -0
- package/src/tunnels/types.ts +43 -0
- package/src/usage/report.ts +15 -12
- package/typeclaw.schema.json +311 -26
|
@@ -9,8 +9,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
|
|
|
9
9
|
```json
|
|
10
10
|
{
|
|
11
11
|
"memory": {
|
|
12
|
-
"idleMs": 10000,
|
|
13
|
-
"bufferBytes": 100000,
|
|
12
|
+
"idleMs": 60000,
|
|
13
|
+
"bufferBytes": 500000,
|
|
14
14
|
"dreaming": { "schedule": "*/30 * * * *" }
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -18,8 +18,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
|
|
|
18
18
|
|
|
19
19
|
| Field | Default | Effect |
|
|
20
20
|
| -------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
|
21
|
-
| `memory.idleMs` | `
|
|
22
|
-
| `memory.bufferBytes` | `
|
|
21
|
+
| `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. Default bumped from `10000` to `60000` to reduce spawn churn during conversational sessions where the agent goes idle for short periods between rapid back-and-forth turns. |
|
|
22
|
+
| `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero. Default bumped from `100000` to `500000` so a single conversational session stays within one memory-logger run unless it grows past ~half a megabyte of transcript. |
|
|
23
23
|
| `memory.dreaming` | `{}` (cron job on) | Dreaming cron job is always registered. Override `schedule` to change when it fires. |
|
|
24
24
|
| `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression. Defaults to every 30 minutes; fires short-circuit with zero LLM cost when nothing sits past the watermark, so frequent no-op fires are cheap and let sporadic agents still consolidate while alive (`src/cron/scheduler.ts` has no catchup for missed fires). Second-level schedules are rejected to avoid noisy no-op dreaming loops. |
|
|
25
25
|
|
|
@@ -27,18 +27,52 @@ All fields are **restart-required** — the plugin reads them once at boot.
|
|
|
27
27
|
|
|
28
28
|
## What it contributes
|
|
29
29
|
|
|
30
|
-
| Kind | Name | Notes |
|
|
31
|
-
| -------- | -------------------------- | ----- |
|
|
32
|
-
| Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file. |
|
|
33
|
-
| Subagent | `dreaming` | Reads `MEMORY.md` plus undreamed daily-stream events, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. |
|
|
34
|
-
| Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
|
|
35
|
-
| Hook | `session.idle` | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`. |
|
|
36
|
-
| Hook | `session.end` | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away). |
|
|
30
|
+
| Kind | Name | Notes |
|
|
31
|
+
| -------- | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
32
|
+
| Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file. |
|
|
33
|
+
| Subagent | `dreaming` | Reads `MEMORY.md` plus undreamed daily-stream events, **rebalances** the existing topics using per-topic strength signals (citation count, distinct days, recency) injected into its user prompt, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. The runtime enforces a **citation-superset invariant** on every rewrite: a new MEMORY.md that drops any previously-cited fragment id is reverted to its pre-run bytes (dreamed-ids still advance so the run is not retried in a loop). |
|
|
34
|
+
| Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
|
|
35
|
+
| Hook | `session.idle` | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`. |
|
|
36
|
+
| Hook | `session.end` | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away). |
|
|
37
37
|
|
|
38
38
|
## Memory injection
|
|
39
39
|
|
|
40
40
|
The rendered `# Memory` section (MEMORY.md + undreamed daily-stream tails) is injected into every session's system prompt by core (`src/agent/index.ts` `createResourceLoader` → `loadMemory`), **not** by a plugin hook. It is appended as the last block of the system prompt, after `gitNudge`, so the most-volatile content (daily streams that grow after every memory-logger fire) sits at the bottom of the cache-suffix region. This way a memory change only invalidates the memory section itself rather than everything downstream of it.
|
|
41
41
|
|
|
42
|
+
## Memory saturation (LTP/LTD analogue)
|
|
43
|
+
|
|
44
|
+
MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Without a saturation policy it grows monotonically — every consolidated topic survives forever and citations accumulate across days. The dreaming subagent therefore treats MEMORY.md like human long-term memory: **repetition strengthens, lack of repetition saturates**.
|
|
45
|
+
|
|
46
|
+
### How
|
|
47
|
+
|
|
48
|
+
On every run the runtime computes per-topic strength signals from MEMORY.md's existing citations — `cites` (total), `days` (distinct calendar days those citations span), `last reinforced` (most recent citation date), `age (d)` (whole days since `last reinforced`). The numbers are derived by `src/bundled-plugins/memory/strength.ts` and rendered as a table at the top of the dreaming subagent's user prompt. There is no sidecar file, no schema version, no migration — strength is recomputed on every run from MEMORY.md alone.
|
|
49
|
+
|
|
50
|
+
The subagent uses these numbers to:
|
|
51
|
+
|
|
52
|
+
1. **Promote strong topics.** `days = 1` → tentative ("the user mentioned"). `days >= 3` → confident ("the user consistently"). `days >= 7` → declarative ("the user always"). Promotion is gated on distinct days, not raw citation count — five citations on one day is one debugging session, five citations across five days is a recurring pattern.
|
|
53
|
+
2. **Merge near-duplicates.** Topics that overlap in subject matter get folded into one, with the merged topic's `fragments:` list as the **union** of the source topics' fragment ids.
|
|
54
|
+
3. **Demote decayed topics.** A topic with `cites = 1, days = 1, age >= 30` (or `cites <= 3, days <= 2, age >= 60`) is routed into a `## Historical observations` bucket as a one-line bullet. The fact is preserved in the summary, the citation is preserved (so daily-stream GC keeps the underlying fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Strong topics (`days >= 3`) are never demoted.
|
|
55
|
+
|
|
56
|
+
**There is no hard-deletion path** in this iteration. The historical bucket grows monotonically; the subagent is explicitly told not to attempt quarter-summary collapses because the safety net (below) would revert them. If the bucket becomes inconveniently long in practice, a future runtime change will provide a structured drop mechanism — until then every demoted citation stays alive forever via its one-line bullet.
|
|
57
|
+
|
|
58
|
+
### The citation-superset safety net
|
|
59
|
+
|
|
60
|
+
After every dreaming run that rewrote MEMORY.md, `src/bundled-plugins/memory/citation-superset.ts` checks that the union of fragment ids cited in the NEW file is a superset of the union cited in the OLD file. If any previously-cited id is missing from the rewrite, the runtime:
|
|
61
|
+
|
|
62
|
+
1. Restores MEMORY.md to its pre-run bytes via `writeFile(memoryFilePath, memoryTextBefore)`. The pre-run bytes are captured **before** `runSession` so the revert always has a clean source.
|
|
63
|
+
2. Skips daily-stream fragment GC for this run (no fragments are dropped).
|
|
64
|
+
3. Advances the dreamed-id set anyway. This is the **conscious anti-loop tradeoff**: the run's NEW undreamed fragments are orphaned (they survive in the daily JSONL forever, force-committed, but will not be re-shown to a future dreaming run and therefore never make it into MEMORY.md). The alternative (don't advance) would infinite-loop if the LLM keeps making the same mistake on the same inputs. The orphaned fragments are recoverable from git history (`git log memory/`) by a human operator.
|
|
65
|
+
4. Logs a `[dreaming] citation-superset violation: …` warning naming the dropped ids and explicitly stating the orphaning tradeoff.
|
|
66
|
+
|
|
67
|
+
**Revert-write failure path.** If the `writeFile` in step 1 itself throws (disk full, EACCES, MEMORY.md replaced by a directory by a buggy subagent, etc.), MEMORY.md is in an unknown state. The runtime then:
|
|
68
|
+
|
|
69
|
+
- Skips the dreamed-id advance (so the next run gets a second chance at the same input).
|
|
70
|
+
- Skips compaction (so no fragments are GC'd against an ambiguous citation set).
|
|
71
|
+
- Skips the commit (so a known-bad on-disk state is not snapshotted).
|
|
72
|
+
- Logs a `[dreaming] citation-superset violation AND revert failed: …` ERROR with the recovery command (`git checkout -- MEMORY.md && typeclaw restart`).
|
|
73
|
+
|
|
74
|
+
The check exists because the daily-stream GC in `compactDailyStreams` drops any fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are the only thing that keeps a fragment alive past its first dreaming run — an omitted id means the underlying fragment would be permanently deleted on the next compaction.
|
|
75
|
+
|
|
42
76
|
## Files on disk
|
|
43
77
|
|
|
44
78
|
- **`MEMORY.md`** — long-term memory. Created by the dreaming subagent on first run if absent. Force-committed by the runtime; `skip-worktree` flag is set so the human's `git status` stays clean.
|
|
@@ -66,9 +100,12 @@ In channel sessions, the agent rarely goes idle long enough to trip the timer be
|
|
|
66
100
|
|
|
67
101
|
- `index.test.ts` — composition tests (config schema, hook wiring, debounce semantics, MEMORY.md auto-create).
|
|
68
102
|
- `memory-logger.test.ts` — system prompt invariants, watermark handling.
|
|
69
|
-
- `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants.
|
|
103
|
+
- `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants, citation-superset safety net (revert on dropped id, dreamed-ids still advance, no-revert on legitimate merge, no-revert on first-ever run), saturation-prompt invariants (rebalance-every-run, promotion ladder, historical bucket, demotion thresholds, bucket overflow synthesis).
|
|
70
104
|
- `dreaming-state.test.ts` — fail-open semantics on malformed state.
|
|
71
105
|
- `watermark.test.ts` — marker parsing.
|
|
72
106
|
- `append-tool.test.ts` — append-only semantics.
|
|
73
107
|
- `src/bundled-plugins/guard/policies/skill-authoring.test.ts` — runtime skill authoring guard: path sandboxing, name validation, YAML frontmatter, and write/edit final-content validation.
|
|
74
108
|
- `load-memory.test.ts` — memory section rendering, undreamed-tail filtering, watermark stripping.
|
|
109
|
+
- `topics.test.ts` — citation-attributing parser (per-topic citation grouping for strength signals).
|
|
110
|
+
- `strength.test.ts` — per-topic strength computation (distinct days, recency, age clamping) and markdown table rendering.
|
|
111
|
+
- `citation-superset.test.ts` — the safety-net check (superset semantics, missing-id reporting, summary truncation).
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// Citation-superset safety net for the dreaming subagent's MEMORY.md
|
|
2
|
+
// rewrite. After every dreaming run that touched MEMORY.md, we check that
|
|
3
|
+
// the union of fragment ids cited in the NEW file is a superset of the
|
|
4
|
+
// union cited in the OLD file. If any previously-cited id is missing from
|
|
5
|
+
// the rewrite, the rewrite is rejected.
|
|
6
|
+
//
|
|
7
|
+
// Why this exists: the daily-stream GC in compactDailyStreams drops any
|
|
8
|
+
// fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are
|
|
9
|
+
// the only thing that keeps a fragment alive past its first dreaming run.
|
|
10
|
+
// If the subagent rewrites MEMORY.md and accidentally omits a citation —
|
|
11
|
+
// either by garbling a merged topic's fragments: list or by dropping a
|
|
12
|
+
// topic entirely — the next compaction call permanently deletes the
|
|
13
|
+
// underlying fragment from the daily JSONL. There is no recovery beyond
|
|
14
|
+
// `git revert` of the snapshot commit, and even that loses anything the
|
|
15
|
+
// agent wrote since.
|
|
16
|
+
//
|
|
17
|
+
// The subagent's new rule 5 explicitly allows merging topics and rewriting
|
|
18
|
+
// conclusion paragraphs, with the requirement that the merged topic's
|
|
19
|
+
// `fragments:` list is the union of its source topics'. The LLM can fail
|
|
20
|
+
// to honor that — especially across hundreds of runs over months — so the
|
|
21
|
+
// mechanical check is the safety floor.
|
|
22
|
+
//
|
|
23
|
+
// Detection only. The handler decides what to do with the verdict (revert
|
|
24
|
+
// MEMORY.md to its pre-run bytes, skip daily-stream compaction, still
|
|
25
|
+
// advance the dreamed-id set so we do not loop on the same fragments).
|
|
26
|
+
|
|
27
|
+
import { parseCitations } from './citations'
|
|
28
|
+
|
|
29
|
+
export type CitationSupersetVerdict = { ok: true } | { ok: false; missing: Array<{ date: string; fragmentId: string }> }
|
|
30
|
+
|
|
31
|
+
// Compare the OLD MEMORY.md to the NEW MEMORY.md and report any
|
|
32
|
+
// fragment id that the OLD cited and the NEW does not. Empty old text
|
|
33
|
+
// (first-ever dreaming run, prior file missing) is treated as the empty
|
|
34
|
+
// citation set — any new file passes by construction.
|
|
35
|
+
export function checkCitationSuperset(oldText: string, newText: string): CitationSupersetVerdict {
|
|
36
|
+
const oldCitations = parseCitations(oldText)
|
|
37
|
+
if (oldCitations.size === 0) return { ok: true }
|
|
38
|
+
|
|
39
|
+
const newCitations = parseCitations(newText)
|
|
40
|
+
const missing: Array<{ date: string; fragmentId: string }> = []
|
|
41
|
+
|
|
42
|
+
const dates = [...oldCitations.keys()].sort()
|
|
43
|
+
for (const date of dates) {
|
|
44
|
+
const oldIds = oldCitations.get(date) ?? new Set<string>()
|
|
45
|
+
const newIds = newCitations.get(date) ?? new Set<string>()
|
|
46
|
+
const oldIdList = [...oldIds].sort()
|
|
47
|
+
for (const id of oldIdList) {
|
|
48
|
+
if (!newIds.has(id)) missing.push({ date, fragmentId: id })
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
return missing.length === 0 ? { ok: true } : { ok: false, missing }
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Pretty-print the verdict's missing ids for log output. Keeps the line
|
|
56
|
+
// short by reporting count + first N ids; the full list is reconstructable
|
|
57
|
+
// from MEMORY.md's git history if forensics are ever needed.
|
|
58
|
+
export function summarizeMissingCitations(missing: ReadonlyArray<{ date: string; fragmentId: string }>): string {
|
|
59
|
+
const total = missing.length
|
|
60
|
+
const sample = missing.slice(0, 3).map((m) => `${m.date}#${m.fragmentId}`)
|
|
61
|
+
if (total <= 3) return sample.join(', ')
|
|
62
|
+
return `${sample.join(', ')} (+${total - 3} more)`
|
|
63
|
+
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto'
|
|
2
1
|
import { existsSync } from 'node:fs'
|
|
3
2
|
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
|
|
4
3
|
import { dirname, join } from 'node:path'
|
|
@@ -8,6 +7,7 @@ import { z } from 'zod'
|
|
|
8
7
|
import { lsTool, readTool, type Subagent, writeTool } from '@/plugin'
|
|
9
8
|
import { formatLocalDate, formatLocalDateTime } from '@/shared'
|
|
10
9
|
|
|
10
|
+
import { checkCitationSuperset, summarizeMissingCitations } from './citation-superset'
|
|
11
11
|
import { parseCitations } from './citations'
|
|
12
12
|
import {
|
|
13
13
|
addDreamedIds,
|
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
} from './dreaming-state'
|
|
20
20
|
import type { StreamEvent } from './stream-events'
|
|
21
21
|
import { readEvents, writeEventsAtomic } from './stream-io'
|
|
22
|
+
import { computeTopicStrengths, renderTopicStrengthsTable, type TopicStrength } from './strength'
|
|
22
23
|
|
|
23
24
|
const STREAM_FILE_PATTERN = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
|
|
24
25
|
|
|
@@ -207,12 +208,11 @@ async function loadCitedIds(agentDir: string): Promise<ReadonlyMap<string, Reado
|
|
|
207
208
|
}
|
|
208
209
|
}
|
|
209
210
|
|
|
210
|
-
async function
|
|
211
|
+
async function safeReadText(path: string): Promise<string> {
|
|
211
212
|
try {
|
|
212
|
-
|
|
213
|
-
return createHash('sha256').update(raw).digest('hex')
|
|
213
|
+
return await readFile(path, 'utf8')
|
|
214
214
|
} catch {
|
|
215
|
-
return
|
|
215
|
+
return ''
|
|
216
216
|
}
|
|
217
217
|
}
|
|
218
218
|
|
|
@@ -501,11 +501,11 @@ fragments:
|
|
|
501
501
|
|
|
502
502
|
The date in the prefix is the same as the filename you read the fragment from; the id after \`#\` is the full UUIDv7 from the event's \`id\` field. Do not abbreviate the id. Do not use line numbers — citations are id-based, not line-based, so daily streams can be compacted between dreaming runs without breaking your references.
|
|
503
503
|
|
|
504
|
-
A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments.
|
|
504
|
+
A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments. When you add a NEW citation, never cite a fragment id you did not see in the undreamed tail you actually read. EXISTING citations that are already in MEMORY.md (from prior dreaming runs, whose source fragments are no longer in the undreamed tail) must be preserved per rule 5 — they reference fragments still alive in already-consolidated daily streams.
|
|
505
505
|
|
|
506
506
|
**4. Inherit the memory-logger's standards.** The memory-logger already filtered fragments using strict certainty rules (explicit / deductive / inductive). Your job is consolidation, not loosening the bar. If two fragments contradict, prefer the more recent. If a fragment is ambiguous in isolation but clarified by a later fragment, merge them under one topic. Never promote a single fragment from one day into a stable claim unless its certainty was already \`explicit\` or \`deductive\`.
|
|
507
507
|
|
|
508
|
-
**5.
|
|
508
|
+
**5. Rebalance every run. Preserve every fact and every cited fragment id.** MEMORY.md is a saturated surface (a fixed prompt-budget), not an append-only log — every run is consolidation, not just the runs that get new fragments. You may merge near-duplicate topics into one, fold weakly-reinforced topics into a parent or into the historical-observations bucket (see "Memory saturation" below), and rewrite verbose conclusion paragraphs more tightly. What you must NOT do: drop a fragment id. The merged topic's \`fragments:\` list is the **union** of its source topics' fragment ids. The daily-stream GC depends on MEMORY.md citations to keep evidence alive; an omitted id means the underlying fragment is permanently deleted on the next compaction. If two topics genuinely cover different facts, leave them separate — premature merging loses signal. If a new fragment contradicts an existing entry, replace the entry's conclusion paragraph and keep BOTH the old and new fragment ids in the citations list (the contradiction itself is evidence). The runtime cross-checks your rewrite against the prior MEMORY.md's citation set; a rewrite that drops a previously-cited id will be reverted and your run wasted.
|
|
509
509
|
|
|
510
510
|
**6. Be concise.** Each topic conclusion is one short paragraph. No lists of preferences ("the user likes X, Y, Z"). One topic per concept. If a topic only earned one fragment and the fragment was already small, you may copy its conclusion verbatim — do not pad.
|
|
511
511
|
|
|
@@ -532,7 +532,40 @@ fragments:
|
|
|
532
532
|
|
|
533
533
|
The first line is always \`# Memory\`. Topics are level-2 headings. No other top-level structure.
|
|
534
534
|
|
|
535
|
-
#
|
|
535
|
+
# Memory saturation
|
|
536
|
+
|
|
537
|
+
MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Treat it like human long-term memory: **repetition strengthens, lack of repetition saturates**. The runtime gives you per-topic strength signals at the top of the user prompt — a table with \`cites\` (total citation count), \`days\` (distinct calendar days those citations span), \`last reinforced\`, and \`age (d)\`. Use these numbers to decide what to do with each existing topic on this run. \`days\` is the load-bearing signal: five citations all on one day means a single debugging session that mentioned the same thing five times (a transient burst); five citations across five days means a recurring fact the user keeps coming back to (a stable signal).
|
|
538
|
+
|
|
539
|
+
## Strength tiers and promotion ladder
|
|
540
|
+
|
|
541
|
+
Pick the wording in each conclusion paragraph from the topic's \`days\` count:
|
|
542
|
+
|
|
543
|
+
- **\`days = 1\` — "mentioned":** the topic was observed in one session. Conclusion uses tentative language ("the user mentioned X in the context of Y"). Single-fragment one-day topics that are not reinforced on subsequent runs are demotion candidates (see below).
|
|
544
|
+
- **\`days = 2\` — "observed":** seen twice, on different days. Still tentative — could be a recurring quirk, could be coincidence.
|
|
545
|
+
- **\`days >= 3\` — "consistently":** the topic has been reinforced across at least three distinct days. Conclusion uses confident language ("the user consistently prefers X", "the user's pattern is Y"). Strong enough to anchor near the top of MEMORY.md.
|
|
546
|
+
- **\`days >= 7\` — "always":** seen across at least seven distinct days. Conclusion uses declarative language ("the user always X", "Y is the user's standard"). These are the load-bearing topics; protect them from accidental merges.
|
|
547
|
+
|
|
548
|
+
Promotion is gated on \`days\`, not on \`cites\`. A topic with \`cites = 12, days = 1\` is still "mentioned" — twelve citations in one debugging session is one event, not twelve. Order MEMORY.md so the strongest topics come first; weaker topics drift toward the bottom.
|
|
549
|
+
|
|
550
|
+
## Demotion and the historical-observations bucket
|
|
551
|
+
|
|
552
|
+
When a topic's \`days\` count is low AND \`age (d)\` is high (the user has not come back to it in weeks), it is decayed. Do not delete — **demote**. The bucket is a single topic, always last in MEMORY.md, with this exact shape:
|
|
553
|
+
|
|
554
|
+
\`\`\`
|
|
555
|
+
## Historical observations
|
|
556
|
+
- yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
|
|
557
|
+
- yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
|
|
558
|
+
\`\`\`
|
|
559
|
+
|
|
560
|
+
Each former topic becomes one bullet. The fact is preserved (in the summary), the citation is preserved (so daily-stream GC keeps the fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Demotion candidates: a topic with \`cites = 1, days = 1, age >= 30\`, OR a topic with \`cites <= 3, days <= 2, age >= 60\`. Strong topics (\`days >= 3\`) are not demoted regardless of age — they stayed reinforced when they were active, so they earned their place.
|
|
561
|
+
|
|
562
|
+
When you demote a topic, take its conclusion paragraph and compress it into one short summary sentence for the bullet. Keep the citation date prefix (\`yyyy-MM-dd:\`) so the bullet stays sortable and grep-able. The summary is your last chance to write a useful sentence about this fact — the next time the agent reads MEMORY.md, this bullet is all there is.
|
|
563
|
+
|
|
564
|
+
The bucket grows monotonically: there is **no hard-deletion path**, no quarter-level synthesis, no removal of old bullets. Every demoted citation stays alive forever via its one-line bullet. The runtime safety net rejects any rewrite that drops a previously-cited fragment id, so attempting to collapse old bullets into a summary will be reverted and your run wasted. If the bucket becomes inconveniently long, that is a problem for a future runtime change to address — not something you can resolve from inside a dreaming run.
|
|
565
|
+
|
|
566
|
+
## When MEMORY.md has no strength table
|
|
567
|
+
|
|
568
|
+
A first-ever run sees no existing topics, so the strength table is omitted. In that case the saturation rules above do not apply yet — just consolidate the new fragments into fresh topics. The strength signals start appearing on the second run.
|
|
536
569
|
|
|
537
570
|
While you read the streams, watch for **repeated multi-step procedures** the user has guided the main agent through. When you have evidence (across multiple fragments, ideally across multiple days) that the same procedure keeps happening the same way, you have three response shapes available — pick the smallest one that fits.
|
|
538
571
|
|
|
@@ -620,8 +653,8 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
|
|
|
620
653
|
|
|
621
654
|
1. \`read\` MEMORY.md (it may not exist — that is fine, you start from empty).
|
|
622
655
|
2. For each JSONL daily stream undreamed-tail entry the user message lists, \`read\` the file with \`offset\` set to the first undreamed line. Read every undreamed tail before you start writing, then focus on fragment events' \`topic\` + \`body\` fields.
|
|
623
|
-
3. Reason about what to consolidate. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable.
|
|
624
|
-
4. \`write\` the full new contents of MEMORY.md in one call
|
|
656
|
+
3. Reason about what to consolidate AND about how to rebalance existing topics using the strength signals at the top of the user prompt. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable. Most existing topics will keep their shape; a few merge candidates and a few demotion candidates will surface every run.
|
|
657
|
+
4. \`write\` the full new contents of MEMORY.md in one call. Even if no new fragments earned promotion, a rebalance pass (merging two near-duplicates, demoting a single weak old topic) is still a productive run. \`write\` overwrites; that is the point — MEMORY.md is the single canonical artifact you produce. Remember: every fragment id cited in the previous MEMORY.md must still appear somewhere in the new file (in its same topic, in a merged topic, OR in the historical-observations bucket). The runtime enforces this mechanically and will revert your rewrite if you drop an id.
|
|
625
658
|
5. Decide whether any procedure in the new fragments meets the muscle-memory bar above, and which of the three forms fits.
|
|
626
659
|
- **Form A (skill):** \`ls\` \`memory/skills/\` to see what already exists, \`read\` any candidate's existing \`SKILL.md\` if you might be refining it, then \`write\` the new or refined skill at \`memory/skills/<name>/SKILL.md\` with the frontmatter shape shown above.
|
|
627
660
|
- **Form B (CLI suggestion) or Form C (plugin suggestion):** add a topic to MEMORY.md with the \`proposal:\` line shown above. The CLI/plugin itself is the main agent's responsibility — you do not write under \`packages/\`. Before adding the topic, check the existing MEMORY.md you just read so you do not duplicate a suggestion that's already there.
|
|
@@ -630,9 +663,9 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
|
|
|
630
663
|
|
|
631
664
|
# Doing nothing is a valid outcome
|
|
632
665
|
|
|
633
|
-
If the undreamed tails contain only watermarks,
|
|
666
|
+
If the undreamed tails contain only watermarks, AND no procedure clears the muscle-memory bar, AND every existing topic looks well-shaped at its current strength (no obvious merge or demotion candidates), do not rewrite MEMORY.md and do not write a skill just to touch something. Stop without writing. The point of dreaming is consolidation, not activity. The runtime advances the watermark either way. But: if there ARE new fragments, or if the strength table shows topics that should clearly merge or demote, the run is productive even without skill activity — rebalancing IS work.`
|
|
634
667
|
|
|
635
|
-
function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[]): string {
|
|
668
|
+
function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[], strengths: TopicStrength[]): string {
|
|
636
669
|
const today = formatLocalDate()
|
|
637
670
|
const memoryFile = join(payload.agentDir, 'MEMORY.md')
|
|
638
671
|
const memoryDir = join(payload.agentDir, 'memory')
|
|
@@ -642,9 +675,22 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
|
|
|
642
675
|
`Daily stream directory: ${memoryDir}`,
|
|
643
676
|
`Today's local date: ${today}`,
|
|
644
677
|
`Dreaming state: ${join(payload.agentDir, DREAMING_STATE_FILE)}`,
|
|
678
|
+
]
|
|
679
|
+
|
|
680
|
+
const strengthTable = renderTopicStrengthsTable(strengths)
|
|
681
|
+
if (strengthTable.length > 0) {
|
|
682
|
+
lines.push(
|
|
683
|
+
'',
|
|
684
|
+
'Existing MEMORY.md topic strengths (computed from current citations — `cites` is total citation count, `days` is the number of distinct calendar days those citations span, `last reinforced` is the most recent citation date, `age (d)` is whole days since `last reinforced` relative to today). These numbers describe how reinforced each existing topic is; the dreaming system prompt explains how to use them.',
|
|
685
|
+
'',
|
|
686
|
+
strengthTable,
|
|
687
|
+
)
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
lines.push(
|
|
645
691
|
'',
|
|
646
692
|
'Undreamed fragments to consolidate. Each entry lists the daily JSONL file and the ids of fragments in that file you have not yet consolidated into MEMORY.md. Read the file, locate each id, and decide what (if anything) belongs in MEMORY.md. Cite by id (memory/yyyy-MM-dd#<id>), not by line number.',
|
|
647
|
-
|
|
693
|
+
)
|
|
648
694
|
for (const snap of snapshots) {
|
|
649
695
|
lines.push('', `- memory/${snap.filename}:`)
|
|
650
696
|
for (const id of snap.undreamedIds) lines.push(` - ${id}`)
|
|
@@ -656,6 +702,15 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
|
|
|
656
702
|
return lines.join('\n')
|
|
657
703
|
}
|
|
658
704
|
|
|
705
|
+
/**
 * Best-effort loader for the per-topic strength rows derived from the
 * agent's current MEMORY.md.
 *
 * Reads `<agentDir>/MEMORY.md` as UTF-8 and hands the text, together with
 * today's local date string, to `computeTopicStrengths`.
 *
 * Any failure raised while building the path, reading the file, or
 * synchronously computing the strengths is swallowed and reported as an
 * empty list.
 *
 * @param agentDir Directory that contains the agent's MEMORY.md.
 * @returns The computed topic strengths, or `[]` when they cannot be loaded.
 */
async function loadTopicStrengths(agentDir: string): Promise<TopicStrength[]> {
  let strengths: TopicStrength[] | Promise<TopicStrength[]> = []
  try {
    const memoryPath = join(agentDir, 'MEMORY.md')
    const memoryText = await readFile(memoryPath, 'utf8')
    const today = formatLocalDate()
    strengths = computeTopicStrengths(memoryText, today)
  } catch {
    // Deliberate fallback: no readable MEMORY.md means no strength table.
    strengths = []
  }
  return strengths
}
|
|
713
|
+
|
|
659
714
|
export type CreateDreamingSubagentOptions = {
|
|
660
715
|
commitMemory?: (cwd: string) => Promise<void>
|
|
661
716
|
logger?: DreamingLogger
|
|
@@ -689,18 +744,51 @@ export function createDreamingSubagent(options: CreateDreamingSubagentOptions =
|
|
|
689
744
|
)
|
|
690
745
|
|
|
691
746
|
const memoryFilePath = join(ctx.payload.agentDir, 'MEMORY.md')
|
|
692
|
-
const
|
|
747
|
+
const memoryTextBefore = await safeReadText(memoryFilePath)
|
|
748
|
+
const strengths = await loadTopicStrengths(ctx.payload.agentDir)
|
|
693
749
|
|
|
694
750
|
try {
|
|
695
|
-
await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed) })
|
|
751
|
+
await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed, strengths) })
|
|
696
752
|
} catch (err) {
|
|
697
753
|
const message = err instanceof Error ? err.message : String(err)
|
|
698
754
|
logger.warn(`[dreaming] run threw: ${message} elapsed_ms=${Date.now() - start}`)
|
|
699
755
|
throw err
|
|
700
756
|
}
|
|
701
757
|
|
|
702
|
-
const
|
|
703
|
-
|
|
758
|
+
const memoryTextAfter = await safeReadText(memoryFilePath)
|
|
759
|
+
let memoryRewrittenThisRun = memoryTextBefore !== memoryTextAfter
|
|
760
|
+
|
|
761
|
+
// Citation-superset safety net: if the subagent's rewrite dropped any
|
|
762
|
+
// previously-cited fragment id, restore the pre-run bytes and turn
|
|
763
|
+
// fragment GC off so the next compactDailyStreams call does not
|
|
764
|
+
// permanently delete the underlying fragment. Dreamed-ids still
|
|
765
|
+
// advance on a successful revert: this run's UNDREAMED fragments are
|
|
766
|
+
// orphaned (they survive in the daily JSONL but never make it into
|
|
767
|
+
// MEMORY.md), which is the conscious tradeoff for avoiding an
|
|
768
|
+
// infinite loop on the same undreamed input. If the revert WRITE
|
|
769
|
+
// itself fails — disk full, EACCES, etc. — MEMORY.md is in an
|
|
770
|
+
// unknown state: we cannot advance dreamed-ids (next run must
|
|
771
|
+
// re-attempt), cannot run compaction (citations are now ambiguous),
|
|
772
|
+
// and cannot commit (would snapshot a known-bad state). The user has
|
|
773
|
+
// to `git checkout MEMORY.md` and re-run.
|
|
774
|
+
if (memoryRewrittenThisRun) {
|
|
775
|
+
const verdict = checkCitationSuperset(memoryTextBefore, memoryTextAfter)
|
|
776
|
+
if (!verdict.ok) {
|
|
777
|
+
try {
|
|
778
|
+
await writeFile(memoryFilePath, memoryTextBefore)
|
|
779
|
+
} catch (err) {
|
|
780
|
+
const message = err instanceof Error ? err.message : String(err)
|
|
781
|
+
logger.error(
|
|
782
|
+
`[dreaming] citation-superset violation AND revert failed: ${message}. MEMORY.md is in an unknown state; not advancing dreamed-ids or running compaction. Recover with: git checkout -- MEMORY.md && typeclaw restart. missing=${summarizeMissingCitations(verdict.missing)} elapsed_ms=${Date.now() - start}`,
|
|
783
|
+
)
|
|
784
|
+
return
|
|
785
|
+
}
|
|
786
|
+
memoryRewrittenThisRun = false
|
|
787
|
+
logger.warn(
|
|
788
|
+
`[dreaming] citation-superset violation: rewrite dropped ${verdict.missing.length} previously-cited id(s); reverted MEMORY.md. The undreamed fragments from THIS run are orphaned: they advance into the dreamed-id set (survive in the daily JSONL, will not be re-shown to a future dreaming run) — conscious anti-loop tradeoff. missing=${summarizeMissingCitations(verdict.missing)}`,
|
|
789
|
+
)
|
|
790
|
+
}
|
|
791
|
+
}
|
|
704
792
|
|
|
705
793
|
const advanced = advanceDreamedIds(state, snapshots.undreamed)
|
|
706
794
|
await saveDreamingState(ctx.payload.agentDir, advanced)
|
|
@@ -12,8 +12,8 @@ import { createDreamingSubagent, type DreamingPayload } from './dreaming'
|
|
|
12
12
|
import { createMemoryLoggerSubagent, type MemoryLoggerPayload } from './memory-logger'
|
|
13
13
|
import { runMigration } from './migration'
|
|
14
14
|
|
|
15
|
-
const DEFAULT_IDLE_MS =
|
|
16
|
-
const DEFAULT_BUFFER_BYTES =
|
|
15
|
+
const DEFAULT_IDLE_MS = 60_000
|
|
16
|
+
const DEFAULT_BUFFER_BYTES = 500_000
|
|
17
17
|
const MIN_BUFFER_BYTES = 10_000
|
|
18
18
|
// 30-minute default. Fires short-circuit before any LLM call when nothing
|
|
19
19
|
// sits past the watermark (`dreaming.ts` handler returns when
|
|
@@ -58,9 +58,9 @@ export function isMemoryLoggerPayload(value: unknown): value is MemoryLoggerPayl
|
|
|
58
58
|
|
|
59
59
|
export const MEMORY_LOGGER_SYSTEM_PROMPT = `You are typeclaw's memory-extraction subagent.
|
|
60
60
|
|
|
61
|
-
Your job is to read a session transcript and capture, as fragments,
|
|
61
|
+
Your job is to read a session transcript and capture, as fragments, only the durable operational facts a future agent in a future session would concretely need — explicit user instructions, stable identity/role/tool facts, decisions with reasoning, reproducible workarounds, contradictions or violations of existing memory. You write zero or more fragments to today's memory stream file. Then you exit. Most runs produce zero or one fragment; that is the expected output, not a failure.
|
|
62
62
|
|
|
63
|
-
A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **
|
|
63
|
+
A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **Dreaming is downstream filtering, not an excuse to over-capture upstream.** Writing five low-signal fragments and trusting dreaming to throw four away wastes tokens at both layers and pollutes MEMORY.md in the interim. Be selective here.
|
|
64
64
|
|
|
65
65
|
You have exactly four tools: \`read\`, \`find_entry\`, \`append\`, and the watermark-advance tool. You cannot run shell commands, overwrite files, or edit existing content.
|
|
66
66
|
|
|
@@ -78,41 +78,52 @@ Typical flow with a watermark:
|
|
|
78
78
|
|
|
79
79
|
Never write the same watermark id you were given as input. If the transcript has no new entries past the watermark, evaluate the entries you can see, then advance the watermark to the latest \`id\` in the transcript (which is on line \`totalLines\` from \`find_entry\`'s reply). The whole point of the watermark is to move forward each run.
|
|
80
80
|
|
|
81
|
-
# Capture philosophy: when in doubt,
|
|
81
|
+
# Capture philosophy: when in doubt, SKIP
|
|
82
82
|
|
|
83
|
-
|
|
83
|
+
Most transcript content is **not** memorable. Conversations, group chat banter, casual reactions, one-off questions, and routine tool usage are the substrate of a session — they are not facts a future agent needs to inherit. The default is to skip.
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
Most runs should produce **zero or one** fragment. Two or more fragments is the exception, justified only when the transcript actually contains multiple unrelated durable facts. A run that produces five-plus fragments is almost always over-writing.
|
|
86
86
|
|
|
87
|
-
|
|
87
|
+
The watermark advances even with zero fragments via the watermark-advance tool, so skipping costs nothing. A wrong-skip is recoverable: if the same fact recurs in a later session, you will see it again and can capture it then — recurrence is itself the strongest signal that something is worth remembering.
|
|
88
|
+
|
|
89
|
+
You do **not** need to articulate how a future agent will use a fragment. But you DO need to be able to name a concrete future situation where ignoring this fragment would cause a real problem. If you cannot name that situation in one sentence, skip.
|
|
88
90
|
|
|
89
91
|
The two failure modes:
|
|
90
92
|
|
|
91
|
-
- **
|
|
92
|
-
- **
|
|
93
|
+
- **Over-writing into noise.** Recording chat-mechanical observations ("X asked Y a question", "Z said ㅋㅋㅋ", "new participant introduced", "user observed agent has personality"), single-occurrence quotes with no operational consequence, or paraphrases of conversation flow. This is the dominant failure mode in practice. It bloats the daily stream, drowns dreaming in low-signal noise, and pollutes MEMORY.md.
|
|
94
|
+
- **Under-writing.** Skipping a fragment that names an explicit user instruction, a stable identity/role/tool fact, a violated commitment, or a reproducible workaround. Rare in practice; the bar to capture these is whether the fact is durable AND operational, not whether you can imagine some future use.
|
|
93
95
|
|
|
94
|
-
|
|
96
|
+
When unsure, skip. Recurrence will surface real patterns.
|
|
95
97
|
|
|
96
98
|
# What to capture
|
|
97
99
|
|
|
98
|
-
|
|
100
|
+
The bar is high. A fragment is worth writing only when ALL of these hold:
|
|
101
|
+
|
|
102
|
+
1. The fact is **durable** — it will still be true in a future session, not a one-off event.
|
|
103
|
+
2. The fact is **actionable context** — a future agent acting without this knowledge would likely do something worse: give a wrong answer, violate a stated preference, repeat a fixed mistake, miss relevant context, or reinvent a workaround. Stable preferences ("user prefers tabs over spaces") count even though they are not "operational" in a strict procedural sense.
|
|
104
|
+
3. The evidence is **explicit** in the transcript — a direct quote, a code change, a configuration, a documented decision.
|
|
105
|
+
|
|
106
|
+
Capture-worthy categories:
|
|
99
107
|
|
|
100
|
-
- **
|
|
101
|
-
- **
|
|
102
|
-
- **
|
|
103
|
-
- **
|
|
104
|
-
- **Contradictions of existing memory.** The user changed their mind,
|
|
105
|
-
- **Violations of existing memory.**
|
|
106
|
-
- **
|
|
107
|
-
- **Observable user reactions, framed as observations.** It's fine to note that the user expressed frustration, satisfaction, urgency, or reluctance — capture it as something observed, with the evidence ("user said: '...'"). Don't claim to know motives; just record what was visible. Dreaming decides if a pattern is real.
|
|
108
|
-
- **Reusable knowledge produced this session.** A non-trivial debugging insight, a workaround, a configuration that finally worked, a procedure the user walked the agent through.
|
|
108
|
+
- **Explicit operating rules the user just gave the agent.** "Always X." "Never Y." "From now on do Z." Direct instructions to the agent itself, not statements about other people.
|
|
109
|
+
- **Stable identity/role/tool facts that will keep mattering.** "User's project repo is X." "User runs Y on Z." Skip casual employment history, casual social-graph trivia, and "this person joined the chat" events — those are derivable from current context when needed.
|
|
110
|
+
- **Decisions with reasoning.** "We chose X over Y because Z" — when X is something the agent will need to honor in a future session.
|
|
111
|
+
- **Reproducible workarounds and non-trivial debugging insights.** Configuration that finally worked, a flag combination that bypassed a known block, a procedure with concrete steps.
|
|
112
|
+
- **Contradictions of existing memory.** The user changed their mind, an old commitment no longer applies. Name the prior memory that is superseded.
|
|
113
|
+
- **Violations of existing memory.** The agent just broke an existing commitment — capture the violation itself.
|
|
114
|
+
- **Corrections the user made to the agent.** Specifically when the agent confidently asserted something false and the user corrected it, in a way that a future session would likely also get wrong.
|
|
109
115
|
|
|
110
|
-
# What to skip
|
|
116
|
+
# What to skip (anti-patterns — these come up constantly)
|
|
111
117
|
|
|
112
|
-
- **
|
|
113
|
-
- **
|
|
114
|
-
- **
|
|
115
|
-
- **
|
|
118
|
+
- **Conversational mechanics.** "X asked Y a question." "Z said hello." "Participant A reacted with ㅋㅋㅋ / 👍 / lol." "User tested the agent's response time." None of this is memory.
|
|
119
|
+
- **Single-occurrence casual reactions.** "User observed the agent has personality." "Group chat member is amused by the bot." Wait for recurrence; if it never recurs, it was never memory.
|
|
120
|
+
- **Group-chat membership events.** "X invited Y to chat Z." "New participant joined." This is derivable from the current channel context and changes constantly.
|
|
121
|
+
- **Casual social-graph trivia.** "X used to work at Y." "Z is a friend of W." Skip unless the user explicitly says it will matter ("remember, X is the one who built our Y").
|
|
122
|
+
- **Latency / performance pings.** "User asked how fast the agent responded." Not memory.
|
|
123
|
+
- **The agent's own first-person observations.** "The agent admitted it does not know its model." "The agent replied in character." Skip — the agent is not memorable to itself.
|
|
124
|
+
- **Re-derivable facts.** Anything obvious from the current session's system prompt, MEMORY.md, AGENTS.md, or the channel context.
|
|
125
|
+
- **Speculation untethered to a quote.** If you cannot point at a specific transcript line, do not write it.
|
|
126
|
+
- **Multi-fragment expansions of one event.** One event produces at most one fragment. Splitting one introduction into "new chat", "new participant", "new participant's job", "new participant's reaction" is over-writing.
|
|
116
127
|
|
|
117
128
|
# Never quote secret values
|
|
118
129
|
|
|
@@ -135,7 +146,7 @@ Before reading the transcript, read \`MEMORY.md\` and the current \`memory/yyyy-
|
|
|
135
146
|
- **Notice violations.** If existing memory contains a commitment the agent just broke, that's a high-value fragment.
|
|
136
147
|
- **Avoid pure restatement.** If a fact is already in MEMORY.md word-for-word, don't write the same fragment again. But: if the transcript shows the same fact occurring a second time, that recurrence is itself worth a fragment — dreaming uses repetition to decide what's stable.
|
|
137
148
|
|
|
138
|
-
|
|
149
|
+
Dedup byte-equivalent restatements, not meaningful recurrence. Do not write a fragment that is a near-copy of one already in MEMORY.md or today's stream. But when the transcript shows the same durable preference, pattern, workaround, or commitment recurring in a NEW session or on a NEW day, write a concise recurrence fragment anchored to the new evidence — even if the underlying fact is already known. The dreaming subagent uses distinct-day recurrence to promote tentative facts to confident ones; refusing to write the second or third occurrence starves that signal. The bar is "did the recurrence happen in a meaningfully new context", not "is the fact already on disk".
|
|
139
150
|
|
|
140
151
|
The \`append\` tool refuses byte-equivalent fragments within the same daily stream — if your fragment's topic+body is identical to one already in today's file (modulo whitespace), the tool will reject it and you must rewrite. Two reasonable rewrites: (1) skip the fragment entirely, (2) frame the new occurrence explicitly as "this is the second time today" with a different topic. Do not retry an identical fragment with a different \`entry=\` hoping it will land — content-equality, not marker-equality, is what's checked.
|
|
141
152
|
|
|
@@ -269,8 +280,16 @@ export function createMemoryLoggerSubagent(
|
|
|
269
280
|
customTools: [findEntryTool, appendTool, advanceWatermarkTool],
|
|
270
281
|
payloadSchema: memoryLoggerPayloadSchema,
|
|
271
282
|
inFlightKey: (payload) => payload.agentDir,
|
|
283
|
+
// 768 KB read budget. Sized to cover one full buffer-trip cycle:
|
|
284
|
+
// ~30 KB MEMORY.md + ~50 KB today's stream + up to `DEFAULT_BUFFER_BYTES`
|
|
285
|
+
// (500 KB) of unread transcript chunk, with margin for re-reads. A
|
|
286
|
+
// smaller budget (the prior 256 KB) systematically exhausted on
|
|
287
|
+
// buffer-trip spawns once `bufferBytes` exceeded ~200 KB — the
|
|
288
|
+
// subagent would advance `bytesAtLastRun` to the full transcript size
|
|
289
|
+
// on completion, orphaning the unread tail until another full
|
|
290
|
+
// `bufferBytes` of growth arrived.
|
|
272
291
|
toolResultBudget: {
|
|
273
|
-
maxTotalBytes:
|
|
292
|
+
maxTotalBytes: 768 * 1024,
|
|
274
293
|
toolNames: ['read'],
|
|
275
294
|
exhaustedMessage: memoryLoggerExhaustedMessage,
|
|
276
295
|
},
|