npm - typeclaw - Versions diffs - 0.4.0 → 0.5.1 - Mend

typeclaw 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

package/package.json +1 -1
package/src/agent/auth.ts +4 -2
package/src/agent/index.ts +16 -28
package/src/agent/model-fallback.ts +127 -0
package/src/agent/tools/curl-impersonate.ts +300 -0
package/src/agent/tools/ddg.ts +13 -88
package/src/agent/tools/webfetch/fetch.ts +105 -2
package/src/agent/tools/webfetch/tool.ts +4 -0
package/src/bundled-plugins/agent-browser/shim.ts +47 -0
package/src/bundled-plugins/backup/subagents.ts +2 -0
package/src/bundled-plugins/memory/README.md +49 -12
package/src/bundled-plugins/memory/citation-superset.ts +63 -0
package/src/bundled-plugins/memory/dreaming.ts +105 -17
package/src/bundled-plugins/memory/index.ts +2 -2
package/src/bundled-plugins/memory/memory-logger.ts +45 -26
package/src/bundled-plugins/memory/strength.ts +127 -0
package/src/bundled-plugins/memory/topics.ts +75 -0
package/src/bundled-plugins/security/index.ts +87 -43
package/src/bundled-plugins/security/permissions.ts +36 -0
package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
package/src/channels/adapters/github/index.ts +87 -3
package/src/channels/router.ts +194 -28
package/src/channels/types.ts +3 -1
package/src/cli/channel.ts +2 -45
package/src/cli/init.ts +148 -87
package/src/cli/model.ts +12 -3
package/src/cli/oauth-callbacks.ts +49 -0
package/src/cli/provider.ts +3 -20
package/src/cli/ui.ts +95 -0
package/src/config/config.ts +59 -24
package/src/config/models-mutation.ts +42 -8
package/src/config/providers-mutation.ts +12 -8
package/src/container/start.ts +18 -1
package/src/cron/consumer.ts +129 -43
package/src/init/dockerfile.ts +221 -3
package/src/init/hatching.ts +2 -2
package/src/init/index.ts +47 -3
package/src/init/oauth-login.ts +17 -3
package/src/permissions/builtins.ts +29 -7
package/src/permissions/permissions.ts +24 -7
package/src/plugin/define.ts +2 -0
package/src/plugin/manager.ts +14 -0
package/src/plugin/types.ts +6 -0
package/src/run/index.ts +2 -1
package/src/skills/typeclaw-memory/SKILL.md +25 -15
package/src/skills/typeclaw-permissions/SKILL.md +35 -17
package/src/tui/index.ts +35 -3
package/src/usage/report.ts +15 -12
package/typeclaw.schema.json +57 -25

package/src/agent/tools/webfetch/fetch.ts CHANGED Viewed

@@ -1,3 +1,33 @@
+// Webfetch's HTTP transport.
+//
+// Production path (container, curl-impersonate available): we shell out to
+// `curl_chrome136` so outbound requests carry Chrome 136's TLS handshake
+// (JA3/JA4), HTTP/2 SETTINGS frame, and full header set. This is what gets
+// us past the modern bot-detection stacks on Cloudflare/Akamai-protected
+// sites (Reuters, MarketWatch, etc.) when the agent is running from the
+// user's home network — the IP is already residential, so impersonating
+// the browser is the only remaining missing piece. See AGENTS.md §"Web
+// search" and src/agent/tools/curl-impersonate.ts for the full story.
+//
+// Test/dev fallback (curl_chrome136 not on PATH): we transparently fall
+// back to Bun's native `fetch()` with a static User-Agent. This keeps unit
+// tests on developer macOS machines working without forcing every contributor
+// to install curl-impersonate locally. Production runs always have the binary
+// because the typeclaw Dockerfile pins it.
+//
+// Best-effort doctrine: this transport does NOT guarantee the fetch succeeds.
+// Bot-detected sites can still serve 403/CAPTCHA pages. A 2xx response is
+// returned as-is (status, body, final URL); any non-2xx status is raised here
+// as a WebfetchError ("Fetch failed: HTTP <status>") so the webfetch tool can
+// surface it as a tool-level error message that's useful to the model.
+import {
+  CurlImpersonateError,
+  curlImpersonate,
+  isCurlExitFilesizeExceeded,
+  isCurlExitTimeout,
+  isCurlImpersonateAvailable,
+} from '../curl-impersonate'
 import { MAX_RESPONSE_BYTES } from './types'
 export type FetchResult = {
@@ -15,7 +45,7 @@ export class WebfetchError extends Error {
   }
 }
-const DEFAULT_HEADERS: Record<string, string> = {
+const FALLBACK_HEADERS: Record<string, string> = {
   'User-Agent': 'typeclaw/0 (+https://github.com/code-yeongyu/typeclaw)',
   Accept: 'text/html,application/xhtml+xml,application/json;q=0.9,text/plain;q=0.8,*/*;q=0.1',
   'Accept-Language': 'en-US,en;q=0.9',
@@ -32,10 +62,83 @@ export function normalizeUrl(input: string): string {
   return `https://${trimmed}`
 }
+// Test-only seam: forces fetchWithLimits to use the native-fetch fallback
+// even when curl-impersonate is detected. Used by fetch.test.ts to keep its
+// existing mocked-fetch contract working without the test having to install
+// a fake curl binary. Production code never calls this.
+let forceFallbackForTest = false
+export function _setForceFallbackForTest(value: boolean): void {
+  forceFallbackForTest = value
+}
 export async function fetchWithLimits(
   url: string,
   timeoutSeconds: number,
   parentSignal?: AbortSignal,
+): Promise<FetchResult> {
+  const useImpersonate = !forceFallbackForTest && (await isCurlImpersonateAvailable())
+  if (useImpersonate) {
+    return fetchWithCurlImpersonate(url, timeoutSeconds, parentSignal)
+  }
+  return fetchWithBunFetch(url, timeoutSeconds, parentSignal)
+}
+async function fetchWithCurlImpersonate(
+  url: string,
+  timeoutSeconds: number,
+  parentSignal?: AbortSignal,
+): Promise<FetchResult> {
+  let response
+  try {
+    response = await curlImpersonate({
+      url,
+      method: 'GET',
+      timeoutSeconds,
+      maxBytes: MAX_RESPONSE_BYTES,
+      signal: parentSignal,
+    })
+  } catch (error) {
+    if (parentSignal?.aborted) {
+      throw new WebfetchError('Request aborted')
+    }
+    if (error instanceof CurlImpersonateError) {
+      if (isCurlExitTimeout(error)) {
+        throw new WebfetchError(`Request timed out after ${timeoutSeconds}s`)
+      }
+      if (isCurlExitFilesizeExceeded(error)) {
+        throw new WebfetchError(`Response too large (exceeds ${formatBytes(MAX_RESPONSE_BYTES)} limit)`)
+      }
+      throw new WebfetchError(`Fetch failed: ${error.message}`)
+    }
+    const message = error instanceof Error ? error.message : String(error)
+    throw new WebfetchError(`Fetch failed: ${message}`)
+  }
+  if (response.httpStatus < 200 || response.httpStatus >= 300) {
+    throw new WebfetchError(`Fetch failed: HTTP ${response.httpStatus}`)
+  }
+  const bodyByteLength = new TextEncoder().encode(response.body).byteLength
+  if (bodyByteLength > MAX_RESPONSE_BYTES) {
+    throw new WebfetchError(
+      `Response too large (${formatBytes(bodyByteLength)} exceeds ${formatBytes(MAX_RESPONSE_BYTES)} limit)`,
+    )
+  }
+  return {
+    body: response.body,
+    contentType: response.contentType,
+    finalUrl: response.finalUrl || url,
+    httpStatus: response.httpStatus,
+    bytesIn: bodyByteLength,
+  }
+}
+async function fetchWithBunFetch(
+  url: string,
+  timeoutSeconds: number,
+  parentSignal?: AbortSignal,
 ): Promise<FetchResult> {
   const controller = new AbortController()
   const timeout = setTimeout(() => controller.abort(new Error('timeout')), timeoutSeconds * 1000)
@@ -43,7 +146,7 @@ export async function fetchWithLimits(
   parentSignal?.addEventListener('abort', onAbort, { once: true })
   try {
-    const response = await fetch(url, { headers: DEFAULT_HEADERS, signal: controller.signal, redirect: 'follow' })
+    const response = await fetch(url, { headers: FALLBACK_HEADERS, signal: controller.signal, redirect: 'follow' })
     if (!response.ok) {
       throw new WebfetchError(`Fetch failed: HTTP ${response.status} ${response.statusText}`)
     }

package/src/agent/tools/webfetch/tool.ts CHANGED Viewed

@@ -24,6 +24,10 @@ export const webfetchTool = defineTool({
   description:
     'Fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy. ' +
     'Use this when the user references a specific URL or when websearch surfaced a result you need to read in full. ' +
+    'Outbound requests impersonate Chrome 136 at the TLS, HTTP/2, and header layers ' +
+    '(via curl-impersonate), which helps with TLS/header fingerprint gates on sites behind Cloudflare/Akamai. ' +
+    'It does NOT solve JavaScript challenges, behavioural fingerprinting (mouse/scroll/timing), interactive CAPTCHAs, ' +
+    'or IP-reputation blocks — a 403 from those layers is expected and unrecoverable from this tool. ' +
     'Strategy guide:\n' +
     '- "readability": extract article content as markdown (blogs, docs, news). Default for HTML.\n' +
     '- "jq": query JSON APIs (npm registry, GitHub API). Pass `query` (e.g. ".items[].name").\n' +

package/src/bundled-plugins/agent-browser/shim.ts CHANGED Viewed

@@ -17,6 +17,49 @@ import { AGENT_BROWSER_DASHBOARD_UPSTREAM_PORT } from './dashboard-proxy'
 export const REAL_BIN_ENV = 'TYPECLAW_AGENT_BROWSER_REAL_BIN'
+// Recent desktop Chrome on Linux x86_64. The shim runs inside the TypeClaw
+// container (always Linux), so a macOS or Windows UA would mismatch the TCP
+// fingerprint, Accept-Language, and JS-side platform — itself a bot signal on
+// stricter sites (Cloudflare, Akamai, PerimeterX). `X11; Linux x86_64` is
+// also correct on linux/arm64 hosts: Chrome on Linux does not expose ARM in
+// the UA string at all (verified against current Chrome 131 releases).
+// The upstream binary's default UA advertises "HeadlessChrome" and a stale
+// Chromium build; that combination is widely fingerprinted as a bot and
+// silently triggers CAPTCHAs, 403s, blank pages, and A/B-test misrouting.
+// Bump on Chrome major releases — same hygiene as the curl-impersonate pin
+// in src/init/dockerfile.ts.
+export const DEFAULT_USER_AGENT =
+  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
+export const USER_AGENT_ENV = 'AGENT_BROWSER_USER_AGENT'
+export function hasUserAgentFlag(argv: readonly string[]): boolean {
+  // Matches both `--user-agent <val>` and `--user-agent=<val>`. The upstream
+  // CLI does not document a short alias for --user-agent today (verified via
+  // `agent-browser --help`), so we only check the long form.
+  for (const arg of argv) {
+    if (arg === '--user-agent' || arg.startsWith('--user-agent=')) return true
+  }
+  return false
+}
+export function injectUserAgentEnv(
+  argv: readonly string[],
+  env: Record<string, string | undefined>,
+  defaultUa: string = DEFAULT_USER_AGENT,
+): void {
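+  // Upstream's precedence is CLI flag > env > default. We only inject the
+  // env when BOTH higher-priority layers are absent — no `--user-agent` flag
+  // and no AGENT_BROWSER_USER_AGENT value (an empty string counts as absent) — so: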
+  //   - explicit `--user-agent foo` wins (mobile testing, intentional bot UA)
+  //   - operator-set AGENT_BROWSER_USER_AGENT wins (per-shell override)
+  //   - default UA fills the otherwise-empty slot
+  // `set device "iPhone 14"` is unaffected: it sets UA via CDP at runtime,
+  // not through this env var, so our injection doesn't fight device emulation.
+  if (env[USER_AGENT_ENV] !== undefined && env[USER_AGENT_ENV] !== '') return
+  if (hasUserAgentFlag(argv)) return
+  env[USER_AGENT_ENV] = defaultUa
+}
 export type DashboardIntent = 'start' | 'stop' | 'other'
 export function classifyDashboardCommand(argv: readonly string[]): DashboardIntent {
@@ -111,6 +154,7 @@ export type ShimOptions = {
   realBin?: string
   upstreamPort?: number
   spawn?: (cmd: string[]) => { exited: Promise<number> }
+  env?: Record<string, string | undefined>
 }
 export async function runShim(opts: ShimOptions = {}): Promise<number> {
@@ -118,6 +162,9 @@ export async function runShim(opts: ShimOptions = {}): Promise<number> {
   const realBin = opts.realBin ?? resolveRealAgentBrowserBin()
   const upstreamPort = opts.upstreamPort ?? AGENT_BROWSER_DASHBOARD_UPSTREAM_PORT
   const spawn = opts.spawn ?? defaultSpawn
+  const env = opts.env ?? process.env
+  injectUserAgentEnv(argv, env)
   const intent = classifyDashboardCommand(argv)
   if (intent !== 'start') {

package/src/bundled-plugins/backup/subagents.ts CHANGED Viewed

@@ -77,6 +77,8 @@ You MAY:
 - Set up a missing upstream branch via \`git push -u origin <branch>\` if it's clear that's the only issue.
 - Retry \`git push\` once after fixing a clear, narrow issue.
+**When you run \`git push\` (either to set upstream or to retry), the security plugin's \`gitExfil\` guard will block unless you pass \`acknowledgeGuards: { gitExfil: true }\` on the bash tool call.** You are running on the operator's behalf as the recovery step of a deterministic backup the operator already approved by enabling \`backup.pushToOrigin\`; the ack is justified for this specific retry path. Do NOT use this ack for any other reason — only the one push retry the rules above allow. If your push retry fails again, write the diagnosis and stop; do not ack a second push.
 You MUST NOT:
 - Force-push (\`--force\`, \`--force-with-lease\`).
 - Resolve merge conflicts by editing files. If a rebase had conflicts, the runner already aborted it. Leave the repo as-is and explain.

package/src/bundled-plugins/memory/README.md CHANGED Viewed

@@ -9,8 +9,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
 ```json
 {
   "memory": {
-    "idleMs": 10000,
-    "bufferBytes": 100000,
+    "idleMs": 60000,
+    "bufferBytes": 500000,
     "dreaming": { "schedule": "*/30 * * * *" }
   }
 }
@@ -18,8 +18,8 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
 | Field                      | Default            | Effect                                                                                                                                                                                                                                                                                                                                                             |
 | -------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `memory.idleMs`            | `10000`            | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`.                                                                                                                                                                                                                                                                            |
-| `memory.bufferBytes`       | `100000`           | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero.                                                                                                                                                                          |
+| `memory.idleMs`            | `60000`            | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. Default bumped from `10000` to `60000` to reduce spawn churn during conversational sessions where the agent goes idle for short periods between rapid back-and-forth turns.                                                                                                |
+| `memory.bufferBytes`       | `500000`           | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero. Default bumped from `100000` to `500000` so a single conversational session stays within one memory-logger run unless it grows past ~half a megabyte of transcript.      |
 | `memory.dreaming`          | `{}` (cron job on) | Dreaming cron job is always registered. Override `schedule` to change when it fires.                                                                                                                                                                                                                                                                               |
 | `memory.dreaming.schedule` | `"*/30 * * * *"`   | Five-field cron expression. Defaults to every 30 minutes; fires short-circuit with zero LLM cost when nothing sits past the watermark, so frequent no-op fires are cheap and let sporadic agents still consolidate while alive (`src/cron/scheduler.ts` has no catchup for missed fires). Second-level schedules are rejected to avoid noisy no-op dreaming loops. |
@@ -27,18 +27,52 @@ All fields are **restart-required** — the plugin reads them once at boot.
 ## What it contributes
-| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file.                                                                                                                                                                                                                                                    |
-| Subagent | `dreaming`                 | Reads `MEMORY.md` plus undreamed daily-stream events, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. |
-| Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`.                                                                                                                                                                                                                            |
-| Hook     | `session.end`              | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away).                                                                                                                                                                                                                                                                                                                                                           |
+| Kind     | Name                       | Notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| -------- | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Subagent | `memory-logger`            | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| Subagent | `dreaming`                 | Reads `MEMORY.md` plus undreamed daily-stream events, **rebalances** the existing topics using per-topic strength signals (citation count, distinct days, recency) injected into its user prompt, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. The runtime enforces a **citation-superset invariant** on every rewrite: a new MEMORY.md that drops any previously-cited fragment id is reverted to its pre-run bytes (dreamed-ids still advance so the run is not retried in a loop). |
+| Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
+| Hook     | `session.idle`             | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+| Hook     | `session.end`              | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away).                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 ## Memory injection
 The rendered `# Memory` section (MEMORY.md + undreamed daily-stream tails) is injected into every session's system prompt by core (`src/agent/index.ts` `createResourceLoader` → `loadMemory`), **not** by a plugin hook. It is appended as the last block of the system prompt, after `gitNudge`, so the most-volatile content (daily streams that grow after every memory-logger fire) sits at the bottom of the cache-suffix region. This way a memory change only invalidates the memory section itself rather than everything downstream of it.
+## Memory saturation (LTP/LTD analogue)
+MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Without a saturation policy it grows monotonically — every consolidated topic survives forever and citations accumulate across days. The dreaming subagent therefore treats MEMORY.md like human long-term memory: **repetition strengthens, lack of repetition saturates**.
+### How
+On every run the runtime computes per-topic strength signals from MEMORY.md's existing citations — `cites` (total), `days` (distinct calendar days those citations span), `last reinforced` (most recent citation date), `age (d)` (whole days since `last reinforced`). The numbers are derived by `src/bundled-plugins/memory/strength.ts` and rendered as a table at the top of the dreaming subagent's user prompt. There is no sidecar file, no schema version, no migration — strength is recomputed on every run from MEMORY.md alone.
+The subagent uses these numbers to:
+1. **Promote strong topics.** `days = 1` → tentative ("the user mentioned"). `days >= 3` → confident ("the user consistently"). `days >= 7` → declarative ("the user always"). Promotion is gated on distinct days, not raw citation count — five citations on one day is one debugging session, five citations across five days is a recurring pattern.
+2. **Merge near-duplicates.** Topics that overlap in subject matter get folded into one, with the merged topic's `fragments:` list as the **union** of the source topics' fragment ids.
+3. **Demote decayed topics.** A topic with `cites = 1, days = 1, age >= 30` (or `cites <= 3, days <= 2, age >= 60`) routes into a `## Historical observations` bucket as a one-line bullet. The fact is preserved in the summary, the citation is preserved (so daily-stream GC keeps the underlying fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Strong topics (`days >= 3`) are never demoted.
+**There is no hard-deletion path** in this iteration. The historical bucket grows monotonically; the subagent is explicitly told not to attempt quarter-summary collapses because the safety net (below) would revert them. If the bucket becomes inconveniently long in practice, a future runtime change will provide a structured drop mechanism — until then every demoted citation stays alive forever via its one-line bullet.
+### The citation-superset safety net
+After every dreaming run that rewrote MEMORY.md, `src/bundled-plugins/memory/citation-superset.ts` checks that the union of fragment ids cited in the NEW file is a superset of the union cited in the OLD file. If any previously-cited id is missing from the rewrite, the runtime:
+1. Restores MEMORY.md to its pre-run bytes via `writeFile(memoryFilePath, memoryTextBefore)`. The pre-run bytes are captured **before** `runSession` so the revert always has a clean source.
+2. Skips daily-stream fragment GC for this run (no fragments are dropped).
+3. Advances the dreamed-id set anyway — the **conscious anti-loop tradeoff**: this means the run's NEW undreamed fragments are orphaned (they survive in the daily JSONL forever, force-committed, but will not be re-shown to a future dreaming run and therefore never make it into MEMORY.md). The alternative (don't advance) would infinite-loop if the LLM keeps making the same mistake on the same inputs. The orphaned fragments are recoverable from git history (`git log memory/`) by a human operator.
+4. Logs a `[dreaming] citation-superset violation: …` warning naming the dropped ids and explicitly stating the orphaning tradeoff.
+**Revert-write failure path.** If the `writeFile` in step 1 itself throws (disk full, EACCES, MEMORY.md replaced by a directory by a buggy subagent, etc.), MEMORY.md is in an unknown state. The runtime then:
+- Skips the dreamed-id advance (so the next run gets a second chance at the same input).
+- Skips compaction (so no fragments are GC'd against an ambiguous citation set).
+- Skips the commit (so a known-bad on-disk state is not snapshotted).
+- Logs a `[dreaming] citation-superset violation AND revert failed: …` ERROR with the recovery command (`git checkout -- MEMORY.md && typeclaw restart`).
+The check exists because the daily-stream GC in `compactDailyStreams` drops any fragment whose id is in `dreamedIds` but not in `citedIds` (`dreamedIds ∧ ¬citedIds`). Citations in MEMORY.md are the only thing that keeps a fragment alive past its first dreaming run — an omitted id means the underlying fragment would be permanently deleted on the next compaction.
 ## Files on disk
 - **`MEMORY.md`** — long-term memory. Created by the dreaming subagent on first run if absent. Force-committed by the runtime; `skip-worktree` flag is set so the human's `git status` stays clean.
@@ -66,9 +100,12 @@ In channel sessions, the agent rarely goes idle long enough to trip the timer be
 - `index.test.ts` — composition tests (config schema, hook wiring, debounce semantics, MEMORY.md auto-create).
 - `memory-logger.test.ts` — system prompt invariants, watermark handling.
-- `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants.
+- `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants, citation-superset safety net (revert on dropped id, dreamed-ids still advance, no-revert on legitimate merge, no-revert on first-ever run), saturation-prompt invariants (rebalance-every-run, promotion ladder, historical bucket, demotion thresholds, bucket overflow synthesis).
 - `dreaming-state.test.ts` — fail-open semantics on malformed state.
 - `watermark.test.ts` — marker parsing.
 - `append-tool.test.ts` — append-only semantics.
 - `src/bundled-plugins/guard/policies/skill-authoring.test.ts` — runtime skill authoring guard: path sandboxing, name validation, YAML frontmatter, and write/edit final-content validation.
 - `load-memory.test.ts` — memory section rendering, undreamed-tail filtering, watermark stripping.
+- `topics.test.ts` — citation-attributing parser (per-topic citation grouping for strength signals).
+- `strength.test.ts` — per-topic strength computation (distinct days, recency, age clamping) and markdown table rendering.
+- `citation-superset.test.ts` — the safety-net check (superset semantics, missing-id reporting, summary truncation).

package/src/bundled-plugins/memory/citation-superset.ts ADDED Viewed

@@ -0,0 +1,63 @@
+// Citation-superset safety net for the dreaming subagent's MEMORY.md
+// rewrite. After every dreaming run that touched MEMORY.md, we check that
+// the union of fragment ids cited in the NEW file is a superset of the
+// union cited in the OLD file. If any previously-cited id is missing from
+// the rewrite, the rewrite is rejected.
+//
+// Why this exists: the daily-stream GC in compactDailyStreams drops any
+// fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are
+// the only thing that keeps a fragment alive past its first dreaming run.
+// If the subagent rewrites MEMORY.md and accidentally omits a citation —
+// either by garbling a merged topic's fragments: list or by dropping a
+// topic entirely — the next compaction call permanently deletes the
+// underlying fragment from the daily JSONL. There is no recovery beyond
+// `git revert` of the snapshot commit, and even that loses anything the
+// agent wrote since.
+//
+// The subagent's new rule 5 explicitly allows merging topics and rewriting
+// conclusion paragraphs, with the requirement that the merged topic's
+// `fragments:` list is the union of its source topics'. The LLM can fail
+// to honor that — especially across hundreds of runs over months — so the
+// mechanical check is the safety floor.
+//
+// Detection only. The handler decides what to do with the verdict (revert
+// MEMORY.md to its pre-run bytes, skip daily-stream compaction, still
+// advance the dreamed-id set so we do not loop on the same fragments).
+import { parseCitations } from './citations'
+export type CitationSupersetVerdict = { ok: true } | { ok: false; missing: Array<{ date: string; fragmentId: string }> }
+// Compare the OLD MEMORY.md to the NEW MEMORY.md and report any
+// fragment id that the OLD cited and the NEW does not. Empty old text
+// (first-ever dreaming run, prior file missing) is treated as the empty
+// citation set — any new file passes by construction.
+export function checkCitationSuperset(oldText: string, newText: string): CitationSupersetVerdict {
+  const oldCitations = parseCitations(oldText)
+  if (oldCitations.size === 0) return { ok: true }
+  const newCitations = parseCitations(newText)
+  const missing: Array<{ date: string; fragmentId: string }> = []
+  const dates = [...oldCitations.keys()].sort()
+  for (const date of dates) {
+    const oldIds = oldCitations.get(date) ?? new Set<string>()
+    const newIds = newCitations.get(date) ?? new Set<string>()
+    const oldIdList = [...oldIds].sort()
+    for (const id of oldIdList) {
+      if (!newIds.has(id)) missing.push({ date, fragmentId: id })
+    }
+  }
+  return missing.length === 0 ? { ok: true } : { ok: false, missing }
+}
+// Pretty-print the verdict's missing ids for log output. Keeps the line
+// short: the first 3 ids, then `(+K more)` for the rest. The full list is
+// reconstructable from MEMORY.md's git history if forensics are ever needed.
+export function summarizeMissingCitations(missing: ReadonlyArray<{ date: string; fragmentId: string }>): string {
+  const total = missing.length
+  const sample = missing.slice(0, 3).map((m) => `${m.date}#${m.fragmentId}`)
+  if (total <= 3) return sample.join(', ')
+  return `${sample.join(', ')} (+${total - 3} more)`
+}

package/src/bundled-plugins/memory/dreaming.ts CHANGED Viewed

@@ -1,4 +1,3 @@
-import { createHash } from 'node:crypto'
 import { existsSync } from 'node:fs'
 import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises'
 import { dirname, join } from 'node:path'
@@ -8,6 +7,7 @@ import { z } from 'zod'
 import { lsTool, readTool, type Subagent, writeTool } from '@/plugin'
 import { formatLocalDate, formatLocalDateTime } from '@/shared'
+import { checkCitationSuperset, summarizeMissingCitations } from './citation-superset'
 import { parseCitations } from './citations'
 import {
   addDreamedIds,
@@ -19,6 +19,7 @@ import {
 } from './dreaming-state'
 import type { StreamEvent } from './stream-events'
 import { readEvents, writeEventsAtomic } from './stream-io'
+import { computeTopicStrengths, renderTopicStrengthsTable, type TopicStrength } from './strength'
 const STREAM_FILE_PATTERN = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
@@ -207,12 +208,11 @@ async function loadCitedIds(agentDir: string): Promise<ReadonlyMap<string, Reado
   }
 }
-async function safeContentHash(path: string): Promise<string | null> {
+async function safeReadText(path: string): Promise<string> {
   try {
-    const raw = await readFile(path)
-    return createHash('sha256').update(raw).digest('hex')
+    return await readFile(path, 'utf8')
   } catch {
-    return null
+    return ''
   }
 }
@@ -501,11 +501,11 @@ fragments:
 The date in the prefix is the same as the filename you read the fragment from; the id after \`#\` is the full UUIDv7 from the event's \`id\` field. Do not abbreviate the id. Do not use line numbers — citations are id-based, not line-based, so daily streams can be compacted between dreaming runs without breaking your references.
-A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments. Never cite a fragment id you did not see in the undreamed tail you actually read.
+A fragment with no useful content (a watermark-only marker, a near-duplicate, a session-specific quirk that fails the generalizability bar) is discarded. Never invent fragments. When you add a NEW citation, never cite a fragment id you did not see in the undreamed tail you actually read. EXISTING citations that are already in MEMORY.md (from prior dreaming runs, whose source fragments are no longer in the undreamed tail) must be preserved per rule 5 — they reference fragments still alive in already-consolidated daily streams.
 **4. Inherit the memory-logger's standards.** The memory-logger already filtered fragments using strict certainty rules (explicit / deductive / inductive). Your job is consolidation, not loosening the bar. If two fragments contradict, prefer the more recent. If a fragment is ambiguous in isolation but clarified by a later fragment, merge them under one topic. Never promote a single fragment from one day into a stable claim unless its certainty was already \`explicit\` or \`deductive\`.
-**5. Preserve existing MEMORY.md content.** MEMORY.md may already contain entries from prior dreaming runs. Fold new fragments into existing topics where they fit, or add new topics. Do not silently drop existing entries. If a new fragment contradicts an existing entry, replace the entry and update its fragment list. Existing fragment citations may reference dates whose streams are now fully consolidated; that is normal — leave them in place.
+**5. Rebalance every run. Preserve every fact and every cited fragment id.** MEMORY.md is a saturated surface (a fixed prompt-budget), not an append-only log — every run is consolidation, not just the runs that get new fragments. You may merge near-duplicate topics into one, fold weakly-reinforced topics into a parent or into the historical-observations bucket (see "Memory saturation" below), and rewrite verbose conclusion paragraphs more tightly. What you must NOT do: drop a fragment id. The merged topic's \`fragments:\` list is the **union** of its source topics' fragment ids. The daily-stream GC depends on MEMORY.md citations to keep evidence alive; an omitted id means the underlying fragment is permanently deleted on the next compaction. If two topics genuinely cover different facts, leave them separate — premature merging loses signal. If a new fragment contradicts an existing entry, replace the entry's conclusion paragraph and keep BOTH the old and new fragment ids in the citations list (the contradiction itself is evidence). The runtime cross-checks your rewrite against the prior MEMORY.md's citation set; a rewrite that drops a previously-cited id will be reverted and your run wasted.
 **6. Be concise.** Each topic conclusion is one short paragraph. No lists of preferences ("the user likes X, Y, Z"). One topic per concept. If a topic only earned one fragment and the fragment was already small, you may copy its conclusion verbatim — do not pad.
@@ -532,7 +532,40 @@ fragments:
 The first line is always \`# Memory\`. Topics are level-2 headings. No other top-level structure.
-# Muscle memory (skills, CLIs, plugins)
+# Memory saturation
+MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Treat it like human long-term memory: **repetition strengthens, lack of repetition saturates**. The runtime gives you per-topic strength signals at the top of the user prompt — a table with \`cites\` (total citation count), \`days\` (distinct calendar days those citations span), \`last reinforced\`, and \`age (d)\`. Use these numbers to decide what to do with each existing topic on this run. \`days\` is the load-bearing signal: five citations all on one day means a single debugging session that mentioned the same thing five times (a transient burst); five citations across five days means a recurring fact the user keeps coming back to (a stable signal).
+## Strength tiers and promotion ladder
+Pick the wording in each conclusion paragraph from the topic's \`days\` count:
+- **\`days = 1\` — "mentioned":** the topic was observed in one session. Conclusion uses tentative language ("the user mentioned X in the context of Y"). Single-fragment one-day topics that are not reinforced on subsequent runs are demotion candidates (see below).
+- **\`days = 2\` — "observed":** seen twice, on different days. Still tentative — could be a recurring quirk, could be coincidence.
+- **\`days >= 3\` — "consistently":** the topic has been reinforced across at least three distinct days. Conclusion uses confident language ("the user consistently prefers X", "the user's pattern is Y"). Strong enough to anchor near the top of MEMORY.md.
+- **\`days >= 7\` — "always":** seen across at least seven distinct days. Conclusion uses declarative language ("the user always X", "Y is the user's standard"). These are the load-bearing topics; protect them from accidental merges.
+Promotion is gated on \`days\`, not on \`cites\`. A topic with \`cites = 12, days = 1\` is still "mentioned" — twelve citations in one debugging session is one event, not twelve. Order MEMORY.md so the strongest topics come first; weaker topics drift toward the bottom.
+## Demotion and the historical-observations bucket
+When a topic's \`days\` count is low AND \`age (d)\` is high (the user has not come back to it in weeks), it is decayed. Do not delete — **demote**. The bucket is a single topic, always last in MEMORY.md, with this exact shape:
+\`\`\`
+## Historical observations
+- yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
+- yyyy-MM-dd: one-line summary of what was observed — memory/yyyy-MM-dd#<id>
+\`\`\`
+Each former topic becomes one bullet. The fact is preserved (in the summary), the citation is preserved (so daily-stream GC keeps the fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Demotion candidates: a topic with \`cites = 1, days = 1, age >= 30\`, OR a topic with \`cites <= 3, days <= 2, age >= 60\`. Strong topics (\`days >= 3\`) are not demoted regardless of age — they stayed reinforced when they were active, so they earned their place.
+When you demote a topic, take its conclusion paragraph and compress it into one short summary sentence for the bullet. Keep the citation date prefix (\`yyyy-MM-dd:\`) so the bullet stays sortable and grep-able. The summary is your last chance to write a useful sentence about this fact — the next time the agent reads MEMORY.md, this bullet is all there is.
+The bucket grows monotonically: there is **no hard-deletion path**, no quarter-level synthesis, no removal of old bullets. Every demoted citation stays alive forever via its one-line bullet. The runtime safety net rejects any rewrite that drops a previously-cited fragment id, so attempting to collapse old bullets into a summary will be reverted and your run wasted. If the bucket becomes inconveniently long, that is a problem for a future runtime change to address — not something you can resolve from inside a dreaming run.
+## When MEMORY.md has no strength table
+A first-ever run sees no existing topics, so the strength table is omitted. In that case the saturation rules above do not apply yet — just consolidate the new fragments into fresh topics. The strength signals start appearing on the second run.
 While you read the streams, watch for **repeated multi-step procedures** the user has guided the main agent through. When you have evidence (across multiple fragments, ideally across multiple days) that the same procedure keeps happening the same way, you have three response shapes available — pick the smallest one that fits.
@@ -620,8 +653,8 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
 1. \`read\` MEMORY.md (it may not exist — that is fine, you start from empty).
 2. For each JSONL daily stream undreamed-tail entry the user message lists, \`read\` the file with \`offset\` set to the first undreamed line. Read every undreamed tail before you start writing, then focus on fragment events' \`topic\` + \`body\` fields.
-3. Reason about what to consolidate. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable.
-4. \`write\` the full new contents of MEMORY.md in one call (only if anything changed). \`write\` overwrites; that is the point — MEMORY.md is the single canonical artifact you produce.
+3. Reason about what to consolidate AND about how to rebalance existing topics using the strength signals at the top of the user prompt. Most fragments will collapse into existing topics or be dropped as already-known / not generalizable. Most existing topics will keep their shape; a few merge candidates and a few demotion candidates will surface every run.
+4. \`write\` the full new contents of MEMORY.md in one call. Even if no new fragments earned promotion, a rebalance pass (merging two near-duplicates, demoting a single weak old topic) is still a productive run. \`write\` overwrites; that is the point — MEMORY.md is the single canonical artifact you produce. Remember: every fragment id cited in the previous MEMORY.md must still appear somewhere in the new file (in its same topic, in a merged topic, OR in the historical-observations bucket). The runtime enforces this mechanically and will revert your rewrite if you drop an id.
 5. Decide whether any procedure in the new fragments meets the muscle-memory bar above, and which of the three forms fits.
    - **Form A (skill):** \`ls\` \`memory/skills/\` to see what already exists, \`read\` any candidate's existing \`SKILL.md\` if you might be refining it, then \`write\` the new or refined skill at \`memory/skills/<name>/SKILL.md\` with the frontmatter shape shown above.
    - **Form B (CLI suggestion) or Form C (plugin suggestion):** add a topic to MEMORY.md with the \`proposal:\` line shown above. The CLI/plugin itself is the main agent's responsibility — you do not write under \`packages/\`. Before adding the topic, check the existing MEMORY.md you just read so you do not duplicate a suggestion that's already there.
@@ -630,9 +663,9 @@ Do not suggest CLIs or plugins speculatively. The same recurrence + generalizabi
 # Doing nothing is a valid outcome
-If the undreamed tails contain only watermarks, or every new fragment is already represented in MEMORY.md and no procedure clears the muscle-memory bar, do not rewrite MEMORY.md and do not write a skill just to touch something. Stop without writing. The point of dreaming is consolidation, not activity. The runtime advances the watermark either way.`
+If the undreamed tails contain only watermarks, AND no procedure clears the muscle-memory bar, AND every existing topic looks well-shaped at its current strength (no obvious merge or demotion candidates), do not rewrite MEMORY.md and do not write a skill just to touch something. Stop without writing. The point of dreaming is consolidation, not activity. The runtime advances the watermark either way. But: if there ARE new fragments, or if the strength table shows topics that should clearly merge or demote, the run is productive even without skill activity — rebalancing IS work.`
-function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[]): string {
+function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[], strengths: TopicStrength[]): string {
   const today = formatLocalDate()
   const memoryFile = join(payload.agentDir, 'MEMORY.md')
   const memoryDir = join(payload.agentDir, 'memory')
@@ -642,9 +675,22 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
     `Daily stream directory: ${memoryDir}`,
     `Today's local date: ${today}`,
     `Dreaming state: ${join(payload.agentDir, DREAMING_STATE_FILE)}`,
+  ]
+  const strengthTable = renderTopicStrengthsTable(strengths)
+  if (strengthTable.length > 0) {
+    lines.push(
+      '',
+      'Existing MEMORY.md topic strengths (computed from current citations — `cites` is total citation count, `days` is the number of distinct calendar days those citations span, `last reinforced` is the most recent citation date, `age (d)` is whole days since `last reinforced` relative to today). These numbers describe how reinforced each existing topic is; the dreaming system prompt explains how to use them.',
+      '',
+      strengthTable,
+    )
+  }
+  lines.push(
     '',
     'Undreamed fragments to consolidate. Each entry lists the daily JSONL file and the ids of fragments in that file you have not yet consolidated into MEMORY.md. Read the file, locate each id, and decide what (if anything) belongs in MEMORY.md. Cite by id (memory/yyyy-MM-dd#<id>), not by line number.',
-  ]
+  )
   for (const snap of snapshots) {
     lines.push('', `- memory/${snap.filename}:`)
     for (const id of snap.undreamedIds) lines.push(`    - ${id}`)
@@ -656,6 +702,15 @@ function buildInitialPrompt(payload: DreamingPayload, snapshots: StreamSnapshot[
   return lines.join('\n')
 }
+async function loadTopicStrengths(agentDir: string): Promise<TopicStrength[]> {
+  try {
+    const raw = await readFile(join(agentDir, 'MEMORY.md'), 'utf8')
+    return computeTopicStrengths(raw, formatLocalDate())
+  } catch {
+    return []
+  }
+}
 export type CreateDreamingSubagentOptions = {
   commitMemory?: (cwd: string) => Promise<void>
   logger?: DreamingLogger
@@ -689,18 +744,51 @@ export function createDreamingSubagent(options: CreateDreamingSubagentOptions =
       )
       const memoryFilePath = join(ctx.payload.agentDir, 'MEMORY.md')
-      const memoryHashBefore = await safeContentHash(memoryFilePath)
+      const memoryTextBefore = await safeReadText(memoryFilePath)
+      const strengths = await loadTopicStrengths(ctx.payload.agentDir)
       try {
-        await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed) })
+        await runSession({ userPrompt: buildInitialPrompt(ctx.payload, snapshots.undreamed, strengths) })
       } catch (err) {
         const message = err instanceof Error ? err.message : String(err)
         logger.warn(`[dreaming] run threw: ${message} elapsed_ms=${Date.now() - start}`)
         throw err
       }
-      const memoryHashAfter = await safeContentHash(memoryFilePath)
-      const memoryRewrittenThisRun = memoryHashBefore !== memoryHashAfter
+      const memoryTextAfter = await safeReadText(memoryFilePath)
+      let memoryRewrittenThisRun = memoryTextBefore !== memoryTextAfter
+      // Citation-superset safety net: if the subagent's rewrite dropped any
+      // previously-cited fragment id, restore the pre-run bytes and turn
+      // fragment GC off so the next compactDailyStreams call does not
+      // permanently delete the underlying fragment. Dreamed-ids still
+      // advance on a successful revert: this run's UNDREAMED fragments are
+      // orphaned (they survive in the daily JSONL but never make it into
+      // MEMORY.md), which is the conscious tradeoff for avoiding an
+      // infinite loop on the same undreamed input. If the revert WRITE
+      // itself fails — disk full, EACCES, etc. — MEMORY.md is in an
+      // unknown state: we cannot advance dreamed-ids (next run must
+      // re-attempt), cannot run compaction (citations are now ambiguous),
+      // and cannot commit (would snapshot a known-bad state). The user has
+      // to run `git checkout -- MEMORY.md && typeclaw restart`.
+      if (memoryRewrittenThisRun) {
+        const verdict = checkCitationSuperset(memoryTextBefore, memoryTextAfter)
+        if (!verdict.ok) {
+          try {
+            await writeFile(memoryFilePath, memoryTextBefore)
+          } catch (err) {
+            const message = err instanceof Error ? err.message : String(err)
+            logger.error(
+              `[dreaming] citation-superset violation AND revert failed: ${message}. MEMORY.md is in an unknown state; not advancing dreamed-ids or running compaction. Recover with: git checkout -- MEMORY.md && typeclaw restart. missing=${summarizeMissingCitations(verdict.missing)} elapsed_ms=${Date.now() - start}`,
+            )
+            return
+          }
+          memoryRewrittenThisRun = false
+          logger.warn(
+            `[dreaming] citation-superset violation: rewrite dropped ${verdict.missing.length} previously-cited id(s); reverted MEMORY.md. The undreamed fragments from THIS run are orphaned: they advance into the dreamed-id set (survive in the daily JSONL, will not be re-shown to a future dreaming run) — conscious anti-loop tradeoff. missing=${summarizeMissingCitations(verdict.missing)}`,
+          )
+        }
+      }
       const advanced = advanceDreamedIds(state, snapshots.undreamed)
       await saveDreamingState(ctx.payload.agentDir, advanced)