typeclaw 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/README.md +15 -9
  2. package/package.json +5 -3
  3. package/scripts/dump-system-prompt.ts +12 -1
  4. package/scripts/require-parallel.ts +41 -0
  5. package/src/agent/auth.ts +3 -3
  6. package/src/agent/index.ts +116 -14
  7. package/src/agent/live-sessions.ts +34 -0
  8. package/src/agent/multimodal/read-redirect.ts +43 -0
  9. package/src/agent/plugin-tools.ts +97 -13
  10. package/src/agent/session-meta.ts +21 -2
  11. package/src/agent/session-origin.ts +6 -13
  12. package/src/agent/subagent-completion-reminder.ts +89 -0
  13. package/src/agent/subagents.ts +3 -2
  14. package/src/agent/system-prompt.ts +49 -15
  15. package/src/bundled-plugins/explorer/explorer.ts +2 -2
  16. package/src/bundled-plugins/guard/index.ts +14 -1
  17. package/src/bundled-plugins/guard/policies/managed-config.ts +43 -13
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +37 -0
  19. package/src/bundled-plugins/guard/policies/memory-topics-delete.ts +67 -0
  20. package/src/bundled-plugins/guard/policies/memory-topics-write.ts +33 -0
  21. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -2
  22. package/src/bundled-plugins/guard/policy.ts +7 -0
  23. package/src/bundled-plugins/memory/README.md +76 -62
  24. package/src/bundled-plugins/memory/append-tool.ts +3 -2
  25. package/src/bundled-plugins/memory/citation-superset.ts +49 -11
  26. package/src/bundled-plugins/memory/citations.ts +19 -8
  27. package/src/bundled-plugins/memory/delete-tool.ts +57 -0
  28. package/src/bundled-plugins/memory/dreaming-state.ts +1 -1
  29. package/src/bundled-plugins/memory/dreaming.ts +364 -146
  30. package/src/bundled-plugins/memory/frontmatter.ts +165 -0
  31. package/src/bundled-plugins/memory/index.ts +236 -16
  32. package/src/bundled-plugins/memory/injection-plan.ts +15 -0
  33. package/src/bundled-plugins/memory/load-memory.ts +102 -103
  34. package/src/bundled-plugins/memory/load-shards.ts +156 -0
  35. package/src/bundled-plugins/memory/memory-logger.ts +16 -15
  36. package/src/bundled-plugins/memory/memory-retrieval.ts +105 -0
  37. package/src/bundled-plugins/memory/migration.ts +282 -1
  38. package/src/bundled-plugins/memory/paths.ts +42 -0
  39. package/src/bundled-plugins/memory/search-tool.ts +232 -0
  40. package/src/bundled-plugins/memory/secret-detector.ts +2 -2
  41. package/src/bundled-plugins/memory/shard-snapshot.ts +51 -0
  42. package/src/bundled-plugins/memory/slug.ts +59 -0
  43. package/src/bundled-plugins/memory/stream-io.ts +110 -1
  44. package/src/bundled-plugins/memory/strength.ts +3 -3
  45. package/src/bundled-plugins/memory/topics.ts +70 -16
  46. package/src/bundled-plugins/security/index.ts +24 -0
  47. package/src/bundled-plugins/security/permissions.ts +4 -0
  48. package/src/bundled-plugins/security/policies/cron-promotion.ts +349 -0
  49. package/src/bundled-plugins/security/policies/git-exfil.ts +2 -0
  50. package/src/bundled-plugins/security/policies/prompt-injection.ts +3 -0
  51. package/src/bundled-plugins/security/policies/role-promotion.ts +419 -0
  52. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +1 -0
  53. package/src/channels/adapters/discord-bot-slash-commands.ts +186 -0
  54. package/src/channels/adapters/discord-bot.ts +163 -1
  55. package/src/channels/adapters/kakaotalk-attachment.ts +7 -17
  56. package/src/channels/adapters/kakaotalk.ts +64 -37
  57. package/src/channels/adapters/slack-bot-classify.ts +2 -27
  58. package/src/channels/adapters/slack-bot-slash-commands.ts +82 -0
  59. package/src/channels/adapters/slack-bot.ts +139 -1
  60. package/src/channels/index.ts +5 -0
  61. package/src/channels/router.ts +328 -18
  62. package/src/channels/subagent-completion-bridge.ts +84 -0
  63. package/src/cli/builtins.ts +1 -0
  64. package/src/cli/index.ts +1 -0
  65. package/src/cli/init.ts +122 -14
  66. package/src/cli/inspect.ts +151 -0
  67. package/src/cli/role.ts +7 -2
  68. package/src/cli/tunnel.ts +13 -1
  69. package/src/cli/ui.ts +25 -1
  70. package/src/config/index.ts +1 -0
  71. package/src/config/models-mutation.ts +10 -2
  72. package/src/cron/consumer.ts +1 -1
  73. package/src/init/dockerfile.ts +353 -2
  74. package/src/init/hatching.ts +5 -6
  75. package/src/init/kakaotalk-auth.ts +6 -47
  76. package/src/init/validate-api-key.ts +121 -0
  77. package/src/inspect/index.ts +213 -0
  78. package/src/inspect/label.ts +50 -0
  79. package/src/inspect/live.ts +221 -0
  80. package/src/inspect/render.ts +163 -0
  81. package/src/inspect/replay.ts +265 -0
  82. package/src/inspect/session-list.ts +160 -0
  83. package/src/inspect/types.ts +110 -0
  84. package/src/plugin/hooks.ts +23 -1
  85. package/src/plugin/index.ts +2 -0
  86. package/src/plugin/manager.ts +1 -1
  87. package/src/plugin/registry.ts +1 -1
  88. package/src/plugin/types.ts +10 -0
  89. package/src/run/channel-session-factory.ts +7 -1
  90. package/src/run/index.ts +87 -21
  91. package/src/secrets/kakao-renewal.ts +3 -47
  92. package/src/server/index.ts +241 -60
  93. package/src/shared/index.ts +4 -1
  94. package/src/shared/local-time.ts +17 -0
  95. package/src/shared/protocol.ts +49 -0
  96. package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +9 -9
  97. package/src/skills/typeclaw-claude-code/SKILL.md +83 -40
  98. package/src/skills/typeclaw-claude-code/references/stop-hook.md +2 -0
  99. package/src/skills/typeclaw-claude-code/references/tmux-driving.md +102 -16
  100. package/src/skills/typeclaw-config/SKILL.md +38 -33
  101. package/src/skills/typeclaw-cron/SKILL.md +1 -1
  102. package/src/skills/typeclaw-git/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +16 -163
  104. package/src/skills/typeclaw-permissions/SKILL.md +2 -2
  105. package/src/skills/typeclaw-plugins/SKILL.md +26 -15
  106. package/src/test-helpers/wait-for.ts +7 -1
  107. package/typeclaw.schema.json +7 -0
@@ -0,0 +1,37 @@
1
+ import path from 'node:path'
2
+
3
+ import type { SessionOrigin } from '@/agent/session-origin'
4
+ import type { SecuritySeverity } from '@/bundled-plugins/security/permissions'
5
+
6
+ export const GUARD_MEMORY_RETRIEVAL_CACHE_WRITE = 'memoryRetrievalCacheWrite'
7
+ export const GUARD_MEMORY_RETRIEVAL_CACHE_WRITE_SEVERITY: SecuritySeverity = 'low'
8
+
9
+ const SESSION_ID_REGEX = /^[A-Za-z0-9._-]{1,128}$/
10
+
11
/**
 * Tells whether a tool call is a permitted write into the memory
 * retrieval cache.
 *
 * The call is allowed only when all of the following hold:
 * - the tool is `write`;
 * - the session origin is the `memory-retrieval` subagent;
 * - `args.path` is a string that, resolved against `agentDir`, names a
 *   direct child of `<agentDir>/memory/.retrieval-cache`;
 * - the file name is `<sessionId>.md` and `<sessionId>` matches
 *   `SESSION_ID_REGEX`.
 *
 * The decision is made from path strings alone; this function performs no
 * filesystem access.
 *
 * @param options.tool Name of the tool being invoked.
 * @param options.args Raw tool arguments; only `path` is inspected.
 * @param options.agentDir Base directory used to resolve `args.path`.
 * @param options.origin Origin of the calling session, if known.
 * @returns `true` when the write is allowed, `false` otherwise.
 */
export async function isMemoryRetrievalCacheWriteAllowed(options: {
  tool: string
  args: Record<string, unknown>
  agentDir: string
  origin?: SessionOrigin
}): Promise<boolean> {
  const fromRetrievalSubagent =
    options.origin?.kind === 'subagent' && options.origin.subagent === 'memory-retrieval'
  if (options.tool !== 'write' || !fromRetrievalSubagent) return false

  const candidate = options.args.path
  if (typeof candidate !== 'string') return false

  const resolvedTarget = path.resolve(options.agentDir, candidate)
  const cacheDir = path.resolve(options.agentDir, 'memory', '.retrieval-cache')
  const rel = path.relative(cacheDir, resolvedTarget)

  // An empty result is the cache directory itself; a leading `..` or an
  // absolute result is rejected as lying outside the cache directory.
  const outsideCacheDir = rel === '' || rel.startsWith('..') || path.isAbsolute(rel)
  if (outsideCacheDir) return false

  // Exactly one segment means a direct child, not a nested path.
  const segments = rel.split(path.sep).filter((segment) => segment.length > 0)
  const [fileName] = segments
  if (segments.length !== 1 || fileName === undefined) return false
  if (!fileName.endsWith('.md')) return false

  return SESSION_ID_REGEX.test(fileName.slice(0, -'.md'.length))
}
@@ -0,0 +1,67 @@
1
+ import path from 'node:path'
2
+
3
+ import type { SessionOrigin } from '@/agent/session-origin'
4
+ import { SLUG_REGEX } from '@/bundled-plugins/memory/slug'
5
+ import type { SecuritySeverity } from '@/bundled-plugins/security/permissions'
6
+
7
+ import type { GuardBlock } from '../policy'
8
+
9
+ export const GUARD_MEMORY_TOPICS_DELETE = 'memoryTopicsDelete'
10
+
11
+ export const GUARD_MEMORY_TOPICS_DELETE_SEVERITY: SecuritySeverity = 'medium'
12
+
13
/**
 * Guard for the `delete_topic_shard` tool.
 *
 * Calls to any other tool are ignored (returns `undefined`). For
 * `delete_topic_shard`, the checks run in this order and the first failure
 * produces the returned block:
 * 1. `args.path` must be a string;
 * 2. the session origin must be the `dreaming` subagent;
 * 3. the raw path must not contain a backslash;
 * 4. resolved against `agentDir`, the path must be a direct child of
 *    `<agentDir>/memory/topics`;
 * 5. the file name must end in `.md`;
 * 6. the name without `.md` must match `SLUG_REGEX`.
 *
 * @param options.tool Name of the tool being invoked.
 * @param options.args Raw tool arguments; only `path` is inspected.
 * @param options.agentDir Base directory used to resolve `args.path`.
 * @param options.origin Origin of the calling session, if known.
 * @returns A `GuardBlock` describing the rejection, or `undefined` when the
 *   call is not subject to this guard or passes every check.
 */
export function checkMemoryTopicsDeleteGuard(options: {
  tool: string
  args: Record<string, unknown>
  agentDir: string
  origin?: SessionOrigin
}): GuardBlock | undefined {
  if (options.tool !== 'delete_topic_shard') return undefined

  const { tool, agentDir, origin } = options
  const deny = (reason: string): GuardBlock => block(tool, reason)

  const requested = options.args.path
  if (typeof requested !== 'string') return deny('path argument must be a string')

  const fromDreaming = origin?.kind === 'subagent' && origin.subagent === 'dreaming'
  if (!fromDreaming) return deny('only the dreaming subagent may delete topic shards')

  if (requested.includes('\\')) {
    return deny('path must use POSIX separators under memory/topics/')
  }

  const targetPath = path.resolve(agentDir, requested)
  const rel = path.relative(path.resolve(agentDir, 'memory', 'topics'), targetPath)

  // Empty means the topics directory itself; `..` or absolute is rejected as outside it.
  if (rel === '' || rel.startsWith('..') || path.isAbsolute(rel)) {
    return deny(`path must be a direct child of memory/topics/: ${targetPath}`)
  }

  // Nested paths and non-`.md` names share the same rejection message.
  const segments = rel.split(path.sep).filter((segment) => segment.length > 0)
  const [fileName] = segments
  if (segments.length !== 1 || fileName === undefined || !fileName.endsWith('.md')) {
    return deny(`path must be a single .md file inside memory/topics/: ${targetPath}`)
  }

  const slug = fileName.slice(0, -'.md'.length)
  return SLUG_REGEX.test(slug) ? undefined : deny(`slug must match ${SLUG_REGEX}: ${slug}`)
}
61
+
62
/**
 * Builds the `GuardBlock` returned when this guard rejects a call.
 *
 * @param tool Name of the tool that was blocked.
 * @param reason Reason text; a trailing period is appended to it.
 * @returns A block result whose `reason` names this guard and the tool.
 */
function block(tool: string, reason: string): GuardBlock {
  const message = `Guard \`${GUARD_MEMORY_TOPICS_DELETE}\` blocked ${tool}: ${reason}.`
  return { block: true, reason: message }
}
@@ -0,0 +1,33 @@
1
+ import path from 'node:path'
2
+
3
+ import type { SessionOrigin } from '@/agent/session-origin'
4
+ import { SLUG_REGEX } from '@/bundled-plugins/memory/slug'
5
+
6
/**
 * Tells whether a tool call is a permitted write of a topic shard.
 *
 * The call is allowed only when the tool is `write`, the session origin is
 * the `dreaming` subagent, and `args.path` — resolved against `agentDir` —
 * names a direct child of `<agentDir>/memory/topics` of the form
 * `<slug>.md` where `<slug>` matches `SLUG_REGEX`.
 *
 * The decision is made from path strings alone; this function performs no
 * filesystem access.
 *
 * @param options.tool Name of the tool being invoked.
 * @param options.args Raw tool arguments; only `path` is inspected.
 * @param options.agentDir Base directory used to resolve `args.path`.
 * @param options.origin Origin of the calling session, if known.
 * @returns `true` when the write is allowed, `false` otherwise.
 */
export async function isMemoryTopicsWriteAllowed(options: {
  tool: string
  args: Record<string, unknown>
  agentDir: string
  origin?: SessionOrigin
}): Promise<boolean> {
  const { tool, args, agentDir, origin } = options
  if (tool !== 'write') return false
  if (origin?.kind !== 'subagent' || origin.subagent !== 'dreaming') return false

  const requested = args.path
  if (typeof requested !== 'string') return false

  const resolvedTarget = path.resolve(agentDir, requested)
  const topicsDir = path.resolve(agentDir, 'memory', 'topics')
  const relativeToTopics = path.relative(topicsDir, resolvedTarget)

  // Empty means the topics directory itself; `..` or absolute is rejected as outside it.
  if (relativeToTopics === '') return false
  if (relativeToTopics.startsWith('..') || path.isAbsolute(relativeToTopics)) return false

  // Exactly one non-empty segment means a direct child.
  const segments = relativeToTopics.split(path.sep).filter((segment) => segment.length > 0)
  const [fileName] = segments
  if (segments.length !== 1 || fileName === undefined || fileName === '') return false
  if (!fileName.endsWith('.md')) return false

  return SLUG_REGEX.test(fileName.slice(0, -'.md'.length))
}
@@ -1,7 +1,11 @@
1
1
  import { realpath } from 'node:fs/promises'
2
2
  import path from 'node:path'
3
3
 
4
+ import type { SessionOrigin } from '@/agent/session-origin'
5
+
4
6
  import { ACKNOWLEDGE_GUARDS, type GuardBlock, isGuardAcknowledged } from '../policy'
7
+ import { isMemoryRetrievalCacheWriteAllowed } from './memory-retrieval-cache-write'
8
+ import { isMemoryTopicsWriteAllowed } from './memory-topics-write'
5
9
  import { isSkillAuthoringAllowed } from './skill-authoring'
6
10
 
7
11
  export const GUARD_NON_WORKSPACE_WRITE = 'nonWorkspaceWrite'
@@ -9,7 +13,6 @@ export const GUARD_NON_WORKSPACE_WRITE = 'nonWorkspaceWrite'
9
13
  const AGENT_ROOT_WRITE_ALLOWLIST = new Set([
10
14
  'AGENTS.md',
11
15
  'IDENTITY.md',
12
- 'MEMORY.md',
13
16
  'SOUL.md',
14
17
  'USER.md',
15
18
  'cron.json',
@@ -28,8 +31,9 @@ export async function checkNonWorkspaceWriteGuard(options: {
28
31
  tool: string
29
32
  args: Record<string, unknown>
30
33
  agentDir: string
34
+ origin?: SessionOrigin
31
35
  }): Promise<GuardBlock | undefined> {
32
- const { tool, args, agentDir } = options
36
+ const { tool, args, agentDir, origin } = options
33
37
  if (tool !== 'write' && tool !== 'edit') return undefined
34
38
 
35
39
  const rawPath = args.path
@@ -42,6 +46,8 @@ export async function checkNonWorkspaceWriteGuard(options: {
42
46
  resolveRealIntendedPath(workspacePath),
43
47
  ])
44
48
  if (await isSkillAuthoringAllowed({ tool, args, agentDir })) return undefined
49
+ if (await isMemoryRetrievalCacheWriteAllowed({ tool, args, agentDir, origin })) return undefined
50
+ if (await isMemoryTopicsWriteAllowed({ tool, args, agentDir, origin })) return undefined
45
51
  if (await isAllowedAgentRootWrite(agentDir, targetPath, realTargetPath)) return undefined
46
52
  if (isInside(realWorkspacePath, realTargetPath)) return undefined
47
53
  if (isGuardAcknowledged(args, GUARD_NON_WORKSPACE_WRITE)) return undefined
@@ -16,4 +16,11 @@ export {
16
16
  checkSkillAuthoringGuard,
17
17
  isSkillAuthoringAllowed,
18
18
  } from './policies/skill-authoring'
19
+ export { GUARD_MEMORY_TOPICS_DELETE, checkMemoryTopicsDeleteGuard } from './policies/memory-topics-delete'
20
+ export {
21
+ GUARD_MEMORY_RETRIEVAL_CACHE_WRITE,
22
+ GUARD_MEMORY_RETRIEVAL_CACHE_WRITE_SEVERITY,
23
+ isMemoryRetrievalCacheWriteAllowed,
24
+ } from './policies/memory-retrieval-cache-write'
25
+ export { isMemoryTopicsWriteAllowed } from './policies/memory-topics-write'
19
26
  export { GUARD_UNCOMMITTED_CHANGES, checkUncommittedChangesAdvice } from './policies/uncommitted-changes'
@@ -1,8 +1,8 @@
1
1
  # typeclaw-plugin-memory
2
2
 
3
- The bundled memory plugin. Owns `MEMORY.md` (long-term memory) and `memory/yyyy-MM-dd.jsonl` (daily streams) plus the two subagents that write them: `memory-logger` and `dreaming`.
3
+ The bundled memory plugin. Owns `memory/topics/` (sharded long-term memory) and `memory/streams/yyyy-MM-dd.jsonl` (daily fragment streams) plus three subagents that read and write them: `memory-logger`, `dreaming`, `memory-retrieval`.
4
4
 
5
- This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]` entry to add and no opt-out. To configure it, add a `memory` block to `typeclaw.json`.
5
+ Auto-loaded by every TypeClaw agent. No `plugins[]` entry to add and no opt-out. Configure via the `memory` block in `typeclaw.json`.
6
6
 
7
7
  ## Config
8
8
 
@@ -11,101 +11,115 @@ This plugin is **auto-loaded** by every TypeClaw agent. There is no `plugins[]`
11
11
  "memory": {
12
12
  "idleMs": 60000,
13
13
  "bufferBytes": 500000,
14
+ "injectionBudgetBytes": 16384,
14
15
  "dreaming": { "schedule": "*/30 * * * *" }
15
16
  }
16
17
  }
17
18
  ```
18
19
 
19
- | Field | Default | Effect |
20
- | -------------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
21
- | `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. Default bumped from `10000` to `60000` to reduce spawn churn during conversational sessions where the agent goes idle for short periods between rapid back-and-forth turns. |
22
- | `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run, even during continuous activity. `0` disables. Minimum `10000` when non-zero. Default bumped from `100000` to `500000` so a single conversational session stays within one memory-logger run unless it grows past ~half a megabyte of transcript. |
23
- | `memory.dreaming` | `{}` (cron job on) | Dreaming cron job is always registered. Override `schedule` to change when it fires. |
24
- | `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression. Defaults to every 30 minutes; fires short-circuit with zero LLM cost when nothing sits past the watermark, so frequent no-op fires are cheap and let sporadic agents still consolidate while alive (`src/cron/scheduler.ts` has no catchup for missed fires). Second-level schedules are rejected to avoid noisy no-op dreaming loops. |
20
+ | Field | Default | Effect |
21
+ | ----------------------------- | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
22
+ | `memory.idleMs` | `60000` | Debounce window before `memory-logger` spawns after a prompt completes. Minimum `1000`. |
23
+ | `memory.bufferBytes` | `500000` | Size-based ceiling: spawns `memory-logger` when the transcript grows by this many bytes since the last run. `0` disables. Minimum `10000` when non-zero. |
24
+ | `memory.injectionBudgetBytes` | `16384` | Total shard-body budget for direct-mode memory injection. Above this, `loadMemory` switches to index-mode (headings + metadata only) and the agent must call `memory_search` to fetch specific topics or recent stream events. Minimum `4096`. |
25
+ | `memory.dreaming.schedule` | `"*/30 * * * *"` | Five-field cron expression for the dreaming subagent. |
25
26
 
26
27
  All fields are **restart-required** — the plugin reads them once at boot.
27
28
 
28
29
  ## What it contributes
29
30
 
30
- | Kind | Name | Notes |
31
- | -------- | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
32
- | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/<today>.jsonl`. Coalesced per `agentDir`; the plugin chains spawn calls onto a per-agent Promise so two concurrent channel sessions never race on the same daily stream file. |
33
- | Subagent | `dreaming` | Reads `MEMORY.md` plus undreamed daily-stream events, **rebalances** the existing topics using per-topic strength signals (citation count, distinct days, recency) injected into its user prompt, rewrites `MEMORY.md` with `memory/yyyy-MM-dd#<fragment-id>` citations, optionally writes muscle-memory skills under `memory/skills/<name>/SKILL.md`, advances the per-day dreamed-id set, **compacts daily streams** by dropping superseded watermarks and dreamed-but-uncited fragments, then commits the result with a summary message (`dream: <summary> <emoji>`, e.g. `dream: 3 fragments + new skill 'pr-review' 🔮`). Coalesced per `agentDir`. The runtime enforces a **citation-superset invariant** on every rewrite: a new MEMORY.md that drops any previously-cited fragment id is reverted to its pre-run bytes (dreamed-ids still advance so the run is not retried in a loop). |
34
- | Cron job | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
35
- | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Resets a `setTimeout(idleMs)` on every event; on fire, calls `ctx.spawnSubagent('memory-logger', ...)`. Also `fs.stat`s the transcript on every event and spawns immediately when growth since the last run reaches `bufferBytes`. |
36
- | Hook | `session.end` | Cancels the debounce timer and immediately spawns `memory-logger` (so the final transcript is captured even when the user disconnects right away). |
31
+ | Kind | Name | Notes |
32
+ | -------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
33
+ | Subagent | `memory-logger` | Reads a parent transcript past a watermark and appends fragments to `memory/streams/<today>.jsonl`. Coalesced per `agentDir`. |
34
+ | Subagent | `dreaming` | Reads shards under `memory/topics/` plus undreamed daily-stream events and rebalances the topic shards. Coalesced per `agentDir`. Citation-superset invariant enforced on every run. |
35
+ | Subagent | `memory-retrieval` | On `session.turn.start` when injection plan is `index` mode, reads the user's actual prompt for this turn + shard listing, writes a focused summary to `memory/.retrieval-cache/<sessionId>.md`. Coalesced per `parentSessionId`. |
36
+ | Tool | `memory_search` | Main-agent tool. Substring/regex search across BOTH topic shards (slugs, frontmatter, bodies) and undreamed daily-stream events (fragment topic/body, legacy prose). Results are discriminated by `source: "topic" \| "stream"`; topics come first, then streams newest-first. |
37
+ | Tool | `delete_topic_shard` | Subagent-only (dreaming). Deletes a topic shard at `memory/topics/<slug>.md`. Path-guarded. |
38
+ | Cron | `__plugin_memory_dreaming` | `kind: 'prompt'`, `subagent: 'dreaming'`, scheduled per `memory.dreaming.schedule`. |
39
+ | Hook | `session.idle` | Per-session debouncer with size-based ceiling. Spawns `memory-logger` on idle or buffer-trip. |
40
+ | Hook | `session.end` | Spawns `memory-logger` immediately; also unlinks the retrieval-cache file for this session. |
41
+ | Hook | `session.turn.start` | When `buildInjectionPlan` returns `mode: 'index'` and origin is not a subagent, spawns `memory-retrieval` (detached) with the turn's `userPrompt` so the cache reflects the user's current question, not the assembling system prompt. Fire-and-forget; failures route through the plugin logger. |
37
42
 
38
- ## Memory injection
43
+ ## Memory injection (two-tier, topic shards only)
39
44
 
40
- The rendered `# Memory` section (MEMORY.md + undreamed daily-stream tails) is injected into every session's system prompt by core (`src/agent/index.ts` `createResourceLoader` `loadMemory`), **not** by a plugin hook. It is appended as the last block of the system prompt, after `gitNudge`, so the most-volatile content (daily streams that grow after every memory-logger fire) sits at the bottom of the cache-suffix region. This way a memory change only invalidates the memory section itself rather than everything downstream of it.
45
+ Default budget is 16 KB. Direct mode when shard bytes sum ≤ budget: all shard bodies are injected verbatim. Index mode when sum > budget: only heading + `cites=N, days=N, lastReinforced=YYYY-MM-DD` per shard, plus a directive for the agent to call `memory_search` to fetch specific topics or recent stream events.
41
46
 
42
- ## Memory saturation (LTP/LTD analogue)
47
+ **Undreamed daily-stream events are NOT injected into the system prompt.** They are reachable only via `memory_search`, which discriminates results by `source: "topic" | "stream"`. The agent now decides per-query whether recent observations are relevant, instead of carrying every undreamed fragment in the cached prompt prefix. Three reasons this is the right shape:
43
48
 
44
- MEMORY.md is read into every session's system prompt, so its size is the prompt budget for everything else. Without a saturation policy it grows monotonically — every consolidated topic survives forever and citations accumulate across days. The dreaming subagent therefore treats MEMORY.md like human long-term memory: **repetition strengthens, lack of repetition saturates**.
49
+ 1. PR #314 made `memory_search` cover the stream surface, so the duplicate copy in the system prompt no longer earns its bytes.
50
+ 2. Streams grow unboundedly with usage (~360 KB at 30 days in the typical case, more under heavy use). The previous per-file 12 KB cap silently dropped each day's tail with no signal to the agent; on-demand search returns the relevant slice instead of "the first 12 KB by date".
51
+ 3. Streams sat inside the cached system-prompt prefix and appended new fragments on every memory-logger run, breaking cache reuse across prompts. Without injection, the prefix is stable until topic shards change.
45
52
 
46
- ### How
53
+ **Channel-origin always uses index mode regardless of total shard size** — defends against memory bleed into channel responses (the agent treats injected memory as instructions when channel users see it).
47
54
 
48
- On every run the runtime computes per-topic strength signals from MEMORY.md's existing citations: `cites` (total), `days` (distinct calendar days those citations span), `last reinforced` (most recent citation date), `age (d)` (whole days since `last reinforced`). The numbers are derived by `src/bundled-plugins/memory/strength.ts` and rendered as a table at the top of the dreaming subagent's user prompt. There is no sidecar file, no schema version, no migration — strength is recomputed on every run from MEMORY.md alone.
55
+ When index mode is active, the `memory-retrieval` subagent fires on `session.turn.start` (the hook that brackets every actual `session.prompt(text)` call with the user's literal text), reads that user prompt, decides what's relevant across BOTH topic shards and undreamed stream events, pulls them via `memory_search`/`read`, and writes a focused ≤8 KB summary to `memory/.retrieval-cache/<sessionId>.md`. The NEXT `loadMemory` call for the same session reads and appends that cache file (lag-by-one-prompt). The cache file is unlinked on `session.end`.
49
56
 
50
- The subagent uses these numbers to:
57
+ The hook trigger matters: `SessionPromptEvent.prompt` (`session.prompt`) carries the assembling system prompt (`basePrompt + IDENTITY + SOUL`) at session-creation time, NOT the user's message. Reading that field as if it were the user's prompt — which this plugin did before PR #340 — caused the retrieval subagent to keyword-mine TypeClaw's own framing prose (`TypeClaw`, `subagent`, `AGENTS.md`, `systemPromptLeak`, etc.) on every session. `session.turn.start` is the correct hook for "what is the user asking right now."
51
58
 
52
- 1. **Promote strong topics.** `days = 1` → tentative ("the user mentioned"). `days >= 3` → confident ("the user consistently"). `days >= 7` → declarative ("the user always"). Promotion is gated on distinct days, not raw citation count — five citations on one day is one debugging session, five citations across five days is a recurring pattern.
53
- 2. **Merge near-duplicates.** Topics that overlap in subject matter get folded into one, with the merged topic's `fragments:` list as the **union** of the source topics' fragment ids.
54
- 3. **Demote decayed topics.** A topic with `cites = 1, days = 1, age >= 30` (or `cites <= 3, days <= 2, age >= 60`) routes into a `## Historical observations` bucket as a one-line bullet. The fact is preserved in the summary, the citation is preserved (so daily-stream GC keeps the underlying fragment), but the bytes shrink from a full topic+paragraph+citation-list to one line. Strong topics (`days >= 3`) are never demoted.
59
+ ## Memory saturation
55
60
 
56
- **There is no hard-deletion path** in this iteration. The historical bucket grows monotonically; the subagent is explicitly told not to attempt quarter-summary collapses because the safety net (below) would revert them. If the bucket becomes inconveniently long in practice, a future runtime change will provide a structured drop mechanism — until then, every demoted citation stays alive forever via its one-line bullet.
61
+ The dreaming subagent treats topic shards like human long-term memory: **repetition strengthens, lack of repetition saturates**. On every run the runtime computes per-shard strength signals from each shard's frontmatter (`cites`, `days`, `lastReinforced`) and renders them as a table at the top of the dreaming subagent's user prompt.
57
62
 
58
- ### The citation-superset safety net
63
+ The subagent uses these signals to:
59
64
 
60
- After every dreaming run that rewrote MEMORY.md, `src/bundled-plugins/memory/citation-superset.ts` checks that the union of fragment ids cited in the NEW file is a superset of the union cited in the OLD file. If any previously-cited id is missing from the rewrite, the runtime:
65
+ 1. **Promote strong topics.** `days = 1` → tentative ("the user mentioned"). `days >= 3` → confident ("the user consistently"). `days >= 7` → declarative ("the user always"). Promotion is gated on distinct days, not raw citation count.
66
+ 2. **Merge near-duplicates.** Topics that overlap get folded into one. The merged topic's citation set is the union.
67
+ 3. **Demote decayed topics.** A weak/decayed topic stays as its own shard but the body should be trimmed to a single line. When index-mode injection is in effect, demoted shards' bodies don't enter the system prompt at all — the index plus `memory_search` retrieval cover them on demand.
61
68
 
62
- 1. Restores MEMORY.md to its pre-run bytes via `writeFile(memoryFilePath, memoryTextBefore)`. The pre-run bytes are captured **before** `runSession` so the revert always has a clean source.
63
- 2. Skips daily-stream fragment GC for this run (no fragments are dropped).
64
- 3. Advances the dreamed-id set anyway — the **conscious anti-loop tradeoff**: this means the run's NEW undreamed fragments are orphaned (they survive in the daily JSONL forever, force-committed, but will not be re-shown to a future dreaming run and therefore never make it into MEMORY.md). The alternative (don't advance) would infinite-loop if the LLM keeps making the same mistake on the same inputs. The orphaned fragments are recoverable from git history (`git log memory/`) by a human operator.
65
- 4. Logs a `[dreaming] citation-superset violation: …` warning naming the dropped ids and explicitly stating the orphaning tradeoff.
69
+ There is no `## Historical observations` bucket. Demoted topics live as their own shards; injection-time filtering (the index/direct split) handles the prompt-budget pressure.
66
70
 
67
- **Revert-write failure path.** If the `writeFile` in step 1 itself throws (disk full, EACCES, MEMORY.md replaced by a directory by a buggy subagent, etc.), MEMORY.md is in an unknown state. The runtime then:
71
+ ## Citation-superset safety net
68
72
 
69
- - Skips the dreamed-id advance (so the next run gets a second chance at the same input).
70
- - Skips compaction (so no fragments are GC'd against an ambiguous citation set).
71
- - Skips the commit (so a known-bad on-disk state is not snapshotted).
72
- - Logs a `[dreaming] citation-superset violation AND revert failed: …` ERROR with the recovery command (`git checkout -- MEMORY.md && typeclaw restart`).
73
+ `checkCitationSupersetAcrossShards` checks that the union of fragment ids cited in NEW shards is a superset of the union cited in OLD shards. Violation triggers:
73
74
 
74
- The check exists because the daily-stream GC in `compactDailyStreams` drops any fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are the only thing that keeps a fragment alive past its first dreaming run — an omitted id means the underlying fragment would be permanently deleted on the next compaction.
75
+ 1. `restoreShardSnapshot` restores every pre-run shard to byte-identical bytes AND deletes any new shards created during the run.
76
+ 2. Daily-stream fragment GC is skipped.
77
+ 3. Dreamed-ids ADVANCE anyway — the **conscious anti-loop tradeoff**: orphaned fragments survive in the daily JSONL (force-committed) but won't be re-shown to a future dreaming run. The alternative (don't advance) would infinite-loop if the LLM keeps making the same mistake.
78
+ 4. The commit is skipped.
79
+
80
+ A `[dreaming] citation-superset violation: …` warning logs the dropped ids and explicitly names the orphaning tradeoff.
75
81
 
76
82
  ## Files on disk
77
83
 
78
- - **`MEMORY.md`** — long-term memory. Created by the dreaming subagent on first run if absent. Force-committed by the runtime; `skip-worktree` flag is set so the human's `git status` stays clean.
79
- - **`memory/yyyy-MM-dd.jsonl`** — daily fragment streams. One event per line, discriminated union of `fragment | watermark | legacy_prose`, lossy-preserving one-shot migration from older `.md` streams. Appended to by `memory-logger`. Created on demand. Gitignored at the agent's level but force-committed alongside `MEMORY.md` after each dreaming run.
80
- - **`memory/skills/<name>/SKILL.md`** — _muscle memory_. Skills the dreaming subagent distills from repeated procedures it sees in daily streams. Auto-discovered as first-class skills by `createResourceLoader`, and force-committed under the same `memory/` snapshot path as the daily streams. Written or refined with the standard `write` / `edit` tools; the bundled guard plugin enforces the exact `memory/skills/<name>/SKILL.md` path shape, single-segment kebab/snake-case names, matching frontmatter, and symlink/path-traversal safety. There is no runtime skill-delete tool; outright deletion of muscle-memory skills remains a user decision.
81
- - **`memory/.dreaming-state.json`** — per-day **dreamed-id sets**: which stream-event ids the dreaming subagent has already reasoned over. Plain JSON, schema version `2`. The next dreaming run reads only fragments whose id is NOT in the set. On malformed input or a version mismatch (including legacy `version: 1` line-count files from before the id-based switch), the plugin fails open with empty state — one extra dreaming run re-reads each day, then the file is stable.
84
+ - **`memory/topics/<slug>.md`** — per-topic shards with YAML frontmatter (`heading`, `cites`, `days`, `lastReinforced`, `tags?`) + body markdown. Runtime owns the frontmatter (recomputed after every dreaming run from the body's citations); dreaming subagent writes body only.
85
+ - **`memory/streams/yyyy-MM-dd.jsonl`** — daily fragment streams. One event per line, discriminated union of `fragment | watermark | legacy_prose`. Force-committed alongside the shards.
86
+ - **`memory/MEMORY.md.pre-shard.bak`** — one-shot pre-migration backup created by the boot migration. Safe to delete after verifying.
87
+ - **`memory/skills/<name>/SKILL.md`** — muscle memory. Skills the dreaming subagent distills from repeated procedures. Auto-loaded as first-class skills.
88
+ - **`memory/.dreaming-state.json`** — per-day dreamed-id sets.
89
+ - **`memory/.retrieval-cache/<sessionId>.md`** — ephemeral retrieval summaries. Written by `memory-retrieval`, read by `loadMemory` on the next prompt of the same session, unlinked on `session.end`.
82
90
 
83
- `typeclaw init` does **not** scaffold these files. They appear when needed.
91
+ ## One-shot boot migration
84
92
 
85
- ## How `session.idle` works
93
+ When the plugin boots against an agent folder with a root `MEMORY.md` and no `memory/topics/`, it runs `runShardingMigration`. Steps:
86
94
 
87
- Core fires `session.idle` immediately after every `session.prompt()` completion (success or error). The plugin owns the debounce: it keeps a `Map<sessionId, Timeout>` and resets the timer on every event. When the timer fires, the plugin spawns `memory-logger` for that session.
95
+ 1. Detect prerequisites.
96
+ 2. Reset `memory/.migrating/` if a previous run crashed mid-flight.
97
+ 3. Run the legacy `.md → .jsonl` daily-stream migration (existing behavior).
98
+ 4. Parse `MEMORY.md` via `parseTopicsWithBodies`.
99
+ 5. **Stage topic shards** in `memory/.migrating/topics/` (originals untouched).
100
+ 6. **Stage streams** by COPY (not rename) to `memory/.migrating/streams/`.
101
+ 7. Stage pre-shard backup by COPY.
102
+ 8. **Verify staging** via `checkCitationSupersetAcrossShards`. On failure, abort and KEEP `memory/.migrating/` for human inspection. Originals untouched.
103
+ 9. **Atomic finalization**: rename three dirs (`topics`, `streams`, `.pre-shard.bak`), then unlink originals.
88
104
 
89
- If the user starts a new prompt before the timer fires, the next `session.idle` event resets the timer. If the user disconnects, `session.end` cancels the timer and fires `memory-logger` immediately so the final transcript is captured.
105
+ Crash-recovery branches at boot: stale `memory/.migrating/` with no `topics/` → cleanup + retry; leftover `memory/.migrating/` alongside complete `topics/` → cleanup only; orphan root `MEMORY.md` or `memory/<date>.jsonl` alongside the new layout → delete orphans.
90
106
 
91
- In channel sessions, the agent rarely goes idle long enough to trip the timer because new participant messages keep arriving. The size-based ceiling handles this: on every `session.idle` the plugin `fs.stat`s the transcript and compares against the size at the last memory-logger run. Once growth reaches `memory.bufferBytes`, the timer is cancelled and `memory-logger` spawns immediately. The watermark on the output side absorbs any over-firing — if a buffer-trip arrives on a transcript chunk that's all tool noise, `memory-logger` reads it, decides nothing is worth logging, advances the watermark, and exits.
107
+ The migration is idempotent and crash-safe.
92
108
 
93
- ## Migration notes (from before the plugin existed)
109
+ ## How `session.idle` works
110
+
111
+ Core fires `session.idle` immediately after every `session.prompt()` completion. The plugin owns the debounce: a `Map<sessionId, Timeout>` reset on every event. When the timer fires, the plugin spawns `memory-logger` for that session.
94
112
 
95
- - `memory.idleMs` and `memory.dreaming.schedule` already existed in core's `typeclaw.json` schema. They moved into this plugin's `configSchema` verbatim. Existing agents continue to work with no config change.
96
- - `memory.dreaming.schedule` was previously live-reloadable. It is now **restart-required** because plugin config is read once at boot. To change the schedule, edit `typeclaw.json` and run `typeclaw restart`.
97
- - The cron job ID changed from `__internal_dreaming` to `__plugin_memory_dreaming`. Anything that referenced the old ID (custom dashboards, scripts) needs updating.
113
+ If the user starts a new prompt before the timer fires, the next `session.idle` resets it. If the user disconnects, `session.end` cancels the timer and fires `memory-logger` immediately.
114
+
115
+ In busy channel sessions the agent rarely goes idle long enough to trip the timer. The size-based ceiling handles this: on every `session.idle` the plugin `fs.stat`s the transcript and compares against the size at the last memory-logger run. Once growth reaches `memory.bufferBytes`, the timer is cancelled and `memory-logger` spawns immediately.
98
116
 
99
117
  ## Tests
100
118
 
101
- `index.test.ts` — composition tests (config schema, hook wiring, debounce semantics, MEMORY.md auto-create).
102
- - `memory-logger.test.ts` — system prompt invariants, watermark handling.
103
- `dreaming.test.ts` — orchestration, watermark advancement, git snapshot (including muscle-memory skill files), system prompt + tool-surface invariants, citation-superset safety net (revert on dropped id, dreamed-ids still advance, no-revert on legitimate merge, no-revert on first-ever run), saturation-prompt invariants (rebalance-every-run, promotion ladder, historical bucket, demotion thresholds, bucket overflow synthesis).
104
- - `dreaming-state.test.ts` — fail-open semantics on malformed state.
105
- `watermark.test.ts` — marker parsing.
106
- `append-tool.test.ts` — append-only semantics.
107
- `src/bundled-plugins/guard/policies/skill-authoring.test.ts` — runtime skill authoring guard: path sandboxing, name validation, YAML frontmatter, and write/edit final-content validation.
108
- - `load-memory.test.ts` — memory section rendering, undreamed-tail filtering, watermark stripping.
109
- - `topics.test.ts` — citation-attributing parser (per-topic citation grouping for strength signals).
110
- - `strength.test.ts` — per-topic strength computation (distinct days, recency, age clamping) and markdown table rendering.
111
- - `citation-superset.test.ts` — the safety-net check (superset semantics, missing-id reporting, summary truncation).
119
+ Test files in this directory (kebab-case, `.test.ts` neighbors): `paths`, `slug`, `frontmatter`, `topics`, `shard-snapshot`, `delete-tool`, `citations`, `citation-superset`, `migration`, `load-shards`, `load-memory`, `injection-plan`, `search-tool`, `memory-retrieval`, `memory-logger`, `dreaming`, `index`, `integration`. Plus guard policies in `../guard/policies/`: `memory-topics-delete`, `memory-topics-write`, `memory-retrieval-cache-write`.
120
+
121
+ ## Migration notes (from before the plugin existed)
122
+
123
+ - `memory.idleMs` and `memory.dreaming.schedule` already existed in core's `typeclaw.json` schema and moved into this plugin's `configSchema` verbatim.
124
+ - `memory.dreaming.schedule` is now **restart-required** because plugin config is read once at boot.
125
+ - The cron job ID is `__plugin_memory_dreaming` (previously `__internal_dreaming`).
@@ -1,5 +1,5 @@
1
1
  import { mkdir } from 'node:fs/promises'
2
- import { dirname, join } from 'node:path'
2
+ import { dirname } from 'node:path'
3
3
 
4
4
  import { z } from 'zod'
5
5
 
@@ -7,6 +7,7 @@ import { defineTool } from '@/plugin'
7
7
  import { formatLocalDate } from '@/shared'
8
8
 
9
9
  import { fragmentContentHash } from './fragment-parser'
10
+ import { streamFilePath } from './paths'
10
11
  import { detectSecrets } from './secret-detector'
11
12
  import { newEventId, timestampFromId } from './stream-events'
12
13
  import type { FragmentEvent, WatermarkEvent } from './stream-events'
@@ -97,7 +98,7 @@ export const advanceWatermarkTool = defineTool({
97
98
  })
98
99
 
99
100
  function dailyStreamPath(agentDir: string): string {
100
- return join(agentDir, 'memory', `${formatLocalDate()}.jsonl`)
101
+ return streamFilePath(agentDir, formatLocalDate())
101
102
  }
102
103
 
103
104
  function assertNoSecrets(content: string): void {
@@ -1,13 +1,13 @@
1
- // Citation-superset safety net for the dreaming subagent's MEMORY.md
2
- // rewrite. After every dreaming run that touched MEMORY.md, we check that
3
- // the union of fragment ids cited in the NEW file is a superset of the
4
- // union cited in the OLD file. If any previously-cited id is missing from
1
+ // Citation-superset safety net for the dreaming subagent's topic-shard
2
+ // rewrite. After every dreaming run that touched memory/topics/, we check that
3
+ // the union of fragment ids cited in the NEW shard set is a superset of the
4
+ // union cited in the OLD shard set. If any previously-cited id is missing from
5
5
  // the rewrite, the rewrite is rejected.
6
6
  //
7
7
  // Why this exists: the daily-stream GC in compactDailyStreams drops any
8
- // fragment that is `dreamedIds ∧ ¬citedIds`. Citations in MEMORY.md are
8
+ // fragment that is `dreamedIds ∧ ¬citedIds`. Citations in topic shards are
9
9
  // the only thing that keeps a fragment alive past its first dreaming run.
10
- // If the subagent rewrites MEMORY.md and accidentally omits a citation —
10
+ // If the subagent rewrites a topic shard and accidentally omits a citation —
11
11
  // either by garbling a merged topic's fragments: list or by dropping a
12
12
  // topic entirely — the next compaction call permanently deletes the
13
13
  // underlying fragment from the daily JSONL. There is no recovery beyond
@@ -21,22 +21,34 @@
21
21
  // mechanical check is the safety floor.
22
22
  //
23
23
  // Detection only. The handler decides what to do with the verdict (revert
24
- // MEMORY.md to its pre-run bytes, skip daily-stream compaction, still
24
+ // memory/topics/ to its pre-run bytes, skip daily-stream compaction, still
25
25
  // advance the dreamed-id set so we do not loop on the same fragments).
26
26
 
27
27
  import { parseCitations } from './citations'
28
28
 
29
29
  export type CitationSupersetVerdict = { ok: true } | { ok: false; missing: Array<{ date: string; fragmentId: string }> }
30
30
 
31
- // Compare the OLD MEMORY.md to the NEW MEMORY.md and report any
31
+ // Compare the OLD shard content to the NEW shard content and report any
32
32
  // fragment id that the OLD cited and the NEW does not. Empty old text
33
33
  // (first-ever dreaming run, prior file missing) is treated as the empty
34
34
  // citation set — any new file passes by construction.
35
35
  export function checkCitationSuperset(oldText: string, newText: string): CitationSupersetVerdict {
36
- const oldCitations = parseCitations(oldText)
36
+ return checkCitationIndexSuperset(buildCitationIndex(oldText), buildCitationIndex(newText))
37
+ }
38
+
39
+ export function checkCitationSupersetAcrossShards(
40
+ oldShards: Map<string, string>,
41
+ newShards: Map<string, string>,
42
+ ): CitationSupersetVerdict {
43
+ return checkCitationIndexSuperset(buildCitationIndexFromShards(oldShards), buildCitationIndexFromShards(newShards))
44
+ }
45
+
46
+ function checkCitationIndexSuperset(
47
+ oldCitations: Map<string, Set<string>>,
48
+ newCitations: Map<string, Set<string>>,
49
+ ): CitationSupersetVerdict {
37
50
  if (oldCitations.size === 0) return { ok: true }
38
51
 
39
- const newCitations = parseCitations(newText)
40
52
  const missing: Array<{ date: string; fragmentId: string }> = []
41
53
 
42
54
  const dates = [...oldCitations.keys()].sort()
@@ -52,9 +64,35 @@ export function checkCitationSuperset(oldText: string, newText: string): Citatio
52
64
  return missing.length === 0 ? { ok: true } : { ok: false, missing }
53
65
  }
54
66
 
67
+ function buildCitationIndex(text: string): Map<string, Set<string>> {
68
+ return parseCitations(text)
69
+ }
70
+
71
+ function buildCitationIndexFromShards(shards: Map<string, string>): Map<string, Set<string>> {
72
+ const index = new Map<string, Set<string>>()
73
+
74
+ for (const text of shards.values()) {
75
+ mergeCitationIndex(index, buildCitationIndex(text))
76
+ }
77
+
78
+ return index
79
+ }
80
+
81
+ function mergeCitationIndex(target: Map<string, Set<string>>, source: Map<string, Set<string>>): void {
82
+ for (const [date, ids] of source) {
83
+ let targetIds = target.get(date)
84
+ if (targetIds === undefined) {
85
+ targetIds = new Set<string>()
86
+ target.set(date, targetIds)
87
+ }
88
+
89
+ for (const id of ids) targetIds.add(id)
90
+ }
91
+ }
92
+
55
93
  // Pretty-print the verdict's missing ids for log output. Keeps the line
56
94
  // short by reporting count + first N ids; the full list is reconstructable
57
- // from MEMORY.md's git history if forensics are ever needed.
95
+ // from memory/topics/ git history if forensics are ever needed.
58
96
  export function summarizeMissingCitations(missing: ReadonlyArray<{ date: string; fragmentId: string }>): string {
59
97
  const total = missing.length
60
98
  const sample = missing.slice(0, 3).map((m) => `${m.date}#${m.fragmentId}`)
@@ -1,4 +1,4 @@
1
- // Citation format: `memory/yyyy-MM-dd#<fragment-id>`. The id is the full
1
+ // Citation format: `streams/yyyy-MM-dd#<fragment-id>`. The id is the full
2
2
  // UUIDv7 of the fragment event in the daily JSONL stream. The date prefix is
3
3
  // redundant with the id's timestamp (UUIDv7 encodes minting time in the first
4
4
  // 48 bits) but kept for human grep-ability — readers should be able to see
@@ -6,30 +6,41 @@
6
6
  //
7
7
  // The format does NOT accept line ranges. The prior `:43-45` shape is gone
8
8
  // (see the "drop backward compat" decision in the PR description). Parsing
9
- // silently ignores any line in MEMORY.md that doesn't match this exact shape,
9
+ // silently ignores any line in a topic shard that doesn't match this exact shape,
10
10
  // so legacy citations from before the cutover are dropped — they no longer
11
11
  // pin fragments alive against compaction.
12
12
 
13
- const CITATION_LINE = /^[\s-]*memory\/(\d{4}-\d{2}-\d{2})#([\w-]+)\s*$/im
13
+ export const CITATION_FORMAT_CANONICAL = 'streams' as const
14
+ export const acceptedPrefixes = ['streams', 'memory'] as const
14
15
 
15
- const CITATION_LINE_GLOBAL = /memory\/(\d{4}-\d{2}-\d{2})#([\w-]+)/g
16
+ // Single alternation keeps line and global parsing on the same transitional
17
+ // prefix set while dropping the prefix from the public Citation shape.
18
+ const CITATION_LINE = /^[\s-]*(streams|memory)\/(\d{4}-\d{2}-\d{2})#([\w-]+)\s*$/im
19
+
20
+ const CITATION_LINE_GLOBAL = /(streams|memory)\/(\d{4}-\d{2}-\d{2})#([\w-]+)/g
21
+
22
+ const LEGACY_CITATION_GLOBAL = /memory\/(\d{4}-\d{2}-\d{2})#([\w-]+)/g
16
23
 
17
24
  export type Citation = { date: string; fragmentId: string }
18
25
 
19
26
  export function formatCitation(date: string, fragmentId: string): string {
20
- return `memory/${date}#${fragmentId}`
27
+ return `${CITATION_FORMAT_CANONICAL}/${date}#${fragmentId}`
28
+ }
29
+
30
+ export function normalizeCitation(citation: string): string {
31
+ return citation.replace(LEGACY_CITATION_GLOBAL, `${CITATION_FORMAT_CANONICAL}/$1#$2`)
21
32
  }
22
33
 
23
34
  // Parse every citation in `text` and return them grouped by date. The
24
35
  // returned Map is empty when no citations appear. Used by:
25
36
  // - dreaming.ts compaction to decide which fragments are still referenced
26
- // by MEMORY.md and must survive GC.
37
+ // by topic shards and must survive GC.
27
38
  // - tests pinning the format.
28
39
  export function parseCitations(text: string): Map<string, Set<string>> {
29
40
  const out = new Map<string, Set<string>>()
30
41
  for (const match of text.matchAll(CITATION_LINE_GLOBAL)) {
31
- const date = match[1]!
32
- const fragmentId = match[2]!
42
+ const date = match[2]!
43
+ const fragmentId = match[3]!
33
44
  let set = out.get(date)
34
45
  if (set === undefined) {
35
46
  set = new Set<string>()