typeclaw 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/require-parallel.ts +41 -15
- package/src/agent/live-subagents.ts +0 -1
- package/src/agent/session-origin.ts +10 -0
- package/src/agent/subagent-completion-reminder.ts +4 -1
- package/src/agent/system-prompt.ts +5 -5
- package/src/agent/tools/restart.ts +13 -2
- package/src/agent/tools/spawn-subagent.ts +0 -1
- package/src/agent/tools/subagent-output.ts +3 -51
- package/src/bundled-plugins/memory/dreaming-state.ts +51 -2
- package/src/bundled-plugins/memory/index.ts +55 -25
- package/src/bundled-plugins/memory/memory-retrieval.ts +1 -1
- package/src/bundled-plugins/memory/migration.ts +21 -17
- package/src/bundled-plugins/memory/stream-io.ts +71 -1
- package/src/channels/manager.ts +7 -0
- package/src/channels/router.ts +141 -10
- package/src/channels/schema.ts +1 -1
- package/src/cli/compose.ts +23 -2
- package/src/cli/logs.ts +17 -2
- package/src/compose/logs.ts +8 -4
- package/src/config/config.ts +8 -0
- package/src/container/index.ts +1 -1
- package/src/container/logs.ts +38 -11
- package/src/init/dockerfile.ts +147 -4
- package/src/inspect/live.ts +32 -1
- package/src/inspect/render.ts +32 -0
- package/src/inspect/replay.ts +14 -0
- package/src/inspect/types.ts +26 -0
- package/src/run/index.ts +1 -0
- package/src/server/index.ts +59 -19
- package/src/shared/protocol.ts +30 -0
- package/src/skills/typeclaw-codex-cli/SKILL.md +324 -0
- package/src/skills/typeclaw-codex-cli/references/auth-flow.md +131 -0
- package/src/skills/typeclaw-codex-cli/references/stop-hook.md +92 -0
- package/src/skills/typeclaw-codex-cli/references/tmux-driving.md +239 -0
- package/src/skills/typeclaw-config/SKILL.md +32 -31
- package/src/test-helpers/wait-for.ts +15 -7
- package/typeclaw.schema.json +16 -10
package/package.json
CHANGED
|
@@ -1,29 +1,55 @@
|
|
|
1
|
-
// Preloaded by bunfig.toml `[test] preload`.
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
1
|
+
// Preloaded by bunfig.toml `[test] preload`. Two responsibilities:
|
|
2
|
+
// 1. Deny `bun test` without --parallel.
|
|
3
|
+
// 2. Raise the per-test default timeout from Bun's 5000ms.
|
|
4
|
+
//
|
|
5
|
+
// Why deny serial runs: Serial runs are ~3.4x slower (44s → 13s, see commit
|
|
6
|
+
// 1c66d5e), and Bun has no bunfig knob for the flag yet (verified against
|
|
7
|
+
// bunfig.zig in oven-sh/bun main, May 2026). Without this guard, IDE test
|
|
8
|
+
// runners and ad-hoc shells silently fall back to the slow path.
|
|
7
9
|
//
|
|
8
10
|
// Detection: Bun strips CLI flags from `Bun.argv` before invoking the
|
|
9
11
|
// preload, so we can't scrape the flag directly. Instead we look for
|
|
10
12
|
// BUN_TEST_WORKER_ID, which Bun sets in the preload env exactly when
|
|
11
|
-
// `--parallel` is active (the variable carries the worker index for
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
16
|
-
//
|
|
13
|
+
// `--parallel` is active (the variable carries the worker index for the
|
|
14
|
+
// IPC handshake between coordinator and workers). Empirically verified
|
|
15
|
+
// against bun 1.3.14: present under --parallel, absent under serial. If
|
|
16
|
+
// a future Bun version renames this var, the guard fails closed (treats
|
|
17
|
+
// every run as serial → always denies), which is the safe direction.
|
|
18
|
+
//
|
|
19
|
+
// Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky test
|
|
20
|
+
// where worker contention obscures the failure.
|
|
17
21
|
//
|
|
18
|
-
//
|
|
19
|
-
//
|
|
22
|
+
// Why raise the default timeout: A growing number of tests in this repo
|
|
23
|
+
// either spawn child processes (`bun run typeclaw …` via Bun.spawn from
|
|
24
|
+
// src/cli/index.test.ts, src/cli/role.test.ts, src/cli/status.test.ts,
|
|
25
|
+
// src/init/dockerfile.test.ts agent-browser wrapper, etc.) or boot the
|
|
26
|
+
// in-process agent (`startAgent({ port: 0, … })` from src/run/plugin.test.ts).
|
|
27
|
+
// Both shapes have a happy-path cost well under 1s but a worst-case cost
|
|
28
|
+
// that races Bun's 5000ms ceiling under `--parallel` contention. The
|
|
29
|
+
// repeating failure mode is "this test timed out after 5000ms" appearing
|
|
30
|
+
// on different tests across runs at a rough ~3-15% rate per full-suite
|
|
31
|
+
// invocation — not a real bug, just resource starvation. Raising the
|
|
32
|
+
// default to 30s eliminates the false positives without masking real
|
|
33
|
+
// hangs (a wedged test still fails, just 6x slower than before). The
|
|
34
|
+
// happy path is unaffected because tests complete in their actual
|
|
35
|
+
// runtime, not the timeout budget.
|
|
36
|
+
//
|
|
37
|
+
// 30s was chosen as ~75x the observed happy-path cold-start (~400ms) for
|
|
38
|
+
// the heaviest subprocess tests, matching the in-house convention used in
|
|
39
|
+
// pi-coding-agent's subprocess fixtures and Bun's own integration-test
|
|
40
|
+
// suites (see oven-sh/bun test/cli/install/*.test.ts which set 5-minute
|
|
41
|
+
// timeouts for full installs). Individual tests that genuinely need more
|
|
42
|
+
// can still pass an explicit 3rd arg to `test()` to override locally.
|
|
43
|
+
|
|
44
|
+
import { setDefaultTimeout } from 'bun:test'
|
|
20
45
|
|
|
21
46
|
const isParallelWorker = typeof process.env.BUN_TEST_WORKER_ID === 'string'
|
|
22
47
|
|
|
23
48
|
if (isParallelWorker) {
|
|
24
|
-
|
|
49
|
+
setDefaultTimeout(30_000)
|
|
25
50
|
} else if (process.env.TYPECLAW_ALLOW_SERIAL_TESTS === '1') {
|
|
26
51
|
console.warn('[require-parallel] Running serially — TYPECLAW_ALLOW_SERIAL_TESTS=1 set.')
|
|
52
|
+
setDefaultTimeout(30_000)
|
|
27
53
|
} else {
|
|
28
54
|
console.error('')
|
|
29
55
|
console.error(' ✗ `bun test` without --parallel is denied in this repo.')
|
|
@@ -231,6 +231,16 @@ function renderChannelOrigin(
|
|
|
231
231
|
'the answer — both in the same turn. The ack is not your reply; the answer',
|
|
232
232
|
'is. Once the answer lands, end your turn.',
|
|
233
233
|
'',
|
|
234
|
+
'**Backgrounded work does not end the obligation.** If you spawn a',
|
|
235
|
+
'subagent with `run_in_background: true` to answer the current inbound,',
|
|
236
|
+
"you have promised a reply you have not delivered yet. Don't end the",
|
|
237
|
+
'turn with `NO_REPLY` — the system will not surface the subagent result',
|
|
238
|
+
'on its own. When the subagent-completion `<system-reminder>` arrives,',
|
|
239
|
+
'fetch the result with `subagent_output` and send it via `channel_reply`',
|
|
240
|
+
'in that turn. `NO_REPLY` is only legal on the post-result turn if there',
|
|
241
|
+
'is genuinely nothing user-facing to share (e.g. the result is empty or',
|
|
242
|
+
'identical to something you already replied with this conversation).',
|
|
243
|
+
'',
|
|
234
244
|
'Do not send a second reply just to rephrase, restate, or "confirm in',
|
|
235
245
|
'plain language" something you already said.',
|
|
236
246
|
'',
|
|
@@ -21,7 +21,10 @@ export type CompletionReminderArgs = {
|
|
|
21
21
|
const CHANNEL_REPLY_NUDGE =
|
|
22
22
|
'This reminder is a system message, not a user inbound — but you are in a channel session, ' +
|
|
23
23
|
'so end your turn via `channel_reply` (or `channel_send`) to surface the result. ' +
|
|
24
|
-
'Plain-text output is invisible here. If
|
|
24
|
+
'Plain-text output is invisible here. If you spawned this subagent to answer a user, ' +
|
|
25
|
+
'this is the turn where that promised reply lands — fetch the result via `subagent_output` ' +
|
|
26
|
+
'and send it. `NO_REPLY` is only correct when the result is genuinely empty or duplicates ' +
|
|
27
|
+
'something you already replied with in this conversation.'
|
|
25
28
|
|
|
26
29
|
export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
|
|
27
30
|
const durationStr = formatReminderDuration(args.durationMs)
|
|
@@ -60,7 +60,7 @@ There are two delegation modes. Pick deliberately.
|
|
|
60
60
|
|
|
61
61
|
**Mode A — Research fan-out** (in service of the current question)
|
|
62
62
|
|
|
63
|
-
When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion;
|
|
63
|
+
When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; then call \`subagent_output\` once per task_id to fetch the result and answer the user. \`subagent_output\` always returns immediately with a snapshot — it does not block.
|
|
64
64
|
|
|
65
65
|
The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
|
|
66
66
|
|
|
@@ -72,13 +72,13 @@ When the user hands you a task that will take minutes (a multi-step browser sess
|
|
|
72
72
|
|
|
73
73
|
In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. \`NO_REPLY\` is the escape hatch only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. Otherwise, post the result.
|
|
74
74
|
|
|
75
|
-
Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
|
|
75
|
+
Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) or \`codex\` (OpenAI Codex CLI) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
|
|
76
76
|
|
|
77
|
-
The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
|
|
77
|
+
The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code or Codex CLI delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
|
|
78
78
|
|
|
79
79
|
**Status queries**
|
|
80
80
|
|
|
81
|
-
If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id
|
|
81
|
+
If the user asks "how's it going?" or "status?" on a running subagent, call \`subagent_output({ task_id })\` and report the \`status_summary\` in your own words. Don't pretend to know the status without checking.
|
|
82
82
|
|
|
83
83
|
**Prompt structure for spawns** (mandatory — the subagent does not see this conversation)
|
|
84
84
|
|
|
@@ -92,7 +92,7 @@ If the user asks "how's it going?" or "status?" on a running subagent, call \`su
|
|
|
92
92
|
|
|
93
93
|
- Don't fire more than 5 subagents in a single turn.
|
|
94
94
|
- Don't spawn for a known answer or single-file lookup — do it yourself.
|
|
95
|
-
- Don't
|
|
95
|
+
- Don't call \`subagent_output\` in a loop waiting for completion; end your response and the reminder will wake you, then fetch the result once.
|
|
96
96
|
- Don't ask a research subagent to make architectural decisions for you — they find and report; you decide.
|
|
97
97
|
- Subagents cannot recursively spawn other subagents.
|
|
98
98
|
|
|
@@ -27,6 +27,15 @@ export type CreateRestartToolOptions = {
|
|
|
27
27
|
// fixes. Required even when stream is absent so the type stays simple and
|
|
28
28
|
// the field's presence documents the runtime contract.
|
|
29
29
|
originatingSessionId: string
|
|
30
|
+
// Override the default 5s ACK budget. Production has no caller for this —
|
|
31
|
+
// 5s is generous against a real hostd on the same host. Test-only seam:
|
|
32
|
+
// restart.test.ts spawns a `Bun.serve` and awaits its HTTP roundtrip from
|
|
33
|
+
// the same parallel-test-runner that hosts dozens of other workers
|
|
34
|
+
// contending on libuv's I/O threads. Under that contention, an in-process
|
|
35
|
+
// 127.0.0.1 fetch can occasionally exceed 5s and the test's `expect(ok:
|
|
36
|
+
// true)` assertion flips to `ok: false, reason: 'daemon ack timeout'`.
|
|
37
|
+
// Optional so production callers keep the 5s default unchanged.
|
|
38
|
+
ackTimeoutMs?: number
|
|
30
39
|
}
|
|
31
40
|
|
|
32
41
|
export type RestartToolDetails = { ok: boolean; containerName: string; reason?: string }
|
|
@@ -45,9 +54,11 @@ export function createRestartTool({
|
|
|
45
54
|
hostdToken,
|
|
46
55
|
stream,
|
|
47
56
|
originatingSessionId,
|
|
57
|
+
ackTimeoutMs,
|
|
48
58
|
}: CreateRestartToolOptions) {
|
|
49
59
|
const doExit = exit ?? ((code: number) => process.exit(code))
|
|
50
60
|
const httpUrl = hostdUrl ?? process.env.TYPECLAW_HOSTD_URL
|
|
61
|
+
const ackBudget = ackTimeoutMs ?? ACK_TIMEOUT_MS
|
|
51
62
|
const httpToken = hostdToken ?? process.env.TYPECLAW_HOSTD_TOKEN
|
|
52
63
|
|
|
53
64
|
return defineTool({
|
|
@@ -78,8 +89,8 @@ export function createRestartTool({
|
|
|
78
89
|
const request = { kind: 'restart' as const, containerName, build }
|
|
79
90
|
const reply =
|
|
80
91
|
httpUrl && httpToken
|
|
81
|
-
? await sendHttp(request, { timeoutMs:
|
|
82
|
-
: await send(request, { timeoutMs:
|
|
92
|
+
? await sendHttp(request, { timeoutMs: ackBudget, url: httpUrl, token: httpToken })
|
|
93
|
+
: await send(request, { timeoutMs: ackBudget, socket: socketPath ?? containerSocketPath() })
|
|
83
94
|
if (!reply.ok) {
|
|
84
95
|
const details: RestartToolDetails = { ok: false, containerName, reason: reply.reason }
|
|
85
96
|
return {
|
|
@@ -130,7 +130,6 @@ export function createSpawnSubagentTool(options: CreateSpawnSubagentToolOptions)
|
|
|
130
130
|
startedAt,
|
|
131
131
|
status: 'running' as const,
|
|
132
132
|
abort: resolvedHandle.abort,
|
|
133
|
-
awaitCompletion: () => completion.then((c) => completionToFinalShape(c, now() - startedAt)),
|
|
134
133
|
}
|
|
135
134
|
liveRegistry.register(live)
|
|
136
135
|
|
|
@@ -6,9 +6,6 @@ import type { PermissionService } from '@/permissions'
|
|
|
6
6
|
import type { LiveSubagentRegistry, StatusSnapshot, SubagentProgressEvent } from '../live-subagents'
|
|
7
7
|
import type { SessionOrigin } from '../session-origin'
|
|
8
8
|
|
|
9
|
-
const DEFAULT_TIMEOUT_MS = 60_000
|
|
10
|
-
const MAX_TIMEOUT_MS = 300_000
|
|
11
|
-
|
|
12
9
|
export type SubagentOutputToolDetails =
|
|
13
10
|
| {
|
|
14
11
|
ok: true
|
|
@@ -57,43 +54,19 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
|
|
|
57
54
|
'Fetch the current state of a subagent you previously spawned. Returns one of three statuses: ' +
|
|
58
55
|
"'running' (with a human-readable status_summary and a tail of recent progress events), " +
|
|
59
56
|
"'completed' (with the final message), or 'failed' (with the error). " +
|
|
60
|
-
'
|
|
61
|
-
'
|
|
62
|
-
'
|
|
57
|
+
'Returns immediately with a snapshot — never blocks. ' +
|
|
58
|
+
'For backgrounded spawns, end your turn after spawning and wait for the completion <system-reminder>; ' +
|
|
59
|
+
'then call this once to fetch the result. Use it for ad-hoc status checks too — never in a polling loop.',
|
|
63
60
|
parameters: Type.Object({
|
|
64
61
|
task_id: Type.String({
|
|
65
62
|
description: 'The task_id returned by a previous spawn_subagent call.',
|
|
66
63
|
}),
|
|
67
|
-
block: Type.Optional(
|
|
68
|
-
Type.Boolean({
|
|
69
|
-
description:
|
|
70
|
-
'If true, wait for the subagent to complete (or time out) before returning. Default false: return immediately with the current state.',
|
|
71
|
-
}),
|
|
72
|
-
),
|
|
73
|
-
timeout_ms: Type.Optional(
|
|
74
|
-
Type.Integer({
|
|
75
|
-
description: `When block=true, max milliseconds to wait (default ${DEFAULT_TIMEOUT_MS}, max ${MAX_TIMEOUT_MS}).`,
|
|
76
|
-
minimum: 1,
|
|
77
|
-
maximum: MAX_TIMEOUT_MS,
|
|
78
|
-
}),
|
|
79
|
-
),
|
|
80
64
|
}),
|
|
81
65
|
|
|
82
66
|
async execute(_toolCallId, params) {
|
|
83
67
|
if (permissions !== undefined && !permissions.has(getOrigin(), 'subagent.output')) {
|
|
84
68
|
return errorResult('subagent.output denied: insufficient permissions')
|
|
85
69
|
}
|
|
86
|
-
const live = liveRegistry.get(params.task_id)
|
|
87
|
-
if (live === undefined) {
|
|
88
|
-
return errorResult(`Unknown task_id: ${params.task_id}.`)
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
const wantsBlock = params.block === true && live.status === 'running'
|
|
92
|
-
if (wantsBlock) {
|
|
93
|
-
const timeoutMs = clampTimeout(params.timeout_ms)
|
|
94
|
-
await raceWithTimeout(live.awaitCompletion(), timeoutMs)
|
|
95
|
-
}
|
|
96
|
-
|
|
97
70
|
const snap = liveRegistry.snapshot(params.task_id, now())
|
|
98
71
|
if (snap === undefined) {
|
|
99
72
|
return errorResult(`Unknown task_id: ${params.task_id}.`)
|
|
@@ -103,27 +76,6 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
|
|
|
103
76
|
})
|
|
104
77
|
}
|
|
105
78
|
|
|
106
|
-
function clampTimeout(value: number | undefined): number {
|
|
107
|
-
if (value === undefined) return DEFAULT_TIMEOUT_MS
|
|
108
|
-
return Math.min(Math.max(1, Math.floor(value)), MAX_TIMEOUT_MS)
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
async function raceWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T | undefined> {
|
|
112
|
-
return new Promise<T | undefined>((resolve) => {
|
|
113
|
-
const timer = setTimeout(() => resolve(undefined), timeoutMs)
|
|
114
|
-
promise.then(
|
|
115
|
-
(value) => {
|
|
116
|
-
clearTimeout(timer)
|
|
117
|
-
resolve(value)
|
|
118
|
-
},
|
|
119
|
-
() => {
|
|
120
|
-
clearTimeout(timer)
|
|
121
|
-
resolve(undefined)
|
|
122
|
-
},
|
|
123
|
-
)
|
|
124
|
-
})
|
|
125
|
-
}
|
|
126
|
-
|
|
127
79
|
type ToolReturn = {
|
|
128
80
|
content: { type: 'text'; text: string }[]
|
|
129
81
|
details: SubagentOutputToolDetails
|
|
@@ -1,11 +1,29 @@
|
|
|
1
1
|
import { existsSync } from 'node:fs'
|
|
2
|
-
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
|
2
|
+
import { mkdir, readFile, stat, writeFile } from 'node:fs/promises'
|
|
3
3
|
import { dirname, join } from 'node:path'
|
|
4
4
|
|
|
5
5
|
export const DREAMING_STATE_FILE = 'memory/.dreaming-state.json'
|
|
6
6
|
|
|
7
7
|
const VERSION = 2
|
|
8
8
|
|
|
9
|
+
// Stat-keyed cache for `.dreaming-state.json`. The file is read once at
|
|
10
|
+
// the start of every dreaming run AND once per `readAllStreamDays` call
|
|
11
|
+
// (which fires inside every `memory_search` invocation). For a retrieval
|
|
12
|
+
// subagent that issues 3 parallel searches, this cache turns 3 reads +
|
|
13
|
+
// 3 JSON.parses into 3 stats + 1 parse — small per-call savings, but the
|
|
14
|
+
// file is tiny so the win is mostly avoiding GC pressure on busy
|
|
15
|
+
// channel sessions. Invalidation key matches the stream-file cache
|
|
16
|
+
// (`load-shards.ts` and `stream-io.ts` use the same `(mtimeMs, ctimeMs,
|
|
17
|
+
// size)` shape); `saveDreamingState` uses `writeFile` which bumps both
|
|
18
|
+
// mtime and ctime.
|
|
19
|
+
type DreamingStateCacheEntry = {
|
|
20
|
+
mtimeMs: number
|
|
21
|
+
ctimeMs: number
|
|
22
|
+
size: number
|
|
23
|
+
state: DreamingState
|
|
24
|
+
}
|
|
25
|
+
const dreamingStateCache = new Map<string, DreamingStateCacheEntry>()
|
|
26
|
+
|
|
9
27
|
// Per-day "dreamed" set: the set of stream-event ids dreaming has already
|
|
10
28
|
// reasoned over for a given day. Anything in this set is either cited from
|
|
11
29
|
// memory/topics/ (must survive compaction) or was consciously discarded by a
|
|
@@ -32,8 +50,35 @@ export function emptyState(): DreamingState {
|
|
|
32
50
|
|
|
33
51
|
export async function loadDreamingState(agentDir: string): Promise<DreamingState> {
|
|
34
52
|
const path = join(agentDir, DREAMING_STATE_FILE)
|
|
35
|
-
if (!existsSync(path))
|
|
53
|
+
if (!existsSync(path)) {
|
|
54
|
+
dreamingStateCache.delete(path)
|
|
55
|
+
return emptyState()
|
|
56
|
+
}
|
|
36
57
|
|
|
58
|
+
let fileStat: { mtimeMs: number; ctimeMs: number; size: number }
|
|
59
|
+
try {
|
|
60
|
+
const s = await stat(path)
|
|
61
|
+
fileStat = { mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs, size: s.size }
|
|
62
|
+
} catch {
|
|
63
|
+
return emptyState()
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
const cached = dreamingStateCache.get(path)
|
|
67
|
+
if (
|
|
68
|
+
cached !== undefined &&
|
|
69
|
+
cached.mtimeMs === fileStat.mtimeMs &&
|
|
70
|
+
cached.ctimeMs === fileStat.ctimeMs &&
|
|
71
|
+
cached.size === fileStat.size
|
|
72
|
+
) {
|
|
73
|
+
return cached.state
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const state = await loadDreamingStateFromDisk(path)
|
|
77
|
+
dreamingStateCache.set(path, { ...fileStat, state })
|
|
78
|
+
return state
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
async function loadDreamingStateFromDisk(path: string): Promise<DreamingState> {
|
|
37
82
|
let raw: string
|
|
38
83
|
try {
|
|
39
84
|
raw = await readFile(path, 'utf8')
|
|
@@ -58,6 +103,10 @@ export async function saveDreamingState(agentDir: string, state: DreamingState):
|
|
|
58
103
|
await writeFile(path, `${JSON.stringify(state, null, 2)}\n`, 'utf8')
|
|
59
104
|
}
|
|
60
105
|
|
|
106
|
+
export function __resetDreamingStateCacheForTests(): void {
|
|
107
|
+
dreamingStateCache.clear()
|
|
108
|
+
}
|
|
109
|
+
|
|
61
110
|
export function getDreamedIds(state: DreamingState, date: string): ReadonlySet<string> {
|
|
62
111
|
const ids = state.dreamedThrough[date]?.dreamedIds
|
|
63
112
|
return ids === undefined ? EMPTY_SET : new Set(ids)
|
|
@@ -145,39 +145,46 @@ export default definePlugin({
|
|
|
145
145
|
const lastIdleEvent = new Map<string, { parentTranscriptPath: string | undefined; origin?: SessionOrigin }>()
|
|
146
146
|
const bytesAtLastRun = new Map<string, number>()
|
|
147
147
|
|
|
148
|
-
// memory-logger is
|
|
148
|
+
// memory-logger is coalesced per agentDir (not per parentSessionId) so that
|
|
149
149
|
// two concurrent channel sessions for the same agent never write to the same
|
|
150
150
|
// daily stream file at the same time. The subagent consumer would silently drop
|
|
151
151
|
// a colliding fire, so we serialize spawn calls *here* (chaining each onto the
|
|
152
152
|
// previous one's settlement) instead of letting the consumer choose between
|
|
153
153
|
// dropping or queueing. The chain holds at most one in-flight promise plus one
|
|
154
|
-
// queued
|
|
155
|
-
//
|
|
154
|
+
// queued.
|
|
155
|
+
//
|
|
156
|
+
// The `lastIdleEvent` lookup happens SYNCHRONOUSLY at call time and the
|
|
157
|
+
// snapshot is captured in `payload` before any await. This is load-bearing
|
|
158
|
+
// for `session.end`'s fire-and-forget path (see hook below): the hook
|
|
159
|
+
// synchronously cleans up `lastIdleEvent.delete(sessionId)` immediately
|
|
160
|
+
// after calling fireMemoryLogger, so if the snapshot were read lazily
|
|
161
|
+
// inside the chained `.then`, it would race with cleanup and the spawn
|
|
162
|
+
// would silently no-op. Capturing the payload up front decouples the
|
|
163
|
+
// session-end snapshot from the cleanup that follows.
|
|
156
164
|
let spawnChain: Promise<void> = Promise.resolve()
|
|
157
165
|
|
|
158
166
|
const fireMemoryLogger = (sessionId: string, reason: 'idle' | 'buffer-trip' | 'session-end'): Promise<void> => {
|
|
167
|
+
const last = lastIdleEvent.get(sessionId)
|
|
168
|
+
if (!last || last.parentTranscriptPath === undefined) return Promise.resolve()
|
|
169
|
+
const parentTranscriptPath = last.parentTranscriptPath
|
|
170
|
+
const payload: MemoryLoggerPayload = {
|
|
171
|
+
parentSessionId: sessionId,
|
|
172
|
+
parentTranscriptPath,
|
|
173
|
+
agentDir: ctx.agentDir,
|
|
174
|
+
...(last.origin !== undefined ? { origin: last.origin } : {}),
|
|
175
|
+
}
|
|
176
|
+
const spawnOptions = {
|
|
177
|
+
parentSessionId: sessionId,
|
|
178
|
+
...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
|
|
179
|
+
}
|
|
159
180
|
const next = spawnChain
|
|
160
181
|
.catch(() => undefined)
|
|
161
182
|
.then(async () => {
|
|
162
|
-
const
|
|
163
|
-
if (!last || last.parentTranscriptPath === undefined) return
|
|
164
|
-
const payload: MemoryLoggerPayload = {
|
|
165
|
-
parentSessionId: sessionId,
|
|
166
|
-
parentTranscriptPath: last.parentTranscriptPath,
|
|
167
|
-
agentDir: ctx.agentDir,
|
|
168
|
-
...(last.origin !== undefined ? { origin: last.origin } : {}),
|
|
169
|
-
}
|
|
170
|
-
const currentSize = await readSize(last.parentTranscriptPath)
|
|
183
|
+
const currentSize = await readSize(parentTranscriptPath)
|
|
171
184
|
bytesAtLastRun.set(sessionId, currentSize)
|
|
172
185
|
ctx.logger.info(`memory-logger spawn ${sessionId} reason=${reason} transcript_bytes=${currentSize}`)
|
|
173
186
|
try {
|
|
174
|
-
await raceSpawn(
|
|
175
|
-
ctx.spawnSubagent('memory-logger', payload, {
|
|
176
|
-
parentSessionId: sessionId,
|
|
177
|
-
...(last.origin !== undefined ? { spawnedByOrigin: last.origin } : {}),
|
|
178
|
-
}),
|
|
179
|
-
spawnTimeoutMs,
|
|
180
|
-
)
|
|
187
|
+
await raceSpawn(ctx.spawnSubagent('memory-logger', payload, spawnOptions), spawnTimeoutMs)
|
|
181
188
|
} catch (err) {
|
|
182
189
|
ctx.logger.error(`memory-logger spawn failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
183
190
|
}
|
|
@@ -355,16 +362,39 @@ export default definePlugin({
|
|
|
355
362
|
ctx.logger.error(`memory-retrieval spawn failed: ${err instanceof Error ? err.message : String(err)}`)
|
|
356
363
|
})
|
|
357
364
|
},
|
|
358
|
-
|
|
365
|
+
// The memory-logger spawn is intentionally detached (`void`) instead
|
|
366
|
+
// of awaited. The channel router calls `tearDownLive` synchronously
|
|
367
|
+
// inside `ensureLive`'s stale-rollover path (router.ts:718), and
|
|
368
|
+
// `tearDownLive` awaits `fireSessionEnd` which awaits this hook. An
|
|
369
|
+
// awaited memory-logger spawn here would block new-session creation
|
|
370
|
+
// for the full subagent runtime — observed as 22+ seconds of channel
|
|
371
|
+
// silence on a 22 KB transcript before the new session even starts
|
|
372
|
+
// its cold-start chain.
|
|
373
|
+
//
|
|
374
|
+
// Safety: `fireMemoryLogger` captures the payload synchronously from
|
|
375
|
+
// `lastIdleEvent` (see comment above), so the `delete` calls below
|
|
376
|
+
// cannot race with the chained spawn. `spawnChain` still serializes
|
|
377
|
+
// memory-logger fires per agentDir — the detached promise is queued
|
|
378
|
+
// onto the chain before this hook returns, so a subsequent fire from
|
|
379
|
+
// the new session (idle, buffer-trip, or session-end) waits for the
|
|
380
|
+
// session-end spawn to settle before running.
|
|
381
|
+
//
|
|
382
|
+
// The only durability tradeoff: if the agent process dies between
|
|
383
|
+
// this hook returning and `spawnChain` settling, the session-end
|
|
384
|
+
// memory-logger fire is lost (its transcript fragments don't make
|
|
385
|
+
// it into today's daily stream). This is already true for the idle
|
|
386
|
+
// and buffer-trip paths, which are timer-driven and fire-and-forget
|
|
387
|
+
// by design. Session JSONLs are force-committed elsewhere, so no
|
|
388
|
+
// user-visible transcript is lost — only the LLM-distilled stream
|
|
389
|
+
// fragments for the final batch.
|
|
390
|
+
'session.end': (event) => {
|
|
359
391
|
if (event.origin?.kind === 'subagent') return
|
|
360
392
|
cancelTimer(event.sessionId)
|
|
361
|
-
|
|
393
|
+
void fireMemoryLogger(event.sessionId, 'session-end')
|
|
362
394
|
const cacheFilePath = join(ctx.agentDir, 'memory', '.retrieval-cache', `${event.sessionId}.md`)
|
|
363
|
-
|
|
364
|
-
await unlink(cacheFilePath)
|
|
365
|
-
} catch (err) {
|
|
395
|
+
unlink(cacheFilePath).catch((err) => {
|
|
366
396
|
if (!isEnoent(err)) ctx.logger.warn(`[memory] failed to clean retrieval cache: ${err}`)
|
|
367
|
-
}
|
|
397
|
+
})
|
|
368
398
|
lastIdleEvent.delete(event.sessionId)
|
|
369
399
|
bytesAtLastRun.delete(event.sessionId)
|
|
370
400
|
},
|
|
@@ -31,7 +31,7 @@ export type CreateMemoryRetrievalSubagentOptions = {
|
|
|
31
31
|
|
|
32
32
|
export const MEMORY_RETRIEVAL_SYSTEM_PROMPT = `You are the memory-retrieval subagent. Read the user's most recent prompt and decide what's relevant from BOTH topic shards in \`memory/topics/\` (consolidated long-term memory) AND undreamed daily-stream events under \`memory/streams/\` (recent fragments not yet folded into shards). Use \`memory_search\` to query both surfaces; use \`read\`/\`ls\` to pull full shard bodies when needed. Synthesize a focused ≤8 KB summary of the relevant memory. Save by \`write\`ing it to the exact path provided in your payload as \`cacheFilePath\`. Be ruthlessly concise. Do NOT write anywhere else. Do NOT delete files.
|
|
33
33
|
|
|
34
|
-
Search discipline:
|
|
34
|
+
Search discipline: issue ALL your \`memory_search\` queries in a SINGLE response as parallel tool calls (up to 3 at once), then wait for every result before deciding what to do next. Different angles in parallel, NEVER one search per turn — sequential searches waste a full LLM round-trip per query (~3s each) on file I/O that takes milliseconds. Pick queries that match the user's literal phrasing — not framing vocabulary, not metadata (session ids, dates), not words from your own system prompt. If the parallel batch turns up nothing relevant, write the empty-context note and stop.`
|
|
35
35
|
|
|
36
36
|
export function memoryRetrievalExhaustedMessage(used: number, max: number): string {
|
|
37
37
|
const usedKb = Math.round(used / 1024)
|
|
@@ -246,28 +246,32 @@ async function recoverShardingOrphans(
|
|
|
246
246
|
logger: MigrationLogger,
|
|
247
247
|
git: MigrationGit | undefined,
|
|
248
248
|
): Promise<void> {
|
|
249
|
-
if (
|
|
249
|
+
if (existsSync(topicsDir(agentDir))) {
|
|
250
|
+
let cleaned = false
|
|
251
|
+
const memoryPath = rootMemoryPath(agentDir)
|
|
252
|
+
if (existsSync(memoryPath)) {
|
|
253
|
+
await unlink(memoryPath)
|
|
254
|
+
cleaned = true
|
|
255
|
+
}
|
|
250
256
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
+
const memoryDir = join(agentDir, 'memory')
|
|
258
|
+
const dates = await collectFlatJsonlDates(memoryDir)
|
|
259
|
+
for (const date of dates) {
|
|
260
|
+
if (!existsSync(streamFilePath(agentDir, date))) continue
|
|
261
|
+
await unlink(join(memoryDir, `${date}.jsonl`))
|
|
262
|
+
cleaned = true
|
|
263
|
+
}
|
|
257
264
|
|
|
258
|
-
|
|
259
|
-
const dates = await collectFlatJsonlDates(memoryDir)
|
|
260
|
-
for (const date of dates) {
|
|
261
|
-
if (!existsSync(streamFilePath(agentDir, date))) continue
|
|
262
|
-
await unlink(join(memoryDir, `${date}.jsonl`))
|
|
263
|
-
cleaned = true
|
|
265
|
+
if (cleaned) logger.info('[memory:migration] cleaned orphaned pre-shard memory files')
|
|
264
266
|
}
|
|
265
267
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
//
|
|
269
|
-
// and earlier runs of this function unlinked without committing, leaving
|
|
268
|
+
// Always called, even when nothing was cleaned this boot AND even when the
|
|
269
|
+
// sharded layout never landed on this agent: pre-#315 migrations and
|
|
270
|
+
// earlier runs of this function unlinked without committing, leaving
|
|
270
271
|
// staged deletions that survive across reboots until cleared explicitly.
|
|
272
|
+
// The earlier guard (`return` when topicsDir is absent) stranded any agent
|
|
273
|
+
// whose pre-shard files were deleted but whose sharding never completed —
|
|
274
|
+
// their staged deletions sat in the index forever.
|
|
271
275
|
await commitPendingLegacyDeletions(agentDir, logger, git)
|
|
272
276
|
}
|
|
273
277
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { readFile, appendFile, readdir, writeFile, rename } from 'node:fs/promises'
|
|
1
|
+
import { readFile, appendFile, readdir, stat, writeFile, rename } from 'node:fs/promises'
|
|
2
2
|
import { join } from 'node:path'
|
|
3
3
|
|
|
4
4
|
import { getDreamedIds, loadDreamingState } from './dreaming-state'
|
|
@@ -8,7 +8,59 @@ import { parseEventLine, type StreamEvent } from './stream-events'
|
|
|
8
8
|
const STREAM_FILE_PATTERN = /^\d{4}-\d{2}-\d{2}\.jsonl$/
|
|
9
9
|
const STREAM_DATE_FROM_FILENAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
|
|
10
10
|
|
|
11
|
+
// Per-file event cache. `(mtimeMs, ctimeMs, size)` is the invalidation key,
|
|
12
|
+
// mirroring `load-shards.ts`'s shard cache. The three writers in this module
|
|
13
|
+
// — `appendEvents` (memory-logger appends), `writeEventsAtomic` (dreaming
|
|
14
|
+
// compaction + migration), and any external `writeFile` — all bump mtime
|
|
15
|
+
// and/or ctime, so stat-based invalidation is sufficient without explicit
|
|
16
|
+
// hooks. ctimeMs guards metadata-preserving external edits (rsync -t,
|
|
17
|
+
// `touch -r`, restored backups, `git checkout` with timestamps): the kernel
|
|
18
|
+
// always bumps ctime on inode content changes and ctime cannot be backdated
|
|
19
|
+
// via utimes.
|
|
20
|
+
//
|
|
21
|
+
// Module-level keyed by absolute file path. One Bun process owns one agent
|
|
22
|
+
// dir in production (the container stage), so cardinality is small. Multi-
|
|
23
|
+
// path support exists because dreaming compacts multiple files per run and
|
|
24
|
+
// memory_search reads every dated stream.
|
|
25
|
+
// One cached stream file: the parsed events together with the stat values
// that were current when those events were read.
type StreamFileCacheEntry = {
  // Events parsed from the file for the stat values recorded below.
  events: StreamEvent[]
  // Stat fields compared against a fresh stat on every `readEvents` call;
  // a difference in any one of them causes the file to be re-read.
  size: number
  mtimeMs: number
  ctimeMs: number
}

// Module-level cache keyed by the path string handed to `readEvents`.
const streamFileCache: Map<string, StreamFileCacheEntry> = new Map()
|
|
32
|
+
|
|
11
33
|
// Returns the events stored in the stream file at `path`, served from the
// in-memory cache whenever the file's (mtimeMs, ctimeMs, size) triple still
// matches the one recorded when the cache entry was populated.
//
// A missing file yields an empty array and evicts any cache entry for that
// path, so a file recreated later is read fresh. Note that a cache hit
// returns the cached array instance itself, not a copy.
export async function readEvents(path: string): Promise<StreamEvent[]> {
  const current = await statFile(path)
  if (current === null) {
    streamFileCache.delete(path)
    return []
  }

  const hit = streamFileCache.get(path)
  if (hit !== undefined) {
    const unchanged =
      hit.mtimeMs === current.mtimeMs && hit.ctimeMs === current.ctimeMs && hit.size === current.size
    if (unchanged) return hit.events
  }

  // Cache miss or stale entry: re-read and record the stat values taken
  // *before* the read, so a write that lands in between is seen as a
  // mismatch on the next call.
  const events = await readEventsFromDisk(path)
  const { mtimeMs, ctimeMs, size } = current
  streamFileCache.set(path, { mtimeMs, ctimeMs, size, events })
  return events
}
|
|
62
|
+
|
|
63
|
+
async function readEventsFromDisk(path: string): Promise<StreamEvent[]> {
|
|
12
64
|
let raw: string
|
|
13
65
|
try {
|
|
14
66
|
raw = await readFile(path, 'utf-8')
|
|
@@ -34,6 +86,24 @@ export async function readEvents(path: string): Promise<StreamEvent[]> {
|
|
|
34
86
|
return events
|
|
35
87
|
}
|
|
36
88
|
|
|
89
|
+
// Stats `path` and returns only the three fields used as the cache
// invalidation key. Resolves to `null` when the file does not exist
// (ENOENT); any other stat failure is rethrown unchanged.
async function statFile(path: string): Promise<{ mtimeMs: number; ctimeMs: number; size: number } | null> {
  try {
    const { mtimeMs, ctimeMs, size } = await stat(path)
    return { mtimeMs, ctimeMs, size }
  } catch (cause) {
    const { code } = cause as NodeJS.ErrnoException
    if (code !== 'ENOENT') throw cause
    return null
  }
}
|
|
98
|
+
|
|
99
|
+
// Test-only helper: empties the in-memory stream-file cache so a test can
// exercise the cold-start / invalidation path without starting a fresh
// process. Counterpart of `__resetShardCacheForTests` in `load-shards.ts`.
export function __resetStreamFileCacheForTests(): void {
  streamFileCache.clear()
}
|
|
106
|
+
|
|
37
107
|
export async function appendEvents(path: string, events: readonly StreamEvent[]): Promise<void> {
|
|
38
108
|
if (events.length === 0) return
|
|
39
109
|
const joined = events.map((e) => `${JSON.stringify(e)}\n`).join('')
|