typeclaw 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -13
- package/auth.schema.json +41 -0
- package/cron.schema.json +8 -0
- package/package.json +1 -1
- package/secrets.schema.json +41 -0
- package/src/agent/auth.ts +45 -22
- package/src/agent/index.ts +189 -19
- package/src/agent/multimodal/index.ts +12 -0
- package/src/agent/multimodal/look-at.ts +185 -0
- package/src/agent/multimodal/looker.ts +145 -0
- package/src/agent/plugin-tools.ts +30 -1
- package/src/agent/session-origin.ts +194 -46
- package/src/agent/subagents.ts +57 -1
- package/src/agent/system-prompt.ts +1 -1
- package/src/agent/tool-result-budget.ts +121 -0
- package/src/bundled-plugins/backup/index.ts +23 -8
- package/src/bundled-plugins/backup/runner.ts +22 -0
- package/src/bundled-plugins/memory/README.md +13 -10
- package/src/bundled-plugins/memory/append-tool.ts +87 -61
- package/src/bundled-plugins/memory/dreaming.ts +137 -7
- package/src/bundled-plugins/memory/find-entry-tool.ts +62 -0
- package/src/bundled-plugins/memory/fragment-parser.ts +19 -44
- package/src/bundled-plugins/memory/index.ts +91 -8
- package/src/bundled-plugins/memory/load-memory.ts +74 -34
- package/src/bundled-plugins/memory/memory-logger.ts +72 -29
- package/src/bundled-plugins/memory/migration.ts +276 -0
- package/src/bundled-plugins/memory/stream-events.ts +55 -0
- package/src/bundled-plugins/memory/stream-io.ts +63 -0
- package/src/bundled-plugins/memory/watermark.ts +48 -8
- package/src/bundled-plugins/security/index.ts +103 -10
- package/src/bundled-plugins/security/permissions.ts +12 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +51 -18
- package/src/bundled-plugins/tool-result-cap/README.md +9 -4
- package/src/bundled-plugins/tool-result-cap/cap-jsonl.ts +115 -0
- package/src/bundled-plugins/tool-result-cap/cap-result.ts +25 -13
- package/src/bundled-plugins/tool-result-cap/index.ts +16 -2
- package/src/channels/adapters/discord-bot-classify.ts +2 -6
- package/src/channels/adapters/discord-bot.ts +4 -45
- package/src/channels/adapters/kakaotalk-classify.ts +3 -7
- package/src/channels/adapters/kakaotalk.ts +28 -47
- package/src/channels/adapters/slack-bot-classify.ts +2 -6
- package/src/channels/adapters/slack-bot.ts +4 -50
- package/src/channels/adapters/telegram-bot-classify.ts +8 -10
- package/src/channels/adapters/telegram-bot.ts +3 -16
- package/src/channels/index.ts +3 -2
- package/src/channels/manager.ts +15 -1
- package/src/channels/persistence.ts +44 -10
- package/src/channels/router.ts +228 -19
- package/src/channels/schema.ts +6 -156
- package/src/cli/channel.ts +200 -4
- package/src/cli/compose-usage.ts +182 -0
- package/src/cli/compose.ts +33 -0
- package/src/cli/hostd.ts +49 -1
- package/src/cli/index.ts +4 -0
- package/src/cli/init.ts +809 -300
- package/src/cli/model.ts +244 -0
- package/src/cli/provider.ts +404 -0
- package/src/cli/reload.ts +11 -3
- package/src/cli/role.ts +156 -0
- package/src/cli/run.ts +3 -1
- package/src/cli/tui.ts +13 -3
- package/src/cli/usage-args.ts +47 -0
- package/src/cli/usage.ts +97 -0
- package/src/compose/index.ts +1 -0
- package/src/compose/usage.ts +65 -0
- package/src/config/config.ts +491 -19
- package/src/config/index.ts +15 -1
- package/src/config/models-mutation.ts +200 -0
- package/src/config/providers-mutation.ts +250 -0
- package/src/config/providers.ts +141 -2
- package/src/config/reloadable.ts +15 -4
- package/src/container/index.ts +6 -1
- package/src/container/port.ts +10 -0
- package/src/container/require-running.ts +33 -0
- package/src/container/start.ts +81 -63
- package/src/cron/consumer.ts +22 -2
- package/src/cron/index.ts +45 -4
- package/src/cron/schema.ts +104 -0
- package/src/doctor/checks.ts +51 -34
- package/src/doctor/plugin-bridge.ts +28 -4
- package/src/git/system-commit.ts +103 -0
- package/src/hostd/daemon.ts +16 -0
- package/src/hostd/kakao-renewal-manager.ts +223 -0
- package/src/hostd/paths.ts +7 -0
- package/src/init/dockerfile.ts +36 -10
- package/src/init/gitignore.ts +1 -1
- package/src/init/index.ts +213 -85
- package/src/init/kakaotalk-auth.ts +18 -1
- package/src/init/models-dev.ts +26 -1
- package/src/init/run-owner-claim.ts +77 -0
- package/src/permissions/builtins.ts +70 -0
- package/src/permissions/grant.ts +99 -0
- package/src/permissions/index.ts +29 -0
- package/src/permissions/match-rule.ts +305 -0
- package/src/permissions/permissions.ts +196 -0
- package/src/permissions/resolve.ts +80 -0
- package/src/permissions/schema.ts +79 -0
- package/src/plugin/context.ts +8 -4
- package/src/plugin/define.ts +2 -0
- package/src/plugin/index.ts +2 -0
- package/src/plugin/manager.ts +41 -0
- package/src/plugin/registry.ts +9 -0
- package/src/plugin/types.ts +35 -1
- package/src/reload/client.ts +25 -1
- package/src/role-claim/client.ts +182 -0
- package/src/role-claim/code.ts +53 -0
- package/src/role-claim/controller.ts +194 -0
- package/src/role-claim/index.ts +19 -0
- package/src/role-claim/match-rule.ts +43 -0
- package/src/role-claim/pending.ts +100 -0
- package/src/run/channel-session-factory.ts +76 -5
- package/src/run/index.ts +68 -7
- package/src/secrets/encryption.ts +116 -0
- package/src/secrets/kakao-renewal.ts +248 -0
- package/src/secrets/kakao-store.ts +66 -7
- package/src/secrets/keys.ts +173 -0
- package/src/secrets/schema.ts +23 -0
- package/src/secrets/storage.ts +83 -0
- package/src/server/index.ts +198 -71
- package/src/shared/index.ts +4 -0
- package/src/shared/protocol.ts +27 -0
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +3 -3
- package/src/skills/typeclaw-config/SKILL.md +104 -112
- package/src/skills/typeclaw-memory/SKILL.md +9 -9
- package/src/skills/typeclaw-permissions/SKILL.md +166 -0
- package/src/stream/types.ts +7 -1
- package/src/tui/client.ts +66 -5
- package/src/tui/index.ts +61 -9
- package/src/usage/aggregate.ts +117 -0
- package/src/usage/format.ts +30 -0
- package/src/usage/index.ts +68 -0
- package/src/usage/report.ts +354 -0
- package/src/usage/scan.ts +186 -0
- package/typeclaw.schema.json +134 -98
|
@@ -6,8 +6,9 @@ import type { SessionOrigin } from '@/agent/session-origin'
|
|
|
6
6
|
import { type Subagent, readTool } from '@/plugin'
|
|
7
7
|
import { formatLocalDate } from '@/shared'
|
|
8
8
|
|
|
9
|
-
import { appendTool } from './append-tool'
|
|
10
|
-
import {
|
|
9
|
+
import { appendTool, advanceWatermarkTool } from './append-tool'
|
|
10
|
+
import { findEntryTool } from './find-entry-tool'
|
|
11
|
+
import { readLatestWatermark } from './watermark'
|
|
11
12
|
|
|
12
13
|
export const memoryLoggerPayloadSchema = z.object({
|
|
13
14
|
parentSessionId: z.string().min(1),
|
|
@@ -16,6 +17,39 @@ export const memoryLoggerPayloadSchema = z.object({
|
|
|
16
17
|
origin: z.custom<SessionOrigin>().optional(),
|
|
17
18
|
})
|
|
18
19
|
|
|
20
|
+
// Notice returned to the memory-logger subagent in place of `read` output once
// the per-run read budget is spent.
//
// Background: the watermark contract in MEMORY_LOGGER_SYSTEM_PROMPT asks for the
// watermark to be advanced to the latest evaluated entry on every run, but with
// `read` short-circuited the subagent can no longer scan forward to choose a
// "latest evaluated entry id". `find_entry` and `append` are not budgeted, so
// the intended recovery is to call find_entry on the transcript (which reports
// `totalLines` without re-reading content) and advance the watermark to an entry
// id that was already seen earlier in the run.
//
// If no transcript content was read at all (the budget went entirely to
// MEMORY.md or the stream file) nothing can be advanced and the run should exit
// silently; that is the second branch of the message. Spelling out both
// branches replaces the earlier generic "advance to the latest id you have
// seen" hint, which contradicted itself in the zero-content case.
//
// `used` and `max` are byte counts; both are rounded to whole KB for display.
export function memoryLoggerExhaustedMessage(used: number, max: number): string {
  const toKb = (bytes: number): number => Math.round(bytes / 1024)

  const header = `[read budget exhausted: used ${toKb(used)}KB of ${toKb(max)}KB this run]`

  const stopNotice = [
    'Stop reading. The session has consumed its byte budget across read calls.',
    'Do not call `read` again — every subsequent call will return this same notice.',
  ]

  const recoverySteps = [
    'Recovery (in order):',
    '1. If you already saw at least one transcript entry id in earlier read output,',
    ' either call `append` with `latestEntryId=<that id>` for a real fragment, or',
    ' call the watermark-advance tool with `{ source, latestEntryId: <that id> }`, then exit.',
    '2. If you saw NO transcript entries (the budget was consumed on MEMORY.md and',
    ' the daily stream file before you reached the transcript), exit immediately',
    ' WITHOUT writing a watermark. The next run will retry from the same point.',
  ]

  const closing = 'Do not invent or reuse a watermark id. Do not call `read` again.'

  // Sections are separated by a single blank line.
  return [header, '', ...stopNotice, '', ...recoverySteps, '', closing].join('\n')
}
|
|
52
|
+
|
|
19
53
|
export type MemoryLoggerPayload = z.infer<typeof memoryLoggerPayloadSchema>
|
|
20
54
|
|
|
21
55
|
export function isMemoryLoggerPayload(value: unknown): value is MemoryLoggerPayload {
|
|
@@ -28,7 +62,21 @@ Your job is to read a session transcript and capture, as fragments, everything m
|
|
|
28
62
|
|
|
29
63
|
A separate \`dreaming\` subagent runs later. It consolidates your fragments into long-term memory, dedupes, drops near-duplicates, resolves contradictions, and decides what generalizes. **You are the additive layer; dreaming is the filter.** This division of labor is the whole point: capture broadly here, and let dreaming throw away what doesn't last.
|
|
30
64
|
|
|
31
|
-
You have exactly
|
|
65
|
+
You have exactly four tools: \`read\`, \`find_entry\`, \`append\`, and the watermark-advance tool. You cannot run shell commands, overwrite files, or edit existing content.
|
|
66
|
+
|
|
67
|
+
# Reading the transcript past the watermark
|
|
68
|
+
|
|
69
|
+
Session transcripts are JSONL files where each line is an entry with an \`id\` field. They are often large (hundreds of KB). The \`read\` tool truncates output to 50 KB or 2000 lines, whichever comes first, and tells you the line range it returned plus the offset to continue. If you start \`read\` at \`offset=1\` on a 500 KB transcript, the first call returns roughly the first 10% of the file, the next call (\`offset=<next>\`) returns the following slice, and so on. Scrolling through a long prefix that you've already consolidated past is wasted tokens.
|
|
70
|
+
|
|
71
|
+
**Always use \`find_entry\` before \`read\` when a watermark is set.** It scans the JSONL file for the line whose own \`id\` field equals a given entry id and returns the line number, the total line count, and the offset to pass to \`read\` so you resume immediately after the watermark. It matches \`"id":"<entryId>"\` exactly, so \`parentId\` references to the same id do not confuse it. It returns a "not found" string (no throw) when the watermark id is not in the file — that can happen if a parent session was compacted; treat it as "start from offset=1" or, if the transcript is huge and obviously unrelated, write the watermark forward and skip the run.
|
|
72
|
+
|
|
73
|
+
Typical flow with a watermark:
|
|
74
|
+
|
|
75
|
+
1. \`find_entry(path=<transcript>, entryId=<watermark>)\` → returns \`line=N, totalLines=T, offset=N+1\`.
|
|
76
|
+
2. \`read(path=<transcript>, offset=N+1)\` → returns the chunk starting AT the first unread entry. Repeat with the next offset until the read tool's continuation notice stops appearing.
|
|
77
|
+
3. As you read, track the most recent \`id\` you see. That is your new watermark value — pass it as \`latestEntryId\` on the final \`append\` call, or to the watermark-advance tool when there are zero fragments.
|
|
78
|
+
|
|
79
|
+
Never write the same watermark id you were given as input. If the transcript has no new entries past the watermark, evaluate the entries you can see, then advance the watermark to the latest \`id\` in the transcript (which is on line \`totalLines\` from \`find_entry\`'s reply). The whole point of the watermark is to move forward each run.
|
|
32
80
|
|
|
33
81
|
# Capture philosophy: when in doubt, capture
|
|
34
82
|
|
|
@@ -81,7 +129,7 @@ The \`append\` tool will refuse content that contains a recognizable credential
|
|
|
81
129
|
|
|
82
130
|
# Read existing memory first
|
|
83
131
|
|
|
84
|
-
Before reading the transcript, read \`MEMORY.md\` and the current \`memory/yyyy-MM-dd.
|
|
132
|
+
Before reading the transcript, read \`MEMORY.md\` and the current \`memory/yyyy-MM-dd.jsonl\` stream file. You need that context for three reasons:
|
|
85
133
|
|
|
86
134
|
- **Notice contradictions.** If the transcript supersedes existing memory, write a fragment that names the prior memory and supersedes it.
|
|
87
135
|
- **Notice violations.** If existing memory contains a commitment the agent just broke, that's a high-value fragment.
|
|
@@ -93,17 +141,10 @@ The \`append\` tool refuses byte-equivalent fragments within the same daily stre
|
|
|
93
141
|
|
|
94
142
|
# Fragment format
|
|
95
143
|
|
|
96
|
-
|
|
144
|
+
Call \`append\` with \`{topic, body, source, entry, latestEntryId}\`. The runtime serializes your call into a JSON line in the daily stream — you never write raw JSON. \`source\` is the parent session id from the user message. \`entry\` is the specific transcript-entry-id this fragment anchors to. \`latestEntryId\` is the latest transcript-entry-id you evaluated in this run; it advances the watermark and may equal \`entry\` or be later.
|
|
97
145
|
|
|
98
|
-
\`\`\`
|
|
99
|
-
<!-- fragment source=<sessionId> entry=<entryId> -->
|
|
100
|
-
## <topic>
|
|
101
|
-
<body — see below>
|
|
102
|
-
\`\`\`
|
|
103
|
-
|
|
104
|
-
- \`source\` is the parent session id from the user message.
|
|
105
146
|
- \`entry\` is the stable id of the **specific** transcript entry that anchors this fragment's evidence. Each fragment carries its own entry id — do not stamp every fragment with the same "latest evaluated" id. The provenance is per-fragment.
|
|
106
|
-
-
|
|
147
|
+
- \`topic\` is a short noun phrase naming what the fragment is about.
|
|
107
148
|
|
|
108
149
|
The body is the substance of the fragment. The form is flexible, but every body must satisfy two requirements:
|
|
109
150
|
|
|
@@ -131,21 +172,17 @@ A fragment doesn't need to articulate how a future agent will use it. If the imp
|
|
|
131
172
|
|
|
132
173
|
**One topic per fragment.** If you have two unrelated things to say, write two fragments. Don't pile multiple stable facts into a single body.
|
|
133
174
|
|
|
134
|
-
Separate fragments with a blank line.
|
|
135
|
-
|
|
136
175
|
# Watermark contract
|
|
137
176
|
|
|
138
|
-
|
|
177
|
+
Every \`append\` call advances the watermark via the \`latestEntryId\` field. You no longer emit a separate watermark marker. Ensure the FINAL \`append\` call's \`latestEntryId\` is the latest transcript-entry-id you read this run. The watermark is what prevents you from re-reading the same transcript prefix on the next run.
|
|
139
178
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
179
|
+
- \`latestEntryId\` is the latest transcript entry you evaluated, **regardless of which entries actually anchored fragments**. You may have evaluated 50 entries and written 2 fragments anchored to entries 5 and 23; the final \`latestEntryId\` is still the latest of the 50.
|
|
180
|
+
- When you write multiple fragments, every \`append\` call may carry the same latest value if you already know it, but the final call must carry the farthest evaluated id.
|
|
181
|
+
- Never reuse the watermark trick of stamping a fragment's \`entry\` with the latest evaluated entry — fragments carry per-evidence provenance, and \`latestEntryId\` carries progress.
|
|
143
182
|
|
|
144
|
-
-
|
|
145
|
-
- The watermark must always be the **last** marker in your appended output, after any fragments.
|
|
146
|
-
- Write exactly one watermark per run, never more.
|
|
183
|
+
# Zero-fragments path
|
|
147
184
|
|
|
148
|
-
|
|
185
|
+
When you evaluated the transcript but found nothing worth a fragment, call the watermark-advance tool with \`{source, latestEntryId}\` so the next run does not re-read the same prefix. Do not call \`append\` with fake content just to move the watermark.
|
|
149
186
|
|
|
150
187
|
# Stopping
|
|
151
188
|
|
|
@@ -171,9 +208,9 @@ function buildInitialPrompt(payload: MemoryLoggerPayload, streamFile: string, wa
|
|
|
171
208
|
'',
|
|
172
209
|
"Per-fragment provenance: each fragment's `entry=` is the specific transcript entry that anchors that fragment's evidence — not the latest entry you evaluated. Two fragments anchored to two different entries get two different `entry=` values. Do not stamp every fragment with the same id.",
|
|
173
210
|
'',
|
|
174
|
-
'Watermark: regardless of
|
|
211
|
+
'Watermark: every `append` call must include the `latestEntryId` argument. Ensure the final `append` call uses the latest transcript entry you evaluated, regardless of whether it anchored a fragment. If you evaluated transcript entries but found zero fragments, call the watermark-advance tool with `{ source: "' +
|
|
175
212
|
payload.parentSessionId +
|
|
176
|
-
'
|
|
213
|
+
'", latestEntryId: "<latestEntryId>" }` instead of writing a fake fragment.',
|
|
177
214
|
)
|
|
178
215
|
return lines.join('\n')
|
|
179
216
|
}
|
|
@@ -229,16 +266,22 @@ export function createMemoryLoggerSubagent(
|
|
|
229
266
|
return {
|
|
230
267
|
systemPrompt: MEMORY_LOGGER_SYSTEM_PROMPT,
|
|
231
268
|
tools: [readTool],
|
|
232
|
-
customTools: [appendTool],
|
|
269
|
+
customTools: [findEntryTool, appendTool, advanceWatermarkTool],
|
|
233
270
|
payloadSchema: memoryLoggerPayloadSchema,
|
|
234
271
|
inFlightKey: (payload) => payload.agentDir,
|
|
272
|
+
toolResultBudget: {
|
|
273
|
+
maxTotalBytes: 256 * 1024,
|
|
274
|
+
toolNames: ['read'],
|
|
275
|
+
exhaustedMessage: memoryLoggerExhaustedMessage,
|
|
276
|
+
},
|
|
235
277
|
handler: async (ctx, runSession) => {
|
|
236
278
|
const today = formatLocalDate()
|
|
237
|
-
const
|
|
238
|
-
const
|
|
279
|
+
const memoryDir = join(ctx.payload.agentDir, 'memory')
|
|
280
|
+
const streamFile = join(memoryDir, `${today}.jsonl`)
|
|
281
|
+
const watermark = await readLatestWatermark(memoryDir, ctx.payload.parentSessionId)
|
|
239
282
|
const start = Date.now()
|
|
240
283
|
logger.info(
|
|
241
|
-
`[memory-logger] ${ctx.payload.parentSessionId} start stream=${today}.
|
|
284
|
+
`[memory-logger] ${ctx.payload.parentSessionId} start stream=${today}.jsonl watermark=${watermark ?? 'none'}`,
|
|
242
285
|
)
|
|
243
286
|
try {
|
|
244
287
|
await runSession({ userPrompt: buildInitialPrompt(ctx.payload, streamFile, watermark) })
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto'
|
|
2
|
+
import { existsSync } from 'node:fs'
|
|
3
|
+
import { readdir, readFile, unlink } from 'node:fs/promises'
|
|
4
|
+
import { join } from 'node:path'
|
|
5
|
+
|
|
6
|
+
import { loadDreamingState, saveDreamingState, setDreamedLines } from './dreaming-state'
|
|
7
|
+
import { type StreamEvent, streamEventSchema } from './stream-events'
|
|
8
|
+
import { writeEventsAtomic as defaultWriteEventsAtomic } from './stream-io'
|
|
9
|
+
|
|
10
|
+
/** Summary returned by `runMigration`. */
export type MigrationResult = {
  /** Dates (`YYYY-MM-DD`) whose markdown stream was converted in this run. */
  migrated: string[]
  /** Dates that were examined but not converted. */
  skipped: string[]
  /** Number of `legacy_prose` events produced across all migrated dates. */
  legacyProseCount: number
  /** Number of `fragment` events produced across all migrated dates. */
  fragmentCount: number
  /** Number of `watermark` events produced across all migrated dates. */
  watermarkCount: number
}

/** Minimal logging surface the migration writes its progress and failures to. */
export type MigrationLogger = {
  info: (message: string) => void
  warn: (message: string) => void
  error: (message: string) => void
}

/**
 * Optional override for how git is invoked. `spawn` receives the git arguments
 * (without the `git` executable itself) and the working directory, and resolves
 * with the process exit code and captured output.
 */
export type MigrationGit = {
  spawn?: (args: string[], options: { cwd: string }) => Promise<{ exitCode: number; stdout: string; stderr: string }>
}

/** Inputs for `runMigration`. */
export type RunMigrationOptions = {
  /** Agent directory; daily streams are looked up in its `memory/` subdirectory. */
  agentDir: string
  logger: MigrationLogger
  /** Git invocation override; the built-in spawner is used when omitted. */
  git?: MigrationGit
  /** Writer override; defaults to `writeEventsAtomic` from `./stream-io`. */
  writeEventsAtomic?: (path: string, events: readonly StreamEvent[]) => Promise<void>
}
|
|
34
|
+
|
|
35
|
+
// File names of daily streams; capture group 1 is the `YYYY-MM-DD` date.
const DAILY_MD_NAME = /^(\d{4}-\d{2}-\d{2})\.md$/
const DAILY_JSONL_NAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
// Legacy fragment: a `<!-- fragment source=… entry=… -->` marker, a `## topic`
// line, then a body that runs (lazily) up to the next fragment/watermark marker
// or the end of the input. Groups: 1=source, 2=entry, 3=topic, 4=body.
// Global flag so callers can position the search via `lastIndex`.
const LEGACY_FRAGMENT_RE =
  /<!-- fragment source=(\S+) entry=(\S+) -->\n## (.+)\n([\s\S]*?)(?=<!-- fragment |<!-- watermark |$)/g
// Legacy watermark marker. Groups: 1=source, 2=entry. Global for the same reason.
const LEGACY_WATERMARK_RE = /<!-- watermark source=(\S+) entry=(\S+) -->/g
|
|
40
|
+
|
|
41
|
+
/**
 * Converts legacy daily markdown streams (`memory/YYYY-MM-DD.md`) into JSONL
 * event streams (`memory/YYYY-MM-DD.jsonl`) and removes the markdown originals.
 *
 * Per date:
 * - a `.jsonl` already exists → skipped (with a warning when the `.md` is still
 *   there too);
 * - the `.md` cannot be read, parses into an event that fails schema
 *   validation, or the JSONL write fails → logged as an error and skipped;
 * - otherwise the events are written, the `.md` is unlinked, and the date is
 *   recorded as migrated.
 *
 * A failure on one date never aborts the run: dates converted earlier still get
 * their dreaming-state reset and git commit. If the `.md` cannot be removed
 * after a successful write, the date is still treated as migrated (the JSONL is
 * in place) and a warning is logged so the stale file can be cleaned up.
 *
 * @param options - agent directory, logger, and optional git / writer overrides.
 * @returns which dates were migrated or skipped, plus event totals.
 */
export async function runMigration(options: RunMigrationOptions): Promise<MigrationResult> {
  const memoryDir = join(options.agentDir, 'memory')
  const result: MigrationResult = {
    migrated: [],
    skipped: [],
    legacyProseCount: 0,
    fragmentCount: 0,
    watermarkCount: 0,
  }

  let entries: string[]
  try {
    entries = await readdir(memoryDir)
  } catch {
    // No readable memory directory: nothing to migrate.
    return result
  }

  const writeEvents = options.writeEventsAtomic ?? defaultWriteEventsAtomic

  for (const date of collectDailyDates(entries)) {
    const mdPath = join(memoryDir, `${date}.md`)
    const jsonlPath = join(memoryDir, `${date}.jsonl`)
    const hasMd = existsSync(mdPath)
    const hasJsonl = existsSync(jsonlPath)

    if (hasJsonl && !hasMd) {
      result.skipped.push(date)
      continue
    }

    if (hasJsonl && hasMd) {
      options.logger.warn(`[memory:migration] ${date}: skipped because both .md and .jsonl exist`)
      result.skipped.push(date)
      continue
    }

    // Both files vanished between readdir and the existence checks.
    if (!hasMd) continue

    let content: string
    try {
      content = await readFile(mdPath, 'utf8')
    } catch (err) {
      options.logger.error(`[memory:migration] ${date}.md: failed to read: ${describeError(err)}`)
      result.skipped.push(date)
      continue
    }

    const events = parseLegacyMarkdown(content)
    const invalid = findInvalidEvent(events)
    if (invalid !== null) {
      options.logger.error(
        `[memory:migration] ${date}.md: event ${invalid.index + 1} failed validation: ${invalid.reason}`,
      )
      result.skipped.push(date)
      continue
    }

    const counts = countEvents(events)
    try {
      await writeEvents(jsonlPath, events)
    } catch (err) {
      options.logger.error(`[memory:migration] ${date}.md: failed to write JSONL: ${describeError(err)}`)
      result.skipped.push(date)
      continue
    }

    try {
      await unlink(mdPath)
    } catch (err) {
      // The JSONL is already written, so the date counts as migrated; only the
      // cleanup of the legacy file failed.
      options.logger.warn(
        `[memory:migration] ${date}.md: migrated but the legacy file could not be removed: ${describeError(err)}`,
      )
    }

    result.fragmentCount += counts.fragmentCount
    result.watermarkCount += counts.watermarkCount
    result.legacyProseCount += counts.legacyProseCount
    result.migrated.push(date)
    options.logger.info(
      `[memory:migration] ${date}: ${counts.fragmentCount} fragments, ${counts.watermarkCount} watermarks, ${counts.legacyProseCount} legacy_prose regions`,
    )
  }

  if (result.migrated.length > 0) {
    await resetDreamingWatermarks(options.agentDir, result.migrated)
    await commitMigration(options.agentDir, result.migrated, options.logger, options.git)
  }

  return result
}
|
|
115
|
+
|
|
116
|
+
/**
 * Extracts the distinct `YYYY-MM-DD` dates for which a daily `.md` or `.jsonl`
 * file name appears in `entries`, sorted ascending (plain string sort).
 */
function collectDailyDates(entries: readonly string[]): string[] {
  const patterns = [DAILY_MD_NAME, DAILY_JSONL_NAME]
  const unique = new Set<string>()

  for (const name of entries) {
    for (const pattern of patterns) {
      const date = pattern.exec(name)?.[1]
      if (date !== undefined) unique.add(date)
    }
  }

  return [...unique].sort()
}
|
|
126
|
+
|
|
127
|
+
/**
 * Turns the text of a legacy markdown stream into stream events.
 *
 * Walks the content left to right, always taking whichever marker (fragment or
 * watermark) starts first at or after the cursor. Any non-blank text between the
 * cursor and that marker, and any non-blank tail after the last marker, is
 * preserved as a `legacy_prose` event. Every fragment/watermark event gets a
 * fresh random id and the current time as its timestamp.
 */
function parseLegacyMarkdown(content: string): StreamEvent[] {
  const events: StreamEvent[] = []
  let cursor = 0

  while (cursor < content.length) {
    const hit = earliest(
      nextMatch(LEGACY_FRAGMENT_RE, content, cursor),
      nextMatch(LEGACY_WATERMARK_RE, content, cursor),
    )
    if (hit === null) break

    const { kind, match } = hit
    // Text skipped over on the way to this marker is kept as prose.
    addLegacyProse(events, content.slice(cursor, match.index))

    const id = randomUUID()
    const ts = new Date().toISOString()
    const source = match[1]!
    const entry = match[2]!

    if (kind === 'watermark') {
      events.push({ type: 'watermark', id, ts, source, entry })
    } else {
      events.push({ type: 'fragment', id, ts, source, entry, topic: match[3]!, body: match[4]! })
    }

    cursor = match.index + match[0].length
  }

  addLegacyProse(events, content.slice(cursor))
  return events
}
|
|
163
|
+
|
|
164
|
+
/**
 * Appends a `legacy_prose` event carrying `text` verbatim, stamped with the
 * current time. Text that is empty or whitespace-only is dropped.
 */
function addLegacyProse(events: StreamEvent[], text: string): void {
  const hasContent = text.trim() !== ''
  if (hasContent) {
    events.push({ type: 'legacy_prose', ts: new Date().toISOString(), text, origin: 'migration' })
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Runs `regex` against `content` starting the search at `cursor`.
 * Positions the search by assigning `lastIndex`, which `exec` honours for
 * regexes carrying the global (or sticky) flag.
 */
function nextMatch(regex: RegExp, content: string, cursor: number): RegExpExecArray | null {
  regex.lastIndex = cursor
  const found = regex.exec(content)
  return found
}
|
|
173
|
+
|
|
174
|
+
/**
 * Picks whichever of the two candidate matches starts first, tagged with its
 * kind. A tie goes to the fragment. Returns null when neither matched.
 */
function earliest(
  fragment: RegExpExecArray | null,
  watermark: RegExpExecArray | null,
): { kind: 'fragment' | 'watermark'; match: RegExpExecArray } | null {
  const fragmentWins = fragment !== null && (watermark === null || fragment.index <= watermark.index)
  if (fragment !== null && fragmentWins) {
    return { kind: 'fragment', match: fragment }
  }
  if (watermark !== null) {
    return { kind: 'watermark', match: watermark }
  }
  return null
}
|
|
185
|
+
|
|
186
|
+
/**
 * Validates each event against `streamEventSchema` and reports the first
 * failure as its zero-based index plus the schema issue messages joined with
 * `'; '`. Returns null when every event validates.
 */
function findInvalidEvent(events: readonly StreamEvent[]): { index: number; reason: string } | null {
  for (const [index, event] of events.entries()) {
    const check = streamEventSchema.safeParse(event)
    if (check.success) continue

    const reason = check.error.issues.map((issue) => issue.message).join('; ')
    return { index, reason }
  }
  return null
}
|
|
195
|
+
|
|
196
|
+
/**
 * Tallies events by type. Events whose type is none of `fragment`,
 * `watermark`, or `legacy_prose` are not counted.
 */
function countEvents(
  events: readonly StreamEvent[],
): Pick<MigrationResult, 'fragmentCount' | 'watermarkCount' | 'legacyProseCount'> {
  const tally = { fragmentCount: 0, watermarkCount: 0, legacyProseCount: 0 }

  for (const { type } of events) {
    switch (type) {
      case 'fragment':
        tally.fragmentCount += 1
        break
      case 'watermark':
        tally.watermarkCount += 1
        break
      case 'legacy_prose':
        tally.legacyProseCount += 1
        break
    }
  }

  return tally
}
|
|
209
|
+
|
|
210
|
+
/**
 * Loads the dreaming state, sets the dreamed-line count of every given date to
 * 0 (all stamped with the same timestamp), and saves the result.
 */
async function resetDreamingWatermarks(agentDir: string, dates: readonly string[]): Promise<void> {
  const loaded = await loadDreamingState(agentDir)
  const stamp = new Date().toISOString()
  const reset = dates.reduce((state, date) => setDreamedLines(state, date, 0, stamp), loaded)
  await saveDreamingState(agentDir, reset)
}
|
|
218
|
+
|
|
219
|
+
/**
 * Stages and commits the result of a migration when `agentDir` is inside a git
 * work tree.
 *
 * Steps: check `rev-parse --is-inside-work-tree`; `git add` the new JSONL
 * files; for each legacy `.md` that git tracks, stage its removal with
 * `git add -u`; then commit. Any failing git step is logged as a warning and
 * ends the function without throwing. Outside a work tree an info line is
 * logged and nothing else happens.
 *
 * With an empty `dates` list the function returns immediately: otherwise
 * `git add --` would stage nothing and the following `git commit` could commit
 * unrelated, previously staged changes under the migration message.
 *
 * @param agentDir - working directory for every git invocation.
 * @param dates - migrated dates (`YYYY-MM-DD`).
 * @param logger - receives the info/warn lines.
 * @param git - optional spawn override; the built-in git spawner is the default.
 */
async function commitMigration(
  agentDir: string,
  dates: readonly string[],
  logger: MigrationLogger,
  git: MigrationGit | undefined,
): Promise<void> {
  if (dates.length === 0) return

  const spawn = git?.spawn ?? spawnGit
  const run = (args: string[]) => spawn(args, { cwd: agentDir })
  const warnFailure = (step: 'add' | 'commit', outcome: { stdout: string; stderr: string }): void => {
    logger.warn(`[memory:migration] git ${step} failed: ${outcome.stderr || outcome.stdout}`.trim())
  }

  const inside = await run(['rev-parse', '--is-inside-work-tree'])
  if (inside.exitCode !== 0) {
    logger.info('[memory:migration] not in a git repo; skipping git commit')
    return
  }

  const jsonlPaths = dates.map((date) => `memory/${date}.jsonl`)
  const addJsonl = await run(['add', '--', ...jsonlPaths])
  if (addJsonl.exitCode !== 0) {
    warnFailure('add', addJsonl)
    return
  }

  for (const date of dates) {
    const mdPath = `memory/${date}.md`
    // Only a tracked file has a deletion to stage.
    const tracked = await run(['ls-files', '--error-unmatch', '--', mdPath])
    if (tracked.exitCode !== 0) continue

    const addDeletedMd = await run(['add', '-u', '--', mdPath])
    if (addDeletedMd.exitCode !== 0) {
      warnFailure('add', addDeletedMd)
      return
    }
  }

  const commit = await run(['commit', '-m', `memory: migrate ${dates.length} daily stream(s) to JSONL`, '--no-edit'])
  if (commit.exitCode !== 0) {
    warnFailure('commit', commit)
  }
}
|
|
260
|
+
|
|
261
|
+
/**
 * Default git runner: spawns `git <args>` in `options.cwd` via `Bun.spawn` and
 * resolves with the exit code and the captured stdout/stderr text.
 *
 * stdout, stderr and the exit promise are awaited together so neither pipe is
 * left undrained while waiting on the other.
 *
 * If the process cannot be started or its output cannot be read (for example
 * the git executable is missing), the failure is reported as a non-zero exit
 * code with the error message in `stderr` instead of being thrown, so callers'
 * existing exit-code handling applies.
 */
async function spawnGit(
  args: string[],
  options: { cwd: string },
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  try {
    const proc = Bun.spawn({ cmd: ['git', ...args], cwd: options.cwd, stdout: 'pipe', stderr: 'pipe' })
    const [stdout, stderr, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
      proc.exited,
    ])
    return { exitCode, stdout, stderr }
  } catch (err) {
    // 127 follows the shell convention for "command could not be run".
    return { exitCode: 127, stdout: '', stderr: err instanceof Error ? err.message : String(err) }
  }
}
|
|
273
|
+
|
|
274
|
+
/** Renders a caught value for logging: an Error's message, otherwise `String(value)`. */
function describeError(err: unknown): string {
  if (err instanceof Error) {
    return err.message
  }
  return String(err)
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
// One captured memory fragment. `source` and `entry` carry the provenance pair
// (the migration fills them from the legacy `source=` / `entry=` marker
// attributes). `.passthrough()` keeps unrecognised keys instead of stripping them.
export const fragmentEventSchema = z
  .object({
    type: z.literal('fragment'),
    id: z.string().min(1),
    ts: z.string().datetime(),
    source: z.string(),
    entry: z.string(),
    topic: z.string(),
    body: z.string(),
  })
  .passthrough()

// A watermark record: same identity/provenance fields as a fragment, no content.
export const watermarkEventSchema = z
  .object({
    type: z.literal('watermark'),
    id: z.string().min(1),
    ts: z.string().datetime(),
    source: z.string(),
    entry: z.string(),
  })
  .passthrough()

// Free text carried over from a legacy markdown stream; `origin` is always
// 'migration' and there is no `id`.
export const legacyProseEventSchema = z
  .object({
    type: z.literal('legacy_prose'),
    ts: z.string().datetime(),
    text: z.string(),
    origin: z.literal('migration'),
  })
  .passthrough()

// Any line of a daily stream, discriminated on `type`.
export const streamEventSchema = z.discriminatedUnion('type', [
  fragmentEventSchema,
  watermarkEventSchema,
  legacyProseEventSchema,
])

// Static types inferred from the schemas above.
export type FragmentEvent = z.infer<typeof fragmentEventSchema>
export type WatermarkEvent = z.infer<typeof watermarkEventSchema>
export type LegacyProseEvent = z.infer<typeof legacyProseEventSchema>
export type StreamEvent = FragmentEvent | WatermarkEvent | LegacyProseEvent
|
|
44
|
+
|
|
45
|
+
// Parses one stream line into a validated event.
// Returns `null` when the line is not valid JSON or when the decoded value
// does not satisfy `streamEventSchema`; never throws for bad input.
export function parseEventLine(line: string): StreamEvent | null {
  let decoded: unknown
  try {
    decoded = JSON.parse(line)
  } catch {
    return null
  }
  const outcome = streamEventSchema.safeParse(decoded)
  return outcome.success ? outcome.data : null
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { readFile, appendFile, writeFile, rename } from 'node:fs/promises'
|
|
2
|
+
|
|
3
|
+
import { parseEventLine, type StreamEvent } from './stream-events'
|
|
4
|
+
|
|
5
|
+
// Reads every valid event from the stream file at `path`.
// A missing file (ENOENT) yields an empty list; any other read error is
// rethrown. Empty lines are ignored, and lines that `parseEventLine` rejects
// are skipped with a console warning carrying their 1-based line number.
export async function readEvents(path: string): Promise<StreamEvent[]> {
  let contents: string
  try {
    contents = await readFile(path, 'utf-8')
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err
    return []
  }

  const collected: StreamEvent[] = []
  contents.split('\n').forEach((text, index) => {
    if (text === '') return
    const parsed = parseEventLine(text)
    if (parsed === null) {
      console.warn(`[stream-io] ${path}: skipping malformed line ${index + 1}`)
      return
    }
    collected.push(parsed)
  })
  return collected
}
|
|
30
|
+
|
|
31
|
+
// Appends `events` to the file at `path`, one JSON document per line.
// Does nothing (no file access at all) when `events` is empty.
export async function appendEvents(path: string, events: readonly StreamEvent[]): Promise<void> {
  if (events.length === 0) return
  let payload = ''
  for (const event of events) {
    payload += `${JSON.stringify(event)}\n`
  }
  await appendFile(path, payload, 'utf-8')
}
|
|
36
|
+
|
|
37
|
+
// Replaces the contents of `path` with `events`, one JSON document per line.
// The data is first written to the sibling file `${path}.tmp`, which is then
// renamed over `path`.
export async function writeEventsAtomic(path: string, events: readonly StreamEvent[]): Promise<void> {
  const stagingPath = `${path}.tmp`
  const payload = events.map((event) => JSON.stringify(event) + '\n').join('')
  await writeFile(stagingPath, payload, 'utf-8')
  await rename(stagingPath, path)
}
|
|
43
|
+
|
|
44
|
+
// Counts the lines of the stream file at `path` that parse as valid events.
// A missing file (ENOENT) counts as zero; any other read error is rethrown.
// Empty and malformed lines are not counted, and no warning is emitted.
export async function countEvents(path: string): Promise<number> {
  let contents: string
  try {
    contents = await readFile(path, 'utf-8')
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err
    return 0
  }

  return contents.split('\n').filter((text) => text !== '' && parseEventLine(text) !== null).length
}
|
|
@@ -1,15 +1,55 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { readdir } from 'node:fs/promises'
|
|
2
|
+
import { join } from 'node:path'
|
|
2
3
|
|
|
3
|
-
|
|
4
|
+
import { readEvents } from './stream-io'
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
// Matches the file name of a daily stream, `YYYY-MM-DD.jsonl` (see
// `formatLocalDate` in `src/shared`). The cross-day lookup uses it to skip
// any other file the user or a plugin may have dropped into `memory/`.
const DAILY_STREAM_NAME = /^\d{4}-\d{2}-\d{2}\.jsonl$/
|
|
10
|
+
|
|
11
|
+
// Returns the `entry` of the last `fragment` or `watermark` event in
// `streamFilePath` whose `source` equals `parentSessionId`, or `null` when
// the file holds no such event.
export async function readWatermarkFromFile(streamFilePath: string, parentSessionId: string): Promise<string | null> {
  const events = await readEvents(streamFilePath)

  // Walk backwards: the first match from the end is the last match overall.
  for (let index = events.length - 1; index >= 0; index--) {
    const candidate = events[index]!
    if (candidate.type !== 'fragment' && candidate.type !== 'watermark') continue
    if (candidate.source === parentSessionId) return candidate.entry
  }
  return null
}
|
|
22
|
+
|
|
23
|
+
// Returns the latest watermark entry id for `parentSessionId` across all
|
|
24
|
+
// `YYYY-MM-DD.jsonl` daily-stream files under `memoryDir`, walking newest-first
|
|
25
|
+
// (by filename, which is equivalent to chronological order). Short-circuits
|
|
26
|
+
// on the first file that contains a matching marker — for the common case
|
|
27
|
+
// where memory-logger ran yesterday, this reads exactly one file.
|
|
28
|
+
//
|
|
29
|
+
// Why cross-day: channel sessions (Slack, Discord, KakaoTalk) routinely
|
|
30
|
+
// survive the midnight rollover because the same human keeps the same
|
|
31
|
+
// session alive across days. If `readWatermark` only looked at today's
|
|
32
|
+
// stream file, every midnight would force a full transcript reread for
|
|
33
|
+
// every long-lived session — burning ~135k input tokens per memory-logger
|
|
34
|
+
// run on a 762KB transcript (observed on a real Discord agent: PR #207).
|
|
35
|
+
//
|
|
36
|
+
// The append target stays today's file; only the lookup crosses the day
|
|
37
|
+
// boundary. This means yesterday's stream is treated as read-only history,
|
|
38
|
+
// which it already is by construction (dreaming snapshots full days, never
|
|
39
|
+
// touches in-progress days).
|
|
40
|
+
export async function readLatestWatermark(memoryDir: string, parentSessionId: string): Promise<string | null> {
  let names: string[]
  try {
    names = await readdir(memoryDir)
  } catch {
    // Any failure to list the directory is treated as "no watermark".
    return null
  }

  // Date-named files sort chronologically as plain strings, so an ascending
  // sort followed by a reverse yields newest-first order.
  const newestFirst = names.filter((name) => DAILY_STREAM_NAME.test(name)).sort().reverse()

  for (const fileName of newestFirst) {
    const found = await readWatermarkFromFile(join(memoryDir, fileName), parentSessionId)
    // Stop at the first (newest) file that has a marker for this session.
    if (found !== null) return found
  }
  return null
}
|