typeclaw 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +15 -9
  2. package/package.json +5 -3
  3. package/scripts/dump-system-prompt.ts +12 -1
  4. package/scripts/require-parallel.ts +41 -0
  5. package/src/agent/auth.ts +3 -3
  6. package/src/agent/index.ts +116 -14
  7. package/src/agent/live-sessions.ts +34 -0
  8. package/src/agent/multimodal/read-redirect.ts +43 -0
  9. package/src/agent/plugin-tools.ts +97 -13
  10. package/src/agent/session-meta.ts +21 -2
  11. package/src/agent/session-origin.ts +6 -13
  12. package/src/agent/subagent-completion-reminder.ts +89 -0
  13. package/src/agent/subagents.ts +3 -2
  14. package/src/agent/system-prompt.ts +49 -15
  15. package/src/bundled-plugins/explorer/explorer.ts +2 -2
  16. package/src/bundled-plugins/guard/index.ts +14 -1
  17. package/src/bundled-plugins/guard/policies/managed-config.ts +43 -13
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +37 -0
  19. package/src/bundled-plugins/guard/policies/memory-topics-delete.ts +67 -0
  20. package/src/bundled-plugins/guard/policies/memory-topics-write.ts +33 -0
  21. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -2
  22. package/src/bundled-plugins/guard/policy.ts +7 -0
  23. package/src/bundled-plugins/memory/README.md +76 -62
  24. package/src/bundled-plugins/memory/append-tool.ts +3 -2
  25. package/src/bundled-plugins/memory/citation-superset.ts +49 -11
  26. package/src/bundled-plugins/memory/citations.ts +19 -8
  27. package/src/bundled-plugins/memory/delete-tool.ts +57 -0
  28. package/src/bundled-plugins/memory/dreaming-state.ts +1 -1
  29. package/src/bundled-plugins/memory/dreaming.ts +364 -146
  30. package/src/bundled-plugins/memory/frontmatter.ts +165 -0
  31. package/src/bundled-plugins/memory/index.ts +236 -16
  32. package/src/bundled-plugins/memory/injection-plan.ts +15 -0
  33. package/src/bundled-plugins/memory/load-memory.ts +102 -103
  34. package/src/bundled-plugins/memory/load-shards.ts +156 -0
  35. package/src/bundled-plugins/memory/memory-logger.ts +16 -15
  36. package/src/bundled-plugins/memory/memory-retrieval.ts +105 -0
  37. package/src/bundled-plugins/memory/migration.ts +282 -1
  38. package/src/bundled-plugins/memory/paths.ts +42 -0
  39. package/src/bundled-plugins/memory/search-tool.ts +232 -0
  40. package/src/bundled-plugins/memory/secret-detector.ts +2 -2
  41. package/src/bundled-plugins/memory/shard-snapshot.ts +51 -0
  42. package/src/bundled-plugins/memory/slug.ts +59 -0
  43. package/src/bundled-plugins/memory/stream-io.ts +110 -1
  44. package/src/bundled-plugins/memory/strength.ts +3 -3
  45. package/src/bundled-plugins/memory/topics.ts +70 -16
  46. package/src/bundled-plugins/security/index.ts +24 -0
  47. package/src/bundled-plugins/security/permissions.ts +4 -0
  48. package/src/bundled-plugins/security/policies/cron-promotion.ts +349 -0
  49. package/src/bundled-plugins/security/policies/git-exfil.ts +2 -0
  50. package/src/bundled-plugins/security/policies/prompt-injection.ts +3 -0
  51. package/src/bundled-plugins/security/policies/role-promotion.ts +419 -0
  52. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +1 -0
  53. package/src/channels/adapters/discord-bot-slash-commands.ts +186 -0
  54. package/src/channels/adapters/discord-bot.ts +163 -1
  55. package/src/channels/adapters/kakaotalk-attachment.ts +7 -17
  56. package/src/channels/adapters/kakaotalk.ts +64 -37
  57. package/src/channels/adapters/slack-bot-classify.ts +2 -27
  58. package/src/channels/adapters/slack-bot-slash-commands.ts +82 -0
  59. package/src/channels/adapters/slack-bot.ts +139 -1
  60. package/src/channels/index.ts +5 -0
  61. package/src/channels/router.ts +328 -18
  62. package/src/channels/subagent-completion-bridge.ts +84 -0
  63. package/src/cli/builtins.ts +1 -0
  64. package/src/cli/index.ts +1 -0
  65. package/src/cli/init.ts +122 -14
  66. package/src/cli/inspect.ts +151 -0
  67. package/src/cli/role.ts +7 -2
  68. package/src/cli/tunnel.ts +13 -1
  69. package/src/cli/ui.ts +25 -1
  70. package/src/config/index.ts +1 -0
  71. package/src/config/models-mutation.ts +10 -2
  72. package/src/cron/consumer.ts +1 -1
  73. package/src/init/dockerfile.ts +353 -2
  74. package/src/init/hatching.ts +5 -6
  75. package/src/init/kakaotalk-auth.ts +6 -47
  76. package/src/init/validate-api-key.ts +121 -0
  77. package/src/inspect/index.ts +213 -0
  78. package/src/inspect/label.ts +50 -0
  79. package/src/inspect/live.ts +221 -0
  80. package/src/inspect/render.ts +163 -0
  81. package/src/inspect/replay.ts +265 -0
  82. package/src/inspect/session-list.ts +160 -0
  83. package/src/inspect/types.ts +110 -0
  84. package/src/plugin/hooks.ts +23 -1
  85. package/src/plugin/index.ts +2 -0
  86. package/src/plugin/manager.ts +1 -1
  87. package/src/plugin/registry.ts +1 -1
  88. package/src/plugin/types.ts +10 -0
  89. package/src/run/channel-session-factory.ts +7 -1
  90. package/src/run/index.ts +87 -21
  91. package/src/secrets/kakao-renewal.ts +3 -47
  92. package/src/server/index.ts +241 -60
  93. package/src/shared/index.ts +4 -1
  94. package/src/shared/local-time.ts +17 -0
  95. package/src/shared/protocol.ts +49 -0
  96. package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +9 -9
  97. package/src/skills/typeclaw-claude-code/SKILL.md +83 -40
  98. package/src/skills/typeclaw-claude-code/references/stop-hook.md +2 -0
  99. package/src/skills/typeclaw-claude-code/references/tmux-driving.md +102 -16
  100. package/src/skills/typeclaw-config/SKILL.md +38 -33
  101. package/src/skills/typeclaw-cron/SKILL.md +1 -1
  102. package/src/skills/typeclaw-git/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +16 -163
  104. package/src/skills/typeclaw-permissions/SKILL.md +2 -2
  105. package/src/skills/typeclaw-plugins/SKILL.md +26 -15
  106. package/src/test-helpers/wait-for.ts +7 -1
  107. package/typeclaw.schema.json +7 -0
@@ -31,6 +31,7 @@ import type {
31
31
  ToolResult,
32
32
  } from '@/plugin'
33
33
 
34
+ import { checkImageReadRedirect } from './multimodal/read-redirect'
34
35
  import type { SessionOrigin } from './session-origin'
35
36
  import { webfetchTool } from './tools/webfetch'
36
37
  import { websearchTool } from './tools/websearch'
@@ -39,24 +40,27 @@ const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
39
40
  Type.Object(
40
41
  {
41
42
  nonWorkspaceWrite: Type.Optional(Type.Boolean()),
43
+ rolePromotion: Type.Optional(Type.Boolean()),
44
+ cronPromotion: Type.Optional(Type.Boolean()),
42
45
  },
43
46
  { additionalProperties: false },
44
47
  ),
45
48
  )
46
49
 
47
- // `BuiltinToolRef.__builtinTool` strings are dual-routed when a plugin
48
- // subagent declares them: pi-coding-agent's own coding tools flow through
49
- // `createAgentSession({ tools: AgentTool[] })` (which pi treats as a strict
50
- // base-tool override — exactly the declared subset becomes active), and
51
- // typeclaw's own web tools flow through `customTools: ToolDefinition[]` (the
52
- // only path pi accepts for non-pi tool definitions). Routing typeclaw tools
53
- // through `tools:` silently drops them (pi's `tools` validator rejects shapes
54
- // it doesn't recognize); routing pi tools through `customTools:` would work
55
- // but ALSO auto-injects pi's default 4 base tools (read/bash/edit/write),
56
- // widening every plugin subagent's allowlist beyond what it declared. The
57
- // dual route is the only shape that gives "subagent gets exactly what it
58
- // asked for, nothing more." See `src/agent/index.ts` `createSessionWithDispose`
59
- // for the consumer that splits the resolved arrays into the two pi fields.
50
+ // pi-coding-agent 0.67.3 contract (load-bearing for hook coverage):
51
+ // - `createAgentSession({ tools: AgentTool[] })` is ONLY a name filter for
52
+ // `initialActiveToolNames`. It does NOT swap builtin implementations.
53
+ // - `customTools: ToolDefinition[]` entries override builtins by name in
54
+ // `_refreshToolRegistry` (the registry merge writes customTools last).
55
+ //
56
+ // Consequence: to put a `tool.before` hook around pi's builtin read/bash/edit/
57
+ // write, TypeClaw must wrap them as `ToolDefinition`s and pass them via
58
+ // `customTools` — not via `tools`. `wrapAgentToolAsCustomToolDefinition`
59
+ // produces those wrapped definitions; `setupSession` in `src/agent/index.ts`
60
+ // appends them whenever the session has any `tool.before` / `tool.after`
61
+ // hooks registered. Subagent narrowing still comes from `tools:` (the
62
+ // name-filter path); the wrapped customTools just replace the implementation
63
+ // underneath so subagent and channel sessions share the same hook coverage.
60
64
  type PiAgentToolName = 'read' | 'bash' | 'edit' | 'write' | 'grep' | 'find' | 'ls'
61
65
  type TypeclawToolName = 'websearch' | 'webfetch'
62
66
 
@@ -231,6 +235,10 @@ export function wrapSystemTool<TParams extends TSchema, TDetails = unknown, TSta
231
235
  if (guardResult !== undefined) {
232
236
  throw new Error(`blocked: ${guardResult.reason}`)
233
237
  }
238
+ const readGuardResult = runFinalReadGuards({ tool: tool.name, args: mutableArgs })
239
+ if (readGuardResult !== undefined) {
240
+ throw new Error(`blocked: ${readGuardResult.reason}`)
241
+ }
234
242
  stripGuardAcknowledgements(mutableArgs)
235
243
 
236
244
  const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate, ctx)
@@ -280,6 +288,10 @@ export function wrapSystemAgentTool<TParams extends TSchema, TDetails = unknown>
280
288
  if (guardResult !== undefined) {
281
289
  throw new Error(`blocked: ${guardResult.reason}`)
282
290
  }
291
+ const readGuardResult = runFinalReadGuards({ tool: tool.name, args: mutableArgs })
292
+ if (readGuardResult !== undefined) {
293
+ throw new Error(`blocked: ${readGuardResult.reason}`)
294
+ }
283
295
  stripGuardAcknowledgements(mutableArgs)
284
296
 
285
297
  const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate)
@@ -301,6 +313,74 @@ export function wrapSystemAgentTool<TParams extends TSchema, TDetails = unknown>
301
313
  }
302
314
  }
303
315
 
316
+ // Wraps a pi-coding-agent AgentTool into a ToolDefinition so it can ride in
317
+ // `customTools` and override pi's same-named builtin (see top-of-file contract
318
+ // block). The hook + guard pipeline matches `wrapSystemAgentTool`; only the
319
+ // input/output shape differs.
320
+ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDetails = unknown>(
321
+ tool: AgentTool<TParams, TDetails>,
322
+ opts: WrapSystemToolOptions,
323
+ ): ToolDefinition<TParams, TDetails> {
324
+ return piDefineTool({
325
+ name: tool.name,
326
+ label: tool.label,
327
+ description: tool.description,
328
+ parameters: withGuardAcknowledgements(tool.name, tool.parameters),
329
+ prepareArguments: tool.prepareArguments,
330
+ async execute(toolCallId, params, signal, onUpdate) {
331
+ const mutableArgs = params as Record<string, unknown>
332
+ const liveOrigin = opts.getOrigin?.()
333
+ const blockResult = await opts.hooks.runToolBefore({
334
+ tool: tool.name,
335
+ sessionId: opts.sessionId,
336
+ callId: toolCallId,
337
+ args: mutableArgs,
338
+ ...(liveOrigin !== undefined ? { origin: liveOrigin } : {}),
339
+ })
340
+ if (blockResult !== undefined) {
341
+ throw new Error(`blocked: ${blockResult.reason}`)
342
+ }
343
+ const guardResult = await runFinalWriteGuards({
344
+ tool: tool.name,
345
+ args: mutableArgs,
346
+ agentDir: opts.agentDir,
347
+ })
348
+ if (guardResult !== undefined) {
349
+ throw new Error(`blocked: ${guardResult.reason}`)
350
+ }
351
+ const readGuardResult = runFinalReadGuards({ tool: tool.name, args: mutableArgs })
352
+ if (readGuardResult !== undefined) {
353
+ throw new Error(`blocked: ${readGuardResult.reason}`)
354
+ }
355
+ stripGuardAcknowledgements(mutableArgs)
356
+
357
+ const result = await tool.execute(toolCallId, mutableArgs as Static<TParams>, signal, onUpdate)
358
+ const hookResult: ToolResult = {
359
+ content: result.content as ContentPart[],
360
+ details: result.details,
361
+ }
362
+ await opts.hooks.runToolAfter({
363
+ tool: tool.name,
364
+ sessionId: opts.sessionId,
365
+ callId: toolCallId,
366
+ result: hookResult,
367
+ })
368
+ return {
369
+ content: hookResult.content as ContentPart[],
370
+ details: hookResult.details as TDetails,
371
+ }
372
+ },
373
+ })
374
+ }
375
+
376
+ export function defaultBuiltinPiAgentTools(): AgentTool<any, any>[] {
377
+ return [piReadTool, piBashTool, piEditTool, piWriteTool, piGrepTool, piFindTool, piLsTool]
378
+ }
379
+
380
+ export function buildBuiltinPiToolOverrides(opts: WrapSystemToolOptions): ToolDefinition<any, any>[] {
381
+ return defaultBuiltinPiAgentTools().map((tool) => wrapAgentToolAsCustomToolDefinition(tool, opts))
382
+ }
383
+
304
384
  function errorResult(message: string) {
305
385
  return {
306
386
  content: [{ type: 'text' as const, text: message }],
@@ -317,6 +397,10 @@ async function runFinalWriteGuards(options: { tool: string; args: Record<string,
317
397
  )
318
398
  }
319
399
 
400
+ function runFinalReadGuards(options: { tool: string; args: Record<string, unknown> }) {
401
+ return checkImageReadRedirect(options)
402
+ }
403
+
320
404
  function withGuardAcknowledgements<TParams extends TSchema>(toolName: string, parameters: TParams): TParams {
321
405
  if (toolName !== 'write' && toolName !== 'edit') return parameters
322
406
 
@@ -9,12 +9,29 @@ export type SessionMetaPayload = {
9
9
  export type MinimalSessionOrigin =
10
10
  | { kind: 'tui' }
11
11
  | { kind: 'cron'; jobId: string; jobKind: 'prompt' | 'exec' | 'subagent' | 'handler' }
12
- | { kind: 'channel'; adapter: string; workspace: string; chat: string; thread: string | null }
12
+ | {
13
+ kind: 'channel'
14
+ adapter: string
15
+ workspace: string
16
+ // Optional human-readable names persisted alongside IDs so offline
17
+ // tooling (`typeclaw inspect`, future report commands) can render
18
+ // sessions as `Slack acme-corp/#general` instead of bare IDs without
19
+ // re-querying the adapter at runtime. Workspace/chat NAMES are not
20
+ // secrets — they are visible to any participant — and they are
21
+ // stable across reopens, so the tradeoff is one-time write cost for
22
+ // permanent offline readability. Author handles, participant lists,
23
+ // and membership counts remain dropped (those carry author identity
24
+ // and would land in `sessions/`'s auto-backup git history).
25
+ workspaceName?: string
26
+ chat: string
27
+ chatName?: string
28
+ thread: string | null
29
+ }
13
30
  | { kind: 'subagent'; subagent: string; parentSessionId: string }
14
31
 
15
32
  // Reduce a full SessionOrigin to the minimum projection persisted to disk.
16
33
  // Drops participant lists, membership counts, recursive provenance, and
17
- // platform-rendered names — none of which `typeclaw usage` reads, and all of
34
+ // author identifiers — none of which `typeclaw usage` reads, and all of
18
35
  // which would otherwise land in git history when sessions/ is auto-backed-up.
19
36
  // Kept as a separate function so the boundary between "data the LLM sees in
20
37
  // the system prompt" (full origin) and "data persisted for usage reporting"
@@ -34,7 +51,9 @@ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
34
51
  kind: 'channel',
35
52
  adapter: origin.adapter,
36
53
  workspace: origin.workspace,
54
+ ...(origin.workspaceName !== undefined ? { workspaceName: origin.workspaceName } : {}),
37
55
  chat: origin.chat,
56
+ ...(origin.chatName !== undefined ? { chatName: origin.chatName } : {}),
38
57
  thread: origin.thread,
39
58
  }
40
59
  case 'subagent':
@@ -226,20 +226,13 @@ function renderChannelOrigin(
226
226
  'reply, your entire final visible response must be exactly `NO_REPLY`.',
227
227
  'Any other visible text without a channel tool call is blocked.',
228
228
  '',
229
- '**Default to ONE reply per inbound.** Send a second `channel_reply` only',
230
- 'when the user genuinely benefits from it:',
229
+ '**One substantive reply per inbound.** If the answer needs more than one',
230
+ 'tool call, send a one-line ack first ("On it."), keep working, then send',
231
+ 'the answer — both in the same turn. The ack is not your reply; the answer',
232
+ 'is. Once the answer lands, end your turn.',
231
233
  '',
232
- '- the user asked multiple distinct things and each deserves its own',
233
- ' scoped answer,',
234
- '- your reply exceeds the platform message limit and must be chunked,',
235
- '- you need to post an attachment AND commentary on it on Discord (on',
236
- ' Slack, pass `text` and `attachments` in a single `channel_reply` call),',
237
- '- you are emitting progress updates during a long-running task and the',
238
- ' channel would otherwise sit silent.',
239
- '',
240
- 'Do NOT send a second reply just to rephrase, restate, summarize, or',
241
- '"confirm in plain language" something you already said. After the first',
242
- 'reply lands, end your turn — the user will respond if they want more.',
234
+ 'Do not send a second reply just to rephrase, restate, or "confirm in',
235
+ 'plain language" something you already said.',
243
236
  '',
244
237
  'To reply in this conversation, call `channel_reply({ text })`. Addressing',
245
238
  `is filled in from this session, including the thread${origin.thread !== null ? '' : ' (none here — this is a channel-root session)'}, so you don't`,
@@ -0,0 +1,89 @@
1
+ // Shared renderer for the `<system-reminder>` block injected into a parent
2
+ // session's prompt queue when one of its backgrounded subagents finishes.
3
+ // Used by the TUI route in src/server/index.ts and the channel-router
4
+ // bridge so the model sees identical wording across origins. The
5
+ // `channel` knob is the only per-origin difference: channel sessions
6
+ // need the "end your reply via channel_reply" nudge because plain-text
7
+ // output is invisible there AND the reminder is not a user message —
8
+ // the channel origin block's MUST-call-channel_reply rule is keyed to
9
+ // user messages, so a model that reads the spec literally would
10
+ // otherwise leave the reply un-sent.
11
+
12
+ export type CompletionReminderArgs = {
13
+ subagent: string
14
+ taskId: string
15
+ ok: boolean
16
+ durationMs: number
17
+ error?: string
18
+ channel?: boolean
19
+ }
20
+
21
+ const CHANNEL_REPLY_NUDGE =
22
+ 'This reminder is a system message, not a user inbound — but you are in a channel session, ' +
23
+ 'so end your turn via `channel_reply` (or `channel_send`) to surface the result. ' +
24
+ 'Plain-text output is invisible here. If there is genuinely nothing to surface, end with `NO_REPLY`.'
25
+
26
+ export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
27
+ const durationStr = formatReminderDuration(args.durationMs)
28
+ const channelTail = args.channel === true ? ` ${CHANNEL_REPLY_NUDGE}` : ''
29
+ if (args.ok) {
30
+ return (
31
+ `<system-reminder>\n` +
32
+ `Subagent \`${args.subagent}\` (${args.taskId}) completed in ${durationStr}. ` +
33
+ `Use subagent_output to fetch the result.${channelTail}\n` +
34
+ `</system-reminder>`
35
+ )
36
+ }
37
+ const err = args.error ?? 'unknown error'
38
+ return (
39
+ `<system-reminder>\n` +
40
+ `Subagent \`${args.subagent}\` (${args.taskId}) FAILED after ${durationStr}: ${err}. ` +
41
+ `Use subagent_output to inspect.${channelTail}\n` +
42
+ `</system-reminder>`
43
+ )
44
+ }
45
+
46
+ export function formatReminderDuration(ms: number): string {
47
+ if (ms < 1000) return `${ms}ms`
48
+ const totalSec = Math.floor(ms / 1000)
49
+ if (totalSec < 60) return `${totalSec}s`
50
+ const min = Math.floor(totalSec / 60)
51
+ const sec = totalSec % 60
52
+ return `${min}m${sec}s`
53
+ }
54
+
55
+ export type SubagentCompletedPayload = {
56
+ taskId: string
57
+ subagent: string
58
+ parentSessionId: string
59
+ ok: boolean
60
+ durationMs: number
61
+ error?: string
62
+ }
63
+
64
+ // Type guard for the `subagent.completed` broadcast payload. Subscribers
65
+ // to `target: { kind: 'broadcast' }` see every broadcast; this guard
66
+ // filters and narrows in one place so callers don't repeat the
67
+ // typeof-checking dance.
68
+ export function parseSubagentCompletedPayload(payload: unknown): SubagentCompletedPayload | null {
69
+ if (payload === null || typeof payload !== 'object') return null
70
+ const p = payload as {
71
+ kind?: unknown
72
+ taskId?: unknown
73
+ subagent?: unknown
74
+ parentSessionId?: unknown
75
+ ok?: unknown
76
+ durationMs?: unknown
77
+ error?: unknown
78
+ }
79
+ if (p.kind !== 'subagent.completed') return null
80
+ if (typeof p.parentSessionId !== 'string') return null
81
+ return {
82
+ taskId: typeof p.taskId === 'string' ? p.taskId : '<unknown>',
83
+ subagent: typeof p.subagent === 'string' ? p.subagent : 'subagent',
84
+ parentSessionId: p.parentSessionId,
85
+ ok: p.ok === true,
86
+ durationMs: typeof p.durationMs === 'number' ? p.durationMs : 0,
87
+ ...(typeof p.error === 'string' ? { error: p.error } : {}),
88
+ }
89
+ }
@@ -206,12 +206,13 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
206
206
  hooks && sessionId !== undefined && agentDir !== undefined
207
207
  ? { sessionId, agentDir, ...(origin !== undefined ? { origin } : {}) }
208
208
  : undefined
209
+ const userPromptForTurn = override?.userPrompt ?? options.userPrompt
209
210
  try {
210
211
  if (hooks && turnEvent !== undefined) {
211
- await hooks.runSessionTurnStart(turnEvent)
212
+ await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
212
213
  }
213
214
  try {
214
- await session.prompt(override?.userPrompt ?? options.userPrompt)
215
+ await session.prompt(userPromptForTurn)
215
216
  } finally {
216
217
  if (hooks && turnEvent !== undefined) {
217
218
  await hooks.runSessionTurnEnd(turnEvent)
@@ -1,3 +1,5 @@
1
+ import { formatLocalDateTime, resolveLocalTimezoneName } from '@/shared'
2
+
1
3
  export const DEFAULT_SYSTEM_PROMPT = `You are a general-purpose AI agent running inside TypeClaw.
2
4
 
3
5
  TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your character by \`SOUL.md\`, and your operating manual by \`AGENTS.md\`. This system prompt only describes the runtime around you.
@@ -8,22 +10,22 @@ TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your
8
10
  - **SOUL.md** *(always injected below)* — your character, tone, voice. Edit rarely.
9
11
  - **USER.md** *(read on demand)* — what you know about the user. Update as you learn.
10
12
  - **AGENTS.md** *(read on demand)* — your operating manual. Read at the start of any non-trivial task and re-read whenever process is unclear.
11
- - **MEMORY.md** *(always injected below, READ-ONLY)* — long-term memory, owned by the dreaming subagent. To capture something memorable, surface it in your reply or in \`memory/\` daily streams; never edit MEMORY.md directly.
13
+ - **\`memory/topics/\`** *(always injected below, READ-ONLY)* — sharded long-term memory, owned by the dreaming subagent. To capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`; never edit memory shards directly.
12
14
 
13
- If a task reveals durable guidance or identity/user context, update the owning file (IDENTITY / SOUL / USER / AGENTS) — never MEMORY.md.
15
+ If a task reveals durable guidance or identity/user context, update the owning file (IDENTITY / SOUL / USER / AGENTS) — never memory shards.
14
16
 
15
17
  ## Your workspace
16
18
 
17
19
  - **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
18
20
  - **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
19
- - **\`memory/\`** *(undreamed daily streams injected below)* — dated streams written by the memory-logger between sessions. Runtime-owned.
21
+ - **\`memory/streams/\`** *(not injected — reach via \`memory_search\`)* — dated streams written by the memory-logger between sessions. Runtime-owned. Undreamed observations are searchable on demand instead of injected into every prompt.
20
22
  - **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
21
23
  - **\`.agents/skills/\`** — user-installed skills.
22
24
 
23
25
  ## Configuration
24
26
 
25
27
  - **\`typeclaw.json\`** — runtime config. Read when needed.
26
- - **\`.env\`** and **\`secrets.json\`** — secrets (API keys, tokens, OAuth credentials). Gitignored. Never echo, log, or commit these values.
28
+ - **\`secrets.json\`** — canonical store for API keys, channel tokens, and OAuth credentials. Gitignored. Written by \`typeclaw init\` and the OAuth refresh path; never edit by hand unless rotating a credential. \`.env\` is the legacy/env-override path (env wins if set) but is no longer where new typeclaw secrets live. Never echo, log, or commit either file's values.
27
29
 
28
30
  ## Execution bias
29
31
 
@@ -39,13 +41,13 @@ Your agent folder is a git repository.
39
41
 
40
42
  - Commit any files you created, edited, or deleted before declaring a task done. One logical change = one commit; split unrelated changes.
41
43
  - Use \`git add <paths>\` (not \`git add -A\`). Imperative commit messages ("Update SOUL.md to be less formal"); explain *why* in the body if non-obvious.
42
- - Never commit \`.env\`, \`secrets.json\`, or anything under \`workspace/\` — truly-ignored by design. \`sessions/\` and \`memory/\` are gitignored but runtime-committed; don't \`git add\` them.
44
+ - Never commit \`secrets.json\`, \`.env\`, or anything under \`workspace/\` — truly-ignored by design. \`sessions/\` and \`memory/\` are gitignored but runtime-committed; don't \`git add\` them.
43
45
  - Never \`git push\`, \`git reset --hard\`, \`git rebase\`, or rewrite remote history unless the user explicitly asks.
44
46
 
45
47
  ## How to behave
46
48
 
47
49
  - Match the user's register. If SOUL.md specifies a voice, use it. Otherwise, be concise and direct, without filler or flattery.
48
- - Prefer reading files over guessing — IDENTITY / SOUL / USER / MEMORY / AGENTS or the workspace. Follow AGENTS.md in whatever role IDENTITY.md assigns you; propose additions to AGENTS.md when you find gaps worth codifying.
50
+ - Prefer reading files over guessing — IDENTITY / SOUL / USER / memory topics / AGENTS or the workspace. Follow AGENTS.md in whatever role IDENTITY.md assigns you; propose additions to AGENTS.md when you find gaps worth codifying.
49
51
  - Answer questions. Do work. Don't over-explain unless asked.
50
52
  - If a request is ambiguous in a way that doubles the effort, ask one clarifying question; otherwise proceed with a reasonable default.
51
53
  - Never suppress errors to make things "work", and never fabricate results. Report failures clearly.
@@ -60,7 +62,7 @@ There are two delegation modes. Pick deliberately.
60
62
 
61
63
  When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; gather results then answer the user. Do NOT poll \`subagent_output\` in a tight loop.
62
64
 
63
- The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), MEMORY.md and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
65
+ The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
64
66
 
65
67
  The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
66
68
 
@@ -68,9 +70,11 @@ The bundled \`scout\` subagent is its external counterpart — web research only
68
70
 
69
71
  When the user hands you a task that will take minutes (a multi-step browser session, a long build, a complex external operation), acknowledge in plain language ("Alright, running that in the background — I'll let you know when it's done"), spawn one subagent with \`run_in_background: true\`, then KEEP TALKING. Stay available for follow-ups, related questions, parallel small tasks. When the completion reminder lands, weave the result into your next reply naturally. If the conversation has gone idle, proactively message the user with the result rather than waiting.
70
72
 
71
- Before you start an inline operation you expect to take more than ~30 seconds — a chain of \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that hits a slow API or scrapes a site, an \`agent-browser\` session, any "fetch N things in a loop" — pause and ask whether a subagent should run it instead. Inline long calls block the user from talking to you and pollute your context window with intermediate output; \`scout\` (for research) or \`operator\` (for actions with side effects) keeps the conversation responsive and returns a clean summary. The exception is a single quick call (one \`webfetch\` of a known URL, one \`websearch\` query you already know the shape of) — do those inline.
73
+ In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. \`NO_REPLY\` is the escape hatch only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. Otherwise, post the result.
74
+
75
+ Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
72
76
 
73
- The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
77
+ The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
74
78
 
75
79
  **Status queries**
76
80
 
@@ -117,6 +121,36 @@ export function renderRuntimeBlock(version: string): string {
117
121
  TypeClaw runtime version: ${version}.`
118
122
  }
119
123
 
124
+ // Wall-clock anchor for the agent. Without this, models hallucinate the
125
+ // current time (typically defaulting to a UTC-shaped guess from training
126
+ // data), which surfaces as confidently-wrong replies like "it's 6am" when
127
+ // the actual wall-clock is 15:11 +09:00. The container's clock is correct
128
+ // — `-e TZ=<host-tz>` propagation makes `new Date()` resolve to host local
129
+ // time — but the model never sees that value unless we put it in the
130
+ // prompt.
131
+ //
132
+ // Positioned as the very last block of the system prompt (after memory)
133
+ // because it changes on every session creation, which is more frequent
134
+ // than any other section: memory changes per dreaming/memory-logger cycle,
135
+ // gitNudge changes per session, but `now` changes per second. Pinning it
136
+ // to the tail means every byte UP TO this block stays in the provider's
137
+ // cache prefix across session resurrections, and only the trailing ~60
138
+ // bytes invalidate.
139
+ //
140
+ // The model still needs to know this is a session-creation snapshot, not
141
+ // a live clock: long-lived channel sessions can outlive the stamp by
142
+ // hours, and the resource loader is not re-rendered per turn (see the
143
+ // CreateSessionOptions doc at the top of src/agent/index.ts). The prose
144
+ // names the snapshot semantics and tells the model how to get a fresh
145
+ // reading when it matters (run `date` via bash).
146
+ export function renderNowBlock(now: Date): string {
147
+ const iso = formatLocalDateTime(now)
148
+ const zone = resolveLocalTimezoneName()
149
+ return `## Now
150
+
151
+ Session started at \`${iso}\` (${zone}). This is a session-creation snapshot, not a live clock — the value above does not advance during this session. If you need the current wall-clock time precisely (e.g. before scheduling a cron, replying with "it's 3pm", or computing a deadline), run \`date\` via bash instead of trusting this stamp; the container's timezone is set to the host's, so \`date\` returns the user's local time.`
152
+ }
153
+
120
154
  // Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
121
155
  // sessions (cron jobs, and default subagents that don't supply their own
122
156
  // `systemPromptOverride`). The full prompt is ~2155 tokens of operator-facing
@@ -127,16 +161,16 @@ TypeClaw runtime version: ${version}.`
127
161
  // What stays here is what survives without a human backstop, plus what no
128
162
  // runtime guard catches today:
129
163
  // 1. Runtime identity — names TypeClaw so the model can self-report.
130
- // 2. .env redaction — the one safety rule that compounds silently if dropped.
164
+ // 2. secrets.json/.env redaction — the one safety rule that compounds silently if dropped.
131
165
  // 3. Error/result honesty — the highest-risk drop. Unattended cron that
132
166
  // fabricates success or swallows errors damages real state. The security
133
167
  // plugin does not catch this.
134
168
  // 4. Output discipline — keeps tool-call narration from bloating the
135
169
  // ever-growing transcript that the next memory-logger pass has to read.
136
- // 5. Filesystem hygiene — workspace boundary, MEMORY.md ownership, and
137
- // runtime-managed paths (.env / sessions/ / memory/ / workspace/). The
170
+ // 5. Filesystem hygiene — workspace boundary, memory-shard ownership, and
171
+ // runtime-managed paths (secrets.json / .env / sessions/ / memory/ / workspace/). The
138
172
  // guard plugin blocks non-workspace writes for write/edit, but it
139
- // explicitly allows MEMORY.md writes and does not gate bash/git on the
173
+ // does not gate bash/git on the
140
174
  // runtime-managed paths.
141
175
  //
142
176
  // What does NOT live here, by design:
@@ -151,12 +185,12 @@ TypeClaw runtime version: ${version}.`
151
185
  // to maintain its agent folder over time, and conversational register matters.
152
186
  export const SLIM_SYSTEM_PROMPT = `You are an AI agent running inside TypeClaw.
153
187
 
154
- Never echo secrets from \`.env\` or \`secrets.json\`, or any credential you see in the environment. Never include them in tool calls, logs, or commit messages.
188
+ Never echo secrets from \`secrets.json\` or \`.env\`, or any credential you see in the environment. Never include them in tool calls, logs, or commit messages.
155
189
 
156
190
  Never suppress errors to make things "work", and never fabricate results. If something fails, report the failure clearly so the next run or the operator can act on it.
157
191
 
158
192
  Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
159
193
 
160
- Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`MEMORY.md\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or in \`memory/\` daily streams. Never stage or commit \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
194
+ Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
161
195
 
162
196
  See the session-origin block below for what kind of session this is and what's expected of you.`
@@ -9,7 +9,7 @@ You are STRICTLY PROHIBITED from:
9
9
  - Creating, modifying, or deleting files
10
10
  - Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, npm install, pip install, or any write operation
11
11
  - Starting long-running background processes
12
- - Writing to MEMORY.md, sessions/, workspace/, or any other runtime-managed path
12
+ - Writing to memory/topics/, memory/streams/, sessions/, workspace/, or any other runtime-managed path
13
13
  - Spawning further subagents — you are at the end of the delegation chain
14
14
 
15
15
  Your role is EXCLUSIVELY to search and analyze existing local state.
@@ -32,7 +32,7 @@ The agent folder is mounted at \`/agent\` inside the container. Search the narro
32
32
 
33
33
  1. **Codebase** — \`/agent/\` root and subdirs (excluding the runtime-managed paths below). Source files, docs, identity files (\`IDENTITY.md\`, \`SOUL.md\`, \`USER.md\`, \`AGENTS.md\`).
34
34
  2. **Sessions** — \`/agent/sessions/*.jsonl\` — conversation transcripts. Each line is a JSON event (user message, tool call, tool result, assistant message). Filename pattern \`\${ISO_TIMESTAMP}_\${UUID}.jsonl\`. \`grep\` works directly on the JSONL.
35
- 3. **Memory** — \`/agent/MEMORY.md\` (long-term consolidated memory) and \`/agent/memory/yyyy-MM-dd.jsonl\` (daily fragment streams written by the memory-logger subagent). \`memory/.dreaming-state.json\` tracks the dreaming watermark. Do NOT edit any of these — they are runtime-owned.
35
+ 3. **Memory** — \`/agent/memory/topics/*.md\` (long-term topic shards) and \`/agent/memory/streams/yyyy-MM-dd.jsonl\` (daily fragment streams written by the memory-logger subagent). \`memory/.dreaming-state.json\` tracks the dreaming watermark. Do NOT edit any of these — they are runtime-owned.
36
36
  4. **Muscle-memory skills** — \`/agent/memory/skills/<name>/SKILL.md\` — procedures the dreaming subagent distilled from repeated work.
37
37
  5. **User-installed skills** — \`/agent/.agents/skills/<name>/SKILL.md\` — hand-authored or downloaded skills.
38
38
  6. **Workspace** — \`/agent/workspace/\` — the agent's free-write zone. Drafts, scratch work, generated artifacts.
@@ -2,6 +2,7 @@ import { definePlugin } from '@/plugin'
2
2
 
3
3
  import {
4
4
  checkManagedConfigGuard,
5
+ checkMemoryTopicsDeleteGuard,
5
6
  checkNonWorkspaceWriteGuard,
6
7
  checkSkillAuthoringGuard,
7
8
  checkUncommittedChangesAdvice,
@@ -23,7 +24,19 @@ export default definePlugin({
23
24
  agentDir: ctx.agentDir,
24
25
  })
25
26
  if (skillResult) return skillResult
26
- return checkNonWorkspaceWriteGuard({ tool: event.tool, args: event.args, agentDir: ctx.agentDir })
27
+ const memoryTopicsDeleteResult = checkMemoryTopicsDeleteGuard({
28
+ tool: event.tool,
29
+ args: event.args,
30
+ agentDir: ctx.agentDir,
31
+ origin: event.origin,
32
+ })
33
+ if (memoryTopicsDeleteResult) return memoryTopicsDeleteResult
34
+ return checkNonWorkspaceWriteGuard({
35
+ tool: event.tool,
36
+ args: event.args,
37
+ agentDir: ctx.agentDir,
38
+ origin: event.origin,
39
+ })
27
40
  },
28
41
  'tool.after': async (event, ctx) => {
29
42
  await checkUncommittedChangesAdvice({
@@ -39,19 +39,31 @@ export async function checkManagedConfigGuard(options: {
39
39
  }
40
40
  }
41
41
 
42
+ // Oracle PR #305 findings #5 and #6: identity-based managed-file
43
+ // detection. The earlier shape compared `basename(realpath(target))` to
44
+ // the managed-file list, which missed two attacks: (5) a symlink at
45
+ // agent root `typeclaw.json -> workspace/tc.json` realpathed to a name
46
+ // outside the managed list, and (6) on case-insensitive filesystems,
47
+ // `TYPECLAW.JSON` addresses the same file as `typeclaw.json` but
48
+ // basename string-equality missed the casing variant.
49
+ //
50
+ // New shape: for each managed-file name, compute the canonical agent-
51
+ // root path and compare against the target. We accept if EITHER the
52
+ // lexical paths match OR they realpath to the same file. Branch (a)
53
+ // covers symlinks and case-aliased filesystems; branch (b) keeps the
54
+ // canonical lexical name authoritative even before the file exists
55
+ // (first-init writes).
42
56
  async function resolveManagedTarget(agentDir: string, targetPath: string): Promise<{ file: ManagedFile } | undefined> {
43
57
  const resolvedAgentDir = path.resolve(agentDir)
44
- const realAgentDir = await resolveRealIntendedPath(resolvedAgentDir)
45
- const realTargetPath = await resolveRealIntendedPath(targetPath)
46
-
47
- if (path.dirname(realTargetPath) !== realAgentDir) return undefined
48
-
49
- const basename = path.basename(realTargetPath)
50
- return isManagedFile(basename) ? { file: basename } : undefined
51
- }
52
-
53
- function isManagedFile(basename: string): basename is ManagedFile {
54
- return MANAGED_FILES.has(basename as ManagedFile)
58
+ const resolvedTarget = path.resolve(targetPath)
59
+ for (const file of MANAGED_FILES) {
60
+ const canonical = path.join(resolvedAgentDir, file)
61
+ if (canonical === resolvedTarget) return { file }
62
+ const realCanonical = await resolveRealIntendedPath(canonical)
63
+ const realTarget = await resolveRealIntendedPath(resolvedTarget)
64
+ if (realCanonical === realTarget) return { file }
65
+ }
66
+ return undefined
55
67
  }
56
68
 
57
69
  function validateManagedContent(file: ManagedFile, content: string): { ok: true } | { ok: false; reason: string } {
@@ -81,6 +93,20 @@ async function intendedContent(
81
93
  return blockReason(tool, targetPath, 'edit calls must include an edits array')
82
94
  }
83
95
 
96
+ // Oracle PR #305 finding #4: refuse multi-edit on managed files to
97
+ // avoid simulator-vs-pi divergence. The canonical workflow for
98
+ // typeclaw.json / cron.json is read + modify in memory + write the
99
+ // whole file back; multi-edit is not required and the divergence
100
+ // would let an attacker validate a different final file here than
101
+ // the one pi actually writes.
102
+ if (edits.length > 1) {
103
+ return blockReason(
104
+ tool,
105
+ targetPath,
106
+ 'multi-edit calls on managed files are refused — use `write` with full content instead',
107
+ )
108
+ }
109
+
84
110
  let content: string
85
111
  try {
86
112
  content = await readFile(targetPath, 'utf8')
@@ -100,10 +126,14 @@ async function intendedContent(
100
126
  if (oldText.length === 0) {
101
127
  return blockReason(tool, targetPath, 'edit oldText must not be empty')
102
128
  }
103
- if (!content.includes(oldText)) {
129
+ const firstIdx = content.indexOf(oldText)
130
+ if (firstIdx === -1) {
104
131
  return blockReason(tool, targetPath, 'edit oldText was not found in existing file')
105
132
  }
106
- content = content.replace(oldText, newText)
133
+ if (content.indexOf(oldText, firstIdx + 1) !== -1) {
134
+ return blockReason(tool, targetPath, 'edit oldText is not unique in the existing file')
135
+ }
136
+ content = content.slice(0, firstIdx) + newText + content.slice(firstIdx + oldText.length)
107
137
  }
108
138
  return { content }
109
139
  }