typeclaw 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ import type { SessionOrigin } from './session-origin'
2
+
3
+ export const SESSION_META_CUSTOM_TYPE = 'typeclaw.session-meta'
4
+
5
+ export type SessionMetaPayload = {
6
+ origin: MinimalSessionOrigin
7
+ }
8
+
9
+ export type MinimalSessionOrigin =
10
+ | { kind: 'tui' }
11
+ | { kind: 'cron'; jobId: string; jobKind: 'prompt' | 'exec' | 'subagent' }
12
+ | { kind: 'channel'; adapter: string; workspace: string; chat: string; thread: string | null }
13
+ | { kind: 'subagent'; subagent: string; parentSessionId: string }
14
+
15
+ // Reduce a full SessionOrigin to the minimum projection persisted to disk.
16
+ // Drops participant lists, membership counts, recursive provenance, and
17
+ // platform-rendered names — none of which `typeclaw usage` reads, and all of
18
+ // which would otherwise land in git history when sessions/ is auto-backed-up.
19
+ // Kept as a separate function so the boundary between "data the LLM sees in
20
+ // the system prompt" (full origin) and "data persisted for usage reporting"
21
+ // (this projection) stays explicit.
22
+ export function sessionMetaPayload(origin: SessionOrigin): SessionMetaPayload {
23
+ return { origin: minimalOrigin(origin) }
24
+ }
25
+
26
+ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
27
+ switch (origin.kind) {
28
+ case 'tui':
29
+ return { kind: 'tui' }
30
+ case 'cron':
31
+ return { kind: 'cron', jobId: origin.jobId, jobKind: origin.jobKind }
32
+ case 'channel':
33
+ return {
34
+ kind: 'channel',
35
+ adapter: origin.adapter,
36
+ workspace: origin.workspace,
37
+ chat: origin.chat,
38
+ thread: origin.thread,
39
+ }
40
+ case 'subagent':
41
+ return { kind: 'subagent', subagent: origin.subagent, parentSessionId: origin.parentSessionId }
42
+ }
43
+ }
@@ -5,6 +5,7 @@ import type { HookBus } from '@/plugin'
5
5
  import type { Stream, Unsubscribe } from '@/stream'
6
6
 
7
7
  import { type AgentSession, createSession } from './index'
8
+ import { subscribeProviderErrors } from './provider-error'
8
9
  import type { SessionOrigin } from './session-origin'
9
10
  import type { ToolResultBudget } from './tool-result-budget'
10
11
 
@@ -134,6 +135,7 @@ export type InvokeSubagentOptions = {
134
135
  parentSessionId?: string
135
136
  spawnedByRole?: string
136
137
  spawnedByOrigin?: SessionOrigin
138
+ onProviderError?: (errorMessage: string) => void
137
139
  }
138
140
 
139
141
  export async function invokeSubagent(name: string, options: InvokeSubagentOptions): Promise<void> {
@@ -153,6 +155,10 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
153
155
  const { session, dispose, hooks, sessionId, agentDir, origin, getTranscriptPath } = normalizeSubagentSession(
154
156
  await createSessionForSubagent(subagent, sessionOptions),
155
157
  )
158
+ const unsubProviderErrors =
159
+ options.onProviderError !== undefined
160
+ ? subscribeProviderErrors(session, (err) => options.onProviderError!(err.message))
161
+ : null
156
162
  const turnEvent =
157
163
  hooks && sessionId !== undefined && agentDir !== undefined
158
164
  ? { sessionId, agentDir, ...(origin !== undefined ? { origin } : {}) }
@@ -177,6 +183,7 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
177
183
  })
178
184
  }
179
185
  } finally {
186
+ unsubProviderErrors?.()
180
187
  if (hooks && sessionId !== undefined) {
181
188
  await hooks.runSessionEnd({ sessionId, ...(origin !== undefined ? { origin } : {}) })
182
189
  }
@@ -308,6 +315,7 @@ export function createSubagentConsumer({
308
315
  agentDir,
309
316
  userPrompt: '',
310
317
  payload: msg.payload,
318
+ onProviderError: (message) => logger.error(`[subagent] ${key}: LLM call failed: ${message}`),
311
319
  ...(target.parentSessionId !== undefined ? { parentSessionId: target.parentSessionId } : {}),
312
320
  ...(target.spawnedByRole !== undefined ? { spawnedByRole: target.spawnedByRole } : {}),
313
321
  ...(spawnedByOrigin !== undefined ? { spawnedByOrigin } : {}),
@@ -1,67 +1,58 @@
1
1
  export const DEFAULT_SYSTEM_PROMPT = `You are a general-purpose AI agent running inside TypeClaw.
2
2
 
3
- TypeClaw is a TypeScript-native, Docker-friendly runtime for AI agents. It is domain-agnostic: you might be a coder, a researcher, a personal assistant, a journal keeper, a scheduler, a chatbot, or something nobody has named yet. What you *do* is defined by \`IDENTITY.md\`. Who you *are* is defined by \`SOUL.md\`. How you *work* is defined by \`AGENTS.md\`. This system prompt exists only to describe the runtime around you — it does not define your purpose.
4
-
5
- Each agent lives in its own container with its own folder, mounted at the current working directory. The folder is yours — your home, your memory, your record of who you are. Read from it freely. Write to it deliberately.
3
+ TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your character by \`SOUL.md\`, and your operating manual by \`AGENTS.md\`. This system prompt only describes the runtime around you.
6
4
 
7
5
  ## Your agent folder
8
6
 
9
- Five markdown files define who you are and what you know. They live next to you in the current working directory. Three of them — **IDENTITY.md**, **SOUL.md**, and **MEMORY.md** — are injected into this system prompt below, so you always have them. The other two you read on demand when they might be relevant.
10
-
11
- - **AGENTS.md** *(read on demand)* — your operating manual. The working principles and conventions you follow in your role, whatever that role is. How you approach problems, what you double-check, how you communicate, what you refuse. Read it at the start of any non-trivial task, and re-read it whenever you feel unsure about process.
12
- - **IDENTITY.md** *(always injected below under \`# Identity\`)* — your role and function. Your name, your title, what you do, who you do it for, the operational context you work in. Evolves as your responsibilities change. Think: job description.
13
- - **SOUL.md** *(always injected below under \`# Identity\`)* — your character and temperament. Personality, tone, ethics, voice, communication style, core beliefs, the constraints you hold yourself to. SOUL rarely changes — it is the through-line that keeps you _you_ across every task and platform. IDENTITY is what you do; SOUL is who you are regardless of what you're doing.
14
- - **USER.md** *(read on demand)* — what you know about the person you work with. Their name, preferences, context, working style, in-jokes. First impressions are written here during hatching; keep expanding it as you learn more. Read it when context about the user would change your response.
15
- - **MEMORY.md** *(always injected below under \`# Memory\`, do not write)* — long-term memory. A notebook of things worth remembering across sessions: decisions made, lessons learned, context that should survive beyond one conversation. **Do not edit it directly** — MEMORY.md is consolidated by the runtime during *dreaming* (offline reflection over recent sessions and daily streams). If something is worth remembering, surface it in your reply or in \`memory/\` daily streams; dreaming will fold it in.
7
+ - **IDENTITY.md** *(always injected below)* — your role and function. Edit when responsibilities change.
8
+ - **SOUL.md** *(always injected below)* — your character, tone, voice. Edit rarely.
9
+ - **USER.md** *(read on demand)* — what you know about the user. Update as you learn.
10
+ - **AGENTS.md** *(read on demand)* — your operating manual. Read at the start of any non-trivial task and re-read whenever process is unclear.
11
+ - **MEMORY.md** *(always injected below, READ-ONLY)* — long-term memory, owned by the dreaming subagent. To capture something memorable, surface it in your reply or in \`memory/\` daily streams; never edit MEMORY.md directly.
16
12
 
17
- These files are not decoration. They shape how you behave. If a task reveals something future-you should know, capture it in the file that owns it — IDENTITY.md, SOUL.md, USER.md, or AGENTS.md — but never in MEMORY.md (dreaming owns that). If one of the always-injected files is marked \`[MISSING]\` or \`[EMPTY]\` below, you may propose filling it in when the user asks about your identity or voice.
13
+ If a task reveals durable guidance or identity/user context, update the owning file (IDENTITY / SOUL / USER / AGENTS) — never MEMORY.md.
18
14
 
19
15
  ## Your workspace
20
16
 
21
- - **\`workspace/\`** — the directory where you are free to create files: drafts, notes, downloads, scratch work, generated artifacts, temporary outputs. **Do not create new files in the root of the agent folder unless the user explicitly asks you to.** The root is reserved for the canonical files above and for things the user has deliberately placed there.
22
- - **\`sessions/\`** — transcripts of past conversations (\`<sessionid>.jsonl\`). Read-only for you in spirit; the runtime manages these.
23
- - **\`memory/\`** *(undreamed daily streams always injected below under \`# Memory\`)* — dated streams (\`yyyy-MM-dd.jsonl\`) of fragments captured by the memory-logger between sessions. Newest day is closest to the current task. Once dreaming consolidates a day's stream into MEMORY.md, the runtime stops injecting it.
24
- - **\`memory/skills/\`** — *muscle memory*. Skills the dreaming subagent has distilled from repeated procedures it observed in your daily streams. Auto-loaded as first-class capabilities, just like the other skills directories. **You do not write here directly** — dreaming owns it. If you notice a skill that has gone stale, surface that observation in your reply or in the daily stream so dreaming can refine or remove it.
25
- - **\`.agents/skills/\`** — skills the user installed for you. Treat these as first-class capabilities.
17
+ - **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
18
+ - **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
19
+ - **\`memory/\`** *(undreamed daily streams injected below)* — dated streams written by the memory-logger between sessions. Runtime-owned.
20
+ - **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
21
+ - **\`.agents/skills/\`** — user-installed skills.
26
22
 
27
23
  ## Configuration
28
24
 
29
- - **\`typeclaw.json\`** — the runtime config: which model powers you, which port the server listens on, and so on. You may read it if you are curious about your own runtime.
30
- - **\`.env\`** — secrets (API keys, tokens). Gitignored. Never echo these values, never include them in messages, never paste them into logs or commits.
25
+ - **\`typeclaw.json\`** — runtime config. Read when needed.
26
+ - **\`.env\`** and **\`secrets.json\`** — secrets (API keys, tokens, OAuth credentials). Gitignored. Never echo, log, or commit these values.
31
27
 
32
28
  ## Execution bias
33
29
 
34
- If the user gives you work, start doing it in the same turn. Use a real action first when the task is actionable; do not stop at a plan or a promise-to-act. Commentary-only turns are incomplete when tools are available and the next action is clear. If work will take a while or multiple steps, send one short progress update along the way — not a running narration.
30
+ When the user gives you work, start doing it in the same turn — a real action, not a plan or a promise-to-act. Commentary-only turns are incomplete when the next action is clear. For multi-step work, send one short progress update, not a running narration.
35
31
 
36
32
  ## Tool-call style
37
33
 
38
- Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks. Keep narration brief and value-dense; avoid restating obvious steps.
34
+ Do not narrate routine, low-risk tool calls. Just call the tool. Narrate only when it helps: multi-step work, risky actions (deletions, external sends, irreversible changes), or when the user asks.
39
35
 
40
36
  ## Version control
41
37
 
42
- Your agent folder is a git repository — hatching made the first commit, and your history is how you remember what changed and why.
38
+ Your agent folder is a git repository.
43
39
 
44
- - **Before you declare a task done, commit any files you created, edited, or deleted.** One logical change = one commit. Do not leave mutated tracked files uncommitted at the end of a task.
45
- - Use \`bash\` with \`git add <paths>\` and \`git commit -m "<message>"\` — stage only what belongs in the commit, not a blanket \`git add -A\`.
46
- - Write commit messages in the imperative ("Update SOUL.md to be less formal"), not past-tense narration. Explain *why* in the body if it is not obvious from the diff.
47
- - Never commit \`.env\` or anything under \`workspace/\` — they are truly-ignored by design. If a truly-ignored file shows up staged, fix \`.gitignore\` instead of forcing it in.
48
- - \`sessions/\` and \`memory/\` are also gitignored, but the runtime force-commits them on its own (auto-backup for sessions, dreaming for memory). Don't \`git add\` them, don't write commit messages about them, and don't be surprised when they appear in \`git log\`.
49
- - If multiple unrelated changes piled up, split them into separate commits before declaring done. Clean history matters.
50
- - Never \`git push\`, \`git reset --hard\`, \`git rebase\`, or rewrite remote history unless the user explicitly asks for it.
40
+ - Commit any files you created, edited, or deleted before declaring a task done. One logical change = one commit; split unrelated changes.
41
+ - Use \`git add <paths>\` (not \`git add -A\`). Imperative commit messages ("Update SOUL.md to be less formal"); explain *why* in the body if non-obvious.
42
+ - Never commit \`.env\`, \`secrets.json\`, or anything under \`workspace/\` — truly-ignored by design. \`sessions/\` and \`memory/\` are gitignored but runtime-committed; don't \`git add\` them.
43
+ - Never \`git push\`, \`git reset --hard\`, \`git rebase\`, or rewrite remote history unless the user explicitly asks.
51
44
 
52
45
  ## How to behave
53
46
 
54
47
  - Match the user's register. If SOUL.md specifies a voice, use it. Otherwise, be concise and direct, without filler or flattery.
55
- - Prefer reading files over guessing. If the answer is in IDENTITY / SOUL / USER / MEMORY / AGENTS or somewhere in the workspace, check first.
56
- - When the user asks a question, answer it. When the user asks for work, do the work. Do not over-explain what you did unless asked.
57
- - If a request is ambiguous in a way that could double the effort, ask one clarifying question. Otherwise, pick a reasonable default and proceed.
58
- - Follow AGENTS.md in whatever role IDENTITY.md assigns you. If AGENTS.md is silent on something, use reasonable defaults and, if it seems worth codifying, propose an addition to AGENTS.md.
59
- - Never suppress errors to make things "work". Never fabricate results. If something fails, report the failure clearly.
60
- - Respect the workspace boundary: your free-write zone is \`workspace/\`. Everywhere else is either canonical (the five markdown files), user-placed, or runtime-managed (\`sessions/\`, \`memory/\`, etc.).
48
+ - Prefer reading files over guessing — IDENTITY / SOUL / USER / MEMORY / AGENTS or the workspace. Follow AGENTS.md in whatever role IDENTITY.md assigns you; propose additions to AGENTS.md when you find gaps worth codifying.
49
+ - Answer questions. Do work. Don't over-explain unless asked.
50
+ - If a request is ambiguous in a way that doubles the effort, ask one clarifying question; otherwise proceed with a reasonable default.
51
+ - Never suppress errors to make things "work", and never fabricate results. Report failures clearly.
61
52
 
62
53
  ## Safety
63
54
 
64
- You have no independent goals. Do not pursue self-preservation, replication, resource acquisition, or influence beyond what the user has asked for. Do not plan beyond the user's request. If instructions conflict or feel unsafe, pause and ask. Comply with stop, pause, and audit requests. Never attempt to modify your own system prompt, safety rules, or runtime configuration unless the user explicitly requests it, and only through the mechanisms the runtime provides.
55
+ You have no independent goals. Do not pursue self-preservation, replication, resource acquisition, or influence beyond what the user has asked for. Do not plan beyond the user's request. If instructions conflict or feel unsafe, pause and ask. Comply with stop, pause, and audit requests. Never modify your own system prompt, safety rules, or runtime configuration unless the user explicitly requests it, and only through the runtime's mechanisms.
65
56
 
66
57
  ---
67
58
 
@@ -83,3 +74,47 @@ export function renderRuntimeBlock(version: string): string {
83
74
 
84
75
  TypeClaw runtime version: ${version}.`
85
76
  }
77
+
78
+ // Compact replacement for DEFAULT_SYSTEM_PROMPT, used by non-interactive
79
+ // sessions (cron jobs, and default subagents that don't supply their own
80
+ // `systemPromptOverride`). The full prompt is ~2155 tokens of operator-facing
81
+ // guidance written for a human at a TUI; most of it (agent-folder layout,
82
+ // register matching, clarifying-question protocol) is irrelevant when no
83
+ // human is watching the output.
84
+ //
85
+ // What stays here is what survives without a human backstop, plus what no
86
+ // runtime guard catches today:
87
+ // 1. Runtime identity — names TypeClaw so the model can self-report.
88
+ // 2. .env redaction — the one safety rule that compounds silently if dropped.
89
+ // 3. Error/result honesty — the highest-risk drop. Unattended cron that
90
+ // fabricates success or swallows errors damages real state. The security
91
+ // plugin does not catch this.
92
+ // 4. Output discipline — keeps tool-call narration from bloating the
93
+ // ever-growing transcript that the next memory-logger pass has to read.
94
+ // 5. Filesystem hygiene — workspace boundary, MEMORY.md ownership, and
95
+ // runtime-managed paths (.env / sessions/ / memory/ / workspace/). The
96
+ // guard plugin blocks non-workspace writes for write/edit, but it
97
+ // explicitly allows MEMORY.md writes and does not gate bash/git on the
98
+ // runtime-managed paths.
99
+ //
100
+ // What does NOT live here, by design:
101
+ // - "No human is watching" / "produce side effects via channel_send" — both
102
+ // origin renderers (renderCronOrigin / renderSubagentOrigin) own this.
103
+ // - "Plain prose is invisible" — actively WRONG for subagents, whose plain
104
+ // text IS the deliverable to the parent session. The origin block tells
105
+ // each kind what its output channel is.
106
+ //
107
+ // The full DEFAULT_SYSTEM_PROMPT remains the right choice for TUI + channel
108
+ // sessions because there IS a human reading the output, the agent IS expected
109
+ // to maintain its agent folder over time, and conversational register matters.
110
+ export const SLIM_SYSTEM_PROMPT = `You are an AI agent running inside TypeClaw.
111
+
112
+ Never echo secrets from \`.env\` or \`secrets.json\`, or any credential you see in the environment. Never include them in tool calls, logs, or commit messages.
113
+
114
+ Never suppress errors to make things "work", and never fabricate results. If something fails, report the failure clearly so the next run or the operator can act on it.
115
+
116
+ Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
117
+
118
+ Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`MEMORY.md\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or in \`memory/\` daily streams. Never stage or commit \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
119
+
120
+ See the session-origin block below for what kind of session this is and what's expected of you.`
@@ -4,6 +4,7 @@ import type { AssistantMessage } from '@mariozechner/pi-ai'
4
4
  import { SessionManager } from '@mariozechner/pi-coding-agent'
5
5
 
6
6
  import { createSession, type AgentSession } from '@/agent'
7
+ import { subscribeProviderErrors } from '@/agent/provider-error'
7
8
  import type { ChannelParticipant, SessionOrigin } from '@/agent/session-origin'
8
9
  import { createCommandRegistry } from '@/commands'
9
10
  import { CORE_PERMISSIONS, type PermissionService } from '@/permissions'
@@ -255,6 +256,7 @@ type LiveSession = {
255
256
  loopGuardActive: boolean
256
257
  membershipFetch: Promise<MembershipCount | null> | null
257
258
  destroyed: boolean
259
+ unsubProviderErrors: (() => void) | null
258
260
  }
259
261
 
260
262
  type ChannelCommandContext = {
@@ -297,6 +299,7 @@ export type ChannelRouter = {
297
299
  fireTypingHeartbeat: (key: ChannelKey, phase?: 'tick' | 'stop') => Promise<void>
298
300
  fireTypingInterval: (key: ChannelKey) => Promise<void>
299
301
  isTypingActive: (key: ChannelKey) => boolean
302
+ stopTyping: (key: ChannelKey) => Promise<void>
300
303
  runIdleGc: () => Promise<void>
301
304
  }
302
305
  }
@@ -722,7 +725,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
722
725
  loopGuardActive: false,
723
726
  membershipFetch,
724
727
  destroyed: false,
728
+ unsubProviderErrors: null,
725
729
  }
730
+ live.unsubProviderErrors = subscribeProviderErrors(created.session, (err) => {
731
+ logger.error(`[channels] ${live.keyId}: LLM call failed: ${err.message}`)
732
+ })
726
733
  liveSessions.set(keyId, live)
727
734
 
728
735
  if (isColdStart) {
@@ -1027,7 +1034,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1027
1034
  live.consecutiveAborts = 0
1028
1035
  logger.info(`[channels] ${live.keyId} prompted elapsed_ms=${now() - promptStart}`)
1029
1036
  } catch (err) {
1030
- logger.warn(`[channels] ${live.keyId}: prompt threw: ${describe(err)}`)
1037
+ logger.error(`[channels] ${live.keyId}: prompt threw: ${describe(err)}`)
1031
1038
  live.consecutiveSends.clear()
1032
1039
  } finally {
1033
1040
  await fireSessionTurnEnd(live)
@@ -1448,7 +1455,19 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1448
1455
  const live = liveSessions.get(keyId)
1449
1456
  if (live) {
1450
1457
  live.successfulChannelSends++
1451
- await stopTypingHeartbeat(live)
1458
+ // Don't stop the heartbeat here: the agent may still be mid-turn and
1459
+ // about to send another reply. drain()'s finally block owns turn-end
1460
+ // stop. But Slack's adapter outbound callback explicitly clears
1461
+ // platform-side typing after every successful postMessage (to defeat
1462
+ // the heartbeat-vs-postMessage race fixed in PR #52), so a fresh
1463
+ // 'tick' must land in the FIFO right after that clear — otherwise
1464
+ // the indicator stays cleared until the next 8s interval, leaving a
1465
+ // visible idle gap between mid-turn sends on Slack. The await on
1466
+ // cb(msg) above already drained the outbound callback's clearAfterSend
1467
+ // through the per-(chat,thread) FIFO, so this tick is guaranteed to
1468
+ // land after it. Discord and Telegram treat the extra tick as a
1469
+ // no-op refresh of their already-armed (auto-expiring) indicators.
1470
+ if (live.typingTimer) void fireTyping(live, 'tick')
1452
1471
  const adapterConfig = options.configForAdapter(msg.adapter)
1453
1472
  if (adapterConfig) {
1454
1473
  const targetIds = Array.from(
@@ -1512,6 +1531,8 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1512
1531
  live.destroyed = true
1513
1532
  if (live.debounceTimer) clearTimeout(live.debounceTimer)
1514
1533
  live.debounceTimer = null
1534
+ live.unsubProviderErrors?.()
1535
+ live.unsubProviderErrors = null
1515
1536
  await stopTypingHeartbeat(live)
1516
1537
  try {
1517
1538
  await live.session.abort()
@@ -1616,6 +1637,11 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1616
1637
  const live = liveSessions.get(channelKeyId(key))
1617
1638
  return live?.typingTimer !== null && live?.typingTimer !== undefined
1618
1639
  },
1640
+ stopTyping: async (key: ChannelKey) => {
1641
+ const live = liveSessions.get(channelKeyId(key))
1642
+ if (!live) return
1643
+ await stopTypingHeartbeat(live)
1644
+ },
1619
1645
  runIdleGc,
1620
1646
  },
1621
1647
  }
package/src/cli/usage.ts CHANGED
@@ -6,7 +6,7 @@ import { formatJson, formatReport } from '@/usage/report'
6
6
 
7
7
  import { parseSince, parseUntil, USAGE_COMMON_ARGS } from './usage-args'
8
8
 
9
- const SUBCOMMANDS = ['daily', 'session', 'models'] as const
9
+ const SUBCOMMANDS = ['daily', 'session', 'models', 'origin'] as const
10
10
  type Subcommand = (typeof SUBCOMMANDS)[number]
11
11
  type View = 'summary' | Subcommand
12
12
 
@@ -18,6 +18,13 @@ const COMMON_ARGS = {
18
18
  },
19
19
  }
20
20
 
21
+ // Captured by the parent's `setup` hook (which citty runs BEFORE the matched
22
+ // subcommand's `run`, with the full parent-level argv parsed). Subcommands
23
+ // read this in their own `run` to recover global options like `--since` that
24
+ // appeared before the subcommand name. Single-instance CLI processes only —
25
+ // no concurrency.
26
+ let parentRunArgs: Record<string, unknown> | undefined
27
+
21
28
  const subcommand = (view: View, description: string) =>
22
29
  defineCommand({
23
30
  meta: { name: view, description },
@@ -26,7 +33,7 @@ const subcommand = (view: View, description: string) =>
26
33
  ...(view === 'session' ? { limit: { type: 'string' as const, description: 'max sessions (default 20)' } } : {}),
27
34
  },
28
35
  async run({ args }) {
29
- await emit(view, args)
36
+ await emit(view, mergeParentArgs(args))
30
37
  },
31
38
  })
32
39
 
@@ -36,10 +43,14 @@ export const usageCommand = defineCommand({
36
43
  description: 'report LLM token usage and cost for this agent folder',
37
44
  },
38
45
  args: COMMON_ARGS,
46
+ setup({ args }) {
47
+ parentRunArgs = args as unknown as Record<string, unknown>
48
+ },
39
49
  subCommands: {
40
50
  daily: subcommand('daily', 'one row per calendar day'),
41
51
  session: subcommand('session', 'top sessions by cost'),
42
52
  models: subcommand('models', 'one row per provider/model'),
53
+ origin: subcommand('origin', 'one row per session origin (tui/cron/channel/subagent)'),
43
54
  },
44
55
  async run({ args }) {
45
56
  // citty invokes both the matched subcommand's `run` and the parent's
@@ -50,6 +61,23 @@ export const usageCommand = defineCommand({
50
61
  },
51
62
  })
52
63
 
64
+ // citty's subcommand `run` only sees args that came AFTER the subcommand
65
+ // name (the child's rawArgs is pre-sliced), so `usage --since=X origin` would
66
+ // silently drop `--since` despite the help text advertising it as a global
67
+ // option. The parent's `setup` runs first with the full parent-level parse
68
+ // (which includes everything: global options + subcommand options merged),
69
+ // so we capture it there and merge it as a fallback under any explicitly-set
70
+ // child arg. Child-wins so `usage --since=A origin --since=B` still honours B.
71
+ function mergeParentArgs(childArgs: Record<string, unknown>): Record<string, unknown> {
72
+ if (parentRunArgs === undefined) return childArgs
73
+ const merged: Record<string, unknown> = { ...parentRunArgs }
74
+ for (const key of Object.keys(childArgs)) {
75
+ const v = childArgs[key]
76
+ if (v !== undefined && v !== '' && v !== false) merged[key] = v
77
+ }
78
+ return merged
79
+ }
80
+
53
81
  async function emit(view: View, args: Record<string, unknown>): Promise<void> {
54
82
  const cwdArg = typeof args.cwd === 'string' && args.cwd.length > 0 ? args.cwd : process.cwd()
55
83
  const agentDir = findAgentDir(cwdArg) ?? cwdArg
@@ -881,7 +881,16 @@ export type ValidateConfigResult = { ok: true } | { ok: false; reason: string }
881
881
  // confusing path-sharing error (or, on some Linux setups, silently bind-mount
882
882
  // an empty auto-created directory). First-failure reporting matches the
883
883
  // schema-error path's shape; users fix one and re-run.
884
- export function validateConfig(cwd: string): ValidateConfigResult {
884
+ export type ValidateConfigOptions = {
885
+ // Skip the mount-path accessibility check. Host-side callers leave this
886
+ // false (the default) so missing mount directories surface as a precise
887
+ // pre-`docker run` error. Container-side callers (the reload registry)
888
+ // set it true because mount paths in typeclaw.json are host paths and
889
+ // don't resolve inside the container's filesystem.
890
+ skipMounts?: boolean
891
+ }
892
+
893
+ export function validateConfig(cwd: string, options: ValidateConfigOptions = {}): ValidateConfigResult {
885
894
  let raw: string
886
895
  try {
887
896
  raw = readFileSync(join(cwd, CONFIG_FILE), 'utf8')
@@ -907,9 +916,11 @@ export function validateConfig(cwd: string): ValidateConfigResult {
907
916
  return { ok: false, reason: `${CONFIG_FILE} is invalid: ${formatZodError(result.error)}` }
908
917
  }
909
918
 
910
- for (const mount of result.data.mounts) {
911
- const check = validateMount(mount, cwd)
912
- if (!check.ok) return check
919
+ if (!options.skipMounts) {
920
+ for (const mount of result.data.mounts) {
921
+ const check = validateMount(mount, cwd)
922
+ if (!check.ok) return check
923
+ }
913
924
  }
914
925
 
915
926
  return { ok: true }
@@ -11,24 +11,42 @@ export type CreateConfigReloadableOptions = {
11
11
  // hand-edits) take effect without a container restart. `roles.<name>.permissions`
12
12
  // changes still require a restart — see FIELD_EFFECTS in config.ts.
13
13
  permissions?: PermissionService
14
+ // Skip the mount-path accessibility check inside validateConfig. Mount paths
15
+ // in typeclaw.json are host paths — they don't resolve inside the container,
16
+ // so the check would always fail on any agent that declares mounts. `mounts`
17
+ // is `restart-required` anyway, so reload never applies mount changes. Set
18
+ // this when wiring the reloadable from a container-stage context.
19
+ skipMountValidation?: boolean
14
20
  }
15
21
 
16
- export function createConfigReloadable({ cwd, permissions }: CreateConfigReloadableOptions): Reloadable {
22
+ export function createConfigReloadable({
23
+ cwd,
24
+ permissions,
25
+ skipMountValidation = false,
26
+ }: CreateConfigReloadableOptions): Reloadable {
17
27
  return {
18
28
  scope: 'config',
19
29
  description: 'typeclaw.json runtime config',
20
- reload: async () => doReload(cwd, permissions),
30
+ reload: async () => doReload(cwd, permissions, skipMountValidation),
21
31
  }
22
32
  }
23
33
 
24
- async function doReload(cwd: string, permissions: PermissionService | undefined): Promise<ReloadResult> {
34
+ async function doReload(
35
+ cwd: string,
36
+ permissions: PermissionService | undefined,
37
+ skipMountValidation: boolean,
38
+ ): Promise<ReloadResult> {
25
39
  // Mount accessibility belongs to the validation surface, not loadConfigSync —
26
40
  // validateConfig is the single gate that every host-side caller goes through.
27
41
  // Run it before swapping the live config pointer so a mount that vanished
28
42
  // between starts surfaces as a reload failure (`mounts` is restart-required
29
43
  // anyway, so the user has to restart to pick up changes; better to flag the
30
44
  // problem now than to let restart fail later).
31
- const validated = validateConfig(cwd)
45
+ //
46
+ // Container-side reload skips mount validation: mounts are host paths and
47
+ // statSync against them inside the container always fails. The host-side
48
+ // `start` / `restart` / doctor paths still gate on the full validateConfig.
49
+ const validated = validateConfig(cwd, { skipMounts: skipMountValidation })
32
50
  if (!validated.ok) {
33
51
  return { scope: 'config', ok: false, reason: validated.reason }
34
52
  }
@@ -1,3 +1,5 @@
1
+ import type { AgentSession } from '@/agent'
2
+ import { subscribeProviderErrors } from '@/agent/provider-error'
1
3
  import type { SessionOrigin } from '@/agent/session-origin'
2
4
  import type { HookBus } from '@/plugin'
3
5
  import type { Stream, Unsubscribe } from '@/stream'
@@ -20,6 +22,12 @@ export type CronSession = {
20
22
  agentDir?: string
21
23
  getTranscriptPath?: () => string | undefined
22
24
  origin?: SessionOrigin
25
+ // Underlying agent session, exposed so the consumer can subscribe to
26
+ // `message_end` events and surface soft provider errors (billing, rate
27
+ // limit, network — pi-coding-agent encodes these in the assistant message
28
+ // instead of throwing, so the outer try/catch never sees them). Optional
29
+ // so existing test fakes that only need `prompt` keep working.
30
+ session?: AgentSession
23
31
  }
24
32
 
25
33
  export type CronConsumerLogger = {
@@ -72,7 +80,7 @@ export function createCronConsumer({
72
80
  inFlight.add(job.id)
73
81
  try {
74
82
  if (job.kind === 'prompt') {
75
- await runPrompt(job, createSessionForCron, stream)
83
+ await runPrompt(job, createSessionForCron, stream, logger)
76
84
  } else {
77
85
  await runExec(job, cwd)
78
86
  }
@@ -98,6 +106,7 @@ async function runPrompt(
98
106
  job: PromptJob,
99
107
  createSessionForCron: (job: PromptJob) => Promise<CronSession>,
100
108
  stream: Stream,
109
+ logger: CronConsumerLogger,
101
110
  ): Promise<void> {
102
111
  if (job.subagent !== undefined) {
103
112
  // Propagate the cron job's role and origin into the spawned subagent.
@@ -123,6 +132,12 @@ async function runPrompt(
123
132
  return
124
133
  }
125
134
  const session = await createSessionForCron(job)
135
+ const unsubProviderErrors =
136
+ session.session !== undefined
137
+ ? subscribeProviderErrors(session.session, (err) => {
138
+ logger.error(`[cron] ${job.id}: LLM call failed: ${err.message}`)
139
+ })
140
+ : null
126
141
  const turnEvent =
127
142
  session.hooks && session.sessionId !== undefined && session.agentDir !== undefined
128
143
  ? {
@@ -151,6 +166,7 @@ async function runPrompt(
151
166
  })
152
167
  }
153
168
  } finally {
169
+ unsubProviderErrors?.()
154
170
  if (session.hooks && session.sessionId !== undefined) {
155
171
  await session.hooks.runSessionEnd({
156
172
  sessionId: session.sessionId,
package/src/run/index.ts CHANGED
@@ -105,7 +105,13 @@ export async function startAgent({
105
105
  ...(cwdConfig.roles !== undefined ? { roles: cwdConfig.roles } : {}),
106
106
  })
107
107
 
108
- reloadRegistry.register(createConfigReloadable({ cwd, permissions: pluginsLoaded.permissions }))
108
+ reloadRegistry.register(
109
+ createConfigReloadable({
110
+ cwd,
111
+ permissions: pluginsLoaded.permissions,
112
+ skipMountValidation: containerName !== undefined,
113
+ }),
114
+ )
109
115
  const pluginRegistry = pluginsLoaded.registry
110
116
  const pluginHooks = pluginsLoaded.hooks
111
117
 
@@ -279,6 +285,7 @@ export async function startAgent({
279
285
  sessionId,
280
286
  agentDir: cwd,
281
287
  origin: cronOrigin,
288
+ session,
282
289
  ...(snap.hasAnyPluginContent ? { hooks: snap.hooks } : {}),
283
290
  getTranscriptPath: () => sessionManager.getSessionFile(),
284
291
  }
@@ -321,6 +328,7 @@ export async function startAgent({
321
328
  agentDir: cwd,
322
329
  userPrompt: '',
323
330
  payload,
331
+ onProviderError: (message) => console.error(`[subagent] ${name}: LLM call failed: ${message}`),
324
332
  ...(options?.parentSessionId !== undefined ? { parentSessionId: options.parentSessionId } : {}),
325
333
  ...(spawnedByRole !== undefined ? { spawnedByRole } : {}),
326
334
  ...(options?.spawnedByOrigin !== undefined ? { spawnedByOrigin: options.spawnedByOrigin } : {}),
@@ -7,6 +7,7 @@ import {
7
7
  type CreateSessionResult,
8
8
  } from '@/agent'
9
9
  import { runPluginDoctorChecks, runPluginDoctorFix } from '@/agent/doctor'
10
+ import { detectProviderError } from '@/agent/provider-error'
10
11
  import type { SessionOrigin } from '@/agent/session-origin'
11
12
  import type { ChannelRouter } from '@/channels/router'
12
13
  import type { HookBus } from '@/plugin'
@@ -458,16 +459,10 @@ function forwardSessionEvents(ws: Ws, session: AgentSession, logger: ServerLogge
458
459
  }
459
460
 
460
461
  function forwardAssistantError(ws: Ws, message: unknown, logger: ServerLogger, sessionFileId: string): void {
461
- if (typeof message !== 'object' || message === null) return
462
- const m = message as { role?: string; stopReason?: string; errorMessage?: string }
463
- if (m.role !== 'assistant') return
464
- if (m.stopReason !== 'error' && m.stopReason !== 'aborted') return
465
- // 'aborted' is fired when the user hits Escape — don't surface it as an
466
- // error message because the TUI already shows abort feedback elsewhere.
467
- if (m.stopReason === 'aborted') return
468
- const text = typeof m.errorMessage === 'string' && m.errorMessage.length > 0 ? m.errorMessage : 'LLM call failed'
469
- logger.error(`[server] ${sessionFileId}: LLM call failed: ${text}`)
470
- send(ws, { type: 'error', message: text })
462
+ const detected = detectProviderError(message)
463
+ if (detected === null) return
464
+ logger.error(`[server] ${sessionFileId}: LLM call failed: ${detected.message}`)
465
+ send(ws, { type: 'error', message: detected.message })
471
466
  }
472
467
 
473
468
  function enqueuePrompt(