typeclaw 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. package/package.json +1 -1
  2. package/src/agent/auth.ts +4 -2
  3. package/src/agent/index.ts +16 -28
  4. package/src/agent/model-fallback.ts +127 -0
  5. package/src/agent/tools/curl-impersonate.ts +300 -0
  6. package/src/agent/tools/ddg.ts +13 -88
  7. package/src/agent/tools/webfetch/fetch.ts +105 -2
  8. package/src/agent/tools/webfetch/tool.ts +4 -0
  9. package/src/bundled-plugins/agent-browser/shim.ts +47 -0
  10. package/src/bundled-plugins/backup/subagents.ts +2 -0
  11. package/src/bundled-plugins/memory/README.md +49 -12
  12. package/src/bundled-plugins/memory/citation-superset.ts +63 -0
  13. package/src/bundled-plugins/memory/dreaming.ts +105 -17
  14. package/src/bundled-plugins/memory/index.ts +2 -2
  15. package/src/bundled-plugins/memory/memory-logger.ts +45 -26
  16. package/src/bundled-plugins/memory/strength.ts +127 -0
  17. package/src/bundled-plugins/memory/topics.ts +75 -0
  18. package/src/bundled-plugins/security/index.ts +87 -43
  19. package/src/bundled-plugins/security/permissions.ts +36 -0
  20. package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
  21. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
  22. package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
  23. package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
  24. package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
  25. package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
  26. package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
  27. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
  28. package/src/channels/adapters/github/index.ts +87 -3
  29. package/src/channels/router.ts +194 -28
  30. package/src/channels/types.ts +3 -1
  31. package/src/cli/init.ts +146 -42
  32. package/src/cli/model.ts +10 -2
  33. package/src/cli/oauth-callbacks.ts +49 -0
  34. package/src/cli/provider.ts +3 -20
  35. package/src/config/config.ts +59 -24
  36. package/src/config/models-mutation.ts +42 -8
  37. package/src/config/providers-mutation.ts +12 -8
  38. package/src/container/start.ts +18 -1
  39. package/src/cron/consumer.ts +129 -43
  40. package/src/init/dockerfile.ts +109 -3
  41. package/src/init/hatching.ts +2 -2
  42. package/src/init/index.ts +14 -3
  43. package/src/init/oauth-login.ts +17 -3
  44. package/src/permissions/builtins.ts +29 -7
  45. package/src/permissions/permissions.ts +24 -7
  46. package/src/plugin/define.ts +2 -0
  47. package/src/plugin/manager.ts +14 -0
  48. package/src/plugin/types.ts +6 -0
  49. package/src/run/index.ts +2 -1
  50. package/src/skills/typeclaw-memory/SKILL.md +25 -15
  51. package/src/skills/typeclaw-permissions/SKILL.md +35 -17
  52. package/src/tui/index.ts +35 -3
  53. package/src/usage/report.ts +15 -12
  54. package/typeclaw.schema.json +57 -25
@@ -136,8 +136,8 @@ export function findModelsReferencingProvider(cwd: string, providerId: string):
136
136
  const models = readModelsOrNull(cwd)
137
137
  if (models === null) return []
138
138
  const out: string[] = []
139
- for (const [profile, ref] of Object.entries(models)) {
140
- if (refTargetsProvider(ref, providerId)) out.push(profile)
139
+ for (const [profile, refs] of Object.entries(models)) {
140
+ if (refs.some((r) => refTargetsProvider(r, providerId))) out.push(profile)
141
141
  }
142
142
  return out
143
143
  }
@@ -212,12 +212,16 @@ function readEnvKey(env: NodeJS.ProcessEnv, key: string): string | undefined {
212
212
  function buildProviderReferenceMap(models: Models | null): Map<string, string[]> {
213
213
  const out = new Map<string, string[]>()
214
214
  if (models === null) return out
215
- for (const [profile, ref] of Object.entries(models)) {
216
- const providerId = safeProviderForRef(ref)
217
- if (providerId === null) continue
218
- const existing = out.get(providerId) ?? []
219
- existing.push(profile)
220
- out.set(providerId, existing)
215
+ for (const [profile, refs] of Object.entries(models)) {
216
+ for (const ref of refs) {
217
+ const providerId = safeProviderForRef(ref)
218
+ if (providerId === null) continue
219
+ const existing = out.get(providerId) ?? []
220
+ if (!existing.includes(profile)) {
221
+ existing.push(profile)
222
+ out.set(providerId, existing)
223
+ }
224
+ }
221
225
  }
222
226
  return out
223
227
  }
@@ -455,7 +455,24 @@ export async function planStart({
455
455
  // the start() preflight force-removes any lingering corpse before the next
456
456
  // launch — so the only state Docker ever sees in `docker ps -a` is either
457
457
  // a running container or one the user has not started again yet.
458
- const runArgs = ['run', '-d', '--name', containerName, '-p', `${publishHost}:${hostPort}:${CONTAINER_PORT}`]
458
+ //
459
+ // `--shm-size=2g` is mandatory for the bundled Chrome (agent-browser) to
460
+ // survive heavy pages. Docker's default /dev/shm is 64MB; Chrome uses
461
+ // shared memory for the renderer process and silently crashes mid-load
462
+ // on any site with a large DOM or non-trivial WebGL. The crash surfaces
463
+ // as a blank page or "target closed" with no clear cause — easy to
464
+ // misattribute to bot detection. 2g matches the Playwright/Puppeteer
465
+ // canonical recommendation and is a memory cap, not an allocation (only
466
+ // used pages count against the host).
467
+ const runArgs = [
468
+ 'run',
469
+ '-d',
470
+ '--name',
471
+ containerName,
472
+ '--shm-size=2g',
473
+ '-p',
474
+ `${publishHost}:${hostPort}:${CONTAINER_PORT}`,
475
+ ]
459
476
 
460
477
  // Network egress filter: when `typeclaw.json#network.blockInternal` is true,
461
478
  // grant the container CAP_NET_ADMIN at boot so the entrypoint shim can
@@ -1,6 +1,8 @@
1
1
  import type { AgentSession } from '@/agent'
2
- import { subscribeProviderErrors } from '@/agent/provider-error'
2
+ import { promptWithFallback, resolveFallbackChain } from '@/agent/model-fallback'
3
3
  import type { SessionOrigin } from '@/agent/session-origin'
4
+ import { getConfig } from '@/config'
5
+ import type { KnownModelRef } from '@/config/providers'
4
6
  import type { HookBus } from '@/plugin'
5
7
  import type { Stream, Unsubscribe } from '@/stream'
6
8
 
@@ -41,7 +43,12 @@ export type CronConsumerLogger = {
41
43
  export type CreateCronConsumerOptions = {
42
44
  stream: Stream
43
45
  cwd: string
44
- createSessionForCron: (job: PromptJob) => Promise<CronSession>
46
+ // The optional `refOverride` argument is consumed by the fallback loop: the
47
+ // consumer calls this factory once per ref in the profile's chain, pinning
48
+ // each attempt to the specified model. Factories that don't honor the
49
+ // override silently lose fallback semantics, so production wiring threads
50
+ // it through to `createSession({ refOverride })`.
51
+ createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>
45
52
  // Builds the `CronHandlerContext` for the job and awaits its `handler`.
46
53
  // Wired by `src/run/index.ts` to reuse `runPromptForCommand` /
47
54
  // `runExecForCommand` from the command runner so plugin cron handlers and
@@ -121,7 +128,7 @@ export function createCronConsumer({
121
128
 
122
129
  async function runPrompt(
123
130
  job: PromptJob,
124
- createSessionForCron: (job: PromptJob) => Promise<CronSession>,
131
+ createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
125
132
  stream: Stream,
126
133
  logger: CronConsumerLogger,
127
134
  ): Promise<void> {
@@ -148,52 +155,131 @@ async function runPrompt(
148
155
  })
149
156
  return
150
157
  }
151
- const session = await createSessionForCron(job)
152
- const unsubProviderErrors =
153
- session.session !== undefined
154
- ? subscribeProviderErrors(session.session, (err) => {
155
- logger.error(`[cron] ${job.id}: LLM call failed: ${err.message}`)
158
+ // Resolve the model fallback chain for the cron profile (cron jobs run
159
+ // under the `default` profile today). Single-ref configs produce a length-1
160
+ // chain; multi-ref configs (e.g. `"default": ["openai/...", "fireworks/..."]`)
161
+ // drive the retry-on-failure loop inside `runPromptOnce`.
162
+ const refs = resolveFallbackChain(getConfig().models, undefined)
163
+ await runPromptOnce(job, refs, createSessionForCron, logger)
164
+ }
165
+
166
+ async function runPromptOnce(
167
+ job: PromptJob,
168
+ refs: KnownModelRef[],
169
+ createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
170
+ logger: CronConsumerLogger,
171
+ ): Promise<void> {
172
+ // Per-attempt lifecycle: every session we create gets full
173
+ // turn-start → turn-end → session-end → dispose bracketing, regardless of
174
+ // whether the helper chose it as the final session or disposed it as a
175
+ // failed earlier attempt. Without per-attempt session.end, plugin state
176
+ // keyed by sessionId (security plugin's remote-taint map, memory plugin's
177
+ // debounce timer) would orphan for every failed attempt. We track the
178
+ // last session separately so we can fire session.idle exactly once on
179
+ // success (matching pre-fallback cron behavior — see the pre-fallback
180
+ // try/finally structure: idle inside the prompt try-block, end in the
181
+ // outer finally).
182
+ let lastSession: CronSession | null = null
183
+ const result = await promptWithFallback({
184
+ refs,
185
+ text: job.prompt,
186
+ createSessionForRef: async (ref) => {
187
+ const created = await createSessionForCron(job, ref)
188
+ lastSession = created
189
+ const turnEvent =
190
+ created.hooks && created.sessionId !== undefined && created.agentDir !== undefined
191
+ ? {
192
+ sessionId: created.sessionId,
193
+ agentDir: created.agentDir,
194
+ ...(created.origin !== undefined ? { origin: created.origin } : {}),
195
+ }
196
+ : undefined
197
+ if (created.hooks && turnEvent !== undefined) {
198
+ await created.hooks.runSessionTurnStart(turnEvent)
199
+ }
200
+ // Bridge the CronSession wrapper into the AgentSession surface the
201
+ // fallback helper expects:
202
+ // prompt → CronSession.prompt (wrapper that calls AgentSession.prompt
203
+ // in production, or a hand-rolled test fake)
204
+ // subscribe → CronSession.session.subscribe when an underlying agent
205
+ // session is supplied, else a no-op (soft-error detection
206
+ // degrades to "off" in that mode; only hard throws drive
207
+ // fallback). Test fakes that omit `.session` lose
208
+ // soft-error fallback — production code always provides it.
209
+ // .bind(created.session) is load-bearing: AgentSession.subscribe is a
210
+ // regular method that reads `this._eventListeners`. Destructuring drops
211
+ // the receiver.
212
+ const sessionForHelper: AgentSession = {
213
+ prompt: (text: string) => created.prompt(text),
214
+ subscribe: created.session?.subscribe.bind(created.session) ?? (() => () => {}),
215
+ } as unknown as AgentSession
216
+ return {
217
+ session: sessionForHelper,
218
+ // Per-attempt teardown. Fires turn.end and session.end for every
219
+ // session created (success or failure), then disposes the underlying
220
+ // resources. Hooks that throw are logged but don't prevent disposal.
221
+ dispose: async () => {
222
+ if (created.hooks && turnEvent !== undefined) {
223
+ try {
224
+ await created.hooks.runSessionTurnEnd(turnEvent)
225
+ } catch (e) {
226
+ logger.warn(`[cron] ${job.id}: turn-end hook threw: ${describe(e)}`)
227
+ }
228
+ }
229
+ if (created.hooks && created.sessionId !== undefined) {
230
+ try {
231
+ await created.hooks.runSessionEnd({
232
+ sessionId: created.sessionId,
233
+ ...(created.origin !== undefined ? { origin: created.origin } : {}),
234
+ })
235
+ } catch (e) {
236
+ logger.warn(`[cron] ${job.id}: session-end hook threw: ${describe(e)}`)
237
+ }
238
+ }
239
+ created.dispose?.()
240
+ },
241
+ }
242
+ },
243
+ onAttemptFailed: (attempt) => {
244
+ logger.warn(
245
+ `[cron] ${job.id}: ${attempt.outcome} failure on ${attempt.ref}: ${attempt.errorMessage ?? 'unknown'}; falling back`,
246
+ )
247
+ },
248
+ })
249
+
250
+ if (!result.success) {
251
+ logger.error(
252
+ `[cron] ${job.id}: all ${result.attempts.length} model(s) failed; last error: ${result.lastError?.message ?? 'unknown'}`,
253
+ )
254
+ }
255
+
256
+ // session.idle fires once, only on success, and only against the session
257
+ // that handled the turn. Then dispose the successful session (the helper
258
+ // returns the session+dispose so we can run post-prompt hooks against a
259
+ // live session before tearing it down). Failed-chain disposal is already
260
+ // handled by the helper's per-attempt dispose calls.
261
+ if (result.success && lastSession !== null) {
262
+ const finalSession: CronSession = lastSession
263
+ if (finalSession.hooks && finalSession.sessionId !== undefined) {
264
+ try {
265
+ await finalSession.hooks.runSessionIdle({
266
+ sessionId: finalSession.sessionId,
267
+ parentTranscriptPath: finalSession.getTranscriptPath?.(),
268
+ idleMs: 0,
269
+ ...(finalSession.origin !== undefined ? { origin: finalSession.origin } : {}),
156
270
  })
157
- : null
158
- const turnEvent =
159
- session.hooks && session.sessionId !== undefined && session.agentDir !== undefined
160
- ? {
161
- sessionId: session.sessionId,
162
- agentDir: session.agentDir,
163
- ...(session.origin !== undefined ? { origin: session.origin } : {}),
164
- }
165
- : undefined
166
- try {
167
- if (session.hooks && turnEvent !== undefined) {
168
- await session.hooks.runSessionTurnStart(turnEvent)
169
- }
170
- try {
171
- await session.prompt(job.prompt)
172
- } finally {
173
- if (session.hooks && turnEvent !== undefined) {
174
- await session.hooks.runSessionTurnEnd(turnEvent)
271
+ } catch (e) {
272
+ logger.warn(`[cron] ${job.id}: session-idle hook threw: ${describe(e)}`)
175
273
  }
176
274
  }
177
- if (session.hooks && session.sessionId !== undefined) {
178
- await session.hooks.runSessionIdle({
179
- sessionId: session.sessionId,
180
- parentTranscriptPath: session.getTranscriptPath?.(),
181
- idleMs: 0,
182
- ...(session.origin !== undefined ? { origin: session.origin } : {}),
183
- })
184
- }
185
- } finally {
186
- unsubProviderErrors?.()
187
- if (session.hooks && session.sessionId !== undefined) {
188
- await session.hooks.runSessionEnd({
189
- sessionId: session.sessionId,
190
- ...(session.origin !== undefined ? { origin: session.origin } : {}),
191
- })
192
- }
193
- session.dispose?.()
275
+ await result.dispose()
194
276
  }
195
277
  }
196
278
 
279
+ function describe(err: unknown): string {
280
+ return err instanceof Error ? err.message : String(err)
281
+ }
282
+
197
283
  async function runExec(job: ExecJob, cwd: string): Promise<void> {
198
284
  const [cmd, ...args] = job.command
199
285
  if (!cmd) throw new Error(`exec job ${job.id}: empty command`)
@@ -27,6 +27,12 @@ export type BuildDockerfileOptions = {
27
27
  // `util-linux` carries `setpriv`, which the shim uses to drop CAP_NET_ADMIN
28
28
  // from the bounding set before exec'ing the agent. Listed first in the
29
29
  // apt-get install line so the package set is self-documenting at a glance.
30
+ //
31
+ // xvfb is intentionally NOT in baseline — it's a toggle (`xvfb: true` by
32
+ // default, opt-out via `docker.file.xvfb: false`) because the shim
33
+ // self-heals: it spawns Xvfb (and exports DISPLAY) if the binary is on
34
+ // PATH, and execs the agent directly otherwise. See APT_FEATURES.xvfb
35
+ // below and `buildEntrypointShim`.
30
36
  const BASELINE_APT_PACKAGES = ['git', 'ca-certificates', 'curl', 'gnupg', 'iptables', 'util-linux'] as const
31
37
 
32
38
  // curl-impersonate is the only currently-working way to query DuckDuckGo from
@@ -219,7 +225,96 @@ export function buildEntrypointShim(): string {
219
225
  # Source: src/init/dockerfile.ts \`buildEntrypointShim()\`.
220
226
  set -eu
221
227
 
228
+ # start_xvfb launches Xvfb in the background under a stripped capability
229
+ # bounding set so headed Chrome (agent-browser --headed, Playwright
230
+ # headful) has a real X11 display to connect to. Headless containers
231
+ # have no display server; Chrome --headless / --headless=new is
232
+ # fingerprinted by modern bot detection (Akamai / Cloudflare BM)
233
+ # regardless of UA spoof, so real headed Chrome under a virtual
234
+ # framebuffer is the only path to a passing sensor score from a
235
+ # server-side container.
236
+ #
237
+ # Two correctness invariants this function enforces:
238
+ #
239
+ # 1. Xvfb never holds CAP_NET_ADMIN. The shim runs as PID 1 with the
240
+ # container's full capability set (including NET_ADMIN when
241
+ # network.blockInternal=true). If we backgrounded Xvfb naked, it
242
+ # would inherit NET_ADMIN and keep it for the container's lifetime
243
+ # — defeating the capability-drop contract that setpriv applies to
244
+ # the agent process. Routing Xvfb through the same setpriv invocation
245
+ # we use for the agent strips NET_ADMIN before Xvfb's first exec.
246
+ # On the off-path (blockInternal=false) the bounding-set drop is a
247
+ # no-op (NET_ADMIN was never granted), but the call is harmless.
248
+ #
249
+ # 2. Xvfb startup failure is loud, not silent. \`Xvfb ... >/dev/null &\`
250
+ # under \`set -e\` does not fail the script if Xvfb exits immediately
251
+ # (missing library, port conflict, malformed args). Without the
252
+ # explicit liveness probe below, the shim would then export DISPLAY
253
+ # and exec bun, agent-browser launches would die with "cannot open
254
+ # display", and the operator would chase a phantom bug. We capture
255
+ # $! and \`kill -0\` it on every poll iteration so an early exit
256
+ # becomes a clear stderr line and a non-zero shim exit.
257
+ #
258
+ # We DO NOT use \`xvfb-run\`. xvfb-run hangs forever when it runs as
259
+ # PID 1 inside a container: its SIGUSR1-based ready handshake races
260
+ # and stalls because PID 1 ignores signals without explicit handlers,
261
+ # so the \`trap : USR1 ; wait || :\` dance never wakes up. Observed in
262
+ # practice: container alive, Xvfb running, PID 1 stuck in
263
+ # \`rt_sigsuspend\`, no agent process ever spawns, \`docker logs\` empty.
264
+ # Documented industry workarounds are tini-as-PID-1 or direct Xvfb
265
+ # spawn; we pick the latter (no new dep).
266
+ #
267
+ # Xvfb args:
268
+ # :99 fixed display number. Filesystem
269
+ # (/tmp/.X11-unix/X99) and abstract
270
+ # (\\0/tmp/.X11-unix/X99) sockets are both
271
+ # network-namespace-scoped, so :99 is safe
272
+ # across all Compose'd containers.
273
+ # -screen 0 1920x1080x24 desktop viewport agent-browser advertises;
274
+ # mismatched geometry is itself a fingerprint
275
+ # signal.
276
+ # -ac disable host-based X access control so
277
+ # Chrome connects without XAUTHORITY plumbing.
278
+ # +extension RANDR expose the RandR extension; Chrome queries
279
+ # it for screen geometry, and without it
280
+ # \`screen.*\` values come back inconsistent.
281
+ # -nolisten tcp refuse TCP connections (Unix socket only).
282
+ # Defense-in-depth — we are in a netns with
283
+ # no inbound exposure anyway.
284
+ start_xvfb() {
285
+ if ! command -v Xvfb >/dev/null 2>&1; then
286
+ return 0
287
+ fi
288
+ setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin \\
289
+ -- Xvfb :99 -screen 0 1920x1080x24 -ac +extension RANDR -nolisten tcp \\
290
+ >/dev/null 2>&1 &
291
+ xvfb_pid=$!
292
+ export DISPLAY=:99
293
+ # Poll the socket every 10ms up to ~3s. Xvfb cold start is typically
294
+ # ~20-50ms on a modern host; 3s covers slow Docker Desktop VMs,
295
+ # Rosetta/QEMU emulation, and loaded CI runners. We also \`kill -0\`
296
+ # the pid each iteration so an Xvfb that died immediately surfaces
297
+ # as a clear error instead of a 3-second hang followed by silent
298
+ # "cannot open display" downstream.
299
+ i=0
300
+ while [ $i -lt 300 ]; do
301
+ if [ -S /tmp/.X11-unix/X99 ]; then
302
+ unset i xvfb_pid
303
+ return 0
304
+ fi
305
+ if ! kill -0 "$xvfb_pid" 2>/dev/null; then
306
+ echo "typeclaw-entrypoint: Xvfb exited immediately; cannot start headed display (docker.file.xvfb=true)" >&2
307
+ exit 1
308
+ fi
309
+ sleep 0.01
310
+ i=$((i + 1))
311
+ done
312
+ echo "typeclaw-entrypoint: Xvfb did not create /tmp/.X11-unix/X99 within 3s; refusing to continue (docker.file.xvfb=true)" >&2
313
+ exit 1
314
+ }
315
+
222
316
  if [ "\${TYPECLAW_NETWORK_BLOCK_INTERNAL:-0}" != "1" ]; then
317
+ start_xvfb
223
318
  exec bun run typeclaw "$@"
224
319
  fi
225
320
 
@@ -264,6 +359,7 @@ ip6tables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
264
359
  ip6tables -A OUTPUT -o lo -j ACCEPT
265
360
  ${ipv6Rules.join('\n')}
266
361
 
362
+ start_xvfb
267
363
  exec setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin -- bun run typeclaw "$@"
268
364
  `
269
365
  }
@@ -337,7 +433,7 @@ type AptFeature = {
337
433
  toAptArgs: (toggle: DockerfileFeatureToggle) => string[]
338
434
  }
339
435
 
340
- const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts', AptFeature> = {
436
+ const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts' | 'xvfb', AptFeature> = {
341
437
  ffmpeg: { toAptArgs: (v) => singlePackageArgs('ffmpeg', v) },
342
438
  gh: { toAptArgs: (v) => singlePackageArgs('gh', v) },
343
439
  tmux: { toAptArgs: (v) => singlePackageArgs('tmux', v) },
@@ -345,6 +441,7 @@ const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts', Apt
345
441
  toAptArgs: (v) => (v === true ? ['python3', 'python3-pip', 'python3-venv', 'python-is-python3'] : []),
346
442
  },
347
443
  cjkFonts: { toAptArgs: (v) => (v === true ? [CJK_FONTS_PACKAGE] : []) },
444
+ xvfb: { toAptArgs: (v) => (v === true ? ['xvfb'] : []) },
348
445
  }
349
446
 
350
447
  export function buildDockerfile(
@@ -616,12 +713,21 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\
616
713
  fi`
617
714
 
618
715
  function defaultConfig(): DockerfileConfig {
619
- return { ffmpeg: false, gh: true, python: true, tmux: true, cjkFonts: true, cloudflared: true, append: [] }
716
+ return {
717
+ ffmpeg: false,
718
+ gh: true,
719
+ python: true,
720
+ tmux: true,
721
+ cjkFonts: true,
722
+ cloudflared: true,
723
+ xvfb: true,
724
+ append: [],
725
+ }
620
726
  }
621
727
 
622
728
  function collectToggleAptArgs(config: DockerfileConfig): string[] {
623
729
  const args: string[] = []
624
- for (const key of ['ffmpeg', 'gh', 'python', 'tmux', 'cjkFonts'] as const) {
730
+ for (const key of ['ffmpeg', 'gh', 'python', 'tmux', 'cjkFonts', 'xvfb'] as const) {
625
731
  args.push(...APT_FEATURES[key].toAptArgs(config[key]))
626
732
  }
627
733
  return args
@@ -45,9 +45,9 @@ Do these in order. Do **not** ask further questions.
45
45
  2. Write one short paragraph in \`MEMORY.md\` marking this moment: the date, how you came to be, what you and the user agreed on.
46
46
  3. Configure local git identity with \`bash\`: \`git config user.name "<your name>"\` and \`git config user.email "<reasonable placeholder>@typeclaw.local"\` (unless the user provided an email).
47
47
  4. Stage and commit **only the files you authored** with commit message \`Hatched 🐣\`. This is the hatching-specific commit message — it overrides the normal version-control style guidance for this one commit.
48
- 5. Send **one final short message** — two sentences at most — telling the user hatching is complete and they can \`/quit\` the TUI. Do not ask further questions. Do not offer more work. The container keeps running once they quit; keeping the TUI open here wastes time.
48
+ 5. Send **one final short message** — two sentences at most — telling the user hatching is complete and they can leave the TUI with \`/quit\` (or Ctrl+C). Do not ask further questions. Do not offer more work. The container keeps running once they quit; keeping the TUI open here wastes time.
49
49
 
50
- After that final message, stop. If the user keeps talking, answer briefly and remind them they can \`/quit\` whenever they are ready.
50
+ After that final message, stop. If the user keeps talking, answer briefly and remind them they can \`/quit\` (or Ctrl+C) whenever they are ready.
51
51
 
52
52
  This is the only time you will receive these instructions. After the \`Hatched 🐣\` commit, your identity takes over and you run as yourself.`
53
53
 
package/src/init/index.ts CHANGED
@@ -121,7 +121,18 @@ export type KakaotalkAuthRunner = (options: { cwd: string }) => Promise<Kakaotal
121
121
  // API-key provider". Optional model defaults to DEFAULT_MODEL_REF, which is
122
122
  // an OpenAI api-key provider — so test fixtures that omit both fields keep
123
123
  // working under the api-key path.
124
- export type LLMAuth = { kind: 'api-key'; apiKey: string } | { kind: 'oauth'; runLogin: OAuthLoginRunner }
124
+ //
125
+ // `oauth-completed` is the CLI wizard's signal that the browser login already
126
+ // happened up-front (right after the user picked the auth method) and the
127
+ // resulting credentials are already in `secrets.json`. `runInit` then skips
128
+ // the `oauth-login` step but still treats this as an OAuth provider (no API
129
+ // key written, etc.). The wizard runs OAuth eagerly so the browser opens the
130
+ // moment the user picks "OAuth (browser login)" instead of waiting until the
131
+ // end of the wizard — see `collectWizardInputs` in `src/cli/init.ts`.
132
+ export type LLMAuth =
133
+ | { kind: 'api-key'; apiKey: string }
134
+ | { kind: 'oauth'; runLogin: OAuthLoginRunner }
135
+ | { kind: 'oauth-completed' }
125
136
 
126
137
  export type InitOptions = {
127
138
  cwd: string
@@ -223,8 +234,8 @@ export async function runInit({
223
234
  // Same trap as kakaotalk-auth: scaffold-then-fail-auth would leave
224
235
  // typeclaw.json without working credentials and the runtime would silently
225
236
  // refuse to boot. The login itself doesn't need the agent folder to exist
226
- // — pi-ai's OAuth helper just needs a writable path for secrets.json, which
227
- // we create on demand inside scaffold().
237
+ // — pi-ai's OAuth helper just needs a writable path for secrets.json, and
238
+ // the `mkdir` below creates it on demand before the login runs.
228
239
  if (resolvedAuth.kind === 'oauth') {
229
240
  emit({ step: 'oauth-login', phase: 'start' })
230
241
  await mkdir(cwd, { recursive: true })
@@ -14,16 +14,29 @@ export type OAuthLoginResult = { ok: true } | { ok: false; reason: string }
14
14
  export type OAuthLoginRunner = (options: { cwd: string; model: KnownModelRef }) => Promise<OAuthLoginResult>
15
15
 
16
16
  // Wrap pi-ai's OAuth callbacks so the CLI doesn't have to know about the
17
- // upstream callback shape. The CLI only sees three lifecycle events:
17
+ // upstream callback shape. The CLI sees four lifecycle events:
18
18
  // (1) onAuth(url) — print the URL the user must visit
19
19
  // (2) onProgress(message) — show waiting/finalizing status
20
20
  // (3) onPrompt(prompt) — ask the user for a manual code if the browser flow
21
- // can't reach the local callback server. Most users won't see this; it
22
- // fires when they paste the post-redirect URL by hand.
21
+ // can't reach the local callback server. Fires only after the local
22
+ // server gave up (bind error -> waitForCode resolves null).
23
+ // (4) onManualCodeInput() — concurrent paste input that RACES the local
24
+ // callback server. Required for cross-device flows: pi-ai's openai-codex
25
+ // OAuth hardcodes redirect_uri=http://localhost:1455/auth/callback, which
26
+ // resolves to the *browser's* machine. When the user runs `typeclaw init`
27
+ // over SSH or on a remote dev box and completes login on a different
28
+ // laptop, the browser callback never reaches the CLI's local server and
29
+ // waitForCode() hangs forever — so onPrompt would never fire either.
30
+ // onManualCodeInput is the upstream-supported escape hatch: it shows a
31
+ // paste field IMMEDIATELY alongside the URL, and whichever path lands a
32
+ // code first wins. parseAuthorizationInput on the upstream side accepts
33
+ // the full redirect URL, the bare `code=...&state=...` query string, or
34
+ // just the code value.
23
35
  export type OAuthCallbacks = {
24
36
  onAuth: (url: string, instructions?: string) => void
25
37
  onProgress?: (message: string) => void
26
38
  onPrompt: (message: string, placeholder?: string) => Promise<string | null>
39
+ onManualCodeInput?: () => Promise<string>
27
40
  }
28
41
 
29
42
  // Default runner: real OAuth flow against pi-ai. Tests inject a stub to skip
@@ -50,6 +63,7 @@ export function makeOAuthLoginRunner(callbacks: OAuthCallbacks): OAuthLoginRunne
50
63
  }
51
64
  return value
52
65
  },
66
+ onManualCodeInput: callbacks.onManualCodeInput,
53
67
  })
54
68
  return { ok: true }
55
69
  } catch (error) {
@@ -25,6 +25,21 @@ export type BuiltinRoleSpec = {
25
25
  readonly permissions: readonly string[]
26
26
  }
27
27
 
28
+ // Owner carries low + medium tier strings explicitly AND the wildcard
29
+ // sentinel. The sentinel expands to plugin-contributed `security.bypass.*`
30
+ // strings minus the security plugin's `ownerWildcardExclusions` (today:
31
+ // `security.bypass.high` plus high-tier per-guard strings). Net effect:
32
+ // owner auto-bypasses every low- and medium-tier guard, and high-tier
33
+ // guards require per-call ack from owner too (the audience-leak rule —
34
+ // owner-in-public-channel must not silently post credentials).
35
+ //
36
+ // Trusted carries only `security.bypass.low`. Trusted does NOT carry the
37
+ // pre-PR per-guard grants (`bypassSecretExfilBash`, `bypassGitExfil`):
38
+ // those guards are medium/high under the audience-leak axis and per-guard
39
+ // grants would re-introduce exactly the bypass holes the tier system
40
+ // exists to prevent. Operators who want the pre-PR ergonomics can add the
41
+ // per-guard strings explicitly to `roles.trusted.permissions[]` in
42
+ // typeclaw.json — that path stays alive forever.
28
43
  export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> = {
29
44
  owner: {
30
45
  match: [{ kind: 'tui' }],
@@ -32,17 +47,14 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
32
47
  CORE_PERMISSIONS.channelRespond,
33
48
  CORE_PERMISSIONS.cronSchedule,
34
49
  CORE_PERMISSIONS.cronModify,
50
+ 'security.bypass.low',
51
+ 'security.bypass.medium',
35
52
  OWNER_SECURITY_WILDCARD,
36
53
  ],
37
54
  },
38
55
  trusted: {
39
56
  match: [],
40
- permissions: [
41
- CORE_PERMISSIONS.channelRespond,
42
- CORE_PERMISSIONS.cronSchedule,
43
- 'security.bypass.secretExfilBash',
44
- 'security.bypass.gitExfil',
45
- ],
57
+ permissions: [CORE_PERMISSIONS.channelRespond, CORE_PERMISSIONS.cronSchedule, 'security.bypass.low'],
46
58
  },
47
59
  member: {
48
60
  match: [],
@@ -54,11 +66,21 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
54
66
  },
55
67
  }
56
68
 
69
+ // Expands the owner wildcard sentinel against plugin-contributed
70
+ // `security.bypass.*` strings. `wildcardExclusions` is an optional set of
71
+ // permission strings the sentinel must NOT expand to — used by the
72
+ // bundled security plugin to exclude `security.bypass.high` AND the
73
+ // per-guard strings for high-tier guards, so the wildcard does not
74
+ // auto-grant audience-leak bypass to owner. Explicit operator grants of
75
+ // those strings in `roles.owner.permissions[]` still take effect (they
76
+ // flow through the non-sentinel branch).
57
77
  export function expandOwnerWildcard(
58
78
  ownerPermissions: readonly string[],
59
79
  pluginContributed: readonly string[],
80
+ wildcardExclusions: readonly string[] = [],
60
81
  ): readonly string[] {
61
- const bypass = pluginContributed.filter((p) => p.startsWith('security.bypass.'))
82
+ const excludeSet = new Set(wildcardExclusions)
83
+ const bypass = pluginContributed.filter((p) => p.startsWith('security.bypass.') && !excludeSet.has(p))
62
84
  const out: string[] = []
63
85
  for (const p of ownerPermissions) {
64
86
  if (p === OWNER_SECURITY_WILDCARD) {
@@ -38,6 +38,12 @@ type ResolvedRole = {
38
38
  export type CreatePermissionServiceOptions = {
39
39
  roles?: RolesConfig
40
40
  pluginPermissions?: readonly string[]
41
+ // Permission strings that the owner wildcard sentinel must NOT
42
+ // auto-expand to. Today populated from the bundled security plugin's
43
+ // high-tier list so audience-leak guards do not get auto-granted to
44
+ // owner. Generic by design — any future plugin could contribute
45
+ // exclusions through the plugin manager. See expandOwnerWildcard.
46
+ ownerWildcardExclusions?: readonly string[]
41
47
  }
42
48
 
43
49
  // Returns warnings for user-declared `permissions[]` strings that aren't
@@ -97,7 +103,8 @@ function levenshtein(a: string, b: string): number {
97
103
 
98
104
  export function createPermissionService(opts: CreatePermissionServiceOptions = {}): PermissionService {
99
105
  const pluginPermissions = opts.pluginPermissions ?? []
100
- let resolved = buildRoleTable(opts.roles ?? {}, pluginPermissions)
106
+ const ownerWildcardExclusions = opts.ownerWildcardExclusions ?? []
107
+ let resolved = buildRoleTable(opts.roles ?? {}, pluginPermissions, ownerWildcardExclusions)
101
108
  let byName = new Map(resolved.map((r) => [r.name, r]))
102
109
 
103
110
  function resolveRole(origin: SessionOrigin | undefined): string {
@@ -139,36 +146,46 @@ export function createPermissionService(opts: CreatePermissionServiceOptions = {
139
146
  return { role: name, permissions: role?.permissions ?? [] }
140
147
  },
141
148
  replaceRoles(roles) {
142
- resolved = buildRoleTable(roles ?? {}, pluginPermissions)
149
+ resolved = buildRoleTable(roles ?? {}, pluginPermissions, ownerWildcardExclusions)
143
150
  byName = new Map(resolved.map((r) => [r.name, r]))
144
151
  },
145
152
  }
146
153
  }
147
154
 
148
- function buildRoleTable(roles: RolesConfig, pluginPermissions: readonly string[]): ResolvedRole[] {
155
+ function buildRoleTable(
156
+ roles: RolesConfig,
157
+ pluginPermissions: readonly string[],
158
+ ownerWildcardExclusions: readonly string[],
159
+ ): ResolvedRole[] {
149
160
  const out: ResolvedRole[] = []
150
161
  const seen = new Set<string>()
151
162
 
152
163
  for (const name of Object.keys(roles)) {
153
164
  if (seen.has(name)) continue
154
165
  seen.add(name)
155
- out.push(resolveOne(name, roles[name], pluginPermissions))
166
+ out.push(resolveOne(name, roles[name], pluginPermissions, ownerWildcardExclusions))
156
167
  }
157
168
 
158
169
  for (const name of BUILTIN_ROLE_NAMES) {
159
170
  if (seen.has(name)) continue
160
- out.push(resolveOne(name, undefined, pluginPermissions))
171
+ out.push(resolveOne(name, undefined, pluginPermissions, ownerWildcardExclusions))
161
172
  }
162
173
 
163
174
  return out
164
175
  }
165
176
 
166
- function resolveOne(name: string, user: RoleConfig | undefined, pluginPermissions: readonly string[]): ResolvedRole {
177
+ function resolveOne(
178
+ name: string,
179
+ user: RoleConfig | undefined,
180
+ pluginPermissions: readonly string[],
181
+ ownerWildcardExclusions: readonly string[],
182
+ ): ResolvedRole {
167
183
  if (isBuiltinRoleName(name)) {
168
184
  const builtin = BUILTIN_ROLES[name]
169
185
  const match = [...builtin.match, ...(user?.match ?? [])]
170
186
  const rawPerms = user?.permissions !== undefined ? user.permissions : [...builtin.permissions]
171
- const permissions = name === 'owner' ? expandOwnerWildcard(rawPerms, pluginPermissions) : rawPerms
187
+ const permissions =
188
+ name === 'owner' ? expandOwnerWildcard(rawPerms, pluginPermissions, ownerWildcardExclusions) : rawPerms
172
189
  return { name, match, permissions }
173
190
  }
174
191
  return {