npm - typeclaw - Versions diffs - 0.4.0 → 0.5.0 - Mend

typeclaw 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/package.json +1 -1
package/src/agent/auth.ts +4 -2
package/src/agent/index.ts +16 -28
package/src/agent/model-fallback.ts +127 -0
package/src/agent/tools/curl-impersonate.ts +300 -0
package/src/agent/tools/ddg.ts +13 -88
package/src/agent/tools/webfetch/fetch.ts +105 -2
package/src/agent/tools/webfetch/tool.ts +4 -0
package/src/bundled-plugins/agent-browser/shim.ts +47 -0
package/src/bundled-plugins/backup/subagents.ts +2 -0
package/src/bundled-plugins/memory/README.md +49 -12
package/src/bundled-plugins/memory/citation-superset.ts +63 -0
package/src/bundled-plugins/memory/dreaming.ts +105 -17
package/src/bundled-plugins/memory/index.ts +2 -2
package/src/bundled-plugins/memory/memory-logger.ts +45 -26
package/src/bundled-plugins/memory/strength.ts +127 -0
package/src/bundled-plugins/memory/topics.ts +75 -0
package/src/bundled-plugins/security/index.ts +87 -43
package/src/bundled-plugins/security/permissions.ts +36 -0
package/src/bundled-plugins/security/policies/git-exfil.ts +20 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +12 -0
package/src/bundled-plugins/security/policies/prompt-injection.ts +23 -3
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +7 -0
package/src/bundled-plugins/security/policies/secret-exfil-read.ts +6 -0
package/src/bundled-plugins/security/policies/session-search-secrets.ts +9 -0
package/src/bundled-plugins/security/policies/ssrf.ts +6 -0
package/src/bundled-plugins/security/policies/system-prompt-leak.ts +7 -0
package/src/channels/adapters/github/index.ts +87 -3
package/src/channels/router.ts +194 -28
package/src/channels/types.ts +3 -1
package/src/cli/init.ts +146 -42
package/src/cli/model.ts +10 -2
package/src/cli/oauth-callbacks.ts +49 -0
package/src/cli/provider.ts +3 -20
package/src/config/config.ts +59 -24
package/src/config/models-mutation.ts +42 -8
package/src/config/providers-mutation.ts +12 -8
package/src/container/start.ts +18 -1
package/src/cron/consumer.ts +129 -43
package/src/init/dockerfile.ts +109 -3
package/src/init/hatching.ts +2 -2
package/src/init/index.ts +14 -3
package/src/init/oauth-login.ts +17 -3
package/src/permissions/builtins.ts +29 -7
package/src/permissions/permissions.ts +24 -7
package/src/plugin/define.ts +2 -0
package/src/plugin/manager.ts +14 -0
package/src/plugin/types.ts +6 -0
package/src/run/index.ts +2 -1
package/src/skills/typeclaw-memory/SKILL.md +25 -15
package/src/skills/typeclaw-permissions/SKILL.md +35 -17
package/src/tui/index.ts +35 -3
package/src/usage/report.ts +15 -12
package/typeclaw.schema.json +57 -25

package/src/config/providers-mutation.ts CHANGED Viewed

@@ -136,8 +136,8 @@ export function findModelsReferencingProvider(cwd: string, providerId: string):
   const models = readModelsOrNull(cwd)
   if (models === null) return []
   const out: string[] = []
-  for (const [profile, ref] of Object.entries(models)) {
-    if (refTargetsProvider(ref, providerId)) out.push(profile)
+  for (const [profile, refs] of Object.entries(models)) {
+    if (refs.some((r) => refTargetsProvider(r, providerId))) out.push(profile)
   }
   return out
 }
@@ -212,12 +212,16 @@ function readEnvKey(env: NodeJS.ProcessEnv, key: string): string | undefined {
 function buildProviderReferenceMap(models: Models | null): Map<string, string[]> {
   const out = new Map<string, string[]>()
   if (models === null) return out
-  for (const [profile, ref] of Object.entries(models)) {
-    const providerId = safeProviderForRef(ref)
-    if (providerId === null) continue
-    const existing = out.get(providerId) ?? []
-    existing.push(profile)
-    out.set(providerId, existing)
+  for (const [profile, refs] of Object.entries(models)) {
+    for (const ref of refs) {
+      const providerId = safeProviderForRef(ref)
+      if (providerId === null) continue
+      const existing = out.get(providerId) ?? []
+      if (!existing.includes(profile)) {
+        existing.push(profile)
+        out.set(providerId, existing)
+      }
+    }
   }
   return out
 }

package/src/container/start.ts CHANGED Viewed

@@ -455,7 +455,24 @@ export async function planStart({
   // the start() preflight force-removes any lingering corpse before the next
   // launch — so the only state Docker ever sees in `docker ps -a` is either
   // a running container or one the user has not started again yet.
-  const runArgs = ['run', '-d', '--name', containerName, '-p', `${publishHost}:${hostPort}:${CONTAINER_PORT}`]
+  //
+  // `--shm-size=2g` is mandatory for the bundled Chrome (agent-browser) to
+  // survive heavy pages. Docker's default /dev/shm is 64MB; Chrome uses
+  // shared memory for the renderer process and silently crashes mid-load
+  // on any site with a large DOM or non-trivial WebGL. The crash surfaces
+  // as a blank page or "target closed" with no clear cause — easy to
+  // misattribute to bot detection. 2g matches the Playwright/Puppeteer
+  // canonical recommendation and is a memory cap, not an allocation (only
+  // used pages count against the host).
+  const runArgs = [
+    'run',
+    '-d',
+    '--name',
+    containerName,
+    '--shm-size=2g',
+    '-p',
+    `${publishHost}:${hostPort}:${CONTAINER_PORT}`,
+  ]
   // Network egress filter: when `typeclaw.json#network.blockInternal` is true,
   // grant the container CAP_NET_ADMIN at boot so the entrypoint shim can

package/src/cron/consumer.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 import type { AgentSession } from '@/agent'
-import { subscribeProviderErrors } from '@/agent/provider-error'
+import { promptWithFallback, resolveFallbackChain } from '@/agent/model-fallback'
 import type { SessionOrigin } from '@/agent/session-origin'
+import { getConfig } from '@/config'
+import type { KnownModelRef } from '@/config/providers'
 import type { HookBus } from '@/plugin'
 import type { Stream, Unsubscribe } from '@/stream'
@@ -41,7 +43,12 @@ export type CronConsumerLogger = {
 export type CreateCronConsumerOptions = {
   stream: Stream
   cwd: string
-  createSessionForCron: (job: PromptJob) => Promise<CronSession>
+  // The optional `refOverride` argument is consumed by the fallback loop: the
+  // consumer calls this factory once per ref in the profile's chain, pinning
+  // each attempt to the specified model. Factories that don't honor the
+  // override silently lose fallback semantics, so production wiring threads
+  // it through to `createSession({ refOverride })`.
+  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>
   // Builds the `CronHandlerContext` for the job and awaits its `handler`.
   // Wired by `src/run/index.ts` to reuse `runPromptForCommand` /
   // `runExecForCommand` from the command runner so plugin cron handlers and
@@ -121,7 +128,7 @@ export function createCronConsumer({
 async function runPrompt(
   job: PromptJob,
-  createSessionForCron: (job: PromptJob) => Promise<CronSession>,
+  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
   stream: Stream,
   logger: CronConsumerLogger,
 ): Promise<void> {
@@ -148,52 +155,131 @@ async function runPrompt(
     })
     return
   }
-  const session = await createSessionForCron(job)
-  const unsubProviderErrors =
-    session.session !== undefined
-      ? subscribeProviderErrors(session.session, (err) => {
-          logger.error(`[cron] ${job.id}: LLM call failed: ${err.message}`)
+  // Resolve the model fallback chain for the cron profile (cron jobs run
+  // under the `default` profile today). Single-ref configs produce a length-1
+  // chain; multi-ref configs (e.g. `"default": ["openai/...", "fireworks/..."]`)
+  // drive the retry-on-failure loop that `runPromptOnce` delegates to
+  // `promptWithFallback`.
+  const refs = resolveFallbackChain(getConfig().models, undefined)
+  await runPromptOnce(job, refs, createSessionForCron, logger)
+}
+async function runPromptOnce(
+  job: PromptJob,
+  refs: KnownModelRef[],
+  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
+  logger: CronConsumerLogger,
+): Promise<void> {
+  // Per-attempt lifecycle: every session we create gets full
+  // turn-start → turn-end → session-end → dispose bracketing, regardless of
+  // whether the helper chose it as the final session or disposed it as a
+  // failed earlier attempt. Without per-attempt session.end, plugin state
+  // keyed by sessionId (security plugin's remote-taint map, memory plugin's
+  // debounce timer) would orphan for every failed attempt. We track the
+  // last session separately so we can fire session.idle exactly once on
+  // success (matching pre-fallback cron behavior — see the pre-fallback
+  // try/finally structure: idle inside the prompt try-block, end in the
+  // outer finally).
+  let lastSession: CronSession | null = null
+  const result = await promptWithFallback({
+    refs,
+    text: job.prompt,
+    createSessionForRef: async (ref) => {
+      const created = await createSessionForCron(job, ref)
+      lastSession = created
+      const turnEvent =
+        created.hooks && created.sessionId !== undefined && created.agentDir !== undefined
+          ? {
+              sessionId: created.sessionId,
+              agentDir: created.agentDir,
+              ...(created.origin !== undefined ? { origin: created.origin } : {}),
+            }
+          : undefined
+      if (created.hooks && turnEvent !== undefined) {
+        await created.hooks.runSessionTurnStart(turnEvent)
+      }
+      // Bridge the CronSession wrapper into the AgentSession surface the
+      // fallback helper expects:
+      //   prompt    → CronSession.prompt (wrapper that calls AgentSession.prompt
+      //               in production, or a hand-rolled test fake)
+      //   subscribe → CronSession.session.subscribe when an underlying agent
+      //               session is supplied, else a no-op (soft-error detection
+      //               degrades to "off" in that mode; only hard throws drive
+      //               fallback). Test fakes that omit `.session` lose
+      //               soft-error fallback — production code always provides it.
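+      // .bind(created.session) is load-bearing: AgentSession.subscribe is a
+      // regular method that reads `this._eventListeners`. Storing the unbound
+      // method on this wrapper would make `this` the wrapper (or undefined
+      // when called detached) instead of the underlying session.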
+      const sessionForHelper: AgentSession = {
+        prompt: (text: string) => created.prompt(text),
+        subscribe: created.session?.subscribe.bind(created.session) ?? (() => () => {}),
+      } as unknown as AgentSession
+      return {
+        session: sessionForHelper,
+        // Per-attempt teardown. Fires turn.end and session.end for every
+        // session created (success or failure), then disposes the underlying
+        // resources. Hooks that throw are logged but don't prevent disposal.
+        dispose: async () => {
+          if (created.hooks && turnEvent !== undefined) {
+            try {
+              await created.hooks.runSessionTurnEnd(turnEvent)
+            } catch (e) {
+              logger.warn(`[cron] ${job.id}: turn-end hook threw: ${describe(e)}`)
+            }
+          }
+          if (created.hooks && created.sessionId !== undefined) {
+            try {
+              await created.hooks.runSessionEnd({
+                sessionId: created.sessionId,
+                ...(created.origin !== undefined ? { origin: created.origin } : {}),
+              })
+            } catch (e) {
+              logger.warn(`[cron] ${job.id}: session-end hook threw: ${describe(e)}`)
+            }
+          }
+          created.dispose?.()
+        },
+      }
+    },
+    onAttemptFailed: (attempt) => {
+      logger.warn(
+        `[cron] ${job.id}: ${attempt.outcome} failure on ${attempt.ref}: ${attempt.errorMessage ?? 'unknown'}; falling back`,
+      )
+    },
+  })
+  if (!result.success) {
+    logger.error(
+      `[cron] ${job.id}: all ${result.attempts.length} model(s) failed; last error: ${result.lastError?.message ?? 'unknown'}`,
+    )
+  }
+  // session.idle fires once, only on success, and only against the session
+  // that handled the turn. Then dispose the successful session (the helper
+  // returns the session+dispose so we can run post-prompt hooks against a
+  // live session before tearing it down). Failed-chain disposal is already
+  // handled by the helper's per-attempt dispose calls.
+  if (result.success && lastSession !== null) {
+    const finalSession: CronSession = lastSession
+    if (finalSession.hooks && finalSession.sessionId !== undefined) {
+      try {
+        await finalSession.hooks.runSessionIdle({
+          sessionId: finalSession.sessionId,
+          parentTranscriptPath: finalSession.getTranscriptPath?.(),
+          idleMs: 0,
+          ...(finalSession.origin !== undefined ? { origin: finalSession.origin } : {}),
         })
-      : null
-  const turnEvent =
-    session.hooks && session.sessionId !== undefined && session.agentDir !== undefined
-      ? {
-          sessionId: session.sessionId,
-          agentDir: session.agentDir,
-          ...(session.origin !== undefined ? { origin: session.origin } : {}),
-        }
-      : undefined
-  try {
-    if (session.hooks && turnEvent !== undefined) {
-      await session.hooks.runSessionTurnStart(turnEvent)
-    }
-    try {
-      await session.prompt(job.prompt)
-    } finally {
-      if (session.hooks && turnEvent !== undefined) {
-        await session.hooks.runSessionTurnEnd(turnEvent)
+      } catch (e) {
+        logger.warn(`[cron] ${job.id}: session-idle hook threw: ${describe(e)}`)
       }
     }
-    if (session.hooks && session.sessionId !== undefined) {
-      await session.hooks.runSessionIdle({
-        sessionId: session.sessionId,
-        parentTranscriptPath: session.getTranscriptPath?.(),
-        idleMs: 0,
-        ...(session.origin !== undefined ? { origin: session.origin } : {}),
-      })
-    }
-  } finally {
-    unsubProviderErrors?.()
-    if (session.hooks && session.sessionId !== undefined) {
-      await session.hooks.runSessionEnd({
-        sessionId: session.sessionId,
-        ...(session.origin !== undefined ? { origin: session.origin } : {}),
-      })
-    }
-    session.dispose?.()
+    await result.dispose()
   }
 }
+function describe(err: unknown): string {
+  return err instanceof Error ? err.message : String(err)
+}
 async function runExec(job: ExecJob, cwd: string): Promise<void> {
   const [cmd, ...args] = job.command
   if (!cmd) throw new Error(`exec job ${job.id}: empty command`)

package/src/init/dockerfile.ts CHANGED Viewed

@@ -27,6 +27,12 @@ export type BuildDockerfileOptions = {
 // `util-linux` carries `setpriv`, which the shim uses to drop CAP_NET_ADMIN
 // from the bounding set before exec'ing the agent. Listed first in the
 // apt-get install line so the package set is self-documenting at a glance.
+//
+// xvfb is intentionally NOT in baseline — it's a toggle (`xvfb: true` by
+// default, opt-out via `docker.file.xvfb: false`) because the shim
+// self-heals: it spawns Xvfb (and exports DISPLAY) if the binary is on
+// PATH, and execs the agent directly otherwise. See APT_FEATURES.xvfb
+// below and `buildEntrypointShim`.
 const BASELINE_APT_PACKAGES = ['git', 'ca-certificates', 'curl', 'gnupg', 'iptables', 'util-linux'] as const
 // curl-impersonate is the only currently-working way to query DuckDuckGo from
@@ -219,7 +225,96 @@ export function buildEntrypointShim(): string {
 # Source: src/init/dockerfile.ts \`buildEntrypointShim()\`.
 set -eu
+# start_xvfb launches Xvfb in the background under a stripped capability
+# bounding set so headed Chrome (agent-browser --headed, Playwright
+# headful) has a real X11 display to connect to. Headless containers
+# have no display server; Chrome --headless / --headless=new is
+# fingerprinted by modern bot detection (Akamai / Cloudflare BM)
+# regardless of UA spoof, so real headed Chrome under a virtual
+# framebuffer is the only path to a passing sensor score from a
+# server-side container.
+#
+# Two correctness invariants this function enforces:
+#
+# 1. Xvfb never holds CAP_NET_ADMIN. The shim runs as PID 1 with the
+#    container's full capability set (including NET_ADMIN when
+#    network.blockInternal=true). If we backgrounded Xvfb naked, it
+#    would inherit NET_ADMIN and keep it for the container's lifetime
+#    — defeating the capability-drop contract that setpriv applies to
+#    the agent process. Routing Xvfb through the same setpriv invocation
+#    we use for the agent strips NET_ADMIN before Xvfb's first exec.
+#    On the off-path (blockInternal=false) the bounding-set drop is a
+#    no-op (NET_ADMIN was never granted), but the call is harmless.
+#
+# 2. Xvfb startup failure is loud, not silent. \`Xvfb ... >/dev/null &\`
+#    under \`set -e\` does not fail the script if Xvfb exits immediately
+#    (missing library, display :99 already in use, malformed args). Without the
+#    explicit liveness probe below, the shim would then export DISPLAY
+#    and exec bun, agent-browser launches would die with "cannot open
+#    display", and the operator would chase a phantom bug. We capture
+#    $! and \`kill -0\` it on every poll iteration so an early exit
+#    becomes a clear stderr line and a non-zero shim exit.
+#
+# We DO NOT use \`xvfb-run\`. xvfb-run hangs forever when it runs as
+# PID 1 inside a container: its SIGUSR1-based ready handshake races
+# and stalls because PID 1 ignores signals without explicit handlers,
+# so the \`trap : USR1 ; wait || :\` dance never wakes up. Observed in
+# practice: container alive, Xvfb running, PID 1 stuck in
+# \`rt_sigsuspend\`, no agent process ever spawns, \`docker logs\` empty.
+# Documented industry workarounds are tini-as-PID-1 or direct Xvfb
+# spawn; we pick the latter (no new dep).
+#
+# Xvfb args:
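+#   :99                     fixed display number. The filesystem socket
+#                           (/tmp/.X11-unix/X99) is private to the
+#                           container's own /tmp, and the abstract socket
+#                           (\\0/tmp/.X11-unix/X99) is scoped to its
+#                           network namespace, so :99 is safe across
+#                           Compose'd containers unless they share /tmp
+#                           or a network namespace.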
+#   -screen 0 1920x1080x24  desktop viewport agent-browser advertises;
+#                           mismatched geometry is itself a fingerprint
+#                           signal.
+#   -ac                     disable host-based X access control so
+#                           Chrome connects without XAUTHORITY plumbing.
+#   +extension RANDR        expose the RandR extension; Chrome queries
+#                           it for screen geometry, and without it
+#                           \`screen.*\` values come back inconsistent.
+#   -nolisten tcp           refuse TCP connections (Unix socket only).
+#                           Defense-in-depth — we are in a netns with
+#                           no inbound exposure anyway.
+start_xvfb() {
+  if ! command -v Xvfb >/dev/null 2>&1; then
+    return 0
+  fi
+  setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin \\
+    -- Xvfb :99 -screen 0 1920x1080x24 -ac +extension RANDR -nolisten tcp \\
+    >/dev/null 2>&1 &
+  xvfb_pid=$!
+  export DISPLAY=:99
+  # Poll the socket every 10ms up to ~3s. Xvfb cold start is typically
+  # ~20-50ms on a modern host; 3s covers slow Docker Desktop VMs,
+  # Rosetta/QEMU emulation, and loaded CI runners. We also \`kill -0\`
+  # the pid each iteration so an Xvfb that died immediately surfaces
+  # as a clear error instead of a 3-second hang followed by silent
+  # "cannot open display" downstream.
+  i=0
+  while [ $i -lt 300 ]; do
+    if [ -S /tmp/.X11-unix/X99 ]; then
+      unset i xvfb_pid
+      return 0
+    fi
+    if ! kill -0 "$xvfb_pid" 2>/dev/null; then
+      echo "typeclaw-entrypoint: Xvfb exited immediately; cannot start headed display (docker.file.xvfb=true)" >&2
+      exit 1
+    fi
+    sleep 0.01
+    i=$((i + 1))
+  done
+  echo "typeclaw-entrypoint: Xvfb did not create /tmp/.X11-unix/X99 within 3s; refusing to continue (docker.file.xvfb=true)" >&2
+  exit 1
+}
 if [ "\${TYPECLAW_NETWORK_BLOCK_INTERNAL:-0}" != "1" ]; then
+  start_xvfb
   exec bun run typeclaw "$@"
 fi
@@ -264,6 +359,7 @@ ip6tables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
 ip6tables -A OUTPUT -o lo -j ACCEPT
 ${ipv6Rules.join('\n')}
+start_xvfb
 exec setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin -- bun run typeclaw "$@"
 `
 }
@@ -337,7 +433,7 @@ type AptFeature = {
   toAptArgs: (toggle: DockerfileFeatureToggle) => string[]
 }
-const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts', AptFeature> = {
+const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts' | 'xvfb', AptFeature> = {
   ffmpeg: { toAptArgs: (v) => singlePackageArgs('ffmpeg', v) },
   gh: { toAptArgs: (v) => singlePackageArgs('gh', v) },
   tmux: { toAptArgs: (v) => singlePackageArgs('tmux', v) },
@@ -345,6 +441,7 @@ const APT_FEATURES: Record<'ffmpeg' | 'gh' | 'tmux' | 'python' | 'cjkFonts', Apt
     toAptArgs: (v) => (v === true ? ['python3', 'python3-pip', 'python3-venv', 'python-is-python3'] : []),
   },
   cjkFonts: { toAptArgs: (v) => (v === true ? [CJK_FONTS_PACKAGE] : []) },
+  xvfb: { toAptArgs: (v) => (v === true ? ['xvfb'] : []) },
 }
 export function buildDockerfile(
@@ -616,12 +713,21 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\
     fi`
 function defaultConfig(): DockerfileConfig {
-  return { ffmpeg: false, gh: true, python: true, tmux: true, cjkFonts: true, cloudflared: true, append: [] }
+  return {
+    ffmpeg: false,
+    gh: true,
+    python: true,
+    tmux: true,
+    cjkFonts: true,
+    cloudflared: true,
+    xvfb: true,
+    append: [],
+  }
 }
 function collectToggleAptArgs(config: DockerfileConfig): string[] {
   const args: string[] = []
-  for (const key of ['ffmpeg', 'gh', 'python', 'tmux', 'cjkFonts'] as const) {
+  for (const key of ['ffmpeg', 'gh', 'python', 'tmux', 'cjkFonts', 'xvfb'] as const) {
     args.push(...APT_FEATURES[key].toAptArgs(config[key]))
   }
   return args

package/src/init/hatching.ts CHANGED Viewed

@@ -45,9 +45,9 @@ Do these in order. Do **not** ask further questions.
 2. Write one short paragraph in \`MEMORY.md\` marking this moment: the date, how you came to be, what you and the user agreed on.
 3. Configure local git identity with \`bash\`: \`git config user.name "<your name>"\` and \`git config user.email "<reasonable placeholder>@typeclaw.local"\` (unless the user provided an email).
 4. Stage and commit **only the files you authored** with commit message \`Hatched 🐣\`. This is the hatching-specific commit message — it overrides the normal version-control style guidance for this one commit.
-5. Send **one final short message** — two sentences at most — telling the user hatching is complete and they can \`/quit\` the TUI. Do not ask further questions. Do not offer more work. The container keeps running once they quit; keeping the TUI open here wastes time.
+5. Send **one final short message** — two sentences at most — telling the user hatching is complete and they can leave the TUI with \`/quit\` (or Ctrl+C). Do not ask further questions. Do not offer more work. The container keeps running once they quit; keeping the TUI open here wastes time.
-After that final message, stop. If the user keeps talking, answer briefly and remind them they can \`/quit\` whenever they are ready.
+After that final message, stop. If the user keeps talking, answer briefly and remind them they can \`/quit\` (or Ctrl+C) whenever they are ready.
 This is the only time you will receive these instructions. After the \`Hatched 🐣\` commit, your identity takes over and you run as yourself.`

package/src/init/index.ts CHANGED Viewed

@@ -121,7 +121,18 @@ export type KakaotalkAuthRunner = (options: { cwd: string }) => Promise<Kakaotal
 // API-key provider". Optional model defaults to DEFAULT_MODEL_REF, which is
 // an OpenAI api-key provider — so test fixtures that omit both fields keep
 // working under the api-key path.
-export type LLMAuth = { kind: 'api-key'; apiKey: string } | { kind: 'oauth'; runLogin: OAuthLoginRunner }
+//
+// `oauth-completed` is the CLI wizard's signal that the browser login already
+// happened up-front (right after the user picked the auth method) and the
+// resulting credentials are already in `secrets.json`. `runInit` then skips
+// the `oauth-login` step but still treats this as an OAuth provider (no API
+// key written, etc.). The wizard runs OAuth eagerly so the browser opens the
+// moment the user picks "OAuth (browser login)" instead of waiting until the
+// end of the wizard — see `collectWizardInputs` in `src/cli/init.ts`.
+export type LLMAuth =
+  | { kind: 'api-key'; apiKey: string }
+  | { kind: 'oauth'; runLogin: OAuthLoginRunner }
+  | { kind: 'oauth-completed' }
 export type InitOptions = {
   cwd: string
@@ -223,8 +234,8 @@ export async function runInit({
   // Same trap as kakaotalk-auth: scaffold-then-fail-auth would leave
   // typeclaw.json without working credentials and the runtime would silently
   // refuse to boot. The login itself doesn't need the agent folder to exist
-  // — pi-ai's OAuth helper just needs a writable path for secrets.json, which
-  // we create on demand inside scaffold().
+  // — pi-ai's OAuth helper just needs a writable path for secrets.json, and
+  // the `mkdir` below creates it on demand before the login runs.
   if (resolvedAuth.kind === 'oauth') {
     emit({ step: 'oauth-login', phase: 'start' })
     await mkdir(cwd, { recursive: true })

package/src/init/oauth-login.ts CHANGED Viewed

@@ -14,16 +14,29 @@ export type OAuthLoginResult = { ok: true } | { ok: false; reason: string }
 export type OAuthLoginRunner = (options: { cwd: string; model: KnownModelRef }) => Promise<OAuthLoginResult>
 // Wrap pi-ai's OAuth callbacks so the CLI doesn't have to know about the
-// upstream callback shape. The CLI only sees three lifecycle events:
+// upstream callback shape. The CLI sees four lifecycle events:
 // (1) onAuth(url) — print the URL the user must visit
 // (2) onProgress(message) — show waiting/finalizing status
 // (3) onPrompt(prompt) — ask the user for a manual code if the browser flow
-//     can't reach the local callback server. Most users won't see this; it
-//     fires when they paste the post-redirect URL by hand.
+//     can't reach the local callback server. Fires only after the local
+//     server gave up (bind error -> waitForCode resolves null).
+// (4) onManualCodeInput() — concurrent paste input that RACES the local
+//     callback server. Required for cross-device flows: pi-ai's openai-codex
+//     OAuth hardcodes redirect_uri=http://localhost:1455/auth/callback, which
+//     resolves to the *browser's* machine. When the user runs `typeclaw init`
+//     over SSH or on a remote dev box and completes login on a different
+//     laptop, the browser callback never reaches the CLI's local server and
+//     waitForCode() hangs forever — so onPrompt would never fire either.
+//     onManualCodeInput is the upstream-supported escape hatch: it shows a
+//     paste field IMMEDIATELY alongside the URL, and whichever path lands a
+//     code first wins. parseAuthorizationInput on the upstream side accepts
+//     the full redirect URL, the bare `code=...&state=...` query string, or
+//     just the code value.
 export type OAuthCallbacks = {
   onAuth: (url: string, instructions?: string) => void
   onProgress?: (message: string) => void
   onPrompt: (message: string, placeholder?: string) => Promise<string | null>
+  onManualCodeInput?: () => Promise<string>
 }
 // Default runner: real OAuth flow against pi-ai. Tests inject a stub to skip
@@ -50,6 +63,7 @@ export function makeOAuthLoginRunner(callbacks: OAuthCallbacks): OAuthLoginRunne
           }
           return value
         },
+        onManualCodeInput: callbacks.onManualCodeInput,
       })
       return { ok: true }
     } catch (error) {

package/src/permissions/builtins.ts CHANGED Viewed

@@ -25,6 +25,21 @@ export type BuiltinRoleSpec = {
   readonly permissions: readonly string[]
 }
+// Owner carries low + medium tier strings explicitly AND the wildcard
+// sentinel. The sentinel expands to plugin-contributed `security.bypass.*`
+// strings minus the security plugin's `ownerWildcardExclusions` (today:
+// `security.bypass.high` plus high-tier per-guard strings). Net effect:
+// owner auto-bypasses every low- and medium-tier guard, and high-tier
+// guards require per-call ack from owner too (the audience-leak rule —
+// owner-in-public-channel must not silently post credentials).
+//
+// Trusted carries only `security.bypass.low`. Trusted does NOT carry the
+// pre-PR per-guard grants (`bypassSecretExfilBash`, `bypassGitExfil`):
+// those guards are medium/high under the audience-leak axis and per-guard
+// grants would re-introduce exactly the bypass holes the tier system
+// exists to prevent. Operators who want the pre-PR ergonomics can add the
+// per-guard strings explicitly to `roles.trusted.permissions[]` in
+// typeclaw.json — that path stays alive forever.
 export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> = {
   owner: {
     match: [{ kind: 'tui' }],
@@ -32,17 +47,14 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
       CORE_PERMISSIONS.channelRespond,
       CORE_PERMISSIONS.cronSchedule,
       CORE_PERMISSIONS.cronModify,
+      'security.bypass.low',
+      'security.bypass.medium',
       OWNER_SECURITY_WILDCARD,
     ],
   },
   trusted: {
     match: [],
-    permissions: [
-      CORE_PERMISSIONS.channelRespond,
-      CORE_PERMISSIONS.cronSchedule,
-      'security.bypass.secretExfilBash',
-      'security.bypass.gitExfil',
-    ],
+    permissions: [CORE_PERMISSIONS.channelRespond, CORE_PERMISSIONS.cronSchedule, 'security.bypass.low'],
   },
   member: {
     match: [],
@@ -54,11 +66,21 @@ export const BUILTIN_ROLES: Readonly<Record<BuiltinRoleName, BuiltinRoleSpec>> =
   },
 }
+// Expands the owner wildcard sentinel against plugin-contributed
+// `security.bypass.*` strings. `wildcardExclusions` is an optional set of
+// permission strings the sentinel must NOT expand to — used by the
+// bundled security plugin to exclude `security.bypass.high` AND the
+// per-guard strings for high-tier guards, so the wildcard does not
+// auto-grant audience-leak bypass to owner. Explicit operator grants of
+// those strings in `roles.owner.permissions[]` still take effect (they
+// flow through the non-sentinel branch).
 export function expandOwnerWildcard(
   ownerPermissions: readonly string[],
   pluginContributed: readonly string[],
+  wildcardExclusions: readonly string[] = [],
 ): readonly string[] {
-  const bypass = pluginContributed.filter((p) => p.startsWith('security.bypass.'))
+  const excludeSet = new Set(wildcardExclusions)
+  const bypass = pluginContributed.filter((p) => p.startsWith('security.bypass.') && !excludeSet.has(p))
   const out: string[] = []
   for (const p of ownerPermissions) {
     if (p === OWNER_SECURITY_WILDCARD) {

package/src/permissions/permissions.ts CHANGED Viewed

@@ -38,6 +38,12 @@ type ResolvedRole = {
 export type CreatePermissionServiceOptions = {
   roles?: RolesConfig
   pluginPermissions?: readonly string[]
+  // Permission strings that the owner wildcard sentinel must NOT
+  // auto-expand to. Today populated from the bundled security plugin's
+  // high-tier list so audience-leak guards do not get auto-granted to
+  // owner. Generic by design — any future plugin could contribute
+  // exclusions through the plugin manager. See expandOwnerWildcard.
+  ownerWildcardExclusions?: readonly string[]
 }
 // Returns warnings for user-declared `permissions[]` strings that aren't
@@ -97,7 +103,8 @@ function levenshtein(a: string, b: string): number {
 export function createPermissionService(opts: CreatePermissionServiceOptions = {}): PermissionService {
   const pluginPermissions = opts.pluginPermissions ?? []
-  let resolved = buildRoleTable(opts.roles ?? {}, pluginPermissions)
+  const ownerWildcardExclusions = opts.ownerWildcardExclusions ?? []
+  let resolved = buildRoleTable(opts.roles ?? {}, pluginPermissions, ownerWildcardExclusions)
   let byName = new Map(resolved.map((r) => [r.name, r]))
   function resolveRole(origin: SessionOrigin | undefined): string {
@@ -139,36 +146,46 @@ export function createPermissionService(opts: CreatePermissionServiceOptions = {
       return { role: name, permissions: role?.permissions ?? [] }
     },
     replaceRoles(roles) {
-      resolved = buildRoleTable(roles ?? {}, pluginPermissions)
+      resolved = buildRoleTable(roles ?? {}, pluginPermissions, ownerWildcardExclusions)
       byName = new Map(resolved.map((r) => [r.name, r]))
     },
   }
 }
-function buildRoleTable(roles: RolesConfig, pluginPermissions: readonly string[]): ResolvedRole[] {
+function buildRoleTable(
+  roles: RolesConfig,
+  pluginPermissions: readonly string[],
+  ownerWildcardExclusions: readonly string[],
+): ResolvedRole[] {
   const out: ResolvedRole[] = []
   const seen = new Set<string>()
   for (const name of Object.keys(roles)) {
     if (seen.has(name)) continue
     seen.add(name)
-    out.push(resolveOne(name, roles[name], pluginPermissions))
+    out.push(resolveOne(name, roles[name], pluginPermissions, ownerWildcardExclusions))
   }
   for (const name of BUILTIN_ROLE_NAMES) {
     if (seen.has(name)) continue
-    out.push(resolveOne(name, undefined, pluginPermissions))
+    out.push(resolveOne(name, undefined, pluginPermissions, ownerWildcardExclusions))
   }
   return out
 }
-function resolveOne(name: string, user: RoleConfig | undefined, pluginPermissions: readonly string[]): ResolvedRole {
+function resolveOne(
+  name: string,
+  user: RoleConfig | undefined,
+  pluginPermissions: readonly string[],
+  ownerWildcardExclusions: readonly string[],
+): ResolvedRole {
   if (isBuiltinRoleName(name)) {
     const builtin = BUILTIN_ROLES[name]
     const match = [...builtin.match, ...(user?.match ?? [])]
     const rawPerms = user?.permissions !== undefined ? user.permissions : [...builtin.permissions]
-    const permissions = name === 'owner' ? expandOwnerWildcard(rawPerms, pluginPermissions) : rawPerms
+    const permissions =
+      name === 'owner' ? expandOwnerWildcard(rawPerms, pluginPermissions, ownerWildcardExclusions) : rawPerms
     return { name, match, permissions }
   }
   return {