typeclaw 0.37.5 → 0.37.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1168,7 +1168,7 @@ function persistMigratedConfig(cwd: string, json: unknown, applied: readonly Mig
1168
1168
  }
1169
1169
  }
1170
1170
 
1171
- export type ValidateConfigResult = { ok: true } | { ok: false; reason: string }
1171
+ export type ValidateConfigResult = { ok: true; warnings?: string[] } | { ok: false; reason: string }
1172
1172
 
1173
1173
  // Missing file → ok (matches `loadMounts` in src/container/up.ts; `isInitialized`
1174
1174
  // is the dedicated check for "not initialized"). Present but invalid → fail, so
@@ -1200,6 +1200,18 @@ export function validateConfig(cwd: string, options: ValidateConfigOptions = {})
1200
1200
  const parsed = parseConfigJson(raw, { migrate: true, persistTarget: cwd })
1201
1201
  if (!parsed.ok) return parsed
1202
1202
 
1203
+ const allowUnsafeAppend = process.env[ALLOW_UNSAFE_DOCKER_APPEND_ENV] === '1'
1204
+ const warnings: string[] = []
1205
+ const appendLines = parsed.config.docker.file.append
1206
+ for (let i = 0; i < appendLines.length; i++) {
1207
+ const check = validateDockerfileAppendLine(appendLines[i]!)
1208
+ if (!check.ok) {
1209
+ if (check.kind === 'semantic' && allowUnsafeAppend) continue
1210
+ return { ok: false, reason: `docker.file.append[${i}] ${check.reason}` }
1211
+ }
1212
+ if (check.warning) warnings.push(`docker.file.append[${i}] ${check.warning}`)
1213
+ }
1214
+
1203
1215
  if (!options.skipMounts) {
1204
1216
  for (const mount of parsed.config.mounts) {
1205
1217
  const check = validateMount(mount, cwd)
@@ -1207,7 +1219,7 @@ export function validateConfig(cwd: string, options: ValidateConfigOptions = {})
1207
1219
  }
1208
1220
  }
1209
1221
 
1210
- return { ok: true }
1222
+ return warnings.length > 0 ? { ok: true, warnings } : { ok: true }
1211
1223
  }
1212
1224
 
1213
1225
  export type ParseConfigJsonResult = { ok: true; config: Config } | { ok: false; reason: string }
@@ -1253,11 +1265,15 @@ export function parseConfigJson(raw: string, options: ParseConfigJsonOptions = {
1253
1265
  return { ok: true, config: result.data }
1254
1266
  }
1255
1267
 
1256
- // Verifies a mount's host path: exists, is a directory, is readable, and is
1257
- // writable when not declared `readOnly`. Symlinks are followed (statSync's
1258
- // default) so a broken symlink reads as "does not exist". Permission checks
1259
- // are skipped when running as root (uid 0) euidaccess returns success
1260
- // regardless, so the test would be vacuous and inconsistent with non-root.
1268
+ // Verifies a mount's host path: exists, is a regular file or directory, is
1269
+ // readable, and is writable when not declared `readOnly`. Symlinks are
1270
+ // followed (statSync's default) so a broken symlink reads as "does not exist".
1271
+ // File mounts are allowed so credentials and config can be exposed as a single
1272
+ // path (e.g. an SSH private key); sockets, FIFOs, and devices are rejected
1273
+ // because exposing them is an advanced, security-sensitive case we don't take
1274
+ // implicitly. Permission checks are skipped when running as root (uid 0) —
1275
+ // euidaccess returns success regardless, so the test would be vacuous and
1276
+ // inconsistent with non-root.
1261
1277
  export function validateMount(mount: Mount, cwd: string): ValidateConfigResult {
1262
1278
  const resolved = expandMountPath(mount.path, cwd)
1263
1279
  const label = `mount "${mount.name}"`
@@ -1274,8 +1290,8 @@ export function validateMount(mount: Mount, cwd: string): ValidateConfigResult {
1274
1290
  return { ok: false, reason: `${label}: cannot stat ${resolved}: ${detail}` }
1275
1291
  }
1276
1292
 
1277
- if (!stats.isDirectory()) {
1278
- return { ok: false, reason: `${label}: path ${resolved} is not a directory` }
1293
+ if (!stats.isDirectory() && !stats.isFile()) {
1294
+ return { ok: false, reason: `${label}: path ${resolved} is not a file or directory` }
1279
1295
  }
1280
1296
 
1281
1297
  const isRoot = typeof process.getuid === 'function' && process.getuid() === 0
@@ -1301,6 +1317,181 @@ export function validateMount(mount: Mount, cwd: string): ValidateConfigResult {
1301
1317
  return { ok: true }
1302
1318
  }
1303
1319
 
// Host env (not config) on purpose: an in-container agent can edit its own
// typeclaw.json but cannot set the env of the host `typeclaw start` that runs
// this gate, so it can never waive its own footgun. Only relaxes SEMANTIC
// blocks; structural blocks always fire (they break Dockerfile generation).
const ALLOW_UNSAFE_DOCKER_APPEND_ENV = 'TYPECLAW_ALLOW_UNSAFE_DOCKER_APPEND'

// Instructions an append entry may start with. FROM/ENTRYPOINT/CMD/MAINTAINER
// are intentionally excluded — see the structural blocks in
// validateDockerfileAppendLine for why.
const ALLOWED_APPEND_INSTRUCTIONS = new Set([
  'RUN',
  'ENV',
  'ARG',
  'LABEL',
  'COPY',
  'ADD',
  'USER',
  'WORKDIR',
  'SHELL',
  'EXPOSE',
  'VOLUME',
  'STOPSIGNAL',
  'HEALTHCHECK',
  'ONBUILD',
])

// Decode primitives that, paired with dynamic execution on the same line, form
// the "decode an opaque blob and run it" anti-pattern that bricked a real build
// (an agent base64-decoded the bash entrypoint shim and fed it to python3
// exec). Matching is substring/case-insensitive — these are code tokens the
// agent emits, not natural-language, so English literals are correct here (cf.
// the protocol-token exception in AGENTS.md).
const DECODE_PRIMITIVES = ['base64', 'b64decode', 'atob(', 'unhexlify', '.fromhex(', 'xxd -r']

// True dynamic-execution sinks — language constructs that run a STRING as code.
// Deliberately NOT including interpreter flags like `python3 -c`/`node -e`: a
// benign `python3 -c "print(base64.b64encode(...))"` legitimately mentions a
// decode primitive without ever executing the decoded bytes. The footgun is
// decode + a real exec sink (or decode piped to an interpreter, below).
// NOTE(review): these are matched against the lowercased line, so 'function('
// also matches the plain `function(` keyword and already subsumes
// 'new function(' — confirm that breadth is intended.
const EXEC_PRIMITIVES = ['exec(', 'eval(', 'new function(', 'function(']

// Decoded stdout piped straight into an interpreter: `base64 -d ... | sh`,
// `... | python3`, etc. The pipe is the execution step here, so it pairs with
// DECODE_PRIMITIVES independently of the EXEC_PRIMITIVES sinks above.
const DECODE_PIPED_TO_INTERPRETER =
  /\|\s*(?:sudo\s+)?(?:ba)?sh\b|\|\s*(?:sudo\s+)?python3?\b|\|\s*(?:sudo\s+)?(?:node|perl|ruby)\b/i

// Risky-but-legitimate operator patterns: piping a remote script straight into
// a shell, or ADDing a remote URL. Common enough in real build steps that a
// hard block would frustrate power users, dangerous enough to flag.
const APPEND_WARN_PATTERNS: Array<{ test: RegExp; note: string }> = [
  {
    test: /\b(?:curl|wget)\b[^|]*\|\s*(?:sudo\s+)?(?:ba)?sh\b/i,
    note: 'pipes a remote script directly into a shell (curl|bash); verify the source is trusted',
  },
  {
    test: /<\(\s*(?:curl|wget)\b/i,
    note: 'executes a remote script via process substitution; verify the source is trusted',
  },
  {
    // Leading `--flag[=value]` tokens (e.g. `--chown=`) are skipped so a
    // flagged remote ADD is still reported. `--checksum=` is the exception: the
    // negative lookahead refuses to step over it, so a checksum-verified
    // download is treated as pinned and produces no warning.
    test: /^ADD\s+(?:--(?!checksum=)\S+\s+)*https?:\/\//i,
    note: 'ADD of a remote URL fetches an unpinned artifact at build time; prefer a pinned COPY or checksum-verified RUN',
  },
]

/**
 * Outcome of checking one `docker.file.append` entry.
 *
 * - `ok: true` — the entry is accepted; `warning` is set when it matched one of
 *   the risky-but-allowed patterns.
 * - `ok: false` — the entry is rejected with a human-readable `reason`.
 *   `structural` blocks are unconditional (they break Dockerfile generation);
 *   `semantic` blocks are waivable via the host env override.
 */
export type AppendLineCheck =
  | { ok: true; warning?: string }
  | { ok: false; reason: string; kind: 'structural' | 'semantic' }

/**
 * Pure, side-effect-free validator for ONE docker.file.append entry.
 *
 * The newline rejection stays in the zod schema (dockerfileLineSchema) so it
 * fires on every parse including the agent's own config-write guard; this adds
 * the contextual policy the schema can't express cheaply.
 *
 * Checks run in a fixed order and the first problem found is returned:
 * empty entry, trailing line continuation, heredoc, parser directive,
 * disallowed instruction (all structural), then the entrypoint-patching and
 * decode-and-execute policies (semantic), and finally the warn-only patterns.
 *
 * @param line - The raw append entry exactly as written in the config.
 * @returns `{ ok: true }` (optionally with a `warning`) when the entry may be
 *   spliced into the generated Dockerfile, otherwise `{ ok: false }` with the
 *   reason and whether the block is `structural` or `semantic`.
 */
export function validateDockerfileAppendLine(line: string): AppendLineCheck {
  const trimmed = line.trim()

  if (trimmed === '') {
    return { ok: false, reason: 'is empty or whitespace-only', kind: 'structural' }
  }

  // A trailing backslash is a line continuation: it would merge the generated
  // ENTRYPOINT (spliced right after the append block) into this instruction.
  // Tested against the untrimmed line; `\s*$` already tolerates trailing space.
  if (/\\\s*$/.test(line)) {
    return {
      ok: false,
      reason:
        'ends with a line-continuation backslash, which would swallow the generated ENTRYPOINT; keep each entry self-contained',
      kind: 'structural',
    }
  }

  // Heredoc syntax spans multiple lines by definition and cannot work in a
  // single spliced entry — it would consume the following generated lines.
  // The lookarounds require EXACTLY two `<`, so a bash here-string
  // (`cmd <<< "text"`), which is complete on one line, is not mistaken for a
  // heredoc opener.
  if (/(?<!<)<<(?!<)-?\s*['"]?\w/.test(trimmed)) {
    return {
      ok: false,
      reason: 'uses heredoc syntax (<<EOF), which cannot be expressed as a single Dockerfile line',
      kind: 'structural',
    }
  }

  if (trimmed.startsWith('#')) {
    // Parser directives (`# syntax=`, `# escape=`) only have meaning at the top
    // of a Dockerfile; spliced before ENTRYPOINT they are at best inert and at
    // worst confusing. Plain comments are fine.
    if (/^#\s*(syntax|escape|check)\s*=/i.test(trimmed)) {
      return {
        ok: false,
        reason: 'is a parser directive (# syntax=/# escape=), which is only valid at the top of a Dockerfile',
        kind: 'structural',
      }
    }
    return { ok: true }
  }

  // First whitespace-delimited token, upper-cased so instruction matching is
  // case-insensitive.
  const instruction = trimmed.split(/\s+/, 1)[0]?.toUpperCase() ?? ''

  if (instruction === 'FROM') {
    return {
      ok: false,
      reason: 'starts a new build stage (FROM), discarding everything TypeClaw layered before it',
      kind: 'structural',
    }
  }
  if (instruction === 'ENTRYPOINT' || instruction === 'CMD') {
    return {
      ok: false,
      reason: `overrides the container ${instruction}, which TypeClaw owns (the entrypoint shim is appended right after this block)`,
      kind: 'structural',
    }
  }
  if (!ALLOWED_APPEND_INSTRUCTIONS.has(instruction)) {
    return {
      ok: false,
      reason: `does not begin with a recognized Dockerfile instruction (got "${instruction}")`,
      kind: 'structural',
    }
  }

  const lower = trimmed.toLowerCase()

  // The actual incident: mutating TypeClaw's own entrypoint shim. This is never
  // a supported customization surface — entrypoint changes belong in TypeClaw
  // source, not in a build-time patch script.
  if (lower.includes('typeclaw-entrypoint')) {
    return {
      ok: false,
      reason:
        'references the TypeClaw-owned entrypoint (typeclaw-entrypoint); patching it from docker.file.append is unsupported and brittle',
      kind: 'semantic',
    }
  }

  // Decode-an-opaque-blob-and-execute-it. A benign decode (encoding output,
  // writing a file) or a bare `python3 -c "print(...)"` both pass; only decode
  // PAIRED with a real exec sink — or piped into an interpreter — is blocked.
  const hasDecode = DECODE_PRIMITIVES.some((p) => lower.includes(p))
  const hasExec = EXEC_PRIMITIVES.some((p) => lower.includes(p)) || DECODE_PIPED_TO_INTERPRETER.test(lower)
  if (hasDecode && hasExec) {
    return {
      ok: false,
      reason:
        'decodes an opaque payload and executes it (e.g. base64 + exec/eval), an obfuscated-code anti-pattern that has bricked builds',
      kind: 'semantic',
    }
  }

  // Warn-only patterns: the entry is accepted, with the first matching note.
  for (const { test, note } of APPEND_WARN_PATTERNS) {
    if (test.test(trimmed)) return { ok: true, warning: note }
  }

  return { ok: true }
}
1494
+
1304
1495
  function formatZodError(error: z.ZodError): string {
1305
1496
  return error.issues
1306
1497
  .map((issue) => {
@@ -338,16 +338,16 @@ async function runExec(job: ExecJob, cwd: string): Promise<void> {
338
338
  const proc = Bun.spawn({
339
339
  cmd: [cmd, ...args],
340
340
  cwd,
341
- stdout: 'pipe',
341
+ stdout: 'ignore',
342
342
  stderr: 'pipe',
343
343
  env: {
344
344
  ...process.env,
345
345
  TYPECLAW_PARENT_ORIGIN_JSON: JSON.stringify(parentOrigin),
346
346
  },
347
347
  })
348
- const code = await proc.exited
348
+ const stderrText = new Response(proc.stderr).text()
349
+ const [code, stderr] = await Promise.all([proc.exited, stderrText])
349
350
  if (code !== 0) {
350
- const stderr = await new Response(proc.stderr).text()
351
351
  throw new Error(`exec job ${job.id} exited with code ${code}: ${stderr.trim() || 'no stderr'}`)
352
352
  }
353
353
  }
@@ -358,9 +358,9 @@ set -eu
358
358
  # The persist root lives under /agent/.typeclaw/home/ (bind-mounted
359
359
  # from the agent folder via the -v <cwd>:/agent flag in start.ts).
360
360
  # Namespacing under .typeclaw/ keeps the agent's top-level layout clean and reserves
361
- # a system-owned subtree we can extend later (e.g. ~/.gemini/,
362
- # ~/.config/<tool>/) without colliding with user files. The directory
363
- # is gitignored by buildGitignore() so credentials never enter history.
361
+ # a system-owned subtree we can extend later (e.g. ~/.gemini/) without
362
+ # colliding with user files. The directory is gitignored by buildGitignore()
363
+ # so credentials never enter history.
364
364
  #
365
365
  # Three invariants this function enforces:
366
366
  #
@@ -372,11 +372,11 @@ set -eu
372
372
  # if a previous container life happened to write a real ~/.codex/
373
373
  # dir before this code shipped.
374
374
  #
375
- # 2. We symlink the FILE, not the directory. Codex writes other state
376
- # to ~/.codex/ over time (history.jsonl, log/, config.toml). Linking
377
- # only auth.json keeps the persistence scope tight to credentials;
378
- # history/logs stay ephemeral by design. Future credentials get
379
- # added file-by-file here, not by widening to a directory link.
375
+ # 2. We symlink credential FILES for tools whose config dirs are mostly
376
+ # scratch/history (Codex, Claude). We do not redirect global config
377
+ # locations such as XDG_CONFIG_HOME or ~/.config here because tools like
378
+ # git also read config from those paths; first-party bundles that need
379
+ # persistence should set their own app-specific env vars instead.
380
380
  #
381
381
  # 3. We mkdir -p the target's parent on every boot. /agent is bind-
382
382
  # mounted, so the host-side path may exist or not depending on
@@ -1264,6 +1264,9 @@ ${fromAndHeavyLayers}
1264
1264
 
1265
1265
  ENV NODE_ENV=production
1266
1266
 
1267
+ # Persist first-party GWS config without changing global XDG/git config lookup.
1268
+ ENV GWS_CONFIG_HOME=/agent/workspace/.config/gws
1269
+
1267
1270
  # Keep agent-messenger's fallback config dir inside workspace/ for any future
1268
1271
  # SDK fallback paths. TypeClaw's KakaoTalk adapter does not write there:
1269
1272
  # credentials live in secrets.json#channels.kakaotalk and container writes go