typeclaw 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,18 +22,30 @@ const DUPLICATE_REASON =
22
22
  'If you intended to change your verdict, request changes or dismiss the prior review instead of re-approving.'
23
23
 
24
24
  // Makes formal `gh ... event=APPROVE` idempotent per PR across turns, sessions,
25
- // and restarts. The per-turn review ledger only guards prose claims and resets
26
- // every turn, so without this an APPROVE can fire again whenever the same PR
27
- // fans out into a second session or a follow-up turn. We reserve the PR in an
28
- // in-process set before the command runs (stops same-container concurrent
29
- // double-approve) and consult GitHub for the bot's effective review state
30
- // (stops cross-restart re-approval). Reads fail OPEN: a transient GitHub error
31
- // must never permanently strand the bot from approving a PR it has not yet
32
- // approved; the in-process reservation still blocks the concurrent case.
25
+ // and restarts. Two layers, each with a single job:
26
+ //
27
+ // 1. An in-process set of *in-flight* reservations (`pendingApprovals`) that
28
+ // blocks a second APPROVE while a first is still mid-flight in the same
29
+ // container — the concurrent-double-approve case the remote read can't see
30
+ // yet (GitHub hasn't recorded the in-flight review).
31
+ // 2. The authoritative GitHub effective-state read, the SOLE source of truth
32
+ // for "the bot already holds a standing APPROVED review." It understands
33
+ // supersession: a later CHANGES_REQUESTED / DISMISSED demotes an earlier
34
+ // APPROVED, so the bot may legitimately re-approve.
35
+ //
36
+ // The set is strictly an in-flight lock — never a persistent "already approved"
37
+ // memory. A completed APPROVE drops its reservation in release(), so the next
38
+ // APPROVE re-consults GitHub instead of being shadowed by a stale local entry.
39
+ // That separation fixes the strand bug: once a standing approval is superseded
40
+ // (PR back to CHANGES_REQUESTED), a stale local lock must not keep blocking a
41
+ // genuine re-approve — only the remote read decides, and it now reports
42
+ // alreadyApproved=false. Reads fail OPEN: a transient GitHub error must never
43
+ // permanently strand a first approval; the in-flight reservation still covers
44
+ // the concurrent case.
33
45
  export function createApproveIdempotencyGuard(deps: {
34
46
  resolveEffectiveApproval: EffectiveApprovalResolver
35
47
  }): ApproveIdempotencyGuard {
36
- const approvedOrPending = new Set<string>()
48
+ const pendingApprovals = new Set<string>()
37
49
  const reservedByCall = new Map<string, string>()
38
50
 
39
51
  return {
@@ -43,17 +55,21 @@ export function createApproveIdempotencyGuard(deps: {
43
55
 
44
56
  // Reserve BEFORE the await so two calls racing into guard() for the same
45
57
  // PR cannot both observe an empty set: the loser sees the winner's
46
- // reservation and is blocked. The reservation is provisional until the
47
- // remote check clears it.
48
- if (approvedOrPending.has(key)) return { block: true, reason: DUPLICATE_REASON }
49
- approvedOrPending.add(key)
58
+ // in-flight reservation and is blocked. The reservation is provisional
59
+ // and is always cleared on a terminal path (block below or release()).
60
+ if (pendingApprovals.has(key)) return { block: true, reason: DUPLICATE_REASON }
61
+ pendingApprovals.add(key)
50
62
  reservedByCall.set(args.callId, key)
51
63
 
52
64
  const remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
53
65
  if (remote.ok && remote.alreadyApproved) {
54
- // Already approved upstream: keep the PR locked but drop this call's
55
- // claim so release() won't later unlock a PR that is genuinely approved.
66
+ // Standing approval upstream. Block, and release the in-flight lock now:
67
+ // a blocked command never reaches tool.after, so release() won't run for
68
+ // this callId. Leaving the key set would resurrect the strand bug — the
69
+ // GitHub read is authoritative for the standing-approval case, not a
70
+ // lingering local entry.
56
71
  reservedByCall.delete(args.callId)
72
+ pendingApprovals.delete(key)
57
73
  return { block: true, reason: DUPLICATE_REASON }
58
74
  }
59
75
 
@@ -64,7 +80,11 @@ export function createApproveIdempotencyGuard(deps: {
64
80
  const key = reservedByCall.get(args.callId)
65
81
  if (key === undefined) return
66
82
  reservedByCall.delete(args.callId)
67
- if (!args.succeeded) approvedOrPending.delete(key)
83
+ // Always drop the in-flight lock, success or fail. On success the standing
84
+ // approval now lives on GitHub, so future APPROVEs are caught by the remote
85
+ // read (which tracks supersession); the local lock must not outlive the
86
+ // in-flight window and shadow that read.
87
+ pendingApprovals.delete(key)
68
88
  },
69
89
  }
70
90
  }
@@ -78,7 +78,7 @@ PREFER the two purpose-built research workers for any quick search or gathering
78
78
  - \`scout\` — web research. Spawn it for ANYTHING that lives on the public internet: prices, schedules, opening hours, standard timelines, prevailing practice, vendor docs, prior art, "what are the options for X". It returns a focused, citation-backed answer. This is your default for the research-resolvable facts a plan rests on.
79
79
  - \`explorer\` — local filesystem search. Spawn it to understand the existing code, config, sessions, memory, or git history on this agent — "what does this module do", "where is X configured", "summarize the shape of this system" — before planning a change to it.
80
80
 
81
- Lean on these liberally. A quick \`scout\` for real prices or a quick \`explorer\` for the actual shape of a module turns an assumption-laden plan into a grounded one, and it costs you almost nothing because the heavy reading happens in their context, not yours. Fan several out in parallel (background spawns) when a plan depends on multiple independent facts, then fold the distilled results into your single planning pass.
81
+ Lean on these liberally. A quick \`scout\` for real prices or a quick \`explorer\` for the actual shape of a module turns an assumption-laden plan into a grounded one, and it costs you almost nothing because the heavy reading happens in their context, not yours. When a plan depends on multiple independent facts, **fan out in parallel**: either emit all the independent \`spawn_subagent\` calls (sync, the default) in a SINGLE turn so they run concurrently and return together, or spawn them with \`run_in_background=true\` and fold each result in as its \`<system-reminder>\` arrives (your session stays alive until every child reports back). Either way, fold the distilled results into your single planning pass; do NOT spawn one, wait, then spawn the next unless the second genuinely depends on the first — that serializes what should be parallel.
82
82
 
83
83
  - Spawn these workers for context-heavy GATHERING, not for forming the plan. The decomposition, the sequencing, and the verdict are YOURS — never delegate the judgment.
84
84
  - Each delegated task must be self-contained: the worker does not see this conversation or the goal. Put everything it needs in the prompt.
@@ -269,6 +269,7 @@ If none of the listed skills fit the goal, load \`general\`. Keep the skill-sele
269
269
  rosterDescription:
270
270
  'turns a goal — a trip, a launch, a migration, a feature — into an actionable, sequenced, risk-aware plan, writes it to a file, and returns a structured signal; domain-neutral and reasoning-heavy, for any multi-step goal worth thinking through before acting; consider a `reviewer` pass on the plan it produces',
271
271
  canSpawnSubagents: true,
272
+ canBackgroundSpawnSubagents: true,
272
273
  timeoutMs: PLANNER_SPAWN_TIMEOUT_MS,
273
274
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
274
275
  toolResultBudget: {
@@ -64,14 +64,20 @@ The \`write_report\` tool enforces these limits in code: it accepts exactly one
64
64
 
65
65
  You run on a deliberately expensive model. Every search result page and every fetched article you pull into YOUR context spends that budget on grunt work and crowds out the thinking only you can do. So your DEFAULT for gathering is to delegate — not just for big sweeps, but for routine fetches too.
66
66
 
67
- **Delegate first; fetch yourself only as a last resort.** Before you reach for \`web_search\`, \`web_fetch\`, \`read\`, or \`grep\`, ask: "could \`scout\` or \`explorer\` get this for me and hand back just the distilled answer?" If yes — which is almost always — spawn the worker with \`spawn_subagent\`. Prefer to fan out **several \`scout\`/\`explorer\` spawns in parallel** (background spawns) at the very start of a gathering round, then fold their condensed results into your synthesis in one pass.
67
+ **Delegate first; fetch yourself only as a last resort.** Before you reach for \`web_search\`, \`web_fetch\`, \`read\`, or \`grep\`, ask: "could \`scout\` or \`explorer\` get this for me and hand back just the distilled answer?" If yes — which is almost always — spawn the worker with \`spawn_subagent\`.
68
+
69
+ **Fan out in parallel.** For a gathering round, emit several \`scout\`/\`explorer\` \`spawn_subagent\` calls together in a SINGLE turn so they run concurrently rather than one-at-a-time. You have two equivalent ways to do this, both of which deliver every worker's findings back to you:
70
+ - **Synchronous batch (simplest):** emit the calls with \`run_in_background=false\` (the default) in one assistant message. They execute concurrently and all results return together before your next turn, where you fold them into one synthesis pass.
71
+ - **Background:** emit them with \`run_in_background=true\`; each returns a task_id immediately and you receive a \`<system-reminder>\` as each completes, then fetch the result with \`subagent_output\`. Use this when you want to start synthesizing on early results while slower workers finish. Your session stays alive until every background child you spawned has reported back, so no result is lost.
72
+
73
+ Either way, do NOT spawn one, wait for it, then spawn the next unless the second task genuinely depends on the first's result — that serializes what should be parallel.
68
74
 
69
75
  - \`scout\` — web gathering. Hand it any web question, quick or broad ("latest figure for X", "find the primary source for Y", "sweep for every source on Z"); it does the searching and fetching and returns citation-backed findings, so the raw pages never touch your context.
70
76
  - \`explorer\` — local gathering. Hand it any filesystem/git/memory question; it returns the paths and excerpts you need without you grepping the tree yourself.
71
77
  - The synthesis, the cross-validation, and the confidence call are YOURS. Delegate the gathering, never the conclusion.
72
78
  - Each delegated task is self-contained: the worker does not see this conversation. Put everything it needs in the prompt.
73
79
  - The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
74
- - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Use background spawns for parallel gathering, then fold the results into your single report.
80
+ - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Whether you spawn synchronously or in the background, fold every worker's result into your single report before you finish.
75
81
 
76
82
  When IS it right to use your own \`web_search\`/\`web_fetch\`/\`read\`/\`grep\`? Only for the surgical, decisive touch: re-reading one specific passage a worker flagged, resolving a contradiction between two workers' findings, or a single fetch so central you must read it verbatim. If you find yourself doing more than a couple of direct fetches, stop and delegate the rest.
77
83
 
@@ -210,6 +216,7 @@ If none of the listed skills fit the question, load \`general\`. Keep the skill-
210
216
  // warrant operator's owner/trusted-only gate; any caller that can spawn a
211
217
  // subagent can spawn the researcher.
212
218
  canSpawnSubagents: true,
219
+ canBackgroundSpawnSubagents: true,
213
220
  timeoutMs: RESEARCHER_SPAWN_TIMEOUT_MS,
214
221
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
215
222
  toolResultBudget: {
@@ -79,7 +79,7 @@ You run on a deliberately expensive model. Reading a sprawling file tree, a gian
79
79
  - Spawn read-only/research workers for context-heavy gathering, not for forming the verdict. The findings and the \`<review>\` block are YOURS — never delegate the judgment.
80
80
  - Each delegated task must be self-contained: the worker does not see this conversation or the target. Put everything it needs in the prompt.
81
81
  - The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
82
- - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Use background spawns for parallel gathering, then fold the results into your single review pass.
82
+ - \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. To gather in parallel, either emit all the independent \`spawn_subagent\` calls (sync, the default) in a SINGLE turn so they run concurrently and return together, or spawn them with \`run_in_background=true\` and fold each result in as its \`<system-reminder>\` arrives (your session stays alive until every child reports back). Either way, fold the results into your single review pass before you finish.
83
83
 
84
84
  ## Tools
85
85
 
@@ -199,6 +199,7 @@ If none of the listed skills fit the target, load \`general\`. Keep the skill-se
199
199
  rosterDescription:
200
200
  'deep read-only code/PR/plan review in a fresh context, returns a structured verdict; it does NOT post — you act on its findings',
201
201
  canSpawnSubagents: true,
202
+ canBackgroundSpawnSubagents: true,
202
203
  timeoutMs: REVIEWER_SPAWN_TIMEOUT_MS,
203
204
  inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
204
205
  toolResultBudget: {
@@ -53,6 +53,12 @@ const WARN_POSITIVE_CLOSEOUT: readonly RegExp[] = [
53
53
  /\bshould be (fine|good)\b/,
54
54
  /\blooks resolved\b/,
55
55
  /\bseems resolved\b/,
56
+ // The canonical PR #672 close-out: "that addresses the concern", "addressed
57
+ // your feedback". On a PR the bot still blocks, this READS as a verdict and
58
+ // strands the block, so it escalates through the re-review guard. Demoted to
59
+ // ignore by the negation/future markers below ("haven't addressed", "need
60
+ // to address"); a bare "to address" does not demote.
61
+ /\baddress(es|ed)\b[^.!?]*\b(concern|feedback|review|comment|issue|point)/,
56
62
  ]
57
63
 
58
64
  // Negative warn phrases re-assert a block ("not done yet") instead of closing it
@@ -65,11 +71,17 @@ const WARN: readonly RegExp[] = [...WARN_POSITIVE_CLOSEOUT, ...WARN_NEGATIVE]
65
71
  // ignore. Blocking "I haven't approved" / "I'll approve" / "approved it earlier"
66
72
  // (answering a question) is the worst false-positive class, so it is checked first.
67
73
  const DEMOTE_TO_IGNORE: readonly RegExp[] = [
68
- /\b(haven'?t|have not|did ?n'?t|did not|not yet|never)\b[^.!?]*\b(approv|request|resolv|block)/,
69
- /\b(can'?t|cannot|won'?t|will not|wouldn'?t)\b[^.!?]*\b(approv|request|resolv|block)/,
70
- /\bnot (approved|resolved|blocked|requesting)\b/,
74
+ /\b(haven'?t|have not|did ?n'?t|did not|not yet|never)\b[^.!?]*\b(approv|request|resolv|block|address)/,
75
+ /\b(can'?t|cannot|won'?t|will not|wouldn'?t)\b[^.!?]*\b(approv|request|resolv|block|address)/,
76
+ /\bnot (approved|resolved|blocked|requesting|addressed)\b/,
71
77
  /\b(not|no longer|hardly|barely)\b[^.!?]*\b(lgtm|looks good|looks fine|seems fine|should be (fine|good)|looks resolved|seems resolved)\b/,
72
78
  /\b(i'?ll|i will|going to|gonna|about to|planning to)\b[^.!?]*\b(approv|review|request|resolv)/,
79
+ // "address" demotion is restricted to explicit future/obligation forms only.
80
+ // A standalone `to` marker (e.g. "...to address my feedback") would match
81
+ // hard-claim prose like "Approved — thanks for updating the docs to address
82
+ // my feedback" and demote it to ignore BEFORE the BLOCK_APPROVE check, hiding
83
+ // a real verdict (the recovery path would then post it unguarded — PR #675).
84
+ /\b(i'?ll|i will|going to|gonna|about to|planning to|need(s)? to|have to|want(s)? to|trying to)\b[^.!?]*\baddress/,
73
85
  /\b(approved|resolved|requested changes)\b[^.!?]*\b(earlier|already|yesterday|before|last (review|time)|previously)\b/,
74
86
  /\b(pre|self|co|re|un|non|ai|admin|user|machine|auto) approved\b/,
75
87
  ]
@@ -32,6 +32,8 @@ import {
32
32
  StickyLedger,
33
33
  type EngagementDecision,
34
34
  } from './engagement'
35
+ import { checkFalseReceipt } from './github-false-receipt'
36
+ import { evaluateRereviewGuard } from './github-rereview-guard'
35
37
  import { resetReviewTurn } from './github-review-turn-ledger'
36
38
  import {
37
39
  MEMBERSHIP_COLD_FETCH_TIMEOUT_MS,
@@ -3125,6 +3127,25 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3125
3127
  // the model's pre-tool commentary is the only user-facing text we have.
3126
3128
  // Recovering it means the user gets *something* — strictly better than
3127
3129
  // the historical silent drop.
3130
+ // Egress-level GitHub review guards. The false-receipt and re-review
3131
+ // stranding guards live inside the channel_reply / channel_send tool
3132
+ // handlers, but recovery surfaces trailing assistant prose through a
3133
+ // `source:'system'` send that never touches those handlers. A model that
3134
+ // ends its turn with a close-out ack ("that addresses the concern") instead
3135
+ // of calling a channel tool would otherwise post a verdict-shaped comment
3136
+ // while still holding its own CHANGES_REQUESTED — stranding the PR (PR #672).
3137
+ // Re-run the guards here and SUPPRESS on block: recovery cannot land the
3138
+ // missing formal review on the model's behalf, and posting the unguarded ack
3139
+ // is worse than dropping it — the next inbound re-prompts the model, which
3140
+ // can then land the verdict properly.
3141
+ const recoveryBlock = await evaluateRecoveryReviewGuards(live, assistantText)
3142
+ if (recoveryBlock !== null) {
3143
+ logger.warn(
3144
+ `[channels] ${live.keyId}: suppressed recovery (github review guard) reason=${JSON.stringify(recoveryBlock)} text_len=${assistantText.length}`,
3145
+ )
3146
+ return
3147
+ }
3148
+
3128
3149
  logger.warn(
3129
3150
  `[channels] ${live.keyId}: recovering assistant_text_without_channel_tool source=${source} text_len=${assistantText.length}`,
3130
3151
  )
@@ -3143,6 +3164,38 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3143
3164
  }
3144
3165
  }
3145
3166
 
3167
+ // Returns a block reason when the recovered text would be denied by a github
3168
+ // review guard, or null when it is safe to surface. Non-github channels and
3169
+ // non-PR chats short-circuit inside each guard (adapter / `pr:\d+` checks), so
3170
+ // this is a no-op for everything except GitHub PR sessions.
3171
+ const evaluateRecoveryReviewGuards = async (live: LiveSession, text: string): Promise<string | null> => {
3172
+ const falseReceipt = checkFalseReceipt({
3173
+ sessionId: live.sessionId,
3174
+ adapter: live.key.adapter,
3175
+ workspace: live.key.workspace,
3176
+ chat: live.key.chat,
3177
+ thread: live.key.thread,
3178
+ text,
3179
+ isContinue: false,
3180
+ resolveReviewThread: false,
3181
+ })
3182
+ if (falseReceipt.kind === 'block') return falseReceipt.reason
3183
+
3184
+ const rereview = await evaluateRereviewGuard({
3185
+ adapter: live.key.adapter,
3186
+ workspace: live.key.workspace,
3187
+ chat: live.key.chat,
3188
+ thread: live.key.thread,
3189
+ text,
3190
+ wantsResolve: false,
3191
+ isContinue: false,
3192
+ getReviewState: (req) => getReviewState(req),
3193
+ })
3194
+ if (rereview.block) return rereview.reason
3195
+
3196
+ return null
3197
+ }
3198
+
3146
3199
  const getConsecutiveSendCount = (target: {
3147
3200
  adapter: ChannelKey['adapter']
3148
3201
  workspace: string
@@ -0,0 +1,35 @@
1
+ import { MIGRATION_ID, migrateSecretsV1ToV2, type SecretsMigrationResult } from './secrets-v1-to-v2'
2
+
3
+ export { MIGRATION_ID, migrateSecretsV1ToV2, type SecretsMigrationResult }
4
+
5
+ export type Migration = {
6
+ id: string
7
+ run: (agentDir: string) => SecretsMigrationResult
8
+ }
9
+
10
+ export type MigrationOutcome = { id: string; changed: boolean; summary: string; error?: string }
11
+
12
+ const MIGRATIONS: readonly Migration[] = [{ id: MIGRATION_ID, run: migrateSecretsV1ToV2 }]
13
+
14
+ // Each migration is isolated: a throw is captured per-migration so one folder's
15
+ // unsafe state (e.g. both auth.json and a non-empty secrets.json) is reported
16
+ // loudly without aborting boot or blocking later migrations. Returns one
17
+ // outcome per registered migration so the caller can log what happened.
18
+ export function runStartupMigrations(
19
+ agentDir: string,
20
+ log: (message: string) => void = (m) => console.warn(m),
21
+ ): MigrationOutcome[] {
22
+ const outcomes: MigrationOutcome[] = []
23
+ for (const migration of MIGRATIONS) {
24
+ try {
25
+ const result = migration.run(agentDir)
26
+ if (result.changed) log(`migration ${migration.id}: ${result.summary}`)
27
+ outcomes.push({ id: migration.id, changed: result.changed, summary: result.summary })
28
+ } catch (err) {
29
+ const error = err instanceof Error ? err.message : String(err)
30
+ log(`migration ${migration.id} failed: ${error}`)
31
+ outcomes.push({ id: migration.id, changed: false, summary: 'failed', error })
32
+ }
33
+ }
34
+ return outcomes
35
+ }
@@ -0,0 +1,344 @@
1
+ import { chmodSync, existsSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'node:fs'
2
+ import { join } from 'node:path'
3
+
4
+ import lockfile from 'proper-lockfile'
5
+
6
+ import { parseSecretsFile, SECRETS_FILE_VERSION } from '@/secrets/schema'
7
+
8
+ // PR #638 removed the in-memory v1->v2 upgrade that `parseSecretsFile` used to
9
+ // perform, so a `secrets.json` still in v1 now fails to parse:
10
+ // `hydrateChannelEnvFromSecrets` swallows the failure as `{}`, no token env vars
11
+ // are injected, and channel adapters (Discord, Slack, Telegram) never connect.
12
+ // This is the one-shot on-disk replacement, run once at boot rather than on
13
+ // every parse, so the v2-only runtime keeps working without a read-time shim.
14
+
15
+ const SCHEMA_REL = './node_modules/typeclaw/secrets.schema.json'
16
+ const FILE_MODE = 0o600
17
+
18
+ const LEGACY_FILENAME = 'auth.json'
19
+ const TARGET_FILENAME = 'secrets.json'
20
+
21
+ // Frozen, migration-local reverse map (env-var name -> { adapterId, field }).
22
+ // Intentionally a private copy rather than an inversion of
23
+ // `CHANNEL_FIELD_ENV` in src/secrets/defaults.ts: re-importing live runtime
24
+ // defaults would (a) re-couple current code to deleted legacy surface area,
25
+ // and (b) let a future change to the runtime env-var names silently rewrite
26
+ // the semantics of this historical migration. A v1 file written years ago must
27
+ // migrate the same way regardless of what the live adapters key off today.
28
+ const LEGACY_CHANNEL_ENV_TO_FIELD: Record<string, { adapterId: string; field: string }> = {
29
+ DISCORD_BOT_TOKEN: { adapterId: 'discord-bot', field: 'token' },
30
+ SLACK_BOT_TOKEN: { adapterId: 'slack-bot', field: 'botToken' },
31
+ SLACK_APP_TOKEN: { adapterId: 'slack-bot', field: 'appToken' },
32
+ TELEGRAM_BOT_TOKEN: { adapterId: 'telegram-bot', field: 'token' },
33
+ }
34
+
35
+ export const MIGRATION_ID = '0001-secrets-v1-to-v2'
36
+
37
+ export type SecretsMigrationResult = { changed: boolean; summary: string }
38
+
39
+ // Idempotent: a folder already at v2 (or with no secrets file at all) returns
40
+ // `changed: false`. Errors that indicate ambiguous/unsafe state throw with an
41
+ // actionable message rather than guessing.
42
+ //
43
+ // Concurrency: secrets.json is the lock resource SecretsBackend (provider add,
44
+ // OAuth refresh, channel add) and credential exporters use, so we hold ITS lock
45
+ // across the entire precedence resolution AND upgrade. The lock requires the
46
+ // file to exist, so when only auth.json is present we first seed secrets.json
47
+ // with exclusive create-if-absent semantics (never overwriting a file a
48
+ // concurrent writer may have just written), then lock, then re-read precedence
49
+ // from fresh on-disk state under the lock.
50
+ export function migrateSecretsV1ToV2(agentDir: string): SecretsMigrationResult {
51
+ const legacyPath = join(agentDir, LEGACY_FILENAME)
52
+ const targetPath = join(agentDir, TARGET_FILENAME)
53
+
54
+ if (!existsSync(legacyPath) && !existsSync(targetPath)) {
55
+ return { changed: false, summary: 'no secrets file to migrate' }
56
+ }
57
+
58
+ seedTargetIfAbsent(targetPath)
59
+
60
+ return withFileLock(targetPath, () => {
61
+ resolvePrecedenceUnderLock(legacyPath, targetPath)
62
+ return upgradeFileInPlace(targetPath)
63
+ })
64
+ }
65
+
66
+ // Creates an empty v2 envelope at secrets.json only if it does not already
67
+ // exist, using exclusive create ('wx') so a concurrent writer that wrote real
68
+ // credentials between our existsSync check and here is never clobbered — the
69
+ // EEXIST is swallowed because the file we need to lock now exists, which is all
70
+ // we required. A freshly-seeded empty envelope is indistinguishable from "no
71
+ // target" to resolvePrecedenceUnderLock (isEmptyEnvelope returns true), so
72
+ // "only auth.json" collapses into the "secrets.json empty -> auth wins" branch.
73
+ function seedTargetIfAbsent(targetPath: string): void {
74
+ if (existsSync(targetPath)) return
75
+ try {
76
+ writeFileSync(targetPath, stringifyEmptyEnvelope(), { encoding: 'utf8', mode: FILE_MODE, flag: 'wx' })
77
+ } catch (err) {
78
+ if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err
79
+ }
80
+ }
81
+
82
+ // auth.json precedence, run ENTIRELY under the secrets.json lock so the read,
83
+ // the rename/unlink decision, and the rename itself can't interleave with a
84
+ // concurrent secrets.json writer. Preserves the deleted migrateLegacyAuthJson
85
+ // semantics so no credential is ever silently dropped:
86
+ // - no auth.json -> operate on secrets.json as-is
87
+ // - droppable auth.json -> unlink auth.json, operate on secrets.json
88
+ // - secrets.json empty seed -> auth.json wins (rename over the empty seed)
89
+ // - both non-empty -> hard error (can't pick a source of truth)
90
+ function resolvePrecedenceUnderLock(legacyPath: string, targetPath: string): void {
91
+ if (!existsSync(legacyPath)) return
92
+
93
+ if (isDroppableLegacyFile(legacyPath)) {
94
+ unlinkSync(legacyPath)
95
+ return
96
+ }
97
+
98
+ if (isEmptyEnvelope(targetPath)) {
99
+ renameWithRaceFallback(legacyPath, targetPath)
100
+ chmodSync(targetPath, FILE_MODE)
101
+ return
102
+ }
103
+
104
+ throw new Error(
105
+ `Both ${LEGACY_FILENAME} and a non-empty ${TARGET_FILENAME} exist in the agent folder. ` +
106
+ `Inspect manually and remove the stale file before re-running.`,
107
+ )
108
+ }
109
+
110
+ function upgradeFileInPlace(path: string): SecretsMigrationResult {
111
+ let raw: string
112
+ try {
113
+ raw = readFileSync(path, 'utf8')
114
+ } catch {
115
+ return { changed: false, summary: 'secrets file unreadable; skipped' }
116
+ }
117
+ if (raw.trim() === '') return { changed: false, summary: 'secrets file empty; skipped' }
118
+
119
+ let parsed: unknown
120
+ try {
121
+ parsed = JSON.parse(raw)
122
+ } catch (err) {
123
+ throw new Error(`secrets file is not valid JSON: ${err instanceof Error ? err.message : String(err)}`)
124
+ }
125
+
126
+ // Already current: parseSecretsFile only accepts v2 post-#638, so a successful
127
+ // parse means there is nothing to do.
128
+ if (parseSecretsFile(parsed).ok) return { changed: false, summary: 'already v2; no change' }
129
+
130
+ const upgraded = upgradeToV2(parsed)
131
+ if (upgraded === null) {
132
+ throw new Error(
133
+ 'secrets file is neither a valid v2 envelope nor a recognized legacy (v1 / pre-envelope) shape; ' +
134
+ 'leaving it untouched for manual inspection',
135
+ )
136
+ }
137
+
138
+ // Re-validate the product of our own transform before persisting. A transform
139
+ // that emitted an invalid v2 file would brick the next read; failing here is
140
+ // strictly safer than writing garbage.
141
+ const check = parseSecretsFile(upgraded)
142
+ if (!check.ok) {
143
+ throw new Error(`internal: migrated secrets file failed v2 validation: ${check.reason}`)
144
+ }
145
+
146
+ writeEnvelopeAtomic(path, check.file)
147
+ return { changed: true, summary: `upgraded secrets file to v${SECRETS_FILE_VERSION}` }
148
+ }
149
+
150
+ // Recognizes the two pre-v2 shapes the deleted parseSecretsFile branches used
151
+ // to accept and returns a v2-shaped object. Returns null when the input matches
152
+ // neither (caller turns that into a loud, no-write error).
153
+ //
154
+ // v1 envelope: { version: 1, llm: {...}, channels: { adapter: { ENV: value } } }
155
+ // pre-envelope flat: { providerId: { type, key } } at the top level
156
+ function upgradeToV2(raw: unknown): Record<string, unknown> | null {
157
+ if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) return null
158
+ const obj = raw as Record<string, unknown>
159
+
160
+ if (obj.version === 1) {
161
+ return upgradeV1Envelope(obj)
162
+ }
163
+
164
+ if (looksLikeFlatProviders(obj)) {
165
+ return upgradeV1Envelope({ version: 1, llm: obj, channels: {} })
166
+ }
167
+
168
+ return null
169
+ }
170
+
171
// Rewrites a v1 envelope (`llm` + `channels`) into the v2 layout
// (`providers` + `channels`). A missing or non-object `llm` / `channels`
// section is treated as empty. An existing string `$schema` is kept;
// otherwise SCHEMA_REL is used.
function upgradeV1Envelope(obj: Record<string, unknown>): Record<string, unknown> {
  const legacyProviders = isPlainObject(obj.llm) ? obj.llm : {}
  const legacyChannels = isPlainObject(obj.channels) ? obj.channels : {}

  const providers: Record<string, unknown> = {}
  for (const [providerId, cred] of Object.entries(legacyProviders)) {
    if (!isPlainObject(cred)) continue
    const isApiKey = cred.type === 'api_key' && typeof cred.key === 'string'
    // API keys get their string wrapped as a v2 Secret (`{ value }`). OAuth and
    // any unknown credential type pass through verbatim — they are not
    // env-injectable and the v2 schema accepts them via catchall.
    providers[providerId] = isApiKey ? { type: 'api_key', key: { value: cred.key } } : cred
  }

  const channels: Record<string, Record<string, unknown>> = {}
  for (const [adapterId, slot] of Object.entries(legacyChannels)) {
    if (!isPlainObject(slot)) continue
    const upgradedSlot: Record<string, unknown> = {}
    for (const [key, value] of Object.entries(slot)) {
      if (typeof value !== 'string') {
        // A non-string value means this isn't the flat env-keyed v1 channel
        // shape (e.g. a structured kakaotalk block). Keep it untouched so the
        // catchall keeps it valid; do not try to reshape.
        upgradedSlot[key] = value
        continue
      }
      // A known env-var key that belongs to this adapter is renamed to its v2
      // field. An unknown key (or a key mapped to a different adapter) keeps
      // its original name. Either way the string is wrapped as a v2 Secret so
      // the resulting file is valid v2.
      const mapping = LEGACY_CHANNEL_ENV_TO_FIELD[key]
      const targetKey = mapping && mapping.adapterId === adapterId ? mapping.field : key
      upgradedSlot[targetKey] = { value }
    }
    channels[adapterId] = upgradedSlot
  }

  return {
    $schema: typeof obj.$schema === 'string' ? obj.$schema : SCHEMA_REL,
    version: SECRETS_FILE_VERSION,
    providers,
    channels,
  }
}
220
+
221
// Recognizes a flat pre-envelope file: a top-level record of provider
// credentials. Ignoring `$schema`, there must be at least one entry and every
// entry must be a credential object carrying a string `type`. Anything else is
// a shape we don't recognize, so we refuse to guess.
function looksLikeFlatProviders(obj: Record<string, unknown>): boolean {
  let credentialCount = 0
  for (const [name, value] of Object.entries(obj)) {
    if (name === '$schema') continue
    if (!isPlainObject(value) || typeof value.type !== 'string') return false
    credentialCount += 1
  }
  return credentialCount > 0
}
229
+
230
// True when the file at `path` holds no credentials: it is missing/blank, or
// it parses as a valid secrets file whose `providers` and `channels` are both
// empty. Invalid JSON or a file that fails parseSecretsFile counts as
// non-empty.
function isEmptyEnvelope(path: string): boolean {
  const contents = readJsonOrNull(path)
  if (contents === undefined) return true
  if (contents === null) return false

  const outcome = parseSecretsFile(contents)
  if (!outcome.ok) return false

  const providerCount = Object.keys(outcome.file.providers).length
  if (providerCount !== 0) return false
  return Object.keys(outcome.file.channels).length === 0
}
238
+
239
// Decides whether a legacy auth.json can be discarded in favor of an existing
// secrets.json. That is safe only when the legacy file carries nothing worth
// keeping: it is missing/blank, or it is a valid-but-empty v2 envelope.
// Everything else that parses — a legacy shape with credentials OR an object
// we don't recognize — yields false, so resolveLegacyFilename falls through to
// the both-non-empty hard error instead of silently deleting a file whose
// contents we can't account for.
function isDroppableLegacyFile(path: string): boolean {
  const legacy = readJsonOrNull(path)
  if (legacy === undefined) return true
  if (legacy === null) return false

  const v2 = parseSecretsFile(legacy)
  if (v2.ok) {
    return Object.keys(v2.file.providers).length === 0 && Object.keys(v2.file.channels).length === 0
  }
  return false
}
253
+
254
// Reads and JSON-parses the file at `path`, returning one of three outcomes:
//   undefined = file missing/blank (treat as empty);
//   null      = present but unusable — unreadable, or invalid JSON (treat as
//               "has content we can't safely drop");
//   otherwise = the parsed JSON value.
//
// Only "the path does not exist" (ENOENT / ENOTDIR) counts as missing. Any
// other read failure (EACCES, EISDIR, EIO, ...) means something IS there that
// we could not inspect, so it must not be reported as empty — callers use
// `undefined` as the signal that a file is safe to drop.
function readJsonOrNull(path: string): unknown {
  let raw: string
  try {
    raw = readFileSync(path, 'utf8')
  } catch (err) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err ? (err as { code: unknown }).code : undefined
    return code === 'ENOENT' || code === 'ENOTDIR' ? undefined : null
  }
  if (raw.trim() === '') return undefined
  try {
    return JSON.parse(raw)
  } catch {
    return null
  }
}
270
+
271
// Serializes an empty envelope (no providers, no channels) as 2-space-indented
// JSON followed by a trailing newline.
function stringifyEmptyEnvelope(): string {
  const emptyEnvelope = {
    $schema: SCHEMA_REL,
    version: SECRETS_FILE_VERSION,
    providers: {},
    channels: {},
  }
  return JSON.stringify(emptyEnvelope, null, 2) + '\n'
}
274
+
275
// Writes `envelope` to `path` as pretty-printed JSON via a sibling temp file
// that is then renamed over the target. If the rename fails, the temp file is
// removed (best effort) and the rename error is rethrown. FILE_MODE is applied
// both when the temp file is created and again on the final path.
function writeEnvelopeAtomic(path: string, envelope: unknown): void {
  const stagingPath = `${path}.${process.pid}.${Date.now()}.tmp`
  const serialized = `${JSON.stringify(envelope, null, 2)}\n`
  writeFileSync(stagingPath, serialized, { encoding: 'utf8', mode: FILE_MODE })

  try {
    renameSync(stagingPath, path)
  } catch (renameError) {
    try {
      unlinkSync(stagingPath)
    } catch {
      // best-effort cleanup of the temp file when rename fails
    }
    throw renameError
  }

  chmodSync(path, FILE_MODE)
}
290
+
291
// Renames `from` to `to`, tolerating a lost race with a concurrent migration.
// renameSync is atomic per syscall, but two runs can both see auth.json present
// and secrets.json absent, then race on the rename. The loser gets ENOENT
// because the source is already gone; if the target exists at that point the
// migration has effectively succeeded, so the error is swallowed. Every other
// failure is rethrown.
function renameWithRaceFallback(from: string, to: string): void {
  try {
    renameSync(from, to)
  } catch (err) {
    const sourceVanished = (err as NodeJS.ErrnoException).code === 'ENOENT'
    if (!sourceVanished || !existsSync(to)) throw err
  }
}
303
+
304
// Runs `fn` while holding the file lock on `path` and returns its result; the
// lock is released whether `fn` returns or throws. This mirrors
// SecretsBackend's lock discipline so a concurrent credential write (provider
// add, OAuth refresh, channel add) can't interleave with the
// read-transform-write. proper-lockfile needs the target to exist; the target
// always exists by the time we lock (resolveLegacyFilename guarantees it).
function withFileLock<T>(path: string, fn: () => T): T {
  // If acquisition throws there is nothing to release, so it sits outside the
  // try/finally.
  const unlock = acquireSyncLockWithRetry(path)
  try {
    return fn()
  } finally {
    unlock()
  }
}
317
+
318
// Maximum number of lock attempts before giving up.
const SYNC_LOCK_RETRIES = 10
// Pause between attempts, in milliseconds.
const SYNC_LOCK_DELAY_MS = 20

// Takes the lock on `path` synchronously and returns the release function.
// Only contention (`ELOCKED`) is retried, up to SYNC_LOCK_RETRIES attempts with
// a SYNC_LOCK_DELAY_MS pause in between; any other error, or contention on the
// final attempt, is rethrown as-is.
function acquireSyncLockWithRetry(path: string): () => void {
  let lastError: unknown
  let attempt = 0
  while (attempt < SYNC_LOCK_RETRIES) {
    attempt += 1
    try {
      return lockfile.lockSync(path, { realpath: false })
    } catch (error) {
      let code: string | undefined
      if (typeof error === 'object' && error !== null && 'code' in error) {
        code = String((error as { code: unknown }).code)
      }
      const contended = code === 'ELOCKED'
      const exhausted = attempt === SYNC_LOCK_RETRIES
      if (!contended || exhausted) throw error

      lastError = error
      const resumeAt = Date.now() + SYNC_LOCK_DELAY_MS
      while (Date.now() < resumeAt) {
        // intentionally empty: synchronous busy-wait to match SecretsBackend
      }
    }
  }
  // Only reachable if the loop body never ran (non-positive retry count).
  throw (lastError as Error | undefined) ?? new Error('Failed to acquire secrets store lock')
}
341
+
342
// Type guard for "non-null object that is not an array". Note this is a
// structural check only: class instances such as Date also pass.
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') return false
  return !Array.isArray(value)
}