@gajae-code/coding-agent 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.5.4] - 2026-06-17
6
+
7
+ ### Fixed
8
+
9
+ - Fixed subagent resume returning `not_found` after terminal job eviction removed the in-memory subagent record. Resume descriptors are now retained as durable same-session metadata and rehydrate a resumable record from the saved subagent session file, so ralplan Planner revision passes can resume with fallback metadata instead of forcing a fresh Planner spawn after 0.5.3.
10
+ - `AgentSession` now forwards the live provider session state (`providerSessionState`), session affinity id (`providerSessionId ?? sessionId`), and configured WebSocket transport preference (`preferWebsockets`) into local maintenance one-shot calls — manual/automatic compaction summaries, handoff generation, and tree branch summaries — via a shared `#maintenanceProviderTransport()` helper. Previously these Codex/OpenAI-compatible maintenance calls could fall back to HTTP/SSE and lose `session_id` affinity even when `providers.openaiWebsockets: "on"` routed live turns over WebSocket (#736).
11
+ - Fixed `ollama-cloud` first-event timeouts driving an unbounded, usage-spiking retry loop. The ollama-chat backend (exclusively `ollama-cloud`; local Ollama uses the `openai-responses` API) can stall before its first token even for tiny prompts, surfacing `Provider stream timed out while waiting for the first event`. That message matched the generic transient classifier, which retries forever (capped only on delay), so every continuation re-issued the full request to a billable backend and silently spiked usage — disabling retries was the only workaround. First-event timeouts on the ollama-chat API are now a distinct fail-closed class bounded by `retry.maxRetries`: they still retry transient cold starts a few times, then surface instead of looping. First-party providers keep their existing unbounded first-event-timeout retry behavior (#713).
12
+ - Interactive sessions no longer orphan the `browser` tool's headless/spawned Chrome (and the Python eval kernel) to PID 1 when killed by a signal. The interactive entry now registers a bounded, idempotent `postmortem` cleanup (`session-subprocess-teardown`) that runs `AgentSession.disposeChildSubprocesses()` on `SIGINT`/`SIGTERM`/`SIGHUP`, force-releasing the session's browser tabs (`kill:true`) and disposing its Python/JS kernels — the teardown the graceful `/quit` (`dispose()`) path already performs but that an external `kill`/terminal-close used to bypass. Headless `disposeBrowserHandle` now also SIGTERM/SIGKILLs the captured Chrome process tree as a fallback when forced, so a wedged renderer can't survive a bounded CDP `close()`; graceful release behavior is unchanged. The teardown is time-boxed (5s) so a stuck subprocess can't hang process exit (#698).
13
+
5
14
  ## [0.5.3] - 2026-06-16
6
15
 
7
16
  ### Added
@@ -4,6 +4,16 @@ export type ModelProfileRole = GjcModelAssignmentTargetId;
4
4
  export interface ModelProfileDefinition {
5
5
  name: string;
6
6
  requiredProviders: string[];
7
+ /**
8
+ * Optional groups of providers that are interchangeable fallbacks.
9
+ * Each group is an array of provider ids where at least one must be
10
+ * authenticated. Providers NOT in any group are treated as strict
11
+ * requirements (all must be authenticated).
12
+ *
13
+ * Example: `[["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"]]`
14
+ * means any single xiaomi credential satisfies the group.
15
+ */
16
+ alternativeProviderGroups?: readonly (readonly string[])[];
7
17
  modelMapping: Partial<Record<ModelProfileRole, string>>;
8
18
  source: "builtin" | "user";
9
19
  }
@@ -93,6 +93,7 @@ export declare class InteractiveMode implements InteractiveModeContext {
93
93
  locallySubmittedUserSignatures: Set<string>;
94
94
  lastSigintTime: number;
95
95
  lastEscapeTime: number;
96
+ lastComposerClearEscapeTime: number;
96
97
  shutdownRequested: boolean;
97
98
  hookSelector: HookSelectorComponent | undefined;
98
99
  hookInput: HookInputComponent | undefined;
@@ -100,6 +100,7 @@ export interface InteractiveModeContext {
100
100
  locallySubmittedUserSignatures: Set<string>;
101
101
  lastSigintTime: number;
102
102
  lastEscapeTime: number;
103
+ lastComposerClearEscapeTime: number;
103
104
  shutdownRequested: boolean;
104
105
  hookSelector: HookSelectorComponent | undefined;
105
106
  hookInput: HookInputComponent | undefined;
@@ -407,6 +407,18 @@ export declare class AgentSession {
407
407
  * Call this when completely done with the session.
408
408
  */
409
409
  dispose(): Promise<void>;
410
+ /**
411
+ * Bounded, best-effort teardown of the subprocess-spawning resources this session
412
+ * owns: the browser tool's headless/spawned Chrome and the Python eval kernel + JS VM
413
+ * contexts. Unlike {@link dispose}, this touches only child processes and is time-boxed,
414
+ * so a top-level `SIGINT`/`SIGTERM`/`SIGHUP` handler can run it without hanging — without
415
+ * it, an external kill bypasses `dispose()` and orphans Chrome/Python to PID 1 (#698).
416
+ *
417
+ * Idempotent: every step is a no-op once the graceful {@link dispose} path has released
418
+ * the resources. Never throws; per-step failures are logged and the whole run is capped
419
+ * at `timeoutMs` so a wedged subprocess can't stall process exit.
420
+ */
421
+ disposeChildSubprocesses(timeoutMs?: number): Promise<void>;
410
422
  /** Full agent state */
411
423
  get state(): AgentState;
412
424
  /** Current model (may be undefined if not yet selected) */
@@ -57,6 +57,13 @@ export interface OutputSinkOptions {
57
57
  * relative to the sink (the sink does not catch errors from this callback).
58
58
  */
59
59
  onRawChunk?: (chunk: string) => void;
60
+ /**
61
+ * Opt-in (F21): when true, sanitization + live callback delivery + retention are coalesced over
62
+ * batched raw chunks instead of run per chunk, bounding sync CPU for many-small-chunk output. The
63
+ * raw artifact mirror stays byte-correct. Defaults to the PI_OUTPUT_SANITIZE_COALESCE env flag
64
+ * (default OFF — the per-chunk path is byte-identical to historical behavior).
65
+ */
66
+ coalesceSanitize?: boolean;
60
67
  }
61
68
  export interface TruncationResult {
62
69
  content: string;
@@ -14,12 +14,12 @@ export interface CodexSearchParams {
14
14
  * Executes a web search using OpenAI code provider's built-in web search tool.
15
15
  *
16
16
  * Default-model behavior:
17
- * - If `PI_OPENAI_CODE_WEB_SEARCH_MODEL` is set, use it exactly once and surface any
17
+ * - If `PI_CODEX_WEB_SEARCH_MODEL` is set, use it exactly once and surface any
18
18
  * upstream error verbatim.
19
- * - Otherwise prefer ChatGPT-account-safe bundled defaults (GPT-5.4, GPT-5
20
- * OpenAI code backend, GPT-5, …) and retry the next candidate only when OpenAI code backend returns the
19
+ * - Otherwise prefer ChatGPT-account-safe bundled defaults (GPT-5.5, GPT-5.4,
20
+ * GPT-5 code backend, …) and retry the next candidate only when OpenAI code backend returns the
21
21
  * known 400 "model is not supported" family. This avoids selecting
22
- * `gpt-5-OpenAI code backend-mini` first on ChatGPT accounts, which OpenAI rejects.
22
+ * `gpt-5-codex-mini` first on ChatGPT accounts, which OpenAI rejects.
23
23
  */
24
24
  export declare function searchCodex(params: SearchParams): Promise<SearchResponse>;
25
25
  /**
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@gajae-code/coding-agent",
4
- "version": "0.5.3",
4
+ "version": "0.5.4",
5
5
  "description": "Gajae Code CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://gaebal-gajae.dev",
7
7
  "author": "Yeachan-Heo",
@@ -51,12 +51,12 @@
51
51
  "@agentclientprotocol/sdk": "0.21.0",
52
52
  "@babel/parser": "^7.29.3",
53
53
  "@mozilla/readability": "^0.6.0",
54
- "@gajae-code/stats": "0.5.3",
55
- "@gajae-code/agent-core": "0.5.3",
56
- "@gajae-code/ai": "0.5.3",
57
- "@gajae-code/natives": "0.5.3",
58
- "@gajae-code/tui": "0.5.3",
59
- "@gajae-code/utils": "0.5.3",
54
+ "@gajae-code/stats": "0.5.4",
55
+ "@gajae-code/agent-core": "0.5.4",
56
+ "@gajae-code/ai": "0.5.4",
57
+ "@gajae-code/natives": "0.5.4",
58
+ "@gajae-code/tui": "0.5.4",
59
+ "@gajae-code/utils": "0.5.4",
60
60
  "@puppeteer/browsers": "^2.13.0",
61
61
  "@types/turndown": "5.0.6",
62
62
  "@xterm/headless": "^6.0.0",
@@ -118,6 +118,12 @@ export interface ResumeDescriptor {
118
118
  data: unknown;
119
119
  }
120
120
 
121
+ function sessionFileFromResumeDescriptorData(data: unknown): string | null {
122
+ if (typeof data !== "object" || data === null) return null;
123
+ const sessionFile = (data as { sessionFile?: unknown }).sessionFile;
124
+ return typeof sessionFile === "string" && sessionFile.trim().length > 0 ? sessionFile : null;
125
+ }
126
+
121
127
  /** A pending resume awaiting a free concurrency slot. */
122
128
  interface ResumeQueueEntry {
123
129
  subagentId: string;
@@ -595,11 +601,31 @@ export class AsyncJobManager {
595
601
  record.modelFellBack = model.modelFellBack;
596
602
  }
597
603
 
604
+ #recordFromResumeDescriptor(subagentId: string, filter?: AsyncJobFilter): SubagentRecord | undefined {
605
+ const descriptor = this.getResumeDescriptor(subagentId, filter);
606
+ if (!descriptor) return undefined;
607
+ const sessionFile = sessionFileFromResumeDescriptorData(descriptor.data);
608
+ const record: SubagentRecord = {
609
+ subagentId: descriptor.subagentId,
610
+ ownerId: descriptor.ownerId,
611
+ currentJobId: null,
612
+ historicalJobIds: [],
613
+ status: "completed",
614
+ sessionFile,
615
+ resumable: sessionFile !== null,
616
+ };
617
+ this.#subagentRecords.set(record.subagentId, record);
618
+ return record;
619
+ }
620
+
598
621
  getSubagentRecord(subagentId: string, filter?: AsyncJobFilter): SubagentRecord | undefined {
599
- const rec = this.#subagentRecords.get(subagentId.trim());
600
- if (!rec) return undefined;
601
- if (filter?.ownerId && rec.ownerId !== filter.ownerId) return undefined;
602
- return rec;
622
+ const trimmed = subagentId.trim();
623
+ const rec = this.#subagentRecords.get(trimmed);
624
+ if (rec) {
625
+ if (filter?.ownerId && rec.ownerId !== filter.ownerId) return undefined;
626
+ return rec;
627
+ }
628
+ return this.#recordFromResumeDescriptor(trimmed, filter);
603
629
  }
604
630
 
605
631
  getSubagentRecords(filter?: AsyncJobFilter): SubagentRecord[] {
@@ -696,8 +722,6 @@ export class AsyncJobManager {
696
722
  if (rec.status === "paused" || rec.status === "queued") return;
697
723
  this.#liveHandles.delete(rec.subagentId);
698
724
  this.#subagentProgress.delete(rec.subagentId);
699
- this.#resumeDescriptors.delete(rec.subagentId);
700
- this.#subagentRecords.delete(rec.subagentId);
701
725
  }
702
726
 
703
727
  #markRecordTerminal(jobId: string, status: "completed" | "failed" | "cancelled"): void {
@@ -61,6 +61,49 @@ function resolveModelProfileName(profileName: string, profiles: ReadonlyMap<stri
61
61
  return replacement && profiles.has(replacement) ? replacement : profileName;
62
62
  }
63
63
 
64
+ /**
65
+ * Rewrite a selector only within the selector provider's own alternative group.
66
+ * Strict providers are never rewritten, and authenticated alternative providers
67
+ * keep their original selectors.
68
+ */
69
+ function rewriteSelectorProvider(
70
+ selector: string,
71
+ authenticatedProviders: ReadonlySet<string>,
72
+ alternativeGroups: readonly (readonly string[])[],
73
+ ): string {
74
+ const slash = selector.indexOf("/");
75
+ if (slash < 0) return selector;
76
+
77
+ const provider = selector.substring(0, slash);
78
+ if (authenticatedProviders.has(provider)) return selector;
79
+
80
+ const group = alternativeGroups.find(candidates => candidates.includes(provider));
81
+ if (!group) return selector;
82
+
83
+ const replacement = group.find(candidate => authenticatedProviders.has(candidate));
84
+ if (!replacement) return selector;
85
+
86
+ return replacement + selector.substring(slash);
87
+ }
88
+
89
+ function rewriteBindingsProviders(
90
+ bindings: { defaultSelector?: string; agentModelOverrides: Record<string, string> },
91
+ authenticatedProviders: ReadonlySet<string>,
92
+ alternativeGroups: readonly (readonly string[])[],
93
+ ): { defaultSelector?: string; agentModelOverrides: Record<string, string> } {
94
+ return {
95
+ defaultSelector: bindings.defaultSelector
96
+ ? rewriteSelectorProvider(bindings.defaultSelector, authenticatedProviders, alternativeGroups)
97
+ : undefined,
98
+ agentModelOverrides: Object.fromEntries(
99
+ Object.entries(bindings.agentModelOverrides).map(([role, sel]) => [
100
+ role,
101
+ rewriteSelectorProvider(sel, authenticatedProviders, alternativeGroups),
102
+ ]),
103
+ ),
104
+ };
105
+ }
106
+
64
107
  export async function prepareModelProfileActivation(
65
108
  options: PrepareModelProfileActivationOptions,
66
109
  ): Promise<PreparedModelProfileActivation> {
@@ -72,19 +115,44 @@ export async function prepareModelProfileActivation(
72
115
  throw new Error(`Unknown model profile "${options.profileName}". Available profiles: ${available}`);
73
116
  }
74
117
 
118
+ const allProviders = aggregateModelProfileRequiredProviders(profile.requiredProviders, profile);
119
+ const alternativeGroups = profile.alternativeProviderGroups ?? [];
120
+ const alternativeSet = new Set(alternativeGroups.flat());
121
+
75
122
  const missingProviders: string[] = [];
76
- for (const provider of aggregateModelProfileRequiredProviders(profile.requiredProviders, profile)) {
123
+ const authenticatedProviders: string[] = [];
124
+ for (const provider of allProviders) {
77
125
  const apiKey = await options.modelRegistry.getApiKeyForProvider(provider, options.session.sessionId);
78
126
  if (!isAuthenticated(apiKey)) {
79
127
  missingProviders.push(provider);
128
+ } else {
129
+ authenticatedProviders.push(provider);
80
130
  }
81
131
  }
82
- if (missingProviders.length > 0) {
132
+
133
+ // Check strict (non-alternative) providers — all must be authenticated.
134
+ const strictMissing = missingProviders.filter(p => !alternativeSet.has(p));
135
+ if (strictMissing.length > 0) {
136
+ throw new Error(formatModelProfileCredentialError(options.profileName, strictMissing));
137
+ }
138
+
139
+ // Check alternative groups — at least one provider per group must be authenticated.
140
+ for (const group of alternativeGroups) {
141
+ const groupAuthenticated = group.some(p => authenticatedProviders.includes(p));
142
+ if (!groupAuthenticated) {
143
+ throw new Error(formatModelProfileCredentialError(options.profileName, [...group]));
144
+ }
145
+ }
146
+
147
+ if (authenticatedProviders.length === 0) {
83
148
  throw new Error(formatModelProfileCredentialError(options.profileName, missingProviders));
84
149
  }
85
150
 
86
151
  const availableModels = options.modelRegistry.getAll();
87
- const bindings = resolveProfileBindings(profile);
152
+ let bindings = resolveProfileBindings(profile);
153
+ if (missingProviders.length > 0 && alternativeGroups.length > 0) {
154
+ bindings = rewriteBindingsProviders(bindings, new Set(authenticatedProviders), alternativeGroups);
155
+ }
88
156
  const resolvedDefault = bindings.defaultSelector
89
157
  ? resolveModelRoleValue(bindings.defaultSelector, availableModels, {
90
158
  settings: options.settings as Settings,
@@ -6,6 +6,16 @@ export type ModelProfileRole = GjcModelAssignmentTargetId;
6
6
  export interface ModelProfileDefinition {
7
7
  name: string;
8
8
  requiredProviders: string[];
9
+ /**
10
+ * Optional groups of providers that are interchangeable fallbacks.
11
+ * Each group is an array of provider ids where at least one must be
12
+ * authenticated. Providers NOT in any group are treated as strict
13
+ * requirements (all must be authenticated).
14
+ *
15
+ * Example: `[["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"]]`
16
+ * means any single xiaomi credential satisfies the group.
17
+ */
18
+ alternativeProviderGroups?: readonly (readonly string[])[];
9
19
  modelMapping: Partial<Record<ModelProfileRole, string>>;
10
20
  source: "builtin" | "user";
11
21
  }
@@ -46,9 +56,11 @@ const profile = (
46
56
  name: string,
47
57
  requiredProviders: string[],
48
58
  modelMapping: Record<ModelProfileRole, string>,
59
+ alternativeProviderGroups?: readonly (readonly string[])[],
49
60
  ): ModelProfileDefinition => ({
50
61
  name,
51
62
  requiredProviders: aggregateModelProfileRequiredProviders(requiredProviders, { modelMapping }),
63
+ alternativeProviderGroups,
52
64
  modelMapping,
53
65
  source: "builtin",
54
66
  });
@@ -138,20 +150,30 @@ export const BUILTIN_MODEL_PROFILES: readonly ModelProfileDefinition[] = [
138
150
  critic: "xiaomi/mimo-v2.5-pro:medium",
139
151
  architect: "xiaomi/mimo-v2.5-pro:high",
140
152
  }),
141
- profile("mimo-medium", ["xiaomi"], {
142
- default: "xiaomi/mimo-v2.5-pro:medium",
143
- executor: "xiaomi/mimo-v2.5-pro:low",
144
- planner: "xiaomi/mimo-v2.5-pro:medium",
145
- critic: "xiaomi/mimo-v2.5-pro:high",
146
- architect: "xiaomi/mimo-v2.5-pro:xhigh",
147
- }),
148
- profile("mimo-pro", ["xiaomi"], {
149
- default: "xiaomi/mimo-v2.5-pro:xhigh",
150
- executor: "xiaomi/mimo-v2.5-pro:medium",
151
- planner: "xiaomi/mimo-v2.5-pro:high",
152
- critic: "xiaomi/mimo-v2.5-pro:xhigh",
153
- architect: "xiaomi/mimo-v2.5-pro:xhigh",
154
- }),
153
+ profile(
154
+ "mimo-medium",
155
+ ["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"],
156
+ {
157
+ default: "xiaomi/mimo-v2.5-pro:medium",
158
+ executor: "xiaomi/mimo-v2.5-pro:low",
159
+ planner: "xiaomi/mimo-v2.5-pro:medium",
160
+ critic: "xiaomi/mimo-v2.5-pro:high",
161
+ architect: "xiaomi/mimo-v2.5-pro:xhigh",
162
+ },
163
+ [["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"]],
164
+ ),
165
+ profile(
166
+ "mimo-pro",
167
+ ["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"],
168
+ {
169
+ default: "xiaomi/mimo-v2.5-pro:xhigh",
170
+ executor: "xiaomi/mimo-v2.5-pro:medium",
171
+ planner: "xiaomi/mimo-v2.5-pro:high",
172
+ critic: "xiaomi/mimo-v2.5-pro:xhigh",
173
+ architect: "xiaomi/mimo-v2.5-pro:xhigh",
174
+ },
175
+ [["xiaomi", "xiaomi-token-plan-sgp", "xiaomi-token-plan-ams", "xiaomi-token-plan-cn"]],
176
+ ),
155
177
  profile("grok-eco", ["xai"], {
156
178
  default: "xai/grok-4.3:low",
157
179
  executor: "xai/grok-4.3:minimal",
@@ -292,6 +314,9 @@ const PROFILE_RECOMMENDATIONS: Record<string, string> = {
292
314
  zai: "glm-medium",
293
315
  "kimi-code": "kimi-coding-plan-medium",
294
316
  xiaomi: "mimo-medium",
317
+ "xiaomi-token-plan-sgp": "mimo-medium",
318
+ "xiaomi-token-plan-ams": "mimo-medium",
319
+ "xiaomi-token-plan-cn": "mimo-medium",
295
320
  xai: "grok-medium",
296
321
  "grok-build": "grok-build-pro",
297
322
  cursor: "cursor-medium",
@@ -39,7 +39,8 @@ Inspired by the [Ouroboros project](https://github.com/Q00/ouroboros) which demo
39
39
 
40
40
  <Execution_Policy>
41
41
  - Ask ONE question at a time -- never batch multiple questions
42
- - Preserve the user/session language for every user-facing announcement, topology confirmation, option label, and interview question when state includes `language.instruction`; for example Korean initial ideas must receive Korean deep-interview questions unless the user explicitly requests another language
42
+ - Default to English when no language preference is explicit or obvious. Preserve the user/session language for every user-facing announcement, topology confirmation, option label, and interview question when state includes `language.instruction`; do not add language-specific special cases
43
+ - Before emitting any user-facing natural-language prose governed by `language.instruction`, perform one silent, best-effort self-proofread in the preserved session language for obvious spelling, spacing, grammar, inflection/particle, and word-choice errors, using the same language-agnostic pass for whatever language is active rather than special-casing any single language. Apply it only to newly generated prose and never announce the proofreading, show before/after text, apologize for it, or re-emit a corrected copy. Do not alter code blocks or identifiers, file paths, CLI commands, JSON/configuration keys, `ask` metadata keys, table/round structure, fixed labels, numeric scores, component ids, status tokens, user quotes or source text, Phase 0 threshold markers such as `Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThresholdSource>)`, or fixed paths such as `.gjc/specs/deep-interview-{slug}.md`; still apply the self-proofread to generated natural-language clauses or cells inside those structures, including Why now rationale, gap text, next-target phrasing, and coverage notes
43
44
  - Target the WEAKEST clarity dimension with each question
44
45
  - Before Round 1 ambiguity scoring, run a one-time Round 0 topology enumeration gate that confirms the top-level component list and locks it into state
45
46
  - Make weakest-dimension targeting explicit every round: name the weakest dimension, state its score/gap, and explain why the next question is aimed there
@@ -96,7 +97,7 @@ Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThreshold
96
97
  - Substitute `<resolvedThreshold>`, `<resolvedThresholdPercent>`, and `<resolvedThresholdSource>` throughout the remaining instructions before continuing.
97
98
  - Include `threshold_source` in the first `gjc state write` payload and preserve it on later state updates; do not edit `.gjc/state` files directly unless an explicit force override is active.
98
99
  - Include both threshold and source in the final spec metadata.
99
- - Read any `language` object from active deep-interview state and carry `language.instruction` forward mechanically. If absent, infer the user/session language from `{{ARGUMENTS}}` only when it is obvious. Do not surprise a Korean session with English questions.
100
+ - Read any `language` object from active deep-interview state and carry `language.instruction` forward mechanically. If absent, default to English unless `{{ARGUMENTS}}` makes another user/session language obvious or the user explicitly requests another language. Do not add language-specific special cases.
100
101
 
101
102
  ## Phase 1: Initialize
102
103
 
@@ -175,6 +176,8 @@ The first line of this announcement MUST be exactly the Phase 0 threshold marker
175
176
  > **Project type:** {greenfield|brownfield}
176
177
  > **Current ambiguity:** 100% (we haven't started yet)
177
178
 
179
+ Before emitting the prose lines in this announcement, apply the `<Execution_Policy>` self-proofread once; keep the required threshold marker and the quoted `{initial_idea}` unchanged.
180
+
178
181
  ## Round 0: Topology Enumeration Gate
179
182
 
180
183
  Run this gate exactly once after Phase 1 initialization and before any Phase 2 ambiguity scoring. The goal is to lock the **shape** of the user's scope before depth-first Socratic questioning can overfit to the most-described component.
@@ -293,6 +296,8 @@ Round {n} | Component: {target_component_name} | Targeting: {weakest_dimension}
293
296
 
294
297
  Options should include contextually relevant choices plus free-text, translated/localized according to `language.instruction` when present.
295
298
 
299
+ After applying `language.instruction` to the visible question, options, and generated rationale, apply the self-proofread once to new prose only; preserve only the Round/Component/Targeting/Ambiguity line structure, fixed labels, numeric ambiguity value, component/target identifiers, and `deepInterview.*` metadata keys. Do not exempt generated natural-language rationale such as Why now.
300
+
296
301
  When calling `ask`, SHOULD include optional structured metadata so the runtime can record the round without manual state writes: `deepInterview.round_id?`, `deepInterview.round`, `deepInterview.component`, `deepInterview.dimension`, and `deepInterview.ambiguity`. Keep this metadata aligned with the visible Round/Component/Targeting/Ambiguity line; if metadata cannot be supplied, the legacy formatted question text remains the fallback.
297
302
 
298
303
  ### Step 2b′: Auto-Answer Opted-Out Questions
@@ -436,6 +441,8 @@ Round {n} complete.
436
441
 
437
442
  Apply `language.instruction` when present before showing this progress report so status text, gaps, and next-target phrasing stay in the preserved session language.
438
443
 
444
+ Then apply the self-proofread once to narrative status text, generated prose cells, gaps, and next-target phrasing; preserve only table structure, fixed status labels, scores, weights, component ids, and trigger tokens.
445
+
439
446
  ### Step 2e: Update State
440
447
 
441
448
  Update state in two phases. The `ask` answer is first recorded by the runtime as an `answered` shell. Scoring then enriches the same round record to `scored` with global scores, per-component `topology.components[].clarity_scores`, `topology.components[].weakest_dimension`, trigger metadata, established-facts changes, ontology snapshot, `topology.last_targeted_component_id`, `auto_researched_rounds`, `auto_answered_rounds`, and `architect_failures`. When `deepInterview` ask metadata is present, no manual per-round `gjc state write` is required for the answer shell; only scoring enrichment/state maintenance remains. When metadata is absent, use the legacy `gjc state write` path to persist the new round and never patch `.gjc/state` directly unless an explicit force override is active.
@@ -486,6 +493,7 @@ When ambiguity ≤ threshold (or hard cap / early exit):
486
493
 
487
494
  1. **Generate the specification** using the opus model with the prompt-safe transcript. If the full interview transcript or initial context is too large, include the summary plus all concrete decisions, acceptance criteria, unresolved gaps, and ontology snapshots; never overflow the prompt with raw oversized context.
488
495
  - Apply `language.instruction` when present so user-facing prose in the spec preserves the session language; keep code identifiers, file paths, commands, JSON/settings keys, and quoted source text unchanged.
496
+ - Apply the self-proofread once to newly generated spec prose before persistence, including generated natural-language table cells such as coverage notes, while preserving transcript answers, quoted/source text, code identifiers, file paths, commands, JSON/settings keys, table structure/fixed labels, and `.gjc/specs/deep-interview-{slug}.md` unchanged.
489
497
  2. **Write the final spec through the workflow CLI**: persist the artifact at `.gjc/specs/deep-interview-{slug}.md`
490
498
  - Always use this exact final spec path. Do not write temporary working files to the repo root or other ad hoc paths; repos may allowlist `.gjc/` for planning artifacts while protecting product branches.
491
499
  - Use the native deep-interview write command with `--write --stage final --slug {slug} --spec <markdown-or-path> [--json]` for artifact and state persistence; direct `.gjc/` file edits are forbidden unless an explicit force override is active.
@@ -785,6 +793,7 @@ Why bad: 45% ambiguity means nearly half the requirements are unclear. The mathe
785
793
  <Final_Checklist>
786
794
  - [ ] Phase 0 ran before anything: threshold resolved and first line emitted as `Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThresholdSource>)`; state and spec metadata record both `threshold` and `threshold_source`
787
795
  - [ ] `language.instruction` preserved across announcements, questions, options, progress reports, and spec prose when present
796
+ - [ ] User-facing natural-language prose, including generated prose clauses/cells inside round lines or tables, was silently self-proofread once according to `language.instruction`, while code/paths/commands/keys/table or round structure/fixed labels/status tokens/quotes/threshold markers/fixed paths remained unchanged
788
797
  - [ ] Oversized initial context/history summarized before scoring, question generation, spec generation, or handoff
789
798
  - [ ] Round 0 topology gate completed before scoring; `topology.confirmed_at` persisted
790
799
  - [ ] Ambiguity scored and displayed every round, naming the weakest component/dimension target (rotating across active components when N > 1)
@@ -94,7 +94,7 @@ Follow the Plan skill's full documentation for consensus mode details.
94
94
 
95
95
  The Planner is a **same-session persisted subagent**: launched detached once, awaited before the Architect, then **resumed** with consolidated Architect + Critic feedback on every re-review pass instead of being re-spawned. The Architect and Critic stay **fresh, independent spawns each pass** so their verdicts remain reproducible from their pass artifacts alone. Do NOT modify the subagent control surface; this orchestration uses the existing `subagent` resume/steer controls only.
96
96
 
97
- **Persistence boundary:** this is same-parent, active-session continuity only. Resumability depends on the in-memory subagent record (and a persistent parent session an in-memory parent yields `resumable:false`), not just a session file. The `.gjc` run-state record is an audit/routing hint, NOT a durable cross-process subagent registry. After a process restart, a missing record, or any unavailable/failed resume, use the fresh Planner fallback.
97
+ **Persistence boundary:** this is same-parent, active-session continuity only. Resumability depends on the manager's retained subagent resume metadata and a persistent parent session (an in-memory parent yields `resumable:false`), not just the `.gjc` run-state record. A terminal subagent whose live job record was evicted can still be resumed when its retained resume descriptor points at a saved subagent session file. After a process restart, missing resume metadata, or any unavailable/failed resume, use the fresh Planner fallback.
98
98
 
99
99
  **Resume routing table** (per re-review pass, when resuming the persisted Planner id):
100
100
 
@@ -102,7 +102,7 @@ The Planner is a **same-session persisted subagent**: launched detached once, aw
102
102
  |---|---|
103
103
  | `running` | `steer`/inject the consolidated feedback to the same id, then await — do NOT fresh-spawn |
104
104
  | `queued` | retain/update the queued message or await the same id — do NOT fresh-spawn just because it is queued |
105
- | `context_unavailable`, `not_found`, `no_runner`, `resume_failed` | fresh Planner spawn for that pass; record the fallback metadata |
105
+ | `context_unavailable`, `not_found`, `no_runner`, `resume_failed` | fresh Planner spawn for that pass; record the fallback metadata. `not_found` should only mean same-session resume metadata is unavailable, not merely that a terminal live job was evicted. |
106
106
  | terminal (`completed`/`failed`/`cancelled`) + revision message | resume the same id when context is available; otherwise use the fresh fallback above |
107
107
 
108
108
  **Recording persisted-Planner metadata** (audit/routing only — never claim `subagent list` proves resumability, since the snapshot does not expose `resumable`). Ride these optional flags on the normal `--write` for the planner/revision stage of the pass:
@@ -192,7 +192,7 @@ An ultragoal story cannot be checkpointed `complete` until the active agent has
192
192
  5. Delegate an `executor` QA/red-team lane to build and run the e2e/red-teaming QA suite appropriate for the story. This lane must try to break the change, not just confirm the happy path. It must start from the approved plan/spec/acceptance criteria, then user-facing contracts, and only then implementation code as supporting evidence. Plan/code mismatches are blockers, not items to paper over with implementation intent.
193
193
  6. The executor QA/red-team lane must prove evidence by the real surface under test:
194
194
  - GUI/web surfaces require a valid automation transcript plus a non-uniform screenshot. Bare `inlineEvidence` text or typed receipts never prove live GUI/web execution.
195
- - CLI surfaces require runtime argv replay: `replaySafe: true`, an allowlisted argv `command`, and replayed normalized stdout matching `recordedStdout`; unsafe commands require audited `replayExempt` metadata plus a structurally valid fallback artifact.
195
+ - CLI surfaces require runtime argv replay: `replaySafe: true`, an allowlisted argv `command`, and replayed normalized stdout matching `recordedStdout`; unsafe commands require audited `replayExempt` metadata with exact fields `reasonCode`, `reason`, `approvedBy`, and `fallbackArtifactRefs` plus a structurally valid fallback artifact. Allowed `reasonCode` values are exactly `unsafe_side_effect`, `requires_credentials`, `requires_network`, `non_deterministic_external`, `destructive`, `interactive_only`, and `platform_unavailable`.
196
196
  - Native/desktop/tui surfaces require a structurally valid screenshot, PTY capture with terminal control codes, or app-automation transcript.
197
197
  - API/package/algorithm/math surfaces require a real artifact file or typed receipt. Bare `inlineEvidence` text alone is not sufficient for any surface.
198
198
  7. The executor QA/red-team lane must report a matrix using `executorQa.contractCoverage`, `executorQa.surfaceEvidence`, `executorQa.adversarialCases`, and `executorQa.artifactRefs`. Not-applicable rows are allowed only in `contractCoverage` and `surfaceEvidence`; each `status: "not_applicable"` row requires `contractRef` plus `reason`. `adversarialCases` rows cannot be not-applicable.
@@ -316,7 +316,7 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
316
316
  }
317
317
  ```
318
318
 
319
- For CLI replay artifacts, the JSON at `path` must be an object like `{"schemaVersion":1,"kind":"cli-replay","replaySafe":true,"command":["bun","-e","console.log(\"ultragoal-cli-ok\")"],"recordedStdout":"ultragoal-cli-ok\n"}`. Use `replayExempt` only for audited unsafe/non-deterministic invocations, with a substantive reason, approver, and same-surface fallback artifacts.
319
+ For CLI replay artifacts, the JSON at `path` must be an object like `{"schemaVersion":1,"kind":"cli-replay","replaySafe":true,"command":["bun","-e","console.log(\"ultragoal-cli-ok\")"],"recordedStdout":"ultragoal-cli-ok\n"}`. Use `replayExempt` only for audited unsafe/non-deterministic invocations, with exact fields `reasonCode`, `reason`, `approvedBy`, and `fallbackArtifactRefs`. `reason` must be substantive and audited, `approvedBy` must identify the verifier, and `fallbackArtifactRefs` must reference same-surface structurally valid fallback artifacts. Allowed `reasonCode` values are exactly `unsafe_side_effect`, `requires_credentials`, `requires_network`, `non_deterministic_external`, `destructive`, `interactive_only`, and `platform_unavailable`.
320
320
 
321
321
  ## Review mode
322
322
 
@@ -124,8 +124,8 @@ interface ResolvedDeepInterviewArgs {
124
124
  }
125
125
 
126
126
  interface DeepInterviewLanguagePreference {
127
- code: "en" | "ko";
128
- label: "English" | "Korean";
127
+ code: "en" | "user";
128
+ label: "English" | "User language";
129
129
  source: "explicit-user-request" | "initial-idea";
130
130
  instruction: string;
131
131
  }
@@ -239,21 +239,22 @@ function englishLanguagePreference(): DeepInterviewLanguagePreference {
239
239
  };
240
240
  }
241
241
 
242
+ function userLanguagePreference(): DeepInterviewLanguagePreference {
243
+ return {
244
+ code: "user",
245
+ label: "User language",
246
+ source: "initial-idea",
247
+ instruction:
248
+ "Ask every user-facing deep-interview question in the user/session language inferred from the initial idea unless the user explicitly requests another language. Keep code identifiers, file paths, commands, settings/JSON keys, library/API names, and quoted source text unchanged when appropriate.",
249
+ };
250
+ }
251
+
242
252
  function resolveDeepInterviewLanguagePreference(idea: string): DeepInterviewLanguagePreference | undefined {
243
253
  if (/\b(?:answer|ask|respond|reply|write|use|speak)\s+(?:only\s+)?in\s+English\b/i.test(idea)) {
244
254
  return englishLanguagePreference();
245
255
  }
246
- if (/(?:영어로|영문으로|영어\s*(?:질문|답변|응답)|English\s+only)/i.test(idea)) {
247
- return englishLanguagePreference();
248
- }
249
- if (/\p{Script=Hangul}/u.test(idea)) {
250
- return {
251
- code: "ko",
252
- label: "Korean",
253
- source: "initial-idea",
254
- instruction:
255
- "Ask every user-facing deep-interview question in Korean unless the user explicitly requests another language.",
256
- };
256
+ if (/[^\p{Script=Latin}\p{Script=Common}\p{Script=Inherited}]/u.test(idea)) {
257
+ return userLanguagePreference();
257
258
  }
258
259
  return undefined;
259
260
  }
@@ -13,6 +13,7 @@ import {
13
13
  } from "./ledger-event-renderer";
14
14
  import { isRestrictedRoleAgentBash } from "./restricted-role-agent-bash";
15
15
  import { migrateWorkflowState } from "./state-migrations";
16
+ import { runNativeStateCommand } from "./state-runtime";
16
17
  import {
17
18
  appendJsonlIdempotent,
18
19
  readExistingStateForMutation,
@@ -104,6 +105,10 @@ export function isRalplanArtifactWriteInvocation(args: readonly string[]): boole
104
105
  return hasFlag(args, "--write");
105
106
  }
106
107
 
108
+ function isRalplanDoctorInvocation(args: readonly string[]): boolean {
109
+ return args[0] === "doctor";
110
+ }
111
+
107
112
  function assertSafePathComponent(value: string, label: string): void {
108
113
  if (!PATH_COMPONENT_RE.test(value) || value.includes("..")) {
109
114
  throw new RalplanCommandError(2, `invalid path component for --${label}: ${value}`);
@@ -854,10 +859,15 @@ async function handleConsensusHandoff(args: readonly string[], cwd: string): Pro
854
859
  return { status: 0, stdout };
855
860
  }
856
861
 
862
+ async function handleDoctor(args: readonly string[], cwd: string): Promise<RalplanCommandResult> {
863
+ return await runNativeStateCommand(["doctor", "--skill", "ralplan", ...args.slice(1)], cwd);
864
+ }
865
+
857
866
  /* -------------------------------- entry --------------------------------- */
858
867
 
859
868
  export async function runNativeRalplanCommand(args: string[], cwd = process.cwd()): Promise<RalplanCommandResult> {
860
869
  try {
870
+ if (isRalplanDoctorInvocation(args)) return await handleDoctor(args, cwd);
861
871
  if (isRalplanArtifactWriteInvocation(args)) return await handleArtifactWrite(args, cwd);
862
872
  return await handleConsensusHandoff(args, cwd);
863
873
  } catch (error) {