@gajae-code/coding-agent 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/CHANGELOG.md +17 -0
  2. package/README.md +1 -1
  3. package/dist/types/cli/setup-cli.d.ts +8 -1
  4. package/dist/types/commands/setup.d.ts +7 -0
  5. package/dist/types/config/file-lock.d.ts +24 -2
  6. package/dist/types/config/model-registry.d.ts +4 -0
  7. package/dist/types/config/models-config-schema.d.ts +5 -0
  8. package/dist/types/config/settings-schema.d.ts +62 -0
  9. package/dist/types/gjc-runtime/state-writer.d.ts +64 -2
  10. package/dist/types/gjc-runtime/ultragoal-guard.d.ts +10 -0
  11. package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +29 -0
  12. package/dist/types/modes/components/provider-onboarding-selector.d.ts +1 -1
  13. package/dist/types/modes/interactive-mode.d.ts +1 -1
  14. package/dist/types/modes/rpc/rpc-mode.d.ts +56 -1
  15. package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +10 -0
  16. package/dist/types/modes/theme/defaults/index.d.ts +302 -0
  17. package/dist/types/modes/theme/theme.d.ts +1 -0
  18. package/dist/types/modes/types.d.ts +1 -1
  19. package/dist/types/session/history-storage.d.ts +2 -2
  20. package/dist/types/session/session-manager.d.ts +10 -1
  21. package/dist/types/setup/credential-import.d.ts +79 -0
  22. package/dist/types/task/executor.d.ts +1 -0
  23. package/dist/types/task/render.d.ts +1 -1
  24. package/dist/types/tools/subagent-render.d.ts +7 -1
  25. package/dist/types/tools/subagent.d.ts +21 -0
  26. package/dist/types/tools/ultragoal-ask-guard.d.ts +5 -0
  27. package/dist/types/web/search/index.d.ts +4 -4
  28. package/dist/types/web/search/provider.d.ts +16 -20
  29. package/dist/types/web/search/providers/base.d.ts +2 -1
  30. package/dist/types/web/search/providers/openai-compatible.d.ts +9 -0
  31. package/dist/types/web/search/types.d.ts +14 -2
  32. package/package.json +7 -7
  33. package/scripts/build-binary.ts +7 -0
  34. package/src/cli/args.ts +2 -0
  35. package/src/cli/fast-help.ts +2 -0
  36. package/src/cli/setup-cli.ts +138 -3
  37. package/src/commands/setup.ts +5 -1
  38. package/src/commands/ultragoal.ts +3 -1
  39. package/src/config/file-lock-gc.ts +14 -2
  40. package/src/config/file-lock.ts +54 -12
  41. package/src/config/model-profile-activation.ts +15 -3
  42. package/src/config/model-profiles.ts +15 -15
  43. package/src/config/model-registry.ts +21 -1
  44. package/src/config/models-config-schema.ts +1 -0
  45. package/src/config/settings-schema.ts +62 -0
  46. package/src/defaults/gjc/skills/ultragoal/SKILL.md +30 -8
  47. package/src/gjc-runtime/deep-interview-recorder.ts +40 -0
  48. package/src/gjc-runtime/launch-tmux.ts +3 -4
  49. package/src/gjc-runtime/ralplan-runtime.ts +174 -12
  50. package/src/gjc-runtime/state-runtime.ts +2 -1
  51. package/src/gjc-runtime/state-writer.ts +254 -7
  52. package/src/gjc-runtime/tmux-gc.ts +2 -1
  53. package/src/gjc-runtime/ultragoal-guard.ts +155 -0
  54. package/src/gjc-runtime/ultragoal-runtime.ts +1227 -31
  55. package/src/gjc-runtime/workflow-manifest.generated.json +44 -0
  56. package/src/gjc-runtime/workflow-manifest.ts +12 -0
  57. package/src/harness-control-plane/owner.ts +3 -2
  58. package/src/harness-control-plane/rpc-adapter.ts +1 -1
  59. package/src/hooks/skill-state.ts +121 -2
  60. package/src/internal-urls/docs-index.generated.ts +13 -9
  61. package/src/lsp/defaults.json +1 -0
  62. package/src/main.ts +14 -4
  63. package/src/modes/acp/acp-agent.ts +4 -2
  64. package/src/modes/bridge/bridge-mode.ts +2 -1
  65. package/src/modes/components/history-search.ts +5 -2
  66. package/src/modes/components/model-selector.ts +26 -0
  67. package/src/modes/components/provider-onboarding-selector.ts +6 -1
  68. package/src/modes/controllers/selector-controller.ts +80 -1
  69. package/src/modes/interactive-mode.ts +11 -1
  70. package/src/modes/rpc/rpc-mode.ts +132 -18
  71. package/src/modes/shared/agent-wire/command-dispatch.ts +5 -2
  72. package/src/modes/shared/agent-wire/host-tool-bridge.ts +3 -0
  73. package/src/modes/shared/agent-wire/unattended-session.ts +16 -1
  74. package/src/modes/theme/defaults/claude-code.json +100 -0
  75. package/src/modes/theme/defaults/codex.json +100 -0
  76. package/src/modes/theme/defaults/index.ts +6 -0
  77. package/src/modes/theme/defaults/opencode.json +102 -0
  78. package/src/modes/theme/theme.ts +2 -2
  79. package/src/modes/types.ts +1 -1
  80. package/src/prompts/agents/executor.md +5 -2
  81. package/src/sdk.ts +12 -1
  82. package/src/session/agent-session.ts +22 -11
  83. package/src/session/history-storage.ts +32 -11
  84. package/src/session/session-manager.ts +70 -18
  85. package/src/setup/credential-import.ts +429 -0
  86. package/src/skill-state/deep-interview-mutation-guard.ts +2 -1
  87. package/src/task/executor.ts +7 -1
  88. package/src/task/render.ts +18 -7
  89. package/src/tools/ask.ts +4 -2
  90. package/src/tools/cron.ts +1 -1
  91. package/src/tools/subagent-render.ts +119 -29
  92. package/src/tools/subagent.ts +147 -7
  93. package/src/tools/ultragoal-ask-guard.ts +39 -0
  94. package/src/web/search/index.ts +25 -25
  95. package/src/web/search/provider.ts +178 -87
  96. package/src/web/search/providers/base.ts +2 -1
  97. package/src/web/search/providers/openai-compatible.ts +151 -0
  98. package/src/web/search/types.ts +47 -22
@@ -2,6 +2,7 @@ import type { Effort } from "@gajae-code/ai/model-thinking";
2
2
  import { TASK_SIMPLE_MODES } from "../task/simple-mode";
3
3
  import { getThinkingLevelMetadata } from "../thinking-metadata";
4
4
  import { EDIT_MODES } from "../utils/edit-mode";
5
+ import { CONFIGURABLE_SEARCH_PROVIDER_IDS } from "../web/search/types";
5
6
 
6
7
  const THINKING_EFFORTS = ["minimal", "low", "medium", "high", "xhigh", "max"] as readonly Effort[];
7
8
 
@@ -164,6 +165,7 @@ interface EnumDef<T extends readonly string[]> {
164
165
  interface ArrayDef<T> {
165
166
  type: "array";
166
167
  default: T[];
168
+ items?: { enum: readonly string[] };
167
169
  ui?: UiBase;
168
170
  }
169
171
 
@@ -832,6 +834,55 @@ export const SETTINGS_SCHEMA = {
832
834
  },
833
835
  },
834
836
 
837
+ "task.serviceTier": {
838
+ type: "enum",
839
+ values: [
840
+ "inherit",
841
+ "none",
842
+ "auto",
843
+ "default",
844
+ "flex",
845
+ "scale",
846
+ "priority",
847
+ "openai-only",
848
+ "claude-only",
849
+ ] as const,
850
+ default: "inherit",
851
+ ui: {
852
+ tab: "tasks",
853
+ label: "Subagent Service Tier",
854
+ description:
855
+ 'Service tier applied to task-tool subagents only. "inherit" copies the main session tier; any explicit value overrides it for subagents without touching the main session.',
856
+ options: [
857
+ {
858
+ value: "inherit",
859
+ label: "Inherit",
860
+ description: "Use the main session's service tier (default)",
861
+ },
862
+ { value: "none", label: "None", description: "Omit service_tier for subagents" },
863
+ { value: "auto", label: "Auto", description: "Use provider default tier selection (OpenAI)" },
864
+ { value: "default", label: "Default", description: "Standard priority processing (OpenAI)" },
865
+ { value: "flex", label: "Flex", description: "Flexible capacity tier when available (OpenAI)" },
866
+ { value: "scale", label: "Scale", description: "Scale Tier credits when available (OpenAI)" },
867
+ {
868
+ value: "priority",
869
+ label: "Priority",
870
+ description: "Priority on every supported provider (OpenAI `service_tier`, Anthropic fast mode)",
871
+ },
872
+ {
873
+ value: "openai-only",
874
+ label: "Priority (OpenAI only)",
875
+ description: "Priority on OpenAI/OpenAI-Codex requests; ignored elsewhere",
876
+ },
877
+ {
878
+ value: "claude-only",
879
+ label: "Priority (Claude only)",
880
+ description: "Anthropic fast mode on direct Claude requests; ignored elsewhere (incl. Bedrock/Vertex)",
881
+ },
882
+ ],
883
+ },
884
+ },
885
+
835
886
  // Retries
836
887
  "retry.enabled": { type: "boolean", default: true },
837
888
 
@@ -2068,6 +2119,17 @@ export const SETTINGS_SCHEMA = {
2068
2119
  ui: { tab: "tools", label: "Web Search", description: "Enable the web_search tool for web searching" },
2069
2120
  },
2070
2121
 
2122
+ "web_search.fallback": {
2123
+ type: "array",
2124
+ default: EMPTY_STRING_ARRAY,
2125
+ items: { enum: CONFIGURABLE_SEARCH_PROVIDER_IDS },
2126
+ ui: {
2127
+ tab: "tools",
2128
+ label: "Web Search Fallback",
2129
+ description: "Ordered fallback web search providers after the active model native provider",
2130
+ },
2131
+ },
2132
+
2071
2133
  "browser.enabled": {
2072
2134
  type: "boolean",
2073
2135
  default: true,
@@ -191,10 +191,10 @@ An ultragoal story cannot be checkpointed `complete` until the active agent has
191
191
  - code-side: maintainability, tests, integration points, and unsafe shortcuts.
192
192
  5. Delegate an `executor` QA/red-team lane to build and run the e2e/red-teaming QA suite appropriate for the story. This lane must try to break the change, not just confirm the happy path. It must start from the approved plan/spec/acceptance criteria, then user-facing contracts, and only then implementation code as supporting evidence. Plan/code mismatches are blockers, not items to paper over with implementation intent.
193
193
  6. The executor QA/red-team lane must prove evidence by the real surface under test:
194
- - GUI/web surfaces require browser automation plus a screenshot or image verdict.
195
- - CLI surfaces require logs or terminal transcripts from real invocation.
196
- - API/package surfaces require external consumer or black-box tests through the public interface.
197
- - Algorithm/math surfaces require boundary, property, adversarial, and failure-mode cases.
194
+ - GUI/web surfaces require a valid automation transcript plus a non-uniform screenshot. Bare `inlineEvidence` text or typed receipts never prove live GUI/web execution.
195
+ - CLI surfaces require runtime argv replay: `replaySafe: true`, an allowlisted argv `command`, and replayed normalized stdout matching `recordedStdout`; unsafe commands require audited `replayExempt` metadata plus a structurally valid fallback artifact.
196
+ - Native/desktop/tui surfaces require a structurally valid screenshot, PTY capture with terminal control codes, or app-automation transcript.
197
+ - API/package/algorithm/math surfaces require a real artifact file or typed receipt. Bare `inlineEvidence` text alone is not sufficient for any surface.
198
198
  7. The executor QA/red-team lane must report a matrix using `executorQa.contractCoverage`, `executorQa.surfaceEvidence`, `executorQa.adversarialCases`, and `executorQa.artifactRefs`. Not-applicable rows are allowed only in `contractCoverage` and `surfaceEvidence`; each `status: "not_applicable"` row requires `contractRef` plus `reason`. `adversarialCases` rows cannot be not-applicable.
199
199
  8. Run a final code review pass and fold it into the strict quality gate. Clean means `architectReview.architectureStatus`, `architectReview.productStatus`, and `architectReview.codeStatus` are all `"CLEAR"`, `architectReview.recommendation` is `"APPROVE"`, executor QA statuses are `"passed"`, iteration is `"passed"` with `fullRerun: true`, every evidence field is non-empty, every required matrix row is present, and every blockers array is empty. `COMMENT`, `WATCH`, `REQUEST CHANGES`, `BLOCK`, missing evidence, missing or shallow matrix rows, plan/code mismatches, or non-empty blockers are non-clean.
200
200
  9. If any lane finds an issue, do **not** checkpoint `complete` and do **not** call `goal({"op":"complete"})`. Record durable blocker work instead:
@@ -204,6 +204,8 @@ An ultragoal story cannot be checkpointed `complete` until the active agent has
204
204
  10. Complete or steer through the blocker story, then rerun the full blocking verification loop. Repeat until all verifier lanes are clean.
205
205
  11. Only after the loop is clean, checkpoint the story as complete with a structured quality gate and a fresh active `goal({"op":"get"})` snapshot. The checkpoint creates a receipt; `goals.json.status` alone is not proof. In aggregate mode, the final aggregate receipt must exist before `goal({"op":"complete"})` is allowed.
206
206
 
207
+ While an Ultragoal run is active, the `ask` tool is blocked for all agents. Record unresolved review decisions as durable blockers with `gjc ultragoal record-review-blockers` instead of prompting interactively.
208
+
207
209
  The native `checkpoint --status complete` command rejects missing or shallow gates. `--quality-gate-json` must include:
208
210
 
209
211
  ```json
@@ -229,13 +231,19 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
229
231
  "id": "browser-run",
230
232
  "kind": "browser-automation",
231
233
  "path": "artifacts/browser-run.json",
232
- "description": "browser automation transcript invoking the approved user-facing flow"
234
+ "description": "valid automation transcript with actions, monotonic timestamps, and selectors"
233
235
  },
234
236
  {
235
237
  "id": "gui-screenshot",
236
238
  "kind": "screenshot",
237
239
  "path": "artifacts/gui-screenshot.png",
238
- "description": "screenshot or image-verdict evidence for the GUI/web result"
240
+ "description": "non-uniform screenshot evidence for the GUI/web result"
241
+ },
242
+ {
243
+ "id": "cli-replay",
244
+ "kind": "command-replay",
245
+ "path": "artifacts/cli-replay.json",
246
+ "description": "artifact file containing argv-only CLI replay JSON: schemaVersion 1, kind cli-replay, replaySafe true, allowlisted command, recordedStdout"
239
247
  },
240
248
  {
241
249
  "id": "adversarial-report",
@@ -265,15 +273,23 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
265
273
  {
266
274
  "id": "surface-gui",
267
275
  "contractRef": "user-facing surface or public interface under test",
268
- "surface": "gui|web|cli|api|package|algorithm|math",
276
+ "surface": "gui|web|cli|api|package|algorithm|math|native|desktop|tui",
269
277
  "invocation": "real browser action, CLI command, native/desktop/TUI capture, API/package consumer call, or algorithm/property check",
270
278
  "verdict": "passed",
271
279
  "artifactRefs": ["browser-run", "gui-screenshot"]
272
280
  },
281
+ {
282
+ "id": "surface-cli",
283
+ "contractRef": "CLI or command-line interface under test",
284
+ "surface": "cli",
285
+ "invocation": "argv replay executed by the Ultragoal runtime",
286
+ "verdict": "passed",
287
+ "artifactRefs": ["cli-replay"]
288
+ },
273
289
  {
274
290
  "id": "surface-out-of-scope",
275
291
  "contractRef": "surface intentionally outside this story",
276
- "surface": "gui|web|cli|api|package|algorithm|math",
292
+ "surface": "gui|web|cli|api|package|algorithm|math|native|desktop|tui",
277
293
  "status": "not_applicable",
278
294
  "reason": "why this surface does not apply to the current story"
279
295
  }
@@ -300,6 +316,12 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
300
316
  }
301
317
  ```
302
318
 
319
+ For CLI replay artifacts, the JSON at `path` must be an object like `{"schemaVersion":1,"kind":"cli-replay","replaySafe":true,"command":["bun","-e","console.log(\"ultragoal-cli-ok\")"],"recordedStdout":"ultragoal-cli-ok\n"}`. Use `replayExempt` only for audited unsafe/non-deterministic invocations, with a substantive reason, approver, and same-surface fallback artifacts.
320
+
321
+ ## Review mode
322
+
323
+ `gjc ultragoal review` runs the same hardened gate against an already implemented PR, branch, or worktree. Use `--pr <number>` for a PR, `--branch <ref>` for a branch diff, omit both for the current worktree, and pass `--spec <path>` when a real contract exists. `--mode review-only` emits the verdict/findings without creating fix work; `--mode review-start` records review blockers for follow-up. Review mode validates the same `executorQa` shape and live-surface artifacts as `checkpoint --status complete`. A thin or derived-only contract can never clean-pass: the verdict is capped at `inconclusive: weak-contract` until a supplied spec or equivalent strong acceptance criteria are available.
324
+
303
325
  Receipts are freshness-scoped:
304
326
  - Per-goal receipts remain fresh for their target goal unless that goal, its blocker metadata, or its supersession metadata changes.
305
327
  - Normal later `goal_started` or clean receipt-backed `goal_checkpointed` events for other goals do not stale older per-goal receipts.
@@ -388,6 +388,34 @@ export async function appendOrMergeDeepInterviewRound(
388
388
  return { action: result.action, record: result.record };
389
389
  }
390
390
 
391
/**
 * Pick the scored round that chronologically precedes the round being scored.
 *
 * Among `rounds`, only entries that are `scored`, carry a different `round_key`
 * than `currentKey`, and have a finite `round` strictly below `currentRound` are
 * eligible; the eligible entry with the largest `round` wins. Ordering is decided
 * by the `round` number rather than array position, so re-scoring an earlier round
 * is never compared against a later round that merely appears further along the
 * array. When several eligible entries share the largest `round`, the first one
 * encountered is kept.
 *
 * Fail-safe: a non-finite `currentRound` yields `undefined` immediately, and any
 * candidate whose `round` is not finite is ignored.
 *
 * @param rounds - Recorded rounds to search.
 * @param currentKey - `round_key` of the round being scored (excluded from the search).
 * @param currentRound - `round` number of the round being scored.
 * @returns The chosen predecessor, or `undefined` when none qualifies.
 */
function latestPriorScoredRound(
	rounds: readonly DeepInterviewRoundRecord[],
	currentKey: string,
	currentRound: number,
): DeepInterviewRoundRecord | undefined {
	if (!Number.isFinite(currentRound)) return undefined;

	// A round qualifies only when every one of these conditions holds.
	const qualifies = (entry: DeepInterviewRoundRecord): boolean =>
		entry.lifecycle === "scored" &&
		entry.round_key !== currentKey &&
		Number.isFinite(entry.round) &&
		entry.round < currentRound;

	return rounds.reduce<DeepInterviewRoundRecord | undefined>((best, entry) => {
		if (!qualifies(entry)) return best;
		// Strict comparison keeps the earliest entry on ties.
		return best === undefined || entry.round > best.round ? entry : best;
	}, undefined);
}
418
+
391
419
  /** Merge scoring output into the same round record, transitioning to `scored`. */
392
420
  export async function enrichDeepInterviewRoundScoring(
393
421
  cwd: string,
@@ -399,6 +427,18 @@ export async function enrichDeepInterviewRoundScoring(
399
427
  const interviewId = input.interviewId ?? interviewIdOf(envelope);
400
428
  const rounds = readRounds(envelope);
401
429
  const { rounds: nextRounds, record } = enrichRoundWithScoring(rounds, { ...input, interviewId });
430
+ // Fail closed: a scored transition that violates the bidirectional invariant
431
+ // (an active trigger that improves the affected dimension or fails to raise
432
+ // overall ambiguity, or a disputed/unresolved trigger lacking a rationale) must
433
+ // never be persisted — storing it lets the interview falsely converge. Validate
434
+ // against the most recent prior scored round before writing any durable state.
435
+ const prior = latestPriorScoredRound(rounds, record.round_key, record.round);
436
+ const validation = validateDeepInterviewScoredTransition(prior, record);
437
+ if (!validation.ok) {
438
+ throw new Error(
439
+ `deep-interview scored transition for round ${record.round} is invalid and was refused: ${validation.violations.join("; ")}`,
440
+ );
441
+ }
402
442
  (envelope.state as Record<string, unknown>).rounds = nextRounds;
403
443
  (envelope.state as Record<string, unknown>).current_ambiguity = input.ambiguity;
404
444
  await persistEnvelope(cwd, statePath, envelope, options.sessionId, "gjc deep-interview score-round");
@@ -1,4 +1,5 @@
1
1
  import * as path from "node:path";
2
+ import { safeStderrWrite } from "@gajae-code/utils";
2
3
  import type { Args } from "../cli/args";
3
4
  import {
4
5
  buildGjcTmuxProfileCommands,
@@ -280,7 +281,7 @@ export function launchDefaultTmuxIfNeeded(context: TmuxLaunchContext): boolean {
280
281
  cleanupCreatedTmuxSession(plan, spawnSync, options);
281
282
  const failure =
282
283
  profile.failures.find(item => item.command.args.includes("@gjc-profile")) ?? profile.failures[0];
283
- (context.diagnosticWriter ?? process.stderr.write.bind(process.stderr))(
284
+ (context.diagnosticWriter ?? safeStderrWrite)(
284
285
  formatTmuxLaunchDiagnostic("profile tagging failed", failure?.stderr),
285
286
  );
286
287
  return true;
@@ -289,8 +290,6 @@ export function launchDefaultTmuxIfNeeded(context: TmuxLaunchContext): boolean {
289
290
  if (created.exitCode !== 0) return false;
290
291
  const attached = spawnSync(plan.tmuxCommand, ["attach-session", "-t", plan.sessionName], options);
291
292
  if (attached.exitCode === 0) return true;
292
- (context.diagnosticWriter ?? process.stderr.write.bind(process.stderr))(
293
- formatTmuxLaunchDiagnostic("attach failed", attached.stderr),
294
- );
293
+ (context.diagnosticWriter ?? safeStderrWrite)(formatTmuxLaunchDiagnostic("attach failed", attached.stderr));
295
294
  return true;
296
295
  }
@@ -13,7 +13,12 @@ import {
13
13
  } from "./ledger-event-renderer";
14
14
  import { isRestrictedRoleAgentBash } from "./restricted-role-agent-bash";
15
15
  import { migrateWorkflowState } from "./state-migrations";
16
- import { appendJsonl, readExistingStateForMutation, writeArtifact, writeWorkflowEnvelopeAtomic } from "./state-writer";
16
+ import {
17
+ appendJsonlIdempotent,
18
+ readExistingStateForMutation,
19
+ writeArtifact,
20
+ writeWorkflowEnvelopeAtomic,
21
+ } from "./state-writer";
17
22
 
18
23
  /**
19
24
  * Native implementation of `gjc ralplan`.
@@ -186,7 +191,37 @@ async function readActiveRunId(cwd: string, sessionId: string | undefined): Prom
186
191
  return candidate;
187
192
  }
188
193
 
189
- async function persistActiveRunId(cwd: string, sessionId: string | undefined, runId: string): Promise<void> {
194
/**
 * Run-state phases that an artifact write is not allowed to reopen. Once ralplan
 * sits in one of these terminal/handed-off phases, a stray `--write` must leave
 * `current_phase` alone instead of regressing it to a stage, which would silently
 * re-arm a chain guard or undo Stop semantics. Any other phase simply follows the
 * stage that was just persisted.
 */
const PHASE_LOCK = new Set([
	"final",
	"handoff",
	"complete",
	"completed",
	"failed",
	"cancelled",
	"canceled",
	"inactive",
]);

/**
 * Compute the `current_phase` to store after writing an artifact for `stage`.
 *
 * @param existingPhase - Phase currently stored in run-state; any non-string is treated as absent.
 * @param stage - Stage whose artifact is being written.
 * @returns The trimmed existing phase when it is one of the locked phases, otherwise `stage`.
 */
function advanceCurrentPhase(existingPhase: unknown, stage: RalplanStage): string {
	if (typeof existingPhase !== "string") return stage;
	const normalized = existingPhase.trim();
	// Matching is exact (case-sensitive) against the locked set after trimming.
	return normalized.length > 0 && PHASE_LOCK.has(normalized) ? normalized : stage;
}
218
+
219
+ async function persistActiveRunId(
220
+ cwd: string,
221
+ sessionId: string | undefined,
222
+ runId: string,
223
+ stage: RalplanStage,
224
+ ): Promise<void> {
190
225
  const statePath = ralplanStatePath(cwd, sessionId);
191
226
  const existingRead = await readExistingStateForMutation(statePath);
192
227
  if (existingRead.kind === "corrupt") {
@@ -197,11 +232,25 @@ async function persistActiveRunId(cwd: string, sessionId: string | undefined, ru
197
232
  }
198
233
  let existing: Record<string, unknown> = existingRead.kind === "valid" ? existingRead.value : {};
199
234
 
200
- if (existing.run_id === runId && existing.version === WORKFLOW_STATE_VERSION) return;
235
+ // A new run_id is a fresh run, not a stray write on the prior run: never inherit a
236
+ // previous run's terminal/locked phase (which would start the new run already
237
+ // "complete"/"handoff" and disarm the Stop hook). PHASE_LOCK only guards same-run writes.
238
+ const isNewRun = existing.run_id !== runId;
239
+ const nextPhase = isNewRun ? stage : advanceCurrentPhase(existing.current_phase, stage);
240
+ if (
241
+ existing.run_id === runId &&
242
+ existing.version === WORKFLOW_STATE_VERSION &&
243
+ existing.current_phase === nextPhase &&
244
+ (existing.active === true || PHASE_LOCK.has(nextPhase))
245
+ ) {
246
+ return;
247
+ }
201
248
  existing.run_id = runId;
202
249
  if (typeof existing.skill !== "string") existing.skill = "ralplan";
203
- if (typeof existing.active !== "boolean") existing.active = true;
204
- if (typeof existing.current_phase !== "string") existing.current_phase = "planner";
250
+ // A successful persist means ralplan is actively writing this run's artifacts, so always
251
+ // re-assert active. Fallback-only init left active:false after a clear (#644, sibling of #638).
252
+ existing.active = true;
253
+ existing.current_phase = nextPhase;
205
254
  existing = migrateWorkflowState(existing, "ralplan").state;
206
255
  existing.updated_at = new Date().toISOString();
207
256
  await writeWorkflowEnvelopeAtomic(statePath, existing, {
@@ -381,8 +430,6 @@ async function resolveArtifactArgs(args: readonly string[], cwd: string): Promis
381
430
  const explicitRunId = flagValue(args, "--run-id")?.trim();
382
431
  const runId = explicitRunId || (await readActiveRunId(cwd, sessionId)) || sessionIdRaw || defaultRunId();
383
432
  assertSafePathComponent(runId, "run-id");
384
- // Persist the active run id so later writes in the same loop land in the same directory.
385
- await persistActiveRunId(cwd, sessionId, runId);
386
433
 
387
434
  const artifact = await resolveArtifactContent(rawArtifact, cwd);
388
435
  return { stage: stage as RalplanStage, stageN, runId, artifact, sessionId, json: hasFlag(args, "--json") };
@@ -398,18 +445,34 @@ interface PersistedArtifact {
398
445
  pendingApprovalPath?: string;
399
446
  }
400
447
 
401
- async function persistArtifact(resolved: ResolvedArtifactArgs, cwd: string): Promise<PersistedArtifact> {
448
/**
 * Derive the content-addressed dedup identity of an `index.jsonl` row.
 *
 * Two rows share a key when their `stage`, `stage_n`, and `sha256` all match, which
 * is the repeated `--write` of identical content (#638) that the idempotent append
 * collapses.
 *
 * @param entry - Candidate ledger row of unknown shape.
 * @returns The three fields joined with NUL separators, or `undefined` when `entry`
 * is not a plain (non-array) object or lacks a string `stage`, numeric `stage_n`, or
 * string `sha256` — such rows opt out of dedup.
 */
function ralplanIndexKey(entry: unknown): string | undefined {
	if (typeof entry !== "object" || entry === null || Array.isArray(entry)) return undefined;

	const row = entry as Record<string, unknown>;
	const stageName = row.stage;
	const stageNumber = row.stage_n;
	const digest = row.sha256;

	const wellFormed =
		typeof stageName === "string" && typeof stageNumber === "number" && typeof digest === "string";
	if (!wellFormed) return undefined;

	return [stageName, stageNumber, digest].join("\u0000");
}
460
+
461
+ async function persistArtifact(
462
+ resolved: ResolvedArtifactArgs,
463
+ cwd: string,
464
+ content: string,
465
+ sha256: string,
466
+ ): Promise<PersistedArtifact> {
402
467
  const runDir = path.join(cwd, ".gjc", "plans", "ralplan", resolved.runId);
403
468
 
404
469
  const fileName = `stage-${pad2(resolved.stageN)}-${resolved.stage}.md`;
405
470
  const filePath = path.join(runDir, fileName);
406
- const content = resolved.artifact.endsWith("\n") ? resolved.artifact : `${resolved.artifact}\n`;
407
471
  await writeArtifact(filePath, content, {
408
472
  cwd,
409
473
  audit: { category: "artifact", verb: "write", owner: "gjc-runtime", skill: "ralplan" },
410
474
  });
411
475
 
412
- const sha256 = createHash("sha256").update(content).digest("hex");
413
476
  const createdAt = new Date().toISOString();
414
477
  const indexEntry = {
415
478
  stage: resolved.stage,
@@ -418,9 +481,10 @@ async function persistArtifact(resolved: ResolvedArtifactArgs, cwd: string): Pro
418
481
  created_at: createdAt,
419
482
  sha256,
420
483
  };
421
- await appendJsonl(path.join(runDir, "index.jsonl"), indexEntry, {
484
+ await appendJsonlIdempotent(path.join(runDir, "index.jsonl"), indexEntry, {
422
485
  cwd,
423
486
  audit: { category: "ledger", verb: "append", owner: "gjc-runtime", skill: "ralplan" },
487
+ key: ralplanIndexKey,
424
488
  });
425
489
 
426
490
  let pendingApprovalPath: string | undefined;
@@ -443,6 +507,56 @@ async function persistArtifact(resolved: ResolvedArtifactArgs, cwd: string): Pro
443
507
  };
444
508
  }
445
509
 
510
/** The persisted `(stage, stage_n)` artifact as recorded in a run's `index.jsonl`. */
interface ExistingStageArtifact {
	/** `path` field of the matching ledger row. */
	path: string;
	/** `sha256` field of the matching ledger row. */
	sha256: string;
	/** `created_at` field of the matching ledger row, or `""` when the row has no string value for it. */
	createdAt: string;
}

/**
 * Look up the latest `index.jsonl` row for a `(stage, stage_n)` pair, so that a
 * repeated `--write` can be deduplicated instead of silently clobbering the
 * artifact and appending a duplicate ledger row. The ledger is consulted because
 * it is exactly what a duplicate write would corrupt.
 *
 * Best-effort by design: if the index cannot be read for any reason the result is
 * `undefined`, meaning "no prior artifact". Blank lines, lines that are not valid
 * JSON, non-object rows, and rows without string `path`/`sha256` are skipped.
 *
 * @param cwd - Directory under which `.gjc/plans/ralplan/<runId>/index.jsonl` is resolved.
 * @param runId - Run whose index is inspected.
 * @param stage - Stage to match against each row's `stage`.
 * @param stageN - Stage number to match against each row's `stage_n`.
 * @returns The last matching row in file order, or `undefined` when there is none.
 */
async function findExistingStageArtifact(
	cwd: string,
	runId: string,
	stage: RalplanStage,
	stageN: number,
): Promise<ExistingStageArtifact | undefined> {
	const ledgerFile = path.join(cwd, ".gjc", "plans", "ralplan", runId, "index.jsonl");

	let ledgerText: string;
	try {
		ledgerText = await fs.readFile(ledgerFile, "utf8");
	} catch {
		// Missing or unreadable index: deliberately treated as "nothing persisted yet".
		return undefined;
	}

	// Turn one ledger line into a plain-object row, or undefined when it is unusable.
	const toRecord = (line: string): Record<string, unknown> | undefined => {
		const candidate = line.trim();
		if (candidate.length === 0) return undefined;
		try {
			const parsed: unknown = JSON.parse(candidate);
			const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
			return isPlainObject ? (parsed as Record<string, unknown>) : undefined;
		} catch {
			return undefined;
		}
	};

	let latest: ExistingStageArtifact | undefined;
	for (const line of ledgerText.split(/\r?\n/)) {
		const record = toRecord(line);
		if (record === undefined) continue;

		const samePair = record.stage === stage && record.stage_n === stageN;
		if (!samePair) continue;

		const { path: artifactPath, sha256: digest, created_at: created } = record;
		if (typeof artifactPath !== "string" || typeof digest !== "string") continue;

		// Later rows overwrite earlier ones, so the final value is the most recent row.
		latest = {
			path: artifactPath,
			sha256: digest,
			createdAt: typeof created === "string" ? created : "",
		};
	}
	return latest;
}
559
+
446
560
  /**
447
561
  * Read and parse the run's `index.jsonl` rows. Best-effort: returns [] when the
448
562
  * file is absent or unreadable so HUD sync never fails on a missing index.
@@ -518,7 +632,26 @@ async function buildRalplanHud(options: {
518
632
  async function handleArtifactWrite(args: readonly string[], cwd: string): Promise<RalplanCommandResult> {
519
633
  const plannerState = parsePlannerStateArgs(args);
520
634
  const resolved = await resolveArtifactArgs(args, cwd);
521
- const persisted = await persistArtifact(resolved, cwd);
635
+ const content = resolved.artifact.endsWith("\n") ? resolved.artifact : `${resolved.artifact}\n`;
636
+ const sha256 = createHash("sha256").update(content).digest("hex");
637
+
638
+ // Duplicate-write guard: a second `--write` for the same (stage, stage_n) must not
639
+ // silently clobber the artifact or append a duplicate ledger row. Classify before any
640
+ // state mutation so a conflict never regresses run-state phase.
641
+ const existingArtifact = await findExistingStageArtifact(cwd, resolved.runId, resolved.stage, resolved.stageN);
642
+ if (existingArtifact) {
643
+ if (existingArtifact.sha256 !== sha256) {
644
+ throw new RalplanCommandError(
645
+ 2,
646
+ `refusing to overwrite ralplan ${resolved.stage} stage ${resolved.stageN} at ${existingArtifact.path}: an artifact with different content already exists (existing sha256=${existingArtifact.sha256}, new sha256=${sha256}). Use a new --stage_n to record another pass.`,
647
+ );
648
+ }
649
+ return buildDeduplicatedResult(resolved, existingArtifact, sha256, cwd);
650
+ }
651
+
652
+ // Keep run-state `current_phase` coherent with the stage being persisted.
653
+ await persistActiveRunId(cwd, resolved.sessionId, resolved.runId, resolved.stage);
654
+ const persisted = await persistArtifact(resolved, cwd, content, sha256);
522
655
  if (plannerState) {
523
656
  await applyPlannerStateUpdate(cwd, resolved.sessionId, plannerState);
524
657
  }
@@ -547,6 +680,35 @@ async function handleArtifactWrite(args: readonly string[], cwd: string): Promis
547
680
  return { status: 0, stdout };
548
681
  }
549
682
 
683
/**
 * Build the no-op receipt returned when a `--write` repeats content that is already
 * persisted. This function only formats output: it writes no file, appends no ledger
 * row, and touches no run-state. The `deduplicated: true` field lets callers tell
 * this receipt apart from a fresh write.
 *
 * @param resolved - Parsed write arguments (run id, stage, stage number, `--json` flag).
 * @param existing - The already-persisted artifact found in the ledger.
 * @param sha256 - Digest of the content being written.
 * @param cwd - Working directory used to derive the pending-approval path for the `final` stage.
 * @returns A status-0 result whose stdout is pretty-printed JSON when `resolved.json`
 * is set, otherwise a single human-readable line.
 */
function buildDeduplicatedResult(
	resolved: ResolvedArtifactArgs,
	existing: ExistingStageArtifact,
	sha256: string,
	cwd: string,
): RalplanCommandResult {
	const { runId, stage, stageN } = resolved;

	// Insertion order is the serialized key order, so it is kept stable here.
	const receipt: Record<string, unknown> = {
		run_id: runId,
		path: existing.path,
		stage,
		stage_n: stageN,
		sha256,
		created_at: existing.createdAt,
		deduplicated: true,
	};
	if (stage === "final") {
		receipt.pending_approval_path = path.join(cwd, ".gjc", "plans", "ralplan", runId, "pending-approval.md");
	}

	if (resolved.json) {
		return { status: 0, stdout: `${JSON.stringify(receipt, null, 2)}\n` };
	}
	return {
		status: 0,
		stdout: `ralplan ${stage} stage ${stageN} already persisted at ${existing.path} (identical content; no changes written).\n`,
	};
}
711
+
550
712
  /* -------------------------------- handoff -------------------------------- */
551
713
 
552
714
  interface ConsensusHandoffArgs {
@@ -52,6 +52,7 @@ import {
52
52
  type StateWriterAuditContext,
53
53
  softDelete,
54
54
  updateWorkflowTransactionJournal,
55
+ type WorkflowEnvelopeIntegrityMismatch,
55
56
  writeWorkflowEnvelopeAtomic,
56
57
  } from "./state-writer";
57
58
  import { getSkillManifest, isKnownWorkflowState, isValidTransition, typedArgsFor } from "./workflow-manifest";
@@ -659,7 +660,7 @@ async function warnAndAuditOutOfBandIfNeeded(
659
660
  skill: CanonicalGjcWorkflowSkill,
660
661
  options?: { mutationId?: string; forced?: boolean },
661
662
  ): Promise<string | undefined> {
662
- let mismatch: Awaited<ReturnType<typeof detectWorkflowEnvelopeIntegrityMismatch>>;
663
+ let mismatch: WorkflowEnvelopeIntegrityMismatch | undefined;
663
664
  try {
664
665
  mismatch = await detectWorkflowEnvelopeIntegrityMismatch(filePath);
665
666
  } catch {