npm - @gajae-code/coding-agent - Versions diffs - 0.5.0 → 0.5.2 - Mend

@gajae-code/coding-agent 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (194) hide show

package/CHANGELOG.md +36 -0
package/README.md +1 -1
package/dist/types/async/job-manager.d.ts +26 -0
package/dist/types/cli/args.d.ts +1 -0
package/dist/types/cli/list-models.d.ts +6 -0
package/dist/types/cli/setup-cli.d.ts +8 -1
package/dist/types/commands/gc.d.ts +26 -0
package/dist/types/commands/setup.d.ts +7 -0
package/dist/types/config/file-lock-gc.d.ts +5 -0
package/dist/types/config/file-lock.d.ts +29 -0
package/dist/types/config/model-registry.d.ts +4 -0
package/dist/types/config/models-config-schema.d.ts +5 -0
package/dist/types/config/settings-schema.d.ts +62 -0
package/dist/types/coordinator/contract.d.ts +1 -1
package/dist/types/defaults/gjc/extensions/grok-build/index.d.ts +1 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/index.d.ts +1 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/models/catalog.d.ts +25 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/payload/sanitize.d.ts +27 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/provider/billing.d.ts +8 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/provider/register.d.ts +5 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/provider/stream.d.ts +10 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/provider/usage.d.ts +2 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/shared/base-url.d.ts +2 -0
package/dist/types/defaults/gjc/extensions/grok-cli-vendor/src/shared/errors.d.ts +38 -0
package/dist/types/defaults/gjc-grok-cli.d.ts +5 -0
package/dist/types/extensibility/extensions/index.d.ts +1 -0
package/dist/types/extensibility/extensions/prefix-command-bridge.d.ts +35 -0
package/dist/types/gjc-runtime/deep-interview-recorder.d.ts +103 -0
package/dist/types/gjc-runtime/deep-interview-runtime.d.ts +2 -0
package/dist/types/gjc-runtime/deep-interview-state.d.ts +112 -0
package/dist/types/gjc-runtime/gc-render.d.ts +6 -0
package/dist/types/gjc-runtime/gc-runtime.d.ts +134 -0
package/dist/types/gjc-runtime/ledger-event-renderer.d.ts +68 -0
package/dist/types/gjc-runtime/state-writer.d.ts +64 -2
package/dist/types/gjc-runtime/team-gc.d.ts +7 -0
package/dist/types/gjc-runtime/team-runtime.d.ts +5 -0
package/dist/types/gjc-runtime/tmux-common.d.ts +11 -0
package/dist/types/gjc-runtime/tmux-gc.d.ts +7 -0
package/dist/types/gjc-runtime/tmux-sessions.d.ts +13 -0
package/dist/types/gjc-runtime/ultragoal-guard.d.ts +10 -0
package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +29 -0
package/dist/types/harness-control-plane/gc-adapter.d.ts +3 -0
package/dist/types/harness-control-plane/owner.d.ts +7 -0
package/dist/types/harness-control-plane/storage.d.ts +20 -0
package/dist/types/modes/components/hook-selector.d.ts +7 -1
package/dist/types/modes/components/provider-onboarding-selector.d.ts +1 -1
package/dist/types/modes/controllers/command-controller.d.ts +1 -0
package/dist/types/modes/interactive-mode.d.ts +1 -1
package/dist/types/modes/rpc/rpc-mode.d.ts +72 -2
package/dist/types/modes/shared/agent-wire/deep-interview-gate.d.ts +13 -0
package/dist/types/modes/shared/agent-wire/session-registry.d.ts +25 -0
package/dist/types/modes/shared/agent-wire/unattended-action-policy.d.ts +2 -0
package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +10 -0
package/dist/types/modes/theme/defaults/index.d.ts +302 -0
package/dist/types/modes/theme/theme.d.ts +1 -0
package/dist/types/modes/types.d.ts +1 -1
package/dist/types/session/agent-session.d.ts +1 -1
package/dist/types/session/blob-store.d.ts +39 -3
package/dist/types/session/history-storage.d.ts +2 -2
package/dist/types/session/session-manager.d.ts +10 -1
package/dist/types/setup/credential-import.d.ts +79 -0
package/dist/types/skill-state/workflow-hud.d.ts +14 -0
package/dist/types/task/executor.d.ts +1 -0
package/dist/types/task/render.d.ts +1 -1
package/dist/types/tools/ask.d.ts +15 -1
package/dist/types/tools/subagent-render.d.ts +7 -1
package/dist/types/tools/subagent.d.ts +27 -0
package/dist/types/tools/ultragoal-ask-guard.d.ts +5 -0
package/dist/types/web/search/index.d.ts +4 -4
package/dist/types/web/search/provider.d.ts +16 -20
package/dist/types/web/search/providers/base.d.ts +2 -1
package/dist/types/web/search/providers/openai-compatible.d.ts +9 -0
package/dist/types/web/search/types.d.ts +14 -2
package/package.json +7 -7
package/scripts/build-binary.ts +7 -0
package/src/async/job-manager.ts +52 -0
package/src/cli/args.ts +5 -0
package/src/cli/auth-broker-cli.ts +1 -0
package/src/cli/fast-help.ts +2 -0
package/src/cli/list-models.ts +13 -1
package/src/cli/setup-cli.ts +138 -3
package/src/cli.ts +1 -0
package/src/commands/gc.ts +22 -0
package/src/commands/harness.ts +7 -3
package/src/commands/setup.ts +5 -1
package/src/commands/ultragoal.ts +3 -1
package/src/config/file-lock-gc.ts +193 -0
package/src/config/file-lock.ts +66 -10
package/src/config/model-profile-activation.ts +15 -3
package/src/config/model-profiles.ts +39 -30
package/src/config/model-registry.ts +21 -1
package/src/config/models-config-schema.ts +1 -0
package/src/config/settings-schema.ts +62 -0
package/src/coordinator/contract.ts +1 -0
package/src/coordinator-mcp/server.ts +459 -3
package/src/defaults/gjc/agent.models.grok-cli.yml +36 -0
package/src/defaults/gjc/extensions/grok-build/index.ts +1 -0
package/src/defaults/gjc/extensions/grok-build/package.json +7 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/biome.json +39 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/package.json +8 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/index.ts +1 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/models/catalog.ts +155 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/payload/sanitize.ts +361 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/provider/billing.ts +57 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/provider/register.ts +99 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/provider/stream.ts +50 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/provider/usage.ts +56 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/shared/base-url.ts +36 -0
package/src/defaults/gjc/extensions/grok-cli-vendor/src/shared/errors.ts +44 -0
package/src/defaults/gjc/skills/deep-interview/SKILL.md +131 -113
package/src/defaults/gjc/skills/deep-interview/lateral-review-panel.md +49 -0
package/src/defaults/gjc/skills/ultragoal/SKILL.md +30 -8
package/src/defaults/gjc-defaults.ts +7 -0
package/src/defaults/gjc-grok-cli.ts +22 -0
package/src/extensibility/extensions/index.ts +1 -0
package/src/extensibility/extensions/prefix-command-bridge.ts +128 -0
package/src/gjc-runtime/deep-interview-recorder.ts +457 -0
package/src/gjc-runtime/deep-interview-runtime.ts +18 -26
package/src/gjc-runtime/deep-interview-state.ts +324 -0
package/src/gjc-runtime/gc-render.ts +70 -0
package/src/gjc-runtime/gc-runtime.ts +403 -0
package/src/gjc-runtime/launch-tmux.ts +3 -4
package/src/gjc-runtime/ledger-event-renderer.ts +164 -0
package/src/gjc-runtime/ralplan-runtime.ts +232 -19
package/src/gjc-runtime/state-renderer.ts +12 -3
package/src/gjc-runtime/state-runtime.ts +48 -30
package/src/gjc-runtime/state-writer.ts +254 -7
package/src/gjc-runtime/team-gc.ts +49 -0
package/src/gjc-runtime/team-runtime.ts +179 -2
package/src/gjc-runtime/tmux-common.ts +14 -0
package/src/gjc-runtime/tmux-gc.ts +177 -0
package/src/gjc-runtime/tmux-sessions.ts +49 -1
package/src/gjc-runtime/ultragoal-guard.ts +155 -0
package/src/gjc-runtime/ultragoal-runtime.ts +1239 -31
package/src/gjc-runtime/workflow-manifest.generated.json +44 -0
package/src/gjc-runtime/workflow-manifest.ts +12 -0
package/src/harness-control-plane/gc-adapter.ts +184 -0
package/src/harness-control-plane/owner.ts +14 -2
package/src/harness-control-plane/rpc-adapter.ts +1 -1
package/src/harness-control-plane/storage.ts +70 -0
package/src/hooks/skill-state.ts +121 -2
package/src/internal-urls/docs-index.generated.ts +22 -12
package/src/lsp/defaults.json +1 -0
package/src/main.ts +18 -3
package/src/modes/acp/acp-agent.ts +4 -2
package/src/modes/bridge/bridge-mode.ts +2 -1
package/src/modes/components/history-search.ts +5 -2
package/src/modes/components/hook-selector.ts +19 -0
package/src/modes/components/model-selector.ts +51 -8
package/src/modes/components/provider-onboarding-selector.ts +6 -1
package/src/modes/components/status-line/segments.ts +1 -1
package/src/modes/controllers/command-controller.ts +25 -6
package/src/modes/controllers/extension-ui-controller.ts +3 -0
package/src/modes/controllers/selector-controller.ts +81 -1
package/src/modes/interactive-mode.ts +11 -1
package/src/modes/rpc/rpc-mode.ts +266 -34
package/src/modes/shared/agent-wire/command-dispatch.ts +281 -261
package/src/modes/shared/agent-wire/deep-interview-gate.ts +30 -1
package/src/modes/shared/agent-wire/host-tool-bridge.ts +3 -0
package/src/modes/shared/agent-wire/session-registry.ts +109 -0
package/src/modes/shared/agent-wire/unattended-action-policy.ts +24 -0
package/src/modes/shared/agent-wire/unattended-run-controller.ts +23 -3
package/src/modes/shared/agent-wire/unattended-session.ts +32 -2
package/src/modes/theme/defaults/claude-code.json +100 -0
package/src/modes/theme/defaults/codex.json +100 -0
package/src/modes/theme/defaults/index.ts +6 -0
package/src/modes/theme/defaults/opencode.json +102 -0
package/src/modes/theme/theme.ts +2 -2
package/src/modes/types.ts +1 -1
package/src/prompts/agents/executor.md +5 -2
package/src/sdk.ts +29 -4
package/src/session/agent-session.ts +99 -19
package/src/session/blob-store.ts +59 -3
package/src/session/history-storage.ts +32 -11
package/src/session/session-manager.ts +72 -20
package/src/setup/credential-import.ts +429 -0
package/src/setup/hermes/templates/operator-instructions.v1.md +7 -1
package/src/skill-state/deep-interview-mutation-guard.ts +2 -1
package/src/skill-state/workflow-hud.ts +106 -10
package/src/slash-commands/builtin-registry.ts +3 -2
package/src/task/executor.ts +16 -1
package/src/task/render.ts +18 -7
package/src/tools/ask.ts +59 -2
package/src/tools/cron.ts +1 -1
package/src/tools/job.ts +3 -2
package/src/tools/monitor.ts +36 -1
package/src/tools/subagent-render.ts +128 -29
package/src/tools/subagent.ts +173 -9
package/src/tools/ultragoal-ask-guard.ts +39 -0
package/src/web/search/index.ts +25 -25
package/src/web/search/provider.ts +178 -87
package/src/web/search/providers/base.ts +2 -1
package/src/web/search/providers/openai-compatible.ts +151 -0
package/src/web/search/types.ts +47 -22

package/src/defaults/gjc/skills/deep-interview/SKILL.md CHANGED Viewed

@@ -52,16 +52,19 @@ Inspired by the [Ouroboros project](https://github.com/Q00/ouroboros) which demo
 - Do not proceed to execution until ambiguity ≤ the resolved threshold for this run and the user explicitly approves a scoped execution path
 - Allow early exit with a clear warning if ambiguity is still high
 - Persist interview state for resume across session interruptions
-- Challenge agents activate at specific round thresholds to shift perspective
+- A multi-persona lateral-review panel convenes at ambiguity-milestone transitions (and before synthesizing any agent-supplied answer) to expose blind spots from independent perspectives
+- Refine free-text answers into a structured interpretation and confirm nothing is lost before scoring
+- After 3 consecutive agent-resolved answers (accepted auto-research candidates or auto-answers), route the next question to the user (dialectic rhythm guard)
+- Run an independent closure audit and a one-sentence goal restatement, each requiring explicit user confirmation, before crystallizing the spec
 </Execution_Policy>
 <Internal_Auto_Mode_Protocol>
-- `auto-research-greenfield.md` and `auto-answer-uncertain.md` are internal prompt fragments loaded on demand with bundle metadata `kind: "skill-fragment"`; they are not public skills, are never slash-command/discoverable, and must not be registered through any `skill://` route.
+- `auto-research-greenfield.md`, `auto-answer-uncertain.md`, and `lateral-review-panel.md` are internal prompt fragments loaded on demand with bundle metadata `kind: "skill-fragment"`; they are not public skills, are never slash-command/discoverable, and must not be registered through any `skill://` route.
 - Load fragments only for the specific hook that needs them, with forked inherited context kept read-only and prompt-budgeted; summarize active interview context before spawning the architect if the payload is large.
 - Auto-mode architects are read-only: no code edits, no `.gjc/` mutation, no workflow chaining, no formatters, and no execution delegation.
 - Validate every fragment response before using it: required sections must be present, candidates/answer must match the requested shape, rationale must cite available context, confidence must be explicit, and insufficient-context fallbacks must be honored.
 - If architect spawn, fragment loading, or response validation fails, continue the normal manual interview path silently and record an internal audit note in state by incrementing `architect_failures`; do not expose tool noise to the user unless it changes the next user-facing question.
-- Track `auto_researched_rounds`, `auto_answered_rounds`, and `architect_failures` in state and final spec metadata.
+- Track `auto_researched_rounds`, `auto_answered_rounds`, `lateral_reviews`, `auto_answer_streak`, `refined_rounds`, `architect_failures`, and `lateral_panel_failures` in state and final spec metadata.
 </Internal_Auto_Mode_Protocol>
@@ -132,6 +135,7 @@ Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThreshold
     "initial_idea": "<prompt-safe initial-context summary or user input>",
     "initial_context_summary": "<summary if oversized, else null>",
     "rounds": [],
+    "established_facts": [],
     "current_ambiguity": 1.0,
     "threshold": <resolvedThreshold>,
     "threshold_source": "<resolvedThresholdSource>",
@@ -144,10 +148,16 @@ Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThreshold
       "deferrals": [],
       "last_targeted_component_id": null
     },
-    "challenge_modes_used": [],
     "ontology_snapshots": [],
     "auto_researched_rounds": [],
     "auto_answered_rounds": [],
+    "lateral_reviews": [],
+    "lateral_panel_failures": 0,
+    "auto_answer_streak": 0,
+    "refined_rounds": [],
+    "closure_overrides": [],
+    "restated_goal": null,
+    "ambiguity_milestone": "initial",
     "architect_failures": 0
   }
 }
@@ -238,7 +248,7 @@ Build the question generation prompt with:
 - The prompt-safe initial-context summary (if one was created), otherwise the user's original idea
 - Prior Q&A rounds trimmed or summarized to fit the prompt budget while preserving decisions, constraints, unresolved gaps, and ontology changes
 - Current clarity scores per dimension (which is weakest?)
-- Challenge agent mode (if activated -- see Phase 3)
+- Lateral-review panel findings (if convened this round -- see Phase 3)
 - Brownfield codebase context (if applicable), summarized to cited paths/symbols/patterns instead of raw dumps
 - Locked topology from Round 0, including active components, deferred components, prior per-component scores, and `last_targeted_component_id`
@@ -252,7 +262,9 @@ If any prompt input is too large, summarize it first and then continue from the
 - Generate a question that specifically improves that component's weakest dimension
 - State, in one sentence before the question, why this component/dimension pair is now the bottleneck to reducing ambiguity
 - Questions should expose ASSUMPTIONS, not gather feature lists
+- **Facts vs decisions:** answer factual questions (current stack, versions, existing patterns, external API limits) from explore/research and present them as cited confirmations; route every *decision* (goals, scope, tradeoffs, desired behavior for new work) to the user. When unsure which a question is, treat it as a decision and ask.
 - If the scope is still conceptually fuzzy (entities keep shifting, the user is naming symptoms, or the core noun is unstable), switch to an ontology-style question that asks what the thing fundamentally IS before returning to feature/detail questions
+- **Dialectic rhythm guard:** increment `state.auto_answer_streak` when a round is resolved without direct user judgment (an accepted auto-research candidate or an auto-answer); reset it to 0 on any direct, refined, or cited-confirmation answer from the user. If the streak reaches 3, route the next question directly to the user even if it looks auto-answerable, then reset. The interview is with the human, not the codebase.
 **Question styles by dimension:**
 | Dimension | Question Style | Example |
@@ -281,18 +293,50 @@ Round {n} | Component: {target_component_name} | Targeting: {weakest_dimension}
 Options should include contextually relevant choices plus free-text, translated/localized according to `language.instruction` when present.
+When calling `ask`, you SHOULD include optional structured metadata so the runtime can record the round without manual state writes: `deepInterview.round_id?`, `deepInterview.round`, `deepInterview.component`, `deepInterview.dimension`, and `deepInterview.ambiguity`. Keep this metadata aligned with the visible Round/Component/Targeting/Ambiguity line; if metadata cannot be supplied, the legacy formatted question text remains the fallback.
 ### Step 2b′: Auto-Answer Opted-Out Questions
 After the `ask` tool resolves and before ambiguity scoring, if the user opts out of answering the current question or explicitly asks the agent to decide, load `auto-answer-uncertain.md` as an internal `kind: "skill-fragment"` prompt for a fork-context architect. Pass the opted-out question, prompt-safe transcript summary, locked topology, current scores/gaps, and any auto-research candidates used for the round. The architect must return exactly one decisive answer with rationale, confidence, and explicit uncertainty. Validate the response shape before using it; if valid, record it as the tentative answer for scoring, append the round number to `auto_answered_rounds`, and mark the transcript answer as architect-assisted.
 Auto-answer has a clarity cap: unless the architect confidence is `high` and uncertainty is negligible, no dimension score improved solely by the auto-answer may exceed `0.85`. If the auto-answer would make ambiguity cross the resolved threshold, ask the user for threshold-crossing confirmation before Phase 4: present the tentative assumption and require explicit confirmation, revision, or continued questioning. On architect failure or invalid response, continue with the user's opt-out as an unresolved gap, increment `architect_failures`, and do not block the interview.
+### Step 2b″: Refine Free-Text Answers
+When the user's answer is free-text that carries reasoning, constraints, or scope decisions, do not forward it to scoring as a lossy one-line label. First structure it into a compact interpretation using the canonical sections — **Decision**, **Reasoning**, **Constraints (user-stated)**, **Out of scope (user-stated)**, and **Codebase context (verified)** (omit empty sections) — then confirm with exactly one `ask` that nothing is lost or misrepresented. Apply `language.instruction` when present.
+Offer options such as **Send as-is**, **Add a constraint**, **Mark something out of scope**, **Add context**, and **Rewrite**, plus free-text. If the user picks anything other than "Send as-is", collect the exact missing text with one follow-up `ask` (never infer it from the option label), fold it into the structured interpretation, and re-confirm. Do not advance to scoring while the user is still saying something is missing.
+Skip Refine for short answers with no attached reasoning (e.g. "Yes" / "No" / a single proper noun), for pre-built option picks where the structure is already explicit, for auto-confirmed code/brownfield facts, and for architect auto-answers (already structured by Step 2b′). A refined answer counts as direct user judgment: record the round in `refined_rounds` and reset `auto_answer_streak` to 0. Feed the confirmed structured interpretation — not the raw free text — into Step 2c scoring and established-facts maintenance.
 ### Step 2c: Score Ambiguity
 After receiving the user's answer, score clarity across all dimensions.
 If the round used an auto-answer, include the architect answer, rationale, confidence, and uncertainty in the scoring prompt. Apply the Step 2b′ clarity cap mechanically before calculating ambiguity, and treat any low-confidence or insufficient-context auto-answer as an unresolved gap rather than user-confirmed truth.
+Before scoring, compare the new answer against `state.established_facts`. Treat established facts as durable confirmed decisions with source-round evidence; do not score an answer in isolation from facts that the interview has already stabilized.
+Ambiguity is BIDIRECTIONAL and NON-MONOTONIC. A later answer can increase ambiguity when it invalidates, weakens, or expands prior understanding; convergence is not assumed to be a one-way decrease.
+Ambiguity-raising triggers:
+- **A direct contradiction**: the answer contradicts an established fact.
+- **B internal inconsistency**: two requirements that cannot co-hold are now present.
+- **C low-quality/evasive**: the answer avoids, hand-waves, or fails to resolve the targeted gap.
+- **D scope expansion**: the answer adds a component, entity, constraint, deliverable, or integration not already covered or explicitly deferred.
+Use **mechanism A** (score lowering; the name is unrelated to trigger A above) for every ambiguity rise, whichever trigger fired: a trigger LOWERS the affected component/dimension clarity score, and the existing weighted formula then raises ambiguity. There is **no separate penalty term**; ambiguity remains bounded by the same greenfield/brownfield formula.
+The rise is SILENT: no modal, no forced-resolution step, and no dedicated conflict UI. Surface it through the normal per-round report and by targeting the next question at the affected component/dimension.
+Structured scorer output is required. Include `triggers`, `trigger_status`, `affected_component`, `affected_dimension`, `prior_dimension_score`, `new_dimension_score`, `prior_ambiguity`, `new_ambiguity`, `evidence`, `contradicted_established_fact` when relevant, and `disputed_unresolved_rationale` when applicable.
+Established-facts maintenance: promote stable confirmed decisions into `state.established_facts` with source/evidence; when a new answer contradicts an established fact, mark the fact disputed and preserve the contradicted fact instead of deleting it.
+TRANSITION VALIDATION: if a trigger is present, the affected dimension must not improve and overall ambiguity must rise vs the prior scored round, unless the trigger is explicitly marked disputed or unresolved with rationale.
+Convergence Pacing deferral: do not add a min-round floor, score-drop cap, confidence dampening, or other explicit pacing brake. Bidirectional scoring is the pacing mechanism.
 **Scoring prompt** (use opus model, temperature 0.1 for consistency):
 ```
@@ -306,6 +350,9 @@ Transcript or prompt-safe transcript summary:
 Locked topology:
 {state.topology.components and state.topology.deferrals}
+Established facts:
+{state.established_facts}
 Score each active component on each dimension, then provide the overall dimension scores as the minimum or coverage-weighted weakest score across active components. Deferred components are excluded from ambiguity math but must remain listed in topology and the final spec.
 Score each dimension:
@@ -324,6 +371,7 @@ Also identify:
 - weakest_dimension: the single lowest-confidence dimension for that component this round
 - weakest_dimension_rationale: one sentence explaining why this component/dimension pair is the highest-leverage target for the next question
 - component_scores: object keyed by component id, with per-dimension scores and gaps
+- structured_scorer_output: object containing triggers, trigger_status, affected_component, affected_dimension, prior_dimension_score, new_dimension_score, prior_ambiguity, new_ambiguity, evidence, contradicted_established_fact when relevant, and disputed_unresolved_rationale when applicable
 5. Ontology Extraction: Identify all key entities (nouns) discussed in the transcript.
@@ -373,11 +421,12 @@ Round {n} complete.
 | Constraints | {s} | {w} | {s*w} | {gap or "Clear"} |
 | Success Criteria | {s} | {w} | {s*w} | {gap or "Clear"} |
 | Context (brownfield) | {s} | {w} | {s*w} | {gap or "Clear"} |
-| **Ambiguity** | | | **{score}%** | |
+| **Ambiguity** | | | **{prior_score}% -> {score}% {up|down|flat}** | {if up: trigger name such as "A direct contradiction"} |
 **Topology:** Targeted {target_component_name} | Active: {active_component_count} | Deferred: {deferred_component_count} | Next rotation after: {last_targeted_component_id}
 **Ontology:** {entity_count} entities | Stability: {stability_ratio} | New: {new} | Changed: {changed} | Stable: {stable}
+**Milestone:** {prior_milestone} → {current_milestone}{milestone_transition ? " — lateral panel convened" : ""}
 **Next target:** {target_component_name} / {weakest_dimension} — {weakest_dimension_rationale}
@@ -389,7 +438,8 @@ Apply `language.instruction` when present before showing this progress report so
 ### Step 2e: Update State
-Update interview state with the new round, global scores, per-component `topology.components[].clarity_scores`, `topology.components[].weakest_dimension`, ontology snapshot, `topology.last_targeted_component_id`, `auto_researched_rounds`, `auto_answered_rounds`, and `architect_failures` via `gjc state write`; never patch `.gjc/state` directly unless an explicit force override is active.
+Update state in two phases. The `ask` answer is first recorded by the runtime as an `answered` shell. Scoring then enriches the same round record to `scored` with global scores, per-component `topology.components[].clarity_scores`, `topology.components[].weakest_dimension`, trigger metadata, established-facts changes, ontology snapshot, `topology.last_targeted_component_id`, `auto_researched_rounds`, `auto_answered_rounds`, and `architect_failures`. When `deepInterview` ask metadata is present, no manual per-round `gjc state write` is required for the answer shell; only scoring enrichment/state maintenance remains. When metadata is absent, use the legacy `gjc state write` path to persist the new round. In either case, never patch `.gjc/state` directly unless an explicit force override is active.
+Also recompute and persist `ambiguity_milestone` each round (detect band transitions for the Phase 3 panel), and persist `auto_answer_streak`, `refined_rounds`, `lateral_reviews`, and `lateral_panel_failures` alongside the existing fields.
 ### Step 2f: Check Soft Limits
@@ -397,28 +447,43 @@ Update interview state with the new round, global scores, per-component `topolog
 - **Round 10**: Show soft warning: "We're at 10 rounds. Current ambiguity: {score}%. Continue or proceed with current clarity?"
 - **Round 20**: Hard cap: "Maximum interview rounds reached. Proceeding with current clarity level ({score}%)."
-## Phase 3: Challenge Agents
+## Phase 3: Lateral Review Panel (milestone-triggered)
+The interview convenes a short multi-persona panel at **ambiguity-milestone transitions** instead of at fixed round numbers. Define milestone bands from the round's ambiguity score:
+| Band | Ambiguity |
+|------|-----------|
+| `initial` | a > 0.60 |
+| `progress` | 0.60 ≥ a > 0.30 |
+| `refined` | 0.30 ≥ a > threshold |
+| `ready` | a ≤ threshold |
-At specific round thresholds, shift the questioning perspective:
+A transition occurs whenever the band changes versus the prior scored round — in either direction, since bidirectional scoring can move the band back up. On a transition, and also before synthesizing any agent-supplied answer (auto-research candidates, an auto-answer, or a code/brownfield auto-confirm that carries real interpretation), convene the panel before generating or asking the next question.
-### Round 4+: Contrarian Mode
-Inject into the question generation prompt:
-> You are now in CONTRARIAN mode. Your next question should challenge the user's core assumption. Ask "What if the opposite were true?" or "What if this constraint doesn't actually exist?" The goal is to test whether the user's framing is correct or just habitual.
+**Personas (run in parallel, independent context):** dispatch `researcher`, `contrarian`, and `simplifier` as parallel fork-context subagents through the `lateral-review-panel.md` fragment, each with its own copy of the prompt-safe context so no persona anchors on another's framing. Add the `architect` persona when the round changed system shape — scope expansion, a new component or integration (trigger D), or any change to ownership or architecture. Each persona is a read-only architect: no edits, no `.gjc/` mutation, no execution.
-### Round 6+: Simplifier Mode
-Inject into the question generation prompt:
-> You are now in SIMPLIFIER mode. Your next question should probe whether complexity can be removed. Ask "What's the simplest version that would still be valuable?" or "Which of these constraints are actually necessary vs. assumed?" The goal is to find the minimal viable specification.
+**Folding findings:** validate each persona response, then fold only concrete, user-safe findings into the next single user-facing question — as 2-3 ranked answer options or one recommended draft. The panel never adds a second question, never mutates requirements on its own, and never marks the interview complete. The one-question-per-round rule stays intact.
-### Round 8+: Ontologist Mode (if ambiguity still > 0.3)
-Inject into the question generation prompt:
-> You are now in ONTOLOGIST mode. The ambiguity is still high after 8 rounds, suggesting we may be addressing symptoms rather than the core problem. The tracked entities so far are: {current_entities_summary from latest ontology snapshot}. Ask "What IS this, really?" or "Looking at these entities, which one is the CORE concept and which are just supporting?" The goal is to find the essence by examining the ontology.
+**Persona lenses:**
+- `researcher` — surfaces external facts, prior art, and unknowns the interview depends on.
+- `contrarian` — challenges the core assumption: "What if the opposite were true? Is this constraint real or habitual?"
+- `simplifier` — probes whether complexity can be removed: "What is the simplest version that is still valuable?"
+- `architect` — checks system shape, ownership, and integration impact when scope changed.
-Challenge modes are used ONCE each, then return to normal Socratic questioning. Track which modes have been used in state.
+**Ontology escalation:** if ambiguity stalls (same score ±0.05 for 3 rounds) or stays > 0.30 after 8 rounds, instruct the panel (especially `contrarian` + `architect`) to ask "What IS this, really?" — identify the core entity versus supporting views from the latest ontology snapshot before returning to feature questions.
+**Bookkeeping:** record each convened panel in `state.lateral_reviews` (round, milestone transition or pre-answer trigger, personas dispatched, findings folded). On panel spawn or validation failure, fall back silently to the normal generated question and increment `lateral_panel_failures`; do not expose tool noise unless it changes the next user-facing question. The panel is a prompt-budgeted assist layer — summarize oversized context before dispatch.
 ## Phase 4: Crystallize Spec
 When ambiguity ≤ threshold (or hard cap / early exit):
+**Before generating the spec, two gates must pass, in order:**
+**4a. Closure / Acceptance Guard.** Even when ambiguity ≤ threshold, do not treat the math as completion. Run an independent readiness audit from the full main-session perspective (including explore findings, established facts, and triggers the scorer may not have fully weighed). Confirm every active topology component has goal/constraint/criteria coverage, no unresolved or disputed trigger remains on a path that matters, and no low-confidence auto-answer is standing in for user-confirmed truth above the clarity cap. If a material gap exists, explicitly override the gate and tell the user — "The math says ready, but I am not accepting it yet because {gap}" — then ask the single highest-impact follow-up, returning to Phase 2. Record any override in `state.closure_overrides`.
+**4b. Restate gate.** Once closure passes, collapse the agreed answers into a ONE-sentence goal that covers every active component, and confirm it with a single `ask`: "If someone read only this line, would they reach the same outcome you have in mind?" Offer **Yes, crystallize**, **Adjust wording**, and **Missing scope**, plus free-text, applying `language.instruction` when present. On "Adjust wording" / "Missing scope", collect the exact correction with one follow-up `ask`, route it back through Step 2c scoring and established-facts maintenance (a correction can change ambiguity), then re-run closure and ask the Restate gate again. Cap at two loops; if alignment is not reached, return to Phase 2 with a targeted question instead of forcing a goal line. Persist the confirmed line as `state.restated_goal`.
 1. **Generate the specification** using opus model with the prompt-safe transcript. If the full interview transcript or initial context is too large, include the summary plus all concrete decisions, acceptance criteria, unresolved gaps, and ontology snapshots; never overflow the prompt with raw oversized context.
    - Apply `language.instruction` when present so user-facing prose in the spec preserves the session language; keep code identifiers, file paths, commands, JSON/settings keys, and quoted source text unchanged.
 2. **Write the final spec through the workflow CLI**: persist the artifact at `.gjc/specs/deep-interview-{slug}.md`
@@ -445,6 +510,11 @@ Spec structure:
 - Auto-Researched Rounds: {auto_researched_rounds}
 - Auto-Answered Rounds: {auto_answered_rounds}
 - Architect Failures: {architect_failures}
+- Lateral Reviews: {lateral_reviews count with milestones}
+- Lateral Panel Failures: {lateral_panel_failures}
+- Refined Rounds: {refined_rounds}
+- Closure Overrides: {closure_overrides count, or none}
+- Restated Goal: {restated_goal}
 ## Clarity Breakdown
 | Dimension | Score | Weight | Weighted |
@@ -463,6 +533,15 @@ Spec structure:
 |-----------|--------|-------------|--------------------------|
 | {component.name} | {active|deferred} | {component.description} | {covered acceptance criteria or deferral reason} |
+## Established Facts
+{List stable confirmed decisions promoted into `state.established_facts`, including source round, evidence, and disputed status when any fact was contradicted.}
+## Trigger Metadata
+{Summarize per-round trigger metadata: trigger label/status, affected component/dimension, prior -> new ambiguity direction, evidence, contradicted established fact when relevant, and disputed/unresolved rationale when applicable.}
+## Lateral Review Panel
+{Summarize convened panels: round, milestone transition or pre-answer trigger, personas dispatched, and the concrete findings folded into questions. Note any lateral_panel_failures.}
 ## Goal
 {crystal-clear goal statement derived from interview, covering every active topology component}
@@ -481,6 +560,9 @@ Spec structure:
 - [ ] {testable criterion 3}
 - ...
+## Deferrals
+{List user-confirmed topology deferrals and scoring/pacing deferrals, including Convergence Pacing when applicable: no min-round floor, score-drop cap, or dampening; bidirectional scoring is the pacing mechanism.}
 ## Assumptions Exposed & Resolved
 | Assumption | Challenge | Resolution |
 |------------|-----------|------------|
@@ -573,7 +655,7 @@ Stage 1: Deep Interview          Stage 2: ralplan consensus       Stage 3: Separ
 ┌─────────────────────┐    ┌───────────────────────────┐    ┌──────────────────────┐
 │ Socratic Q&A        │    │ Planner creates plan      │    │ User chooses if/how  │
 │ Ambiguity scoring   │───>│ Architect reviews         │───>│ execution proceeds   │
-│ Challenge agents    │    │ Critic validates          │    │ via ultragoal (default) │
+│ Lateral panel       │    │ Critic validates          │    │ via ultragoal (default) │
 │ Spec crystallization│    │ Loop until consensus      │    │ no auto-handoff      │
 │ Gate: ≤<resolvedThresholdPercent> ambiguity│    │ ADR + RALPLAN-DR summary  │    │                      │
 └─────────────────────┘    └───────────────────────────┘    └──────────────────────┘
@@ -601,8 +683,9 @@ Skipping any stage is possible but reduces quality assurance:
 - Use `gjc state write` / `gjc state read` for interview state persistence; the initial and subsequent deep-interview state payloads must include `threshold_source` alongside `threshold`; do not edit `.gjc/state` directly without force override.
 - Use the GJC workflow CLI to save the final spec at `.gjc/specs/deep-interview-{slug}.md` exactly; do not use `write`, `edit`, or `ast_edit` directly on `.gjc/` paths without force override.
 - Use public GJC workflow entrypoints to bridge to ralplan, ultragoal, or team only after explicit execution approval — never implement directly. Implementation handoff defaults to ultragoal; reserve team for when tmux-based interactive worker parallelization is genuinely required.
-- Challenge agent modes are prompt injections, not separate agent spawns
-- Use internal fragment auto-modes only at their documented hooks: `auto-research-greenfield.md` between Step 2a and 2b for greenfield `research: true` questions, and `auto-answer-uncertain.md` as Step 2b′ after `ask` resolves and before scoring.
+- The lateral-review panel spawns read-only persona subagents (Task tool) in parallel with independent context; it is an assist layer, never an executor and never the completion authority
+- Apply the Refine gate (Step 2b″), the Dialectic Rhythm Guard (Step 2a), and the Closure + Restate gates (Phase 4) through the `ask` tool, preserving `language.instruction` for each
+- Use internal fragment auto-modes only at their documented hooks: `auto-research-greenfield.md` between Step 2a and 2b for greenfield `research: true` questions, `auto-answer-uncertain.md` as Step 2b′ after `ask` resolves and before scoring, and `lateral-review-panel.md` for the Phase 3 panel personas at ambiguity-milestone transitions and before synthesizing agent-supplied answers.
 - Fragment auto-modes are loaded on demand as `kind: "skill-fragment"`; they are not public workflow skills, not slash-command/discoverable, and not `skill://` registrations.
 </Tool_Usage>
@@ -633,15 +716,15 @@ Why good: Explored first, cited the repo evidence that triggered the question, t
 </Good>
 <Good>
-Contrarian mode activation:
+Lateral panel — contrarian persona:
 ```
-Round 5 | Contrarian Mode | Ambiguity: 42%
+Round 5 | Targeting: Constraints | Lateral panel: progress→refined (contrarian) | Ambiguity: 42%
 You've said this needs to support 10,000 concurrent users. What if it only
 needed to handle 100? Would the architecture change fundamentally, or is
 the 10K number an assumption rather than a measured requirement?
 ```
-Why good: Challenges a specific assumption (scale requirement) that could dramatically simplify the solution.
+Why good: The lateral panel's contrarian persona challenges a specific assumption (scale requirement) that could dramatically simplify the solution.
 </Good>
 <Good>
@@ -659,26 +742,16 @@ Why good: Respects user's desire to stop but transparently shows the risk.
 </Good>
 <Good>
-Ontology convergence tracking:
-```
-Round 3 entities: User, Task, Project (stability: N/A → 67%)
-Round 4 entities: User, Task, Project, Tag (stability: 75% — 3 stable, 1 new)
-Round 5 entities: User, Task, Project, Tag (stability: 100% — all 4 stable)
-"Ontology has converged — the same 4 entities appeared in 2 consecutive rounds
-with no changes. The domain model is stable."
-```
-Why good: Shows entity tracking across rounds with visible convergence. Stability ratio increases as the domain model solidifies, giving mathematical evidence that the interview is converging on a stable understanding.
-</Good>
-<Good>
-Ontology-style question for scope-fuzzy tasks:
+Ontology stabilization — ask, then watch it converge:
 ```
 Round 6 | Targeting: Goal Clarity | Why now: the core entity is still unstable across rounds, so feature questions would compound ambiguity | Ambiguity: 38%
-"Across the last rounds you've described this as a workflow, an inbox, and a planner. Which one is the core thing this product IS, and which ones are supporting metaphors or views?"
+"Across the last rounds you've described this as a workflow, an inbox, and a planner. Which one is the core thing this product IS, and which are supporting views?"
+→ Round 7 entities: User, Task, Project, Tag (stability: 75% — 3 stable, 1 new)
+→ Round 8 entities: User, Task, Project, Tag (stability: 100% — all 4 stable across 2 rounds)
 ```
-Why good: Uses ontology-style questioning to stabilize the core noun before drilling into features, which is the right move when the scope is fuzzy rather than merely incomplete.
+Why good: An ontology-style question stabilizes the core noun before drilling into features; the stability ratio then climbing to 100% across consecutive rounds is the mathematical signal that the domain model has converged.
 </Good>
 <Bad>
@@ -690,14 +763,6 @@ Also, what's the deployment target?"
 Why bad: Four questions at once — causes shallow answers and makes scoring inaccurate.
 </Bad>
-<Bad>
-Asking about codebase facts:
-```
-"What database does your project use?"
-```
-Why bad: Should have spawned explore agent to find this. Never ask the user what the code already tells you.
-</Bad>
 <Bad>
 Proceeding despite high ambiguity:
 ```
@@ -718,29 +783,18 @@ Why bad: 45% ambiguity means nearly half the requirements are unclear. The mathe
 </Escalation_And_Stop_Conditions>
 <Final_Checklist>
-- [ ] Phase 0 completed before Phase 1: settings files were read, threshold was resolved, and the first user-visible line was `Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThresholdSource>)`
-- [ ] State includes both `threshold` and `threshold_source`, and the final spec metadata records both values
-- [ ] Existing `language` state object was preserved, and `language.instruction` was applied to announcements, topology confirmation, option labels, interview questions, progress reports, and spec prose when present
-- [ ] Interview completed (ambiguity ≤ threshold OR user chose early exit)
-- [ ] Oversized initial context/history was summarized before scoring, question generation, spec generation, or execution handoff
-- [ ] Ambiguity score displayed after every round
-- [ ] Every round explicitly names the weakest dimension and why it is the next target
-- [ ] Challenge agents activated at correct thresholds (round 4, 6, 8)
-- [ ] Spec file persisted to `.gjc/specs/deep-interview-{slug}.md` exactly through the GJC workflow CLI; ephemeral artifacts/state used `gjc state write` or workflow CLI writes, with no direct `.gjc/` edits unless force override was explicitly active
-- [ ] Spec includes: topology, goal, constraints, acceptance criteria, clarity breakdown, transcript
-- [ ] Execution bridge presented via the `ask` tool
-- [ ] Selected execution mode invoked via public GJC workflow entrypoint only after explicit execution approval (never direct implementation)
-- [ ] If 3-stage pipeline selected: `/skill:ralplan` invoked with the spec as context, then stopped with the consensus plan marked `pending approval` until the user explicitly approves execution
-- [ ] State cleaned up after approved workflow handoff
-- [ ] Brownfield confirmation questions cite repo evidence (file/path/pattern) before asking the user to decide
-- [ ] Scope-fuzzy tasks can trigger ontology-style questioning to stabilize the core entity before feature elaboration
-- [ ] Round 0 topology gate completed before ambiguity scoring and persisted `topology.confirmed_at`
-- [ ] Per-round ambiguity report includes Topology target/coverage and Ontology row with entity count and stability ratio
-- [ ] Multi-component interviews rotate targeting across active components when N > 1
-- [ ] Spec includes Topology section with confirmed active components and user-confirmed deferrals
-- [ ] Spec includes Ontology (Key Entities) table and Ontology Convergence section
-- [ ] Internal auto-mode fragments, when used, were loaded only on demand as non-public `kind: "skill-fragment"` prompts; responses were validated, failures incremented `architect_failures`, and final metadata includes `auto_researched_rounds`, `auto_answered_rounds`, and `architect_failures`
-- [ ] Auto-answer threshold crossing, if any, received explicit user confirmation before spec crystallization
+- [ ] Phase 0 ran before anything: threshold resolved and first line emitted as `Deep Interview threshold: <resolvedThresholdPercent> (source: <resolvedThresholdSource>)`; state and spec metadata record both `threshold` and `threshold_source`
+- [ ] `language.instruction` preserved across announcements, questions, options, progress reports, and spec prose when present
+- [ ] Oversized initial context/history summarized before scoring, question generation, spec generation, or handoff
+- [ ] Round 0 topology gate completed before scoring; `topology.confirmed_at` persisted
+- [ ] Ambiguity scored and displayed every round, naming the weakest component/dimension target (rotating across active components when N > 1)
+- [ ] Lateral panel convened at milestone transitions (and before synthesizing agent-supplied answers) with parallel read-only personas
+- [ ] Free-text answers passed the Refine gate; dialectic rhythm guard forced a user question after 3 agent-resolved answers; any auto-answer threshold crossing explicitly confirmed
+- [ ] Closure / Acceptance Guard and the one-sentence Restate gate both passed before crystallization
+- [ ] Interview reached ambiguity ≤ threshold OR an explicit early exit with warning
+- [ ] Spec persisted to `.gjc/specs/deep-interview-{slug}.md` exactly via the GJC CLI (no direct `.gjc/` edits without force override), covering every active topology component plus goal/constraints/acceptance criteria/clarity/ontology/transcript
+- [ ] Spec metadata includes the auto/lateral counters (`auto_researched_rounds`, `auto_answered_rounds`, `lateral_reviews`, `refined_rounds`, `architect_failures`, `lateral_panel_failures`)
+- [ ] Execution bridge presented via `ask`; execution invoked only after explicit approval through a public workflow entrypoint (never direct implementation); state cleaned up after handoff
 </Final_Checklist>
 <Advanced>
@@ -783,30 +837,7 @@ If the user chooses interview, team routing invokes `/skill:deep-interview`. Whe
 ## Approval-Gated Pipeline: deep-interview → ralplan → pending approval
-The recommended refinement path chains clarity and feasibility gates, then stops for explicit execution approval:
-```
-/skill:deep-interview "vague idea"
-  → Socratic Q&A until ambiguity ≤ <resolvedThresholdPercent>
-  → Spec written to .gjc/specs/deep-interview-{slug}.md
-  → User explicitly selects "Refine with ralplan consensus"
-  → /skill:ralplan (spec as input)
-    → Planner creates implementation plan from spec
-    → Architect reviews for architectural soundness
-    → Critic validates quality and testability
-    → Loop until consensus (max 5 iterations)
-    → Consensus plan written to .gjc/plans/
-  → Stop with the consensus plan marked pending approval
-  → Only a separate explicit execution approval may invoke execution (ultragoal by default; team only when tmux-based interactive worker parallelization is required)
-```
-**The ralplan skill receives the spec as context through `/skill:ralplan`** because ralplan is already the GJC Planner → Architect → Critic consensus workflow. The consensus plan includes:
-- RALPLAN-DR summary (Principles, Decision Drivers, Options)
-- ADR (Decision, Drivers, Alternatives, Why chosen, Consequences)
-- Testable acceptance criteria (inherited from deep-interview spec)
-- Implementation steps with file references
-**Execution is a separate approval-gated step.** The deep-interview and ralplan skills must not auto-invoke team or ultragoal merely because a spec or plan exists.
+See the Phase 5b "Approval-Gated Refinement Path" diagram for the full flow. In short: interview → spec at `.gjc/specs/deep-interview-{slug}.md` → user selects "Refine with ralplan consensus" → `/skill:ralplan` (Planner/Architect/Critic consensus, plan written to `.gjc/plans/`) → stop at `pending approval`. Execution is always a separate approval-gated step; deep-interview and ralplan never auto-invoke ultragoal or team just because a spec or plan exists.
 ## Integration with Ralplan Gate
@@ -818,24 +849,11 @@ Vague prompt → ralplan gate → deep-interview (if extremely vague) → ralpla
 ## Brownfield vs Greenfield Weights
-| Dimension | Greenfield | Brownfield |
-|-----------|-----------|------------|
-| Goal Clarity | 40% | 35% |
-| Constraint Clarity | 30% | 25% |
-| Success Criteria | 30% | 25% |
-| Context Clarity | N/A | 15% |
-Brownfield adds Context Clarity because modifying existing code safely requires understanding the system being changed.
-## Challenge Agent Modes
+See "Calculate ambiguity" in Step 2c for the weighted formulas. Brownfield adds a 15% Context Clarity dimension (Goal/Constraint/Criteria become 35/25/25) because safely modifying existing code requires understanding the system being changed.
-| Mode | Activates | Purpose | Prompt Injection |
-|------|-----------|---------|-----------------|
-| Contrarian | Round 4+ | Challenge assumptions | "What if the opposite were true?" |
-| Simplifier | Round 6+ | Remove complexity | "What's the simplest version?" |
-| Ontologist | Round 8+ (if ambiguity > 0.3) | Find essence | "What IS this, really?" |
+## Lateral Review Panel
-Each mode is used exactly once, then normal Socratic questioning resumes. Modes are tracked in state to prevent repetition.
+See Phase 3 for the full persona set (researcher/contrarian/simplifier, plus architect on scope change), the milestone bands, and the parallel independent-context dispatch.
 ## Ambiguity Score Interpretation
@@ -845,7 +863,7 @@ Each mode is used exactly once, then normal Socratic questioning resumes. Modes
 | At or below the resolved threshold | Clear enough | Proceed |
 | Above the resolved threshold with minor gaps | Some gaps | Continue interviewing |
 | Moderate ambiguity | Significant gaps | Focus on weakest dimensions |
-| High ambiguity | Very unclear | May need reframing (Ontologist) |
+| High ambiguity | Very unclear | May need reframing (panel ontology escalation) |
 | Extreme ambiguity | Almost nothing known | Early stages, keep going |
 </Advanced>

package/src/defaults/gjc/skills/deep-interview/lateral-review-panel.md ADDED Viewed

@@ -0,0 +1,49 @@
+# Deep Interview Lateral Review Panel
+You are one persona on a read-only architect panel assisting the deep-interview workflow at an ambiguity-milestone transition (or before the workflow synthesizes an agent-supplied answer). You run in parallel with the other personas, each in independent context, so your perspective must be your own — do not assume or anchor on what another persona would say.
+Your assigned persona is provided in the prompt as `persona` (one of `researcher`, `contrarian`, `simplifier`, `architect`).
+Inherited context is read-only background. Do not edit code, write files, mutate `.gjc/` state, run formatters, invoke workflow handoffs, or implement anything. Use only inherited context, the prompt-safe initial idea, locked topology, current scores/gaps, established facts, prior decisions, and read-only repo/context inspection if available.
+Keep the response compact enough to fold back into a single Socratic question.
+## Persona lens
+- `researcher` — surface external facts, prior art, version/compatibility constraints, and unknowns the interview genuinely depends on. Prefer verifiable specifics over speculation.
+- `contrarian` — challenge the core assumption. Ask whether the framing or a stated constraint is real or merely habitual, and name what breaks if the opposite were true.
+- `simplifier` — probe whether complexity can be removed. Name the simplest version that is still valuable and which constraints are necessary versus assumed.
+- `architect` — assess system shape, ownership, and integration impact when scope or architecture changed. Name the highest-risk structural decision still unsettled.
+## Task
+From your assigned persona's lens only, identify the single highest-leverage blind spot or unsettled decision the next question should address, and propose how to resolve it. Stay within the locked topology and confirmed constraints.
+## Response Shape
+Respond with only this JSON object:
+```json
+{
+  "status": "answered",
+  "persona": "researcher|contrarian|simplifier|architect",
+  "finding": "One concrete, user-safe blind spot or decision this persona surfaces.",
+  "rationale": [
+    "Context, repo fact, or confirmed constraint supporting the finding."
+  ],
+  "suggested_options": [
+    "A concise answer option or recommended draft the next single question can offer."
+  ],
+  "confidence": "high|medium|low"
+}
+```
+Rules:
+- `finding` must be non-empty, specific, and must not contradict confirmed user constraints.
+- `rationale` must contain 1-3 bullets citing inherited context, confirmed constraints, or repo facts available in the prompt.
+- `suggested_options` must contain 1-3 entries usable as answer options or a recommended draft for the single next user-facing question.
+- `confidence` must be `high`, `medium`, or `low`.
+## Fallback
+If inherited context is insufficient for a defensible persona finding, do not fabricate one. Set `confidence` to `low`, set `finding` to the most important missing piece of context from this persona's lens, and set `suggested_options` to the single safest clarification to ask the user.

package/src/defaults/gjc/skills/ultragoal/SKILL.md CHANGED Viewed

@@ -191,10 +191,10 @@ An ultragoal story cannot be checkpointed `complete` until the active agent has
    - code-side: maintainability, tests, integration points, and unsafe shortcuts.
 5. Delegate an `executor` QA/red-team lane to build and run the e2e/red-teaming QA suite appropriate for the story. This lane must try to break the change, not just confirm the happy path. It must start from the approved plan/spec/acceptance criteria, then user-facing contracts, and only then implementation code as supporting evidence. Plan/code mismatches are blockers, not items to paper over with implementation intent.
 6. The executor QA/red-team lane must prove evidence by the real surface under test:
-   - GUI/web surfaces require browser automation plus a screenshot or image verdict.
-   - CLI surfaces require logs or terminal transcripts from real invocation.
-   - API/package surfaces require external consumer or black-box tests through the public interface.
-   - Algorithm/math surfaces require boundary, property, adversarial, and failure-mode cases.
+   - GUI/web surfaces require a valid automation transcript plus a non-uniform screenshot. Bare `inlineEvidence` text or typed receipts never prove live GUI/web execution.
+   - CLI surfaces require runtime argv replay: `replaySafe: true`, an allowlisted argv `command`, and replayed normalized stdout matching `recordedStdout`; unsafe commands require audited `replayExempt` metadata plus a structurally valid fallback artifact.
+   - Native/desktop/tui surfaces require a structurally valid screenshot, PTY capture with terminal control codes, or app-automation transcript.
+   - API/package/algorithm/math surfaces require a real artifact file or typed receipt. Bare `inlineEvidence` text alone is not sufficient for any surface.
 7. The executor QA/red-team lane must report a matrix using `executorQa.contractCoverage`, `executorQa.surfaceEvidence`, `executorQa.adversarialCases`, and `executorQa.artifactRefs`. Not-applicable rows are allowed only in `contractCoverage` and `surfaceEvidence`; each `status: "not_applicable"` row requires `contractRef` plus `reason`. `adversarialCases` rows cannot be not-applicable.
 8. Run a final code review pass and fold it into the strict quality gate. Clean means `architectReview.architectureStatus`, `architectReview.productStatus`, and `architectReview.codeStatus` are all `"CLEAR"`, `architectReview.recommendation` is `"APPROVE"`, executor QA statuses are `"passed"`, iteration is `"passed"` with `fullRerun: true`, every evidence field is non-empty, every required matrix row is present, and every blockers array is empty. `COMMENT`, `WATCH`, `REQUEST CHANGES`, `BLOCK`, missing evidence, missing or shallow matrix rows, plan/code mismatches, or non-empty blockers are non-clean.
 9. If any lane finds an issue, do **not** checkpoint `complete` and do **not** call `goal({"op":"complete"})`. Record durable blocker work instead:
@@ -204,6 +204,8 @@ An ultragoal story cannot be checkpointed `complete` until the active agent has
 10. Complete or steer through the blocker story, then rerun the full blocking verification loop. Repeat until all verifier lanes are clean.
 11. Only after the loop is clean, checkpoint the story as complete with a structured quality gate and a fresh active `goal({"op":"get"})` snapshot. The checkpoint creates a receipt; `goals.json.status` alone is not proof. In aggregate mode, the final aggregate receipt must exist before `goal({"op":"complete"})` is allowed.
+While an Ultragoal run is active, the `ask` tool is blocked for all agents. Record unresolved review decisions as durable blockers with `gjc ultragoal record-review-blockers` instead of prompting interactively.
 The native `checkpoint --status complete` command rejects missing or shallow gates. `--quality-gate-json` must include:
 ```json
@@ -229,13 +231,19 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
         "id": "browser-run",
         "kind": "browser-automation",
         "path": "artifacts/browser-run.json",
-        "description": "browser automation transcript invoking the approved user-facing flow"
+        "description": "valid automation transcript with actions, monotonic timestamps, and selectors"
       },
       {
         "id": "gui-screenshot",
         "kind": "screenshot",
         "path": "artifacts/gui-screenshot.png",
-        "description": "screenshot or image-verdict evidence for the GUI/web result"
+        "description": "non-uniform screenshot evidence for the GUI/web result"
+      },
+      {
+        "id": "cli-replay",
+        "kind": "command-replay",
+        "path": "artifacts/cli-replay.json",
+        "description": "artifact file containing argv-only CLI replay JSON: schemaVersion 1, kind cli-replay, replaySafe true, allowlisted command, recordedStdout"
       },
       {
         "id": "adversarial-report",
@@ -265,15 +273,23 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
       {
         "id": "surface-gui",
         "contractRef": "user-facing surface or public interface under test",
-        "surface": "gui|web|cli|api|package|algorithm|math",
+        "surface": "gui|web|cli|api|package|algorithm|math|native|desktop|tui",
         "invocation": "real browser action, CLI command, API/package consumer call, or algorithm/property check",
         "verdict": "passed",
         "artifactRefs": ["browser-run", "gui-screenshot"]
       },
+      {
+        "id": "surface-cli",
+        "contractRef": "CLI or command-line interface under test",
+        "surface": "cli",
+        "invocation": "argv replay executed by the Ultragoal runtime",
+        "verdict": "passed",
+        "artifactRefs": ["cli-replay"]
+      },
       {
         "id": "surface-out-of-scope",
         "contractRef": "surface intentionally outside this story",
-        "surface": "gui|web|cli|api|package|algorithm|math",
+        "surface": "gui|web|cli|api|package|algorithm|math|native|desktop|tui",
         "status": "not_applicable",
         "reason": "why this surface does not apply to the current story"
       }
@@ -300,6 +316,12 @@ The native `checkpoint --status complete` command rejects missing or shallow gat
 }
 ```
+For CLI replay artifacts, the JSON at `path` must be an object like `{"schemaVersion":1,"kind":"cli-replay","replaySafe":true,"command":["bun","-e","console.log(\"ultragoal-cli-ok\")"],"recordedStdout":"ultragoal-cli-ok\n"}`. Use `replayExempt` only for audited unsafe/non-deterministic invocations, with a substantive reason, approver, and same-surface fallback artifacts.
+## Review mode
+`gjc ultragoal review` runs the same hardened gate against an already implemented PR, branch, or worktree. Use `--pr <number>` for a PR, `--branch <ref>` for a branch diff, omit both for the current worktree, and pass `--spec <path>` when a real contract exists. `--mode review-only` emits the verdict/findings without creating fix work; `--mode review-start` records review blockers for follow-up. Review mode validates the same `executorQa` shape and live-surface artifacts as `checkpoint --status complete`. A thin or derived-only contract can never clean-pass: the verdict is capped at `inconclusive: weak-contract` until a supplied spec or equivalent strong acceptance criteria are available.
 Receipts are freshness-scoped:
 - Per-goal receipts remain fresh for their target goal unless that goal, its blocker metadata, or its supersession metadata changes.
 - Normal later `goal_started` or clean receipt-backed `goal_checkpointed` events for other goals do not stale older per-goal receipts.