@kenkaiiii/gg-boss 4.3.162 → 4.3.163

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68336,6 +68336,41 @@ function normalizeProjectPath(cwd2) {
68336
68336
  function nowIso() {
68337
68337
  return (/* @__PURE__ */ new Date()).toISOString();
68338
68338
  }
68339
/**
 * Merges an incoming list of goal tasks into the tasks already stored for a run.
 *
 * Tasks are matched by `id`. For a task present on both sides the incoming
 * fields are spread over the stored ones, after which a few fields are
 * re-derived so that an incoming snapshot cannot roll them back:
 * `attempts` is the larger of the two counts, and `workerId`, `verification`
 * and `lastSummary` keep the stored value unless it is null/undefined.
 * Stored tasks without an incoming counterpart are returned untouched, and
 * incoming tasks whose id is not stored are appended in input order.
 *
 * Neither argument is mutated.
 *
 * @param {Array<object>} existing - Tasks currently stored for the run.
 * @param {Array<object>|null|undefined} input - Incoming tasks; when falsy,
 *   `existing` is returned as-is (same array reference).
 * @returns {Array<object>} The merged task list.
 */
function mergeGoalTasks(existing, input) {
  if (!input) {
    return existing;
  }
  const incomingById = new Map(input.map((task) => [task.id, task]));
  const merged = existing.map((task) => {
    const next = incomingById.get(task.id);
    if (!next) {
      return task;
    }
    return {
      ...task,
      ...next,
      // NOTE(review): as written this always evaluates to the stored status
      // (when the statuses differ the stored one is chosen; when they match
      // the two are interchangeable), so an incoming status never replaces a
      // stored one here. Confirm that is the intended rule.
      status: task.status !== next.status || task.attempts > next.attempts ? task.status : next.status,
      attempts: Math.max(task.attempts, next.attempts),
      workerId: task.workerId ?? next.workerId,
      verification: task.verification ?? next.verification,
      lastSummary: task.lastSummary ?? next.lastSummary
    };
  });
  // Index stored ids once instead of rescanning `existing` for every incoming task.
  const existingIds = new Set(existing.map((task) => task.id));
  for (const task of input) {
    if (!existingIds.has(task.id)) {
      merged.push(task);
    }
  }
  return merged;
}
68363
/**
 * Appends incoming evidence items to the stored evidence, keyed by `id`.
 *
 * Stored items always win: an incoming item whose id is already stored is
 * dropped. An id that appears more than once within `input` is appended only
 * the first time, so the result never gains duplicate ids from a single call.
 * Items whose id is null/undefined are not tracked for that purpose and are
 * handled exactly as before (appended unless a stored item also lacks an id).
 *
 * Neither argument is mutated.
 *
 * @param {Array<object>} existing - Evidence currently stored for the run.
 * @param {Iterable<object>|null|undefined} input - Incoming evidence; when
 *   falsy, `existing` is returned as-is (same array reference).
 * @returns {Array<object>} Stored evidence followed by the newly seen items.
 */
function mergeGoalEvidence(existing, input) {
  if (!input) {
    return existing;
  }
  // Only membership is needed, so a Set of ids replaces the id -> item Map.
  const seenIds = new Set(existing.map((item) => item.id));
  const merged = [...existing];
  for (const item of input) {
    if (seenIds.has(item.id)) {
      continue;
    }
    if (item.id != null) {
      // Remember ids as they are appended so repeats inside `input` are skipped.
      seenIds.add(item.id);
    }
    merged.push(item);
  }
  return merged;
}
68339
68374
  function isObject2(value) {
68340
68375
  return typeof value === "object" && value !== null && !Array.isArray(value);
68341
68376
  }
@@ -68358,7 +68393,7 @@ function isEvidenceKind(value) {
68358
68393
  return value === "log" || value === "command" || value === "screenshot" || value === "file" || value === "summary";
68359
68394
  }
68360
68395
  function isEvidenceMechanism(value) {
68361
- return value === "command" || value === "test" || value === "script" || value === "fixture" || value === "log" || value === "screenshot" || value === "video" || value === "browser" || value === "device" || value === "source" || value === "manual";
68396
+ return value === "command" || value === "test" || value === "script" || value === "fixture" || value === "log" || value === "screenshot" || value === "video" || value === "browser" || value === "device" || value === "source" || value === "file" || value === "manual";
68362
68397
  }
68363
68398
  function isEvidencePlanStatus(value) {
68364
68399
  return value === "planned" || value === "ready" || value === "blocked";
@@ -68674,8 +68709,8 @@ async function upsertGoalRun(cwd2, input) {
68674
68709
  prerequisites: input.prerequisites ?? existing.prerequisites,
68675
68710
  harness: input.harness ?? existing.harness,
68676
68711
  evidencePlan: input.evidencePlan ?? existing.evidencePlan,
68677
- tasks: input.tasks ?? existing.tasks,
68678
- evidence: input.evidence ?? existing.evidence,
68712
+ tasks: mergeGoalTasks(existing.tasks, input.tasks),
68713
+ evidence: mergeGoalEvidence(existing.evidence, input.evidence),
68679
68714
  blockers: input.blockers ?? existing.blockers,
68680
68715
  status: deriveRunnableStatus(input.status ?? existing.status, input.prerequisites ?? existing.prerequisites)
68681
68716
  } : createGoalRun(cwd2, input);
@@ -68843,10 +68878,10 @@ function buildHarnessTaskPrompt(run) {
68843
68878
  const harnessItems = run.harness.filter((item) => !item.command && !item.path).map((item) => `- ${item.label}: ${item.description ?? "Create local instrumentation."}`).join("\n");
68844
68879
  return `Goal: ${run.goal}
68845
68880
 
68846
- Build the missing local/free harness instrumentation needed before verification. Translate the user's requested outcome into observable proof: ask what artifact would prove this actually worked end-to-end, then build the simplest reliable local/free path to observe it.
68881
+ Build only the missing local/free harness instrumentation needed before verification. Start by restating the intended experience, the relevant failure modes, and the senses/signals this harness must observe; do not default to generic tests, scripts, screenshots, benchmarks, or simulations unless that signal is required for this specific goal.
68847
68882
  ${harnessItems}
68848
68883
 
68849
- Inventory domain-appropriate local capabilities before blocking: existing tests and CLIs, fixtures or seeded data, dev servers, browser automation, simulator/device screenshots, video/frame inspection, logs, generated assets, protocol traces, database assertions, API probes, contract tests, performance measurements, source/docs/code-search comparison, or other artifacts that directly measure the outcome. For mobile/UI goals, prefer local simulator/browser screenshots (for example iOS Simulator tooling when available) before requiring a physical phone. Create any scripts, fixtures, or test helpers in the repository, update the Goal harness/verifier metadata with the goals tool, and record command/file/screenshot/log evidence. Do not require paid services or signups; block only with exact user instructions if a true external prerequisite is missing.`;
68884
+ Inventory available local capabilities just deeply enough to choose a proportional instrument, then build it. Update the Goal harness/verifier metadata with the goals tool and record durable evidence showing the instrument exists and works. Do not require paid services or signups; block only with exact user instructions if a true external prerequisite is missing.`;
68850
68885
  }
68851
68886
  function blockedEvidencePlanReason(run) {
68852
68887
  const blocked = run.evidencePlan.find((item) => item.status === "blocked");
@@ -68901,15 +68936,15 @@ function buildEvidencePlanTaskPrompt(run) {
68901
68936
  const plannedItems = run.evidencePlan.filter((item) => item.status === "planned").map((item) => `- ${item.label} (${item.mechanism}): ${item.description}${item.command ? `; candidate command: ${item.command}` : ""}${item.path ? `; artifact: ${item.path}` : ""}`).join("\n");
68902
68937
  return `Goal: ${run.goal}
68903
68938
 
68904
- Turn the planned proof paths below into real local/free verification capability before the Goal verifier runs. Translate success criteria and outcome requirements into observable proof paths: ask what would prove this goal actually worked end-to-end, then build the simplest reliable local/free way to capture that proof.
68939
+ Turn the planned proof paths below into real local/free verification capability before the Goal verifier runs. For each path, preserve the orchestrator's goal-specific sensory intent: what experience is being observed, what failure it catches, and what signal proves it.
68905
68940
  ${plannedItems}
68906
68941
 
68907
- Inventory domain-appropriate capabilities deeply enough for this task before blocking: existing tests/CLIs, generated fixtures, seeded data, scripts, dev servers, browser automation, simulator/browser/device screenshots, video/frame inspection, logs, generated assets, protocol traces, database assertions, API probes, contract tests, performance measurements, source/docs/code-search comparison, or other artifacts that directly measure the requested outcome. For mobile/UI goals, screenshots are examples rather than the whole solution: prefer local simulator/browser tooling (for example iOS Simulator screenshots when available) before requiring a physical phone, and add image/frame checks when visual correctness matters. Build what is missing, update the Goal evidence_plan/harness/verifier metadata with the goals tool, and persist command/file/screenshot/log evidence, not narrative-only verification or human visual inspection. Only block with exact user instructions for inputs that cannot be generated or checked locally, such as credentials, paid services, physical devices, or unavailable source assets.`;
68942
+ Inventory available local capabilities without anchoring on any fixed tool category. Build only the proportional instrument needed for this proof path, update the Goal evidence_plan/harness/verifier metadata with the goals tool, and persist concrete command/file/artifact/log evidence that the instrument works. Do not use narrative-only verification or human visual inspection as completion evidence. Only block with exact user instructions for inputs that cannot be generated or checked locally.`;
68908
68943
  }
68909
68944
  function buildVerifierTaskPrompt(run) {
68910
68945
  return `Goal: ${run.goal}
68911
68946
 
68912
- Define and build a real end-to-end verifier for this Goal. Translate the objective into observable proof: what command, artifact, trace, screenshot, log, fixture, database assertion, API probe, contract test, performance measurement, source/docs comparison, or other domain-appropriate signal would prove the requested outcome with near-100% confidence? Create the simplest reliable local/free scripts, fixtures, harnesses, or test commands needed, then update the Goal with a verifier_command and verifier_description using the goals tool. For mobile/UI goals, prefer local simulator/browser evidence such as iOS Simulator screenshots when available before requiring a physical phone. The verifier must be runnable locally/free and produce command or file evidence, not narrative or human visual inspection. If an external prerequisite is missing, mark it missing with exact user instructions.`;
68947
+ Define and build a real end-to-end verifier for this Goal. Begin from the intended experience and required senses/signals already implied by the success criteria and evidence plan. Choose a proportional local/free verifier that observes those signals and catches the important goal-specific failures; do not add generic simulations, screenshots, benchmarks, or scripts unless they directly support that proof. Update the Goal with a verifier_command and verifier_description using the goals tool. The verifier must be runnable locally/free and produce durable command or file evidence, not narrative or human visual inspection. If an external prerequisite is missing, mark it missing with exact user instructions.`;
68913
68948
  }
68914
68949
  function incompleteTasks(run) {
68915
68950
  return run.tasks.filter((task) => task.status !== "done");
@@ -68976,6 +69011,10 @@ ${priorSummaries}
68976
69011
  Run targeted diagnostics, fix the root cause, update durable Goal evidence with the goals tool, and rerun the exact verifier command. Do not mark the Goal complete.`;
68977
69012
  }
68978
69013
  function decideGoalNextAction(run, options2 = {}) {
69014
+ const completion = canCompleteGoalRun(run);
69015
+ if (completion.ok) {
69016
+ return { kind: "complete", reason: completion.reason };
69017
+ }
68979
69018
  if (run.status === "blocked" || run.status === "failed" || run.status === "passed" || run.status === "paused" && !run.continueRequestedAt) {
68980
69019
  return { kind: "terminal", status: run.status, reason: `Goal is ${run.status}.` };
68981
69020
  }
@@ -69016,10 +69055,6 @@ function decideGoalNextAction(run, options2 = {}) {
69016
69055
  reason: `Goal task "${task.title}" is ready for worker attempt ${attempts}.`
69017
69056
  };
69018
69057
  }
69019
- const completion = canCompleteGoalRun(run);
69020
- if (completion.ok) {
69021
- return { kind: "complete", reason: completion.reason };
69022
- }
69023
69058
  const blockedEvidence = blockedEvidencePlanReason(run);
69024
69059
  if (blockedEvidence) {
69025
69060
  return { kind: "blocked", reason: blockedEvidence };
@@ -69120,6 +69155,7 @@ var EvidencePlanInput = external_exports.object({
69120
69155
  "browser",
69121
69156
  "device",
69122
69157
  "source",
69158
+ "file",
69123
69159
  "manual"
69124
69160
  ]).describe("How this proof will be gathered"),
69125
69161
  description: external_exports.string().describe("What this evidence proves"),
@@ -69188,7 +69224,7 @@ function asEvidenceKind(value) {
69188
69224
  return "summary";
69189
69225
  }
69190
69226
  function asEvidenceMechanism(value) {
69191
- if (value === "command" || value === "test" || value === "script" || value === "fixture" || value === "log" || value === "screenshot" || value === "video" || value === "browser" || value === "device" || value === "source" || value === "manual") {
69227
+ if (value === "command" || value === "test" || value === "script" || value === "fixture" || value === "log" || value === "screenshot" || value === "video" || value === "browser" || value === "device" || value === "source" || value === "file" || value === "manual") {
69192
69228
  return value;
69193
69229
  }
69194
69230
  return "command";
@@ -69415,7 +69451,9 @@ function createGoalsTool(cwd2) {
69415
69451
  const completion = canCompleteGoalRun(runWithVerifier);
69416
69452
  const updated = await upsertGoalRun(cwd2, {
69417
69453
  ...runWithVerifier,
69418
- status: result.status === "pass" && completion.ok ? "passed" : result.status === "pass" ? "ready" : result.status === "fail" ? "ready" : "verifying"
69454
+ status: result.status === "pass" && completion.ok ? "passed" : result.status === "pass" ? goalHasBlockingPrerequisites(runWithVerifier) ? "blocked" : "ready" : result.status === "fail" ? goalHasBlockingPrerequisites(runWithVerifier) ? "blocked" : "ready" : "verifying",
69455
+ blockers: result.status === "pass" ? [] : run.blockers,
69456
+ activeWorkerId: void 0
69419
69457
  });
69420
69458
  return `Verifier recorded for "${updated.title}": ${result.status}.`;
69421
69459
  }
@@ -70335,7 +70373,7 @@ ${planContent.trim()}
70335
70373
  function renderResearchSection() {
70336
70374
  return `## Research & Verification
70337
70375
 
70338
- Do not assume APIs, CLI flags, config schema, internals, or error wording. Use \`source_path\` for installed deps and inspect with read/grep/find/ls; use \`web_search\` then \`web_fetch\` for authoritative docs. For public code, use ReferenceSources for curated repos or DiscoverRepos for current/top repos, then verify exact snippets with SearchCode literal text/RE2 (not semantic); \`path\` is a literal path substring and \`repo\` only after broad/peek proof. When driving a programmatic Goal run, proactively ask what observable artifact would prove the requested outcome worked end-to-end, then plan the simplest reliable local/free proof path for that domain: tests/CLIs, fixtures or seeded data, dev servers, browser automation, simulator or device screenshots, video/frame inspection, logs, generated assets, protocol traces, database assertions, API probes, contract tests, performance measurements, source/docs comparisons, or other measurable artifacts. UI/mobile screenshots are examples, not the whole solution; prefer local simulator/browser tooling such as iOS Simulator screenshots when available before blocking on a physical device, and block only with exact user instructions for true external prerequisites. Run relevant checks after edits; read/fix failures; never report unrun or failing checks as passing.`;
70376
+ Do not assume APIs, CLI flags, config schema, internals, or error wording. Use \`source_path\` for installed deps and inspect with read/grep/find/ls; use \`web_search\` then \`web_fetch\` for authoritative docs. For public code, use ReferenceSources for curated repos or DiscoverRepos for current/top repos, then verify exact snippets with SearchCode literal text/RE2 (not semantic); \`path\` is a literal path substring and \`repo\` only after broad/peek proof. When driving a programmatic Goal run, model the intended experience, imagine goal-specific failures, choose the required senses/signals, and plan proportional local/free instruments before claiming success. Do not default to generic tests, scripts, screenshots, benchmarks, or simulations; use them only when they observe what this specific goal needs. Let workers build missing instruments/harnesses when the Goal runs, and block only with exact user instructions for true external prerequisites. Run relevant checks after edits; read/fix failures; never report unrun or failing checks as passing.`;
70339
70377
  }
70340
70378
  function renderCodeQualitySection() {
70341
70379
  return `## Code Quality
@@ -72071,51 +72109,71 @@ var PROMPT_COMMANDS = [
72071
72109
  description: "Create a programmatic goal loop",
72072
72110
  prompt: `# Goal: Programmatic Goal Loop
72073
72111
 
72074
- You are creating a durable Goal run: a programmatic control loop that should keep the main orchestrator focused on the user's objective while workers/harnesses/diagnostics produce evidence.
72112
+ You are creating a durable Goal run: a programmatic control loop that lets the user rely on the agent while they are not watching. The run should keep the main orchestrator focused on the objective while workers build, instrument, diagnose, and gather evidence.
72075
72113
 
72076
72114
  ## User objective
72077
72115
 
72078
72116
  The user's objective is in the command arguments. If the arguments are absent or too vague to identify an actionable objective, ask exactly one concise clarifying question and do not create a Goal run yet.
72079
72117
 
72080
- ## Required behavior
72118
+ ## Non-negotiable boundary: /goal creates a run, it does not do the work
72119
+
72120
+ The initial /goal invocation is setup/orchestration only. During this turn:
72121
+
72122
+ - Create or update the durable run and Goal tasks, then stop.
72123
+ - Do not implement, fix, refactor, edit, or generate project artifacts for the objective yourself.
72124
+ - Do not call subagent, the normal tasks tool, goals resume, or any action that starts workers, verifiers, or auto-continuation.
72125
+ - Do not run the verifier or "just start" any task. Worker agents do implementation after the user explicitly starts the Goal from the Goal pane with (R).
72126
+ - The only non-goals tools allowed before stopping are cheap local prerequisite checks needed to know whether the run is blocked. If a check would mutate files, start a service, run a long process, launch a worker, or begin implementation, make it a Goal task instead.
72127
+
72128
+ ## Core mindset: goal-specific sensory proof
72129
+
72130
+ Do not default to ordinary tests, generic scripts, or broad simulations. First model what must be experienced for this specific goal to be trusted without the human present.
72131
+
72132
+ For each Goal, identify:
72133
+
72134
+ 1. Intended experience \u2014 who or what must experience the result: user, customer, operator, developer, attacker, browser, device, API client, database, model, downstream system, or another relevant perspective.
72135
+ 2. Failure imagination \u2014 the goal-specific ways the result could appear done while still failing in reality.
72136
+ 3. Required senses/signals \u2014 the observations needed to detect those failures. Think in capabilities, not fixed tools: perception of rendered output, interaction, timing, persistence/state, external boundaries, adversarial/social pressure, generated artifacts, traces, comparisons, or other signals relevant to this objective.
72137
+ 4. Proportional instruments \u2014 local/free ways workers can capture those signals. The evidence portfolio should be as small as possible while still removing the important assumptions; do not simulate, script, screenshot, benchmark, or red-team anything unless that signal is relevant to this goal.
72138
+ 5. Completion rule \u2014 why the planned evidence would be enough to claim success, and what remains unproven or blocked.
72139
+
72140
+ Any examples you consider are inspiration, not a checklist. Borrow verification ideas from any domain when useful, but choose only the senses/signals that fit the user's actual objective.
72141
+
72142
+ ## Orchestrator responsibilities
72081
72143
 
72082
72144
  1. Translate the user's objective into:
72083
72145
  - a short title,
72084
72146
  - the original goal text,
72085
72147
  - concrete success criteria that can be verified,
72086
72148
  - prerequisite checks,
72087
- - an evidence plan: the simplest proof paths that would demonstrate success end-to-end,
72088
- - the local/free harness or observability you can build,
72089
- - a verifier command or verifier description.
72090
- 2. Build a capability/evidence plan before implementation: decide what would actually prove the goal works, such as scripts, tests, fixtures, seeded data, app/dev servers, browser automation, screenshots, logs, video/frame inspection, source/docs/code-search comparison, local CLIs, or generated assets. Do not require a script for every task; choose the simplest reliable proof that removes assumptions.
72091
- 3. Before doing implementation work or launching workers, identify prerequisites and check the ones you can check locally. Examples:
72092
- - model/API/OAuth credentials exist for simulated-agent testing,
72093
- - required local CLIs exist (ffmpeg, expo, adb, xcrun, playwright, etc.),
72094
- - required app/dev server can start or is already running,
72095
- - required fixture files, assets, devices, emulators, or test data exist or can be generated locally.
72096
- 4. Prefer local/free tools: scripts, shell commands, existing CLIs, test runners, logs, screenshots/images, existing dependencies, source_path, web docs, kencode search, and disposable workers/subagents. Do not require paid services, signups, or new external accounts unless unavoidable.
72097
- 5. Only ask the user for true external blockers after checking what you can do yourself. If a missing input cannot be generated or verified locally (credentials, paid services, physical devices, private assets, permissions), record the exact minimal prerequisite and ask once in chat; do not ask for broad lists of things you could inspect or create yourself.
72149
+ - an evidence plan describing the goal-specific sensory proof required,
72150
+ - harness or observability items that workers may need to build,
72151
+ - a verifier command when already obvious, otherwise a verifier description or task to define one.
72152
+ 2. Plan first; do not build during initial Goal creation. The orchestrator may do cheap local prerequisite checks needed to determine whether the Goal is blocked, but worker agents should build instruments, implementation changes, harnesses, diagnostics, and verifier commands after the user starts the Goal. If implementation work is needed, capture it as a Goal task instead of doing it yourself.
72153
+ 3. Before launching workers, identify prerequisites and check the ones you can check locally. Examples are non-exhaustive and should not anchor the plan: required credentials or permissions, local capabilities, app/runtime availability, fixture/assets/test data, devices/emulators, network or service access, or domain-specific inputs.
72154
+ 4. Prefer local/free capabilities already available in the project or environment. Do not require paid services, signups, new external accounts, private assets, or physical access unless unavoidable for this specific objective.
72155
+ 5. Only ask the user for true external blockers after checking what you can do yourself. If a missing input cannot be generated or verified locally, record the exact minimal prerequisite and ask once in chat; do not ask for broad lists of things you could inspect or create yourself.
72098
72156
  6. Treat user-provided prerequisites as the first Goal item, named "User prerequisites" in the pane. The user may provide the missing value or instructions in chat. After they do, verify it locally without revealing secrets, then update the matching prerequisite to \`met\` with short evidence before any worker task runs.
72099
72157
  7. Persist the run with the goals tool:
72100
72158
  - call \`goals({ action: "create", ... })\` once the objective is understood,
72101
72159
  - include success criteria, prerequisites, evidence_plan items, harness items, and verifier info,
72102
72160
  - if any prerequisite is missing or unknown and cannot be automatically checked, persist the run as blocked and ask the user for the exact missing thing once.
72103
- 8. Add Goal tasks with \`goals({ action: "task", ... })\`. Do not use the normal tasks tool for this workflow. Each Goal task prompt must be standalone, mention the same project cwd, the specific files/scripts/commands to use, evidence to record, and verification expectations. Avoid pure "investigate and report" tasks unless their prompt explicitly requires persisting concrete findings with \`goals({ action: "evidence", ... })\` and creating or updating the next implementation task from those findings.
72104
- 9. Persist evidence with \`goals({ action: "evidence", ... })\` whenever you create diagnostics, run harnesses, capture logs/screenshots, record controller decisions, attach verifier artifact paths, or learn a blocker.
72105
- 10. Completion means verifier evidence satisfies the original success criteria. Do not call \`goals({ action: "complete" })\` merely because tasks are done; only complete after verification passes.
72106
- 11. When the Goal reaches a terminal state, give the user a concise final summary in chat. Use a compact 3\u20134 column table with columns that fit what happened, such as outcome, evidence/verifier, changed work, blockers, or next action. Do not dump worker logs; point to artifact paths when useful.
72161
+ 8. Add Goal tasks with \`goals({ action: "task", ... })\`. Do not use the normal tasks tool for this workflow. Each Goal task prompt must be standalone, mention the same project cwd, the specific goal slice, the sensory signals or evidence it must produce, any existing instruments it should reuse, and verification expectations. Avoid pure "investigate and report" tasks unless their prompt explicitly requires persisting concrete findings with \`goals({ action: "evidence", ... })\` and creating or updating the next implementation task from those findings.
72162
+ 9. Persist evidence with \`goals({ action: "evidence", ... })\` whenever workers create diagnostics, build or run instruments, capture artifacts, record controller decisions, attach verifier output, or learn a blocker.
72163
+ 10. Completion means verifier evidence satisfies the original success criteria and the required sensory proof. Do not call \`goals({ action: "complete" })\` merely because tasks are done; only complete after verification passes.
72164
+ 11. When the Goal reaches a terminal state, give the user a specific final summary in chat. Do not collapse the outcome into one generic row or say only that it "verified." Use a compact 3\u20134 column table with one row per substantive Goal task, evidence path, success criterion, verifier result, blocker, or decision. For bug/fix/audit goals, include the problem, how it was proven real or wrong, what fixed it, and the exact verification. For creation/improvement/non-problem goals, substitute the requested outcome or gap, what was delivered or decided, and the exact proof that the intended experience now exists. Include small snippets when useful: file:line references, command names and exit codes, short before/after text, log excerpts, artifact paths, or verifier output summaries. Do not dump worker logs; quote only the few details needed to make the conclusion auditable.
72107
72165
 
72108
72166
  ## Loop semantics
72109
72167
 
72110
- Think in this order: observe \u2192 instrument \u2192 automate \u2192 run \u2192 inspect evidence \u2192 fix \u2192 rerun until verified or blocked.
72168
+ Initial /goal turn order: understand intended experience \u2192 imagine relevant failures \u2192 choose required senses/signals \u2192 plan proportional instruments \u2192 persist the run/tasks/evidence plan \u2192 stop.
72111
72169
 
72112
- After the user starts a Goal from the Goal pane with (R), worker and verifier completions are sent back to you as hidden synthetic events. On each event, call \`goals({ action: "status", run_id })\`, inspect current state, briefly say what the orchestrator is doing so the chat shows progress, and take the next durable control-loop action rather than merely narrating. The UI keeps auto-continuing until the run is passed, blocked, paused, or failed.
72170
+ After the user starts a Goal from the Goal pane with (R), worker and verifier completions are sent back to you as hidden synthetic events. On each event, call \`goals({ action: "status", run_id })\`, inspect current state, briefly say what the orchestrator is doing so the chat shows progress, and take the next durable control-loop action rather than merely narrating. The UI keeps auto-continuing until the run is passed, blocked, paused, or failed. Even during auto-continuation, do not switch into hands-on implementation; if work is needed, create or update Goal tasks and let workers/verifiers do it.
72113
72171
 
72114
- If no verifier command exists yet, create a task to define one. If the verifier fails, persist the failure evidence and add the next Goal task that addresses the failure. Cap runaway loops by pausing and recording evidence when repeated attempts stop making progress.
72172
+ If no verifier command exists yet, create a task to define one. If an evidence path or harness is only planned, create a worker task to build the missing instrument, then later workers can reuse that instrument for subsequent slices. If the verifier fails, persist the failure evidence and add the next Goal task that addresses the failure. Cap runaway loops by pausing and recording evidence when repeated attempts stop making progress.
72115
72173
 
72116
72174
  ## Final response
72117
72175
 
72118
- When initially creating the Goal, keep the response short: say whether the Goal was created, ready, or blocked; mention the exact missing prerequisite if blocked; and tell the user they can press Ctrl+G to view it. If they ask how to start it, tell them the Goal pane keybind is (R) to run it. When auto-continuation eventually passes, fails, blocks, or pauses the Goal, provide the compact final summary table described above.`
72176
+ When initially creating the Goal, keep the response short: say whether the Goal was created, ready, or blocked; mention the exact missing prerequisite if blocked; and tell the user they can press Ctrl+G to view it. Then stop. Do not continue into implementation, worker startup, verifier execution, or Goal resume. If they ask how to start it, tell them the Goal pane keybind is (r) to run it. When auto-continuation eventually passes, fails, blocks, or pauses the Goal, provide the specific multi-row final summary table described above, with concrete proof snippets instead of a generic "verified" claim.`
72119
72177
  },
72120
72178
  {
72121
72179
  name: "scan",
@@ -73671,6 +73729,33 @@ function estimateConversationTokens(messages) {
73671
73729
  var TOOL_RESULT_MAX_CHARS = 2e3;
73672
73730
  var MAX_SUMMARY_RETRIES = 2;
73673
73731
  var MAX_SUMMARY_OUTPUT_TOKENS = 4096;
73732
+ var SUMMARY_ATTEMPT_TIMEOUT_MS = 3e4;
73733
/**
 * Error type used to signal that waiting for a summary LLM response exceeded
 * its time budget. Its `name` is "SummaryTimeoutError".
 */
var SummaryTimeoutError = class extends Error {
  /**
   * @param {number} timeoutMs - The time budget, in milliseconds, that was
   *   exceeded; it is embedded in the error message.
   */
  constructor(timeoutMs) {
    const message = `Summary LLM response timed out after ${timeoutMs}ms`;
    super(message);
    this.name = "SummaryTimeoutError";
  }
};
73739
/**
 * Waits for `response`, giving up after `timeoutMs` or when `signal` aborts.
 *
 * The returned promise settles with whichever happens first:
 * - `response` fulfils or rejects: its value or error is passed through;
 * - the timer fires: rejects with a `SummaryTimeoutError`;
 * - `signal` fires "abort": rejects with a `DOMException` named "AbortError".
 *
 * If `signal` is already aborted on entry, `signal.throwIfAborted()` rejects
 * before any timer or listener is installed. The timer and the abort listener
 * are always removed once the wait settles. Only the wait is abandoned here;
 * nothing in this function cancels the work behind `response`.
 *
 * @param {Promise<unknown>} response - The pending response to wait for.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @param {AbortSignal} [signal] - Optional signal that cancels the wait.
 * @returns {Promise<unknown>} The fulfilled value of `response`.
 */
async function awaitSummaryResponseWithTimeout(response, timeoutMs, signal) {
  signal?.throwIfAborted();
  let timer;
  let onAbort;
  const guarded = new Promise((settle, fail) => {
    timer = setTimeout(() => {
      fail(new SummaryTimeoutError(timeoutMs));
    }, timeoutMs);
    // Node.js timer handles expose unref(); calling it keeps this timer from
    // being the only thing holding the process open.
    if (typeof timer.unref === "function") {
      timer.unref();
    }
    onAbort = () => {
      fail(new DOMException("Aborted", "AbortError"));
    };
    signal?.addEventListener("abort", onAbort, { once: true });
    response.then(settle, fail);
  });
  try {
    return await guarded;
  } finally {
    if (timer) {
      clearTimeout(timer);
    }
    if (onAbort) {
      signal?.removeEventListener("abort", onAbort);
    }
  }
}
73674
73759
  var COMPACTION_SYSTEM_PROMPT = "You are a conversation compaction assistant. Your job is to create a concise summary of a conversation between a user and an AI coding assistant.\n\nThis summary will replace older messages to keep the conversation within context limits while preserving all important information needed to continue the work seamlessly.\n\nAlways output the summary \u2014 never refuse, never ask questions, never output empty responses.\n\n## What to Include\n- **User intent and goals** \u2014 what the user is trying to accomplish\n- **What was done** \u2014 what was implemented, modified, or debugged, including technical approaches and outcomes\n- **File operations** \u2014 all files created, modified, or referenced, with key changes\n- **Tool call outcomes** \u2014 which tools were called and their key results\n- **Key decisions** \u2014 important choices made and why\n- **Solutions & troubleshooting** \u2014 problems encountered and how they were resolved\n\n## What to Exclude\n- Redundant or superseded information\n- Full file contents (reference by path instead)\n- Verbose tool output (summarize key results)\n- Plans, next steps, or implementation instructions \u2014 do NOT carry forward action items or plans from old conversation summaries. Summarize what HAPPENED, not what SHOULD happen next. The recent messages (preserved separately) already contain the current context.\n\nFocus on technical precision. Include specific identifiers (file paths, function names, etc.) that would be essential for continuation. Write in third person and maintain an objective, technical tone.";
73675
73760
  var COMPACTION_USER_PROMPT = "Summarize the conversation above into a concise summary following the instructions. Output only the summary, nothing else.";
73676
73761
  var COMPACTION_RESERVE_TOKENS = 16384;
@@ -74032,7 +74117,7 @@ You MUST preserve all references to this plan and its approval status in the sum
74032
74117
  baseUrl: options2.baseUrl,
74033
74118
  signal: options2.signal
74034
74119
  });
74035
- const response = await result.response;
74120
+ const response = await awaitSummaryResponseWithTimeout(result.response, SUMMARY_ATTEMPT_TIMEOUT_MS, options2.signal);
74036
74121
  options2.signal?.throwIfAborted();
74037
74122
  log("INFO", "compaction", `Summary LLM response received`, {
74038
74123
  attempt: String(attempt),
@@ -74061,7 +74146,7 @@ You MUST preserve all references to this plan and its approval status in the sum
74061
74146
  if (options2.signal?.aborted || err instanceof Error && err.name === "AbortError") {
74062
74147
  throw err;
74063
74148
  }
74064
- log("WARN", "compaction", `Summary LLM call failed: ${err instanceof Error ? err.message : String(err)}`, { attempt: String(attempt) });
74149
+ log("WARN", "compaction", err instanceof SummaryTimeoutError ? `Summary LLM call timed out after ${SUMMARY_ATTEMPT_TIMEOUT_MS}ms \u2014 using fallback if no later attempt succeeds` : `Summary LLM call failed: ${err instanceof Error ? err.message : String(err)}`, { attempt: String(attempt), timeoutMs: String(SUMMARY_ATTEMPT_TIMEOUT_MS) });
74065
74150
  }
74066
74151
  }
74067
74152
  if (summaryText.length === 0) {
@@ -99859,4 +99944,4 @@ react/cjs/react-jsx-runtime.development.js:
99859
99944
  * LICENSE file in the root directory of this source tree.
99860
99945
  *)
99861
99946
  */
99862
- //# sourceMappingURL=chunk-U3L3QW6X.js.map
99947
+ //# sourceMappingURL=chunk-JBKZOBJ7.js.map