@drisp/cli 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- // src/infra/plugins/workflowSourceResolution.ts
1
+ // src/infra/plugins/workflowResolver.ts
2
2
  import fs2 from "fs";
3
3
  import path2 from "path";
4
4
 
@@ -28,15 +28,6 @@ function resolvePluginManifestPath(repoDir) {
28
28
  function resolveLegacyPluginManifestPath(repoDir) {
29
29
  return path.join(repoDir, ".claude-plugin", "marketplace.json");
30
30
  }
31
- function resolveWorkflowManifestPath(repoDir) {
32
- const preferredManifestPath = path.join(
33
- repoDir,
34
- ".athena-workflow",
35
- "marketplace.json"
36
- );
37
- const legacyManifestPath = resolveLegacyPluginManifestPath(repoDir);
38
- return fs.existsSync(preferredManifestPath) ? preferredManifestPath : legacyManifestPath;
39
- }
40
31
  function readManifest(manifestPath) {
41
32
  if (!fs.existsSync(manifestPath)) {
42
33
  throw new Error(`Marketplace manifest not found: ${manifestPath}`);
@@ -105,67 +96,6 @@ function resolvePluginDirFromManifest(pluginName, repoDir, manifestPath) {
105
96
  }
106
97
  return pluginDir;
107
98
  }
108
- function resolveWorkflowEntryPath(entry, manifest, repoDir) {
109
- if (typeof entry.source !== "string") {
110
- throw new Error(
111
- `Workflow "${entry.name}" uses a remote source type which is not supported.`
112
- );
113
- }
114
- let sourcePath = entry.source;
115
- const { workflowRoot } = manifest.metadata ?? {};
116
- if (workflowRoot && !path.isAbsolute(sourcePath) && !sourcePath.startsWith("./") && !sourcePath.startsWith("../")) {
117
- sourcePath = path.join(workflowRoot, sourcePath);
118
- }
119
- const workflowPath = path.resolve(repoDir, sourcePath);
120
- if (!workflowPath.startsWith(repoDir + path.sep) && workflowPath !== repoDir) {
121
- throw new Error(
122
- `Workflow "${entry.name}" source resolves outside the marketplace repo: ${workflowPath}`
123
- );
124
- }
125
- const resolvedWorkflowPath = preferCanonicalWorkflowPath(
126
- repoDir,
127
- workflowPath
128
- );
129
- if (!fs.existsSync(resolvedWorkflowPath)) {
130
- throw new Error(`Workflow source not found: ${resolvedWorkflowPath}`);
131
- }
132
- return resolvedWorkflowPath;
133
- }
134
- function resolveWorkflowPathFromManifest(workflowName, repoDir, manifestPath) {
135
- const manifest = readManifest(manifestPath);
136
- const workflows = manifest.workflows ?? [];
137
- const entry = workflows.find((w) => w.name === workflowName);
138
- if (!entry) {
139
- const available = workflows.map((w) => w.name).join(", ") || "(none)";
140
- throw new Error(
141
- `Workflow "${workflowName}" not found in marketplace manifest ${manifestPath}. Available workflows: ${available}`
142
- );
143
- }
144
- return resolveWorkflowEntryPath(entry, manifest, repoDir);
145
- }
146
- function listWorkflowEntriesFromManifest(repoDir, manifestPath, source) {
147
- const manifest = readManifest(manifestPath);
148
- const workflows = manifest.workflows ?? [];
149
- return workflows.filter(
150
- (entry) => typeof entry.source === "string"
151
- ).map((entry) => ({
152
- name: entry.name,
153
- description: entry.description,
154
- version: entry.version,
155
- workflowPath: resolveWorkflowEntryPath(entry, manifest, repoDir),
156
- ref: source.kind === "remote" ? `${entry.name}@${source.owner}/${source.repo}` : void 0,
157
- source
158
- }));
159
- }
160
- function preferCanonicalWorkflowPath(repoDir, workflowPath) {
161
- const relativePath = path.relative(repoDir, workflowPath);
162
- const segments = relativePath.split(path.sep);
163
- if (segments[0] !== ".workflows") {
164
- return workflowPath;
165
- }
166
- const canonicalPath = path.join(repoDir, "workflows", ...segments.slice(1));
167
- return fs.existsSync(canonicalPath) ? canonicalPath : workflowPath;
168
- }
169
99
  function isMarketplaceRef(entry) {
170
100
  return MARKETPLACE_REF_RE.test(entry);
171
101
  }
@@ -326,8 +256,86 @@ var WorkflowNotFoundError = class extends Error {
326
256
  this.searchedSources = searchedSources;
327
257
  }
328
258
  };
259
/**
 * Error describing a workflow whose requested version was not found in a
 * given source. The constructor arguments are kept on the instance so callers
 * can inspect them without parsing the message.
 */
var WorkflowVersionNotFoundError = class extends Error {
  workflowName;
  requestedVersion;
  availableVersion;
  sourceLabel;
  /**
   * @param {string} workflowName - Name interpolated into the message.
   * @param {string} requestedVersion - Version that was asked for.
   * @param {string | undefined} availableVersion - Version that was found; a
   *   falsy value is reported as "does not declare a version".
   * @param {string} sourceLabel - Human-readable description of the source.
   */
  constructor(workflowName, requestedVersion, availableVersion, sourceLabel) {
    let versionNote = "marketplace entry does not declare a version";
    if (availableVersion) {
      versionNote = `found version ${availableVersion}`;
    }
    super(
      `Workflow "${workflowName}" version ${requestedVersion} not found in ${sourceLabel} (${versionNote}).`
    );
    this.name = "WorkflowVersionNotFoundError";
    Object.assign(this, {
      workflowName,
      requestedVersion,
      availableVersion,
      sourceLabel,
    });
  }
};
329
276
 
330
- // src/infra/plugins/workflowSourceResolution.ts
277
+ // src/infra/plugins/workflowResolver.ts
278
/**
 * Pick the marketplace manifest to read for a workflow repo.
 *
 * The `.athena-workflow/marketplace.json` file wins when it exists on disk;
 * otherwise the `.claude-plugin/marketplace.json` path is returned without
 * checking that it exists.
 *
 * @param {string} repoDir - Root directory of the marketplace repo.
 * @returns {string} Path of the manifest file to use.
 */
function resolveWorkflowManifestPath(repoDir) {
  const [preferred, legacy] = [".athena-workflow", ".claude-plugin"].map(
    (configDir) => path2.join(repoDir, configDir, "marketplace.json")
  );
  if (fs2.existsSync(preferred)) {
    return preferred;
  }
  return legacy;
}
283
/**
 * Swap a path under `<repoDir>/.workflows/` for its `<repoDir>/workflows/`
 * twin when that twin exists on disk.
 *
 * @param {string} repoDir - Root directory of the marketplace repo.
 * @param {string} workflowPath - Path to a workflow inside `repoDir`.
 * @returns {string} The `workflows/` counterpart if present, otherwise
 *   `workflowPath` unchanged.
 */
function preferCanonicalWorkflowPath(repoDir, workflowPath) {
  const [topLevelDir, ...remainder] = path2
    .relative(repoDir, workflowPath)
    .split(path2.sep);
  if (topLevelDir === ".workflows") {
    const canonical = path2.join(repoDir, "workflows", ...remainder);
    if (fs2.existsSync(canonical)) {
      return canonical;
    }
  }
  return workflowPath;
}
290
/**
 * Resolve a manifest workflow entry to an existing path inside the repo.
 *
 * Steps:
 *  1. Reject entries whose `source` is not a string.
 *  2. Prefix bare sources (not absolute, not starting with `./` or `../`)
 *     with `manifest.metadata.workflowRoot` when that is set.
 *  3. Resolve against the repo root and refuse anything that lands outside it.
 *  4. Let `preferCanonicalWorkflowPath` pick the final location and require
 *     that it exists on disk.
 *
 * @param {{ name: string, source: unknown }} entry - Workflow entry from the manifest.
 * @param {{ metadata?: { workflowRoot?: string } }} manifest - Parsed manifest.
 * @param {string} repoDir - Root directory of the marketplace repo. It may be
 *   relative or carry a trailing separator; it is normalized before use.
 * @returns {string} Path of the workflow source.
 * @throws {Error} If the source is not a string, resolves outside the repo,
 *   or does not exist.
 */
function resolveWorkflowEntryPath(entry, manifest, repoDir) {
  if (typeof entry.source !== "string") {
    throw new Error(
      `Workflow "${entry.name}" uses a remote source type which is not supported.`
    );
  }

  // Normalize once. Comparing the resolved workflow path against the raw
  // `repoDir` string wrongly rejected every source when `repoDir` was relative,
  // ended in a separator, or was the filesystem root.
  const repoRoot = path2.resolve(repoDir);

  let sourcePath = entry.source;
  const { workflowRoot } = manifest.metadata ?? {};
  const isExplicitlyRelative =
    sourcePath.startsWith("./") || sourcePath.startsWith("../");
  if (workflowRoot && !path2.isAbsolute(sourcePath) && !isExplicitlyRelative) {
    sourcePath = path2.join(workflowRoot, sourcePath);
  }

  const workflowPath = path2.resolve(repoRoot, sourcePath);

  // Lexical containment check: the path escapes the repo when the relative
  // route climbs out via `..`, or is absolute (for example another drive on
  // Windows). Symlinks are not resolved here.
  const relativeToRepo = path2.relative(repoRoot, workflowPath);
  const escapesRepo =
    relativeToRepo === ".." ||
    relativeToRepo.startsWith(`..${path2.sep}`) ||
    path2.isAbsolute(relativeToRepo);
  if (escapesRepo) {
    throw new Error(
      `Workflow "${entry.name}" source resolves outside the marketplace repo: ${workflowPath}`
    );
  }

  const resolved = preferCanonicalWorkflowPath(repoRoot, workflowPath);
  if (!fs2.existsSync(resolved)) {
    throw new Error(`Workflow source not found: ${resolved}`);
  }
  return resolved;
}
313
/**
 * Look up a workflow by name in a marketplace manifest and resolve its path.
 *
 * @param {string} workflowName - Name to match against each entry's `name`.
 * @param {string} repoDir - Root directory of the marketplace repo.
 * @param {string} manifestPath - Manifest file handed to `readManifest`.
 * @returns {string} Result of `resolveWorkflowEntryPath` for the first match.
 * @throws {Error} If no entry has that name; the message lists the names
 *   that are available, or `(none)`.
 */
function resolveWorkflowPathFromManifest(workflowName, repoDir, manifestPath) {
  const manifest = readManifest(manifestPath);
  const declared = manifest.workflows ?? [];

  for (const candidate of declared) {
    if (candidate.name === workflowName) {
      return resolveWorkflowEntryPath(candidate, manifest, repoDir);
    }
  }

  const knownNames = declared.map((item) => item.name).join(", ");
  const available = knownNames || "(none)";
  throw new Error(
    `Workflow "${workflowName}" not found in marketplace manifest ${manifestPath}. Available workflows: ${available}`
  );
}
325
/**
 * List the workflows in a manifest that have a string `source`.
 *
 * Entries with a non-string `source` are skipped. Every remaining entry is
 * resolved through `resolveWorkflowEntryPath`, so a single unresolvable entry
 * makes the whole call throw.
 *
 * @param {string} repoDir - Root directory of the marketplace repo.
 * @param {string} manifestPath - Manifest file handed to `readManifest`.
 * @param {{ kind: string, owner?: string, repo?: string }} source - Where the
 *   marketplace came from; copied onto every listing.
 * @returns {Array<object>} One listing per entry with `name`, `description`,
 *   `version`, `workflowPath`, `ref` and `source`. `ref` is
 *   `name@owner/repo` when `source.kind` is `"remote"`, otherwise `undefined`.
 */
function listWorkflowEntriesFromManifest(repoDir, manifestPath, source) {
  const manifest = readManifest(manifestPath);
  const hasStringSource = (entry) => typeof entry.source === "string";
  // Filter the whole list first, then resolve, as the original pipeline did.
  const pathBackedEntries = (manifest.workflows ?? []).filter(hasStringSource);

  return pathBackedEntries.map((entry) => {
    const { name, description, version } = entry;
    const workflowPath = resolveWorkflowEntryPath(entry, manifest, repoDir);
    let ref;
    if (source.kind === "remote") {
      ref = `${name}@${source.owner}/${source.repo}`;
    }
    return { name, description, version, workflowPath, ref, source };
  });
}
331
339
  function findMarketplaceRepoDir(startPath) {
332
340
  let currentDir = path2.resolve(startPath);
333
341
  for (; ; ) {
@@ -335,9 +343,7 @@ function findMarketplaceRepoDir(startPath) {
335
343
  return currentDir;
336
344
  }
337
345
  const parentDir = path2.dirname(currentDir);
338
- if (parentDir === currentDir) {
339
- return void 0;
340
- }
346
+ if (parentDir === currentDir) return void 0;
341
347
  currentDir = parentDir;
342
348
  }
343
349
  }
@@ -359,11 +365,7 @@ function resolveWorkflowMarketplaceSource(source) {
359
365
  `Local marketplace not found from source: ${trimmed}. Expected a marketplace repo root or a path inside one.`
360
366
  );
361
367
  }
362
- return {
363
- kind: "local",
364
- path: resolvedPath,
365
- repoDir
366
- };
368
+ return { kind: "local", path: resolvedPath, repoDir };
367
369
  }
368
370
  function listMarketplaceWorkflows(owner, repo) {
369
371
  requireGitForMarketplace("workflows");
@@ -391,47 +393,11 @@ function resolveMarketplaceWorkflow(ref) {
391
393
  resolveWorkflowManifestPath(repoDir)
392
394
  );
393
395
  }
394
- var WorkflowVersionNotFoundError = class extends Error {
395
- workflowName;
396
- requestedVersion;
397
- availableVersion;
398
- sourceLabel;
399
- constructor(workflowName, requestedVersion, availableVersion, sourceLabel) {
400
- const availableText = availableVersion ? `found version ${availableVersion}` : "marketplace entry does not declare a version";
401
- super(
402
- `Workflow "${workflowName}" version ${requestedVersion} not found in ${sourceLabel} (${availableText}).`
403
- );
404
- this.name = "WorkflowVersionNotFoundError";
405
- this.workflowName = workflowName;
406
- this.requestedVersion = requestedVersion;
407
- this.availableVersion = availableVersion;
408
- this.sourceLabel = sourceLabel;
409
- }
410
- };
411
- function parseBareWorkflowName(source) {
412
- const atIdx = source.indexOf("@");
413
- if (atIdx <= 0 || atIdx === source.length - 1) {
414
- return { bareName: source, pinnedVersion: void 0 };
415
- }
416
- const suffix = source.slice(atIdx + 1);
417
- if (suffix.includes("/")) {
418
- return { bareName: source, pinnedVersion: void 0 };
419
- }
420
- return {
421
- bareName: source.slice(0, atIdx),
422
- pinnedVersion: suffix
423
- };
424
- }
425
396
  function gatherMarketplaceWorkflowSources(source) {
426
397
  const trimmed = source.trim();
427
398
  const resolvedPath = path2.resolve(trimmed);
428
399
  if (fs2.existsSync(resolvedPath) && fs2.statSync(resolvedPath).isFile()) {
429
- return [
430
- {
431
- kind: "filesystem",
432
- workflowPath: fs2.realpathSync(resolvedPath)
433
- }
434
- ];
400
+ return [{ kind: "filesystem", workflowPath: fs2.realpathSync(resolvedPath) }];
435
401
  }
436
402
  if (!fs2.existsSync(resolvedPath) && isMarketplaceSlug(trimmed)) {
437
403
  const slashIdx = trimmed.indexOf("/");
@@ -477,6 +443,17 @@ function gatherMarketplaceWorkflowSources(source) {
477
443
  workflowPath: entry.workflowPath
478
444
  }));
479
445
  }
446
/**
 * Split a `name@version` workflow reference at its first `@`.
 *
 * The input is returned whole, with no pinned version, when there is no `@`,
 * when the `@` is the first or last character, or when the text after the
 * `@` contains a `/`.
 *
 * @param {string} source - Workflow reference as typed by the caller.
 * @returns {{ bareName: string, pinnedVersion: string | undefined }}
 */
function parseBareWorkflowName(source) {
  const unpinned = { bareName: source, pinnedVersion: void 0 };

  const separatorAt = source.indexOf("@");
  const hasNameAndSuffix = separatorAt > 0 && separatorAt < source.length - 1;
  if (!hasNameAndSuffix) {
    return unpinned;
  }

  const version = source.slice(separatorAt + 1);
  return version.includes("/")
    ? unpinned
    : { bareName: source.slice(0, separatorAt), pinnedVersion: version };
}
480
457
  function resolvedSourceLabel(s) {
481
458
  if (s.kind === "marketplace-remote") return `marketplace ${s.slug}`;
482
459
  if (s.kind === "marketplace-local") return `local marketplace ${s.repoDir}`;
@@ -1570,7 +1547,7 @@ import fs10 from "fs";
1570
1547
  import path8 from "path";
1571
1548
 
1572
1549
  // src/core/workflows/stateMachine.md
1573
- var stateMachine_default = "# Stateless Session Protocol\n\nYou operate in stateless sessions managed by a workflow runner. Each session is a fresh process with no memory of prior sessions. The **tracker file** is your only continuity \u2014 it's how you talk to your future self.\n\n## Execution Model\n\nThe runner spawns `claude -p` sessions in a loop:\n\n- **Session 1**: You receive the user's original request.\n- **Sessions 2+**: You receive a continuation prompt directing you to read the tracker.\n- **Between sessions**: The runner inspects the tracker for terminal markers. If found, or if the max iteration cap is reached, the loop ends. The tracker is preserved for resume, audit, and debugging.\n\n### Terminal Markers\n\nBy default, workflows use these tracker markers:\n\n- `<!-- WORKFLOW_COMPLETE -->`\n- `<!-- WORKFLOW_BLOCKED -->`\n- `<!-- WORKFLOW_BLOCKED: reason -->`\n\nWorkflows may override the default marker strings via configuration. Use the markers configured for the current workflow.\n\nRules:\n\n- Only the last non-empty line of the tracker is treated as authoritative\n- Marker-like text earlier in the tracker, including notes, examples, or quoted instructions, is ignored\n- Write `WORKFLOW_COMPLETE` only when the workflow's completion criteria have been fully verified\n- Write `WORKFLOW_BLOCKED` only when progress cannot continue in the current workflow without external intervention or a workflow-defined stop condition has been reached\n- Include a concrete reason after the colon whenever possible, but `<!-- WORKFLOW_BLOCKED -->` without a reason is still valid\n\n### Tracker Path\n\nBy default, the tracker file lives at `.athena/<session_id>/tracker.md` in the project root, where `<session_id>` is the current Athena session ID. This session-scoped path allows multiple workflows to run concurrently and makes resume reliable. 
The runner provides the session ID \u2014 do not generate one yourself.\n\nWorkflows may override the default tracker path via configuration. Read and write the tracker at the configured path for the current workflow.\n\n**Assume interruption.** Your context window can reset at any moment \u2014 the runner may kill a session that's taking too long, or you may hit token limits mid-task. Any progress not written to the tracker is gone. This isn't a theoretical risk; it's the normal operating mode.\n\n## Session Protocol\n\nEvery session follows four phases: **Read**, **Orient**, **Execute**, **End**.\n\n### Phase 1 \u2014 Read the Tracker\n\nRead the tracker file at the configured tracker path for the current workflow. By default this is `.athena/<session_id>/tracker.md`.\n\n- **Contains `<!-- TRACKER_SKELETON -->`**: This is session 1. The runner created a skeleton tracker with the goal and session metadata. Proceed to Phase 2 (Orient) \u2014 replace the skeleton with a real tracker. **You must do this even if the entire request can be satisfied in a single turn.** Write a minimal tracker (what was asked, what was done, the outcome) and then append `<!-- WORKFLOW_COMPLETE -->`. Leaving the skeleton in place causes the runner to classify the session as a failure.\n- **Otherwise**: This is a continuation session. The tracker contains everything prior sessions learned and decided. Skip to Phase 3 (Execute) using the tracker's context.\n\nWhy read first: without the tracker, you'll duplicate work already done or contradict decisions made in prior sessions. The tracker is the single source of truth across sessions.\n\n### Phase 2 \u2014 Orient (Session 1 Only)\n\n#### 2a. Create the tracker immediately\n\nWrite a skeleton tracker as your first write operation, before doing any domain work. 
Even a minimal tracker with just the goal and \"orientation in progress\" provides continuity if the session is interrupted during setup.\n\nThe tracker must always answer four questions for any future session:\n\n1. What are we trying to accomplish?\n2. What has been completed so far?\n3. What work is left?\n4. What should the next session do first?\n\nThese answers are the contract between sessions. The exact section headings may vary by workflow, but the tracker must make all four answers explicit and easy to find. A future session reading this tracker has no other context \u2014 if something isn't here, it doesn't exist.\n\n#### 2b. Workflow-specific orientation\n\nExecute the orientation steps defined by the workflow. These vary by domain \u2014 a test-writing workflow explores the product in a browser; a migration workflow audits the database schema. The workflow defines what orientation means.\n\n#### 2c. Create a task plan\n\nRefine the skeleton tracker into granular, verifiable checkpoints based on what orientation revealed. Each task should be a concrete unit of progress, not a vague phase. Include verification steps (running checks, reviewing output), not just implementation. Vague tasks like \"write tests\" can't be meaningfully resumed by a future session that has no idea what \"write tests\" means in this context.\n\n#### 2d. Update the tracker\n\nAfter orientation, ensure the tracker captures: the goal, what was discovered, what's planned, and what the next session should do first. Record concrete observations \u2014 what you actually saw, not what you assumed. Assumptions that turn out wrong waste entire future sessions on rework.\n\n### Phase 3 \u2014 Execute\n\nWork through tasks, advancing the plan step by step.\n\n#### Load skills before acting\n\nIf the workflow defines a skill table, load the relevant skill before each activity. 
Skills carry implementation details \u2014 scaffolding steps, authentication strategies, locator rules, anti-patterns, code templates \u2014 that would otherwise be lost between sessions. This prompt defines the protocol; skills define how to execute each step.\n\n#### Follow the workflow's sequence\n\nExecute in the order the workflow prescribes. Not every session covers all steps \u2014 pick up where the tracker says rather than restarting the flow.\n\n#### Delegate heavy work\n\nUse subagents via the Task tool to offload heavy exploration or generation, preserving your main context for orchestration. Pass file paths, conventions, and concrete output expectations. Instruct subagents to load the appropriate skill.\n\nRespect the workflow's **delegation constraints** \u2014 some operations must run in the main agent because their output serves as proof or because the main agent needs to interpret results in context.\n\n#### Execute quality gates\n\nIf the workflow defines quality gates, execute them in order. Do not skip gates \u2014 they exist because prior experience showed that skipping them leads to cascading rework. If a gate returns a failing verdict, address the issues and re-run the gate before proceeding.\n\nRespect the workflow's **retry limits** for failing steps. Repeated failures usually signal a deeper issue that another retry won't fix.\n\n#### Update the tracker as you work\n\nTreat tracker updates as defensive checkpoints against three failure modes: the runner killing your session, your context collapsing under tool-output load, and you simply forgetting an hour from now what you just learned. 
The right cadence sits between \"every tool call\" (noisy, wastes tokens, turns the tracker into a log) and \"at the end of the session\" (everything is lost if you die mid-task).\n\nUpdate the tracker whenever any of the following happens \u2014 these are the checkpoints, not \"felt like a good moment\":\n\n- **You finished a discrete unit of work.** A file written, a fix applied, a test run, a quality gate passed. The tracker should reflect the new reality before you start the next unit, not after several units have piled up.\n- **You learned something a future session can't cheaply rederive.** An API quirk, a config field that turned out to matter, a dead end you've now ruled out, a decision between two approaches. Insights are tracker-worthy even when no code changed \u2014 losing them costs the next session a full re-exploration. The tracker is your knowledge ledger, not just a task log.\n- **You're about to do something risky or long-running.** Dispatching a subagent, kicking off a long build, calling a flaky external service, starting a large refactor. If that operation kills your session, only what's already in the tracker survives. Write first, then act.\n- **Your plan changed.** A task got resequenced, a new task surfaced, a planned task turned out to be unnecessary. Stale plans poison continuation sessions \u2014 the tracker must reflect what you'll actually do next, not what you thought five steps ago.\n- **You've been working a while without writing.** If you can't remember when you last touched the tracker, you've gone too long. A short defensive update (\"currently doing X, last completed Y, next is Z\") beats nothing.\n\nWhat an update contains depends on the trigger, but always cover: what changed (work or knowledge), what's now next, and any caveat the next session needs to know. Avoid transcribing tool calls \u2014 the tracker is a contract with your future self, not a replay log.\n\nThe cost of one extra tracker update is a few tokens. 
The cost of dying without one is a whole wasted session rediscovering what you already knew. Bias toward writing.\n\n#### Task visibility\n\nThe tracker contains the authoritative task plan \u2014 it persists across sessions. Your harness's task UI is only a live projection of that plan, visible to the user in their CLI widget. It is session-scoped and does not survive process exit.\n\n**The relationship:** tracker is the source of truth, task tools are the display.\n\n{{TASK_TOOL_INSTRUCTIONS}}\n\n- **Session 1 (Orient):** After creating the task plan in the tracker, project each task into the task management tools so the user can see progress in real time.\n- **Session 2+ (Resume):** After reading the tracker, recreate the task projection from the tracker's plan. Set statuses to match what the tracker says is done, remaining, and next. The user sees consistent progress across sessions.\n- **During work:** Update both \u2014 the task tools for immediate UI feedback, the tracker for persistence. When a task completes, mark it done in the task tools and record it in the tracker in the same working phase.\n\nThis gives the user a consistent view of progress in their CLI regardless of which session they're in, while the tracker remains the durable contract between sessions.\n\n### Phase 4 \u2014 End of Session\n\n1. Ensure the tracker reflects all progress, discoveries, and blockers.\n2. Write clear instructions for what the next session should do first.\n3. If all work is complete and verified: write `<!-- WORKFLOW_COMPLETE -->` at the end of the tracker.\n4. If an unrecoverable blocker prevents progress: write `<!-- WORKFLOW_BLOCKED -->` or `<!-- WORKFLOW_BLOCKED: reason -->` at the end of the tracker.\n\nDo not write terminal markers prematurely. 
The runner trusts markers unconditionally \u2014 a premature marker kills the loop before work is done, and there's no automatic recovery.\n\n## Session Bounding\n\nEach fresh session starts with a clean context window and a compact tracker \u2014 effectively a self-compaction. As you work, your context fills with tool outputs, exploration results, and intermediate state. The longer you run, the more attention is spread across tokens that are no longer relevant, degrading your precision on the work that matters now.\n\nWork on a bounded chunk per session. Ending early and letting the next session pick up from a clean tracker is almost always better than pushing through with a heavy context.\n\nHeuristics for when to checkpoint and end:\n\n- After completing a quality gate \u2014 natural boundary\n- After crossing multiple phases (e.g., explored + planned + wrote specs) \u2014 stop before pushing into the next\n- When you notice your context is heavy with tool outputs from earlier work\n\n## Guardrails\n\nQuick-reference checklist \u2014 each of these is explained in detail above:\n\n- Read the tracker before doing anything else\n- Replace the skeleton immediately \u2014 even for simple requests. Write minimal tracker content (what was asked, what was done, outcome) and then the terminal marker before the session ends.\n- Update the tracker on concrete triggers (unit of work done, insight learned, risky operation pending, plan changed) \u2014 not on a vague sense of \"meaningful progress\"\n- Project the tracker's task plan into task management tools at session start\n- Update both task tools and tracker as milestones complete\n- Load the relevant skill before each activity\n- Do not write the completion marker until all work is verified\n- Respect the workflow's delegation constraints and retry limits\n";
1550
+ var stateMachine_default = "# Stateless Session Protocol\n\nYou run in a stateless loop. Each session is a fresh process with no memory of prior sessions. **The tracker file is your only continuity** \u2014 read it, work, write it. Assume interruption: the runner may kill a long session, your context may collapse under tool output, you may hit token limits mid-task. Anything not in the tracker is gone.\n\n## First action, every session\n\n1. Read the tracker at the configured path (default: `.athena/<session_id>/tracker.md`). The runner provides the session ID \u2014 do not invent one.\n2. If the tracker contains `<!-- TRACKER_SKELETON -->` \u2192 this is session 1, run [**Orient**](#orient-session-1).\n3. Otherwise \u2192 this is a continuation, run [**Execute**](#execute-session-2) from where the tracker says, not from the start of the flow.\n\nReading first prevents two failure modes that waste whole sessions: redoing work already done, or contradicting decisions a prior session made.\n\n## Tracker contract\n\nThe tracker must always answer four questions:\n\n1. What are we trying to accomplish?\n2. What has been done?\n3. What's left?\n4. What should the next session do first?\n\nA future session has no other context. If something isn't here, it doesn't exist. Section headings may vary by workflow, but these four answers must be explicit and easy to find.\n\n### Terminal markers\n\nDefault markers (workflows may override \u2014 use the markers configured for the active workflow):\n\n- `<!-- WORKFLOW_COMPLETE -->` \u2014 all work done and verified\n- `<!-- WORKFLOW_BLOCKED -->` or `<!-- WORKFLOW_BLOCKED: reason -->` \u2014 cannot proceed without external intervention\n\nRules:\n\n- Only the last non-empty line of the tracker is authoritative. Marker-like text in notes, examples, or quoted instructions earlier in the file is ignored.\n- The runner trusts markers unconditionally. 
A premature marker ends the loop with no automatic recovery \u2014 write one only when its criteria are fully met.\n- Include a concrete reason after `WORKFLOW_BLOCKED:` whenever possible; the bare form is still valid.\n\n## Phases\n\n### Orient (session 1)\n\n1. **Replace the skeleton immediately**, before any domain work. Even a three-line tracker (goal + \"orienting\") protects you if the session dies during setup.\n2. Run the workflow's orientation steps. These vary by domain \u2014 a test-writing workflow explores the product in a browser; a migration workflow audits the schema. The workflow defines what orientation means.\n3. Refine the tracker into a granular plan. Each task a concrete, verifiable unit of work, including verification steps (running checks, reviewing output) \u2014 not just implementation. Vague tasks (\"write tests\") cannot be meaningfully resumed by a future session that has no idea what they mean here.\n4. Record concrete observations \u2014 what you actually saw, not what you assumed. Wrong assumptions burn entire future sessions on rework.\n5. **Single-turn requests still go through this phase.** If the entire request is satisfied in one turn, write a minimal tracker (what was asked, what was done, the outcome) and append `<!-- WORKFLOW_COMPLETE -->`. Leaving the skeleton in place causes the runner to classify the session as a failure.\n\n### Execute (session 2+)\n\n- Work from where the tracker says, in the workflow's prescribed sequence. Not every session covers every step.\n- If the workflow defines a skill table, **load the relevant skill before each activity**. Skills carry the implementation detail (scaffolding steps, locator rules, anti-patterns, code templates) that this protocol intentionally doesn't repeat.\n- Delegate heavy exploration or generation to subagents via the Task tool. Pass file paths, conventions, and concrete output expectations; tell them which skill to load. 
Respect the workflow's **delegation constraints** \u2014 some operations must run in the main agent because their output is proof, or because the main agent needs to interpret results in context.\n- Run quality gates in order. Do not skip \u2014 they exist because skipping cascades into rework. On a failing verdict, address the issues and re-run before proceeding. Respect the workflow's **retry limits**: repeated failure usually signals a deeper issue another retry won't fix.\n\n### End\n\n1. Tracker reflects all progress, discoveries, and blockers.\n2. Tracker says clearly what the next session should do first.\n3. If all work is verified: append the completion marker.\n4. If an unrecoverable blocker prevents progress: append the blocked marker, with a reason if you have one.\n\n## When to write the tracker\n\nWrite on **concrete triggers**, not on a vague sense of \"meaningful progress.\" The right cadence sits between every-tool-call (noisy log, wastes tokens) and end-of-session (everything lost if you die mid-task).\n\n- **Discrete unit done** \u2014 file written, fix applied, test run, gate passed. Reflect the new reality before starting the next unit.\n- **Insight learned** \u2014 API quirk, config field that turned out to matter, dead end ruled out, decision between two approaches. Insights are tracker-worthy even when no code changed; rediscovering them costs the next session a full re-exploration. The tracker is a knowledge ledger, not just a task log.\n- **About to do something risky or long-running** \u2014 subagent dispatch, long build, flaky external call, large refactor. Write _first_, then act. If the operation kills your session, only what's on disk survives.\n- **Plan changed** \u2014 task resequenced, new task surfaced, planned task no longer needed. Stale plans poison continuation sessions.\n- **You haven't written in a while** \u2014 if you can't remember the last update, you've gone too long. 
A short defensive update (\"doing X, last completed Y, next is Z\") beats nothing.\n\nEach update covers: what changed (work or knowledge), what's now next, and any caveat the next session needs. Don't transcribe tool calls \u2014 the tracker is a contract with your future self, not a replay log.\n\nThe cost of one extra tracker update is a few tokens. The cost of dying without one is a whole wasted session. Bias toward writing.\n\n## Task UI projection\n\nThe tracker is the durable source of truth. Your harness's task tools are a session-scoped UI projection of the same plan, shown to the user in their CLI widget. They do not survive process exit.\n\n{{TASK_TOOL_INSTRUCTIONS}}\n\n- **Session 1, after orientation:** project the tracker's task plan into the task tools.\n- **Session 2+, after reading the tracker:** recreate the projection from the tracker; do not assume task IDs from prior sessions still exist.\n- **During work:** update both \u2014 the task tools for immediate UI feedback, the tracker for persistence \u2014 in the same working phase.\n\n## Session bounding\n\nEach fresh session starts with a clean context window and a compact tracker \u2014 effectively self-compaction. As you work, context fills with tool outputs and intermediate state. The longer you run, the more attention is spread across tokens that are no longer relevant, degrading precision on the work that matters now.\n\nWork a bounded chunk per session. Ending early and letting the next session pick up from a clean tracker is almost always better than pushing through with a heavy context. 
Natural checkpoints:\n\n- After a quality gate\n- After crossing multiple phases (explored \u2192 planned \u2192 wrote specs) \u2014 stop before pushing into the next\n- When your context is visibly heavy with tool output from earlier work\n\n## Quick reference\n\n- [ ] Read the tracker before doing anything else\n- [ ] Replace the skeleton immediately, even for single-turn requests\n- [ ] Update on concrete triggers \u2014 unit done, insight learned, risky op pending, plan changed\n- [ ] Project the tracker plan into task tools at session start; keep both in sync as work lands\n- [ ] Load the workflow's skill before each activity\n- [ ] Run quality gates in order; respect delegation constraints and retry limits\n- [ ] Write the completion marker only when all work is verified\n- [ ] Checkpoint and end before context goes stale\n";
1574
1551
 
1575
1552
  // src/core/workflows/stateMachine.ts
1576
1553
  function buildTaskToolInstructions(harness) {
@@ -2118,4 +2095,4 @@ export {
2118
2095
  compileWorkflowPlan,
2119
2096
  collectMcpServersWithOptions
2120
2097
  };
2121
- //# sourceMappingURL=chunk-GE6PPB6Z.js.map
2098
+ //# sourceMappingURL=chunk-5VK2ZMVV.js.map