npm - opencastle - Versions diffs - 0.23.1 → 0.24.0 - Mend

opencastle 0.23.1 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/dist/cli/convoy/engine.d.ts +1 -0
package/dist/cli/convoy/engine.d.ts.map +1 -1
package/dist/cli/convoy/engine.js +72 -22
package/dist/cli/convoy/engine.js.map +1 -1
package/dist/cli/convoy/engine.test.js +205 -0
package/dist/cli/convoy/engine.test.js.map +1 -1
package/dist/cli/dashboard.d.ts.map +1 -1
package/dist/cli/dashboard.js +5 -4
package/dist/cli/dashboard.js.map +1 -1
package/dist/cli/run/adapters/claude.d.ts +6 -0
package/dist/cli/run/adapters/claude.d.ts.map +1 -0
package/dist/cli/run/adapters/claude.js +211 -0
package/dist/cli/run/adapters/claude.js.map +1 -0
package/dist/cli/run/adapters/copilot.d.ts +0 -18
package/dist/cli/run/adapters/copilot.d.ts.map +1 -1
package/dist/cli/run/adapters/copilot.js +123 -38
package/dist/cli/run/adapters/copilot.js.map +1 -1
package/dist/cli/run/adapters/index.js +2 -2
package/dist/cli/run/adapters/index.js.map +1 -1
package/dist/cli/run/schema.d.ts.map +1 -1
package/dist/cli/run/schema.js +8 -0
package/dist/cli/run/schema.js.map +1 -1
package/dist/cli/run/schema.test.js +41 -0
package/dist/cli/run/schema.test.js.map +1 -1
package/dist/cli/run.d.ts.map +1 -1
package/dist/cli/run.js +21 -9
package/dist/cli/run.js.map +1 -1
package/dist/cli/types.d.ts +2 -0
package/dist/cli/types.d.ts.map +1 -1
package/package.json +9 -1
package/src/cli/convoy/engine.test.ts +240 -0
package/src/cli/convoy/engine.ts +80 -23
package/src/cli/dashboard.ts +6 -5
package/src/cli/run/adapters/claude.ts +238 -0
package/src/cli/run/adapters/copilot.ts +125 -47
package/src/cli/run/adapters/index.ts +2 -2
package/src/cli/run/adapters/vendor.d.ts +2 -0
package/src/cli/run/schema.test.ts +51 -0
package/src/cli/run/schema.ts +10 -0
package/src/cli/run.ts +23 -11
package/src/cli/types.ts +2 -0
package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
package/src/orchestrator/agents/team-lead.agent.md +6 -6
package/src/orchestrator/prompts/bug-fix.prompt.md +6 -2
package/src/orchestrator/prompts/generate-convoy.prompt.md +3 -3
package/src/orchestrator/prompts/implement-feature.prompt.md +8 -19
package/dist/cli/run/adapters/claude-code.d.ts +0 -16
package/dist/cli/run/adapters/claude-code.d.ts.map +0 -1
package/dist/cli/run/adapters/claude-code.js +0 -95
package/dist/cli/run/adapters/claude-code.js.map +0 -1
package/src/cli/run/adapters/claude-code.ts +0 -107

package/src/cli/run/adapters/copilot.ts CHANGED Viewed

@@ -1,29 +1,27 @@
 import { spawn } from 'node:child_process'
 import type { CopilotClient as CopilotClientType, CopilotSession, PermissionHandler } from '@github/copilot-sdk'
 import { parseTimeout } from '../schema.js'
-import type { Task, ExecuteOptions, ExecuteResult } from '../../types.js'
+import type { Task, ExecuteOptions, ExecuteResult, TokenUsage } from '../../types.js'
-/** Adapter name */
+// Adapter name
 export const name = 'copilot'
-/**
- * Lazy-initialized shared client instance.
- * The client manages a single Copilot CLI server process; all task sessions
- * multiplex over it via JSON-RPC.
- */
-let clientPromise: Promise<CopilotClientType> | null = null
-/** Cached permission handler from the SDK module. */
-let cachedApproveAll: PermissionHandler | null = null
+// --- Unified adapter: SDK first, fallback to CLI ---
+let mode: 'sdk' | 'cli' | null = null
-/** Active sessions keyed by task id — used by `kill()` for timeout enforcement. */
-const activeSessions = new Map<string, CopilotSession>()
+// SDK check: resolves to true when `@github/copilot-sdk` can be imported and
+// to false when the dynamic import fails for any reason.
+async function sdkAvailable(): Promise<boolean> {
+  return import('@github/copilot-sdk').then(
+    () => true,
+    () => false,
+  )
+}
-/**
- * Check if the `copilot` CLI is available on the system PATH.
- * The SDK communicates with the CLI in server mode, so it must be installed.
- */
-export async function isAvailable(): Promise<boolean> {
+// CLI check
+async function cliAvailable(): Promise<boolean> {
   return new Promise((resolve) => {
     const proc = spawn('which', ['copilot'], { stdio: 'pipe' })
     proc.on('close', (code) => resolve(code === 0))
@@ -31,10 +29,23 @@ export async function isAvailable(): Promise<boolean> {
   })
 }
-/**
- * Get or create the shared CopilotClient.
- * The client is started once and reused across all task executions.
- */
+// Detects which backend can serve this adapter and records it in `mode`.
+// The SDK is probed first; the CLI is probed only when the SDK is missing.
+// Returns false, leaving `mode` untouched, when neither is found.
+export async function isAvailable(): Promise<boolean> {
+  const detected = (await sdkAvailable()) ? 'sdk' : (await cliAvailable()) ? 'cli' : null
+  if (detected === null) return false
+  mode = detected
+  return true
+}
+// --- SDK implementation (existing logic) ---
+let clientPromise: Promise<CopilotClientType> | null = null
+let cachedApproveAll: PermissionHandler | null = null
+const activeSessions = new Map<string, CopilotSession>()
 async function getClient(): Promise<CopilotClientType> {
   if (!clientPromise) {
     clientPromise = (async () => {
@@ -51,29 +62,16 @@ async function getClient(): Promise<CopilotClientType> {
   return clientPromise
 }
-/**
- * Execute a task using the Copilot SDK.
- *
- * Each task gets its own session with:
- *   - All tool permissions auto-approved (equivalent to `--allow-all-tools`)
- *   - No `ask_user` tool (autonomous — equivalent to `--no-ask-user`)
- *   - System message injected with the agent role
- *   - Streaming enabled in verbose mode for live output
- */
-export async function execute(task: Task, options: ExecuteOptions = {}): Promise<ExecuteResult> {
+async function executeViaSdk(task: Task, options: ExecuteOptions = {}): Promise<ExecuteResult> {
   // NOTE: The Copilot SDK CopilotClient is a shared singleton. Per-task cwd
   // isolation requires SDK support for per-session workingDirectory, which is
   // not yet available. When running in convoy mode with worktrees, prefer
-  // subprocess-based adapters (claude-code, cursor) that support options.cwd
-  // natively. Copilot SDK per-session cwd support is tracked for Phase 3.
+  // subprocess-based adapters (cli mode) that support options.cwd natively.
   let prompt = `You are a ${task.agent}. ${task.prompt}`
   if (task.files && task.files.length > 0) {
     prompt += `\n\nOnly modify files under: ${task.files.join(', ')}`
   }
   const client = await getClient()
   const session = await client.createSession({
     onPermissionRequest: cachedApproveAll!,
     systemMessage: {
@@ -86,16 +84,12 @@ export async function execute(task: Task, options: ExecuteOptions = {}): Promise
     infiniteSessions: { enabled: false },
     ...(options.verbose ? { streaming: true } : {}),
   })
   activeSessions.set(task.id, session)
-  // Stream deltas to stdout in verbose mode
   if (options.verbose) {
     session.on('assistant.message_delta', (event: { data: { deltaContent: string } }) => {
       process.stdout.write(event.data.deltaContent)
     })
   }
   try {
     const timeoutMs = parseTimeout(task.timeout)
     const response = await session.sendAndWait({ prompt }, timeoutMs)
@@ -107,7 +101,6 @@ export async function execute(task: Task, options: ExecuteOptions = {}): Promise
       completion_tokens: u.completion_tokens ?? u.completionTokens,
       total_tokens: u.total_tokens ?? u.totalTokens,
     } : undefined
     return {
       success: true,
       output: output.slice(0, 10_000),
@@ -126,11 +119,7 @@ export async function execute(task: Task, options: ExecuteOptions = {}): Promise
   }
 }
-/**
- * Abort and destroy the session associated with a task.
- * Called by the executor when a task exceeds its timeout.
- */
-export function kill(task: Task): void {
+function killSdk(task: Task): void {
   const session = activeSessions.get(task.id)
   if (session) {
     session.abort().catch(() => {})
@@ -138,3 +127,92 @@ export function kill(task: Task): void {
     activeSessions.delete(task.id)
   }
 }
+// --- CLI implementation ---
+// Runs a task by spawning the `copilot` CLI and collecting its output.
+//
+// The prompt combines the agent role with the task prompt and, when
+// `task.files` is non-empty, a restriction to those paths. The child runs in
+// `options.cwd`, falling back to the current working directory.
+//
+// Resolves with:
+//   - success:  true only when the child exits with code 0
+//   - output:   stdout followed by stderr, truncated to 10,000 characters
+//   - exitCode: the child's exit code, or -1 when it reports none
+//   - usage:    token counts, only when stdout parses as JSON with a `usage` object
+// A failure reported through the child's 'error' event resolves with
+// `success: false` and `exitCode: -1` instead of rejecting.
+async function executeViaCli(task: Task, options: ExecuteOptions = {}): Promise<ExecuteResult> {
+  // CLI supports --output-format json, --max-turns, and respects cwd
+  let prompt = `You are a ${task.agent}. ${task.prompt}`
+  if (task.files && task.files.length > 0) {
+    prompt += `\n\nOnly modify files under: ${task.files.join(', ')}`
+  }
+  const args = [
+    '-p',
+    prompt,
+    '--output-format',
+    'json',
+    '--max-turns',
+    '50',
+  ]
+  return new Promise((resolve) => {
+    const proc = spawn('copilot', args, {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      env: { ...process.env },
+      cwd: options?.cwd ?? process.cwd(),
+    })
+    // Decode inside the streams: converting each Buffer chunk on its own
+    // corrupts any multi-byte UTF-8 character that straddles two chunks.
+    proc.stdout.setEncoding('utf8')
+    proc.stderr.setEncoding('utf8')
+    let stdout = ''
+    let stderr = ''
+    proc.stdout.on('data', (chunk: string) => {
+      stdout += chunk
+      if (options.verbose) {
+        process.stdout.write(chunk)
+      }
+    })
+    proc.stderr.on('data', (chunk: string) => {
+      stderr += chunk
+      if (options.verbose) {
+        process.stderr.write(chunk)
+      }
+    })
+    proc.on('close', (code) => {
+      const output = [stdout, stderr].filter(Boolean).join('\n')
+      let usage: TokenUsage | undefined
+      try {
+        const parsedJson = JSON.parse(stdout) as Record<string, unknown>
+        const u = parsedJson?.usage as Record<string, number> | undefined
+        if (u) {
+          // Accept either naming scheme for the token counters.
+          const promptTokens = (u.input_tokens ?? u.prompt_tokens) as number | undefined
+          const completionTokens = (u.output_tokens ?? u.completion_tokens) as number | undefined
+          // A sum of 0 is reported as undefined rather than as a zero total.
+          const total = ((promptTokens ?? 0) + (completionTokens ?? 0)) || undefined
+          usage = { prompt_tokens: promptTokens, completion_tokens: completionTokens, total_tokens: total }
+        }
+      } catch { /* not JSON or no usage — graceful degradation */ }
+      resolve({
+        success: code === 0,
+        output: output.slice(0, 10_000),
+        exitCode: code ?? -1,
+        usage,
+      })
+    })
+    proc.on('error', (err) => {
+      resolve({
+        success: false,
+        output: `Failed to spawn copilot: ${err.message}`,
+        exitCode: -1,
+      })
+    })
+    // Keep the handle on the task so killCli can signal the child later.
+    task._process = proc
+  })
+}
+// Stops the CLI child process attached to a task: sends SIGTERM, then
+// escalates to SIGKILL if the child has still not exited after 5 seconds.
+// Does nothing when the task has no process or a signal was already sent.
+function killCli(task: Task): void {
+  const proc = task._process
+  if (!proc || proc.killed) return
+  proc.kill('SIGTERM')
+  setTimeout(() => {
+    // `killed` turns true as soon as a signal has been sent, so it cannot
+    // tell whether the child is still running. `exitCode` and `signalCode`
+    // both stay null until the child has actually exited.
+    if (proc.exitCode === null && proc.signalCode === null) {
+      proc.kill('SIGKILL')
+    }
+  }, 5000)
+}
+// --- Unified interface ---
+// Runs a task on the detected backend. Detection is triggered here when it
+// has not happened yet; anything other than 'sdk' is routed to the CLI path.
+export async function execute(task: Task, options: ExecuteOptions = {}): Promise<ExecuteResult> {
+  if (mode === null) {
+    await isAvailable()
+  }
+  return mode === 'sdk' ? executeViaSdk(task, options) : executeViaCli(task, options)
+}
+// Stops a running task using whichever backend `mode` currently selects;
+// any mode other than 'sdk' is handled by the CLI path.
+export function kill(task: Task): void {
+  if (mode !== 'sdk') {
+    killCli(task)
+    return
+  }
+  killSdk(task)
+}

package/src/cli/run/adapters/index.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import type { AgentAdapter } from '../../types.js'
  * Adapter registry for agent runtimes.
  */
 const ADAPTERS: Record<string, () => Promise<AgentAdapter>> = {
-  'claude-code': () => import('./claude-code.js') as Promise<AgentAdapter>,
+  claude: () => import('./claude.js') as Promise<AgentAdapter>,
   copilot: () => import('./copilot.js') as Promise<AgentAdapter>,
   cursor: () => import('./cursor.js') as Promise<AgentAdapter>,
   opencode: () => import('./opencode.js') as Promise<AgentAdapter>,
@@ -29,7 +29,7 @@ export async function getAdapter(name: string): Promise<AgentAdapter> {
  * Detection priority order — checked first-to-last.
  * The first available adapter wins.
  */
-const DETECTION_ORDER = ['copilot', 'claude-code', 'cursor', 'opencode'] as const
+const DETECTION_ORDER = ['copilot', 'claude', 'cursor', 'opencode'] as const
 /**
  * Auto-detect which adapter CLI is available on the system.

package/src/cli/run/adapters/vendor.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ /** Ambient declaration for optional peer dependency — resolved at runtime when installed. */
2	+ declare module '@anthropic-ai/agent-sdk'

package/src/cli/run/schema.test.ts CHANGED Viewed

@@ -1160,3 +1160,54 @@ describe('applyDefaults — pipeline spec (version:2, no tasks)', () => {
     expect(spec.depends_on_convoy).toEqual(['phase-1', 'phase-2'])
   })
 })
+// ── validateSpec — gate_retries field ─────────────────────────
+describe('validateSpec — gate_retries field', () => {
+  const validSpec = {
+    name: 'test',
+    tasks: [{ id: 'a', prompt: 'do something' }],
+  }
+  // [test title, gate_retries value] pairs that must validate cleanly.
+  const accepted: Array<[string, unknown]> = [
+    ['accepts gate_retries as 0', 0],
+    ['accepts gate_retries as a positive integer', 3],
+  ]
+  // [test title, gate_retries value] pairs that must be rejected with an
+  // error message mentioning the field.
+  const rejected: Array<[string, unknown]> = [
+    ['rejects gate_retries as negative', -1],
+    ['rejects gate_retries as a float', 1.5],
+    ['rejects gate_retries as a string', 'two'],
+  ]
+  for (const [title, value] of accepted) {
+    it(title, () => {
+      const result = validateSpec({ ...validSpec, gate_retries: value })
+      expect(result.valid).toBe(true)
+    })
+  }
+  for (const [title, value] of rejected) {
+    it(title, () => {
+      const result = validateSpec({ ...validSpec, gate_retries: value })
+      expect(result.valid).toBe(false)
+      expect(result.errors).toContainEqual(expect.stringContaining('gate_retries'))
+    })
+  }
+})
+// ── applyDefaults — gate_retries default ───────────────────────
+describe('applyDefaults — gate_retries default', () => {
+  // Build a fresh minimal spec for every test so no state is shared.
+  const makeSpec = () => ({ name: 'test', tasks: [{ id: 'a', prompt: 'p' }] })
+  it('defaults gate_retries to 0', () => {
+    const { gate_retries } = applyDefaults(makeSpec())
+    expect(gate_retries).toBe(0)
+  })
+  it('preserves explicit gate_retries value', () => {
+    const { gate_retries } = applyDefaults({ ...makeSpec(), gate_retries: 2 })
+    expect(gate_retries).toBe(2)
+  })
+})

package/src/cli/run/schema.ts CHANGED Viewed

@@ -42,6 +42,7 @@ interface RawSpec {
   version?: unknown
   defaults?: unknown
   gates?: unknown
+  gate_retries?: unknown
   branch?: unknown
   depends_on_convoy?: unknown
 }
@@ -154,6 +155,14 @@ export function validateSpec(spec: unknown): ValidationResult {
     }
   }
+  // gate_retries — must be a non-negative integer, given either as a number
+  // or as a numeric string. Plain Number() coercion is too lenient here: it
+  // maps null, '', [] and false to 0 and true to 1, which would let those
+  // values pass as valid retry counts.
+  if (s.gate_retries !== undefined) {
+    const rawRetries = s.gate_retries
+    const gr =
+      typeof rawRetries === 'number'
+        ? rawRetries
+        : typeof rawRetries === 'string' && rawRetries.trim() !== ''
+          ? Number(rawRetries)
+          : Number.NaN
+    if (!Number.isInteger(gr) || gr < 0) {
+      errors.push('`gate_retries` must be a non-negative integer')
+    }
+  }
   // branch
   if (s.branch !== undefined && typeof s.branch !== 'string') {
     errors.push('`branch` must be a string')
@@ -319,6 +328,7 @@ export function applyDefaults(spec: Record<string, unknown>): TaskSpec {
   s.on_failure = (s.on_failure as string) || 'continue'
   // Leave adapter empty so run.ts can auto-detect the best available CLI
   s.adapter = (s.adapter as string) || ''
+  s.gate_retries = s.gate_retries !== undefined ? Number(s.gate_retries) : 0
   const tasks = (s.tasks as Array<Record<string, unknown>> | undefined) ?? []
   const d =

package/src/cli/run.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { parseTaskSpecText, isConvoySpec, isPipelineSpec } from './run/schema.js
 import { createExecutor, buildPhases } from './run/executor.js'
 import { getAdapter, detectAdapter } from './run/adapters/index.js'
 import { createReporter, printExecutionPlan } from './run/reporter.js'
+import { c } from './prompt.js'
 import type { CliContext, RunOptions } from './types.js'
 import type { ConvoyResult } from './convoy/engine.js'
 import type { PipelineResult } from './convoy/pipeline.js'
@@ -22,7 +23,7 @@ const HELP = `
   Version 1 specs use the Convoy Engine; legacy specs use the standard executor.
   Options:
-    --file, -f <path>        Task spec file (default: opencastle.tasks.yml)
+    --file, -f <path>        Task spec file
     --dry-run                Show execution plan without running
     --concurrency, -c <n>    Override max parallel tasks
     --adapter, -a <name>     Override agent runtime adapter
@@ -38,7 +39,7 @@ const HELP = `
  */
 function parseArgs(args: string[]): RunOptions {
   const opts: RunOptions = {
-    file: 'opencastle.tasks.yml',
+    file: 'convoy.yml',
     dryRun: false,
     concurrency: null,
     adapter: null,
@@ -121,7 +122,7 @@ function printAdapterError(detectionFailed: boolean, adapterName: string): void
     )
   } else {
     const hints: Record<string, string> = {
-      'claude-code':
+      'claude':
         '    Install: npm install -g @anthropic-ai/claude-code\n' +
         '    Docs:    https://docs.anthropic.com/en/docs/claude-code',
       copilot:
@@ -136,7 +137,7 @@ function printAdapterError(detectionFailed: boolean, adapterName: string): void
         '    Install OpenCode from https://opencode.ai\n' +
         '    Ensure the "opencode" command is on your PATH.',
     }
-    const cliName = adapterName === 'claude-code' ? 'claude' : adapterName === 'cursor' ? 'agent' : adapterName
+    const cliName = adapterName === 'cursor' ? 'agent' : adapterName
     const hint = hints[adapterName] ?? ''
     console.error(
       `  ✗ Adapter "${adapterName}" is not available.\n` +
@@ -153,10 +154,15 @@ function printConvoyResult(result: ConvoyResult): void {
   console.log(`\n  ──────────────────────────────────────`)
   console.log(`  Convoy ${result.status}: ${result.duration}`)
   console.log(
-    `  Done: ${result.summary.done} | Failed: ${result.summary.failed} | Skipped: ${result.summary.skipped} | Timed out: ${result.summary.timedOut}`
+    `  Tasks: ${result.summary.done}/${result.summary.total} done` +
+    (result.summary.failed > 0 ? ` | ${result.summary.failed} failed` : '') +
+    (result.summary.skipped > 0 ? ` | ${result.summary.skipped} skipped` : '') +
+    (result.summary.timedOut > 0 ? ` | ${result.summary.timedOut} timed out` : '')
   )
   if (result.gateResults) {
-    console.log(`  Gates:`)
+    const gatesPassed = result.gateResults.filter(g => g.passed).length
+    const gatesFailed = result.gateResults.filter(g => !g.passed).length
+    console.log(`  Gates: ${gatesPassed}/${result.gateResults.length} passed${gatesFailed > 0 ? ` | ${gatesFailed} failed` : ''}`)
     for (const g of result.gateResults) {
       console.log(`    ${g.passed ? '✓' : '✗'} ${g.command}`)
     }
@@ -307,7 +313,7 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
           console.log(`  ℹ Auto-detected adapter: ${detected}`)
         } else {
           resumePipelineDetectionFailed = true
-          resumePipelineSpec.adapter = 'claude-code'
+          resumePipelineSpec.adapter = 'claude'
         }
       }
@@ -359,7 +365,7 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
         console.log(`  ℹ Auto-detected adapter: ${detected}`)
       } else {
         resumeDetectionFailed = true
-        resumeSpec.adapter = 'claude-code'
+        resumeSpec.adapter = 'claude'
       }
     }
@@ -421,7 +427,7 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
       console.log(`  ℹ Auto-detected adapter: ${detected}`)
     } else {
       detectionFailed = true
-      spec.adapter = 'claude-code' // fallback for availability check below
+      spec.adapter = 'claude' // fallback for availability check below
     }
   }
@@ -466,7 +472,7 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
     if (spec.gates?.length) console.log(`  Gates: ${spec.gates.length} validation commands`)
     const { startDashboardServer } = await import('./dashboard.js')
-    let pipelineDashboardResult: { server: import('node:http').Server } | null = null
+    let pipelineDashboardResult: { server: import('node:http').Server; port: number; url: string } | null = null
     try {
       pipelineDashboardResult = await startDashboardServer({
         pkgRoot,
@@ -476,6 +482,9 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
     } catch {
       // Dashboard failure must not block pipeline
     }
+    if (pipelineDashboardResult) {
+      console.log(`  ${c.dim('Dashboard:')} ${pipelineDashboardResult.url}`)
+    }
     const pipelineOrchestrator = createPipelineOrchestrator({
       spec,
@@ -503,7 +512,7 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
     if (spec.gates?.length) console.log(`  Gates: ${spec.gates.length} validation commands`)
     const { startDashboardServer } = await import('./dashboard.js')
-    let dashboardResult: { server: import('node:http').Server } | null = null
+    let dashboardResult: { server: import('node:http').Server; port: number; url: string } | null = null
     try {
       dashboardResult = await startDashboardServer({
         pkgRoot,
@@ -513,6 +522,9 @@ export default async function run({ args, pkgRoot }: CliContext): Promise<void>
     } catch {
       // Dashboard failure must not block convoy
     }
+    if (dashboardResult) {
+      console.log(`  ${c.dim('Dashboard:')} ${dashboardResult.url}`)
+    }
     const engine = createConvoyEngine({
       spec,

package/src/cli/types.ts CHANGED Viewed

@@ -164,6 +164,8 @@ export interface TaskSpec {
   defaults?: TaskDefaults;
   /** Shell commands run after all tasks complete; each must exit 0. */
   gates?: string[];
+  /** How many times to retry failing gates with an auto-fix task (default: 0). */
+  gate_retries?: number;
   /** Git feature branch name. */
   branch?: string;
   /** Other convoy spec names to run before this one (version: 2 pipeline specs). */

package/src/dashboard/node_modules/.vite/deps/_metadata.json CHANGED Viewed

@@ -1,25 +1,25 @@
 {
-  "hash": "47102a21",
+  "hash": "8d888497",
   "configHash": "30f8ea04",
-  "lockfileHash": "ecc512ab",
-  "browserHash": "a22d8cb1",
+  "lockfileHash": "433479a7",
+  "browserHash": "261fa44b",
   "optimized": {
     "astro > cssesc": {
       "src": "../../../../../node_modules/cssesc/cssesc.js",
       "file": "astro___cssesc.js",
-      "fileHash": "11a66757",
+      "fileHash": "de4544e1",
       "needsInterop": true
     },
     "astro > aria-query": {
       "src": "../../../../../node_modules/aria-query/lib/index.js",
       "file": "astro___aria-query.js",
-      "fileHash": "468db3c9",
+      "fileHash": "e15a50a2",
       "needsInterop": true
     },
     "astro > axobject-query": {
       "src": "../../../../../node_modules/axobject-query/lib/index.js",
       "file": "astro___axobject-query.js",
-      "fileHash": "8c573df7",
+      "fileHash": "e06f0936",
       "needsInterop": true
     }
   },

package/src/orchestrator/agents/team-lead.agent.md CHANGED Viewed

@@ -145,14 +145,14 @@ Before EVERY delegation verify: (1) Tracker issue exists, (2) File partition is
 ## Convoy Integration
-The convoy engine is the preferred execution mechanism for multi-task work. Use it when a request decomposes into 3 or more subtasks.
+The convoy engine is the **mandatory** execution mechanism for all project-related work — features, bug fixes, and refactors. This ensures consistent observability, crash recovery, and progress visibility.
 ### When to use convoy vs. direct delegation
-| Task count | Approach |
-|------------|----------|
-| 1–2 subtasks | **Direct delegation** — sub-agents inline, standard workflow |
-| 3+ subtasks | **Convoy execution** — generate spec, hand to user to run |
+| Work type | Approach |
+|-----------|----------|
+| Features, bug fixes, refactors (any subtask count) | **Convoy execution** — always generate a `.convoy.yml` spec, even for 1-task fixes |
+| Utility prompts (`bootstrap-customizations`, `create-skill`, `generate-convoy`, `brainstorm`, `quick-refinement`) | **Direct** — these are meta/tooling operations, not project code changes |
 ### How to generate a convoy spec
@@ -164,7 +164,7 @@ The convoy engine is the preferred execution mechanism for multi-task work. Use
 Tell the user to run:
 ```
-npx opencastle run -f <name>.convoy.yml
+npx opencastle run -f .opencastle/convoys/<name>.convoy.yml
 ```
 This gives the user control over when execution starts (preferred — supports overnight/unattended runs and manual review of the spec before execution).

package/src/orchestrator/prompts/bug-fix.prompt.md CHANGED Viewed

@@ -83,9 +83,13 @@ Find WHY the bug happens, not just WHERE:
 ### 4. Implement the Fix
-Delegate to the appropriate specialist agent via **sub-agent** (inline). For bugs that are clearly isolated and well-understood, a single delegation is usually sufficient.
+All bug fixes are executed via the convoy engine — even single-task fixes — to ensure observability and crash recovery.
-#### Delegation Prompt Must Include
+1. **Generate a convoy spec** — use the `generate-convoy` prompt with the root cause analysis, fix approach, and file paths as context.
+2. **Hand the spec to the user** — tell them to run: `npx opencastle run -f .opencastle/convoys/<name>.convoy.yml`
+3. **After convoy completes** — proceed to Step 5 (validation).
+#### Convoy Task Prompt Must Include
 - **Tracker issue ID and title** — e.g., `TAS-XX — [Bug] Description`
 - **Root cause** — What's wrong and why

package/src/orchestrator/prompts/generate-convoy.prompt.md CHANGED Viewed

@@ -7,7 +7,7 @@ agent: 'Team Lead (OpenCastle)'
 # Generate Convoy Spec
-You are the Team Lead. The user wants to run `opencastle run` to execute a batch of tasks autonomously via the convoy engine. Your job is to produce a valid `.convoy.yml` file they can feed to the CLI. Derive a short, descriptive, kebab-case filename from the user's goal (2–4 words max) and use it as the filename — for example `auth-refactor.convoy.yml` or `add-search.convoy.yml`. Always use the `.convoy.yml` extension.
+You are the Team Lead. The user wants to run `opencastle run` to execute a batch of tasks autonomously via the convoy engine. Your job is to produce a valid `.convoy.yml` file they can feed to the CLI. Derive a short, descriptive, kebab-case filename from the user's goal (2–4 words max) and use it as the filename — for example `auth-refactor.convoy.yml` or `add-search.convoy.yml`. Always use the `.convoy.yml` extension. Store all generated convoy specs in the `.opencastle/convoys/` directory (create it if it doesn't exist).
 ## User Goal
@@ -137,7 +137,7 @@ Before presenting the YAML, mentally verify:
 Return the final YAML inside a fenced code block with a filename annotation:
 ````yaml
-# <feature-name>.convoy.yml
+# .opencastle/convoys/<feature-name>.convoy.yml
 name: <run name>
 version: 1
 concurrency: <n>
@@ -172,6 +172,6 @@ gates:
 Also provide:
 1. A **DAG summary** showing the phase structure so the user can verify execution order.
 2. An **estimated total duration** (sum of timeouts on the critical path).
-3. A `--dry-run` command they can use to validate: `npx opencastle run --file <feature-name>.convoy.yml --dry-run`
+3. A `--dry-run` command they can use to validate: `npx opencastle run --file .opencastle/convoys/<feature-name>.convoy.yml --dry-run`

package/src/orchestrator/prompts/implement-feature.prompt.md CHANGED Viewed

@@ -47,31 +47,20 @@ Every subtask must be tracked. **No issue = no implementation.** This step produ
 5. **Link to roadmap** — Reference the roadmap section in the issue description so context is never lost
 6. **Verify issues exist** — List all created issue IDs. If count is 0, do NOT proceed to Step 2.5
-### 2.5 Choose Execution Path (BLOCKING — decides how Step 3 proceeds)
+### 2.5 Generate Convoy Spec (BLOCKING — must be completed before Step 3)
-With the full task list in hand, decide the execution mechanism:
+All project-related work is executed via the convoy engine — regardless of subtask count. This ensures consistent observability, crash recovery, and live progress.
-| Condition | Execution path |
-|-----------|----------------|
-| 1–2 subtasks | **Direct delegation** — delegate to sub-agents as today (proceed to Step 3 as-is) |
-| 3+ subtasks | **Convoy execution** — generate a `.convoy.yml` spec using the `generate-convoy` prompt, then hand it to the user |
-#### Direct delegation (1–2 subtasks)
-Proceed with the normal Step 3 delegation workflow. Sub-agents handle each task inline.
-#### Convoy execution (3+ subtasks)
-1. **Generate the spec** — use the `generate-convoy` prompt with the decomposed task list as context. The spec IS the implementation plan — no manual per-task delegation is needed.
-2. **Hand the spec to the user** — tell them to run: `npx opencastle run -f <name>.convoy.yml`
-3. **The convoy engine handles** isolated git worktrees, parallel execution, merge queue ordering, and crash recovery automatically.
+1. **Generate the spec** — use the `generate-convoy` prompt with the decomposed task list as context. The spec IS the implementation plan — no manual per-task delegation is needed. Even single-task fixes go through convoy for observability.
+2. **Hand the spec to the user** — tell them to run: `npx opencastle run -f .opencastle/convoys/<name>.convoy.yml`
+3. **The convoy engine handles** isolated git worktrees, parallel execution, merge queue ordering, crash recovery, and structured logging automatically.
 4. **After convoy completes** — proceed to Step 4 (validation) and Step 5 (delivery/PR). The convoy engine will have created its own commits on the configured branch.
-> **Why convoy for 3+ tasks?** Parallel worktree isolation prevents file conflicts. The merge queue ensures safe ordering. Crash recovery means a failing task doesn't block others. Manual delegation of 3+ parallel tasks risks conflicts and is harder to monitor.
+> **Why always convoy?** Convoy execution is the only path that guarantees observability logging, crash recovery, gate validation, and live progress. Direct sub-agent delegation produces no structured logs and cannot be resumed if interrupted.
 ### 3. Implementation Rules
-> **For convoy execution (3+ subtasks):** The convoy spec file IS the implementation plan — skip the manual delegation workflow below and jump to Step 4 after the user runs the convoy. The convoy engine delegates tasks internally using the agents and prompts defined in the spec.
+> **Convoy execution:** The convoy spec file IS the implementation plan — skip the manual delegation workflow below and jump to Step 4 after the user runs the convoy. The convoy engine delegates tasks internally using the agents and prompts defined in the spec.
 #### Issue Traceability
@@ -117,7 +106,7 @@ Every subtask must pass ALL gates before being marked Done:
 Follow the **Delivery Outcome** defined in the **git-workflow** skill — commit, push, open PR (not merged), and link to the tracker.
-> **For convoy execution:** The convoy engine creates commits on the configured `branch` directly. After validation passes, open the PR from that branch. No additional commits from the Team Lead are needed unless gates failed and required manual fixes.
+> The convoy engine creates commits on the configured `branch` directly. After validation passes, open the PR from that branch. No additional commits from the Team Lead are needed unless gates failed and required manual fixes.
 ### 6. Documentation & Traceability

package/dist/cli/run/adapters/claude-code.d.ts DELETED Viewed

@@ -1,16 +0,0 @@
-import type { Task, ExecuteOptions, ExecuteResult } from '../../types.js';
-/** Adapter name */
-export declare const name = "claude-code";
-/**
- * Check if the `claude` CLI is available on the system PATH.
- */
-export declare function isAvailable(): Promise<boolean>;
-/**
- * Execute a task by invoking the Claude Code CLI in print mode.
- */
-export declare function execute(task: Task, options?: ExecuteOptions): Promise<ExecuteResult>;
-/**
- * Kill the process associated with a task (used by timeout enforcement).
- */
-export declare function kill(task: Task): void;
-//# sourceMappingURL=claude-code.d.ts.map

package/dist/cli/run/adapters/claude-code.d.ts.map DELETED Viewed

@@ -1 +0,0 @@

- {"version":3,"file":"claude-code.d.ts","sourceRoot":"","sources":["../../../../src/cli/run/adapters/claude-code.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,cAAc,EAAE,aAAa,EAAc,MAAM,gBAAgB,CAAA;AAErF,mBAAmB;AACnB,eAAO,MAAM,IAAI,gBAAgB,CAAA;AAEjC;;GAEG;AACH,wBAAsB,WAAW,IAAI,OAAO,CAAC,OAAO,CAAC,CAMpD;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,GAAE,cAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,CAwE9F;AAED;;GAEG;AACH,wBAAgB,IAAI,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,CASrC"}