npm - opencastle - Versions diffs - 0.31.6 → 0.32.0 - Mend

opencastle 0.31.6 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (210) hide show

package/LICENSE +93 -21
package/README.md +9 -3
package/bin/cli.mjs +15 -0
package/dist/cli/agents.d.ts.map +1 -1
package/dist/cli/agents.js +19 -5
package/dist/cli/agents.js.map +1 -1
package/dist/cli/artifacts-cli.d.ts +3 -0
package/dist/cli/artifacts-cli.d.ts.map +1 -0
package/dist/cli/artifacts-cli.js +36 -0
package/dist/cli/artifacts-cli.js.map +1 -0
package/dist/cli/baselines.d.ts.map +1 -1
package/dist/cli/baselines.js +11 -0
package/dist/cli/baselines.js.map +1 -1
package/dist/cli/convoy/artifacts.d.ts +25 -0
package/dist/cli/convoy/artifacts.d.ts.map +1 -0
package/dist/cli/convoy/artifacts.js +129 -0
package/dist/cli/convoy/artifacts.js.map +1 -0
package/dist/cli/convoy/artifacts.test.d.ts +2 -0
package/dist/cli/convoy/artifacts.test.d.ts.map +1 -0
package/dist/cli/convoy/artifacts.test.js +169 -0
package/dist/cli/convoy/artifacts.test.js.map +1 -0
package/dist/cli/convoy/compaction.d.ts +23 -0
package/dist/cli/convoy/compaction.d.ts.map +1 -0
package/dist/cli/convoy/compaction.js +117 -0
package/dist/cli/convoy/compaction.js.map +1 -0
package/dist/cli/convoy/compaction.test.d.ts +2 -0
package/dist/cli/convoy/compaction.test.d.ts.map +1 -0
package/dist/cli/convoy/compaction.test.js +205 -0
package/dist/cli/convoy/compaction.test.js.map +1 -0
package/dist/cli/convoy/contracts.d.ts +22 -0
package/dist/cli/convoy/contracts.d.ts.map +1 -0
package/dist/cli/convoy/contracts.js +254 -0
package/dist/cli/convoy/contracts.js.map +1 -0
package/dist/cli/convoy/contracts.test.d.ts +2 -0
package/dist/cli/convoy/contracts.test.d.ts.map +1 -0
package/dist/cli/convoy/contracts.test.js +239 -0
package/dist/cli/convoy/contracts.test.js.map +1 -0
package/dist/cli/convoy/dag-analysis.d.ts +40 -0
package/dist/cli/convoy/dag-analysis.d.ts.map +1 -0
package/dist/cli/convoy/dag-analysis.js +282 -0
package/dist/cli/convoy/dag-analysis.js.map +1 -0
package/dist/cli/convoy/dag-analysis.test.d.ts +2 -0
package/dist/cli/convoy/dag-analysis.test.d.ts.map +1 -0
package/dist/cli/convoy/dag-analysis.test.js +289 -0
package/dist/cli/convoy/dag-analysis.test.js.map +1 -0
package/dist/cli/convoy/effort-scaling.d.ts +20 -0
package/dist/cli/convoy/effort-scaling.d.ts.map +1 -0
package/dist/cli/convoy/effort-scaling.js +82 -0
package/dist/cli/convoy/effort-scaling.js.map +1 -0
package/dist/cli/convoy/effort-scaling.test.d.ts +2 -0
package/dist/cli/convoy/effort-scaling.test.d.ts.map +1 -0
package/dist/cli/convoy/effort-scaling.test.js +120 -0
package/dist/cli/convoy/effort-scaling.test.js.map +1 -0
package/dist/cli/convoy/engine.d.ts.map +1 -1
package/dist/cli/convoy/engine.js +298 -11
package/dist/cli/convoy/engine.js.map +1 -1
package/dist/cli/convoy/engine.test.js +155 -18
package/dist/cli/convoy/engine.test.js.map +1 -1
package/dist/cli/convoy/event-schemas.d.ts.map +1 -1
package/dist/cli/convoy/event-schemas.js +55 -0
package/dist/cli/convoy/event-schemas.js.map +1 -1
package/dist/cli/convoy/isolation.d.ts +27 -0
package/dist/cli/convoy/isolation.d.ts.map +1 -0
package/dist/cli/convoy/isolation.js +120 -0
package/dist/cli/convoy/isolation.js.map +1 -0
package/dist/cli/convoy/isolation.test.d.ts +2 -0
package/dist/cli/convoy/isolation.test.d.ts.map +1 -0
package/dist/cli/convoy/isolation.test.js +105 -0
package/dist/cli/convoy/isolation.test.js.map +1 -0
package/dist/cli/convoy/review-stages.d.ts +9 -0
package/dist/cli/convoy/review-stages.d.ts.map +1 -0
package/dist/cli/convoy/review-stages.js +134 -0
package/dist/cli/convoy/review-stages.js.map +1 -0
package/dist/cli/convoy/review-stages.test.d.ts +2 -0
package/dist/cli/convoy/review-stages.test.d.ts.map +1 -0
package/dist/cli/convoy/review-stages.test.js +197 -0
package/dist/cli/convoy/review-stages.test.js.map +1 -0
package/dist/cli/convoy/skill-refinement.d.ts +39 -0
package/dist/cli/convoy/skill-refinement.d.ts.map +1 -0
package/dist/cli/convoy/skill-refinement.js +239 -0
package/dist/cli/convoy/skill-refinement.js.map +1 -0
package/dist/cli/convoy/skill-refinement.test.d.ts +2 -0
package/dist/cli/convoy/skill-refinement.test.d.ts.map +1 -0
package/dist/cli/convoy/skill-refinement.test.js +230 -0
package/dist/cli/convoy/skill-refinement.test.js.map +1 -0
package/dist/cli/convoy/spec-builder.d.ts +1 -0
package/dist/cli/convoy/spec-builder.d.ts.map +1 -1
package/dist/cli/convoy/spec-builder.js +11 -0
package/dist/cli/convoy/spec-builder.js.map +1 -1
package/dist/cli/convoy/spec-builder.test.js +54 -0
package/dist/cli/convoy/spec-builder.test.js.map +1 -1
package/dist/cli/convoy/store.d.ts +3 -2
package/dist/cli/convoy/store.d.ts.map +1 -1
package/dist/cli/convoy/store.js +20 -2
package/dist/cli/convoy/store.js.map +1 -1
package/dist/cli/convoy/store.test.js +15 -15
package/dist/cli/convoy/store.test.js.map +1 -1
package/dist/cli/convoy/tdd-gate.d.ts +15 -0
package/dist/cli/convoy/tdd-gate.d.ts.map +1 -0
package/dist/cli/convoy/tdd-gate.js +119 -0
package/dist/cli/convoy/tdd-gate.js.map +1 -0
package/dist/cli/convoy/tdd-gate.test.d.ts +2 -0
package/dist/cli/convoy/tdd-gate.test.d.ts.map +1 -0
package/dist/cli/convoy/tdd-gate.test.js +227 -0
package/dist/cli/convoy/tdd-gate.test.js.map +1 -0
package/dist/cli/convoy/types.d.ts +91 -0
package/dist/cli/convoy/types.d.ts.map +1 -1
package/dist/cli/convoy/types.js +8 -0
package/dist/cli/convoy/types.js.map +1 -1
package/dist/cli/dashboard.d.ts.map +1 -1
package/dist/cli/dashboard.js +54 -0
package/dist/cli/dashboard.js.map +1 -1
package/dist/cli/insights.d.ts +3 -0
package/dist/cli/insights.d.ts.map +1 -0
package/dist/cli/insights.js +94 -0
package/dist/cli/insights.js.map +1 -0
package/dist/cli/lesson.d.ts.map +1 -1
package/dist/cli/lesson.js +7 -0
package/dist/cli/lesson.js.map +1 -1
package/dist/cli/log.d.ts.map +1 -1
package/dist/cli/log.js +7 -0
package/dist/cli/log.js.map +1 -1
package/dist/cli/package-config.d.ts +12 -0
package/dist/cli/package-config.d.ts.map +1 -0
package/dist/cli/package-config.js +37 -0
package/dist/cli/package-config.js.map +1 -0
package/dist/cli/package.d.ts +23 -0
package/dist/cli/package.d.ts.map +1 -0
package/dist/cli/package.js +285 -0
package/dist/cli/package.js.map +1 -0
package/dist/cli/package.test.d.ts +2 -0
package/dist/cli/package.test.d.ts.map +1 -0
package/dist/cli/package.test.js +236 -0
package/dist/cli/package.test.js.map +1 -0
package/dist/cli/pipeline.d.ts +6 -0
package/dist/cli/pipeline.d.ts.map +1 -1
package/dist/cli/pipeline.js +15 -2
package/dist/cli/pipeline.js.map +1 -1
package/dist/cli/run/schema.d.ts.map +1 -1
package/dist/cli/run/schema.js +32 -0
package/dist/cli/run/schema.js.map +1 -1
package/dist/cli/run/schema.test.js +51 -0
package/dist/cli/run/schema.test.js.map +1 -1
package/dist/cli/run.d.ts.map +1 -1
package/dist/cli/run.js +10 -1
package/dist/cli/run.js.map +1 -1
package/dist/cli/skills.d.ts +3 -0
package/dist/cli/skills.d.ts.map +1 -0
package/dist/cli/skills.js +107 -0
package/dist/cli/skills.js.map +1 -0
package/dist/cli/types.d.ts +4 -1
package/dist/cli/types.d.ts.map +1 -1
package/dist/cli/update.js +2 -2
package/package.json +3 -2
package/src/cli/agents.ts +20 -5
package/src/cli/artifacts-cli.ts +41 -0
package/src/cli/baselines.ts +12 -0
package/src/cli/convoy/artifacts.test.ts +201 -0
package/src/cli/convoy/artifacts.ts +186 -0
package/src/cli/convoy/compaction.test.ts +245 -0
package/src/cli/convoy/compaction.ts +164 -0
package/src/cli/convoy/contracts.test.ts +279 -0
package/src/cli/convoy/contracts.ts +280 -0
package/src/cli/convoy/dag-analysis.test.ts +349 -0
package/src/cli/convoy/dag-analysis.ts +371 -0
package/src/cli/convoy/effort-scaling.test.ts +140 -0
package/src/cli/convoy/effort-scaling.ts +90 -0
package/src/cli/convoy/engine.test.ts +175 -18
package/src/cli/convoy/engine.ts +315 -12
package/src/cli/convoy/event-schemas.ts +55 -0
package/src/cli/convoy/isolation.test.ts +137 -0
package/src/cli/convoy/isolation.ts +165 -0
package/src/cli/convoy/review-stages.test.ts +235 -0
package/src/cli/convoy/review-stages.ts +166 -0
package/src/cli/convoy/skill-refinement.test.ts +277 -0
package/src/cli/convoy/skill-refinement.ts +306 -0
package/src/cli/convoy/spec-builder.test.ts +61 -0
package/src/cli/convoy/spec-builder.ts +9 -0
package/src/cli/convoy/store.test.ts +15 -15
package/src/cli/convoy/store.ts +26 -4
package/src/cli/convoy/tdd-gate.test.ts +281 -0
package/src/cli/convoy/tdd-gate.ts +154 -0
package/src/cli/convoy/types.ts +51 -0
package/src/cli/dashboard.ts +55 -0
package/src/cli/insights.ts +99 -0
package/src/cli/lesson.ts +8 -0
package/src/cli/log.ts +8 -0
package/src/cli/package-config.ts +48 -0
package/src/cli/package.test.ts +276 -0
package/src/cli/package.ts +329 -0
package/src/cli/pipeline.ts +21 -2
package/src/cli/run/schema.test.ts +58 -0
package/src/cli/run/schema.ts +33 -0
package/src/cli/run.ts +14 -1
package/src/cli/skills.ts +121 -0
package/src/cli/types.ts +4 -1
package/src/cli/update.ts +2 -2
package/src/dashboard/dist/_astro/{index.Je1YjU_y.css → index.BRDFmNzR.css} +1 -1
package/src/dashboard/dist/index.html +163 -2
package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
package/src/dashboard/src/pages/index.astro +162 -1
package/src/dashboard/src/styles/dashboard.css +85 -0
package/src/orchestrator/agents/developer.agent.md +8 -0
package/src/orchestrator/agents/ui-ux-expert.agent.md +7 -0
package/src/orchestrator/prompts/assess-complexity.prompt.md +13 -0
package/src/orchestrator/prompts/brainstorm.prompt.md +18 -0
package/src/orchestrator/prompts/generate-convoy.prompt.md +61 -0
package/src/orchestrator/skills/decomposition/SKILL.md +35 -0
package/src/orchestrator/skills/frontend-design/SKILL.md +27 -1
package/src/orchestrator/skills/project-consistency/SKILL.md +350 -0

package/src/cli/convoy/skill-refinement.test.ts ADDED Viewed

@@ -0,0 +1,277 @@
+import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { realpathSync, existsSync, readFileSync } from 'node:fs'
+import { describe, it, expect, beforeEach, afterEach } from 'vitest'
+import {
+  trackSkillFailure,
+  getSkillFailures,
+  detectFailurePatterns,
+  generateRefinementProposal,
+  saveProposal,
+  getFailureStats,
+  runSkillRefinementCheck,
+} from './skill-refinement.js'
+import type { SkillFailureRecord, SkillRefinementProposal } from './skill-refinement.js'
+// Each test gets its own scratch directory; realpathSync resolves any symlink
+// in the OS temp path before the directory is handed to the code under test.
+let tmpDir: string
+beforeEach(() => {
+  const created = mkdtempSync(join(tmpdir(), 'skill-ref-test-'))
+  tmpDir = realpathSync(created)
+})
+afterEach(() => {
+  // force: true keeps teardown quiet if the directory is already gone.
+  rmSync(tmpDir, { recursive: true, force: true })
+})
+/** Build a failure record with fixed defaults; any field may be overridden per test. */
+function makeRecord(overrides: Partial<SkillFailureRecord> = {}): SkillFailureRecord {
+  const defaults: SkillFailureRecord = {
+    skill_name: 'react-development',
+    agent: 'Developer',
+    task_id: 'task-1',
+    convoy_id: 'convoy-1',
+    failure_reason: 'missing type annotation on props',
+    retry_count: 1,
+    eventually_succeeded: false,
+    timestamp: '2026-01-01T00:00:00.000Z',
+  }
+  return { ...defaults, ...overrides }
+}
+describe('trackSkillFailure', () => {
+  // Resolved lazily because tmpDir is reassigned before every test.
+  const logPath = (): string => join(tmpDir, '.opencastle/telemetry/skill-failures.ndjson')
+  it('appends a valid JSON line to NDJSON file', () => {
+    trackSkillFailure(makeRecord(), tmpDir)
+    expect(existsSync(logPath())).toBe(true)
+    const written = JSON.parse(readFileSync(logPath(), 'utf8').trim())
+    expect(written.skill_name).toBe('react-development')
+    expect(written.agent).toBe('Developer')
+  })
+  it('creates directory if it does not exist', () => {
+    trackSkillFailure(makeRecord(), tmpDir)
+    const telemetryDir = join(tmpDir, '.opencastle/telemetry')
+    expect(existsSync(telemetryDir)).toBe(true)
+  })
+  it('appends multiple records', () => {
+    for (const task_id of ['task-1', 'task-2']) {
+      trackSkillFailure(makeRecord({ task_id }), tmpDir)
+    }
+    const contents = readFileSync(logPath(), 'utf8')
+    expect(contents.trim().split('\n')).toHaveLength(2)
+  })
+})
+describe('getSkillFailures', () => {
+  it('returns empty array when file does not exist', () => {
+    const found = getSkillFailures('react-development', tmpDir)
+    expect(found).toEqual([])
+  })
+  it('filters by skill name', () => {
+    for (const skill_name of ['react-development', 'api-patterns']) {
+      trackSkillFailure(makeRecord({ skill_name }), tmpDir)
+    }
+    const found = getSkillFailures('react-development', tmpDir)
+    expect(found).toHaveLength(1)
+    expect(found[0].skill_name).toBe('react-development')
+  })
+  it('filters by since timestamp', () => {
+    const january = '2026-01-01T00:00:00.000Z'
+    const february = '2026-02-01T00:00:00.000Z'
+    trackSkillFailure(makeRecord({ timestamp: january }), tmpDir)
+    trackSkillFailure(makeRecord({ timestamp: february }), tmpDir)
+    // The cutoff sits between the two records, so only the later one survives.
+    const found = getSkillFailures('react-development', tmpDir, '2026-01-15T00:00:00.000Z')
+    expect(found).toHaveLength(1)
+    expect(found[0].timestamp).toBe(february)
+  })
+  it('skips malformed lines', () => {
+    const telemetryDir = join(tmpDir, '.opencastle/telemetry')
+    mkdirSync(telemetryDir, { recursive: true })
+    // One garbage line followed by one valid record, newline-terminated.
+    const fileBody = ['not-valid-json', JSON.stringify(makeRecord()), ''].join('\n')
+    writeFileSync(join(telemetryDir, 'skill-failures.ndjson'), fileBody, 'utf8')
+    expect(getSkillFailures('react-development', tmpDir)).toHaveLength(1)
+  })
+})
+describe('detectFailurePatterns', () => {
+  it('returns should_refine false with 0 failures', () => {
+    const { should_refine, threshold_met } = detectFailurePatterns([])
+    expect(should_refine).toBe(false)
+    expect(threshold_met).toBe(false)
+  })
+  it('returns should_refine false with 1 failure', () => {
+    const { should_refine, threshold_met } = detectFailurePatterns([makeRecord()])
+    expect(should_refine).toBe(false)
+    expect(threshold_met).toBe(false)
+  })
+  it('returns threshold_met true with 3 failures from different convoys', () => {
+    const failure_reason = 'missing type annotation on props interface'
+    const failures = ['c1', 'c2', 'c3'].map(convoy_id =>
+      makeRecord({ convoy_id, agent: 'Dev', failure_reason }),
+    )
+    expect(detectFailurePatterns(failures).threshold_met).toBe(true)
+  })
+  it('returns threshold_met true with 2 failures from same agent', () => {
+    const failures = ['c1', 'c2'].map(convoy_id => makeRecord({ agent: 'Developer', convoy_id }))
+    expect(detectFailurePatterns(failures).threshold_met).toBe(true)
+  })
+  it('returns threshold_met true with failures from 2 different agents from different convoys', () => {
+    const first = makeRecord({ agent: 'Developer', convoy_id: 'c1' })
+    const second = makeRecord({ agent: 'UI-Expert', convoy_id: 'c2' })
+    expect(detectFailurePatterns([first, second]).threshold_met).toBe(true)
+  })
+  it('returns threshold_met false with 2 failures from different agents but same convoy', () => {
+    const first = makeRecord({ agent: 'Developer', convoy_id: 'c1', task_id: 't1' })
+    const second = makeRecord({ agent: 'UI-Expert', convoy_id: 'c1', task_id: 't2' })
+    // Only one convoy is involved and no agent fails twice, so none of the three
+    // threshold conditions holds even though two distinct agents are present.
+    expect(detectFailurePatterns([first, second]).threshold_met).toBe(false)
+  })
+  it('groups similar failure reasons into patterns', () => {
+    const reasons = [
+      'missing type annotation on props',
+      'missing type annotation for function params',
+      'completely unrelated import error issue',
+    ]
+    const failures = reasons.map((failure_reason, i) =>
+      makeRecord({ failure_reason, convoy_id: `c${i + 1}` }),
+    )
+    const { patterns } = detectFailurePatterns(failures)
+    expect(patterns.length).toBeGreaterThan(0)
+    // The first two reasons share "missing", "type", "annotation", so they form the largest group.
+    expect(patterns[0]).toContain('annotation')
+  })
+})
+describe('generateRefinementProposal', () => {
+  it('generates proposal with correct fields', () => {
+    const failures = [
+      makeRecord({ convoy_id: 'c1' }),
+      makeRecord({ convoy_id: 'c2' }),
+      makeRecord({ convoy_id: 'c3' }),
+    ]
+    const proposal = generateRefinementProposal('react-development', failures, tmpDir)
+    expect(proposal.skill_name).toBe('react-development')
+    expect(proposal.failure_count).toBe(3)
+    expect(typeof proposal.generated_at).toBe('string')
+    // tmpDir contains no .github/skills tree, so the skill file cannot be located.
+    expect(proposal.skill_path).toBe('unknown')
+  })
+  it('sets confidence based on failure count', () => {
+    const two = Array.from({ length: 2 }, (_, i) => makeRecord({ convoy_id: `c${i}`, agent: 'Dev' }))
+    expect(generateRefinementProposal('s', two, tmpDir).confidence).toBe('low')
+    const three = Array.from({ length: 3 }, (_, i) => makeRecord({ convoy_id: `c${i}`, agent: 'Dev' }))
+    expect(generateRefinementProposal('s', three, tmpDir).confidence).toBe('medium')
+    const five = Array.from({ length: 5 }, (_, i) => makeRecord({ convoy_id: `c${i}`, agent: 'Dev' }))
+    expect(generateRefinementProposal('s', five, tmpDir).confidence).toBe('high')
+  })
+  it('includes proposed additions derived from patterns', () => {
+    const failures = [
+      makeRecord({ failure_reason: 'missing type annotation on props', convoy_id: 'c1', agent: 'Dev1' }),
+      makeRecord({ failure_reason: 'missing type annotation for function', convoy_id: 'c2', agent: 'Dev1' }),
+      makeRecord({ failure_reason: 'missing type annotation in hooks', convoy_id: 'c3', agent: 'Dev1' }),
+    ]
+    const proposal = generateRefinementProposal('react-development', failures, tmpDir)
+    // All three reasons share "missing type annotation", so at least one addition must
+    // be produced. Asserting that first keeps the format check from passing vacuously.
+    expect(proposal.proposed_additions.length).toBeGreaterThan(0)
+    expect(proposal.proposed_additions[0]).toMatch(/Add to ## Common Pitfalls:/)
+  })
+})
+describe('saveProposal', () => {
+  /** Build a fully populated proposal; individual fields may be overridden. */
+  function makeProposal(overrides: Partial<SkillRefinementProposal> = {}): SkillRefinementProposal {
+    const defaults: SkillRefinementProposal = {
+      skill_name: 'react-development',
+      skill_path: 'unknown',
+      failure_count: 3,
+      common_failure_patterns: ['type, annotation, missing'],
+      proposed_additions: ["Add to ## Common Pitfalls: 'type, annotation, missing'"],
+      confidence: 'medium',
+      generated_at: '2026-01-15T10:00:00.000Z',
+    }
+    return { ...defaults, ...overrides }
+  }
+  it('writes markdown file with correct format', () => {
+    const savedTo = saveProposal(makeProposal(), tmpDir)
+    expect(existsSync(savedTo)).toBe(true)
+    const markdown = readFileSync(savedTo, 'utf8')
+    const expectedFragments = [
+      '# Skill Refinement Proposal: react-development',
+      '**Confidence:** medium',
+      '## Proposed Changes',
+      '## Action',
+    ]
+    for (const fragment of expectedFragments) {
+      expect(markdown).toContain(fragment)
+    }
+  })
+  it('creates proposals directory if needed', () => {
+    saveProposal(makeProposal(), tmpDir)
+    const proposalsDir = join(tmpDir, '.opencastle/proposals')
+    expect(existsSync(proposalsDir)).toBe(true)
+  })
+  it('handles existing file for same date (counter suffix)', () => {
+    // Saving the same proposal twice must not overwrite the first file.
+    const first = saveProposal(makeProposal(), tmpDir)
+    const second = saveProposal(makeProposal(), tmpDir)
+    expect(first).not.toBe(second)
+    expect(second).toContain('-2.md')
+  })
+})
+describe('getFailureStats', () => {
+  it('returns empty array when no data', () => {
+    const stats = getFailureStats(tmpDir)
+    expect(stats).toEqual([])
+  })
+  it('groups and sorts by count descending', () => {
+    // Two failures for react-development, one for api-patterns.
+    for (const skill_name of ['react-development', 'react-development', 'api-patterns']) {
+      trackSkillFailure(makeRecord({ skill_name }), tmpDir)
+    }
+    const [top, runnerUp] = getFailureStats(tmpDir)
+    expect(top.skill_name).toBe('react-development')
+    expect(top.count).toBe(2)
+    expect(runnerUp.skill_name).toBe('api-patterns')
+    expect(runnerUp.count).toBe(1)
+  })
+})
+describe('runSkillRefinementCheck', () => {
+  it('returns empty array when no failures for convoy', () => {
+    const proposals = runSkillRefinementCheck('unknown-convoy', tmpDir)
+    expect(proposals).toEqual([])
+  })
+  it('generates proposals for skills meeting threshold', () => {
+    // Three failures of one skill across three convoys; the check is run for convoy c3.
+    for (const n of [1, 2, 3]) {
+      trackSkillFailure(
+        makeRecord({ skill_name: 'react-development', convoy_id: `c${n}`, agent: 'Dev', task_id: `t${n}` }),
+        tmpDir,
+      )
+    }
+    const proposals = runSkillRefinementCheck('c3', tmpDir)
+    expect(proposals.length).toBeGreaterThan(0)
+    expect(proposals[0].skill).toBe('react-development')
+    expect(existsSync(proposals[0].proposalPath)).toBe(true)
+  })
+  it('skips skills not meeting threshold', () => {
+    // A single failure is below every threshold.
+    const lone = makeRecord({ skill_name: 'api-patterns', convoy_id: 'c1', task_id: 't1' })
+    trackSkillFailure(lone, tmpDir)
+    expect(runSkillRefinementCheck('c1', tmpDir)).toEqual([])
+  })
+})

package/src/cli/convoy/skill-refinement.ts ADDED Viewed

@@ -0,0 +1,306 @@
+import { existsSync, readFileSync, appendFileSync, mkdirSync, writeFileSync } from 'node:fs'
+import { join } from 'node:path'
+import { scanForSecrets } from './gates.js'
+const SKILL_FAILURES_PATH = '.opencastle/telemetry/skill-failures.ndjson' // failure log location; joined onto the base path wherever the log is read or appended
+const STOP_WORDS = new Set(['the', 'a', 'is', 'to', 'and', 'in', 'for', 'of', 'with']) // words tokenize() discards before failure reasons are compared
+export interface SkillFailureRecord { // one line of the skill-failure NDJSON log
+  skill_name: string // key used to filter and group records
+  agent: string // failures are counted per agent when checking the refinement threshold
+  task_id: string // shown in the proposal's evidence table
+  convoy_id: string // distinct convoy ids are counted toward the refinement threshold
+  failure_reason: string // free text; tokenized to group similar failures
+  retry_count: number
+  eventually_succeeded: boolean
+  timestamp: string // compared as a plain string against `since`; presumably ISO-8601 — confirm with producers
+}
+export interface SkillRefinementProposal { // result of generateRefinementProposal, rendered to markdown by saveProposal
+  skill_name: string
+  skill_path: string // path of the skill's SKILL.md, or 'unknown' when that file does not exist
+  failure_count: number // number of failure records analyzed
+  common_failure_patterns: string[] // patterns returned by detectFailurePatterns
+  proposed_additions: string[] // one suggestion string per pattern
+  confidence: 'low' | 'medium' | 'high' // 'high' at 5+ failures, 'medium' at 3+, otherwise 'low'
+  generated_at: string // new Date().toISOString() at generation time
+}
+/**
+ * Append one failure record to the skill-failure NDJSON log.
+ *
+ * The telemetry directory is created if missing. The serialized line is passed
+ * through `scanForSecrets` first; when the scan does not report it clean, the
+ * record is dropped without being written.
+ *
+ * @param record - Failure to persist.
+ * @param basePath - Project root; defaults to the current working directory.
+ */
+export function trackSkillFailure(record: SkillFailureRecord, basePath?: string): void {
+  const root = basePath ?? process.cwd()
+  mkdirSync(join(root, '.opencastle', 'telemetry'), { recursive: true })
+  const serialized = `${JSON.stringify(record)}\n`
+  const { clean } = scanForSecrets(serialized, 'skill-failures.ndjson')
+  if (clean) {
+    appendFileSync(join(root, SKILL_FAILURES_PATH), serialized, 'utf8')
+  }
+}
+/**
+ * Read the failure log and return the records belonging to one skill.
+ *
+ * @param skillName - Only records whose `skill_name` equals this are returned.
+ * @param basePath - Project root; defaults to the current working directory.
+ * @param since - Optional lower bound; records whose `timestamp` string sorts
+ *   before it are excluded.
+ * @returns Matching records in file order; empty when the log does not exist.
+ */
+export function getSkillFailures(skillName: string, basePath?: string, since?: string): SkillFailureRecord[] {
+  const logFile = join(basePath ?? process.cwd(), SKILL_FAILURES_PATH)
+  if (!existsSync(logFile)) return []
+  const matches: SkillFailureRecord[] = []
+  for (const rawLine of readFileSync(logFile, 'utf8').split('\n')) {
+    if (rawLine.trim() === '') continue
+    try {
+      // Field access stays inside the try so a line that parses to `null`
+      // is skipped like any other malformed line.
+      const entry = JSON.parse(rawLine) as SkillFailureRecord
+      const tooOld = Boolean(since && entry.timestamp < since)
+      if (entry.skill_name === skillName && !tooOld) {
+        matches.push(entry)
+      }
+    } catch {
+      // skip malformed lines
+    }
+  }
+  return matches
+}
+/**
+ * Split text into a set of lower-cased words, dropping words of one or two
+ * characters and anything listed in STOP_WORDS.
+ */
+function tokenize(text: string): Set<string> {
+  const tokens = new Set<string>()
+  for (const word of text.toLowerCase().split(/\W+/)) {
+    if (word.length <= 2 || STOP_WORDS.has(word)) continue
+    tokens.add(word)
+  }
+  return tokens
+}
+/**
+ * Decide whether a skill's failures justify a refinement proposal and
+ * summarize what the failures have in common.
+ *
+ * `threshold_met` is true when the failures span 3+ convoys, or one agent
+ * failed 2+ times, or there are 2+ distinct agents and 2+ distinct convoys.
+ * Fewer than two failures never meets the threshold.
+ *
+ * Failure reasons are grouped greedily: a reason joins the first group whose
+ * founding reason shares at least half of the smaller token set with it.
+ * Each group yields one pattern: up to five words common to every reason in
+ * the group, or the first 60 characters of the founding reason when a
+ * multi-reason group has no word in common.
+ *
+ * @returns `patterns` ordered largest group first; `should_refine` is
+ *   `threshold_met` and at least one pattern.
+ */
+export function detectFailurePatterns(failures: SkillFailureRecord[]): {
+  should_refine: boolean
+  patterns: string[]
+  threshold_met: boolean
+} {
+  if (failures.length < 2) {
+    return { should_refine: false, patterns: [], threshold_met: false }
+  }
+  const convoyIds = new Set<string>()
+  const failuresPerAgent = new Map<string, number>()
+  for (const { convoy_id, agent } of failures) {
+    convoyIds.add(convoy_id)
+    failuresPerAgent.set(agent, (failuresPerAgent.get(agent) ?? 0) + 1)
+  }
+  let agentFailedTwice = false
+  for (const tally of failuresPerAgent.values()) {
+    if (tally >= 2) {
+      agentFailedTwice = true
+      break
+    }
+  }
+  const threshold_met =
+    convoyIds.size >= 3 ||
+    agentFailedTwice ||
+    (failuresPerAgent.size >= 2 && convoyIds.size >= 2)
+  // Each cluster remembers the tokens of its founding reason; later reasons
+  // are only ever compared against that founding reason.
+  const clusters: Array<{ keyWords: Set<string>; reasons: string[] }> = []
+  for (const { failure_reason } of failures) {
+    const words = tokenize(failure_reason)
+    const home = clusters.find(({ keyWords }) => {
+      const smaller = Math.min(words.size, keyWords.size)
+      if (smaller === 0) return false
+      let overlap = 0
+      for (const word of words) {
+        if (keyWords.has(word)) overlap++
+      }
+      return overlap / smaller >= 0.5
+    })
+    if (home) {
+      home.reasons.push(failure_reason)
+    } else {
+      clusters.push({ keyWords: words, reasons: [failure_reason] })
+    }
+  }
+  clusters.sort((a, b) => b.reasons.length - a.reasons.length)
+  const patterns: string[] = []
+  for (const { keyWords, reasons } of clusters) {
+    // Intersect the founding reason's tokens with every other reason's tokens.
+    let common = new Set(keyWords)
+    for (const reason of reasons.slice(1)) {
+      const other = tokenize(reason)
+      common = new Set([...common].filter(word => other.has(word)))
+    }
+    if (common.size > 0) {
+      patterns.push([...common].slice(0, 5).join(', '))
+    } else if (reasons.length > 1) {
+      patterns.push(reasons[0].slice(0, 60))
+    }
+  }
+  return { should_refine: threshold_met && patterns.length > 0, patterns, threshold_met }
+}
+/**
+ * Build a refinement proposal for a skill from its failure records.
+ *
+ * @param skillName - Skill the proposal is for.
+ * @param failures - Failure records to analyze.
+ * @param basePath - Project root; defaults to the current working directory.
+ * @returns A proposal whose `skill_path` is `.github/skills/<skillName>/SKILL.md`
+ *   under the base path when that file exists, otherwise `'unknown'`, and whose
+ *   confidence is 'high' for 5+ failures, 'medium' for 3+, else 'low'.
+ */
+export function generateRefinementProposal(
+  skillName: string,
+  failures: SkillFailureRecord[],
+  basePath?: string,
+): SkillRefinementProposal {
+  const candidatePath = join(basePath ?? process.cwd(), '.github', 'skills', skillName, 'SKILL.md')
+  const skill_path = existsSync(candidatePath) ? candidatePath : 'unknown'
+  const { patterns } = detectFailurePatterns(failures)
+  let confidence: 'low' | 'medium' | 'high' = 'low'
+  if (failures.length >= 5) {
+    confidence = 'high'
+  } else if (failures.length >= 3) {
+    confidence = 'medium'
+  }
+  return {
+    skill_name: skillName,
+    skill_path,
+    failure_count: failures.length,
+    common_failure_patterns: patterns,
+    proposed_additions: patterns.map(pattern => `Add to ## Common Pitfalls: '${pattern}'`),
+    confidence,
+    generated_at: new Date().toISOString(),
+  }
+}
+/**
+ * Render a proposal as a markdown document.
+ *
+ * @param proposal - Proposal to render; only the date part (first 10
+ *   characters) of `generated_at` is shown.
+ * @param failures - Evidence rows; at most the first 20 are listed and each
+ *   failure reason is truncated to 80 characters.
+ * @returns The markdown text.
+ */
+function buildProposalMarkdown(proposal: SkillRefinementProposal, failures: SkillFailureRecord[]): string {
+  const date = proposal.generated_at.slice(0, 10)
+  const patternList =
+    proposal.common_failure_patterns.map(p => `- ${p}`).join('\n') || '- (none detected)'
+  const additionsList =
+    proposal.proposed_additions.map(a => `- ${a}`).join('\n') || '- (none)'
+  // Table cells hold free text from telemetry: a raw `|` would start a new
+  // column and a line break would end the row, so neutralize both.
+  const cell = (value: unknown): string =>
+    String(value).replace(/\r?\n|\r/g, ' ').replace(/\|/g, '\\|')
+  const evidenceRows = failures
+    .slice(0, 20)
+    // Truncate before escaping so the cut never lands inside an escape sequence.
+    .map(f => `| ${cell(f.convoy_id)} | ${cell(f.task_id)} | ${cell(f.agent)} | ${cell(f.failure_reason.slice(0, 80))} |`)
+    .join('\n')
+  return `# Skill Refinement Proposal: ${proposal.skill_name}
+**Generated:** ${date}
+**Failures analyzed:** ${proposal.failure_count}
+**Confidence:** ${proposal.confidence}
+## Failure Pattern Summary
+${patternList}
+## Proposed Changes
+${additionsList}
+## Evidence
+| Convoy | Task | Agent | Failure Reason |
+|--------|------|-------|---------------|
+${evidenceRows}
+## Action
+- [ ] Apply this proposal: edit \`${proposal.skill_path}\` manually
+- [ ] Reject: delete this file
+`
+}
+/**
+ * Write a proposal to `.opencastle/proposals` as markdown and return its path.
+ *
+ * The file is named `skill-<skill_name>-<YYYY-MM-DD>.md`; when that name is
+ * taken, `-2`, `-3`, … is appended until a free name is found, so earlier
+ * proposals are never overwritten.
+ *
+ * @param proposal - Proposal to persist.
+ * @param basePath - Project root; defaults to the current working directory.
+ * @param failures - Records listed in the proposal's evidence table.
+ * @returns Path of the file that was written.
+ */
+export function saveProposal(
+  proposal: SkillRefinementProposal,
+  basePath?: string,
+  failures: SkillFailureRecord[] = [],
+): string {
+  const base = basePath ?? process.cwd()
+  const dir = join(base, '.opencastle', 'proposals')
+  mkdirSync(dir, { recursive: true })
+  const date = proposal.generated_at.slice(0, 10)
+  // The skill name originates from telemetry records. Replace anything that is
+  // not a plain file-name character so a name containing a path separator can
+  // neither escape `dir` nor point into a directory that does not exist.
+  const stem = `skill-${proposal.skill_name}-${date}`.replace(/[^A-Za-z0-9._-]/g, '_')
+  let filePath = join(dir, `${stem}.md`)
+  for (let counter = 2; existsSync(filePath); counter++) {
+    filePath = join(dir, `${stem}-${counter}.md`)
+  }
+  writeFileSync(filePath, buildProposalMarkdown(proposal, failures), 'utf8')
+  return filePath
+}
+/**
+ * Summarize the failure log per skill.
+ *
+ * Lines that are not valid JSON, or that parse to something other than an
+ * object with a string `skill_name`, are ignored.
+ *
+ * @param basePath - Project root; defaults to the current working directory.
+ * @returns One entry per skill with its failure count, the distinct agents
+ *   seen, and the greatest timestamp string, sorted by count descending.
+ *   Empty when the log does not exist.
+ */
+export function getFailureStats(
+  basePath?: string,
+): Array<{ skill_name: string; count: number; agents: string[]; latest: string }> {
+  const base = basePath ?? process.cwd()
+  const filePath = join(base, SKILL_FAILURES_PATH)
+  if (!existsSync(filePath)) return []
+  const content = readFileSync(filePath, 'utf8')
+  const statsMap = new Map<string, { count: number; agents: Set<string>; latest: string }>()
+  for (const line of content.split('\n')) {
+    if (!line.trim()) continue
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(line)
+    } catch {
+      continue // skip malformed lines
+    }
+    // `42`, `"text"`, `null` and `{}` are all valid JSON but not records;
+    // without this check they would be tallied under an `undefined` skill.
+    if (typeof parsed !== 'object' || parsed === null) continue
+    const record = parsed as Partial<SkillFailureRecord>
+    if (typeof record.skill_name !== 'string') continue
+    let stats = statsMap.get(record.skill_name)
+    if (!stats) {
+      stats = { count: 0, agents: new Set<string>(), latest: '' }
+      statsMap.set(record.skill_name, stats)
+    }
+    stats.count++
+    if (typeof record.agent === 'string') stats.agents.add(record.agent)
+    // Starting from '' lets a later record still set `latest` when an earlier
+    // one lacked a timestamp.
+    if (typeof record.timestamp === 'string' && record.timestamp > stats.latest) {
+      stats.latest = record.timestamp
+    }
+  }
+  return [...statsMap.entries()]
+    .map(([skill_name, s]) => ({
+      skill_name,
+      count: s.count,
+      agents: [...s.agents],
+      latest: s.latest,
+    }))
+    .sort((a, b) => b.count - a.count)
+}
+/**
+ * After a convoy, generate refinement proposals for the skills that failed in it.
+ *
+ * For every skill with at least one failure recorded under `convoyId`, all of
+ * that skill's failures (from any convoy) are evaluated; when the threshold
+ * from `detectFailurePatterns` is met, a proposal is generated and saved.
+ *
+ * Log lines that are not valid JSON, or that lack string `skill_name`,
+ * `convoy_id` and `failure_reason` fields, are ignored.
+ *
+ * @param convoyId - Convoy whose failing skills should be checked.
+ * @param basePath - Project root; defaults to the current working directory.
+ * @returns One `{ skill, proposalPath }` per proposal written; empty when the
+ *   log does not exist or the convoy has no recorded failures.
+ */
+export function runSkillRefinementCheck(
+  convoyId: string,
+  basePath?: string,
+): Array<{ skill: string; proposalPath: string }> {
+  const base = basePath ?? process.cwd()
+  const filePath = join(base, SKILL_FAILURES_PATH)
+  if (!existsSync(filePath)) return []
+  const content = readFileSync(filePath, 'utf8')
+  const failuresBySkill = new Map<string, SkillFailureRecord[]>()
+  const convoySkills = new Set<string>()
+  for (const line of content.split('\n')) {
+    if (!line.trim()) continue
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(line)
+    } catch {
+      continue // skip malformed
+    }
+    // A line such as `null` parses fine but is not a record; letting it through
+    // would throw on the first property access below. `failure_reason` must be
+    // a string because pattern detection and the evidence table call string
+    // methods on it.
+    if (typeof parsed !== 'object' || parsed === null) continue
+    const candidate = parsed as Partial<SkillFailureRecord>
+    if (
+      typeof candidate.skill_name !== 'string' ||
+      typeof candidate.convoy_id !== 'string' ||
+      typeof candidate.failure_reason !== 'string'
+    ) {
+      continue
+    }
+    const record = candidate as SkillFailureRecord
+    const bucket = failuresBySkill.get(record.skill_name)
+    if (bucket) {
+      bucket.push(record)
+    } else {
+      failuresBySkill.set(record.skill_name, [record])
+    }
+    if (record.convoy_id === convoyId) convoySkills.add(record.skill_name)
+  }
+  const results: Array<{ skill: string; proposalPath: string }> = []
+  for (const skillName of convoySkills) {
+    const allSkillFailures = failuresBySkill.get(skillName) ?? []
+    const { threshold_met } = detectFailurePatterns(allSkillFailures)
+    if (!threshold_met) continue
+    const proposal = generateRefinementProposal(skillName, allSkillFailures, base)
+    const proposalPath = saveProposal(proposal, base, allSkillFailures)
+    results.push({ skill: skillName, proposalPath })
+  }
+  return results
+}

package/src/cli/convoy/spec-builder.test.ts CHANGED Viewed

@@ -228,6 +228,67 @@ describe('buildConvoyYaml', () => {
     expect(result.valid).toBe(true)
     expect(result.errors).toEqual([])
   })
+  // ── complexity effort-scaling integration ────────────────────────────────────
+  it('auto-populates timeout, max_retries, review from effort table when complexity is set', () => {
+    const plan: TaskPlan = {
+      name: 'Effort Test',
+      tasks: [{ id: 'task-1', complexity: 3, prompt: 'Do something' }],
+    }
+    const parsed = yamlParse(buildConvoyYaml(plan))
+    const task = parsed.tasks[0]
+    expect(task.timeout).toBe('15m')
+    expect(task.max_retries).toBe(2)
+    expect(task.review).toBe('fast')
+  })
+  it('does not override explicitly set timeout when complexity is also set', () => {
+    const plan: TaskPlan = {
+      name: 'Effort Override Test',
+      tasks: [{ id: 'task-1', complexity: 3, timeout: '1h', prompt: 'Do something' }],
+    }
+    const parsed = yamlParse(buildConvoyYaml(plan))
+    expect(parsed.tasks[0].timeout).toBe('1h')
+  })
+  it('does not override explicitly set max_retries when complexity is also set', () => {
+    const plan: TaskPlan = {
+      name: 'Effort Override Test',
+      tasks: [{ id: 'task-1', complexity: 5, max_retries: 5, prompt: 'Do something' }],
+    }
+    const parsed = yamlParse(buildConvoyYaml(plan))
+    expect(parsed.tasks[0].max_retries).toBe(5)
+  })
+  it('does not override explicitly set review when complexity is also set', () => {
+    const plan: TaskPlan = {
+      name: 'Effort Override Test',
+      tasks: [{ id: 'task-1', complexity: 8, review: 'panel', prompt: 'Do something' }],
+    }
+    const parsed = yamlParse(buildConvoyYaml(plan))
+    expect(parsed.tasks[0].review).toBe('panel')
+  })
+  it('works unchanged (backward compatible) when complexity is not set', () => {
+    const parsed = yamlParse(buildConvoyYaml(minimalPlan()))
+    const task = parsed.tasks[0]
+    expect(task.timeout).toBeUndefined()
+    expect(task.max_retries).toBeUndefined()
+    expect(task.review).toBeUndefined()
+  })
+  it('uses complexity-13 profile for epic tasks', () => {
+    const plan: TaskPlan = {
+      name: 'Epic Test',
+      tasks: [{ id: 'task-1', complexity: 13, prompt: 'Epic task' }],
+    }
+    const parsed = yamlParse(buildConvoyYaml(plan))
+    const task = parsed.tasks[0]
+    expect(task.timeout).toBe('45m')
+    expect(task.max_retries).toBe(3)
+    expect(task.review).toBe('panel')
+  })
 })
 // ── applyPatches ──────────────────────────────────────────────────────────────

package/src/cli/convoy/spec-builder.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { stringify } from 'yaml'
+import { getEffortProfile } from './effort-scaling.js'
 /** The task plan generated by the LLM — just the creative decomposition, no YAML concerns */
 export interface TaskPlanTask {
@@ -10,6 +11,7 @@ export interface TaskPlanTask {
   timeout?: string
   max_retries?: number
   review?: string
+  complexity?: 1 | 2 | 3 | 5 | 8 | 13
   prompt: string
   gates?: string[]
   built_in_gates?: Record<string, boolean | string>
@@ -117,6 +119,13 @@ export function buildConvoyYaml(plan: TaskPlan, enrichment?: SpecEnrichment): st
     if (task.review !== undefined) t.review = task.review
     if (task.gates !== undefined) t.gates = task.gates
     if (task.built_in_gates !== undefined) t.built_in_gates = task.built_in_gates
+    // Auto-populate from effort table when complexity is set and fields are missing
+    if (task.complexity !== undefined) {
+      const profile = getEffortProfile(task.complexity)
+      if (t.timeout === undefined) t.timeout = profile.timeout
+      if (t.max_retries === undefined) t.max_retries = profile.max_retries
+      if (t.review === undefined) t.review = profile.review
+    }
     // prompt last — keeps the long text at the end of each task block
     t.prompt = task.prompt
     return t