opencastle 0.31.6 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/LICENSE +93 -21
  2. package/README.md +9 -3
  3. package/bin/cli.mjs +15 -0
  4. package/dist/cli/agents.d.ts.map +1 -1
  5. package/dist/cli/agents.js +19 -5
  6. package/dist/cli/agents.js.map +1 -1
  7. package/dist/cli/artifacts-cli.d.ts +3 -0
  8. package/dist/cli/artifacts-cli.d.ts.map +1 -0
  9. package/dist/cli/artifacts-cli.js +36 -0
  10. package/dist/cli/artifacts-cli.js.map +1 -0
  11. package/dist/cli/baselines.d.ts.map +1 -1
  12. package/dist/cli/baselines.js +11 -0
  13. package/dist/cli/baselines.js.map +1 -1
  14. package/dist/cli/convoy/artifacts.d.ts +25 -0
  15. package/dist/cli/convoy/artifacts.d.ts.map +1 -0
  16. package/dist/cli/convoy/artifacts.js +129 -0
  17. package/dist/cli/convoy/artifacts.js.map +1 -0
  18. package/dist/cli/convoy/artifacts.test.d.ts +2 -0
  19. package/dist/cli/convoy/artifacts.test.d.ts.map +1 -0
  20. package/dist/cli/convoy/artifacts.test.js +169 -0
  21. package/dist/cli/convoy/artifacts.test.js.map +1 -0
  22. package/dist/cli/convoy/compaction.d.ts +23 -0
  23. package/dist/cli/convoy/compaction.d.ts.map +1 -0
  24. package/dist/cli/convoy/compaction.js +117 -0
  25. package/dist/cli/convoy/compaction.js.map +1 -0
  26. package/dist/cli/convoy/compaction.test.d.ts +2 -0
  27. package/dist/cli/convoy/compaction.test.d.ts.map +1 -0
  28. package/dist/cli/convoy/compaction.test.js +205 -0
  29. package/dist/cli/convoy/compaction.test.js.map +1 -0
  30. package/dist/cli/convoy/contracts.d.ts +22 -0
  31. package/dist/cli/convoy/contracts.d.ts.map +1 -0
  32. package/dist/cli/convoy/contracts.js +254 -0
  33. package/dist/cli/convoy/contracts.js.map +1 -0
  34. package/dist/cli/convoy/contracts.test.d.ts +2 -0
  35. package/dist/cli/convoy/contracts.test.d.ts.map +1 -0
  36. package/dist/cli/convoy/contracts.test.js +239 -0
  37. package/dist/cli/convoy/contracts.test.js.map +1 -0
  38. package/dist/cli/convoy/dag-analysis.d.ts +40 -0
  39. package/dist/cli/convoy/dag-analysis.d.ts.map +1 -0
  40. package/dist/cli/convoy/dag-analysis.js +282 -0
  41. package/dist/cli/convoy/dag-analysis.js.map +1 -0
  42. package/dist/cli/convoy/dag-analysis.test.d.ts +2 -0
  43. package/dist/cli/convoy/dag-analysis.test.d.ts.map +1 -0
  44. package/dist/cli/convoy/dag-analysis.test.js +289 -0
  45. package/dist/cli/convoy/dag-analysis.test.js.map +1 -0
  46. package/dist/cli/convoy/effort-scaling.d.ts +20 -0
  47. package/dist/cli/convoy/effort-scaling.d.ts.map +1 -0
  48. package/dist/cli/convoy/effort-scaling.js +82 -0
  49. package/dist/cli/convoy/effort-scaling.js.map +1 -0
  50. package/dist/cli/convoy/effort-scaling.test.d.ts +2 -0
  51. package/dist/cli/convoy/effort-scaling.test.d.ts.map +1 -0
  52. package/dist/cli/convoy/effort-scaling.test.js +120 -0
  53. package/dist/cli/convoy/effort-scaling.test.js.map +1 -0
  54. package/dist/cli/convoy/engine.d.ts.map +1 -1
  55. package/dist/cli/convoy/engine.js +298 -11
  56. package/dist/cli/convoy/engine.js.map +1 -1
  57. package/dist/cli/convoy/engine.test.js +155 -18
  58. package/dist/cli/convoy/engine.test.js.map +1 -1
  59. package/dist/cli/convoy/event-schemas.d.ts.map +1 -1
  60. package/dist/cli/convoy/event-schemas.js +55 -0
  61. package/dist/cli/convoy/event-schemas.js.map +1 -1
  62. package/dist/cli/convoy/isolation.d.ts +27 -0
  63. package/dist/cli/convoy/isolation.d.ts.map +1 -0
  64. package/dist/cli/convoy/isolation.js +120 -0
  65. package/dist/cli/convoy/isolation.js.map +1 -0
  66. package/dist/cli/convoy/isolation.test.d.ts +2 -0
  67. package/dist/cli/convoy/isolation.test.d.ts.map +1 -0
  68. package/dist/cli/convoy/isolation.test.js +105 -0
  69. package/dist/cli/convoy/isolation.test.js.map +1 -0
  70. package/dist/cli/convoy/review-stages.d.ts +9 -0
  71. package/dist/cli/convoy/review-stages.d.ts.map +1 -0
  72. package/dist/cli/convoy/review-stages.js +134 -0
  73. package/dist/cli/convoy/review-stages.js.map +1 -0
  74. package/dist/cli/convoy/review-stages.test.d.ts +2 -0
  75. package/dist/cli/convoy/review-stages.test.d.ts.map +1 -0
  76. package/dist/cli/convoy/review-stages.test.js +197 -0
  77. package/dist/cli/convoy/review-stages.test.js.map +1 -0
  78. package/dist/cli/convoy/skill-refinement.d.ts +39 -0
  79. package/dist/cli/convoy/skill-refinement.d.ts.map +1 -0
  80. package/dist/cli/convoy/skill-refinement.js +239 -0
  81. package/dist/cli/convoy/skill-refinement.js.map +1 -0
  82. package/dist/cli/convoy/skill-refinement.test.d.ts +2 -0
  83. package/dist/cli/convoy/skill-refinement.test.d.ts.map +1 -0
  84. package/dist/cli/convoy/skill-refinement.test.js +230 -0
  85. package/dist/cli/convoy/skill-refinement.test.js.map +1 -0
  86. package/dist/cli/convoy/spec-builder.d.ts +1 -0
  87. package/dist/cli/convoy/spec-builder.d.ts.map +1 -1
  88. package/dist/cli/convoy/spec-builder.js +11 -0
  89. package/dist/cli/convoy/spec-builder.js.map +1 -1
  90. package/dist/cli/convoy/spec-builder.test.js +54 -0
  91. package/dist/cli/convoy/spec-builder.test.js.map +1 -1
  92. package/dist/cli/convoy/store.d.ts +3 -2
  93. package/dist/cli/convoy/store.d.ts.map +1 -1
  94. package/dist/cli/convoy/store.js +20 -2
  95. package/dist/cli/convoy/store.js.map +1 -1
  96. package/dist/cli/convoy/store.test.js +15 -15
  97. package/dist/cli/convoy/store.test.js.map +1 -1
  98. package/dist/cli/convoy/tdd-gate.d.ts +15 -0
  99. package/dist/cli/convoy/tdd-gate.d.ts.map +1 -0
  100. package/dist/cli/convoy/tdd-gate.js +119 -0
  101. package/dist/cli/convoy/tdd-gate.js.map +1 -0
  102. package/dist/cli/convoy/tdd-gate.test.d.ts +2 -0
  103. package/dist/cli/convoy/tdd-gate.test.d.ts.map +1 -0
  104. package/dist/cli/convoy/tdd-gate.test.js +227 -0
  105. package/dist/cli/convoy/tdd-gate.test.js.map +1 -0
  106. package/dist/cli/convoy/types.d.ts +91 -0
  107. package/dist/cli/convoy/types.d.ts.map +1 -1
  108. package/dist/cli/convoy/types.js +8 -0
  109. package/dist/cli/convoy/types.js.map +1 -1
  110. package/dist/cli/dashboard.d.ts.map +1 -1
  111. package/dist/cli/dashboard.js +54 -0
  112. package/dist/cli/dashboard.js.map +1 -1
  113. package/dist/cli/insights.d.ts +3 -0
  114. package/dist/cli/insights.d.ts.map +1 -0
  115. package/dist/cli/insights.js +94 -0
  116. package/dist/cli/insights.js.map +1 -0
  117. package/dist/cli/lesson.d.ts.map +1 -1
  118. package/dist/cli/lesson.js +7 -0
  119. package/dist/cli/lesson.js.map +1 -1
  120. package/dist/cli/log.d.ts.map +1 -1
  121. package/dist/cli/log.js +7 -0
  122. package/dist/cli/log.js.map +1 -1
  123. package/dist/cli/package-config.d.ts +12 -0
  124. package/dist/cli/package-config.d.ts.map +1 -0
  125. package/dist/cli/package-config.js +37 -0
  126. package/dist/cli/package-config.js.map +1 -0
  127. package/dist/cli/package.d.ts +23 -0
  128. package/dist/cli/package.d.ts.map +1 -0
  129. package/dist/cli/package.js +285 -0
  130. package/dist/cli/package.js.map +1 -0
  131. package/dist/cli/package.test.d.ts +2 -0
  132. package/dist/cli/package.test.d.ts.map +1 -0
  133. package/dist/cli/package.test.js +236 -0
  134. package/dist/cli/package.test.js.map +1 -0
  135. package/dist/cli/pipeline.d.ts +6 -0
  136. package/dist/cli/pipeline.d.ts.map +1 -1
  137. package/dist/cli/pipeline.js +15 -2
  138. package/dist/cli/pipeline.js.map +1 -1
  139. package/dist/cli/run/schema.d.ts.map +1 -1
  140. package/dist/cli/run/schema.js +32 -0
  141. package/dist/cli/run/schema.js.map +1 -1
  142. package/dist/cli/run/schema.test.js +51 -0
  143. package/dist/cli/run/schema.test.js.map +1 -1
  144. package/dist/cli/run.d.ts.map +1 -1
  145. package/dist/cli/run.js +10 -1
  146. package/dist/cli/run.js.map +1 -1
  147. package/dist/cli/skills.d.ts +3 -0
  148. package/dist/cli/skills.d.ts.map +1 -0
  149. package/dist/cli/skills.js +107 -0
  150. package/dist/cli/skills.js.map +1 -0
  151. package/dist/cli/types.d.ts +4 -1
  152. package/dist/cli/types.d.ts.map +1 -1
  153. package/dist/cli/update.js +2 -2
  154. package/package.json +3 -2
  155. package/src/cli/agents.ts +20 -5
  156. package/src/cli/artifacts-cli.ts +41 -0
  157. package/src/cli/baselines.ts +12 -0
  158. package/src/cli/convoy/artifacts.test.ts +201 -0
  159. package/src/cli/convoy/artifacts.ts +186 -0
  160. package/src/cli/convoy/compaction.test.ts +245 -0
  161. package/src/cli/convoy/compaction.ts +164 -0
  162. package/src/cli/convoy/contracts.test.ts +279 -0
  163. package/src/cli/convoy/contracts.ts +280 -0
  164. package/src/cli/convoy/dag-analysis.test.ts +349 -0
  165. package/src/cli/convoy/dag-analysis.ts +371 -0
  166. package/src/cli/convoy/effort-scaling.test.ts +140 -0
  167. package/src/cli/convoy/effort-scaling.ts +90 -0
  168. package/src/cli/convoy/engine.test.ts +175 -18
  169. package/src/cli/convoy/engine.ts +315 -12
  170. package/src/cli/convoy/event-schemas.ts +55 -0
  171. package/src/cli/convoy/isolation.test.ts +137 -0
  172. package/src/cli/convoy/isolation.ts +165 -0
  173. package/src/cli/convoy/review-stages.test.ts +235 -0
  174. package/src/cli/convoy/review-stages.ts +166 -0
  175. package/src/cli/convoy/skill-refinement.test.ts +277 -0
  176. package/src/cli/convoy/skill-refinement.ts +306 -0
  177. package/src/cli/convoy/spec-builder.test.ts +61 -0
  178. package/src/cli/convoy/spec-builder.ts +9 -0
  179. package/src/cli/convoy/store.test.ts +15 -15
  180. package/src/cli/convoy/store.ts +26 -4
  181. package/src/cli/convoy/tdd-gate.test.ts +281 -0
  182. package/src/cli/convoy/tdd-gate.ts +154 -0
  183. package/src/cli/convoy/types.ts +51 -0
  184. package/src/cli/dashboard.ts +55 -0
  185. package/src/cli/insights.ts +99 -0
  186. package/src/cli/lesson.ts +8 -0
  187. package/src/cli/log.ts +8 -0
  188. package/src/cli/package-config.ts +48 -0
  189. package/src/cli/package.test.ts +276 -0
  190. package/src/cli/package.ts +329 -0
  191. package/src/cli/pipeline.ts +21 -2
  192. package/src/cli/run/schema.test.ts +58 -0
  193. package/src/cli/run/schema.ts +33 -0
  194. package/src/cli/run.ts +14 -1
  195. package/src/cli/skills.ts +121 -0
  196. package/src/cli/types.ts +4 -1
  197. package/src/cli/update.ts +2 -2
  198. package/src/dashboard/dist/_astro/{index.Je1YjU_y.css → index.BRDFmNzR.css} +1 -1
  199. package/src/dashboard/dist/index.html +163 -2
  200. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  201. package/src/dashboard/src/pages/index.astro +162 -1
  202. package/src/dashboard/src/styles/dashboard.css +85 -0
  203. package/src/orchestrator/agents/developer.agent.md +8 -0
  204. package/src/orchestrator/agents/ui-ux-expert.agent.md +7 -0
  205. package/src/orchestrator/prompts/assess-complexity.prompt.md +13 -0
  206. package/src/orchestrator/prompts/brainstorm.prompt.md +18 -0
  207. package/src/orchestrator/prompts/generate-convoy.prompt.md +61 -0
  208. package/src/orchestrator/skills/decomposition/SKILL.md +35 -0
  209. package/src/orchestrator/skills/frontend-design/SKILL.md +27 -1
  210. package/src/orchestrator/skills/project-consistency/SKILL.md +350 -0
@@ -0,0 +1,277 @@
1
+ import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from 'node:fs'
2
+ import { tmpdir } from 'node:os'
3
+ import { join } from 'node:path'
4
+ import { realpathSync, existsSync, readFileSync } from 'node:fs'
5
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
6
+ import {
7
+ trackSkillFailure,
8
+ getSkillFailures,
9
+ detectFailurePatterns,
10
+ generateRefinementProposal,
11
+ saveProposal,
12
+ getFailureStats,
13
+ runSkillRefinementCheck,
14
+ } from './skill-refinement.js'
15
+ import type { SkillFailureRecord, SkillRefinementProposal } from './skill-refinement.js'
16
+
17
// Per-test scratch directory; every case passes it as `basePath` so nothing is
// written into the real working directory.
let tmpDir: string

beforeEach(() => {
  const scratch = mkdtempSync(join(tmpdir(), 'skill-ref-test-'))
  // Canonicalise the path so later path comparisons are not thrown off by symlinks.
  tmpDir = realpathSync(scratch)
})

afterEach(() => {
  // `force` keeps cleanup from failing if the directory is already gone.
  rmSync(tmpDir, { recursive: true, force: true })
})
26
+
27
/**
 * Builds a `SkillFailureRecord` fixture.
 *
 * @param overrides - Fields that should differ from the defaults; they win over
 *   the default values.
 * @returns A complete record describing one `react-development` failure.
 */
function makeRecord(overrides: Partial<SkillFailureRecord> = {}): SkillFailureRecord {
  const defaults: SkillFailureRecord = {
    skill_name: 'react-development',
    agent: 'Developer',
    task_id: 'task-1',
    convoy_id: 'convoy-1',
    failure_reason: 'missing type annotation on props',
    retry_count: 1,
    eventually_succeeded: false,
    timestamp: '2026-01-01T00:00:00.000Z',
  }
  return Object.assign(defaults, overrides)
}
40
+
41
// Verifies that trackSkillFailure creates the telemetry directory and appends
// one JSON document per line to the NDJSON log.
describe('trackSkillFailure', () => {
  // Absolute location of the log inside the current scratch directory.
  const logFile = (): string => join(tmpDir, '.opencastle/telemetry/skill-failures.ndjson')

  it('appends a valid JSON line to NDJSON file', () => {
    trackSkillFailure(makeRecord(), tmpDir)

    expect(existsSync(logFile())).toBe(true)
    const written = JSON.parse(readFileSync(logFile(), 'utf8').trim())
    expect(written.skill_name).toBe('react-development')
    expect(written.agent).toBe('Developer')
  })

  it('creates directory if it does not exist', () => {
    trackSkillFailure(makeRecord(), tmpDir)

    const telemetryDir = join(tmpDir, '.opencastle/telemetry')
    expect(existsSync(telemetryDir)).toBe(true)
  })

  it('appends multiple records', () => {
    for (const task_id of ['task-1', 'task-2']) {
      trackSkillFailure(makeRecord({ task_id }), tmpDir)
    }

    const rows = readFileSync(logFile(), 'utf8').trim().split('\n')
    expect(rows).toHaveLength(2)
  })
})
69
+
70
// Covers reading the NDJSON log back: missing file, skill filter, `since`
// filter, and tolerance of unparseable lines.
describe('getSkillFailures', () => {
  it('returns empty array when file does not exist', () => {
    const found = getSkillFailures('react-development', tmpDir)
    expect(found).toEqual([])
  })

  it('filters by skill name', () => {
    for (const skill_name of ['react-development', 'api-patterns']) {
      trackSkillFailure(makeRecord({ skill_name }), tmpDir)
    }

    const found = getSkillFailures('react-development', tmpDir)
    expect(found).toHaveLength(1)
    expect(found[0].skill_name).toBe('react-development')
  })

  it('filters by since timestamp', () => {
    const january = '2026-01-01T00:00:00.000Z'
    const february = '2026-02-01T00:00:00.000Z'
    trackSkillFailure(makeRecord({ timestamp: january }), tmpDir)
    trackSkillFailure(makeRecord({ timestamp: february }), tmpDir)

    // The cutoff sits between the two records, so only the later one survives.
    const found = getSkillFailures('react-development', tmpDir, '2026-01-15T00:00:00.000Z')
    expect(found).toHaveLength(1)
    expect(found[0].timestamp).toBe('2026-02-01T00:00:00.000Z')
  })

  it('skips malformed lines', () => {
    const telemetryDir = join(tmpDir, '.opencastle/telemetry')
    mkdirSync(telemetryDir, { recursive: true })
    // Hand-written log: one garbage line followed by one valid record.
    const contents = 'not-valid-json\n' + JSON.stringify(makeRecord()) + '\n'
    writeFileSync(join(telemetryDir, 'skill-failures.ndjson'), contents, 'utf8')

    const found = getSkillFailures('react-development', tmpDir)
    expect(found).toHaveLength(1)
  })
})
103
+
104
// Exercises the threshold rules and the word-overlap grouping of
// detectFailurePatterns.
describe('detectFailurePatterns', () => {
  // Most cases only care about the threshold flag.
  const thresholdMet = (records: SkillFailureRecord[]): boolean =>
    detectFailurePatterns(records).threshold_met

  it('returns should_refine false with 0 failures', () => {
    const { should_refine, threshold_met } = detectFailurePatterns([])
    expect(should_refine).toBe(false)
    expect(threshold_met).toBe(false)
  })

  it('returns should_refine false with 1 failure', () => {
    const { should_refine, threshold_met } = detectFailurePatterns([makeRecord()])
    expect(should_refine).toBe(false)
    expect(threshold_met).toBe(false)
  })

  it('returns threshold_met true with 3 failures from different convoys', () => {
    const failure_reason = 'missing type annotation on props interface'
    const records = ['c1', 'c2', 'c3'].map(convoy_id =>
      makeRecord({ convoy_id, agent: 'Dev', failure_reason }),
    )
    expect(thresholdMet(records)).toBe(true)
  })

  it('returns threshold_met true with 2 failures from same agent', () => {
    const records = ['c1', 'c2'].map(convoy_id => makeRecord({ agent: 'Developer', convoy_id }))
    expect(thresholdMet(records)).toBe(true)
  })

  it('returns threshold_met true with failures from 2 different agents from different convoys', () => {
    const records = [
      makeRecord({ agent: 'Developer', convoy_id: 'c1' }),
      makeRecord({ agent: 'UI-Expert', convoy_id: 'c2' }),
    ]
    expect(thresholdMet(records)).toBe(true)
  })

  it('returns threshold_met false with 2 failures from different agents but same convoy', () => {
    const records = [
      makeRecord({ agent: 'Developer', convoy_id: 'c1', task_id: 't1' }),
      makeRecord({ agent: 'UI-Expert', convoy_id: 'c1', task_id: 't2' }),
    ]
    // One convoy only: fewer than 3 convoys, no agent failed twice, and the
    // "two agents" rule also needs at least two convoys, so nothing triggers.
    expect(thresholdMet(records)).toBe(false)
  })

  it('groups similar failure reasons into patterns', () => {
    const reasons = [
      'missing type annotation on props',
      'missing type annotation for function params',
      'completely unrelated import error issue',
    ]
    const records = reasons.map((failure_reason, index) =>
      makeRecord({ failure_reason, convoy_id: `c${index + 1}` }),
    )

    const { patterns } = detectFailurePatterns(records)
    expect(patterns.length).toBeGreaterThan(0)
    // The first two reasons share "missing", "type" and "annotation", so they
    // form the largest group and its pattern is listed first.
    expect(patterns[0]).toContain('annotation')
  })
})
163
+
164
// Checks the shape of a generated proposal, the confidence tiers, and the
// additions derived from detected patterns.
describe('generateRefinementProposal', () => {
  // `count` failures by agent "Dev", each from its own convoy (c0, c1, ...).
  const failuresFromDistinctConvoys = (count: number): SkillFailureRecord[] =>
    Array.from({ length: count }, (_, i) => makeRecord({ convoy_id: `c${i}`, agent: 'Dev' }))

  it('generates proposal with correct fields', () => {
    const failures = ['c1', 'c2', 'c3'].map(convoy_id => makeRecord({ convoy_id }))

    const proposal = generateRefinementProposal('react-development', failures, tmpDir)

    expect(proposal.skill_name).toBe('react-development')
    expect(proposal.failure_count).toBe(3)
    expect(typeof proposal.generated_at).toBe('string')
    // No .github/skills/react-development/SKILL.md exists in the scratch dir.
    expect(proposal.skill_path).toBe('unknown')
  })

  it('sets confidence based on failure count', () => {
    const confidenceFor = (count: number): string =>
      generateRefinementProposal('s', failuresFromDistinctConvoys(count), tmpDir).confidence

    expect(confidenceFor(2)).toBe('low')
    expect(confidenceFor(3)).toBe('medium')
    expect(confidenceFor(5)).toBe('high')
  })

  it('includes proposed additions derived from patterns', () => {
    const failures = [
      makeRecord({ failure_reason: 'missing type annotation on props', convoy_id: 'c1', agent: 'Dev1' }),
      makeRecord({ failure_reason: 'missing type annotation for function', convoy_id: 'c2', agent: 'Dev1' }),
      makeRecord({ failure_reason: 'missing type annotation in hooks', convoy_id: 'c3', agent: 'Dev1' }),
    ]

    const proposal = generateRefinementProposal('react-development', failures, tmpDir)

    // All three reasons share "missing", "type" and "annotation", so exactly one
    // pattern (and one addition) is produced. Asserting unconditionally keeps the
    // test from passing vacuously if pattern detection ever returns nothing.
    expect(proposal.proposed_additions).toEqual([
      "Add to ## Common Pitfalls: 'missing, type, annotation'",
    ])
    expect(proposal.proposed_additions[0]).toMatch(/Add to ## Common Pitfalls:/)
  })
})
201
+
202
// Verifies the Markdown file written by saveProposal and its collision-free naming.
describe('saveProposal', () => {
  /** Builds a proposal fixture; `overrides` replace individual default fields. */
  function makeProposal(overrides: Partial<SkillRefinementProposal> = {}): SkillRefinementProposal {
    const base: SkillRefinementProposal = {
      skill_name: 'react-development',
      skill_path: 'unknown',
      failure_count: 3,
      common_failure_patterns: ['type, annotation, missing'],
      proposed_additions: ["Add to ## Common Pitfalls: 'type, annotation, missing'"],
      confidence: 'medium',
      generated_at: '2026-01-15T10:00:00.000Z',
    }
    return { ...base, ...overrides }
  }

  it('writes markdown file with correct format', () => {
    const written = saveProposal(makeProposal(), tmpDir)
    expect(existsSync(written)).toBe(true)

    const markdown = readFileSync(written, 'utf8')
    const expectedFragments = [
      '# Skill Refinement Proposal: react-development',
      '**Confidence:** medium',
      '## Proposed Changes',
      '## Action',
    ]
    for (const fragment of expectedFragments) {
      expect(markdown).toContain(fragment)
    }
  })

  it('creates proposals directory if needed', () => {
    saveProposal(makeProposal(), tmpDir)
    const proposalsDir = join(tmpDir, '.opencastle/proposals')
    expect(existsSync(proposalsDir)).toBe(true)
  })

  it('handles existing file for same date (counter suffix)', () => {
    // Same skill and same generated_at date, so the second save must pick a new name.
    const first = saveProposal(makeProposal(), tmpDir)
    const second = saveProposal(makeProposal(), tmpDir)
    expect(first).not.toBe(second)
    expect(second).toContain('-2.md')
  })
})
238
+
239
// Covers the per-skill aggregation returned by getFailureStats.
describe('getFailureStats', () => {
  it('returns empty array when no data', () => {
    const stats = getFailureStats(tmpDir)
    expect(stats).toEqual([])
  })

  it('groups and sorts by count descending', () => {
    // Two failures for react-development, one for api-patterns.
    const loggedSkills = ['react-development', 'react-development', 'api-patterns']
    for (const skill_name of loggedSkills) {
      trackSkillFailure(makeRecord({ skill_name }), tmpDir)
    }

    const [mostFrequent, runnerUp] = getFailureStats(tmpDir)
    expect(mostFrequent.skill_name).toBe('react-development')
    expect(mostFrequent.count).toBe(2)
    expect(runnerUp.skill_name).toBe('api-patterns')
    expect(runnerUp.count).toBe(1)
  })
})
255
+
256
// End-to-end check: failures logged for a convoy lead to proposal files only
// when the skill's overall failure history meets the threshold.
describe('runSkillRefinementCheck', () => {
  it('returns empty array when no failures for convoy', () => {
    const outcome = runSkillRefinementCheck('unknown-convoy', tmpDir)
    expect(outcome).toEqual([])
  })

  it('generates proposals for skills meeting threshold', () => {
    // Three failures of one skill, each in a different convoy; the check is run
    // for the last convoy, c3.
    const history = [
      { convoy_id: 'c1', task_id: 't1' },
      { convoy_id: 'c2', task_id: 't2' },
      { convoy_id: 'c3', task_id: 't3' },
    ]
    for (const { convoy_id, task_id } of history) {
      trackSkillFailure(
        makeRecord({ skill_name: 'react-development', convoy_id, agent: 'Dev', task_id }),
        tmpDir,
      )
    }

    const outcome = runSkillRefinementCheck('c3', tmpDir)
    expect(outcome.length).toBeGreaterThan(0)
    expect(outcome[0].skill).toBe('react-development')
    expect(existsSync(outcome[0].proposalPath)).toBe(true)
  })

  it('skips skills not meeting threshold', () => {
    // A single failure in total is below every threshold rule.
    const lone = makeRecord({ skill_name: 'api-patterns', convoy_id: 'c1', task_id: 't1' })
    trackSkillFailure(lone, tmpDir)

    expect(runSkillRefinementCheck('c1', tmpDir)).toEqual([])
  })
})
@@ -0,0 +1,306 @@
1
+ import { existsSync, readFileSync, appendFileSync, mkdirSync, writeFileSync } from 'node:fs'
2
+ import { join } from 'node:path'
3
+ import { scanForSecrets } from './gates.js'
4
+
5
/** Location of the NDJSON failure log, relative to the base directory. */
const SKILL_FAILURES_PATH = '.opencastle/telemetry/skill-failures.ndjson'

/** Words dropped by `tokenize` before failure reasons are compared. */
const STOP_WORDS = new Set([
  'the',
  'a',
  'is',
  'to',
  'and',
  'in',
  'for',
  'of',
  'with',
])
7
+
8
/**
 * One failure attributed to a skill. Records are stored one per line, as JSON,
 * in the skill-failures NDJSON log.
 */
export interface SkillFailureRecord {
  /** Skill the failure is attributed to; used to filter and aggregate records. */
  skill_name: string
  /** Agent that hit the failure; failures are counted per agent. */
  agent: string
  /** Task in which the failure occurred; shown in proposal evidence. */
  task_id: string
  /** Convoy in which the failure occurred; distinct convoys count toward the threshold. */
  convoy_id: string
  /** Free-text reason; tokenized to group similar failures. */
  failure_reason: string
  /** Retry counter for the task (stored only; not read by this module). */
  retry_count: number
  /** Whether the task succeeded in the end (stored only; not read by this module). */
  eventually_succeeded: boolean
  /** Time of the failure; compared as a plain string against `since` filters. */
  timestamp: string
}
18
+
19
/** Suggested improvement to a skill, derived from its recorded failures. */
export interface SkillRefinementProposal {
  /** Skill the proposal is about. */
  skill_name: string
  /** Path of the skill's SKILL.md, or the literal `'unknown'` when it was not found. */
  skill_path: string
  /** Number of failures that were analyzed. */
  failure_count: number
  /** Patterns detected across the failure reasons. */
  common_failure_patterns: string[]
  /** Human-readable suggestions, one per detected pattern. */
  proposed_additions: string[]
  /** Confidence tier derived from `failure_count`. */
  confidence: 'low' | 'medium' | 'high'
  /** Creation time; its first 10 characters are used as the date in file names. */
  generated_at: string
}
28
+
29
/**
 * Appends one failure record to the skill-failures NDJSON log.
 *
 * The telemetry directory is created when missing. The serialized line is
 * passed to `scanForSecrets` first; if the scan does not report it as clean,
 * the record is dropped without any error or log output.
 *
 * @param record - Failure to persist.
 * @param basePath - Directory containing `.opencastle`; defaults to `process.cwd()`.
 */
export function trackSkillFailure(record: SkillFailureRecord, basePath?: string): void {
  const root = basePath ?? process.cwd()
  mkdirSync(join(root, '.opencastle', 'telemetry'), { recursive: true })

  const serialized = `${JSON.stringify(record)}\n`
  const { clean } = scanForSecrets(serialized, 'skill-failures.ndjson')
  if (clean) {
    appendFileSync(join(root, SKILL_FAILURES_PATH), serialized, 'utf8')
  }
}
38
+
39
/**
 * Reads the failure log and returns the records belonging to one skill.
 *
 * @param skillName - Only records whose `skill_name` equals this are returned.
 * @param basePath - Directory containing `.opencastle`; defaults to `process.cwd()`.
 * @param since - Optional lower bound; records whose `timestamp` string sorts
 *   before it are excluded. An empty string disables the filter.
 * @returns Matching records in file order; `[]` when the log does not exist.
 */
export function getSkillFailures(skillName: string, basePath?: string, since?: string): SkillFailureRecord[] {
  const logFile = join(basePath ?? process.cwd(), SKILL_FAILURES_PATH)
  if (!existsSync(logFile)) return []

  const matches: SkillFailureRecord[] = []
  for (const rawLine of readFileSync(logFile, 'utf8').split('\n')) {
    if (rawLine.trim().length === 0) continue
    try {
      // Field access stays inside the try so a non-object JSON value (e.g. null)
      // is skipped exactly like an unparseable line.
      const entry = JSON.parse(rawLine) as SkillFailureRecord
      const isRequestedSkill = entry.skill_name === skillName
      const isTooOld = since ? entry.timestamp < since : false
      if (isRequestedSkill && !isTooOld) {
        matches.push(entry)
      }
    } catch {
      // Malformed line: ignore it and keep reading.
    }
  }
  return matches
}
58
+
59
/**
 * Splits text into a set of lower-cased words for similarity comparison.
 * Words of two characters or fewer and words in `STOP_WORDS` are discarded.
 *
 * @param text - Text to tokenize.
 * @returns The distinct remaining words, in order of first appearance.
 */
function tokenize(text: string): Set<string> {
  const tokens = new Set<string>()
  for (const word of text.toLowerCase().split(/\W+/)) {
    if (word.length <= 2 || STOP_WORDS.has(word)) continue
    tokens.add(word)
  }
  return tokens
}
67
+
68
/**
 * Decides whether a skill's failures justify a refinement proposal and
 * extracts recurring patterns from the failure reasons.
 *
 * The threshold is met when any of these holds:
 * - the failures span 3 or more distinct convoys;
 * - a single agent has 2 or more failures;
 * - there are 2 or more distinct agents and 2 or more distinct convoys.
 *
 * Reasons are grouped greedily: a reason joins the first existing group whose
 * first reason shares at least half of the smaller token set with it. Each
 * group yields at most one pattern: up to five words common to all of its
 * reasons or, if a multi-reason group has no common word, the first 60
 * characters of its first reason. Larger groups come first.
 *
 * @param failures - Failure records, normally all for the same skill.
 * @returns `threshold_met`, the detected `patterns`, and `should_refine`
 *   (threshold met and at least one pattern). With fewer than two failures
 *   everything is false/empty.
 */
export function detectFailurePatterns(failures: SkillFailureRecord[]): {
  should_refine: boolean
  patterns: string[]
  threshold_met: boolean
} {
  if (failures.length < 2) {
    return { should_refine: false, patterns: [], threshold_met: false }
  }

  const convoyIds = new Set<string>()
  const failuresPerAgent = new Map<string, number>()
  for (const { convoy_id, agent } of failures) {
    convoyIds.add(convoy_id)
    failuresPerAgent.set(agent, (failuresPerAgent.get(agent) ?? 0) + 1)
  }
  const someAgentFailedTwice = [...failuresPerAgent.values()].some(n => n >= 2)

  // threshold: 3+ different convoys, OR same agent 2+ failures,
  // OR 2+ different agents together with 2+ different convoys
  const threshold_met =
    convoyIds.size >= 3 ||
    someAgentFailedTwice ||
    (failuresPerAgent.size >= 2 && convoyIds.size >= 2)

  // Each reason is tokenized exactly once. A group keeps the tokens of its
  // first reason (the comparison anchor) and the running intersection of all
  // member token sets, so nothing has to be re-tokenized later.
  type ReasonGroup = {
    firstReason: string
    anchorTokens: Set<string>
    sharedTokens: Set<string>
    size: number
  }
  const groups: ReasonGroup[] = []

  for (const { failure_reason } of failures) {
    const tokens = tokenize(failure_reason)

    const home = groups.find(group => {
      const smaller = Math.min(tokens.size, group.anchorTokens.size)
      if (smaller === 0) return false // reasons without usable words never match
      let overlap = 0
      for (const token of tokens) {
        if (group.anchorTokens.has(token)) overlap++
      }
      return overlap / smaller >= 0.5
    })

    if (home) {
      home.sharedTokens = new Set([...home.sharedTokens].filter(t => tokens.has(t)))
      home.size++
    } else {
      groups.push({
        firstReason: failure_reason,
        anchorTokens: tokens,
        sharedTokens: new Set(tokens),
        size: 1,
      })
    }
  }

  // Largest groups first; the sort is stable, so ties keep discovery order.
  groups.sort((a, b) => b.size - a.size)

  const patterns: string[] = []
  for (const group of groups) {
    if (group.sharedTokens.size > 0) {
      patterns.push([...group.sharedTokens].slice(0, 5).join(', '))
    } else if (group.size > 1) {
      patterns.push(group.firstReason.slice(0, 60))
    }
  }

  const should_refine = threshold_met && patterns.length > 0

  return { should_refine, patterns, threshold_met }
}
133
+
134
/**
 * Builds a refinement proposal for a skill from its failure records.
 *
 * @param skillName - Skill the proposal is for.
 * @param failures - Failures to analyze; their count drives the confidence
 *   tier (5+ → high, 3–4 → medium, otherwise low).
 * @param basePath - Directory searched for `.github/skills/<skillName>/SKILL.md`;
 *   defaults to `process.cwd()`.
 * @returns The proposal. `skill_path` is `'unknown'` when the SKILL.md file does
 *   not exist, and `generated_at` is the current time as an ISO string.
 */
export function generateRefinementProposal(
  skillName: string,
  failures: SkillFailureRecord[],
  basePath?: string,
): SkillRefinementProposal {
  const candidatePath = join(basePath ?? process.cwd(), '.github', 'skills', skillName, 'SKILL.md')
  const { patterns } = detectFailurePatterns(failures)
  const failureCount = failures.length

  let confidence: 'low' | 'medium' | 'high' = 'low'
  if (failureCount >= 5) {
    confidence = 'high'
  } else if (failureCount >= 3) {
    confidence = 'medium'
  }

  return {
    skill_name: skillName,
    skill_path: existsSync(candidatePath) ? candidatePath : 'unknown',
    failure_count: failureCount,
    common_failure_patterns: patterns,
    // One suggestion per detected pattern.
    proposed_additions: patterns.map(pattern => `Add to ## Common Pitfalls: '${pattern}'`),
    confidence,
    generated_at: new Date().toISOString(),
  }
}
163
+
164
/**
 * Renders a proposal and its supporting failures as a Markdown document.
 *
 * At most the first 20 failures are listed as evidence, and each failure reason
 * is truncated to 80 characters. Table cells are escaped so that `|` and line
 * breaks inside free-text fields cannot break the evidence table.
 *
 * @param proposal - Proposal to render.
 * @param failures - Failures shown in the evidence table (may be empty).
 * @returns The Markdown text.
 */
function buildProposalMarkdown(proposal: SkillRefinementProposal, failures: SkillFailureRecord[]): string {
  // A raw "|" would start a new column and a newline would end the row, so both
  // are neutralised before the value is placed in a table cell.
  const toCell = (value: string): string =>
    String(value).replace(/\r\n|\r|\n/g, ' ').replace(/\|/g, '\\|')

  const date = proposal.generated_at.slice(0, 10)
  const patternList =
    proposal.common_failure_patterns.map(p => `- ${p}`).join('\n') || '- (none detected)'
  const additionsList =
    proposal.proposed_additions.map(a => `- ${a}`).join('\n') || '- (none)'
  const evidenceRows = failures
    .slice(0, 20)
    .map(f => {
      // Truncate first, then escape, so an escape sequence is never cut in half.
      const reason = toCell(f.failure_reason.slice(0, 80))
      return `| ${toCell(f.convoy_id)} | ${toCell(f.task_id)} | ${toCell(f.agent)} | ${reason} |`
    })
    .join('\n')

  return `# Skill Refinement Proposal: ${proposal.skill_name}

**Generated:** ${date}
**Failures analyzed:** ${proposal.failure_count}
**Confidence:** ${proposal.confidence}

## Failure Pattern Summary

${patternList}

## Proposed Changes

${additionsList}

## Evidence

| Convoy | Task | Agent | Failure Reason |
|--------|------|-------|---------------|
${evidenceRows}

## Action

- [ ] Apply this proposal: edit \`${proposal.skill_path}\` manually
- [ ] Reject: delete this file
`
}
201
+
202
/**
 * Writes a proposal as a Markdown file under `<base>/.opencastle/proposals`.
 *
 * The file is named `skill-<skill_name>-<YYYY-MM-DD>.md`. When that name is
 * taken, a numeric suffix starting at `-2` is appended until a free name is
 * found, so existing proposals are not overwritten. Characters outside
 * `A-Z a-z 0-9 . _ -` in the name are replaced with `_`, which keeps a skill
 * name containing path separators from escaping the proposals directory.
 *
 * @param proposal - Proposal to persist.
 * @param basePath - Directory containing `.opencastle`; defaults to `process.cwd()`.
 * @param failures - Failures rendered as evidence in the document.
 * @returns The path of the file that was written.
 */
export function saveProposal(
  proposal: SkillRefinementProposal,
  basePath?: string,
  failures: SkillFailureRecord[] = [],
): string {
  const dir = join(basePath ?? process.cwd(), '.opencastle', 'proposals')
  mkdirSync(dir, { recursive: true })

  const date = proposal.generated_at.slice(0, 10)
  // Both parts come from data (the telemetry log / proposal), so the whole stem
  // is restricted to characters that are safe inside a single file name.
  const stem = `skill-${proposal.skill_name}-${date}`.replace(/[^A-Za-z0-9._-]/g, '_')

  let filePath = join(dir, `${stem}.md`)
  for (let counter = 2; existsSync(filePath); counter++) {
    filePath = join(dir, `${stem}-${counter}.md`)
  }

  writeFileSync(filePath, buildProposalMarkdown(proposal, failures), 'utf8')
  return filePath
}
225
+
226
/**
 * Aggregates the failure log per skill.
 *
 * Lines that are not valid JSON, or that parse to something without string
 * `skill_name`, `agent` and `timestamp` fields, are ignored so they cannot
 * produce an `undefined` entry in the result.
 *
 * @param basePath - Directory containing `.opencastle`; defaults to `process.cwd()`.
 * @returns One entry per skill with its failure count, the distinct agents
 *   involved and the greatest timestamp (by string comparison), sorted by
 *   count in descending order. `[]` when the log does not exist.
 */
export function getFailureStats(
  basePath?: string,
): Array<{ skill_name: string; count: number; agents: string[]; latest: string }> {
  const logFile = join(basePath ?? process.cwd(), SKILL_FAILURES_PATH)
  if (!existsSync(logFile)) return []

  const statsBySkill = new Map<string, { count: number; agents: Set<string>; latest: string }>()

  for (const line of readFileSync(logFile, 'utf8').split('\n')) {
    if (!line.trim()) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      continue // skip malformed lines
    }
    // JSON.parse also accepts null, numbers, strings and arrays; none of those is a record.
    if (typeof parsed !== 'object' || parsed === null) continue
    const { skill_name, agent, timestamp } = parsed as Partial<SkillFailureRecord>
    if (typeof skill_name !== 'string' || typeof agent !== 'string' || typeof timestamp !== 'string') {
      continue
    }

    const stats = statsBySkill.get(skill_name)
    if (stats) {
      stats.count++
      stats.agents.add(agent)
      if (timestamp > stats.latest) stats.latest = timestamp
    } else {
      statsBySkill.set(skill_name, { count: 1, agents: new Set([agent]), latest: timestamp })
    }
  }

  return [...statsBySkill.entries()]
    .map(([skill_name, s]) => ({
      skill_name,
      count: s.count,
      agents: [...s.agents],
      latest: s.latest,
    }))
    .sort((a, b) => b.count - a.count)
}
266
+
267
/**
 * Generates refinement proposals for the skills that failed in a convoy.
 *
 * For every skill with at least one logged failure in `convoyId`, the skill's
 * complete failure history (all convoys) is checked against the threshold of
 * `detectFailurePatterns`. Skills that meet it get a proposal file.
 *
 * Log lines that are not valid JSON, or that lack the string fields this check
 * relies on (`skill_name`, `agent`, `convoy_id`, `failure_reason`), are
 * ignored. Without that validation a line such as `null` would throw while
 * filtering, and a record without `failure_reason` would throw during
 * tokenization.
 *
 * @param convoyId - Convoy whose failed skills should be examined.
 * @param basePath - Directory containing `.opencastle`; defaults to `process.cwd()`.
 * @returns One `{ skill, proposalPath }` entry per proposal written; `[]` when
 *   the log is missing or the convoy has no recorded failures.
 */
export function runSkillRefinementCheck(
  convoyId: string,
  basePath?: string,
): Array<{ skill: string; proposalPath: string }> {
  const base = basePath ?? process.cwd()
  const logFile = join(base, SKILL_FAILURES_PATH)
  if (!existsSync(logFile)) return []

  // Minimal structural check for the fields read further down.
  const isUsableRecord = (value: unknown): value is SkillFailureRecord => {
    if (typeof value !== 'object' || value === null) return false
    const candidate = value as Record<string, unknown>
    return (
      typeof candidate.skill_name === 'string' &&
      typeof candidate.agent === 'string' &&
      typeof candidate.convoy_id === 'string' &&
      typeof candidate.failure_reason === 'string'
    )
  }

  const allRecords: SkillFailureRecord[] = []
  for (const line of readFileSync(logFile, 'utf8').split('\n')) {
    if (!line.trim()) continue
    try {
      const parsed: unknown = JSON.parse(line)
      if (isUsableRecord(parsed)) allRecords.push(parsed)
    } catch {
      // skip malformed
    }
  }

  const convoySkills = new Set(
    allRecords.filter(r => r.convoy_id === convoyId).map(r => r.skill_name),
  )
  if (convoySkills.size === 0) return []

  const results: Array<{ skill: string; proposalPath: string }> = []
  for (const skillName of convoySkills) {
    // The threshold looks at the skill's whole history, not only this convoy.
    const skillHistory = allRecords.filter(r => r.skill_name === skillName)
    if (!detectFailurePatterns(skillHistory).threshold_met) continue

    const proposal = generateRefinementProposal(skillName, skillHistory, base)
    results.push({ skill: skillName, proposalPath: saveProposal(proposal, base, skillHistory) })
  }

  return results
}
@@ -228,6 +228,67 @@ describe('buildConvoyYaml', () => {
228
228
  expect(result.valid).toBe(true)
229
229
  expect(result.errors).toEqual([])
230
230
  })
231
+
232
+ // ── complexity effort-scaling integration ────────────────────────────────────
233
+
234
// A task that only declares `complexity: 3` should come out with the
// timeout, retry count and review level asserted below.
it('auto-populates timeout, max_retries, review from effort table when complexity is set', () => {
  const effortPlan: TaskPlan = {
    name: 'Effort Test',
    tasks: [{ id: 'task-1', complexity: 3, prompt: 'Do something' }],
  }
  const yamlText = buildConvoyYaml(effortPlan)
  const firstTask = yamlParse(yamlText).tasks[0]
  expect(firstTask.timeout).toBe('15m')
  expect(firstTask.max_retries).toBe(2)
  expect(firstTask.review).toBe('fast')
})
245
+
246
// An explicit `timeout` must survive even though `complexity` is also given.
it('does not override explicitly set timeout when complexity is also set', () => {
  const planWithTimeout: TaskPlan = {
    name: 'Effort Override Test',
    tasks: [{ id: 'task-1', complexity: 3, timeout: '1h', prompt: 'Do something' }],
  }
  const yamlText = buildConvoyYaml(planWithTimeout)
  const doc = yamlParse(yamlText)
  expect(doc.tasks[0].timeout).toBe('1h')
})
254
+
255
// An explicit `max_retries` must survive even though `complexity` is also given.
it('does not override explicitly set max_retries when complexity is also set', () => {
  const planWithRetries: TaskPlan = {
    name: 'Effort Override Test',
    tasks: [{ id: 'task-1', complexity: 5, max_retries: 5, prompt: 'Do something' }],
  }
  const yamlText = buildConvoyYaml(planWithRetries)
  const doc = yamlParse(yamlText)
  expect(doc.tasks[0].max_retries).toBe(5)
})
263
+
264
// An explicit `review` must survive even though `complexity` is also given.
it('does not override explicitly set review when complexity is also set', () => {
  const planWithReview: TaskPlan = {
    name: 'Effort Override Test',
    tasks: [{ id: 'task-1', complexity: 8, review: 'panel', prompt: 'Do something' }],
  }
  const yamlText = buildConvoyYaml(planWithReview)
  const doc = yamlParse(yamlText)
  expect(doc.tasks[0].review).toBe('panel')
})
272
+
273
// With the minimal plan, none of the three effort-derived fields should be emitted.
it('works unchanged (backward compatible) when complexity is not set', () => {
  const yamlText = buildConvoyYaml(minimalPlan())
  const firstTask = yamlParse(yamlText).tasks[0]
  expect(firstTask.timeout).toBeUndefined()
  expect(firstTask.max_retries).toBeUndefined()
  expect(firstTask.review).toBeUndefined()
})
280
+
281
// A task that only declares `complexity: 13` should come out with the
// timeout, retry count and review level asserted below.
it('uses complexity-13 profile for epic tasks', () => {
  const epicPlan: TaskPlan = {
    name: 'Epic Test',
    tasks: [{ id: 'task-1', complexity: 13, prompt: 'Epic task' }],
  }
  const yamlText = buildConvoyYaml(epicPlan)
  const epicTask = yamlParse(yamlText).tasks[0]
  expect(epicTask.timeout).toBe('45m')
  expect(epicTask.max_retries).toBe(3)
  expect(epicTask.review).toBe('panel')
})
231
292
  })
232
293
 
233
294
  // ── applyPatches ──────────────────────────────────────────────────────────────
@@ -1,4 +1,5 @@
1
1
  import { stringify } from 'yaml'
2
+ import { getEffortProfile } from './effort-scaling.js'
2
3
 
3
4
  /** The task plan generated by the LLM — just the creative decomposition, no YAML concerns */
4
5
  export interface TaskPlanTask {
@@ -10,6 +11,7 @@ export interface TaskPlanTask {
10
11
  timeout?: string
11
12
  max_retries?: number
12
13
  review?: string
14
+ complexity?: 1 | 2 | 3 | 5 | 8 | 13
13
15
  prompt: string
14
16
  gates?: string[]
15
17
  built_in_gates?: Record<string, boolean | string>
@@ -117,6 +119,13 @@ export function buildConvoyYaml(plan: TaskPlan, enrichment?: SpecEnrichment): st
117
119
  if (task.review !== undefined) t.review = task.review
118
120
  if (task.gates !== undefined) t.gates = task.gates
119
121
  if (task.built_in_gates !== undefined) t.built_in_gates = task.built_in_gates
122
+ // Auto-populate from effort table when complexity is set and fields are missing
123
+ if (task.complexity !== undefined) {
124
+ const profile = getEffortProfile(task.complexity)
125
+ if (t.timeout === undefined) t.timeout = profile.timeout
126
+ if (t.max_retries === undefined) t.max_retries = profile.max_retries
127
+ if (t.review === undefined) t.review = profile.review
128
+ }
120
129
  // prompt last — keeps the long text at the end of each task block
121
130
  t.prompt = task.prompt
122
131
  return t