@helmiq/crew 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. package/defaults/personas/architect.persona.yaml +72 -0
  2. package/defaults/personas/engineer.persona.yaml +137 -0
  3. package/defaults/personas/persona-spec.schema.yaml +149 -0
  4. package/defaults/personas/reviewer.persona.yaml +47 -0
  5. package/defaults/rubrics/adr.rubric.yaml +48 -0
  6. package/defaults/rubrics/code-review.rubric.yaml +39 -0
  7. package/defaults/rubrics/pull-request.rubric.yaml +40 -0
  8. package/dist/actions/actions.test.d.ts +2 -0
  9. package/dist/actions/actions.test.d.ts.map +1 -0
  10. package/dist/actions/actions.test.js +158 -0
  11. package/dist/actions/direct-dispatcher.d.ts +10 -0
  12. package/dist/actions/direct-dispatcher.d.ts.map +1 -0
  13. package/dist/actions/direct-dispatcher.js +27 -0
  14. package/dist/actions/dispatcher.d.ts +11 -0
  15. package/dist/actions/dispatcher.d.ts.map +1 -0
  16. package/dist/actions/dispatcher.js +1 -0
  17. package/dist/actions/index.d.ts +7 -0
  18. package/dist/actions/index.d.ts.map +1 -0
  19. package/dist/actions/index.js +3 -0
  20. package/dist/actions/registry.d.ts +13 -0
  21. package/dist/actions/registry.d.ts.map +1 -0
  22. package/dist/actions/registry.js +40 -0
  23. package/dist/actions/resolver.d.ts +47 -0
  24. package/dist/actions/resolver.d.ts.map +1 -0
  25. package/dist/actions/resolver.js +43 -0
  26. package/dist/cli/cli.test.d.ts +2 -0
  27. package/dist/cli/cli.test.d.ts.map +1 -0
  28. package/dist/cli/cli.test.js +392 -0
  29. package/dist/cli/run.d.ts +45 -0
  30. package/dist/cli/run.d.ts.map +1 -0
  31. package/dist/cli/run.js +236 -0
  32. package/dist/common/errors.d.ts +76 -0
  33. package/dist/common/errors.d.ts.map +1 -0
  34. package/dist/common/errors.js +74 -0
  35. package/dist/config/config.test.d.ts +2 -0
  36. package/dist/config/config.test.d.ts.map +1 -0
  37. package/dist/config/config.test.js +691 -0
  38. package/dist/config/index.d.ts +7 -0
  39. package/dist/config/index.d.ts.map +1 -0
  40. package/dist/config/index.js +4 -0
  41. package/dist/config/loader.d.ts +16 -0
  42. package/dist/config/loader.d.ts.map +1 -0
  43. package/dist/config/loader.js +56 -0
  44. package/dist/config/model-resolver.d.ts +24 -0
  45. package/dist/config/model-resolver.d.ts.map +1 -0
  46. package/dist/config/model-resolver.js +39 -0
  47. package/dist/config/resolver.d.ts +22 -0
  48. package/dist/config/resolver.d.ts.map +1 -0
  49. package/dist/config/resolver.js +115 -0
  50. package/dist/config/schemas.d.ts +266 -0
  51. package/dist/config/schemas.d.ts.map +1 -0
  52. package/dist/config/schemas.js +115 -0
  53. package/dist/context/artifact-reader.d.ts +12 -0
  54. package/dist/context/artifact-reader.d.ts.map +1 -0
  55. package/dist/context/artifact-reader.js +92 -0
  56. package/dist/context/assembler.d.ts +22 -0
  57. package/dist/context/assembler.d.ts.map +1 -0
  58. package/dist/context/assembler.js +126 -0
  59. package/dist/context/code-reader.d.ts +14 -0
  60. package/dist/context/code-reader.d.ts.map +1 -0
  61. package/dist/context/code-reader.js +56 -0
  62. package/dist/context/context.test.d.ts +2 -0
  63. package/dist/context/context.test.d.ts.map +1 -0
  64. package/dist/context/context.test.js +260 -0
  65. package/dist/context/index.d.ts +9 -0
  66. package/dist/context/index.d.ts.map +1 -0
  67. package/dist/context/index.js +5 -0
  68. package/dist/context/section-extractor.d.ts +9 -0
  69. package/dist/context/section-extractor.d.ts.map +1 -0
  70. package/dist/context/section-extractor.js +32 -0
  71. package/dist/context/token-budget.d.ts +11 -0
  72. package/dist/context/token-budget.d.ts.map +1 -0
  73. package/dist/context/token-budget.js +22 -0
  74. package/dist/control/control.test.d.ts +2 -0
  75. package/dist/control/control.test.d.ts.map +1 -0
  76. package/dist/control/control.test.js +137 -0
  77. package/dist/control/id-generator.d.ts +12 -0
  78. package/dist/control/id-generator.d.ts.map +1 -0
  79. package/dist/control/id-generator.js +20 -0
  80. package/dist/control/index.d.ts +5 -0
  81. package/dist/control/index.d.ts.map +1 -0
  82. package/dist/control/index.js +3 -0
  83. package/dist/control/lock-manager.d.ts +13 -0
  84. package/dist/control/lock-manager.d.ts.map +1 -0
  85. package/dist/control/lock-manager.js +72 -0
  86. package/dist/control/run-state.d.ts +16 -0
  87. package/dist/control/run-state.d.ts.map +1 -0
  88. package/dist/control/run-state.js +55 -0
  89. package/dist/engine/composite.d.ts +34 -0
  90. package/dist/engine/composite.d.ts.map +1 -0
  91. package/dist/engine/composite.js +192 -0
  92. package/dist/engine/composite.test.d.ts +2 -0
  93. package/dist/engine/composite.test.d.ts.map +1 -0
  94. package/dist/engine/composite.test.js +1947 -0
  95. package/dist/engine/engine.test.d.ts +2 -0
  96. package/dist/engine/engine.test.d.ts.map +1 -0
  97. package/dist/engine/engine.test.js +334 -0
  98. package/dist/engine/index.d.ts +10 -0
  99. package/dist/engine/index.d.ts.map +1 -0
  100. package/dist/engine/index.js +5 -0
  101. package/dist/engine/llm-client.d.ts +27 -0
  102. package/dist/engine/llm-client.d.ts.map +1 -0
  103. package/dist/engine/llm-client.js +46 -0
  104. package/dist/engine/simple.d.ts +21 -0
  105. package/dist/engine/simple.d.ts.map +1 -0
  106. package/dist/engine/simple.js +59 -0
  107. package/dist/engine/tool-dispatch.d.ts +37 -0
  108. package/dist/engine/tool-dispatch.d.ts.map +1 -0
  109. package/dist/engine/tool-dispatch.js +146 -0
  110. package/dist/engine/tool-dispatch.test.d.ts +2 -0
  111. package/dist/engine/tool-dispatch.test.d.ts.map +1 -0
  112. package/dist/engine/tool-dispatch.test.js +348 -0
  113. package/dist/engine/tool-filter.d.ts +13 -0
  114. package/dist/engine/tool-filter.d.ts.map +1 -0
  115. package/dist/engine/tool-filter.js +25 -0
  116. package/dist/evaluation/evaluation.test.d.ts +2 -0
  117. package/dist/evaluation/evaluation.test.d.ts.map +1 -0
  118. package/dist/evaluation/evaluation.test.js +490 -0
  119. package/dist/evaluation/evaluator.d.ts +19 -0
  120. package/dist/evaluation/evaluator.d.ts.map +1 -0
  121. package/dist/evaluation/evaluator.js +78 -0
  122. package/dist/evaluation/index.d.ts +4 -0
  123. package/dist/evaluation/index.d.ts.map +1 -0
  124. package/dist/evaluation/index.js +2 -0
  125. package/dist/evaluation/scorer.d.ts +38 -0
  126. package/dist/evaluation/scorer.d.ts.map +1 -0
  127. package/dist/evaluation/scorer.js +94 -0
  128. package/dist/index.d.ts +47 -0
  129. package/dist/index.d.ts.map +1 -0
  130. package/dist/index.js +28 -0
  131. package/dist/providers/index.d.ts +2 -0
  132. package/dist/providers/index.d.ts.map +1 -0
  133. package/dist/providers/index.js +1 -0
  134. package/dist/providers/provider-factory.d.ts +11 -0
  135. package/dist/providers/provider-factory.d.ts.map +1 -0
  136. package/dist/providers/provider-factory.js +30 -0
  137. package/dist/publication/frontmatter.d.ts +21 -0
  138. package/dist/publication/frontmatter.d.ts.map +1 -0
  139. package/dist/publication/frontmatter.js +15 -0
  140. package/dist/publication/git-ops.d.ts +18 -0
  141. package/dist/publication/git-ops.d.ts.map +1 -0
  142. package/dist/publication/git-ops.js +74 -0
  143. package/dist/publication/index.d.ts +9 -0
  144. package/dist/publication/index.d.ts.map +1 -0
  145. package/dist/publication/index.js +5 -0
  146. package/dist/publication/provenance-writer.d.ts +27 -0
  147. package/dist/publication/provenance-writer.d.ts.map +1 -0
  148. package/dist/publication/provenance-writer.js +21 -0
  149. package/dist/publication/publication.test.d.ts +2 -0
  150. package/dist/publication/publication.test.d.ts.map +1 -0
  151. package/dist/publication/publication.test.js +235 -0
  152. package/dist/publication/publisher.d.ts +32 -0
  153. package/dist/publication/publisher.d.ts.map +1 -0
  154. package/dist/publication/publisher.js +113 -0
  155. package/dist/publication/secret-scanner.d.ts +6 -0
  156. package/dist/publication/secret-scanner.d.ts.map +1 -0
  157. package/dist/publication/secret-scanner.js +19 -0
  158. package/dist/tools/index.d.ts +4 -0
  159. package/dist/tools/index.d.ts.map +1 -0
  160. package/dist/tools/index.js +2 -0
  161. package/dist/tools/registry.d.ts +15 -0
  162. package/dist/tools/registry.d.ts.map +1 -0
  163. package/dist/tools/registry.js +288 -0
  164. package/dist/tools/registry.test.d.ts +2 -0
  165. package/dist/tools/registry.test.d.ts.map +1 -0
  166. package/dist/tools/registry.test.js +131 -0
  167. package/dist/tools/tool-groups.d.ts +20 -0
  168. package/dist/tools/tool-groups.d.ts.map +1 -0
  169. package/dist/tools/tool-groups.js +48 -0
  170. package/dist/tools/tool-groups.test.d.ts +2 -0
  171. package/dist/tools/tool-groups.test.d.ts.map +1 -0
  172. package/dist/tools/tool-groups.test.js +127 -0
  173. package/dist/types/artifact-store.d.ts +33 -0
  174. package/dist/types/artifact-store.d.ts.map +1 -0
  175. package/dist/types/artifact-store.js +9 -0
  176. package/dist/types/evaluation-rubric.d.ts +18 -0
  177. package/dist/types/evaluation-rubric.d.ts.map +1 -0
  178. package/dist/types/evaluation-rubric.js +1 -0
  179. package/dist/types/index.d.ts +10 -0
  180. package/dist/types/index.d.ts.map +1 -0
  181. package/dist/types/index.js +1 -0
  182. package/dist/types/llm-provider.d.ts +47 -0
  183. package/dist/types/llm-provider.d.ts.map +1 -0
  184. package/dist/types/llm-provider.js +8 -0
  185. package/dist/types/persona-spec.d.ts +79 -0
  186. package/dist/types/persona-spec.d.ts.map +1 -0
  187. package/dist/types/persona-spec.js +1 -0
  188. package/dist/types/project-config.d.ts +28 -0
  189. package/dist/types/project-config.d.ts.map +1 -0
  190. package/dist/types/project-config.js +1 -0
  191. package/dist/types/provenance.d.ts +67 -0
  192. package/dist/types/provenance.d.ts.map +1 -0
  193. package/dist/types/provenance.js +1 -0
  194. package/dist/types/run-state.d.ts +11 -0
  195. package/dist/types/run-state.d.ts.map +1 -0
  196. package/dist/types/run-state.js +1 -0
  197. package/dist/types/tool-runtime.d.ts +43 -0
  198. package/dist/types/tool-runtime.d.ts.map +1 -0
  199. package/dist/types/tool-runtime.js +30 -0
  200. package/dist/workspace/detect.d.ts +11 -0
  201. package/dist/workspace/detect.d.ts.map +1 -0
  202. package/dist/workspace/detect.js +28 -0
  203. package/dist/workspace/detect.test.d.ts +2 -0
  204. package/dist/workspace/detect.test.d.ts.map +1 -0
  205. package/dist/workspace/detect.test.js +53 -0
  206. package/dist/workspace/index.d.ts +2 -0
  207. package/dist/workspace/index.d.ts.map +1 -0
  208. package/dist/workspace/index.js +1 -0
  209. package/package.json +51 -0
@@ -0,0 +1,392 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
2
+ import { mkdtemp, writeFile, mkdir } from 'node:fs/promises';
3
+ import { join } from 'node:path';
4
+ import { tmpdir } from 'node:os';
5
+ import { execSync } from 'node:child_process';
6
+ import { resolveProtectedPaths } from './run.js';
7
/**
 * Provider factory stub for the CLI tests. Each call builds a new provider
 * object whose `generateText` resolves to an empty completion: no text, zero
 * tokens in and out, and no tool calls.
 */
const stubProviderFactory = () => {
    const generateText = async () => {
        const emptyCompletion = { text: '', tokensIn: 0, tokensOut: 0, toolCalls: [] };
        return emptyCompletion;
    };
    return { generateText };
};
10
+ let workspacePath;
11
/**
 * Probe whether `git` can be invoked from a shell in this environment by
 * running `git init` inside a throwaway directory under the OS temp dir.
 *
 * The scratch directory is removed in a `finally` block, so it is cleaned up
 * even when `git init` fails. Cleanup is best-effort: a failing `rm` does not
 * change the answer about git availability.
 *
 * @returns {boolean} `true` when `mkdir` + `git init` succeeded, `false` otherwise.
 */
function canRunGit() {
    const testDir = join(tmpdir(), `crew-cli-git-${Date.now()}`);
    try {
        // Paths are quoted so a temp dir containing spaces does not split the command.
        execSync(`mkdir -p "${testDir}" && cd "${testDir}" && git init`, {
            stdio: 'ignore',
        });
        return true;
    }
    catch {
        return false;
    }
    finally {
        try {
            execSync(`rm -rf "${testDir}"`, { stdio: 'ignore' });
        }
        catch {
            // Best-effort cleanup only; the probe result is already decided.
        }
    }
}
24
+ async function createFixtureWorkspace() {
25
+ const base = await mkdtemp(join(tmpdir(), 'crew-cli-test-'));
26
+ const ws = join(base, 'workspace');
27
+ const crewDir = join(ws, '.crew');
28
+ const agentDir = join(crewDir, 'agents', 'test-persona');
29
+ const rubricDir = join(crewDir, 'rubrics', 'rubrics');
30
+ const skillDir = join(crewDir, 'skills', 'test-skill');
31
+ const workDir = join(ws, 'work', 'TEST-01');
32
+ const runsDir = join(ws, 'runs');
33
+ await mkdir(agentDir, { recursive: true });
34
+ await mkdir(rubricDir, { recursive: true });
35
+ await mkdir(skillDir, { recursive: true });
36
+ await mkdir(workDir, { recursive: true });
37
+ await mkdir(runsDir, { recursive: true });
38
+ await writeFile(join(crewDir, 'config'), `project:
39
+ name: Test
40
+ key: TEST
41
+ workspace:
42
+ path: ${ws}
43
+ work: work/{EPIC_ID}/
44
+ runs: runs/
45
+ source:
46
+ repo: github:test/project
47
+ path: ../target
48
+ llm:
49
+ default_model: claude-sonnet
50
+ providers:
51
+ anthropic:
52
+ api_key_env: ANTHROPIC_API_KEY
53
+ models:
54
+ claude-sonnet: claude-sonnet-4-20250514
55
+ `, 'utf-8');
56
+ await writeFile(join(skillDir, 'test-skill.prompt.md'), 'You are a Test Role persona. Produce a test artifact.', 'utf-8');
57
+ await writeFile(join(agentDir, 'persona.yaml'), `persona:
58
+ name: test-persona
59
+ identity:
60
+ role: Test Role
61
+ skills:
62
+ - test-skill
63
+ perception:
64
+ per_task:
65
+ test-task:
66
+ - artifact: requirements
67
+ scope: { epic: '{{ .EPIC_ID }}' }
68
+ tasks:
69
+ test-task:
70
+ mode: simple
71
+ trigger: [manual]
72
+ skill: test-skill
73
+ produces: test-output
74
+ tools:
75
+ permitted: [read-artifact]
76
+ denied: []
77
+ cadence: {}
78
+ evaluation:
79
+ rubric: rubrics/test.rubric.yaml
80
+ `, 'utf-8');
81
+ await writeFile(join(rubricDir, 'test.rubric.yaml'), `rubric:
82
+ artifact_type: test-output
83
+ scoring_scale: 10
84
+ pass_threshold: 7
85
+ criteria:
86
+ - name: Completeness
87
+ weight: blocking
88
+ description: Output addresses all requirements
89
+ `, 'utf-8');
90
+ if (canRunGit()) {
91
+ execSync('git init', { cwd: ws, stdio: 'ignore' });
92
+ execSync('git config user.email "test@test.com"', {
93
+ cwd: ws,
94
+ stdio: 'ignore',
95
+ });
96
+ execSync('git config user.name "Test"', { cwd: ws, stdio: 'ignore' });
97
+ execSync('git commit --allow-empty -m "init"', {
98
+ cwd: ws,
99
+ stdio: 'ignore',
100
+ });
101
+ }
102
+ return ws;
103
+ }
104
// Before every test: restore all vitest mocks, then build a new fixture
// workspace and publish its path through the shared `workspacePath` variable.
beforeEach(async () => {
    vi.restoreAllMocks();
    const freshWorkspace = await createFixtureWorkspace();
    workspacePath = freshWorkspace;
});
108
// Unit tests for resolveProtectedPaths: which of the three default protected
// paths remain once a persona's writable paths are taken out.
describe('resolveProtectedPaths', () => {
    const PRODUCT = 'product/';
    const STANDARDS = 'standards/';
    const DECISIONS = 'architecture/decisions/';
    it('returns all default protected paths when no writable_paths provided', () => {
        const protectedPaths = resolveProtectedPaths(undefined);
        for (const expected of [PRODUCT, STANDARDS, DECISIONS]) {
            expect(protectedPaths).toContain(expected);
        }
    });
    it('returns all default protected paths for empty writable_paths', () => {
        const protectedPaths = resolveProtectedPaths([]);
        for (const expected of [PRODUCT, STANDARDS, DECISIONS]) {
            expect(protectedPaths).toContain(expected);
        }
    });
    it('excludes writable_paths from protected paths', () => {
        const protectedPaths = resolveProtectedPaths([DECISIONS]);
        expect(protectedPaths).toContain(PRODUCT);
        expect(protectedPaths).toContain(STANDARDS);
        expect(protectedPaths).not.toContain(DECISIONS);
    });
    it('can exclude multiple writable paths', () => {
        const protectedPaths = resolveProtectedPaths([DECISIONS, STANDARDS]);
        expect(protectedPaths).toContain(PRODUCT);
        expect(protectedPaths).not.toContain(STANDARDS);
        expect(protectedPaths).not.toContain(DECISIONS);
    });
});
134
// T-01-008a: with the execute, evaluate and publish stages stubbed, a run
// loads config exactly once and reports a published artifact.
describe('T-01-008a: CLI triggers full run', () => {
    it('loads config and executes the pipeline', async () => {
        // Reset vitest's module registry before importing the modules to spy on.
        vi.resetModules();
        const runCli = await import('./run.js');
        const configApi = await import('../config/index.js');
        const simpleEngine = await import('../engine/simple.js');
        const evaluator = await import('../evaluation/evaluator.js');
        const publisher = await import('../publication/publisher.js');
        const taskOutput = {
            content: 'Test artifact output',
            toolCalls: [],
            tokensIn: 50,
            tokensOut: 30,
            durationMs: 500,
            model: 'claude-sonnet-4-20250514',
        };
        const passingEvaluation = {
            finalArtifact: 'Test artifact output',
            evaluation: {
                scores: [{ name: 'Completeness', weight: 'blocking', score: 8, reasoning: 'Good' }],
                aggregate: 8,
                pass: true,
                iteration: 0,
                maxIterations: 2,
            },
        };
        const publication = {
            artifactPath: join(workspacePath, 'work/TEST-01/test-output.md'),
            commitSha: 'abc123',
            provenancePath: join(workspacePath, 'runs/run-test/provenance.yaml'),
        };
        const loadConfigSpy = vi.spyOn(configApi, 'loadConfig');
        vi.spyOn(simpleEngine, 'executeSimpleTask').mockResolvedValue(taskOutput);
        vi.spyOn(evaluator, 'evaluate').mockResolvedValue(passingEvaluation);
        vi.spyOn(publisher, 'publish').mockResolvedValue(publication);
        const result = await runCli.executeRunCommand({
            persona: 'test-persona',
            task: 'test-task',
            scope: 'TEST-01',
            workspace: workspacePath,
            createProvider: stubProviderFactory,
        });
        expect(loadConfigSpy).toHaveBeenCalledOnce();
        expect(result.status).toBe('published');
        expect(result.artifactPath).toContain('test-output.md');
        expect(result.provenancePath).toContain('provenance.yaml');
        expect(result.durationMs).toBeGreaterThan(0);
    });
});
185
// T-01-008b: when the stubbed evaluation reports a failing result, the run
// result carries the `awaiting_review` status and the aggregate score.
describe('T-01-008b: status and artifact output', () => {
    it('returns awaiting_review when evaluation fails', async () => {
        vi.resetModules();
        const runCli = await import('./run.js');
        const simpleEngine = await import('../engine/simple.js');
        const evaluator = await import('../evaluation/evaluator.js');
        const publisher = await import('../publication/publisher.js');
        const taskOutput = {
            content: 'Poor output',
            toolCalls: [],
            tokensIn: 50,
            tokensOut: 30,
            durationMs: 500,
            model: 'claude-sonnet-4-20250514',
        };
        const failingEvaluation = {
            finalArtifact: 'Poor output',
            evaluation: {
                scores: [{ name: 'Completeness', weight: 'blocking', score: 4, reasoning: 'Incomplete' }],
                aggregate: 4,
                pass: false,
                iteration: 1,
                maxIterations: 2,
            },
        };
        const publication = {
            artifactPath: join(workspacePath, 'work/TEST-01/test-output.md'),
            commitSha: 'def456',
            provenancePath: join(workspacePath, 'runs/run-test/provenance.yaml'),
        };
        vi.spyOn(simpleEngine, 'executeSimpleTask').mockResolvedValue(taskOutput);
        vi.spyOn(evaluator, 'evaluate').mockResolvedValue(failingEvaluation);
        vi.spyOn(publisher, 'publish').mockResolvedValue(publication);
        const result = await runCli.executeRunCommand({
            persona: 'test-persona',
            task: 'test-task',
            scope: 'TEST-01',
            workspace: workspacePath,
            createProvider: stubProviderFactory,
        });
        expect(result.status).toBe('awaiting_review');
        expect(result.evaluationScore).toBe(4);
    });
});
231
// T-FR-03: the run command must call `evaluate` once, with the produced
// artifact as the first argument and a function as the sixth argument.
describe('T-FR-03: self-evaluation refinement wiring', () => {
    it('passes a refineCallback to evaluate for artifact refinement', async () => {
        vi.resetModules();
        const runCli = await import('./run.js');
        const simpleEngine = await import('../engine/simple.js');
        const evaluator = await import('../evaluation/evaluator.js');
        const publisher = await import('../publication/publisher.js');
        const taskOutput = {
            content: 'Review artifact content',
            toolCalls: [],
            tokensIn: 50,
            tokensOut: 30,
            durationMs: 500,
            model: 'claude-sonnet-4-20250514',
        };
        const passingEvaluation = {
            finalArtifact: 'Review artifact content',
            evaluation: {
                scores: [{ name: 'Completeness', weight: 'blocking', score: 8, reasoning: 'Good' }],
                aggregate: 8,
                pass: true,
                iteration: 0,
                maxIterations: 2,
            },
        };
        const publication = {
            artifactPath: join(workspacePath, 'work/TEST-01/test-output.md'),
            commitSha: 'abc',
            provenancePath: join(workspacePath, 'runs/run-test/provenance.yaml'),
        };
        vi.spyOn(simpleEngine, 'executeSimpleTask').mockResolvedValue(taskOutput);
        const evaluateSpy = vi.spyOn(evaluator, 'evaluate').mockResolvedValue(passingEvaluation);
        vi.spyOn(publisher, 'publish').mockResolvedValue(publication);
        await runCli.executeRunCommand({
            persona: 'test-persona',
            task: 'test-task',
            scope: 'TEST-01',
            workspace: workspacePath,
            createProvider: stubProviderFactory,
        });
        expect(evaluateSpy).toHaveBeenCalledOnce();
        const firstCall = evaluateSpy.mock.calls[0];
        const artifactArg = firstCall[0];
        const refineArg = firstCall[5];
        expect(artifactArg).toBe('Review artifact content');
        expect(typeof refineArg).toBe('function');
    });
});
279
// T-01-008c: with `dryRun: true` the command returns a run id and the
// `running` status without ever invoking the simple-task executor.
describe('T-01-008c: dry-run mode', () => {
    it('assembles context without calling LLM', async () => {
        vi.resetModules();
        const runCli = await import('./run.js');
        const simpleEngine = await import('../engine/simple.js');
        const executeSpy = vi.spyOn(simpleEngine, 'executeSimpleTask');
        const dryRunOptions = {
            persona: 'test-persona',
            task: 'test-task',
            scope: 'TEST-01',
            workspace: workspacePath,
            dryRun: true,
            createProvider: stubProviderFactory,
        };
        const result = await runCli.executeRunCommand(dryRunOptions);
        expect(result.dryRun).toBe(true);
        expect(result.status).toBe('running');
        expect(result.runId).toContain('run-');
        expect(executeSpy).not.toHaveBeenCalled();
    });
});
299
// T-FR-05: `feedbackIteration` must equal the number of run directories that
// already exist for the same persona/task/scope, and `escalated` stays unset
// for this (non-review) task.
describe('T-FR-05: feedback loop iteration tracking', () => {
    /**
     * Create one prior-run directory per timestamp under `<workspace>/runs`,
     * stub the execute/evaluate/publish stages, and run the command for
     * test-persona / test-task / TEST-01.
     *
     * @param {string[]} timestamps - Timestamp segment of each seeded run id.
     * @returns {Promise<object>} The value resolved by `executeRunCommand`.
     */
    async function runAfterPriorRuns(timestamps) {
        vi.resetModules();
        const runsDir = join(workspacePath, 'runs');
        for (const stamp of timestamps) {
            await mkdir(join(runsDir, `run-${stamp}-test-persona-test-task-TEST-01`), {
                recursive: true,
            });
        }
        const runCli = await import('./run.js');
        const simpleEngine = await import('../engine/simple.js');
        const evaluator = await import('../evaluation/evaluator.js');
        const publisher = await import('../publication/publisher.js');
        vi.spyOn(simpleEngine, 'executeSimpleTask').mockResolvedValue({
            content: 'Output',
            toolCalls: [],
            tokensIn: 50,
            tokensOut: 30,
            durationMs: 500,
            model: 'claude-sonnet-4-20250514',
        });
        vi.spyOn(evaluator, 'evaluate').mockResolvedValue({
            finalArtifact: 'Output',
            evaluation: {
                scores: [{ name: 'Completeness', weight: 'blocking', score: 8, reasoning: 'Good' }],
                aggregate: 8,
                pass: true,
                iteration: 0,
                maxIterations: 2,
            },
        });
        vi.spyOn(publisher, 'publish').mockResolvedValue({
            artifactPath: join(workspacePath, 'work/TEST-01/test-output.md'),
            commitSha: 'abc',
            provenancePath: join(runsDir, 'run-test/provenance.yaml'),
        });
        return runCli.executeRunCommand({
            persona: 'test-persona',
            task: 'test-task',
            scope: 'TEST-01',
            workspace: workspacePath,
            createProvider: stubProviderFactory,
        });
    }
    it('sets feedbackIteration based on prior runs for the same persona/task/scope', async () => {
        const result = await runAfterPriorRuns(['2026-01-01T00-00-00Z']);
        expect(result.feedbackIteration).toBe(1);
        expect(result.escalated).toBeUndefined();
    });
    it('does not set escalated for non-review tasks even with many prior runs', async () => {
        const result = await runAfterPriorRuns([
            '2026-01-01T00-00-00Z',
            '2026-01-02T00-00-00Z',
            '2026-01-03T00-00-00Z',
        ]);
        expect(result.feedbackIteration).toBe(3);
        expect(result.escalated).toBeUndefined();
    });
});
@@ -0,0 +1,45 @@
1
+ import type { LlmProvider, RunStatus, ArtifactStore } from '../types/index.js';
2
+ import type { ResolvedModel } from '../config/model-resolver.js';
3
+ export type ProviderFactory = (model: ResolvedModel) => LlmProvider | Promise<LlmProvider>;
4
+ export interface RunCommandOptions {
5
+ persona: string;
6
+ task: string;
7
+ scope: string;
8
+ workspace: string;
9
+ dryRun?: boolean;
10
+ /** Override for testing. When omitted the runtime resolves providers automatically. */
11
+ createProvider?: ProviderFactory;
12
+ /** Full story ID (e.g. CREW-03-011). Defaults to scope when not provided. */
13
+ storyId?: string;
14
+ /** Optional artifact store enabling read-artifact and write-artifact tools. */
15
+ artifactStore?: ArtifactStore;
16
+ }
17
+ export interface RunCommandResult {
18
+ runId: string;
19
+ status: RunStatus;
20
+ artifactPath?: string;
21
+ provenancePath?: string;
22
+ evaluationScore?: number;
23
+ durationMs: number;
24
+ dryRun: boolean;
25
+ feedbackIteration?: number;
26
+ escalated?: boolean;
27
+ }
28
+ /**
29
+ * Resolve protected paths for a persona run. Paths listed in the persona
30
+ * spec's `tools.writable_paths` are excluded from the default protected set,
31
+ * allowing that persona to write to those locations via tool calls.
32
+ */
33
+ export declare function resolveProtectedPaths(writablePaths?: string[], defaults?: string[]): string[];
34
+ /**
35
+ * Execute the full `crew run` pipeline:
36
+ * 1. Load config (project, persona, rubric)
37
+ * 2. Acquire lock
38
+ * 3. Assemble context
39
+ * 4. Execute task (simple or composite)
40
+ * 5. Evaluate output
41
+ * 6. Publish artifact + provenance
42
+ * 7. Release lock
43
+ */
44
+ export declare function executeRunCommand(options: RunCommandOptions): Promise<RunCommandResult>;
45
+ //# sourceMappingURL=run.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../src/cli/run.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EACV,WAAW,EAGX,SAAS,EACT,aAAa,EACd,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAGjE,MAAM,MAAM,eAAe,GAAG,CAAC,KAAK,EAAE,aAAa,KAAK,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;AAE3F,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,uFAAuF;IACvF,cAAc,CAAC,EAAE,eAAe,CAAC;IACjC,6EAA6E;IAC7E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,+EAA+E;IAC/E,aAAa,CAAC,EAAE,aAAa,CAAC;CAC/B;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,SAAS,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,CAAC;IAChB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAMD;;;;GAIG;AACH,wBAAgB,qBAAqB,CACnC,aAAa,CAAC,EAAE,MAAM,EAAE,EACxB,QAAQ,GAAE,MAAM,EAA4B,GAC3C,MAAM,EAAE,CAKV;AAkBD;;;;;;;;;GASG;AACH,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAuP7F"}
@@ -0,0 +1,236 @@
1
+ import { join, resolve } from 'node:path';
2
+ import { mkdir, readdir } from 'node:fs/promises';
3
+ import { loadConfig } from '../config/index.js';
4
+ import { resolveExecutionModel, resolveEvalModel } from '../config/model-resolver.js';
5
+ import { assembleContext } from '../context/index.js';
6
+ import { executeSimpleTask } from '../engine/simple.js';
7
+ import { executeCompositeTask } from '../engine/composite.js';
8
+ import { evaluate } from '../evaluation/evaluator.js';
9
+ import { publish } from '../publication/publisher.js';
10
+ import { acquireLock } from '../control/lock-manager.js';
11
+ import { transitionState, persistState } from '../control/run-state.js';
12
+ import { generateRunId } from '../control/id-generator.js';
13
+ import { createToolRegistry } from '../tools/registry.js';
14
+ import { callLlm } from '../engine/llm-client.js';
15
+ import { resolveProvider } from '../providers/index.js';
16
+ const MAX_FEEDBACK_ITERATIONS = 2; // prior-run count at which a 'review-pr' run is reported as escalated
17
+ const DEFAULT_PROTECTED_PATHS = ['product/', 'standards/', 'architecture/decisions/']; // default value of the `defaults` parameter of resolveProtectedPaths
18
/**
 * Resolve protected paths for a persona run. Paths listed in the persona
 * spec's `tools.writable_paths` are excluded from the default protected set,
 * allowing that persona to write to those locations via tool calls.
 *
 * Entries are compared with trailing slashes ignored, so a writable path of
 * `'product'` unprotects the default `'product/'` (and vice versa). The
 * surviving entries are returned exactly as they appear in `defaults`.
 *
 * A new array is always returned, so callers may mutate the result without
 * corrupting the shared default list.
 *
 * @param {string[]} [writablePaths] - Paths the persona may write to.
 * @param {string[]} [defaults] - Protected set to start from.
 * @returns {string[]} The protected paths that remain.
 */
export function resolveProtectedPaths(writablePaths, defaults = DEFAULT_PROTECTED_PATHS) {
    if (!writablePaths || writablePaths.length === 0) {
        // Copy so the caller never holds a reference to the shared defaults.
        return [...defaults];
    }
    // Drop trailing slashes for comparison only; a bare '/' is left untouched
    // and non-string entries are compared as-is.
    const comparable = (entry) => (typeof entry === 'string' ? entry.replace(/(.)\/+$/, '$1') : entry);
    const writable = new Set(writablePaths.map(comparable));
    return defaults.filter((entry) => !writable.has(comparable(entry)));
}
29
/**
 * Count the entries in `runsDir` whose name ends with
 * `-<persona>-<task>-<scope>`, ignoring the entry named `excludeRunId`.
 *
 * Any failure while reading the directory (for example, it does not exist)
 * is treated as "no prior runs" and yields 0.
 *
 * @param {string} runsDir - Directory whose entries are inspected.
 * @param {string} persona - Persona name used in the run-id suffix.
 * @param {string} task - Task name used in the run-id suffix.
 * @param {string} scope - Scope used in the run-id suffix.
 * @param {string} excludeRunId - Entry name to leave out of the count.
 * @returns {Promise<number>} Number of matching entries.
 */
async function countPriorRuns(runsDir, persona, task, scope, excludeRunId) {
    let priorRuns = 0;
    try {
        const entryNames = await readdir(runsDir);
        const runSuffix = `-${persona}-${task}-${scope}`;
        for (const entryName of entryNames) {
            if (entryName === excludeRunId) {
                continue;
            }
            if (entryName.endsWith(runSuffix)) {
                priorRuns += 1;
            }
        }
    }
    catch {
        return 0;
    }
    return priorRuns;
}
39
/**
 * Execute the full `crew run` pipeline:
 * 1. Load config (project, persona, rubric)
 * 2. Create the run directory and persist the initial `queued` record
 * 3. Acquire lock
 * 4. Assemble context (a dry run stops here and returns `dryRun: true`)
 * 5. Execute task (simple or composite)
 * 6. Evaluate output, with an LLM-backed refine callback
 * 7. Publish artifact + provenance
 * 8. Release lock
 *
 * The run record is persisted at each state transition. On any error after
 * the lock is acquired, the record is moved to `failed` on a best-effort
 * basis and the original error is rethrown. The lock is released exactly
 * once, in the `finally` block, on every exit path.
 *
 * @param {object} options - Run options.
 * @param {string} options.persona - Persona name; also used to load config.
 * @param {string} options.task - Key into the persona's `tasks` map.
 * @param {string} options.scope - Scope id; substituted for `{EPIC_ID}`.
 * @param {string} options.workspace - Workspace path (resolved to absolute).
 * @param {boolean} [options.dryRun] - Stop after context assembly.
 * @param {Function} [options.createProvider] - Provider factory; defaults to
 *   `resolveProvider`.
 * @param {string} [options.storyId] - Value for `STORY_ID`; defaults to scope.
 * @param {object} [options.artifactStore] - Passed to the tool registry.
 * @returns {Promise<object>} Run summary: `runId`, `status`, `durationMs`,
 *   `dryRun`, and for full runs `artifactPath`, `provenancePath`,
 *   `evaluationScore`, `feedbackIteration` and `escalated`.
 * @throws {Error} If the task is not defined on the persona, or if any
 *   pipeline stage rejects.
 */
export async function executeRunCommand(options) {
    const { persona, task, scope, workspace, dryRun, storyId } = options;
    const createProvider = options.createProvider ?? resolveProvider;
    const workspacePath = resolve(workspace);
    const startMs = Date.now();
    const config = await loadConfig({ workspacePath, personaName: persona });
    const projectCfg = config.project;
    const executionModel = resolveExecutionModel(projectCfg.llm);
    const evalModel = resolveEvalModel(projectCfg.llm, config.persona.persona.evaluation.self_eval_model);
    const runsDir = join(workspacePath, projectCfg.workspace.runs);
    const runId = generateRunId(persona, task, scope);
    const runDir = join(runsDir, runId);
    await mkdir(runDir, { recursive: true });
    let record = {
        run_id: runId,
        persona,
        task,
        scope,
        status: 'queued',
        started_at: new Date().toISOString(),
    };
    await persistState(runDir, record);
    const lock = await acquireLock(persona, task, scope, runsDir);
    try {
        record = transitionState(record, 'running');
        await persistState(runDir, record);
        const taskDef = config.persona.persona.tasks[task];
        if (!taskDef) {
            throw new Error(`Task '${task}' not found in persona '${persona}'. Available: ${Object.keys(config.persona.persona.tasks).join(', ')}`);
        }
        const context = await assembleContext(config.persona.persona.perception, task, { EPIC_ID: scope, STORY_ID: storyId ?? scope }, projectCfg, workspacePath);
        if (dryRun) {
            // The lock is released by the `finally` block below; releasing it
            // here as well would release it twice on this path.
            // NOTE(review): the persisted record is left in 'running' for dry
            // runs, matching the returned status — confirm this is intended.
            return {
                runId,
                status: 'running',
                durationMs: Date.now() - startMs,
                dryRun: true,
            };
        }
        const provider = await createProvider(executionModel);
        const tools = createToolRegistry({ artifactStore: options.artifactStore });
        const toolContext = {
            workspacePath,
            targetRepoPath: resolve(workspacePath, projectCfg.source.path),
            project: projectCfg,
            persona,
            task,
            runId,
            protectedPaths: resolveProtectedPaths(config.persona.persona.tools.writable_paths),
            logger: (msg) => {
                /* structured logging deferred */
                void msg;
            },
        };
        let artifactContent;
        let totalTokensIn = 0;
        let totalTokensOut = 0;
        // Only set for composite tasks; drives the sub-agent fields of provenance.
        let compositeResult;
        if (taskDef.mode === 'simple') {
            const result = await executeSimpleTask(config.persona, taskDef, context, executionModel, provider, tools, config.skills, toolContext);
            artifactContent = result.content;
            totalTokensIn = result.tokensIn;
            totalTokensOut = result.tokensOut;
        }
        else {
            const result = await executeCompositeTask(config.persona, taskDef, context, executionModel, provider, tools, runDir, config.skills, toolContext);
            artifactContent = result.content;
            totalTokensIn = result.tokensIn;
            totalTokensOut = result.tokensOut;
            compositeResult = result;
        }
        record = transitionState(record, 'evaluating');
        await persistState(runDir, record);
        const evalProvider = await createProvider(evalModel);
        // Invoked by the evaluator with textual feedback; asks the execution
        // model to revise the originally produced artifact.
        const refineCallback = async (feedback) => {
            const result = await callLlm({
                provider,
                model: executionModel,
                system: `You are a ${config.persona.persona.identity.role}. You produced an artifact that did not pass quality evaluation. Revise it to address the feedback while preserving the overall structure and content that was acceptable.`,
                messages: [
                    {
                        role: 'user',
                        content: `--- Original artifact ---\n${artifactContent}\n--- End original artifact ---\n\n--- Evaluation feedback ---\n${feedback}\n--- End feedback ---\n\nProduce a revised version of the artifact that addresses the evaluation feedback. Output only the revised artifact content.`,
                    },
                ],
                maxRetries: 1,
            });
            return result.text;
        };
        const evalResult = await evaluate(artifactContent, config.rubric, evalModel, evalProvider, undefined, refineCallback);
        record = transitionState(record, 'publishing');
        await persistState(runDir, record);
        // Simple tasks name their output via `produces`, composite tasks via
        // `published_artifact`; the same value names the file and the provenance type.
        const artifactType = taskDef.mode === 'simple' ? taskDef.produces : taskDef.published_artifact;
        // split/join substitutes every `{EPIC_ID}` placeholder and, unlike
        // String.prototype.replace, never interprets `$` sequences in `scope`.
        const workDir = projectCfg.workspace.work.split('{EPIC_ID}').join(scope);
        const artifactPath = join(workDir, `${artifactType}.md`);
        const feedbackIteration = await countPriorRuns(runsDir, persona, task, scope, runId);
        const escalated = task === 'review-pr' && feedbackIteration >= MAX_FEEDBACK_ITERATIONS;
        const finalStatus = evalResult.evaluation.pass ? 'published' : 'awaiting_review';
        const provenance = {
            run_id: runId,
            persona,
            task,
            sub_agent_chain: compositeResult?.subAgentResults.map((r) => r.name),
            feedback_iteration: feedbackIteration > 0 ? feedbackIteration : undefined,
            // Hash and runtime-version fields are literal placeholders here.
            versions: {
                persona_spec_hash: 'pending',
                prompt_hashes: {},
                model: executionModel.concreteModel,
                rubric_hash: 'pending',
                runtime_version: '0.0.0',
            },
            timestamp: record.started_at,
            duration_ms: Date.now() - startMs,
            trigger: 'manual',
            inputs: {
                artifacts_read: context.blocks.map((b) => ({
                    type: b.source.type,
                    path: b.source.path ?? '',
                })),
                context_tokens: context.totalTokens,
            },
            outputs: {
                artifact_produced: {
                    type: artifactType,
                    id: runId,
                },
                work_products: compositeResult?.checkpoints.map((c) => c.path),
            },
            evaluation: {
                self_eval_score: evalResult.evaluation.aggregate,
                self_eval_pass: evalResult.evaluation.pass,
                self_eval_model: evalModel.concreteModel,
            },
            cost: {
                // Composite runs sum the rounds of each sub-agent; simple runs
                // are recorded as a single call.
                llm_calls: compositeResult
                    ? compositeResult.subAgentResults.reduce((sum, r) => sum + r.result.rounds, 0)
                    : 1,
                total_tokens: totalTokensIn + totalTokensOut,
                estimated_cost_usd: 0,
            },
            // NOTE(review): recorded as 'published' even when evaluation fails
            // and the run ends in 'awaiting_review' — confirm this is intended.
            run_state: 'published',
        };
        const pubResult = await publish({
            artifact: evalResult.finalArtifact,
            artifactPath,
            metadata: {
                author: persona,
                run_id: runId,
                status: finalStatus,
                timestamp: new Date().toISOString(),
                eval_score: evalResult.evaluation.aggregate,
                eval_pass: evalResult.evaluation.pass,
            },
            provenance,
            config: projectCfg,
            runRecord: record,
            runDir,
            push: false,
        });
        record = transitionState(record, finalStatus);
        await persistState(runDir, record);
        return {
            runId,
            status: record.status,
            artifactPath: pubResult.artifactPath,
            provenancePath: pubResult.provenancePath,
            evaluationScore: evalResult.evaluation.aggregate,
            durationMs: Date.now() - startMs,
            dryRun: false,
            feedbackIteration: feedbackIteration > 0 ? feedbackIteration : undefined,
            escalated: escalated || undefined,
        };
    }
    catch (err) {
        // Best-effort: record the failure, but never let bookkeeping mask the
        // original error.
        try {
            record = transitionState(record, 'failed');
            await persistState(runDir, record);
        }
        catch {
            /* state transition may fail if already terminal */
        }
        throw err;
    }
    finally {
        // Single release point for every exit path (success, dry run, failure).
        await lock.release();
    }
}