@codexa/cli 9.0.7 → 9.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/commands/decide.ts +120 -3
- package/commands/discover.ts +18 -9
- package/commands/integration.test.ts +754 -0
- package/commands/knowledge.test.ts +2 -6
- package/commands/knowledge.ts +20 -4
- package/commands/patterns.ts +8 -644
- package/commands/product.ts +41 -104
- package/commands/spec-resolver.test.ts +2 -13
- package/commands/standards.ts +33 -3
- package/commands/task.ts +21 -4
- package/commands/utils.test.ts +25 -87
- package/commands/utils.ts +20 -82
- package/context/assembly.ts +11 -12
- package/context/domains.test.ts +300 -0
- package/context/domains.ts +157 -0
- package/context/generator.ts +14 -13
- package/context/index.ts +6 -1
- package/context/references.test.ts +159 -0
- package/context/references.ts +159 -0
- package/context/sections.ts +18 -1
- package/db/schema.ts +40 -5
- package/db/test-helpers.ts +33 -0
- package/gates/standards-validator.test.ts +447 -0
- package/gates/standards-validator.ts +164 -125
- package/gates/typecheck-validator.ts +296 -92
- package/gates/validator.ts +93 -8
- package/package.json +1 -1
- package/protocol/process-return.ts +39 -4
- package/workflow.ts +54 -84
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration Tests — PLAN → CHECK → IMP → REV full lifecycle
|
|
3
|
+
*
|
|
4
|
+
* Tests the complete workflow cycle to ensure data flows correctly
|
|
5
|
+
* between phases, knowledge propagates between tasks, and gates
|
|
6
|
+
* enforce constraints as expected.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect, beforeEach } from "bun:test";
|
|
9
|
+
import { getDb } from "../db/connection";
|
|
10
|
+
import { initSchema } from "../db/schema";
|
|
11
|
+
import { cleanDb } from "../db/test-helpers";
|
|
12
|
+
import { planStart, planTaskAdd } from "./plan";
|
|
13
|
+
import { checkRequest, checkApprove } from "./check";
|
|
14
|
+
import { taskStart, taskDone, taskNext } from "./task";
|
|
15
|
+
import { reviewStart, reviewApprove, calculateReviewScore } from "./review";
|
|
16
|
+
import { processSubagentReturn } from "../protocol/process-return";
|
|
17
|
+
import { validateGate } from "../gates/validator";
|
|
18
|
+
|
|
19
|
+
// Helper: create spec + tasks directly in the DB (bypasses console.log)
|
|
20
|
+
/**
 * Inserts a spec row plus its companion context row directly into the database.
 *
 * @param name  Stored as the spec name and reused as the context objective.
 * @param phase Value written to the spec's `phase` column (defaults to "planning").
 * @returns The generated spec id, shaped `test-<epoch ms>-<4 random base36 chars>`.
 */
function setupSpec(name: string, phase: string = "planning"): string {
  const database = getDb();

  // Timestamp + random suffix keeps ids distinct when several specs are created in one test.
  const epochMs = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2, 6);
  const specId = `test-${epochMs}-${randomSuffix}`;
  const timestamp = new Date().toISOString();

  const insertSpecSql =
    "INSERT INTO specs (id, name, phase, created_at, updated_at) VALUES (?, ?, ?, ?, ?)";
  database.run(insertSpecSql, [specId, name, phase, timestamp, timestamp]);

  const insertContextSql =
    "INSERT INTO context (spec_id, objective, updated_at) VALUES (?, ?, ?)";
  database.run(insertContextSql, [specId, name, timestamp]);

  return specId;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Inserts a pending task for a spec and returns its database id.
 *
 * @param specId    Spec the task belongs to.
 * @param number    Task number within the spec (used to look the row up again).
 * @param name      Task name.
 * @param agent     Optional agent name; a missing or empty value is stored as NULL.
 * @param dependsOn Optional task numbers this task depends on; stored as a JSON
 *                  array string, or NULL when absent or empty.
 * @returns The `id` column of the inserted row.
 * @throws Error when the row cannot be read back after the insert.
 */
function addTask(specId: string, number: number, name: string, agent?: string, dependsOn?: number[]): number {
  const db = getDb();
  const deps = dependsOn && dependsOn.length > 0 ? JSON.stringify(dependsOn) : null;

  db.run(
    `INSERT INTO tasks (spec_id, number, name, agent, depends_on, can_parallel, status)
     VALUES (?, ?, ?, ?, ?, 1, 'pending')`,
    [specId, number, name, agent || null, deps]
  );

  const task = db
    .query("SELECT id FROM tasks WHERE spec_id = ? AND number = ?")
    .get(specId, number) as { id: number } | null | undefined;

  // Fail with a descriptive message instead of a TypeError on `task.id`.
  if (!task) {
    throw new Error(`addTask: inserted task not found (spec_id=${specId}, number=${number})`);
  }
  return task.id;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Marks a spec as approved directly in the database: sets `phase` to
 * 'implementing' and stamps `approved_at` / `updated_at` with the current time.
 *
 * @param specId Id of the spec row to update.
 */
function approveSpec(specId: string) {
  const database = getDb();
  const approvedAt = new Date().toISOString();
  const approveSql =
    "UPDATE specs SET phase = 'implementing', approved_at = ?, updated_at = ? WHERE id = ?";

  // The same timestamp is written to both approved_at and updated_at.
  database.run(approveSql, [approvedAt, approvedAt, specId]);
}
|
|
57
|
+
|
|
58
|
+
/**
 * Flags a task as 'running' directly in the database and records `started_at`
 * as the current time.
 *
 * @param taskId Database id of the task row.
 */
function startTask(taskId: number) {
  const database = getDb();
  const startedAt = new Date().toISOString();
  const startSql = "UPDATE tasks SET status = 'running', started_at = ? WHERE id = ?";

  database.run(startSql, [startedAt, taskId]);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Flags a task as 'done' directly in the database, storing the checkpoint text
 * and the current time as `completed_at`.
 *
 * @param taskId     Database id of the task row.
 * @param checkpoint Checkpoint text written to the task.
 */
function completeTask(taskId: number, checkpoint: string) {
  const database = getDb();
  const completedAt = new Date().toISOString();
  const completeSql =
    "UPDATE tasks SET status = 'done', checkpoint = ?, completed_at = ? WHERE id = ?";

  database.run(completeSql, [checkpoint, completedAt, taskId]);
}
|
|
72
|
+
|
|
73
|
+
// ============================================================
|
|
74
|
+
// Tests
|
|
75
|
+
// ============================================================
|
|
76
|
+
|
|
77
|
+
// Runs before every test: initSchema() followed by cleanDb().
// Presumably this leaves an empty, schema-ready database — confirm in ../db/test-helpers.
beforeEach(function resetDatabase() {
  initSchema();
  cleanDb();
});
|
|
81
|
+
|
|
82
|
+
describe("PLAN → CHECK → IMP → REV lifecycle", () => {
|
|
83
|
+
it("should complete full lifecycle with knowledge propagation", () => {
  const database = getDb();
  const isoNow = (): string => new Date().toISOString();
  const loadSpec = (id: string) =>
    database.query("SELECT * FROM specs WHERE id = ?").get(id) as any;
  const loadKnowledgeGraph = (id: string) =>
    database.query("SELECT * FROM knowledge_graph WHERE spec_id = ?").all(id) as any[];

  // ──── PLAN ────
  // One spec with two tasks; task 2 declares a dependency on task 1.
  const specId = setupSpec("E2E Test Feature");
  const schemaTaskId = addTask(specId, 1, "Setup database schema", "backend-javascript");
  const apiTaskId = addTask(specId, 2, "Create API endpoints", "backend-javascript", [1]);

  // Keep the context's task counter in line with the two tasks just added.
  database.run("UPDATE context SET total_tasks = 2, updated_at = ? WHERE spec_id = ?", [
    isoNow(),
    specId,
  ]);

  const plannedSpec = loadSpec(specId);
  expect(plannedSpec.phase).toBe("planning");

  // ──── CHECK ────
  // Put the spec into 'checking' first, then approve it.
  database.run("UPDATE specs SET phase = 'checking', updated_at = ? WHERE id = ?", [
    isoNow(),
    specId,
  ]);
  approveSpec(specId);

  const specAfterApproval = loadSpec(specId);
  expect(specAfterApproval.phase).toBe("implementing");
  expect(specAfterApproval.approved_at).toBeTruthy();

  // ──── IMP Task 1 ────
  startTask(schemaTaskId);

  const taskInProgress = database
    .query("SELECT * FROM tasks WHERE id = ?")
    .get(schemaTaskId) as any;
  expect(taskInProgress.status).toBe("running");
  expect(taskInProgress.started_at).toBeTruthy();

  // Payload a subagent would hand back for task 1.
  const schemaTaskReturn = {
    status: "completed" as const,
    summary: "Created database schema with users and sessions tables",
    files_created: ["src/db/schema.sql", "src/db/migrate.ts"],
    files_modified: [],
    reasoning: {
      approach: "Used PostgreSQL with Drizzle ORM for type-safe database access and migrations",
      challenges: ["Deciding between raw SQL and ORM migrations"],
      alternatives: ["Prisma was considered but Drizzle is lighter"],
      recommendations: "Use Drizzle push for dev, generate for prod migrations",
    },
    decisions_made: [
      {
        title: "Database ORM choice",
        decision: "Use Drizzle ORM over Prisma",
        rationale: "Drizzle is lighter, SQL-first, and better for PostgreSQL",
      },
    ],
    knowledge_to_broadcast: [
      {
        category: "discovery" as const,
        content: "Database uses pgcrypto extension for UUID generation",
        severity: "warning" as const,
      },
      {
        category: "pattern" as const,
        content: "All tables have created_at and updated_at timestamps",
        severity: "info" as const,
      },
    ],
  };

  const schemaTaskOutcome = processSubagentReturn(specId, schemaTaskId, 1, schemaTaskReturn);
  expect(schemaTaskOutcome.success).toBe(true);
  expect(schemaTaskOutcome.knowledgeAdded).toBe(2);
  expect(schemaTaskOutcome.decisionsAdded).toBe(1);
  expect(schemaTaskOutcome.artifactsAdded).toBe(2);
  // Payload carries approach + challenge + alternative + recommendation.
  expect(schemaTaskOutcome.reasoningAdded).toBeGreaterThanOrEqual(3);

  // Task 1 is finished; its checkpoint is the same text as the summary.
  completeTask(schemaTaskId, schemaTaskReturn.summary);

  // Knowledge rows were persisted.
  const storedKnowledge = database
    .query("SELECT * FROM knowledge WHERE spec_id = ? ORDER BY created_at")
    .all(specId) as any[];
  expect(storedKnowledge.length).toBeGreaterThanOrEqual(2);

  const pgcryptoWarning = storedKnowledge.find(
    (entry: any) => entry.severity === "warning" && entry.content.includes("pgcrypto")
  );
  expect(pgcryptoWarning).toBeTruthy();

  // Decision rows were persisted.
  const storedDecisions = database
    .query("SELECT * FROM decisions WHERE spec_id = ?")
    .all(specId) as any[];
  expect(storedDecisions.length).toBe(1);
  expect(storedDecisions[0].title).toBe("Database ORM choice");

  // Artifact rows were persisted.
  const storedArtifacts = database
    .query("SELECT * FROM artifacts WHERE spec_id = ?")
    .all(specId) as any[];
  expect(storedArtifacts.length).toBe(2);

  // Reasoning log rows exist for task 1.
  const reasoningRows = database
    .query("SELECT * FROM reasoning_log WHERE spec_id = ? AND task_id = ?")
    .all(specId, schemaTaskId) as any[];
  expect(reasoningRows.length).toBeGreaterThanOrEqual(3);

  // Knowledge graph has at least one relation for this spec.
  const graphRelations = loadKnowledgeGraph(specId);
  expect(graphRelations.length).toBeGreaterThan(0);

  // ──── IMP Task 2 ────
  // Task 2 depends on task 1, so confirm task 1 really is done before starting it.
  const schemaTaskStatus = database
    .query("SELECT status FROM tasks WHERE id = ?")
    .get(schemaTaskId) as any;
  expect(schemaTaskStatus.status).toBe("done");

  startTask(apiTaskId);

  // Payload a subagent would hand back for task 2.
  const apiTaskReturn = {
    status: "completed" as const,
    summary: "Created REST API endpoints for users CRUD with authentication",
    files_created: ["src/api/users.ts", "src/api/auth.ts"],
    files_modified: ["src/db/schema.sql"],
    reasoning: {
      approach: "Created Hono routes with middleware for authentication and input validation",
    },
    knowledge_to_broadcast: [
      {
        category: "discovery" as const,
        content: "API uses rate limiting of 100 req/min per IP",
        severity: "info" as const,
      },
    ],
  };

  const apiTaskOutcome = processSubagentReturn(specId, apiTaskId, 2, apiTaskReturn);
  expect(apiTaskOutcome.success).toBe(true);
  expect(apiTaskOutcome.artifactsAdded).toBe(3); // 2 created + 1 modified (OR REPLACE)

  completeTask(apiTaskId, apiTaskReturn.summary);

  // Knowledge from both tasks has accumulated: 2 from task 1 + 1 from task 2.
  const accumulatedKnowledge = database
    .query("SELECT * FROM knowledge WHERE spec_id = ?")
    .all(specId) as any[];
  expect(accumulatedKnowledge.length).toBeGreaterThanOrEqual(3);

  // ──── REV ────
  database.run("UPDATE specs SET phase = 'reviewing', updated_at = ? WHERE id = ?", [
    isoNow(),
    specId,
  ]);

  const reviewScore = calculateReviewScore(specId);
  expect(reviewScore.total).toBeGreaterThanOrEqual(50);
  expect(reviewScore.breakdown.tasksCompleted).toBe(25); // All tasks done = full marks

  // No task of this spec may be left in a non-done state.
  const notDone = database
    .query("SELECT COUNT(*) as c FROM tasks WHERE spec_id = ? AND status != 'done'")
    .get(specId) as any;
  expect(notDone.c).toBe(0);
});
|
|
251
|
+
|
|
252
|
+
it("should propagate knowledge from task 1 to task 2 context", () => {
  const database = getDb();

  const specId = setupSpec("Knowledge Propagation Test");
  const originTaskId = addTask(specId, 1, "First task", "backend-javascript");
  // Task 2 only needs to exist as a dependant of task 1; its id is not used below.
  addTask(specId, 2, "Second task", "backend-javascript", [1]);

  approveSpec(specId);
  startTask(originTaskId);

  // Task 1 broadcasts a critical discovery.
  processSubagentReturn(specId, originTaskId, 1, {
    status: "completed",
    summary: "Completed first task with important discovery",
    files_created: ["src/first.ts"],
    files_modified: [],
    reasoning: { approach: "Implemented the first component using standard patterns" },
    knowledge_to_broadcast: [
      {
        category: "discovery",
        content: "CRITICAL: API requires authentication token in X-Auth header",
        severity: "critical",
      },
    ],
  });
  completeTask(originTaskId, "Done with first task");

  // The critical entry is stored for the spec.
  const criticalEntries = database
    .query("SELECT * FROM knowledge WHERE spec_id = ? AND severity = 'critical'")
    .all(specId) as any[];
  expect(criticalEntries.length).toBeGreaterThanOrEqual(1);
  expect(criticalEntries[0].content).toContain("X-Auth header");

  // Spec-level knowledge attributed to task 1 is retrievable.
  // NOTE(review): this looks up rows by task 1's origin; nothing here queries on
  // behalf of task 2 itself — confirm that is the intended propagation check.
  const entriesFromOriginTask = database
    .query(`SELECT * FROM knowledge WHERE spec_id = ? AND task_origin = ?`)
    .all(specId, originTaskId) as any[];
  expect(entriesFromOriginTask.length).toBeGreaterThanOrEqual(1);
});
|
|
292
|
+
|
|
293
|
+
it("should deduplicate identical knowledge entries", () => {
  const database = getDb();

  const specId = setupSpec("Dedup Test");
  const firstTaskId = addTask(specId, 1, "Task 1", "backend-javascript");
  const secondTaskId = addTask(specId, 2, "Task 2", "backend-javascript");

  approveSpec(specId);

  // Two independent tasks, each broadcasting the exact same knowledge entry.
  const rounds = [
    {
      taskId: firstTaskId,
      taskNumber: 1,
      summary: "First task done",
      createdFile: "src/a.ts",
      approach: "Standard implementation approach used for component",
    },
    {
      taskId: secondTaskId,
      taskNumber: 2,
      summary: "Second task done",
      createdFile: "src/b.ts",
      approach: "Standard implementation approach used for second component",
    },
  ];

  for (const round of rounds) {
    startTask(round.taskId);
    processSubagentReturn(specId, round.taskId, round.taskNumber, {
      status: "completed",
      summary: round.summary,
      files_created: [round.createdFile],
      files_modified: [],
      reasoning: { approach: round.approach },
      knowledge_to_broadcast: [
        { category: "discovery", content: "Uses UTF-8 encoding everywhere", severity: "info" },
      ],
    });
    completeTask(round.taskId, "Done");
  }

  // Only one row may survive for the duplicated content.
  const matchingEntries = database
    .query("SELECT * FROM knowledge WHERE spec_id = ? AND content = 'Uses UTF-8 encoding everywhere'")
    .all(specId) as any[];
  expect(matchingEntries.length).toBe(1);
});
|
|
335
|
+
|
|
336
|
+
it("should register decisions and detect conflicts", () => {
  const database = getDb();

  const specId = setupSpec("Decision Conflict Test");
  const prismaTaskId = addTask(specId, 1, "Task 1", "backend-javascript");
  // A second task is created but never started in this test.
  addTask(specId, 2, "Task 2", "backend-javascript");

  approveSpec(specId);

  // Task 1 records a decision to use Prisma.
  startTask(prismaTaskId);
  processSubagentReturn(specId, prismaTaskId, 1, {
    status: "completed",
    summary: "Setup database with Prisma ORM for type-safe database access",
    files_created: ["prisma/schema.prisma"],
    files_modified: [],
    reasoning: {
      approach: "Used Prisma for its excellent TypeScript integration and auto-generated types",
    },
    decisions_made: [
      { title: "ORM Selection", decision: "Use Prisma ORM", rationale: "Best TypeScript integration" },
    ],
  });
  completeTask(prismaTaskId, "Done with Prisma setup");

  // The decision is stored with status 'active'.
  // NOTE(review): only registration is asserted; no conflicting decision is
  // submitted, so conflict detection itself is not exercised here.
  const activeDecisions = database
    .query("SELECT * FROM decisions WHERE spec_id = ? AND status = 'active'")
    .all(specId) as any[];
  expect(activeDecisions.length).toBe(1);
  expect(activeDecisions[0].decision).toContain("Prisma");
});
|
|
366
|
+
});
|
|
367
|
+
|
|
368
|
+
describe("Gate enforcement", () => {
|
|
369
|
+
it("should block task-done without checkpoint", () => {
  // Running task whose completion is attempted with a too-short checkpoint.
  const specId = setupSpec("Gate Test");
  const taskId = addTask(specId, 1, "Test task");
  approveSpec(specId);
  startTask(taskId);

  const result = validateGate("task-done", {
    taskId,
    checkpoint: "short", // < 10 chars
    files: [],
  });

  // The gate must refuse and name the checkpoint as the cause.
  expect(result.passed).toBe(false);
  expect(result.reason).toContain("Checkpoint");
});
|
|
385
|
+
|
|
386
|
+
it("should block task-start when dependencies are not done", () => {
  const specId = setupSpec("Dependency Gate Test");
  // Task 1 is created and deliberately left pending.
  addTask(specId, 1, "First task");
  const dependentTaskId = addTask(specId, 2, "Second task", undefined, [1]);
  approveSpec(specId);

  const result = validateGate("task-start", {
    taskId: dependentTaskId,
    specId,
  });

  // Task 2 depends on the still-pending task 1, so the gate must refuse.
  expect(result.passed).toBe(false);
  expect(result.reason).toContain("Dependencias");
});
|
|
402
|
+
|
|
403
|
+
it("should pass task-start when dependencies are done", () => {
  const specId = setupSpec("Dependency Pass Test");
  const prerequisiteTaskId = addTask(specId, 1, "First task");
  const dependentTaskId = addTask(specId, 2, "Second task", undefined, [1]);
  approveSpec(specId);

  // Finish the prerequisite before asking the gate about task 2.
  startTask(prerequisiteTaskId);
  completeTask(prerequisiteTaskId, "First task completed successfully");

  const result = validateGate("task-start", {
    taskId: dependentTaskId,
    specId,
  });
  expect(result.passed).toBe(true);
});
|
|
421
|
+
|
|
422
|
+
it("should block review-start when tasks are pending", () => {
  // Approved spec whose single task was never started.
  const specId = setupSpec("Review Gate Test");
  addTask(specId, 1, "Incomplete task");
  approveSpec(specId);

  const gateOutcome = validateGate("review-start", { specId });

  expect(gateOutcome.passed).toBe(false);
  expect(gateOutcome.reason).toContain("Tasks pendentes");
});
|
|
431
|
+
|
|
432
|
+
it("should pass review-start when all tasks are done", () => {
  // Approved spec whose single task has been started and completed.
  const specId = setupSpec("Review Pass Test");
  const onlyTaskId = addTask(specId, 1, "Only task");
  approveSpec(specId);
  startTask(onlyTaskId);
  completeTask(onlyTaskId, "Completed the only task successfully");

  const gateOutcome = validateGate("review-start", { specId });

  expect(gateOutcome.passed).toBe(true);
});
|
|
442
|
+
|
|
443
|
+
it("should require reasoning for completed subagent returns", () => {
  const specId = setupSpec("Reasoning Gate Test");
  const runningTaskId = addTask(specId, 1, "Reasoning task");
  approveSpec(specId);
  startTask(runningTaskId);

  // The checkpoint is long enough; only the reasoning approach is too short.
  const gateOutcome = validateGate("task-done", {
    taskId: runningTaskId,
    checkpoint: "This is a valid checkpoint text",
    files: [],
    subagentData: {
      status: "completed",
      summary: "Done",
      files_created: [],
      files_modified: [],
      reasoning: { approach: "short" }, // < 20 chars
    },
  });

  expect(gateOutcome.passed).toBe(false);
  expect(gateOutcome.reason).toContain("reasoning");
});
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
describe("files-exist gate with sandbox", () => {
|
|
467
|
+
it("should pass when subagent reports files that dont exist on disk", () => {
  const specId = setupSpec("Sandbox Files Test");
  const runningTaskId = addTask(specId, 1, "Sandbox task");
  approveSpec(specId);
  startTask(runningTaskId);

  // This path is not expected to exist on disk; it is only reported by the subagent.
  const sandboxPath = "src/sandbox/nonexistent-file.ts";

  const gateOutcome = validateGate("task-done", {
    taskId: runningTaskId,
    checkpoint: "Completed sandbox task with new files created",
    files: [sandboxPath],
    subagentData: {
      status: "completed",
      summary: "Created file in sandbox",
      files_created: [sandboxPath],
      files_modified: [],
      reasoning: {
        approach: "Created the file using standard TypeScript patterns and conventions",
      },
    },
  });

  expect(gateOutcome.passed).toBe(true);
});
|
|
487
|
+
|
|
488
|
+
it("should fail when file not on disk and not from subagent", () => {
  const specId = setupSpec("Missing File Test");
  const runningTaskId = addTask(specId, 1, "Missing file task");
  approveSpec(specId);
  startTask(runningTaskId);

  // No subagentData is supplied, so the listed file has to exist on disk.
  const missingPath = "src/this-file-definitely-does-not-exist-xyz.ts";
  const gateOutcome = validateGate("task-done", {
    taskId: runningTaskId,
    checkpoint: "Completed missing file task properly",
    files: [missingPath],
  });

  expect(gateOutcome.passed).toBe(false);
  expect(gateOutcome.reason).toContain("nao encontrado");
});
|
|
503
|
+
|
|
504
|
+
it("should pass when subagent reports mix of existing and sandbox files", () => {
  const database = getDb();
  const specId = setupSpec("Mixed Files Test");
  const runningTaskId = addTask(specId, 1, "Mixed files task");
  approveSpec(specId);

  // Back-date started_at by one year so the mtime check passes for the existing file.
  const oneYearMs = 365 * 24 * 60 * 60 * 1000;
  const startedLongAgo = new Date(Date.now() - oneYearMs).toISOString();
  database.run("UPDATE tasks SET status = 'running', started_at = ? WHERE id = ?", [
    startedLongAgo,
    runningTaskId,
  ]);

  // A file guaranteed to exist: this test file itself.
  const thisTestFile = import.meta.path;
  const sandboxPath = "src/sandbox/new-file.ts";

  const gateOutcome = validateGate("task-done", {
    taskId: runningTaskId,
    checkpoint: "Completed mixed files task with both types",
    files: [thisTestFile, sandboxPath],
    subagentData: {
      status: "completed",
      summary: "Modified existing and created new file",
      files_created: [sandboxPath],
      files_modified: [thisTestFile],
      reasoning: { approach: "Modified existing file and created new file for component" },
    },
  });

  // The on-disk file validates normally; the sandbox file is accepted via the subagent report.
  expect(gateOutcome.passed).toBe(true);
});
|
|
532
|
+
});
|
|
533
|
+
|
|
534
|
+
describe("Review scoring", () => {
|
|
535
|
+
it("should calculate perfect score when all tasks done and no bypasses", () => {
  const database = getDb();

  const specId = setupSpec("Perfect Score Test");
  const firstTaskId = addTask(specId, 1, "Task 1");
  const secondTaskId = addTask(specId, 2, "Task 2");
  approveSpec(specId);

  // Both tasks run to completion, one after the other.
  startTask(firstTaskId);
  completeTask(firstTaskId, "Task 1 done perfectly");
  startTask(secondTaskId);
  completeTask(secondTaskId, "Task 2 done perfectly");

  // One created artifact per task.
  const artifactInserts = [
    "INSERT INTO artifacts (spec_id, task_ref, path, action) VALUES (?, 1, 'src/a.ts', 'created')",
    "INSERT INTO artifacts (spec_id, task_ref, path, action) VALUES (?, 2, 'src/b.ts', 'created')",
  ];
  for (const insertSql of artifactInserts) {
    database.run(insertSql, [specId]);
  }

  const reviewScore = calculateReviewScore(specId);
  const { breakdown } = reviewScore;

  expect(breakdown.tasksCompleted).toBe(25);
  expect(breakdown.gatesPassedClean).toBe(25);
  expect(breakdown.standardsFollowed).toBe(25);
  // files_delivered depends on planned vs created, so only a lower bound is checked.
  expect(reviewScore.total).toBeGreaterThanOrEqual(75);
  expect(reviewScore.autoApproveEligible).toBe(true);
  expect(reviewScore.mustReviewItems.length).toBe(0);
});
|
|
567
|
+
|
|
568
|
+
it("should reduce score when gates are bypassed", () => {
  const database = getDb();

  const specId = setupSpec("Bypass Score Test");
  const bypassedTaskId = addTask(specId, 1, "Bypassed task");
  approveSpec(specId);

  startTask(bypassedTaskId);
  completeTask(bypassedTaskId, "Done with bypasses");

  // Record a bypass of the 'standards-follow' gate for the task.
  const insertBypassSql =
    "INSERT INTO gate_bypasses (spec_id, task_id, gate_name, reason) VALUES (?, ?, 'standards-follow', 'Test bypass')";
  database.run(insertBypassSql, [specId, bypassedTaskId]);

  const reviewScore = calculateReviewScore(specId);
  const { breakdown } = reviewScore;

  expect(breakdown.gatesPassedClean).toBeLessThan(25);
  expect(breakdown.standardsFollowed).toBeLessThan(25);
  expect(reviewScore.autoApproveEligible).toBe(false); // Critical bypass
  expect(reviewScore.mustReviewItems.length).toBeGreaterThan(0);
});
|
|
590
|
+
});
|
|
591
|
+
|
|
592
|
+
describe("processSubagentReturn", () => {
|
|
593
|
+
it("should handle blocked status with blockers as critical knowledge", () => {
  const database = getDb();

  const specId = setupSpec("Blocked Task Test");
  const blockedTaskId = addTask(specId, 1, "Blocked task");
  approveSpec(specId);
  startTask(blockedTaskId);

  // A blocked return with two blockers and no files.
  const outcome = processSubagentReturn(specId, blockedTaskId, 1, {
    status: "blocked",
    summary: "Blocked by missing API key configuration",
    files_created: [],
    files_modified: [],
    blockers: [
      "Missing API key for external service",
      "Configuration file not found at expected path",
    ],
  });

  expect(outcome.knowledgeAdded).toBe(2);
  expect(outcome.artifactsAdded).toBe(0);

  // Each blocker must be stored as a critical knowledge row in the 'blocker' category.
  const blockerRows = database
    .query("SELECT * FROM knowledge WHERE spec_id = ? AND category = 'blocker' AND severity = 'critical'")
    .all(specId) as any[];
  expect(blockerRows.length).toBe(2);
});
|
|
621
|
+
|
|
622
|
+
it("should store all artifacts from files_created and files_modified", () => {
  const database = getDb();

  const specId = setupSpec("Artifacts Test");
  const artifactTaskId = addTask(specId, 1, "Artifact task");
  approveSpec(specId);
  startTask(artifactTaskId);

  // Two created files plus one modified file.
  const outcome = processSubagentReturn(specId, artifactTaskId, 1, {
    status: "completed",
    summary: "Created and modified multiple files for the feature",
    files_created: ["src/new1.ts", "src/new2.ts"],
    files_modified: ["src/existing.ts"],
    reasoning: {
      approach: "Created two new files and modified one existing file for the feature",
    },
  });

  expect(outcome.artifactsAdded).toBe(3);

  const artifactRows = database
    .query("SELECT * FROM artifacts WHERE spec_id = ? ORDER BY path")
    .all(specId) as any[];
  expect(artifactRows.length).toBe(3);

  // Split the stored rows by action and check each bucket.
  const rowsWithAction = (action: string) =>
    artifactRows.filter((row: any) => row.action === action);
  const createdRows = rowsWithAction("created");
  const modifiedRows = rowsWithAction("modified");
  expect(createdRows.length).toBe(2);
  expect(modifiedRows.length).toBe(1);
});
|
|
650
|
+
|
|
651
|
+
it("should build knowledge graph relations", () => {
  const database = getDb();

  const specId = setupSpec("Graph Test");
  const graphTaskId = addTask(specId, 1, "Graph task");
  approveSpec(specId);
  startTask(graphTaskId);

  // One created file, one modified file and one discovered pattern.
  processSubagentReturn(specId, graphTaskId, 1, {
    status: "completed",
    summary: "Created files with patterns for the component",
    files_created: ["src/api.ts"],
    files_modified: ["src/config.ts"],
    reasoning: {
      approach: "Implemented API endpoint following existing patterns in the project",
    },
    patterns_discovered: ["REST endpoint pattern"],
  });

  const graphRows = database
    .query("SELECT * FROM knowledge_graph WHERE spec_id = ?")
    .all(specId) as any[];

  // Expected edges: task->api.ts (creates), task->config.ts (modifies),
  // pattern->api.ts (extracted_from).
  const rowsWithRelation = (relation: string) =>
    graphRows.filter((row: any) => row.relation === relation);
  const createEdges = rowsWithRelation("creates");
  const modifyEdges = rowsWithRelation("modifies");
  const extractedEdges = rowsWithRelation("extracted_from");

  expect(createEdges.length).toBe(1);
  expect(modifyEdges.length).toBe(1);
  expect(extractedEdges.length).toBe(1);
});
|
|
682
|
+
});
|
|
683
|
+
|
|
684
|
+
describe("Multi-spec parallel", () => {
|
|
685
|
+
it("should allow multiple specs to coexist", () => {
  const database = getDb();

  const firstSpecId = setupSpec("Feature A");
  const secondSpecId = setupSpec("Feature B");

  addTask(firstSpecId, 1, "Task A1");
  addTask(secondSpecId, 1, "Task B1");

  // Both specs are present among the specs that are neither completed nor cancelled.
  const openSpecs = database
    .query("SELECT * FROM specs WHERE phase NOT IN ('completed', 'cancelled')")
    .all() as any[];
  expect(openSpecs.length).toBe(2);

  // Each spec sees only its own single task.
  const tasksOf = (specId: string) =>
    database.query("SELECT * FROM tasks WHERE spec_id = ?").all(specId) as any[];
  const firstSpecTasks = tasksOf(firstSpecId);
  const secondSpecTasks = tasksOf(secondSpecId);
  expect(firstSpecTasks.length).toBe(1);
  expect(secondSpecTasks.length).toBe(1);
});
|
|
706
|
+
|
|
707
|
+
it("should isolate knowledge per spec", () => {
  const db = getDb();

  const specA = setupSpec("Feature A");
  const specB = setupSpec("Feature B");
  const taskA = addTask(specA, 1, "Task A1");
  const taskB = addTask(specB, 1, "Task B1");

  approveSpec(specA);
  approveSpec(specB);
  startTask(taskA);
  startTask(taskB);

  // Each spec's task broadcasts one knowledge entry naming its own feature.
  processSubagentReturn(specA, taskA, 1, {
    status: "completed",
    summary: "Done with Feature A task and found important info",
    files_created: ["src/a.ts"],
    files_modified: [],
    reasoning: { approach: "Standard implementation of feature A component" },
    knowledge_to_broadcast: [
      { category: "discovery", content: "Knowledge from Feature A", severity: "info" },
    ],
  });

  processSubagentReturn(specB, taskB, 1, {
    status: "completed",
    summary: "Done with Feature B task and found other info",
    files_created: ["src/b.ts"],
    files_modified: [],
    reasoning: { approach: "Standard implementation of feature B component" },
    knowledge_to_broadcast: [
      { category: "discovery", content: "Knowledge from Feature B", severity: "info" },
    ],
  });

  // Content of every knowledge row stored for one spec.
  const knowledgeContents = (specId: string): string[] => {
    const rows = db
      .query("SELECT * FROM knowledge WHERE spec_id = ?")
      .all(specId) as Array<{ content: string }>;
    return rows.map((row) => row.content);
  };
  const mentions = (contents: string[], needle: string): boolean =>
    contents.some((content) => content.includes(needle));

  const contentsA = knowledgeContents(specA);
  const contentsB = knowledgeContents(specB);

  // Presence: each spec holds its own entry. The query is unordered, so the
  // check must not depend on which row comes first.
  expect(contentsA.length).toBeGreaterThanOrEqual(1);
  expect(contentsB.length).toBeGreaterThanOrEqual(1);
  expect(mentions(contentsA, "Knowledge from Feature A")).toBe(true);
  expect(mentions(contentsB, "Knowledge from Feature B")).toBe(true);

  // Isolation: neither spec may contain the other spec's entry.
  expect(mentions(contentsA, "Knowledge from Feature B")).toBe(false);
  expect(mentions(contentsB, "Knowledge from Feature A")).toBe(false);
});
|
|
754
|
+
});
|