@nathapp/nax 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +70 -56
- package/docs/ROADMAP.md +45 -15
- package/docs/specs/trigger-completion.md +145 -0
- package/nax/features/routing-persistence/prd.json +104 -0
- package/nax/features/routing-persistence/progress.txt +1 -0
- package/nax/features/trigger-completion/prd.json +150 -0
- package/nax/features/trigger-completion/progress.txt +7 -0
- package/nax/status.json +15 -16
- package/package.json +1 -1
- package/src/config/types.ts +3 -1
- package/src/execution/crash-recovery.ts +11 -0
- package/src/execution/executor-types.ts +1 -1
- package/src/execution/iteration-runner.ts +1 -0
- package/src/execution/lifecycle/run-setup.ts +4 -0
- package/src/execution/sequential-executor.ts +45 -7
- package/src/interaction/plugins/auto.ts +10 -1
- package/src/metrics/aggregator.ts +2 -1
- package/src/metrics/tracker.ts +26 -14
- package/src/metrics/types.ts +2 -0
- package/src/pipeline/event-bus.ts +14 -1
- package/src/pipeline/stages/completion.ts +20 -0
- package/src/pipeline/stages/execution.ts +62 -0
- package/src/pipeline/stages/review.ts +25 -1
- package/src/pipeline/stages/routing.ts +42 -8
- package/src/pipeline/subscribers/hooks.ts +32 -0
- package/src/pipeline/subscribers/interaction.ts +36 -1
- package/src/pipeline/types.ts +2 -0
- package/src/prd/types.ts +4 -0
- package/src/routing/content-hash.ts +25 -0
- package/src/routing/index.ts +3 -0
- package/src/routing/router.ts +3 -2
- package/src/routing/strategies/keyword.ts +2 -1
- package/src/routing/strategies/llm-prompts.ts +29 -28
- package/src/utils/git.ts +21 -0
- package/test/integration/routing/plugin-routing-core.test.ts +1 -1
- package/test/unit/execution/sequential-executor.test.ts +235 -0
- package/test/unit/interaction/auto-plugin.test.ts +162 -0
- package/test/unit/interaction-plugins.test.ts +308 -1
- package/test/unit/metrics/aggregator.test.ts +164 -0
- package/test/unit/metrics/tracker.test.ts +186 -0
- package/test/unit/pipeline/stages/completion-review-gate.test.ts +218 -0
- package/test/unit/pipeline/stages/execution-ambiguity.test.ts +311 -0
- package/test/unit/pipeline/stages/execution-merge-conflict.test.ts +218 -0
- package/test/unit/pipeline/stages/review.test.ts +201 -0
- package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
- package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
- package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
- package/test/unit/pipeline/subscribers/hooks.test.ts +43 -4
- package/test/unit/pipeline/subscribers/interaction.test.ts +284 -2
- package/test/unit/prd-auto-default.test.ts +2 -2
- package/test/unit/routing/content-hash.test.ts +99 -0
- package/test/unit/routing/routing-stability.test.ts +1 -1
- package/test/unit/routing-core.test.ts +5 -5
- package/test/unit/routing-strategies.test.ts +1 -3
- package/test/unit/utils/git.test.ts +50 -0
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
* Tests for Telegram, Webhook, and Auto plugins.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import {
|
|
8
|
+
import { createHmac } from "node:crypto";
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
9
10
|
import type { InteractionRequest } from "../../src/interaction";
|
|
10
11
|
import { AutoInteractionPlugin } from "../../src/interaction/plugins/auto";
|
|
11
12
|
import { TelegramInteractionPlugin } from "../../src/interaction/plugins/telegram";
|
|
@@ -163,3 +164,309 @@ describe("AutoInteractionPlugin", () => {
|
|
|
163
164
|
expect(plugin.name).toBe("auto");
|
|
164
165
|
});
|
|
165
166
|
});
|
|
167
|
+
|
|
168
|
+
// ---------------------------------------------------------------------------
|
|
169
|
+
// Telegram send() and poll() flow tests (TC-006)
|
|
170
|
+
// ---------------------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
describe("TelegramInteractionPlugin - send() and poll()", () => {
|
|
173
|
+
const originalFetch = globalThis.fetch;
|
|
174
|
+
|
|
175
|
+
afterEach(() => {
|
|
176
|
+
mock.restore();
|
|
177
|
+
globalThis.fetch = originalFetch;
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
function makeConfirmRequest(id: string): InteractionRequest {
|
|
181
|
+
return {
|
|
182
|
+
id,
|
|
183
|
+
type: "confirm",
|
|
184
|
+
featureName: "my-feature",
|
|
185
|
+
stage: "review",
|
|
186
|
+
summary: "Proceed with merge?",
|
|
187
|
+
fallback: "abort",
|
|
188
|
+
createdAt: Date.now(),
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
test("send() POSTs to correct Telegram API URL with message text and inline keyboard", async () => {
|
|
193
|
+
const calls: Array<{ url: string; body: Record<string, unknown> }> = [];
|
|
194
|
+
|
|
195
|
+
globalThis.fetch = mock(async (url: string | URL | Request, init?: RequestInit) => {
|
|
196
|
+
const urlStr = url.toString();
|
|
197
|
+
const body = JSON.parse((init?.body as string) ?? "{}");
|
|
198
|
+
calls.push({ url: urlStr, body });
|
|
199
|
+
return new Response(
|
|
200
|
+
JSON.stringify({ ok: true, result: { message_id: 42, chat: { id: 12345 } } }),
|
|
201
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
202
|
+
);
|
|
203
|
+
}) as typeof fetch;
|
|
204
|
+
|
|
205
|
+
const plugin = new TelegramInteractionPlugin();
|
|
206
|
+
await plugin.init({ botToken: "bot-abc123", chatId: "99999" });
|
|
207
|
+
|
|
208
|
+
await plugin.send(makeConfirmRequest("tg-send-1"));
|
|
209
|
+
|
|
210
|
+
expect(calls).toHaveLength(1);
|
|
211
|
+
const { url, body } = calls[0];
|
|
212
|
+
|
|
213
|
+
// Correct API endpoint
|
|
214
|
+
expect(url).toContain("api.telegram.org/botbot-abc123/sendMessage");
|
|
215
|
+
|
|
216
|
+
// Correct chat_id
|
|
217
|
+
expect(body.chat_id).toBe("99999");
|
|
218
|
+
|
|
219
|
+
// Message text present
|
|
220
|
+
expect(typeof body.text).toBe("string");
|
|
221
|
+
expect((body.text as string).length).toBeGreaterThan(0);
|
|
222
|
+
|
|
223
|
+
// Inline keyboard has approve and reject buttons
|
|
224
|
+
const keyboard = (body.reply_markup as { inline_keyboard: Array<Array<{ text: string; callback_data: string }>> })
|
|
225
|
+
.inline_keyboard;
|
|
226
|
+
expect(Array.isArray(keyboard)).toBe(true);
|
|
227
|
+
const allButtons = keyboard.flat();
|
|
228
|
+
const approveBtn = allButtons.find((b) => b.callback_data === "tg-send-1:approve");
|
|
229
|
+
const rejectBtn = allButtons.find((b) => b.callback_data === "tg-send-1:reject");
|
|
230
|
+
expect(approveBtn).toBeDefined();
|
|
231
|
+
expect(rejectBtn).toBeDefined();
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
test("receive() parses callback_query correctly", async () => {
|
|
235
|
+
globalThis.fetch = mock(async (url: string | URL | Request, init?: RequestInit) => {
|
|
236
|
+
const urlStr = url.toString();
|
|
237
|
+
|
|
238
|
+
if (urlStr.includes("sendMessage")) {
|
|
239
|
+
return new Response(
|
|
240
|
+
JSON.stringify({ ok: true, result: { message_id: 10, chat: { id: 99999 } } }),
|
|
241
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
242
|
+
);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
if (urlStr.includes("getUpdates")) {
|
|
246
|
+
return new Response(
|
|
247
|
+
JSON.stringify({
|
|
248
|
+
ok: true,
|
|
249
|
+
result: [
|
|
250
|
+
{
|
|
251
|
+
update_id: 1,
|
|
252
|
+
callback_query: {
|
|
253
|
+
id: "cq-001",
|
|
254
|
+
data: "tg-poll-1:approve",
|
|
255
|
+
message: { message_id: 10, chat: { id: 99999 } },
|
|
256
|
+
},
|
|
257
|
+
},
|
|
258
|
+
],
|
|
259
|
+
}),
|
|
260
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
261
|
+
);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
if (urlStr.includes("answerCallbackQuery")) {
|
|
265
|
+
return new Response(JSON.stringify({ ok: true }), { status: 200 });
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
return new Response("not found", { status: 404 });
|
|
269
|
+
}) as typeof fetch;
|
|
270
|
+
|
|
271
|
+
const plugin = new TelegramInteractionPlugin();
|
|
272
|
+
await plugin.init({ botToken: "bot-abc123", chatId: "99999" });
|
|
273
|
+
|
|
274
|
+
// send() first so message_id is stored (needed for text-message flow, not callback_query)
|
|
275
|
+
await plugin.send(makeConfirmRequest("tg-poll-1"));
|
|
276
|
+
|
|
277
|
+
const response = await plugin.receive("tg-poll-1", 5000);
|
|
278
|
+
|
|
279
|
+
expect(response.action).toBe("approve");
|
|
280
|
+
expect(response.respondedBy).toBe("telegram");
|
|
281
|
+
expect(response.requestId).toBe("tg-poll-1");
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
test("receive() handles choose callback_query with value", async () => {
|
|
285
|
+
globalThis.fetch = mock(async (url: string | URL | Request) => {
|
|
286
|
+
const urlStr = url.toString();
|
|
287
|
+
|
|
288
|
+
if (urlStr.includes("sendMessage")) {
|
|
289
|
+
return new Response(
|
|
290
|
+
JSON.stringify({ ok: true, result: { message_id: 11, chat: { id: 99999 } } }),
|
|
291
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
292
|
+
);
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (urlStr.includes("getUpdates")) {
|
|
296
|
+
return new Response(
|
|
297
|
+
JSON.stringify({
|
|
298
|
+
ok: true,
|
|
299
|
+
result: [
|
|
300
|
+
{
|
|
301
|
+
update_id: 2,
|
|
302
|
+
callback_query: {
|
|
303
|
+
id: "cq-002",
|
|
304
|
+
data: "tg-choose-1:choose:option-b",
|
|
305
|
+
message: { message_id: 11, chat: { id: 99999 } },
|
|
306
|
+
},
|
|
307
|
+
},
|
|
308
|
+
],
|
|
309
|
+
}),
|
|
310
|
+
{ status: 200, headers: { "Content-Type": "application/json" } },
|
|
311
|
+
);
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
if (urlStr.includes("answerCallbackQuery")) {
|
|
315
|
+
return new Response(JSON.stringify({ ok: true }), { status: 200 });
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
return new Response("not found", { status: 404 });
|
|
319
|
+
}) as typeof fetch;
|
|
320
|
+
|
|
321
|
+
const plugin = new TelegramInteractionPlugin();
|
|
322
|
+
await plugin.init({ botToken: "bot-abc123", chatId: "99999" });
|
|
323
|
+
|
|
324
|
+
const chooseRequest: InteractionRequest = {
|
|
325
|
+
id: "tg-choose-1",
|
|
326
|
+
type: "choose",
|
|
327
|
+
featureName: "my-feature",
|
|
328
|
+
stage: "review",
|
|
329
|
+
summary: "Which option?",
|
|
330
|
+
fallback: "continue",
|
|
331
|
+
createdAt: Date.now(),
|
|
332
|
+
options: [
|
|
333
|
+
{ key: "a", label: "Option A" },
|
|
334
|
+
{ key: "b", label: "Option B" },
|
|
335
|
+
],
|
|
336
|
+
};
|
|
337
|
+
|
|
338
|
+
await plugin.send(chooseRequest);
|
|
339
|
+
const response = await plugin.receive("tg-choose-1", 5000);
|
|
340
|
+
|
|
341
|
+
expect(response.action).toBe("choose");
|
|
342
|
+
expect(response.value).toBe("option-b");
|
|
343
|
+
});
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
// ---------------------------------------------------------------------------
|
|
347
|
+
// Webhook send() and HMAC validation tests (TC-006)
|
|
348
|
+
// ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
describe("WebhookInteractionPlugin - send() and HMAC validation", () => {
|
|
351
|
+
afterEach(async () => {
|
|
352
|
+
mock.restore();
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
function makeWebhookRequest(id: string): InteractionRequest {
|
|
356
|
+
return {
|
|
357
|
+
id,
|
|
358
|
+
type: "confirm",
|
|
359
|
+
featureName: "wh-feature",
|
|
360
|
+
stage: "merge",
|
|
361
|
+
summary: "Approve merge?",
|
|
362
|
+
fallback: "abort",
|
|
363
|
+
createdAt: Date.now(),
|
|
364
|
+
};
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
test("send() POSTs payload with correct Content-Type", async () => {
|
|
368
|
+
// Start a local server to capture the outgoing request
|
|
369
|
+
const captured: { contentType: string | null; body: unknown } = { contentType: null, body: null };
|
|
370
|
+
|
|
371
|
+
const testServer = Bun.serve({
|
|
372
|
+
port: 19977,
|
|
373
|
+
fetch: async (req) => {
|
|
374
|
+
captured.contentType = req.headers.get("content-type");
|
|
375
|
+
captured.body = await req.json();
|
|
376
|
+
return new Response("OK", { status: 200 });
|
|
377
|
+
},
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
const plugin = new WebhookInteractionPlugin();
|
|
381
|
+
try {
|
|
382
|
+
await plugin.init({ url: "http://localhost:19977/hook" });
|
|
383
|
+
|
|
384
|
+
await plugin.send(makeWebhookRequest("wh-send-1"));
|
|
385
|
+
|
|
386
|
+
expect(captured.contentType).toBe("application/json");
|
|
387
|
+
expect((captured.body as { id: string }).id).toBe("wh-send-1");
|
|
388
|
+
// callbackUrl is injected by send()
|
|
389
|
+
expect(typeof (captured.body as { callbackUrl: string }).callbackUrl).toBe("string");
|
|
390
|
+
} finally {
|
|
391
|
+
testServer.stop();
|
|
392
|
+
await plugin.destroy();
|
|
393
|
+
}
|
|
394
|
+
});
|
|
395
|
+
|
|
396
|
+
test("send() includes X-Nax-Signature header when secret is configured", async () => {
|
|
397
|
+
const captured: { signature: string | null; body: string } = { signature: null, body: "" };
|
|
398
|
+
|
|
399
|
+
const testServer = Bun.serve({
|
|
400
|
+
port: 19978,
|
|
401
|
+
fetch: async (req) => {
|
|
402
|
+
captured.signature = req.headers.get("x-nax-signature");
|
|
403
|
+
captured.body = await req.text();
|
|
404
|
+
return new Response("OK", { status: 200 });
|
|
405
|
+
},
|
|
406
|
+
});
|
|
407
|
+
|
|
408
|
+
const plugin = new WebhookInteractionPlugin();
|
|
409
|
+
try {
|
|
410
|
+
await plugin.init({ url: "http://localhost:19978/hook", secret: "my-secret" });
|
|
411
|
+
|
|
412
|
+
await plugin.send(makeWebhookRequest("wh-sig-1"));
|
|
413
|
+
|
|
414
|
+
expect(captured.signature).not.toBeNull();
|
|
415
|
+
// Verify the signature matches expected HMAC
|
|
416
|
+
const expected = createHmac("sha256", "my-secret").update(captured.body).digest("hex");
|
|
417
|
+
expect(captured.signature).toBe(expected);
|
|
418
|
+
} finally {
|
|
419
|
+
testServer.stop();
|
|
420
|
+
await plugin.destroy();
|
|
421
|
+
}
|
|
422
|
+
});
|
|
423
|
+
|
|
424
|
+
test("HMAC validation: tampered payload (no signature) is rejected with 401", async () => {
|
|
425
|
+
const plugin = new WebhookInteractionPlugin();
|
|
426
|
+
// url won't be called in this test — we test the callback server
|
|
427
|
+
await plugin.init({
|
|
428
|
+
url: "http://localhost:19900/unused",
|
|
429
|
+
secret: "test-secret",
|
|
430
|
+
callbackPort: 19988,
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
// Start the callback server by calling receive() in the background
|
|
434
|
+
const receivePromise = plugin.receive("wh-hmac-1", 4000);
|
|
435
|
+
|
|
436
|
+
// Give the server a moment to bind
|
|
437
|
+
await Bun.sleep(60);
|
|
438
|
+
|
|
439
|
+
try {
|
|
440
|
+
// POST without signature → 401
|
|
441
|
+
const noSigResp = await fetch("http://localhost:19988/nax/interact/wh-hmac-1", {
|
|
442
|
+
method: "POST",
|
|
443
|
+
headers: { "Content-Type": "application/json" },
|
|
444
|
+
body: JSON.stringify({ requestId: "wh-hmac-1", action: "approve", respondedAt: Date.now() }),
|
|
445
|
+
});
|
|
446
|
+
expect(noSigResp.status).toBe(401);
|
|
447
|
+
|
|
448
|
+
// POST with wrong signature → 401
|
|
449
|
+
const badSigResp = await fetch("http://localhost:19988/nax/interact/wh-hmac-1", {
|
|
450
|
+
method: "POST",
|
|
451
|
+
headers: { "Content-Type": "application/json", "X-Nax-Signature": "deadbeef" },
|
|
452
|
+
body: JSON.stringify({ requestId: "wh-hmac-1", action: "approve", respondedAt: Date.now() }),
|
|
453
|
+
});
|
|
454
|
+
expect(badSigResp.status).toBe(401);
|
|
455
|
+
|
|
456
|
+
// POST with correct HMAC signature → 200, receive() resolves
|
|
457
|
+
const payload = JSON.stringify({ requestId: "wh-hmac-1", action: "approve", respondedAt: Date.now() });
|
|
458
|
+
const sig = createHmac("sha256", "test-secret").update(payload).digest("hex");
|
|
459
|
+
const validResp = await fetch("http://localhost:19988/nax/interact/wh-hmac-1", {
|
|
460
|
+
method: "POST",
|
|
461
|
+
headers: { "Content-Type": "application/json", "X-Nax-Signature": sig },
|
|
462
|
+
body: payload,
|
|
463
|
+
});
|
|
464
|
+
expect(validResp.status).toBe(200);
|
|
465
|
+
|
|
466
|
+
const response = await receivePromise;
|
|
467
|
+
expect(response.action).toBe("approve");
|
|
468
|
+
} finally {
|
|
469
|
+
await plugin.destroy();
|
|
470
|
+
}
|
|
471
|
+
});
|
|
472
|
+
});
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Aggregator — RRP-002: complexityAccuracy uses initialComplexity
|
|
3
|
+
*
|
|
4
|
+
* AC-6: calculateAggregateMetrics complexityAccuracy compares
|
|
5
|
+
* initialComplexity (predicted) vs finalTier (actual), not
|
|
6
|
+
* complexity (which may reflect post-escalation state).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { calculateAggregateMetrics } from "../../../src/metrics/aggregator";
|
|
11
|
+
import type { RunMetrics, StoryMetrics } from "../../../src/metrics/types";
|
|
12
|
+
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Helpers
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
function makeStoryMetrics(overrides: Partial<StoryMetrics> & { storyId: string }): StoryMetrics {
|
|
18
|
+
return {
|
|
19
|
+
storyId: overrides.storyId,
|
|
20
|
+
complexity: "medium",
|
|
21
|
+
modelTier: "balanced",
|
|
22
|
+
modelUsed: "claude-sonnet-4-5",
|
|
23
|
+
attempts: 1,
|
|
24
|
+
finalTier: "balanced",
|
|
25
|
+
success: true,
|
|
26
|
+
cost: 0.01,
|
|
27
|
+
durationMs: 5000,
|
|
28
|
+
firstPassSuccess: true,
|
|
29
|
+
startedAt: "2026-01-01T00:00:00Z",
|
|
30
|
+
completedAt: "2026-01-01T00:00:05Z",
|
|
31
|
+
...overrides,
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function makeRun(stories: StoryMetrics[]): RunMetrics {
|
|
36
|
+
return {
|
|
37
|
+
runId: "run-001",
|
|
38
|
+
feature: "test-feature",
|
|
39
|
+
startedAt: "2026-01-01T00:00:00Z",
|
|
40
|
+
completedAt: "2026-01-01T00:01:00Z",
|
|
41
|
+
totalCost: stories.reduce((sum, s) => sum + s.cost, 0),
|
|
42
|
+
totalStories: stories.length,
|
|
43
|
+
storiesCompleted: stories.filter((s) => s.success).length,
|
|
44
|
+
storiesFailed: stories.filter((s) => !s.success).length,
|
|
45
|
+
totalDurationMs: 60000,
|
|
46
|
+
stories,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// AC-6: complexityAccuracy uses initialComplexity as predicted complexity
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
describe("calculateAggregateMetrics - complexityAccuracy uses initialComplexity", () => {
|
|
55
|
+
test("complexityAccuracy keyed by initialComplexity when present", () => {
|
|
56
|
+
// Story originally predicted as 'simple' but escalated (finalTier = 'powerful')
|
|
57
|
+
const story = makeStoryMetrics({
|
|
58
|
+
storyId: "US-001",
|
|
59
|
+
complexity: "medium", // post-escalation complexity
|
|
60
|
+
initialComplexity: "simple", // original prediction
|
|
61
|
+
modelTier: "fast",
|
|
62
|
+
finalTier: "powerful",
|
|
63
|
+
attempts: 2,
|
|
64
|
+
firstPassSuccess: false,
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
const runs = [makeRun([story])];
|
|
68
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
69
|
+
|
|
70
|
+
// complexityAccuracy should be keyed by initialComplexity ("simple"), not complexity ("medium")
|
|
71
|
+
expect(aggregate.complexityAccuracy["simple"]).toBeDefined();
|
|
72
|
+
expect(aggregate.complexityAccuracy["medium"]).toBeUndefined();
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test("mismatch detected when initialComplexity tier != finalTier", () => {
|
|
76
|
+
const escalatedStory = makeStoryMetrics({
|
|
77
|
+
storyId: "US-001",
|
|
78
|
+
complexity: "medium",
|
|
79
|
+
initialComplexity: "simple",
|
|
80
|
+
modelTier: "fast",
|
|
81
|
+
finalTier: "powerful",
|
|
82
|
+
attempts: 2,
|
|
83
|
+
firstPassSuccess: false,
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
const runs = [makeRun([escalatedStory])];
|
|
87
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
88
|
+
|
|
89
|
+
// simple -> powerful: mismatch expected
|
|
90
|
+
expect(aggregate.complexityAccuracy["simple"].mismatchRate).toBeGreaterThan(0);
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
test("no mismatch when initialComplexity tier matches finalTier", () => {
|
|
94
|
+
const successStory = makeStoryMetrics({
|
|
95
|
+
storyId: "US-001",
|
|
96
|
+
complexity: "medium",
|
|
97
|
+
initialComplexity: "medium",
|
|
98
|
+
modelTier: "balanced",
|
|
99
|
+
finalTier: "balanced",
|
|
100
|
+
attempts: 1,
|
|
101
|
+
firstPassSuccess: true,
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
const runs = [makeRun([successStory])];
|
|
105
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
106
|
+
|
|
107
|
+
expect(aggregate.complexityAccuracy["medium"].mismatchRate).toBe(0);
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test("falls back to complexity when initialComplexity is absent (backward compat)", () => {
|
|
111
|
+
// Legacy story metrics without initialComplexity
|
|
112
|
+
const legacyStory = makeStoryMetrics({
|
|
113
|
+
storyId: "US-001",
|
|
114
|
+
complexity: "complex",
|
|
115
|
+
// no initialComplexity
|
|
116
|
+
modelTier: "powerful",
|
|
117
|
+
finalTier: "powerful",
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
const runs = [makeRun([legacyStory])];
|
|
121
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
122
|
+
|
|
123
|
+
// Falls back to complexity as key
|
|
124
|
+
expect(aggregate.complexityAccuracy["complex"]).toBeDefined();
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("mixes initialComplexity-keyed and legacy entries correctly", () => {
|
|
128
|
+
const modernStory = makeStoryMetrics({
|
|
129
|
+
storyId: "US-001",
|
|
130
|
+
complexity: "medium",
|
|
131
|
+
initialComplexity: "simple",
|
|
132
|
+
modelTier: "balanced",
|
|
133
|
+
finalTier: "balanced",
|
|
134
|
+
});
|
|
135
|
+
const legacyStory = makeStoryMetrics({
|
|
136
|
+
storyId: "US-002",
|
|
137
|
+
complexity: "complex",
|
|
138
|
+
// no initialComplexity
|
|
139
|
+
modelTier: "powerful",
|
|
140
|
+
finalTier: "powerful",
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
const runs = [makeRun([modernStory, legacyStory])];
|
|
144
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
145
|
+
|
|
146
|
+
expect(aggregate.complexityAccuracy["simple"]).toBeDefined(); // from initialComplexity
|
|
147
|
+
expect(aggregate.complexityAccuracy["complex"]).toBeDefined(); // from complexity fallback
|
|
148
|
+
expect(aggregate.complexityAccuracy["medium"]).toBeUndefined(); // NOT used (initialComplexity takes over)
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
test("complexityAccuracy.predicted count matches number of stories with that initialComplexity", () => {
|
|
152
|
+
const stories = [
|
|
153
|
+
makeStoryMetrics({ storyId: "US-001", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
|
|
154
|
+
makeStoryMetrics({ storyId: "US-002", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
|
|
155
|
+
makeStoryMetrics({ storyId: "US-003", complexity: "complex", initialComplexity: "complex", finalTier: "powerful" }),
|
|
156
|
+
];
|
|
157
|
+
|
|
158
|
+
const runs = [makeRun(stories)];
|
|
159
|
+
const aggregate = calculateAggregateMetrics(runs);
|
|
160
|
+
|
|
161
|
+
expect(aggregate.complexityAccuracy["simple"].predicted).toBe(2);
|
|
162
|
+
expect(aggregate.complexityAccuracy["complex"].predicted).toBe(1);
|
|
163
|
+
});
|
|
164
|
+
});
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics Tracker — RRP-002: initialComplexity in StoryMetrics
|
|
3
|
+
*
|
|
4
|
+
* AC-4: StoryMetrics gains initialComplexity?: string field
|
|
5
|
+
* AC-5: collectStoryMetrics() reads story.routing.initialComplexity,
|
|
6
|
+
* falls back to routing.complexity for backward compat
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { DEFAULT_CONFIG } from "../../../src/config/defaults";
|
|
11
|
+
import type { NaxConfig } from "../../../src/config";
|
|
12
|
+
import type { PipelineContext } from "../../../src/pipeline/types";
|
|
13
|
+
import type { PRD, UserStory } from "../../../src/prd";
|
|
14
|
+
import type { StoryRouting } from "../../../src/prd/types";
|
|
15
|
+
import { collectStoryMetrics } from "../../../src/metrics/tracker";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Helpers
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
function makeStory(overrides?: Partial<UserStory>): UserStory {
|
|
22
|
+
return {
|
|
23
|
+
id: "US-001",
|
|
24
|
+
title: "Test Story",
|
|
25
|
+
description: "Test description",
|
|
26
|
+
acceptanceCriteria: [],
|
|
27
|
+
tags: [],
|
|
28
|
+
dependencies: [],
|
|
29
|
+
status: "passed",
|
|
30
|
+
passes: true,
|
|
31
|
+
escalations: [],
|
|
32
|
+
attempts: 1,
|
|
33
|
+
...overrides,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function makePRD(story: UserStory): PRD {
|
|
38
|
+
return {
|
|
39
|
+
project: "test-project",
|
|
40
|
+
feature: "test-feature",
|
|
41
|
+
branchName: "feat/test",
|
|
42
|
+
createdAt: new Date().toISOString(),
|
|
43
|
+
updatedAt: new Date().toISOString(),
|
|
44
|
+
userStories: [story],
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function makeConfig(): NaxConfig {
|
|
49
|
+
return { ...DEFAULT_CONFIG };
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function makeCtx(story: UserStory, routingOverrides?: Partial<PipelineContext["routing"]>): PipelineContext {
|
|
53
|
+
return {
|
|
54
|
+
config: makeConfig(),
|
|
55
|
+
prd: makePRD(story),
|
|
56
|
+
story,
|
|
57
|
+
stories: [story],
|
|
58
|
+
routing: {
|
|
59
|
+
complexity: "medium",
|
|
60
|
+
modelTier: "balanced",
|
|
61
|
+
testStrategy: "test-after",
|
|
62
|
+
reasoning: "test",
|
|
63
|
+
...routingOverrides,
|
|
64
|
+
},
|
|
65
|
+
workdir: "/tmp/nax-tracker-test",
|
|
66
|
+
hooks: { hooks: {} },
|
|
67
|
+
agentResult: {
|
|
68
|
+
success: true,
|
|
69
|
+
output: "",
|
|
70
|
+
estimatedCost: 0.01,
|
|
71
|
+
durationMs: 5000,
|
|
72
|
+
},
|
|
73
|
+
} as unknown as PipelineContext;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// AC-5: collectStoryMetrics reads initialComplexity from story.routing
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
describe("collectStoryMetrics - initialComplexity field", () => {
|
|
81
|
+
test("includes initialComplexity from story.routing.initialComplexity", () => {
|
|
82
|
+
const routing: StoryRouting = {
|
|
83
|
+
complexity: "medium",
|
|
84
|
+
initialComplexity: "simple", // original prediction before potential escalation
|
|
85
|
+
testStrategy: "test-after",
|
|
86
|
+
reasoning: "test",
|
|
87
|
+
};
|
|
88
|
+
const story = makeStory({ routing });
|
|
89
|
+
const ctx = makeCtx(story, { complexity: "medium" });
|
|
90
|
+
|
|
91
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
92
|
+
|
|
93
|
+
expect(metrics.initialComplexity).toBe("simple");
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test("initialComplexity differs from complexity when story was escalated", () => {
|
|
97
|
+
const routing: StoryRouting = {
|
|
98
|
+
complexity: "medium", // complexity as classified
|
|
99
|
+
initialComplexity: "simple", // original first-classify prediction
|
|
100
|
+
modelTier: "powerful", // escalated tier
|
|
101
|
+
testStrategy: "three-session-tdd",
|
|
102
|
+
reasoning: "escalated",
|
|
103
|
+
};
|
|
104
|
+
const story = makeStory({
|
|
105
|
+
routing,
|
|
106
|
+
escalations: [
|
|
107
|
+
{
|
|
108
|
+
fromTier: "balanced",
|
|
109
|
+
toTier: "powerful",
|
|
110
|
+
reason: "test failure",
|
|
111
|
+
timestamp: new Date().toISOString(),
|
|
112
|
+
},
|
|
113
|
+
],
|
|
114
|
+
attempts: 2,
|
|
115
|
+
});
|
|
116
|
+
const ctx = makeCtx(story, { complexity: "medium", modelTier: "balanced" });
|
|
117
|
+
|
|
118
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
119
|
+
|
|
120
|
+
expect(metrics.initialComplexity).toBe("simple");
|
|
121
|
+
// complexity field unchanged (backward compat)
|
|
122
|
+
expect(metrics.complexity).toBe("medium");
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
test("falls back to routing.complexity when story.routing.initialComplexity is absent", () => {
|
|
126
|
+
// Backward compat: story.routing exists but has no initialComplexity
|
|
127
|
+
const routing: StoryRouting = {
|
|
128
|
+
complexity: "complex",
|
|
129
|
+
testStrategy: "three-session-tdd",
|
|
130
|
+
reasoning: "legacy routing",
|
|
131
|
+
// no initialComplexity
|
|
132
|
+
};
|
|
133
|
+
const story = makeStory({ routing });
|
|
134
|
+
const ctx = makeCtx(story, { complexity: "complex" });
|
|
135
|
+
|
|
136
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
137
|
+
|
|
138
|
+
expect(metrics.initialComplexity).toBe("complex");
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
test("falls back to routing.complexity when story.routing is undefined", () => {
|
|
142
|
+
const story = makeStory({ routing: undefined });
|
|
143
|
+
const ctx = makeCtx(story, { complexity: "simple" });
|
|
144
|
+
|
|
145
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
146
|
+
|
|
147
|
+
expect(metrics.initialComplexity).toBe("simple");
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
// ---------------------------------------------------------------------------
|
|
152
|
+
// AC-4: StoryMetrics type has initialComplexity?: string
|
|
153
|
+
// ---------------------------------------------------------------------------
|
|
154
|
+
|
|
155
|
+
describe("StoryMetrics type - initialComplexity field", () => {
|
|
156
|
+
test("StoryMetrics includes initialComplexity field", () => {
|
|
157
|
+
const routing: StoryRouting = {
|
|
158
|
+
complexity: "medium",
|
|
159
|
+
initialComplexity: "simple",
|
|
160
|
+
testStrategy: "test-after",
|
|
161
|
+
reasoning: "test",
|
|
162
|
+
};
|
|
163
|
+
const story = makeStory({ routing });
|
|
164
|
+
const ctx = makeCtx(story, { complexity: "medium" });
|
|
165
|
+
|
|
166
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
167
|
+
|
|
168
|
+
// TypeScript will error at compile time if initialComplexity is not on StoryMetrics
|
|
169
|
+
expect("initialComplexity" in metrics).toBe(true);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test("initialComplexity is a string when present", () => {
|
|
173
|
+
const routing: StoryRouting = {
|
|
174
|
+
complexity: "expert",
|
|
175
|
+
initialComplexity: "expert",
|
|
176
|
+
testStrategy: "three-session-tdd",
|
|
177
|
+
reasoning: "test",
|
|
178
|
+
};
|
|
179
|
+
const story = makeStory({ routing });
|
|
180
|
+
const ctx = makeCtx(story, { complexity: "expert" });
|
|
181
|
+
|
|
182
|
+
const metrics = collectStoryMetrics(ctx, new Date().toISOString());
|
|
183
|
+
|
|
184
|
+
expect(typeof metrics.initialComplexity).toBe("string");
|
|
185
|
+
});
|
|
186
|
+
});
|