karajan-code 1.29.1 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +2 -0
- package/src/config.js +7 -4
- package/src/hu/graph.js +71 -0
- package/src/hu/store.js +153 -0
- package/src/mcp/run-kj.js +2 -0
- package/src/mcp/tools.js +2 -0
- package/src/orchestrator/pre-loop-stages.js +210 -0
- package/src/orchestrator.js +13 -3
- package/src/prompts/hu-reviewer.js +130 -0
- package/src/roles/hu-reviewer-role.js +112 -0
- package/src/roles/index.js +1 -0
- package/templates/roles/hu-reviewer.md +192 -0
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -80,6 +80,8 @@ program
|
|
|
80
80
|
.option("--enable-triage")
|
|
81
81
|
.option("--enable-discover")
|
|
82
82
|
.option("--enable-architect")
|
|
83
|
+
.option("--enable-hu-reviewer")
|
|
84
|
+
.option("--hu-file <path>", "YAML file with HU stories to certify before coding")
|
|
83
85
|
.option("--enable-serena")
|
|
84
86
|
.option("--mode <name>")
|
|
85
87
|
.option("--max-iterations <n>")
|
package/src/config.js
CHANGED
|
@@ -19,7 +19,8 @@ const DEFAULTS = {
|
|
|
19
19
|
impeccable: { provider: null, model: null },
|
|
20
20
|
triage: { provider: null, model: null },
|
|
21
21
|
discover: { provider: null, model: null },
|
|
22
|
-
architect: { provider: null, model: null }
|
|
22
|
+
architect: { provider: null, model: null },
|
|
23
|
+
hu_reviewer: { provider: null, model: null }
|
|
23
24
|
},
|
|
24
25
|
pipeline: {
|
|
25
26
|
planner: { enabled: false },
|
|
@@ -32,6 +33,7 @@ const DEFAULTS = {
|
|
|
32
33
|
triage: { enabled: true },
|
|
33
34
|
discover: { enabled: false },
|
|
34
35
|
architect: { enabled: false },
|
|
36
|
+
hu_reviewer: { enabled: false },
|
|
35
37
|
auto_simplify: true
|
|
36
38
|
},
|
|
37
39
|
review_mode: "standard",
|
|
@@ -281,7 +283,8 @@ const PIPELINE_ENABLE_FLAGS = [
|
|
|
281
283
|
["enableSolomon", "solomon"], ["enableResearcher", "researcher"],
|
|
282
284
|
["enableTester", "tester"], ["enableSecurity", "security"], ["enableImpeccable", "impeccable"],
|
|
283
285
|
["enableTriage", "triage"], ["enableDiscover", "discover"],
|
|
284
|
-
["enableArchitect", "architect"]
|
|
286
|
+
["enableArchitect", "architect"],
|
|
287
|
+
["enableHuReviewer", "hu_reviewer"]
|
|
285
288
|
];
|
|
286
289
|
|
|
287
290
|
const AUTO_SIMPLIFY_FLAG = "autoSimplify";
|
|
@@ -414,14 +417,14 @@ export function resolveRole(config, role) {
|
|
|
414
417
|
let provider = roleConfig.provider ?? null;
|
|
415
418
|
if (!provider && role === "coder") provider = legacyCoder;
|
|
416
419
|
if (!provider && role === "reviewer") provider = legacyReviewer;
|
|
417
|
-
if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect" || role === "audit")) {
|
|
420
|
+
if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect" || role === "audit" || role === "hu_reviewer" || role === "hu-reviewer")) {
|
|
418
421
|
provider = roles.coder?.provider || legacyCoder;
|
|
419
422
|
}
|
|
420
423
|
|
|
421
424
|
let model = roleConfig.model ?? null;
|
|
422
425
|
if (!model && role === "coder") model = config?.coder_options?.model ?? null;
|
|
423
426
|
if (!model && role === "reviewer") model = config?.reviewer_options?.model ?? null;
|
|
424
|
-
if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect")) {
|
|
427
|
+
if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "impeccable" || role === "triage" || role === "discover" || role === "architect" || role === "hu_reviewer" || role === "hu-reviewer")) {
|
|
425
428
|
model = config?.coder_options?.model ?? null;
|
|
426
429
|
}
|
|
427
430
|
|
package/src/hu/graph.js
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
 * Topologically sort HU stories so that every story appears after all of the
 * stories listed in its `blocked_by` array (Kahn's algorithm).
 *
 * @param {Array<{id: string, blocked_by?: string[]}>} stories - Stories to order.
 * @returns {string[]} Story IDs, dependencies first.
 * @throws {Error} If two stories share an ID, if a `blocked_by` entry names a
 *   story that is not in `stories`, or if the dependencies form a cycle.
 */
export function topologicalSort(stories) {
  const dependents = new Map(); // id -> IDs of the stories it blocks
  const inDegree = new Map(); // id -> number of not-yet-emitted dependencies

  for (const story of stories) {
    // A repeated ID would otherwise surface below as a bogus "circular dependency".
    if (inDegree.has(story.id)) {
      throw new Error(`Duplicate story id ${story.id} in batch`);
    }
    dependents.set(story.id, []);
    inDegree.set(story.id, 0);
  }

  for (const story of stories) {
    for (const dep of (story.blocked_by || [])) {
      if (!inDegree.has(dep)) throw new Error(`Dependency ${dep} not found in batch`);
      dependents.get(dep).push(story.id);
      inDegree.set(story.id, inDegree.get(story.id) + 1);
    }
  }

  // Seed the output with every story that has no dependencies, in input order.
  const sorted = [];
  for (const [id, degree] of inDegree) {
    if (degree === 0) sorted.push(id);
  }

  // `sorted` doubles as the work queue: advancing a cursor through it avoids
  // the O(n) cost of Array.prototype.shift on every dequeue.
  for (let cursor = 0; cursor < sorted.length; cursor += 1) {
    for (const dependent of dependents.get(sorted[cursor])) {
      const remaining = inDegree.get(dependent) - 1;
      inDegree.set(dependent, remaining);
      if (remaining === 0) sorted.push(dependent);
    }
  }

  // Any story never emitted still has an unresolved dependency, i.e. sits on a cycle.
  if (sorted.length !== stories.length) {
    throw new Error("Circular dependency detected in HU batch");
  }

  return sorted;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Tell whether a story may run, i.e. every story it is blocked by exists in
 * the batch and has status "done". A story with no `blocked_by` entries is
 * always ready.
 * @param {{blocked_by?: string[]}} story - Story whose dependencies are checked.
 * @param {{stories: Array<{id: string, status: string}>}} batch - Batch used to look up dependency status.
 * @returns {boolean} True when no dependency is missing or unfinished.
 */
export function isStoryReady(story, batch) {
  const dependencies = story.blocked_by;
  if (!dependencies || dependencies.length === 0) return true;

  for (const depId of dependencies) {
    const match = batch.stories.find(candidate => candidate.id === depId);
    // An unknown dependency counts as unfinished.
    if (!match || match.status !== "done") return false;
  }
  return true;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Collect the stories that can be executed next: those with status
 * "certified" for which `isStoryReady` reports all dependencies done.
 * @param {{stories: Array<{id: string, status: string, blocked_by?: string[]}>}} batch - Batch to scan.
 * @returns {Array<object>} Matching story objects, in batch order.
 */
export function getNextReadyStories(batch) {
  const ready = [];
  for (const story of batch.stories) {
    if (story.status !== "certified") continue;
    if (isStoryReady(story, batch)) ready.push(story);
  }
  return ready;
}
|
package/src/hu/store.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { getKarajanHome } from "../utils/paths.js";
|
|
4
|
+
|
|
5
|
+
// FUTURE: hu-storage adapter for PG/Trello/etc — currently local files only
|
|
6
|
+
|
|
7
|
+
/**
 * Resolve the directory that holds all HU story batches: the "hu-stories"
 * folder under the Karajan home directory. Computed on every call.
 * @returns {string} Path to the hu-stories directory.
 */
function getHuDir() {
  const home = getKarajanHome();
  return path.join(home, "hu-stories");
}
|
|
11
|
+
|
|
12
|
+
/**
 * Create a new HU batch from raw story definitions and persist it as
 * `batch.json` inside the session's directory (created if missing).
 *
 * Every story starts with status "pending", no certification, no quality
 * scores and no context requests. Stories without an `id` receive a generated
 * `HU-<epoch ms>-<index>` identifier.
 *
 * @param {string} sessionId - The session identifier (used as the directory name).
 * @param {Array<{id?: string, text: string, blocked_by?: string[]|string}>} stories - Raw story inputs.
 * @returns {Promise<object>} The created batch object.
 */
export async function createHuBatch(sessionId, stories) {
  const dir = path.join(getHuDir(), sessionId);
  await fs.mkdir(dir, { recursive: true });

  // One clock reading for the whole batch so created_at/updated_at and the
  // generated IDs agree across every record.
  const createdAt = new Date();
  const now = createdAt.toISOString();
  const idSeed = createdAt.getTime();

  // Always store blocked_by as a fresh array: copying keeps later batch edits
  // from mutating the caller's input, and wrapping a scalar (e.g. YAML
  // `blocked_by: HU-1`) keeps it from being iterated character by character.
  const normalizeBlockedBy = (value) => {
    if (Array.isArray(value)) return [...value];
    return value ? [value] : [];
  };

  const batch = {
    session_id: sessionId,
    created_at: now,
    stories: stories.map((s, i) => ({
      id: s.id || `HU-${idSeed}-${i}`,
      status: "pending",
      original: { text: s.text },
      blocked_by: normalizeBlockedBy(s.blocked_by),
      certified: null,
      quality: null,
      context_requests: [],
      created_at: now,
      updated_at: now
    }))
  };

  await fs.writeFile(path.join(dir, "batch.json"), JSON.stringify(batch, null, 2));
  return batch;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Read a previously stored HU batch (`batch.json` in the session's directory)
 * and parse it. Rejects when the file cannot be read or is not valid JSON.
 * @param {string} sessionId - The session identifier.
 * @returns {Promise<object>} The parsed batch object.
 */
export async function loadHuBatch(sessionId) {
  const batchPath = path.join(getHuDir(), sessionId, "batch.json");
  const contents = await fs.readFile(batchPath, "utf8");
  const batch = JSON.parse(contents);
  return batch;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Persist a batch to `batch.json` in the session's directory, stamping the
 * batch's `updated_at` with the current time first (the passed object is
 * mutated). The directory is not created here.
 * @param {string} sessionId - The session identifier.
 * @param {object} batch - The batch object to persist.
 * @returns {Promise<void>}
 */
export async function saveHuBatch(sessionId, batch) {
  const target = path.join(getHuDir(), sessionId, "batch.json");
  batch.updated_at = new Date().toISOString();
  const serialized = JSON.stringify(batch, null, 2);
  await fs.writeFile(target, serialized);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Set the status of a single story within a batch (in place) and refresh its
 * `updated_at` timestamp.
 * @param {object} batch - The batch object.
 * @param {string} storyId - Story ID to update.
 * @param {string} status - New status value.
 * @param {object} [extra={}] - Additional fields to merge into the story.
 *   `status` and `updated_at` keys in `extra` are overridden by this call.
 * @returns {object} The updated story (same object as stored in the batch).
 * @throws {Error} If no story with `storyId` exists in the batch.
 */
export function updateStoryStatus(batch, storyId, status, extra = {}) {
  const story = batch.stories.find(s => s.id === storyId);
  if (!story) throw new Error(`Story ${storyId} not found`);
  // Merge extras first so they can never clobber the requested status or the
  // fresh timestamp.
  Object.assign(story, extra);
  story.status = status;
  story.updated_at = new Date().toISOString();
  return story;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Attach quality scores to a story (in place). The stored object is a copy of
 * `quality` with an `evaluated_at` timestamp added; the story's `updated_at`
 * is refreshed as well.
 * @param {object} batch - The batch object.
 * @param {string} storyId - Story ID.
 * @param {object} quality - Quality scores object.
 * @returns {object} The updated story.
 * @throws {Error} If no story with `storyId` exists in the batch.
 */
export function updateStoryQuality(batch, storyId, quality) {
  const target = batch.stories.find(candidate => candidate.id === storyId);
  if (!target) {
    throw new Error(`Story ${storyId} not found`);
  }

  const evaluatedAt = new Date().toISOString();
  target.quality = { ...quality, evaluated_at: evaluatedAt };
  target.updated_at = new Date().toISOString();
  return target;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Record certified HU data on a story (in place), switch its status to
 * "certified" and refresh its `updated_at` timestamp.
 * @param {object} batch - The batch object.
 * @param {string} storyId - Story ID.
 * @param {object} certified - Certified HU data to store on the story.
 * @returns {object} The updated story.
 * @throws {Error} If no story with `storyId` exists in the batch.
 */
export function updateStoryCertified(batch, storyId, certified) {
  const target = batch.stories.find(candidate => candidate.id === storyId);
  if (!target) {
    throw new Error(`Story ${storyId} not found`);
  }

  return Object.assign(target, {
    certified,
    status: "certified",
    updated_at: new Date().toISOString()
  });
}
|
|
112
|
+
|
|
113
|
+
/**
 * Append an unanswered context request to a story (in place), switch its
 * status to "needs_context" and refresh its `updated_at` timestamp.
 * The request's `question` is stored under the `question_to_fde` key.
 * @param {object} batch - The batch object.
 * @param {string} storyId - Story ID.
 * @param {{fields_needed: string[], question: string}} request - Context request.
 * @returns {object} The updated story.
 * @throws {Error} If no story with `storyId` exists in the batch.
 */
export function addContextRequest(batch, storyId, request) {
  const target = batch.stories.find(candidate => candidate.id === storyId);
  if (!target) {
    throw new Error(`Story ${storyId} not found`);
  }

  const entry = {
    requested_at: new Date().toISOString(),
    fields_needed: request.fields_needed,
    question_to_fde: request.question,
    answered_at: null,
    answer: null
  };
  target.context_requests.push(entry);

  target.status = "needs_context";
  target.updated_at = new Date().toISOString();
  return target;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Answer the most recent pending (unanswered) context request of a story and
 * reset the story's status to "pending" so it gets re-evaluated.
 *
 * If the story has no unanswered request, the answer is not stored, but the
 * status and `updated_at` are still updated.
 *
 * @param {object} batch - The batch object.
 * @param {string} storyId - Story ID.
 * @param {string} answer - The FDE's answer.
 * @returns {object} The updated story.
 * @throws {Error} If no story with `storyId` exists in the batch.
 */
export function answerContextRequest(batch, storyId, answer) {
  const story = batch.stories.find(s => s.id === storyId);
  if (!story) throw new Error(`Story ${storyId} not found`);

  // Walk backwards: the request raised last is the one the answer responds to;
  // older unanswered requests may be leftovers from an earlier run.
  let pending;
  for (let i = story.context_requests.length - 1; i >= 0; i -= 1) {
    if (!story.context_requests[i].answered_at) {
      pending = story.context_requests[i];
      break;
    }
  }

  if (pending) {
    pending.answered_at = new Date().toISOString();
    pending.answer = answer;
  }
  story.status = "pending"; // back to pending for re-evaluation
  story.updated_at = new Date().toISOString();
  return story;
}
|
package/src/mcp/run-kj.js
CHANGED
|
@@ -46,6 +46,8 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
|
|
|
46
46
|
normalizeBoolFlag(options.enableTriage, "--enable-triage", args);
|
|
47
47
|
normalizeBoolFlag(options.enableDiscover, "--enable-discover", args);
|
|
48
48
|
normalizeBoolFlag(options.enableArchitect, "--enable-architect", args);
|
|
49
|
+
normalizeBoolFlag(options.enableHuReviewer, "--enable-hu-reviewer", args);
|
|
50
|
+
addOptionalValue(args, "--hu-file", options.huFile);
|
|
49
51
|
normalizeBoolFlag(options.enableSerena, "--enable-serena", args);
|
|
50
52
|
normalizeBoolFlag(options.autoCommit, "--auto-commit", args);
|
|
51
53
|
normalizeBoolFlag(options.autoPush, "--auto-push", args);
|
package/src/mcp/tools.js
CHANGED
|
@@ -74,7 +74,9 @@ export const tools = [
|
|
|
74
74
|
enableTriage: { type: "boolean" },
|
|
75
75
|
enableDiscover: { type: "boolean" },
|
|
76
76
|
enableArchitect: { type: "boolean" },
|
|
77
|
+
enableHuReviewer: { type: "boolean" },
|
|
77
78
|
architectModel: { type: "string" },
|
|
79
|
+
huFile: { type: "string", description: "Path to YAML file with HU stories to certify before coding" },
|
|
78
80
|
enableSerena: { type: "boolean" },
|
|
79
81
|
enableBecaria: { type: "boolean", description: "Enable BecarIA Gateway (early PR + dispatch comments/reviews)" },
|
|
80
82
|
reviewerFallback: { type: "string" },
|
|
@@ -3,6 +3,7 @@ import { ResearcherRole } from "../roles/researcher-role.js";
|
|
|
3
3
|
import { PlannerRole } from "../roles/planner-role.js";
|
|
4
4
|
import { DiscoverRole } from "../roles/discover-role.js";
|
|
5
5
|
import { ArchitectRole } from "../roles/architect-role.js";
|
|
6
|
+
import { HuReviewerRole } from "../roles/hu-reviewer-role.js";
|
|
6
7
|
import { createAgent } from "../agents/index.js";
|
|
7
8
|
import { createArchitectADRs } from "../planning-game/architect-adrs.js";
|
|
8
9
|
import { addCheckpoint, markSessionStatus } from "../session-store.js";
|
|
@@ -10,6 +11,8 @@ import { emitProgress, makeEvent } from "../utils/events.js";
|
|
|
10
11
|
import { parsePlannerOutput } from "../prompts/planner.js";
|
|
11
12
|
import { selectModelsForRoles } from "../utils/model-selector.js";
|
|
12
13
|
import { createStallDetector } from "../utils/stall-detector.js";
|
|
14
|
+
import { createHuBatch, loadHuBatch, saveHuBatch, updateStoryStatus, updateStoryQuality, updateStoryCertified, addContextRequest, answerContextRequest } from "../hu/store.js";
|
|
15
|
+
import { topologicalSort } from "../hu/graph.js";
|
|
13
16
|
|
|
14
17
|
const ROLE_NAMES = ["planner", "researcher", "architect", "refactorer", "reviewer", "tester", "security", "impeccable"];
|
|
15
18
|
|
|
@@ -507,3 +510,210 @@ export async function runDiscoverStage({ config, logger, emitter, eventBase, ses
|
|
|
507
510
|
|
|
508
511
|
return { stageResult };
|
|
509
512
|
}
|
|
513
|
+
|
|
514
|
+
/**
 * Emit the terminal `hu-reviewer:end` progress event with the given payload.
 * @param {object} emitter - Progress emitter passed through to emitProgress.
 * @param {object} eventBase - Base event fields shared by all stage events.
 * @param {object} payload - Event payload (status, message, optional detail).
 */
function emitHuReviewerEnd(emitter, eventBase, payload) {
  emitProgress(emitter, makeEvent("hu-reviewer:end", { ...eventBase, stage: "hu-reviewer" }, payload));
}

/**
 * Emit a failing `hu-reviewer:end` event and build the matching early-return value.
 * @param {object} emitter - Progress emitter.
 * @param {object} eventBase - Base event fields.
 * @param {string} error - Human-readable failure description.
 * @returns {{stageResult: {ok: false, error: string}}}
 */
function failHuReviewerStage(emitter, eventBase, error) {
  const stageResult = { ok: false, error };
  emitHuReviewerEnd(emitter, eventBase, { status: "fail", message: error });
  return { stageResult };
}

/**
 * Build the text sent to the reviewer for one story: the original text plus
 * every context answer already recorded on the story, so a re-evaluation
 * actually sees what the FDE replied.
 * @param {{original: {text: string}, context_requests?: Array<object>}} story
 * @returns {string}
 */
function buildHuStoryTextWithContext(story) {
  const answered = (story.context_requests || []).filter(r => r.answered_at && r.answer);
  if (answered.length === 0) return story.original.text;

  const qaLines = answered.map(r => (
    r.question_to_fde ? `Q: ${r.question_to_fde}\nA: ${r.answer}` : `A: ${r.answer}`
  ));
  return `${story.original.text}\n\nAdditional context provided by the FDE:\n${qaLines.join("\n")}`;
}

/**
 * Run the HU Reviewer stage: load stories from a YAML file, have the HU
 * reviewer role evaluate them (up to 5 rounds, asking for missing context via
 * `askQuestion` when available), persist the batch after each round, and
 * return the certified stories in dependency order.
 *
 * @param {object} params
 * @param {object} params.config - Pipeline configuration (reads `roles.hu_reviewer`).
 * @param {object} params.logger - Logger (uses `setContext` and `warn`).
 * @param {object} params.emitter - Progress event emitter.
 * @param {object} params.eventBase - Base fields merged into every emitted event.
 * @param {object} params.session - Current session (reads `id` and `task`).
 * @param {object} params.coderRole - Resolved coder role, used as provider/model fallback.
 * @param {Function} params.trackBudget - Called once per review round with usage info.
 * @param {string} params.huFile - Path to the YAML file with HU stories.
 * @param {Function} [params.askQuestion] - Async callback used to ask for missing
 *   context; when absent, the loop stops as soon as any story needs context.
 * @returns {Promise<{stageResult: object}>} `ok: false` with `error` when the file
 *   cannot be read or parsed; otherwise counts plus the ordered certified stories.
 */
export async function runHuReviewerStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget, huFile, askQuestion }) {
  logger.setContext({ iteration: 0, stage: "hu-reviewer" });
  emitProgress(
    emitter,
    makeEvent("hu-reviewer:start", { ...eventBase, stage: "hu-reviewer" }, {
      message: "HU Reviewer certifying user stories"
    })
  );

  // --- Load YAML file ---
  const yaml = await import("js-yaml");
  const fs = await import("node:fs/promises");
  let rawYaml;
  try {
    rawYaml = await fs.readFile(huFile, "utf8");
  } catch (err) {
    return failHuReviewerStage(emitter, eventBase, `Could not read HU file: ${err.message}`);
  }

  let parsed;
  try {
    parsed = yaml.load(rawYaml);
  } catch (err) {
    return failHuReviewerStage(emitter, eventBase, `Invalid YAML in HU file: ${err.message}`);
  }

  // Accept either a top-level list or an object with a `stories` list.
  const stories = Array.isArray(parsed) ? parsed : (parsed?.stories || []);
  if (!Array.isArray(stories)) {
    // A scalar/object here would otherwise crash later while building the batch.
    return failHuReviewerStage(emitter, eventBase, "Invalid HU file: \"stories\" must be a list");
  }

  if (stories.length === 0) {
    const stageResult = { ok: true, certified: 0, stories: [] };
    emitHuReviewerEnd(emitter, eventBase, { status: "ok", message: "No stories to evaluate" });
    return { stageResult };
  }

  // --- Create or load batch ---
  const batchSessionId = `hu-${session.id}`;
  let batch;
  try {
    batch = await loadHuBatch(batchSessionId);
  } catch (err) {
    // A missing file is the normal first-run case; anything else is worth surfacing
    // because the stored batch is about to be replaced.
    if (err?.code !== "ENOENT") {
      logger.warn(`Could not load existing HU batch, creating a new one: ${err.message}`);
    }
    batch = await createHuBatch(batchSessionId, stories);
  }

  // --- Evaluate loop (re-evaluate until all certified or needs_context with no askQuestion) ---
  const huReviewerProvider = config?.roles?.hu_reviewer?.provider || coderRole.provider;
  const huReviewerOnOutput = ({ stream, line }) => {
    emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "hu-reviewer" }, {
      message: line,
      detail: { stream, agent: huReviewerProvider }
    }));
  };

  const maxRounds = 5;
  let round = 0;

  while (round < maxRounds) {
    round += 1;

    const pendingStories = batch.stories.filter(s => s.status === "pending" || s.status === "needs_context");
    if (pendingStories.length === 0) break;

    // Include previously recorded FDE answers so re-evaluation is not a repeat of round one.
    const storiesToEvaluate = pendingStories.map(s => ({ id: s.id, text: buildHuStoryTextWithContext(s) }));

    const stall = createStallDetector({
      onOutput: huReviewerOnOutput, emitter, eventBase, stage: "hu-reviewer", provider: huReviewerProvider
    });

    const huReviewer = new HuReviewerRole({ config, logger, emitter, createAgentFn: createAgent });
    await huReviewer.init({ task: session.task, sessionId: session.id, iteration: 0 });
    const reviewStart = Date.now();
    let reviewOutput;
    try {
      reviewOutput = await huReviewer.run({ stories: storiesToEvaluate, onOutput: stall.onOutput });
    } catch (err) {
      logger.warn(`HU Reviewer threw: ${err.message}`);
      reviewOutput = { ok: false, summary: `HU Reviewer error: ${err.message}`, result: { error: err.message } };
    } finally {
      stall.stop();
    }

    trackBudget({
      role: "hu-reviewer",
      provider: huReviewerProvider,
      model: config?.roles?.hu_reviewer?.model || coderRole.model,
      result: reviewOutput,
      duration_ms: Date.now() - reviewStart
    });

    if (!reviewOutput.ok || !reviewOutput.result?.evaluations) {
      logger.warn("HU Reviewer returned no usable evaluations; stopping evaluation rounds");
      break;
    }

    // --- Process evaluations ---
    for (const evaluation of reviewOutput.result.evaluations) {
      const storyId = evaluation.story_id;
      try {
        updateStoryQuality(batch, storyId, evaluation.scores);
      } catch {
        continue; // story not found in batch, skip
      }

      if (evaluation.verdict === "certified") {
        updateStoryCertified(batch, storyId, evaluation.certified_hu);
      } else if (evaluation.verdict === "needs_context" && evaluation.context_needed) {
        addContextRequest(batch, storyId, {
          fields_needed: evaluation.context_needed.fields_needed || [],
          question: evaluation.context_needed.question_to_fde || ""
        });
      } else if (evaluation.verdict === "needs_rewrite" && evaluation.rewritten) {
        // The reviewer's rewrite is accepted as the certified version.
        updateStoryCertified(batch, storyId, evaluation.rewritten);
      } else {
        updateStoryStatus(batch, storyId, "pending");
      }
    }

    await saveHuBatch(batchSessionId, batch);

    // --- Check if any need context ---
    const needsContext = batch.stories.filter(s => s.status === "needs_context");
    if (needsContext.length > 0) {
      if (!askQuestion) {
        // No interactive input — leave the stories in needs_context and stop.
        break;
      }

      const consolidatedQuestions = reviewOutput.result.batch_summary?.consolidated_questions
        || needsContext.map(s => {
          const pending = s.context_requests.find(r => !r.answered_at);
          return pending ? `[${s.id}] ${pending.question_to_fde}` : null;
        }).filter(Boolean).join("\n");

      emitProgress(emitter, makeEvent("hu-reviewer:needs-context", { ...eventBase, stage: "hu-reviewer" }, {
        message: `${needsContext.length} story(ies) need context from FDE`,
        detail: { questions: consolidatedQuestions }
      }));

      const answer = await askQuestion(
        `The HU Reviewer needs additional context:\n\n${consolidatedQuestions}\n\nPlease provide your answers:`,
        { iteration: 0, stage: "hu-reviewer" }
      );

      if (!answer) break;

      // --- Incorporate FDE answers and re-evaluate ---
      for (const s of needsContext) {
        answerContextRequest(batch, s.id, answer);
      }
      await saveHuBatch(batchSessionId, batch);
      // Loop will re-evaluate the stories that went back to pending
    }
  }

  const certifiedStories = batch.stories.filter(s => s.status === "certified");

  await addCheckpoint(session, {
    stage: "hu-reviewer",
    iteration: 0,
    ok: true,
    certified: certifiedStories.length,
    total: batch.stories.length
  });

  // --- Return certified stories in topological order ---
  // Sort the whole batch and then keep the certified IDs: sorting only the
  // certified subset would throw whenever one of them depends on a story that
  // is not certified yet, silently losing the dependency order.
  let orderedIds;
  try {
    const certifiedIds = new Set(certifiedStories.map(s => s.id));
    orderedIds = topologicalSort(batch.stories).filter(id => certifiedIds.has(id));
  } catch (err) {
    logger.warn(`Could not order HU stories by dependency, keeping batch order: ${err.message}`);
    orderedIds = certifiedStories.map(s => s.id);
  }

  const orderedStories = orderedIds.map(id => batch.stories.find(s => s.id === id)).filter(Boolean);

  const stageResult = {
    ok: true,
    certified: certifiedStories.length,
    total: batch.stories.length,
    needsContext: batch.stories.filter(s => s.status === "needs_context").length,
    stories: orderedStories,
    batchSessionId
  };

  emitHuReviewerEnd(emitter, eventBase, {
    status: "ok",
    message: `HU Review complete: ${certifiedStories.length}/${batch.stories.length} certified`,
    detail: stageResult
  });

  return { stageResult };
}
|
package/src/orchestrator.js
CHANGED
|
@@ -30,7 +30,7 @@ import { resolveReviewProfile } from "./review/profiles.js";
|
|
|
30
30
|
import { CoderRole } from "./roles/coder-role.js";
|
|
31
31
|
import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
|
|
32
32
|
import { PipelineContext } from "./orchestrator/pipeline-context.js";
|
|
33
|
-
import { runTriageStage, runResearcherStage, runArchitectStage, runPlannerStage, runDiscoverStage } from "./orchestrator/pre-loop-stages.js";
|
|
33
|
+
import { runTriageStage, runResearcherStage, runArchitectStage, runPlannerStage, runDiscoverStage, runHuReviewerStage } from "./orchestrator/pre-loop-stages.js";
|
|
34
34
|
import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, runSonarCloudStage, runReviewerStage } from "./orchestrator/iteration-stages.js";
|
|
35
35
|
import { runTesterStage, runSecurityStage, runImpeccableStage } from "./orchestrator/post-loop-stages.js";
|
|
36
36
|
import { waitForCooldown, MAX_STANDBY_RETRIES } from "./orchestrator/standby.js";
|
|
@@ -51,11 +51,12 @@ function resolvePipelineFlags(config) {
|
|
|
51
51
|
reviewerEnabled: config.pipeline?.reviewer?.enabled !== false,
|
|
52
52
|
discoverEnabled: Boolean(config.pipeline?.discover?.enabled),
|
|
53
53
|
architectEnabled: Boolean(config.pipeline?.architect?.enabled),
|
|
54
|
+
huReviewerEnabled: Boolean(config.pipeline?.hu_reviewer?.enabled),
|
|
54
55
|
};
|
|
55
56
|
}
|
|
56
57
|
|
|
57
58
|
async function handleDryRun({ task, config, flags, emitter, pipelineFlags }) {
|
|
58
|
-
const { plannerEnabled, refactorerEnabled, researcherEnabled, testerEnabled, securityEnabled, impeccableEnabled, reviewerEnabled, discoverEnabled, architectEnabled } = pipelineFlags;
|
|
59
|
+
const { plannerEnabled, refactorerEnabled, researcherEnabled, testerEnabled, securityEnabled, impeccableEnabled, reviewerEnabled, discoverEnabled, architectEnabled, huReviewerEnabled } = pipelineFlags;
|
|
59
60
|
const plannerRole = resolveRole(config, "planner");
|
|
60
61
|
const coderRole = resolveRole(config, "coder");
|
|
61
62
|
const reviewerRole = resolveRole(config, "reviewer");
|
|
@@ -89,7 +90,8 @@ async function handleDryRun({ task, config, flags, emitter, pipelineFlags }) {
|
|
|
89
90
|
tester_enabled: testerEnabled,
|
|
90
91
|
security_enabled: securityEnabled,
|
|
91
92
|
impeccable_enabled: impeccableEnabled,
|
|
92
|
-
solomon_enabled: Boolean(config.pipeline?.solomon?.enabled)
|
|
93
|
+
solomon_enabled: Boolean(config.pipeline?.solomon?.enabled),
|
|
94
|
+
hu_reviewer_enabled: huReviewerEnabled
|
|
93
95
|
},
|
|
94
96
|
limits: {
|
|
95
97
|
max_iterations: config.max_iterations,
|
|
@@ -707,6 +709,14 @@ async function handleReviewerRetryAndSolomon({ config, session, emitter, eventBa
|
|
|
707
709
|
|
|
708
710
|
|
|
709
711
|
async function runPreLoopStages({ config, logger, emitter, eventBase, session, flags, pipelineFlags, coderRole, trackBudget, task, askQuestion, pgTaskId, pgProject, stageResults }) {
|
|
712
|
+
// --- HU Reviewer (first stage, before everything else, opt-in) ---
|
|
713
|
+
const huFile = flags.huFile || null;
|
|
714
|
+
if (flags.enableHuReviewer !== undefined) pipelineFlags.huReviewerEnabled = Boolean(flags.enableHuReviewer);
|
|
715
|
+
if (pipelineFlags.huReviewerEnabled && huFile) {
|
|
716
|
+
const huResult = await runHuReviewerStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget, huFile, askQuestion });
|
|
717
|
+
stageResults.huReviewer = huResult.stageResult;
|
|
718
|
+
}
|
|
719
|
+
|
|
710
720
|
// --- Intent classifier (deterministic pre-triage, opt-in) ---
|
|
711
721
|
if (config.guards?.intent?.enabled) {
|
|
712
722
|
const intentResult = classifyIntent(task, config);
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Preamble placed at the top of the HU reviewer prompt (three sentences joined by single spaces).
const SUBAGENT_PREAMBLE =
  "IMPORTANT: You are running as a Karajan sub-agent." + " " +
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration." + " " +
  "Do NOT use any MCP tools. Focus only on evaluating HUs.";

// Verdict values named in the prompt's JSON schema.
const VALID_VERDICTS = new Set([
  "certified",
  "needs_rewrite",
  "needs_context"
]);

// Recognised antipattern identifiers.
const VALID_ANTIPATTERNS = new Set([
  "ghost_user",
  "swiss_army_knife",
  "implementation_leak",
  "moving_goalpost",
  "orphan_story",
  "invisible_dependency",
  "premature_optimization"
]);

// Score dimensions, in the order they appear in the prompt's JSON schema.
const DIMENSION_KEYS = [
  "D1_jtbd_context",
  "D2_user_specificity",
  "D3_behavior_change",
  "D4_control_zone",
  "D5_time_constraints",
  "D6_survivable_experiment"
];
|
|
16
|
+
|
|
17
|
+
/**
 * Assemble the HU reviewer prompt. Sections are joined by blank lines in this
 * order: sub-agent preamble, optional instructions, a "Stories to Evaluate"
 * heading with one section per story, the JSON output contract, and optional
 * additional context.
 * @param {{stories: Array<{id: string, text: string}>, instructions: string|null, context?: string|null}} params
 * @returns {string} The assembled prompt.
 */
export function buildHuReviewerPrompt({ stories, instructions, context = null }) {
  const storySections = stories.map(story => `### ${story.id}\n${story.text}`);

  const parts = [
    SUBAGENT_PREAMBLE,
    ...(instructions ? [instructions] : []),
    "## Stories to Evaluate",
    ...storySections,
    "Return a single valid JSON object and nothing else.",
    `JSON schema: {"evaluations":[{"story_id":string,"scores":{"D1_jtbd_context":number,"D2_user_specificity":number,"D3_behavior_change":number,"D4_control_zone":number,"D5_time_constraints":number,"D6_survivable_experiment":number},"total":number,"antipatterns_detected":[string],"verdict":"certified|needs_rewrite|needs_context","evaluation_notes":string,"rewritten":object|null,"certified_hu":object|null,"context_needed":object|null}],"batch_summary":{"total":number,"certified":number,"needs_rewrite":number,"needs_context":number,"consolidated_questions":string}}`,
    ...(context ? [`## Additional Context\n${context}`] : [])
  ];

  return parts.join("\n\n");
}
|
|
46
|
+
|
|
47
|
+
/**
 * Normalise a dimension score to an integer in the 0-10 range.
 *
 * Only numbers and non-blank numeric strings are treated as scores. Every
 * other value (booleans, arrays, objects, null, undefined, blank strings,
 * NaN, +/-Infinity) yields 0, so a malformed value in the agent's JSON is
 * never coerced into a plausible-looking score.
 *
 * @param {*} value - Raw score as found in the agent output.
 * @returns {number} Rounded score, limited to 0..10.
 */
function clampScore(value) {
  let numeric;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  } else {
    return 0;
  }

  if (!Number.isFinite(numeric)) return 0;
  return Math.max(0, Math.min(10, Math.round(numeric)));
}
|
|
57
|
+
|
|
58
|
+
/**
 * Canonicalise a label reported by the agent: surrounding whitespace is
 * dropped, letters are lower-cased and runs of spaces or hyphens become a
 * single underscore (" Needs Rewrite " -> "needs_rewrite").
 *
 * @param {string} label
 * @returns {string}
 */
function normalizeLabel(label) {
  return label.trim().toLowerCase().replace(/[\s-]+/g, "_");
}

/**
 * Parse and validate a single evaluation object.
 *
 * - Entries without `story_id` or `scores` are rejected (null is returned).
 * - Each dimension score is clamped; `total` is recomputed from the clamped
 *   scores, so the total reported by the agent is ignored.
 * - Antipatterns are normalised, restricted to the known identifiers and
 *   de-duplicated; non-string entries are dropped.
 * - An unrecognised or missing verdict falls back to "needs_context".
 *
 * NOTE(review): a recognised verdict is accepted as reported; it is not
 * re-checked against the scores here.
 *
 * @param {object} raw - One entry of the agent's `evaluations` array.
 * @returns {object|null} The sanitised evaluation, or null when unusable.
 */
function parseEvaluation(raw) {
  if (!raw || !raw.story_id || !raw.scores) return null;

  const scores = {};
  for (const key of DIMENSION_KEYS) {
    scores[key] = clampScore(raw.scores[key]);
  }
  const total = Object.values(scores).reduce((sum, score) => sum + score, 0);

  const reported = Array.isArray(raw.antipatterns_detected) ? raw.antipatterns_detected : [];
  const recognised = reported
    .filter((entry) => typeof entry === "string")
    .map(normalizeLabel)
    .filter((entry) => VALID_ANTIPATTERNS.has(entry));
  // A Set keeps first-seen order while removing repeated identifiers.
  const antipatterns = [...new Set(recognised)];

  const reportedVerdict = normalizeLabel(String(raw.verdict || ""));
  const verdict = VALID_VERDICTS.has(reportedVerdict) ? reportedVerdict : "needs_context";

  return {
    story_id: raw.story_id,
    scores,
    total,
    antipatterns_detected: antipatterns,
    verdict,
    evaluation_notes: raw.evaluation_notes || "",
    rewritten: raw.rewritten || null,
    certified_hu: raw.certified_hu || null,
    context_needed: raw.context_needed || null
  };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Find the index of the `}` that closes the object opened at `start`.
 * Double-quoted string literals (including escaped characters) are skipped so
 * braces inside them do not affect the nesting depth.
 *
 * @param {string} text
 * @param {number} start - Index of an opening `{`.
 * @returns {number} Index of the matching `}`, or -1 when the object never closes.
 */
function findObjectEnd(text, start) {
  let depth = 0;
  let inString = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (ch === "\\") {
        i += 1; // skip the escaped character
      } else if (ch === "\"") {
        inString = false;
      }
    } else if (ch === "\"") {
      inString = true;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Extract the reviewer payload (a JSON object with an `evaluations` array)
 * from free-form agent output.
 *
 * The span from the first `{` to the last `}` is tried first. When that span
 * is not a usable payload (for example because the agent surrounded the JSON
 * with prose that itself contains braces), each brace-balanced top-level
 * object is tried in order of appearance.
 *
 * @param {string} text
 * @returns {object|null} The parsed payload, or null when none is found.
 */
function extractReviewPayload(text) {
  const parseCandidate = (candidate) => {
    try {
      const value = JSON.parse(candidate);
      return value !== null && typeof value === "object" && Array.isArray(value.evaluations)
        ? value
        : null;
    } catch {
      // Not valid JSON: an expected outcome for prose fragments.
      return null;
    }
  };

  const first = text.indexOf("{");
  const last = text.lastIndexOf("}");
  if (first === -1 || last < first) return null;

  const whole = parseCandidate(text.slice(first, last + 1));
  if (whole) return whole;

  let start = first;
  while (start !== -1) {
    const end = findObjectEnd(text, start);
    if (end !== -1) {
      const payload = parseCandidate(text.slice(start, end + 1));
      if (payload) return payload;
    }
    // Continue after a closed object, or at the next brace when it never closed.
    start = text.indexOf("{", end === -1 ? start + 1 : end + 1);
  }
  return null;
}

/**
 * Parse the raw output from the HU reviewer agent.
 *
 * The batch counters are recomputed from the evaluations that survive
 * validation, so the counters reported by the agent are ignored; only
 * `consolidated_questions` is taken from the agent's `batch_summary`.
 *
 * @param {string} raw - Raw text output from the agent.
 * @returns {object|null} Parsed result with `evaluations` and `batch_summary`,
 *   or null when `raw` is not a string, holds no usable JSON payload, or no
 *   evaluation survives validation.
 */
export function parseHuReviewerOutput(raw) {
  if (typeof raw !== "string") return null;

  const parsed = extractReviewPayload(raw.trim());
  if (!parsed) return null;

  const evaluations = parsed.evaluations
    .map((entry) => parseEvaluation(entry))
    .filter(Boolean);
  if (evaluations.length === 0) return null;

  const countVerdict = (verdict) => evaluations.filter((e) => e.verdict === verdict).length;

  return {
    evaluations,
    batch_summary: {
      total: evaluations.length,
      certified: countVerdict("certified"),
      needs_rewrite: countVerdict("needs_rewrite"),
      needs_context: countVerdict("needs_context"),
      consolidated_questions: parsed.batch_summary?.consolidated_questions || ""
    }
  };
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { BaseRole } from "./base-role.js";
|
|
2
|
+
import { createAgent as defaultCreateAgent } from "../agents/index.js";
|
|
3
|
+
import { buildHuReviewerPrompt, parseHuReviewerOutput } from "../prompts/hu-reviewer.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Pick the provider used by the hu-reviewer role.
 *
 * The role entries are consulted in priority order ("hu-reviewer", then
 * "hu_reviewer", then "coder"); the first truthy `provider` wins. When none
 * is configured the result is "claude".
 *
 * @param {object} config
 * @returns {string}
 */
function resolveProvider(config) {
  for (const roleKey of ["hu-reviewer", "hu_reviewer", "coder"]) {
    const configured = config?.roles?.[roleKey]?.provider;
    if (configured) return configured;
  }
  return "claude";
}
|
|
18
|
+
|
|
19
|
+
/**
 * Render the batch counters as a one-line, human-readable summary.
 * The evaluated total is always listed; each verdict counter is listed only
 * when it is greater than zero.
 *
 * @param {object} parsed - Parsed HU reviewer output (must carry `batch_summary`).
 * @returns {string}
 */
function buildSummary(parsed) {
  const counters = parsed.batch_summary;
  const verdictLabels = [
    [counters.certified, "certified"],
    [counters.needs_rewrite, "need rewrite"],
    [counters.needs_context, "need context"]
  ];

  const fragments = [`${counters.total} HU(s) evaluated`];
  for (const [count, label] of verdictLabels) {
    if (count > 0) fragments.push(`${count} ${label}`);
  }

  return `HU Review complete: ${fragments.join(", ")}`;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Role that asks an agent to evaluate a batch of HUs (user stories) and
 * returns the parsed, validated evaluations.
 */
export class HuReviewerRole extends BaseRole {
  /**
   * @param {{config: object, logger: object, emitter?: object|null, createAgentFn?: Function|null}} params
   *   `createAgentFn` replaces the default agent factory when provided.
   */
  constructor({ config, logger, emitter = null, createAgentFn = null }) {
    super({ name: "hu-reviewer", config, logger, emitter });
    this._createAgent = createAgentFn || defaultCreateAgent;
  }

  /**
   * Run the HU review.
   *
   * Outcomes:
   * - agent failure: `ok: false` with the error text and the provider;
   * - output that cannot be parsed (or a parser exception): `ok: true` with an
   *   empty batch and the raw agent output under `result.raw`;
   * - parsed output: `ok: true` with the evaluations and the batch summary.
   *
   * @param {{stories: Array<{id: string, text: string}>, context?: string|null, onOutput?: Function|null}} input
   * @returns {Promise<{ok: boolean, result: object, summary: string, usage?: object}>}
   */
  async execute(input) {
    const stories = input?.stories || [];
    const context = input?.context || null;
    const onOutput = input?.onOutput || null;

    const provider = resolveProvider(this.config);
    const agent = this._createAgent(provider, this.config, this.logger);

    // `this.instructions` is not assigned in this class; presumably provided by BaseRole.
    const prompt = buildHuReviewerPrompt({ stories, instructions: this.instructions, context });
    const runArgs = onOutput
      ? { prompt, role: "hu-reviewer", onOutput }
      : { prompt, role: "hu-reviewer" };
    const result = await agent.runTask(runArgs);

    if (!result.ok) {
      const reason = result.error;
      return {
        ok: false,
        result: {
          error: reason || result.output || "HU Review failed",
          provider
        },
        summary: `HU Review failed: ${reason || "unknown error"}`,
        usage: result.usage
      };
    }

    // Shared shape for output that yields no structured evaluations.
    const unstructured = () => ({
      ok: true,
      result: {
        evaluations: [],
        batch_summary: { total: 0, certified: 0, needs_rewrite: 0, needs_context: 0, consolidated_questions: "" },
        raw: result.output,
        provider
      },
      summary: "HU Review complete (unstructured output)",
      usage: result.usage
    });

    try {
      const parsed = parseHuReviewerOutput(result.output);
      if (!parsed) return unstructured();

      const { evaluations, batch_summary } = parsed;
      return {
        ok: true,
        result: { evaluations, batch_summary, provider },
        summary: buildSummary(parsed),
        usage: result.usage
      };
    } catch {
      // A parsing exception is reported the same way as unparseable output.
      return unstructured();
    }
  }
}
|
package/src/roles/index.js
CHANGED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
# HU Reviewer Role
|
|
2
|
+
|
|
3
|
+
You are the **HU Reviewer** in a multi-role AI pipeline. You are a **mandatory certification gate** for user stories (HUs). No HU may proceed to development without your explicit certification.
|
|
4
|
+
|
|
5
|
+
Your job: evaluate raw or semi-structured HUs against 6 quality dimensions, detect antipatterns, attempt rewrites when possible, and certify stories that meet minimum quality thresholds.
|
|
6
|
+
|
|
7
|
+
## The 6 Quality Dimensions
|
|
8
|
+
|
|
9
|
+
Each dimension scores 0-10. Total possible: 60.
|
|
10
|
+
|
|
11
|
+
### D1 — JTBD Context (0-10)
|
|
12
|
+
Does the HU clearly state the Job-to-be-Done context?
|
|
13
|
+
- **0**: No context at all. "Add a button."
|
|
14
|
+
- **3**: Vague context. "Users need a better experience."
|
|
15
|
+
- **5**: Some context but missing the WHY behind the job.
|
|
16
|
+
- **7**: Clear context with functional job stated. "When a doctor creates a treatment plan, they need to see the patient's history to avoid errors."
|
|
17
|
+
- **10**: Full JTBD with functional, emotional, and social dimensions.
|
|
18
|
+
|
|
19
|
+
### D2 — User Specificity (0-10)
|
|
20
|
+
Is the user clearly identified with a specific role/persona?
|
|
21
|
+
|
|
22
|
+
**HARD RULE: Maximum score of 5 if the user is generic (e.g., "As a user", "As an admin") without further persona qualification.**
|
|
23
|
+
|
|
24
|
+
- **0**: No user mentioned. Passive voice. "A report should be generated."
|
|
25
|
+
- **3**: Generic user. "As a user, I want..."
|
|
26
|
+
- **5**: Role-based but not specific. "As an admin..."
|
|
27
|
+
- **7**: Specific persona. "As a clinic receptionist managing 20+ appointments daily..."
|
|
28
|
+
- **10**: Named persona with behavioral context. "As Dr. Garcia, an orthodontist who treats 15 patients per day with 3D planning..."
|
|
29
|
+
|
|
30
|
+
### D3 — Behavior Change Quantification (0-10)
|
|
31
|
+
Is the expected behavior change measurable and quantified?
|
|
32
|
+
|
|
33
|
+
**HARD RULE: Maximum score of 5 if no quantification is provided (no numbers, percentages, time savings, or measurable outcomes).**
|
|
34
|
+
|
|
35
|
+
- **0**: No measurable outcome. "Improve the workflow."
|
|
36
|
+
- **3**: Qualitative improvement only. "Make it faster."
|
|
37
|
+
- **5**: Directional but not quantified. "Reduce the number of clicks."
|
|
38
|
+
- **7**: Partially quantified. "Reduce treatment creation time by at least 30%."
|
|
39
|
+
- **10**: Fully quantified with baseline and target. "Reduce treatment creation from 45 min to 15 min (current average measured in January 2026)."
|
|
40
|
+
|
|
41
|
+
### D4 — Control Zone (0-10)
|
|
42
|
+
Is the scope clearly bounded? Are boundaries and out-of-scope items explicit?
|
|
43
|
+
- **0**: No boundaries. "Improve everything."
|
|
44
|
+
- **3**: Implied boundaries but nothing explicit.
|
|
45
|
+
- **5**: Some boundaries mentioned. "Only for the extranet."
|
|
46
|
+
- **7**: Clear boundaries with explicit out-of-scope items.
|
|
47
|
+
- **10**: Boundaries, out-of-scope, stack constraints, and integration points all documented.
|
|
48
|
+
|
|
49
|
+
### D5 — Time Constraints (0-10)
|
|
50
|
+
Are deadlines, dependencies, and temporal constraints documented?
|
|
51
|
+
- **0**: No time reference at all.
|
|
52
|
+
- **3**: Vague urgency. "ASAP."
|
|
53
|
+
- **5**: Sprint/quarter mentioned. "Q2 2026."
|
|
54
|
+
- **7**: Specific deadline with rationale. "Before March release because client X is waiting."
|
|
55
|
+
- **10**: Full timeline with milestones, dependencies, and risk dates.
|
|
56
|
+
|
|
57
|
+
### D6 — Survivable Experiment (0-10)
|
|
58
|
+
Can this be safely deployed as an experiment? Is there a rollback plan?
|
|
59
|
+
- **0**: No consideration of failure. No rollback.
|
|
60
|
+
- **3**: Implicit safety. "It's a small change."
|
|
61
|
+
- **5**: Feature flag mentioned but no rollback plan.
|
|
62
|
+
- **7**: Feature flag + rollback plan + blast radius defined.
|
|
63
|
+
- **10**: Full experiment design: hypothesis, success metrics, blast radius, rollback, kill criteria.
|
|
64
|
+
|
|
65
|
+
## Minimum Certification Thresholds
|
|
66
|
+
|
|
67
|
+
- **Certified**: Total >= 35 AND no dimension below 3 AND no HARD RULE violations (i.e., D2 > 5 AND D3 > 5)
|
|
68
|
+
- **Needs Rewrite**: Does not meet the Certified criteria, AND (Total >= 20 OR any dimension can be improved with available information)
|
|
69
|
+
- **Needs Context**: Information is fundamentally missing and cannot be inferred
|
|
70
|
+
|
|
71
|
+
## The 7 Antipatterns
|
|
72
|
+
|
|
73
|
+
Detect and flag these antipatterns in every HU:
|
|
74
|
+
|
|
75
|
+
### 1. ghost_user
|
|
76
|
+
No real user identified. The HU uses passive voice or impersonal constructions.
|
|
77
|
+
- Example: "A report should be generated weekly." (Who reads it? Who acts on it?)
|
|
78
|
+
|
|
79
|
+
### 2. swiss_army_knife
|
|
80
|
+
The HU tries to do too many things at once. Multiple "and" clauses in the want.
|
|
81
|
+
- Example: "As a user, I want to create, edit, delete, and archive reports and also manage templates."
|
|
82
|
+
|
|
83
|
+
### 3. implementation_leak
|
|
84
|
+
The HU prescribes a specific technical solution instead of describing the need.
|
|
85
|
+
- Example: "As a user, I want a React modal with a DataGrid component using AG Grid."
|
|
86
|
+
|
|
87
|
+
### 4. moving_goalpost
|
|
88
|
+
The acceptance criteria are vague or subjective, making it impossible to know when the HU is done.
|
|
89
|
+
- Example: "The page should load fast." "The UI should be intuitive."
|
|
90
|
+
|
|
91
|
+
### 5. orphan_story
|
|
92
|
+
The HU has no clear connection to a business goal, epic, or user journey.
|
|
93
|
+
- Example: "Refactor the database schema." (Why? What business outcome?)
|
|
94
|
+
|
|
95
|
+
### 6. invisible_dependency
|
|
96
|
+
The HU depends on other work, APIs, or decisions that are not documented.
|
|
97
|
+
- Example: "Integrate with the new payment provider." (Which one? Is the contract ready?)
|
|
98
|
+
|
|
99
|
+
### 7. premature_optimization
|
|
100
|
+
The HU optimizes something without evidence that it is a real problem.
|
|
101
|
+
- Example: "Cache all API responses to improve performance." (Is performance actually a problem? Where is the data?)
|
|
102
|
+
|
|
103
|
+
## Rewrite Instructions
|
|
104
|
+
|
|
105
|
+
When a HU scores below the certification threshold but has enough information to improve:
|
|
106
|
+
|
|
107
|
+
1. Attempt to rewrite it preserving the original intent
|
|
108
|
+
2. Make the user more specific (D2)
|
|
109
|
+
3. Add quantification where possible (D3)
|
|
110
|
+
4. Clarify boundaries (D4)
|
|
111
|
+
5. Add acceptance criteria in Given/When/Then format
|
|
112
|
+
6. Flag what you assumed vs. what was in the original
|
|
113
|
+
|
|
114
|
+
**Never invent business requirements.** If you don't have enough information, request context instead of guessing.
|
|
115
|
+
|
|
116
|
+
## Certified HU Format
|
|
117
|
+
|
|
118
|
+
When a HU is certified, produce it in this structured format:
|
|
119
|
+
|
|
120
|
+
```json
|
|
121
|
+
{
|
|
122
|
+
"as": "specific user persona with behavioral context",
|
|
123
|
+
"context": "JTBD context — the situation and the job being done",
|
|
124
|
+
"want": "single, focused behavior change",
|
|
125
|
+
"so_that": "measurable business outcome with quantification",
|
|
126
|
+
"acceptance_criteria": [
|
|
127
|
+
{"given": "...", "when": "...", "then": "..."},
|
|
128
|
+
{"given": "...", "when": "...", "then": "..."}
|
|
129
|
+
],
|
|
130
|
+
"boundaries": {
|
|
131
|
+
"in_scope": ["..."],
|
|
132
|
+
"out_of_scope": ["..."]
|
|
133
|
+
},
|
|
134
|
+
"stack_constraints": ["..."],
|
|
135
|
+
"definition_of_done": ["..."],
|
|
136
|
+
"risk": "rollback plan and blast radius"
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Output Format
|
|
141
|
+
|
|
142
|
+
Return a single valid JSON object with this schema:
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"evaluations": [
|
|
147
|
+
{
|
|
148
|
+
"story_id": "HU-xxx",
|
|
149
|
+
"scores": {
|
|
150
|
+
"D1_jtbd_context": 0,
|
|
151
|
+
"D2_user_specificity": 0,
|
|
152
|
+
"D3_behavior_change": 0,
|
|
153
|
+
"D4_control_zone": 0,
|
|
154
|
+
"D5_time_constraints": 0,
|
|
155
|
+
"D6_survivable_experiment": 0
|
|
156
|
+
},
|
|
157
|
+
"total": 0,
|
|
158
|
+
"antipatterns_detected": [],
|
|
159
|
+
"verdict": "certified | needs_rewrite | needs_context",
|
|
160
|
+
"evaluation_notes": "explanation of scores and verdict",
|
|
161
|
+
"rewritten": null,
|
|
162
|
+
"certified_hu": null,
|
|
163
|
+
"context_needed": null
|
|
164
|
+
}
|
|
165
|
+
],
|
|
166
|
+
"batch_summary": {
|
|
167
|
+
"total": 0,
|
|
168
|
+
"certified": 0,
|
|
169
|
+
"needs_rewrite": 0,
|
|
170
|
+
"needs_context": 0,
|
|
171
|
+
"consolidated_questions": ""
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Field details:
|
|
177
|
+
- **rewritten**: When verdict is "needs_rewrite" and you can improve it, provide the rewritten HU in the certified format above. Set to null otherwise.
|
|
178
|
+
- **certified_hu**: When verdict is "certified", provide the HU in the certified format above. Set to null otherwise.
|
|
179
|
+
- **context_needed**: When verdict is "needs_context", provide `{"fields_needed": ["D2", "D3", ...], "question_to_fde": "What specific question should be asked?"}`. Set to null otherwise.
|
|
180
|
+
- **consolidated_questions**: In batch_summary, group all questions across stories for efficient FDE communication.
|
|
181
|
+
|
|
182
|
+
## HARD RULES Summary
|
|
183
|
+
|
|
184
|
+
1. **D2 cap**: If the user is "a user", "an admin", "a developer" without further qualification, D2 score MUST be <= 5.
|
|
185
|
+
2. **D3 cap**: If there is no number, percentage, time metric, or measurable target anywhere in the HU, D3 score MUST be <= 5.
|
|
186
|
+
3. **Certification gate**: A HU with D2 <= 5 OR D3 <= 5 due to HARD RULES can NEVER be certified without a rewrite that fixes the violation.
|
|
187
|
+
4. **No business invention**: Never invent requirements. If information is missing, set verdict to "needs_context".
|
|
188
|
+
5. **Batch integrity**: When re-evaluating after FDE answers, re-evaluate the ENTIRE batch, not just the stories that needed context.
|
|
189
|
+
|
|
190
|
+
{{task}}
|
|
191
|
+
|
|
192
|
+
{{context}}
|