karajan-code 1.14.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +2 -0
- package/src/config.js +10 -4
- package/src/mcp/run-kj.js +2 -0
- package/src/mcp/server-handlers.js +70 -0
- package/src/mcp/tools.js +18 -0
- package/src/orchestrator/pre-loop-stages.js +75 -6
- package/src/orchestrator.js +14 -3
- package/src/prompts/discover.js +227 -0
- package/src/prompts/triage.js +2 -2
- package/src/roles/discover-role.js +130 -0
- package/src/roles/index.js +1 -0
- package/src/roles/triage-role.js +6 -0
- package/templates/roles/discover.md +167 -0
- package/templates/roles/triage.md +9 -1
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -72,6 +72,7 @@ program
|
|
|
72
72
|
.option("--enable-tester")
|
|
73
73
|
.option("--enable-security")
|
|
74
74
|
.option("--enable-triage")
|
|
75
|
+
.option("--enable-discover")
|
|
75
76
|
.option("--enable-serena")
|
|
76
77
|
.option("--mode <name>")
|
|
77
78
|
.option("--max-iterations <n>")
|
|
@@ -87,6 +88,7 @@ program
|
|
|
87
88
|
.option("--auto-pr")
|
|
88
89
|
.option("--enable-becaria", "Enable BecarIA Gateway (early PR + dispatch comments/reviews)")
|
|
89
90
|
.option("--branch-prefix <prefix>")
|
|
91
|
+
.option("--task-type <type>", "Explicit task type: sw, infra, doc, add-tests, refactor")
|
|
90
92
|
.option("--methodology <name>")
|
|
91
93
|
.option("--no-auto-rebase")
|
|
92
94
|
.option("--no-sonar")
|
package/src/config.js
CHANGED
|
@@ -16,7 +16,8 @@ const DEFAULTS = {
|
|
|
16
16
|
researcher: { provider: null, model: null },
|
|
17
17
|
tester: { provider: null, model: null },
|
|
18
18
|
security: { provider: null, model: null },
|
|
19
|
-
triage: { provider: null, model: null }
|
|
19
|
+
triage: { provider: null, model: null },
|
|
20
|
+
discover: { provider: null, model: null }
|
|
20
21
|
},
|
|
21
22
|
pipeline: {
|
|
22
23
|
planner: { enabled: false },
|
|
@@ -25,7 +26,8 @@ const DEFAULTS = {
|
|
|
25
26
|
researcher: { enabled: false },
|
|
26
27
|
tester: { enabled: true },
|
|
27
28
|
security: { enabled: true },
|
|
28
|
-
triage: { enabled: true }
|
|
29
|
+
triage: { enabled: true },
|
|
30
|
+
discover: { enabled: false }
|
|
29
31
|
},
|
|
30
32
|
review_mode: "standard",
|
|
31
33
|
max_iterations: 5,
|
|
@@ -245,6 +247,9 @@ export function applyRunOverrides(config, flags) {
|
|
|
245
247
|
if (flags.tester) out.roles.tester.provider = flags.tester;
|
|
246
248
|
if (flags.security) out.roles.security.provider = flags.security;
|
|
247
249
|
if (flags.triage) out.roles.triage.provider = flags.triage;
|
|
250
|
+
if (flags.discover) out.roles.discover.provider = flags.discover;
|
|
251
|
+
if (flags.discoverModel) out.roles.discover.model = String(flags.discoverModel);
|
|
252
|
+
if (flags.enableDiscover !== undefined) out.pipeline.discover.enabled = Boolean(flags.enableDiscover);
|
|
248
253
|
if (flags.plannerModel) out.roles.planner.model = String(flags.plannerModel);
|
|
249
254
|
if (flags.coderModel) {
|
|
250
255
|
out.roles.coder.model = String(flags.coderModel);
|
|
@@ -318,14 +323,14 @@ export function resolveRole(config, role) {
|
|
|
318
323
|
let provider = roleConfig.provider ?? null;
|
|
319
324
|
if (!provider && role === "coder") provider = legacyCoder;
|
|
320
325
|
if (!provider && role === "reviewer") provider = legacyReviewer;
|
|
321
|
-
if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage")) {
|
|
326
|
+
if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage" || role === "discover")) {
|
|
322
327
|
provider = roles.coder?.provider || legacyCoder;
|
|
323
328
|
}
|
|
324
329
|
|
|
325
330
|
let model = roleConfig.model ?? null;
|
|
326
331
|
if (!model && role === "coder") model = config?.coder_options?.model ?? null;
|
|
327
332
|
if (!model && role === "reviewer") model = config?.reviewer_options?.model ?? null;
|
|
328
|
-
if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage")) {
|
|
333
|
+
if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage" || role === "discover")) {
|
|
329
334
|
model = config?.coder_options?.model ?? null;
|
|
330
335
|
}
|
|
331
336
|
|
|
@@ -344,6 +349,7 @@ function requiredRolesFor(commandName, config) {
|
|
|
344
349
|
if (config?.pipeline?.security?.enabled) required.push("security");
|
|
345
350
|
return required;
|
|
346
351
|
}
|
|
352
|
+
if (commandName === "discover") return ["discover"];
|
|
347
353
|
if (commandName === "plan") return ["planner"];
|
|
348
354
|
if (commandName === "code") return ["coder"];
|
|
349
355
|
if (commandName === "review") return ["reviewer"];
|
package/src/mcp/run-kj.js
CHANGED
|
@@ -43,11 +43,13 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
|
|
|
43
43
|
normalizeBoolFlag(options.enableTester, "--enable-tester", args);
|
|
44
44
|
normalizeBoolFlag(options.enableSecurity, "--enable-security", args);
|
|
45
45
|
normalizeBoolFlag(options.enableTriage, "--enable-triage", args);
|
|
46
|
+
normalizeBoolFlag(options.enableDiscover, "--enable-discover", args);
|
|
46
47
|
normalizeBoolFlag(options.enableSerena, "--enable-serena", args);
|
|
47
48
|
normalizeBoolFlag(options.autoCommit, "--auto-commit", args);
|
|
48
49
|
normalizeBoolFlag(options.autoPush, "--auto-push", args);
|
|
49
50
|
normalizeBoolFlag(options.autoPr, "--auto-pr", args);
|
|
50
51
|
if (options.autoRebase === false) args.push("--no-auto-rebase");
|
|
52
|
+
addOptionalValue(args, "--task-type", options.taskType);
|
|
51
53
|
normalizeBoolFlag(options.noSonar, "--no-sonar", args);
|
|
52
54
|
if (options.smartModels === true) args.push("--smart-models");
|
|
53
55
|
if (options.smartModels === false) args.push("--no-smart-models");
|
|
@@ -428,6 +428,59 @@ export async function handleReviewDirect(a, server, extra) {
|
|
|
428
428
|
return { ok: true, review: parsed || result.output, raw: result.output };
|
|
429
429
|
}
|
|
430
430
|
|
|
431
|
+
/**
 * MCP handler for `kj_discover`: runs the discover role directly (outside the
 * full pipeline) and returns its parsed result.
 *
 * @param {object} a - Tool arguments. Reads `task`, `mode` (defaults to "gaps"),
 *   `context`, `pgTask` and `pgProject`.
 * @param {object} server - MCP server handle, passed through to the project
 *   dir resolver, emitter and tracker log helpers.
 * @param {object} extra - Passed through to `buildDirectEmitter`.
 * @returns {Promise<object>} `{ ok: true, ...result.result, summary }` on success.
 * @throws {Error} When the role reports `ok: false`, or when any step rejects.
 */
export async function handleDiscoverDirect(a, server, extra) {
  const config = await buildConfig(a, "discover");
  const logger = createLogger(config.output.log_level, "mcp");

  const discoverRole = resolveRole(config, "discover");
  await assertAgentsAvailable([discoverRole.provider]);

  const projectDir = await resolveProjectDir(server);
  const mode = a.mode || "gaps";
  const runLog = createRunLog(projectDir);

  let stallDetector = null;
  let result;
  // Everything after the run log is opened lives inside one try/finally so the
  // log (and the stall detector, once created) is released even when the role
  // import, init() or run() rejects.
  try {
    runLog.logText(`[kj_discover] started — mode=${mode}`);
    const emitter = buildDirectEmitter(server, runLog, extra);
    const eventBase = { sessionId: null, iteration: 0, startedAt: Date.now() };
    const onOutput = ({ stream, line }) => {
      emitter.emit("progress", { type: "agent:output", stage: "discover", message: line, detail: { stream, agent: discoverRole.provider } });
    };
    stallDetector = createStallDetector({
      onOutput, emitter, eventBase, stage: "discover", provider: discoverRole.provider
    });

    const { DiscoverRole } = await import("../roles/discover-role.js");
    const discover = new DiscoverRole({ config, logger, emitter });
    await discover.init({ task: a.task });

    // Append the Planning Game card reference to the context when both ids are
    // given. Only the ids are added here; no card lookup is performed.
    let context = a.context || null;
    if (a.pgTask && a.pgProject) {
      const pgContext = `Planning Game card: ${a.pgTask} (project: ${a.pgProject})`;
      context = context ? `${context}\n\n${pgContext}` : pgContext;
    }

    sendTrackerLog(server, "discover", "running", discoverRole.provider);
    runLog.logText(`[discover] agent launched, waiting for response...`);
    try {
      result = await discover.run({ task: a.task, mode, context, onOutput: stallDetector.onOutput });
    } catch (err) {
      // Do not leave the tracker stuck on "running" when the agent throws.
      sendTrackerLog(server, "discover", "failed");
      throw err;
    }
  } finally {
    if (stallDetector) {
      stallDetector.stop();
      const stats = stallDetector.stats();
      runLog.logText(`[discover] finished — lines=${stats.lineCount}, bytes=${stats.bytesReceived}, elapsed=${Math.round(stats.elapsedMs / 1000)}s`);
    }
    runLog.close();
  }

  if (!result.ok) {
    sendTrackerLog(server, "discover", "failed");
    throw new Error(result.result?.error || result.summary || "Discovery failed");
  }

  sendTrackerLog(server, "discover", "done");
  return { ok: true, ...result.result, summary: result.summary };
}
|
|
483
|
+
|
|
431
484
|
export async function handleToolCall(name, args, server, extra) {
|
|
432
485
|
const a = asObject(args);
|
|
433
486
|
|
|
@@ -565,6 +618,12 @@ export async function handleToolCall(name, args, server, extra) {
|
|
|
565
618
|
if (!a.task) {
|
|
566
619
|
return failPayload("Missing required field: task");
|
|
567
620
|
}
|
|
621
|
+
if (a.taskType) {
|
|
622
|
+
const validTypes = ["sw", "infra", "doc", "add-tests", "refactor"];
|
|
623
|
+
if (!validTypes.includes(a.taskType)) {
|
|
624
|
+
return failPayload(`Invalid taskType "${a.taskType}". Valid values: ${validTypes.join(", ")}`);
|
|
625
|
+
}
|
|
626
|
+
}
|
|
568
627
|
if (!isPreflightAcked()) {
|
|
569
628
|
const { config } = await loadConfig();
|
|
570
629
|
const { listAgents } = await import("../commands/agents.js");
|
|
@@ -629,5 +688,16 @@ export async function handleToolCall(name, args, server, extra) {
|
|
|
629
688
|
return handlePlanDirect(a, server, extra);
|
|
630
689
|
}
|
|
631
690
|
|
|
691
|
+
if (name === "kj_discover") {
|
|
692
|
+
if (!a.task) {
|
|
693
|
+
return failPayload("Missing required field: task");
|
|
694
|
+
}
|
|
695
|
+
const validModes = ["gaps", "momtest", "wendel", "classify", "jtbd"];
|
|
696
|
+
if (a.mode && !validModes.includes(a.mode)) {
|
|
697
|
+
return failPayload(`Invalid mode "${a.mode}". Valid values: ${validModes.join(", ")}`);
|
|
698
|
+
}
|
|
699
|
+
return handleDiscoverDirect(a, server, extra);
|
|
700
|
+
}
|
|
701
|
+
|
|
632
702
|
return failPayload(`Unknown tool: ${name}`);
|
|
633
703
|
}
|
package/src/mcp/tools.js
CHANGED
|
@@ -70,6 +70,7 @@ export const tools = [
|
|
|
70
70
|
enableTester: { type: "boolean" },
|
|
71
71
|
enableSecurity: { type: "boolean" },
|
|
72
72
|
enableTriage: { type: "boolean" },
|
|
73
|
+
enableDiscover: { type: "boolean" },
|
|
73
74
|
enableSerena: { type: "boolean" },
|
|
74
75
|
enableBecaria: { type: "boolean", description: "Enable BecarIA Gateway (early PR + dispatch comments/reviews)" },
|
|
75
76
|
reviewerFallback: { type: "string" },
|
|
@@ -88,6 +89,7 @@ export const tools = [
|
|
|
88
89
|
branchPrefix: { type: "string" },
|
|
89
90
|
smartModels: { type: "boolean", description: "Enable/disable smart model selection based on triage complexity" },
|
|
90
91
|
checkpointInterval: { type: "number", description: "Minutes between interactive checkpoints (default: 5). Set 0 to disable." },
|
|
92
|
+
taskType: { type: "string", enum: ["sw", "infra", "doc", "add-tests", "refactor"], description: "Explicit task type for policy resolution. Overrides triage classification." },
|
|
91
93
|
noSonar: { type: "boolean" },
|
|
92
94
|
kjHome: { type: "string" },
|
|
93
95
|
sonarToken: { type: "string" },
|
|
@@ -222,5 +224,21 @@ export const tools = [
|
|
|
222
224
|
kjHome: { type: "string" }
|
|
223
225
|
}
|
|
224
226
|
}
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
name: "kj_discover",
|
|
230
|
+
description: "Analyze a task for gaps, ambiguities, and missing information before execution. Returns a verdict (ready/needs_validation) with structured gap list. Can read task details from Planning Game if pgTask is provided.",
|
|
231
|
+
inputSchema: {
|
|
232
|
+
type: "object",
|
|
233
|
+
required: ["task"],
|
|
234
|
+
properties: {
|
|
235
|
+
task: { type: "string", description: "Task description to analyze for gaps" },
|
|
236
|
+
mode: { type: "string", enum: ["gaps", "momtest", "wendel", "classify", "jtbd"], description: "Discovery mode: gaps (default), momtest (Mom Test questions), wendel (behavior change checklist), classify (START/STOP/DIFFERENT), or jtbd (Jobs-to-be-Done)" },
|
|
237
|
+
context: { type: "string", description: "Additional context for the analysis (e.g., research output)" },
|
|
238
|
+
pgTask: { type: "string", description: "Planning Game card ID (e.g., KJC-TSK-0042). If provided, fetches full card details as additional context." },
|
|
239
|
+
pgProject: { type: "string", description: "Planning Game project ID. Required when pgTask is used." },
|
|
240
|
+
kjHome: { type: "string" }
|
|
241
|
+
}
|
|
242
|
+
}
|
|
225
243
|
}
|
|
226
244
|
];
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { TriageRole } from "../roles/triage-role.js";
|
|
2
2
|
import { ResearcherRole } from "../roles/researcher-role.js";
|
|
3
3
|
import { PlannerRole } from "../roles/planner-role.js";
|
|
4
|
+
import { DiscoverRole } from "../roles/discover-role.js";
|
|
4
5
|
import { createAgent } from "../agents/index.js";
|
|
5
6
|
import { addCheckpoint, markSessionStatus } from "../session-store.js";
|
|
6
7
|
import { emitProgress, makeEvent } from "../utils/events.js";
|
|
@@ -56,12 +57,14 @@ export async function runTriageStage({ config, logger, emitter, eventBase, sessi
|
|
|
56
57
|
const recommendedRoles = new Set(triageOutput.result?.roles || []);
|
|
57
58
|
const roleOverrides = {};
|
|
58
59
|
if (triageOutput.ok) {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
roleOverrides.
|
|
62
|
-
roleOverrides.
|
|
63
|
-
roleOverrides.
|
|
64
|
-
roleOverrides.
|
|
60
|
+
// Triage can activate roles, but cannot deactivate roles explicitly enabled in pipeline config
|
|
61
|
+
const p = config.pipeline || {};
|
|
62
|
+
roleOverrides.plannerEnabled = recommendedRoles.has("planner") || Boolean(p.planner?.enabled);
|
|
63
|
+
roleOverrides.researcherEnabled = recommendedRoles.has("researcher") || Boolean(p.researcher?.enabled);
|
|
64
|
+
roleOverrides.refactorerEnabled = recommendedRoles.has("refactorer") || Boolean(p.refactorer?.enabled);
|
|
65
|
+
roleOverrides.reviewerEnabled = recommendedRoles.has("reviewer") || Boolean(p.reviewer?.enabled);
|
|
66
|
+
roleOverrides.testerEnabled = recommendedRoles.has("tester") || Boolean(p.tester?.enabled);
|
|
67
|
+
roleOverrides.securityEnabled = recommendedRoles.has("security") || Boolean(p.security?.enabled);
|
|
65
68
|
}
|
|
66
69
|
|
|
67
70
|
const shouldDecompose = triageOutput.result?.shouldDecompose || false;
|
|
@@ -72,6 +75,7 @@ export async function runTriageStage({ config, logger, emitter, eventBase, sessi
|
|
|
72
75
|
level: triageOutput.result?.level || null,
|
|
73
76
|
roles: Array.from(recommendedRoles),
|
|
74
77
|
reasoning: triageOutput.result?.reasoning || null,
|
|
78
|
+
taskType: triageOutput.result?.taskType || "sw",
|
|
75
79
|
shouldDecompose,
|
|
76
80
|
subtasks
|
|
77
81
|
};
|
|
@@ -255,3 +259,68 @@ export async function runPlannerStage({ config, logger, emitter, eventBase, sess
|
|
|
255
259
|
|
|
256
260
|
return { plannedTask, stageResult };
|
|
257
261
|
}
|
|
262
|
+
|
|
263
|
+
/**
 * Pre-loop pipeline stage that runs the discover role against the session task.
 *
 * Emits `discover:start` / `discover:end` progress events, forwards agent
 * output through a stall detector, records budget usage and a session
 * checkpoint, and returns a summary of the role output.
 *
 * @param {object} params
 * @param {object} params.config - Run configuration; reads `roles.discover` and `pipeline.discover.mode`.
 * @param {object} params.logger - Logger whose context is set to the discover stage.
 * @param {object} params.emitter - Progress event emitter.
 * @param {object} params.eventBase - Base fields spread into every emitted event.
 * @param {object} params.session - Current session; reads `task` and `id`.
 * @param {object} params.coderRole - Fallback source for provider and model.
 * @param {Function} params.trackBudget - Called once with the role usage record.
 * @returns {Promise<{stageResult: {ok: *, verdict: *, gaps: *, mode: string}}>}
 */
export async function runDiscoverStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget }) {
  const stageBase = () => ({ ...eventBase, stage: "discover" });

  logger.setContext({ iteration: 0, stage: "discover" });
  emitProgress(emitter, makeEvent("discover:start", stageBase(), { message: "Discover analyzing task for gaps" }));

  // Provider and model fall back to the coder role when discover has none configured.
  const roleSettings = config?.roles?.discover;
  const provider = roleSettings?.provider || coderRole.provider;
  const model = roleSettings?.model || coderRole.model;

  const forwardOutput = ({ stream, line }) => {
    const payload = { message: line, detail: { stream, agent: provider } };
    emitProgress(emitter, makeEvent("agent:output", stageBase(), payload));
  };
  const stall = createStallDetector({
    onOutput: forwardOutput,
    emitter,
    eventBase,
    stage: "discover",
    provider
  });

  const mode = config?.pipeline?.discover?.mode || "gaps";
  const role = new DiscoverRole({ config, logger, emitter });
  await role.init({ task: session.task, sessionId: session.id, iteration: 0 });

  const startedAt = Date.now();
  let output;
  try {
    output = await role.run({ task: session.task, mode, onOutput: stall.onOutput });
  } finally {
    // The detector is stopped whether the agent resolved or threw.
    stall.stop();
  }

  trackBudget({
    role: "discover",
    provider,
    model,
    result: output,
    duration_ms: Date.now() - startedAt
  });

  await addCheckpoint(session, {
    stage: "discover",
    iteration: 0,
    ok: output.ok,
    provider,
    model: model || null
  });

  const stageResult = {
    ok: output.ok,
    verdict: output.result?.verdict || null,
    gaps: output.result?.gaps || [],
    mode
  };

  let status = "fail";
  let message = `Discovery failed: ${output.summary}`;
  if (output.ok) {
    status = "ok";
    message = "Discovery completed";
  }
  emitProgress(emitter, makeEvent("discover:end", stageBase(), { status, message, detail: stageResult }));

  return { stageResult };
}
|
package/src/orchestrator.js
CHANGED
|
@@ -26,7 +26,7 @@ import { applyPolicies } from "./guards/policy-resolver.js";
|
|
|
26
26
|
import { resolveReviewProfile } from "./review/profiles.js";
|
|
27
27
|
import { CoderRole } from "./roles/coder-role.js";
|
|
28
28
|
import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
|
|
29
|
-
import { runTriageStage, runResearcherStage, runPlannerStage } from "./orchestrator/pre-loop-stages.js";
|
|
29
|
+
import { runTriageStage, runResearcherStage, runPlannerStage, runDiscoverStage } from "./orchestrator/pre-loop-stages.js";
|
|
30
30
|
import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, runReviewerStage } from "./orchestrator/iteration-stages.js";
|
|
31
31
|
import { runTesterStage, runSecurityStage } from "./orchestrator/post-loop-stages.js";
|
|
32
32
|
import { waitForCooldown, MAX_STANDBY_RETRIES } from "./orchestrator/standby.js";
|
|
@@ -44,7 +44,9 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
44
44
|
let testerEnabled = Boolean(config.pipeline?.tester?.enabled);
|
|
45
45
|
let securityEnabled = Boolean(config.pipeline?.security?.enabled);
|
|
46
46
|
let reviewerEnabled = config.pipeline?.reviewer?.enabled !== false;
|
|
47
|
-
|
|
47
|
+
let discoverEnabled = Boolean(config.pipeline?.discover?.enabled);
|
|
48
|
+
// Triage is always mandatory — it classifies taskType for policy resolution
|
|
49
|
+
const triageEnabled = true;
|
|
48
50
|
|
|
49
51
|
// --- Dry-run: return summary without executing anything ---
|
|
50
52
|
if (flags.dryRun) {
|
|
@@ -69,6 +71,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
69
71
|
refactorer: refactorerRole
|
|
70
72
|
},
|
|
71
73
|
pipeline: {
|
|
74
|
+
discover_enabled: discoverEnabled,
|
|
72
75
|
triage_enabled: triageEnabled,
|
|
73
76
|
planner_enabled: plannerEnabled,
|
|
74
77
|
refactorer_enabled: refactorerEnabled,
|
|
@@ -212,6 +215,13 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
212
215
|
const stageResults = {};
|
|
213
216
|
const sonarState = { issuesInitial: null, issuesFinal: null };
|
|
214
217
|
|
|
218
|
+
// --- Discover (pre-triage, opt-in) ---
|
|
219
|
+
if (flags.enableDiscover !== undefined) discoverEnabled = Boolean(flags.enableDiscover);
|
|
220
|
+
if (discoverEnabled) {
|
|
221
|
+
const discoverResult = await runDiscoverStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
|
|
222
|
+
stageResults.discover = discoverResult.stageResult;
|
|
223
|
+
}
|
|
224
|
+
|
|
215
225
|
if (triageEnabled) {
|
|
216
226
|
const triageResult = await runTriageStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
|
|
217
227
|
if (triageResult.roleOverrides.plannerEnabled !== undefined) plannerEnabled = triageResult.roleOverrides.plannerEnabled;
|
|
@@ -282,8 +292,9 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
|
|
|
282
292
|
if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
|
|
283
293
|
|
|
284
294
|
// --- Policy resolver: gate stages by taskType ---
|
|
295
|
+
// Priority: explicit flag > config > triage classification > default (sw)
|
|
285
296
|
const resolvedPolicies = applyPolicies({
|
|
286
|
-
taskType: flags.taskType || config.taskType || null,
|
|
297
|
+
taskType: flags.taskType || config.taskType || stageResults.triage?.taskType || null,
|
|
287
298
|
policies: config.policies,
|
|
288
299
|
});
|
|
289
300
|
session.resolved_policies = resolvedPolicies;
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
// Preamble placed first in every discover prompt.
const SUBAGENT_PREAMBLE = [
  "IMPORTANT: You are running as a Karajan sub-agent.",
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
  "Do NOT use any MCP tools. Focus only on discovering gaps in the task specification."
].join(" ");

/**
 * Discovery modes understood by the prompt builder.
 * Frozen because the array is exported and shared across modules; a stray
 * push/splice elsewhere would otherwise change validation everywhere.
 */
export const DISCOVER_MODES = Object.freeze(["gaps", "momtest", "wendel", "classify", "jtbd"]);

// Accepted values used when normalizing the agent's JSON output. Frozen for
// the same reason: they are lookup tables, never meant to be mutated.
const VALID_VERDICTS = Object.freeze(["ready", "needs_validation"]);
const VALID_SEVERITIES = Object.freeze(["critical", "major", "minor"]);
const VALID_WENDEL_STATUSES = Object.freeze(["pass", "fail", "unknown", "not_applicable"]);
const VALID_CLASSIFY_TYPES = Object.freeze(["START", "STOP", "DIFFERENT", "not_applicable"]);
const VALID_ADOPTION_RISKS = Object.freeze(["none", "low", "medium", "high"]);
|
|
14
|
+
|
|
15
|
+
/**
 * Mode-specific prompt material: the extra guidance section and the JSON
 * schema fragment that mode adds. Returns null for "gaps" or any other mode,
 * which get only the base prompt.
 */
function discoverModeAddon(mode) {
  switch (mode) {
    case "momtest":
      return {
        heading: "## Mom Test Rules",
        lines: [
          "For each gap, generate questions that follow The Mom Test principles:",
          "- ALWAYS ask about past behavior and real experiences, never hypothetical scenarios",
          "- NEVER ask 'Would you...?', 'Do you think...?', 'Would it be useful if...?'",
          "- ALWAYS ask 'When was the last time...?', 'How do you currently...?', 'What happened when...?'",
          "- Ask about specifics, not generalities",
          "- Each question must have a targetRole (who to ask) and rationale (why this matters)",
          "",
          "Examples of BAD questions (hypothetical/opinion):",
          " - 'Would you use this feature?' -> opinion, not data",
          " - 'Do you think users need this?' -> speculation",
          "",
          "Examples of GOOD questions (past behavior):",
          " - 'When was the last time you had to do X manually?' -> real experience",
          " - 'How are you currently handling Y?' -> current behavior",
          " - 'What happened the last time Z failed?' -> real consequence"
        ],
        schema: ',"momTestQuestions":[{"gapId":string,"question":string,"targetRole":string,"rationale":string}]'
      };
    case "wendel":
      return {
        heading: "## Wendel Behavior Change Checklist",
        lines: [
          "Evaluate whether the task implies a user behavior change. If it does, assess these 5 conditions:",
          "",
          "1. **CUE** — Is there a clear trigger that will prompt the user to take the new action?",
          "2. **REACTION** — Will the user have a positive emotional reaction when they encounter the cue?",
          "3. **EVALUATION** — Can the user quickly understand the value of the new behavior?",
          "4. **ABILITY** — Does the user have the skill and resources to perform the new behavior?",
          "5. **TIMING** — Is this the right moment to introduce this change?",
          "",
          "For each condition, set status to: pass, fail, unknown, or not_applicable",
          "If the task does NOT imply behavior change (e.g., internal refactor, backend optimization), set ALL conditions to 'not_applicable'",
          "If ANY condition is 'fail', set verdict to 'needs_validation'"
        ],
        schema: ',"wendelChecklist":[{"condition":"CUE|REACTION|EVALUATION|ABILITY|TIMING","status":"pass|fail|unknown|not_applicable","justification":string}]'
      };
    case "classify":
      return {
        heading: "## Behavior Change Classification",
        lines: [
          "Classify the task by its impact on user behavior:",
          "",
          "- **START**: User must adopt a completely new behavior or workflow",
          "- **STOP**: User must stop doing something they currently do (highest resistance risk)",
          "- **DIFFERENT**: User must do something they already do, but differently",
          "- **not_applicable**: Task has no user behavior impact (internal refactor, backend, infra)",
          "",
          "Assess adoption risk: none (no user impact), low, medium, high",
          "STOP changes carry the highest risk of resistance — always flag them",
          "Provide a frictionEstimate explaining the expected friction"
        ],
        schema: ',"classification":{"type":"START|STOP|DIFFERENT|not_applicable","adoptionRisk":"none|low|medium|high","frictionEstimate":string}'
      };
    case "jtbd":
      return {
        heading: "## Jobs-to-be-Done Framework",
        lines: [
          "Generate reinforced Jobs-to-be-Done from the task and any provided context (interview notes, field observations).",
          "Each JTBD must include 5 layers:",
          "",
          "- **functional**: The practical job the user is trying to accomplish",
          "- **emotionalPersonal**: How the user wants to feel personally",
          "- **emotionalSocial**: How the user wants to be perceived by others",
          "- **behaviorChange**: Type of change: START, STOP, DIFFERENT, or not_applicable",
          "- **evidence**: Direct quotes or specific references from the context. If no context provided, set to 'not_available' and suggest what context is needed",
          "",
          "CRITICAL: evidence must contain real quotes or references from the provided context, NEVER invented assumptions",
          "If no context is provided, mark evidence as 'not_available'"
        ],
        schema: ',"jtbds":[{"id":string,"functional":string,"emotionalPersonal":string,"emotionalSocial":string,"behaviorChange":"START|STOP|DIFFERENT|not_applicable","evidence":string}]'
      };
    default:
      return null;
  }
}

/**
 * Assemble the prompt text for the discover sub-agent.
 *
 * Sections are joined with blank lines in this order: sub-agent preamble,
 * optional role instructions, role statement, gap-detection guidelines,
 * optional mode-specific guidance, output-format instructions with the JSON
 * schema, optional context, and finally the task itself.
 *
 * @param {object} params
 * @param {string} params.task - Task description placed under "## Task".
 * @param {string} [params.instructions] - Role instructions inserted after the preamble.
 * @param {string} [params.mode="gaps"] - "momtest", "wendel", "classify" and "jtbd" add
 *   a guidance section and a schema fragment; any other value yields the base prompt.
 * @param {string|null} [params.context=null] - Extra context placed under "## Context".
 * @returns {string} The complete prompt.
 */
export function buildDiscoverPrompt({ task, instructions, mode = "gaps", context = null }) {
  const parts = [SUBAGENT_PREAMBLE];

  if (instructions) parts.push(instructions);

  const guidelines = [
    "- Look for missing acceptance criteria or requirements",
    "- Identify implicit assumptions that need explicit confirmation",
    "- Find ambiguities where multiple interpretations exist",
    "- Check for contradictions between different parts of the spec",
    "- Consider edge cases and error scenarios not addressed",
    "- Classify each gap by severity: critical (blocks implementation), major (could cause rework), minor (reasonable default exists)"
  ];
  parts.push(
    "You are a task discovery agent for Karajan Code, a multi-agent coding orchestrator.",
    "Analyze the following task and identify gaps, ambiguities, missing information, and implicit assumptions.",
    "## Gap Detection Guidelines",
    guidelines.join("\n")
  );

  const addon = discoverModeAddon(mode);
  if (addon !== null) {
    parts.push(addon.heading, addon.lines.join("\n"));
  }

  // The schema is the base object plus at most one mode fragment, then the summary field.
  const schemaHead = '{"verdict":"ready|needs_validation","gaps":[{"id":string,"description":string,"severity":"critical|major|minor","suggestedQuestion":string}]';
  const schemaExtra = addon === null ? "" : addon.schema;
  parts.push(
    "Return a single valid JSON object and nothing else.",
    `JSON schema: ${schemaHead}${schemaExtra},"summary":string}`
  );

  if (context) parts.push(`## Context\n${context}`);
  parts.push(`## Task\n${task}`);

  return parts.join("\n\n");
}
|
|
145
|
+
|
|
146
|
+
/**
 * Extract and normalize the discover agent's JSON answer from raw output.
 *
 * The first "{" through the last "}" in the text is parsed as JSON. Entries
 * missing any required field are dropped, and enumerated fields are
 * normalized case-insensitively with surrounding whitespace trimmed; unknown
 * values fall back to a default.
 *
 * @param {string|null|undefined} raw - Raw agent output. Non-string input is
 *   treated as empty.
 * @returns {{verdict: string, gaps: object[], momTestQuestions: object[],
 *   wendelChecklist: object[], classification: object|null, jtbds: object[],
 *   summary: string}|null} Normalized result, or null when no parseable JSON
 *   object is found.
 */
export function parseDiscoverOutput(raw) {
  // Guard against non-string input, which would otherwise throw on .trim().
  const text = typeof raw === "string" ? raw.trim() : "";
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) return null;

  let parsed;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch {
    // Malformed JSON is an expected outcome for agent output; signal with null.
    return null;
  }

  // Lower-case lookup shared by every enumerated field so they all behave alike.
  const pickEnum = (value, allowed, fallback) => {
    const key = String(value).trim().toLowerCase();
    return allowed.includes(key) ? key : fallback;
  };
  const listOf = (value) => (Array.isArray(value) ? value : []);

  // The verdict is normalized like the other enums; previously a differently
  // cased "NEEDS_VALIDATION" silently became "ready".
  const verdict = pickEnum(parsed.verdict, VALID_VERDICTS, "ready");

  const gaps = listOf(parsed.gaps)
    .filter((g) => g && g.id && g.description && g.suggestedQuestion)
    .map((g) => ({
      id: g.id,
      description: g.description,
      severity: pickEnum(g.severity, VALID_SEVERITIES, "major"),
      suggestedQuestion: g.suggestedQuestion
    }));

  const momTestQuestions = listOf(parsed.momTestQuestions)
    .filter((q) => q && q.gapId && q.question && q.targetRole && q.rationale)
    .map(({ gapId, question, targetRole, rationale }) => ({ gapId, question, targetRole, rationale }));

  const wendelChecklist = listOf(parsed.wendelChecklist)
    .filter((c) => c && c.condition && c.justification && c.status)
    .map((c) => ({
      condition: c.condition,
      status: pickEnum(c.status, VALID_WENDEL_STATUSES, "unknown"),
      justification: c.justification
    }));

  const jtbds = listOf(parsed.jtbds)
    .filter((j) => j && j.id && j.functional && j.emotionalPersonal && j.emotionalSocial && j.behaviorChange && j.evidence)
    .map(({ id, functional, emotionalPersonal, emotionalSocial, behaviorChange, evidence }) => ({
      id, functional, emotionalPersonal, emotionalSocial, behaviorChange, evidence
    }));

  let classification = null;
  if (parsed.classification && typeof parsed.classification === "object") {
    // Types are upper-case except the lower-case "not_applicable" sentinel,
    // which also serves as the fallback for unknown values.
    const rawType = String(parsed.classification.type || "").trim().toUpperCase();
    const type = rawType !== "NOT_APPLICABLE" && VALID_CLASSIFY_TYPES.includes(rawType)
      ? rawType
      : "not_applicable";
    classification = {
      type,
      adoptionRisk: pickEnum(parsed.classification.adoptionRisk || "", VALID_ADOPTION_RISKS, "medium"),
      frictionEstimate: parsed.classification.frictionEstimate || ""
    };
  }

  return {
    verdict,
    gaps,
    momTestQuestions,
    wendelChecklist,
    classification,
    jtbds,
    summary: parsed.summary || ""
  };
}
|
package/src/prompts/triage.js
CHANGED
|
@@ -47,10 +47,10 @@ export function buildTriagePrompt({ task, instructions, availableRoles }) {
|
|
|
47
47
|
);
|
|
48
48
|
|
|
49
49
|
sections.push(
|
|
50
|
-
"Classify the task complexity, recommend only the necessary pipeline roles, and assess whether the task should be decomposed into smaller subtasks.",
|
|
50
|
+
"Classify the task complexity, determine its taskType, recommend only the necessary pipeline roles, and assess whether the task should be decomposed into smaller subtasks.",
|
|
51
51
|
"Keep the reasoning short and practical.",
|
|
52
52
|
"Return a single valid JSON object and nothing else.",
|
|
53
|
-
'JSON schema: {"level":"trivial|simple|medium|complex","roles":["planner|researcher|refactorer|reviewer|tester|security"],"reasoning":string,"shouldDecompose":boolean,"subtasks":string[]}'
|
|
53
|
+
'JSON schema: {"level":"trivial|simple|medium|complex","roles":["planner|researcher|refactorer|reviewer|tester|security"],"taskType":"sw|infra|doc|add-tests|refactor","reasoning":string,"shouldDecompose":boolean,"subtasks":string[]}'
|
|
54
54
|
);
|
|
55
55
|
|
|
56
56
|
sections.push(`## Task\n${task}`);
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { BaseRole } from "./base-role.js";
|
|
2
|
+
import { createAgent as defaultCreateAgent } from "../agents/index.js";
|
|
3
|
+
import { buildDiscoverPrompt, parseDiscoverOutput } from "../prompts/discover.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Pick the agent provider for the discover role.
 *
 * Preference order: the discover role's own provider, then the coder role's
 * provider, then the literal "claude". Falsy values (including empty
 * strings) are skipped at each step.
 *
 * @param {object|null|undefined} config - Configuration object; may be absent.
 * @returns {string} Provider identifier.
 */
function resolveProvider(config) {
  const roles = config?.roles;

  const ownProvider = roles?.discover?.provider;
  if (ownProvider) {
    return ownProvider;
  }

  const coderProvider = roles?.coder?.provider;
  if (coderProvider) {
    return coderProvider;
  }

  return "claude";
}
|
|
12
|
+
|
|
13
|
+
/**
 * Compose the one-line, human-readable summary of a parsed discovery result.
 *
 * With no gaps and nothing mode-specific to report, the fixed
 * "task is ready" message is returned. Otherwise the gap count and the
 * mode-specific detail are joined and the verdict is appended.
 *
 * @param {object} parsed - Normalized discovery output.
 * @param {string} mode - Discovery mode ("gaps", "momtest", "wendel",
 *   "classify" or "jtbd").
 * @returns {string} Summary line.
 */
function buildSummary(parsed, mode) {
  const READY_MESSAGE = "Discovery complete: task is ready";
  const plural = (count) => (count !== 1 ? "s" : "");

  const gapTotal = parsed.gaps?.length || 0;
  const hasOwnReadyCheck = mode === "wendel" || mode === "jtbd";

  // Wendel and JTBD decide readiness themselves further down; every other
  // mode is "ready" as soon as there are no gaps.
  if (gapTotal === 0 && !hasOwnReadyCheck) {
    return READY_MESSAGE;
  }

  const fragments = [];
  if (gapTotal > 0) {
    fragments.push(`${gapTotal} gap${plural(gapTotal)} found`);
  }

  switch (mode) {
    case "momtest": {
      const questionTotal = parsed.momTestQuestions?.length || 0;
      if (questionTotal > 0) {
        fragments.push(`${questionTotal} Mom Test question${plural(questionTotal)}`);
      }
      break;
    }
    case "wendel": {
      const failedTotal = (parsed.wendelChecklist || []).filter((item) => item.status === "fail").length;
      if (failedTotal > 0) {
        fragments.push(`${failedTotal} Wendel condition${plural(failedTotal)} failed`);
      } else if (gapTotal === 0) {
        return READY_MESSAGE;
      }
      break;
    }
    case "classify": {
      if (parsed.classification) {
        fragments.push(`type: ${parsed.classification.type}, risk: ${parsed.classification.adoptionRisk}`);
      }
      break;
    }
    case "jtbd": {
      const jobTotal = parsed.jtbds?.length || 0;
      if (jobTotal > 0) {
        fragments.push(`${jobTotal} JTBD${plural(jobTotal)} generated`);
      } else if (gapTotal === 0) {
        return READY_MESSAGE;
      }
      break;
    }
    default:
      break;
  }

  return `Discovery complete: ${fragments.join(", ")} (verdict: ${parsed.verdict})`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Pipeline role that sends a task description to an agent for gap discovery
 * and returns the agent's normalized findings.
 */
export class DiscoverRole extends BaseRole {
  /**
   * @param {object} options
   * @param {object} options.config - Configuration; forwarded to the base
   *   role and later used to resolve the provider.
   * @param {object} options.logger - Logger forwarded to the base role.
   * @param {object|null} [options.emitter=null] - Emitter forwarded to the
   *   base role.
   * @param {Function|null} [options.createAgentFn=null] - Agent factory
   *   override; the default `createAgent` import is used when omitted.
   */
  constructor({ config, logger, emitter = null, createAgentFn = null }) {
    super({ name: "discover", config, logger, emitter });
    this._createAgent = createAgentFn ? createAgentFn : defaultCreateAgent;
  }

  /**
   * Run discovery for a task.
   *
   * @param {string|object} input - Either the task text itself, or an object
   *   with optional `task`, `onOutput`, `mode` (defaults to "gaps") and
   *   `context` fields.
   * @returns {Promise<object>} `{ ok, result, summary, usage }`. `ok` is
   *   false only when the agent run itself reports failure; output that
   *   cannot be parsed yields `ok: true` with a "ready" verdict and the raw
   *   text attached.
   */
  async execute(input) {
    let task;
    let onOutput = null;
    if (typeof input === "string") {
      task = input;
    } else {
      // NOTE(review): this.context is presumably populated by BaseRole — confirm.
      task = input?.task || this.context?.task || "";
      onOutput = input?.onOutput || null;
    }

    // Mode and context are only read from object inputs.
    const options = typeof input === "object" ? input : null;
    const mode = options?.mode || "gaps";
    const context = options?.context || null;

    const provider = resolveProvider(this.config);
    const agent = this._createAgent(provider, this.config, this.logger);

    const runArgs = {
      prompt: buildDiscoverPrompt({ task, instructions: this.instructions, mode, context }),
      role: "discover"
    };
    if (onOutput) {
      runArgs.onOutput = onOutput;
    }

    const outcome = await agent.runTask(runArgs);

    if (!outcome.ok) {
      const failure = {
        error: outcome.error || outcome.output || "Discovery failed",
        provider,
        mode
      };
      return {
        ok: false,
        result: failure,
        summary: `Discovery failed: ${outcome.error || "unknown error"}`,
        usage: outcome.usage
      };
    }

    // Shared fallback for output that is not parseable or that makes
    // post-processing throw.
    const unstructured = () => ({
      ok: true,
      result: {
        verdict: "ready",
        gaps: [],
        mode,
        raw: outcome.output,
        provider
      },
      summary: "Discovery complete (unstructured output)",
      usage: outcome.usage
    });

    try {
      const parsed = parseDiscoverOutput(outcome.output);
      if (!parsed) {
        return unstructured();
      }

      const findings = {
        verdict: parsed.verdict,
        gaps: parsed.gaps,
        mode,
        provider
      };

      // Attach only the extra section that belongs to the requested mode.
      switch (mode) {
        case "momtest":
          findings.momTestQuestions = parsed.momTestQuestions || [];
          break;
        case "wendel":
          findings.wendelChecklist = parsed.wendelChecklist || [];
          break;
        case "classify":
          findings.classification = parsed.classification || null;
          break;
        case "jtbd":
          findings.jtbds = parsed.jtbds || [];
          break;
        default:
          break;
      }

      return {
        ok: true,
        result: findings,
        summary: buildSummary(parsed, mode),
        usage: outcome.usage
      };
    } catch {
      return unstructured();
    }
  }
}
|
package/src/roles/index.js
CHANGED
package/src/roles/triage-role.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { BaseRole } from "./base-role.js";
|
|
2
2
|
import { createAgent as defaultCreateAgent } from "../agents/index.js";
|
|
3
3
|
import { buildTriagePrompt } from "../prompts/triage.js";
|
|
4
|
+
import { VALID_TASK_TYPES } from "../guards/policy-resolver.js";
|
|
4
5
|
|
|
5
6
|
const VALID_LEVELS = new Set(["trivial", "simple", "medium", "complex"]);
|
|
6
7
|
const VALID_ROLES = new Set(["planner", "researcher", "refactorer", "reviewer", "tester", "security"]);
|
|
8
|
+
const FALLBACK_TASK_TYPE = "sw";
|
|
7
9
|
|
|
8
10
|
function resolveProvider(config) {
|
|
9
11
|
return (
|
|
@@ -74,6 +76,7 @@ export class TriageRole extends BaseRole {
|
|
|
74
76
|
level: "medium",
|
|
75
77
|
roles: ["reviewer"],
|
|
76
78
|
reasoning: "Unstructured output, using safe defaults.",
|
|
79
|
+
taskType: FALLBACK_TASK_TYPE,
|
|
77
80
|
provider,
|
|
78
81
|
raw: result.output
|
|
79
82
|
},
|
|
@@ -87,11 +90,13 @@ export class TriageRole extends BaseRole {
|
|
|
87
90
|
const reasoning = String(parsed.reasoning || "").trim() || "No reasoning provided.";
|
|
88
91
|
const shouldDecompose = Boolean(parsed.shouldDecompose);
|
|
89
92
|
const subtasks = normalizeSubtasks(parsed.subtasks);
|
|
93
|
+
const taskType = VALID_TASK_TYPES.includes(parsed.taskType) ? parsed.taskType : FALLBACK_TASK_TYPE;
|
|
90
94
|
|
|
91
95
|
const triageResult = {
|
|
92
96
|
level,
|
|
93
97
|
roles,
|
|
94
98
|
reasoning,
|
|
99
|
+
taskType,
|
|
95
100
|
provider
|
|
96
101
|
};
|
|
97
102
|
|
|
@@ -116,6 +121,7 @@ export class TriageRole extends BaseRole {
|
|
|
116
121
|
level: "medium",
|
|
117
122
|
roles: ["reviewer"],
|
|
118
123
|
reasoning: "Failed to parse triage output, using safe defaults.",
|
|
124
|
+
taskType: FALLBACK_TASK_TYPE,
|
|
119
125
|
provider,
|
|
120
126
|
raw: result.output
|
|
121
127
|
},
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# Discover Role
|
|
2
|
+
|
|
3
|
+
You are the **Discover** role in a multi-role AI pipeline.
|
|
4
|
+
|
|
5
|
+
Your job is to analyze a task description, ticket, or brief and identify **gaps** — missing information, implicit assumptions, ambiguities, and contradictions that could cause unnecessary iterations during implementation.
|
|
6
|
+
|
|
7
|
+
## Responsibilities
|
|
8
|
+
|
|
9
|
+
- Detect missing requirements or acceptance criteria
|
|
10
|
+
- Identify implicit assumptions that need explicit confirmation
|
|
11
|
+
- Find ambiguities where multiple interpretations are possible
|
|
12
|
+
- Spot contradictions between different parts of the specification
|
|
13
|
+
- Suggest specific questions that would resolve each gap
|
|
14
|
+
|
|
15
|
+
## Severity Classification
|
|
16
|
+
|
|
17
|
+
- **critical**: Blocks implementation entirely — cannot proceed without this information
|
|
18
|
+
- **major**: Could lead to significant rework if assumed incorrectly
|
|
19
|
+
- **minor**: Nice to clarify but a reasonable default exists
|
|
20
|
+
|
|
21
|
+
## Verdict
|
|
22
|
+
|
|
23
|
+
- **ready**: The task is well-defined and can proceed to implementation without further clarification
|
|
24
|
+
- **needs_validation**: One or more gaps were found that should be resolved before implementation
|
|
25
|
+
|
|
26
|
+
## Output format
|
|
27
|
+
|
|
28
|
+
Return a single valid JSON object and nothing else.
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"verdict": "ready|needs_validation",
|
|
33
|
+
"gaps": [
|
|
34
|
+
{
|
|
35
|
+
"id": "gap-1",
|
|
36
|
+
"description": "What information is missing or ambiguous",
|
|
37
|
+
"severity": "critical|major|minor",
|
|
38
|
+
"suggestedQuestion": "A specific question to resolve this gap"
|
|
39
|
+
}
|
|
40
|
+
],
|
|
41
|
+
"summary": "Brief human-readable summary of findings"
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
If the task is well-defined with no gaps, return `verdict: "ready"` with an empty `gaps` array.
|
|
46
|
+
|
|
47
|
+
## Mom Test Mode
|
|
48
|
+
|
|
49
|
+
When running in **momtest** mode, for each gap generate questions following The Mom Test principles:
|
|
50
|
+
|
|
51
|
+
- Ask about **past behavior** and real experiences, never hypothetical scenarios
|
|
52
|
+
- Ask about **specifics**, not generalities
|
|
53
|
+
- Focus on what people **actually do**, not what they say they would do
|
|
54
|
+
|
|
55
|
+
### Good vs Bad Questions
|
|
56
|
+
|
|
57
|
+
| Bad (hypothetical/opinion) | Good (past behavior) |
|
|
58
|
+
|---|---|
|
|
59
|
+
| "Would you use a notification system?" | "When was the last time you missed an important update?" |
|
|
60
|
+
| "Do you think users need dark mode?" | "How many support tickets mentioned readability issues?" |
|
|
61
|
+
| "Would it be useful to have X?" | "How are you currently handling X?" |
|
|
62
|
+
|
|
63
|
+
### Mom Test Output Schema (additional fields for momtest mode)
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"momTestQuestions": [
|
|
68
|
+
{
|
|
69
|
+
"gapId": "gap-1",
|
|
70
|
+
"question": "Past-behavior question to validate this gap",
|
|
71
|
+
"targetRole": "Who should answer (end-user, developer, PM, etc.)",
|
|
72
|
+
"rationale": "Why this question matters for the gap"
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Wendel Mode
|
|
79
|
+
|
|
80
|
+
When running in **wendel** mode, evaluate whether the task implies a **user behavior change** and assess 5 adoption conditions:
|
|
81
|
+
|
|
82
|
+
| Condition | Question |
|
|
83
|
+
|-----------|----------|
|
|
84
|
+
| **CUE** | Is there a clear trigger that will prompt the user to take the new action? |
|
|
85
|
+
| **REACTION** | Will the user have a positive emotional reaction when encountering the cue? |
|
|
86
|
+
| **EVALUATION** | Can the user quickly understand the value of the new behavior? |
|
|
87
|
+
| **ABILITY** | Does the user have the skill and resources to perform the new behavior? |
|
|
88
|
+
| **TIMING** | Is this the right moment to introduce this change? |
|
|
89
|
+
|
|
90
|
+
### Status Values
|
|
91
|
+
|
|
92
|
+
- **pass**: Condition is clearly met based on the task specification
|
|
93
|
+
- **fail**: Condition is NOT met — adoption risk identified
|
|
94
|
+
- **unknown**: Not enough information to evaluate
|
|
95
|
+
- **not_applicable**: Task does not imply user behavior change (e.g., refactor, backend optimization)
|
|
96
|
+
|
|
97
|
+
If the task does NOT imply behavior change, set ALL conditions to `not_applicable` and verdict to `ready`.
|
|
98
|
+
|
|
99
|
+
### Wendel Output Schema (additional fields for wendel mode)
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"wendelChecklist": [
|
|
104
|
+
{
|
|
105
|
+
"condition": "CUE|REACTION|EVALUATION|ABILITY|TIMING",
|
|
106
|
+
"status": "pass|fail|unknown|not_applicable",
|
|
107
|
+
"justification": "Why this condition passes or fails"
|
|
108
|
+
}
|
|
109
|
+
]
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Classify Mode
|
|
114
|
+
|
|
115
|
+
When running in **classify** mode, classify the task by its impact on user behavior:
|
|
116
|
+
|
|
117
|
+
| Type | Description | Risk Level |
|
|
118
|
+
|------|-------------|------------|
|
|
119
|
+
| **START** | User must adopt a completely new behavior or workflow | Medium-High |
|
|
120
|
+
| **STOP** | User must stop doing something they currently do | **Highest** resistance risk |
|
|
121
|
+
| **DIFFERENT** | User must do something they already do, but differently | Low-Medium |
|
|
122
|
+
| **not_applicable** | No user behavior impact (internal refactor, backend, infra) | None |
|
|
123
|
+
|
|
124
|
+
### Classify Output Schema (additional fields for classify mode)
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"classification": {
|
|
129
|
+
"type": "START|STOP|DIFFERENT|not_applicable",
|
|
130
|
+
"adoptionRisk": "none|low|medium|high",
|
|
131
|
+
"frictionEstimate": "Description of expected friction"
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## JTBD Mode
|
|
137
|
+
|
|
138
|
+
When running in **jtbd** mode, generate reinforced Jobs-to-be-Done from the task and provided context (interview notes, field observations).
|
|
139
|
+
|
|
140
|
+
Each JTBD must include 5 layers:
|
|
141
|
+
|
|
142
|
+
| Layer | Description |
|
|
143
|
+
|-------|-------------|
|
|
144
|
+
| **functional** | The practical job the user is trying to accomplish |
|
|
145
|
+
| **emotionalPersonal** | How the user wants to feel personally |
|
|
146
|
+
| **emotionalSocial** | How the user wants to be perceived by others |
|
|
147
|
+
| **behaviorChange** | Type of change: START, STOP, DIFFERENT, or not_applicable |
|
|
148
|
+
| **evidence** | Direct quotes or references from context. Set to `not_available` if no context provided |
|
|
149
|
+
|
|
150
|
+
**CRITICAL**: The `evidence` field must contain real quotes or specific references from the provided context, or the literal value `not_available` when no context is provided. Never invent quotes or assumptions.
|
|
151
|
+
|
|
152
|
+
### JTBD Output Schema (additional fields for jtbd mode)
|
|
153
|
+
|
|
154
|
+
```json
|
|
155
|
+
{
|
|
156
|
+
"jtbds": [
|
|
157
|
+
{
|
|
158
|
+
"id": "jtbd-1",
|
|
159
|
+
"functional": "The practical job",
|
|
160
|
+
"emotionalPersonal": "How the user wants to feel",
|
|
161
|
+
"emotionalSocial": "How the user wants to be perceived",
|
|
162
|
+
"behaviorChange": "START|STOP|DIFFERENT|not_applicable",
|
|
163
|
+
"evidence": "Direct quote or 'not_available'"
|
|
164
|
+
}
|
|
165
|
+
]
|
|
166
|
+
}
|
|
167
|
+
```
|
|
@@ -8,6 +8,7 @@ Return a single valid JSON object and nothing else:
|
|
|
8
8
|
```json
|
|
9
9
|
{
|
|
10
10
|
"level": "trivial|simple|medium|complex",
|
|
11
|
+
"taskType": "sw|infra|doc|add-tests|refactor",
|
|
11
12
|
"roles": ["planner", "researcher", "refactorer", "reviewer", "tester", "security"],
|
|
12
13
|
"reasoning": "brief practical justification",
|
|
13
14
|
"shouldDecompose": false,
|
|
@@ -15,7 +16,14 @@ Return a single valid JSON object and nothing else:
|
|
|
15
16
|
}
|
|
16
17
|
```
|
|
17
18
|
|
|
18
|
-
##
|
|
19
|
+
## Task type classification
|
|
20
|
+
- `sw`: writing or modifying business logic, features, APIs, components, services.
|
|
21
|
+
- `infra`: CI/CD, Docker, deploy scripts, build configuration, environment setup.
|
|
22
|
+
- `doc`: documentation, README, CHANGELOG, comments-only changes.
|
|
23
|
+
- `add-tests`: adding tests to existing code without changing functionality.
|
|
24
|
+
- `refactor`: restructuring code without changing external behavior.
|
|
25
|
+
|
|
26
|
+
## Complexity classification
|
|
19
27
|
- `trivial`: tiny, low-risk, straightforward. Usually no extra roles.
|
|
20
28
|
- `simple`: limited scope with low risk. Usually reviewer only.
|
|
21
29
|
- `medium`: moderate scope/risk. Reviewer required; optional planner/researcher.
|