karajan-code 1.15.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.15.0",
3
+ "version": "1.16.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
package/src/cli.js CHANGED
@@ -72,6 +72,7 @@ program
72
72
  .option("--enable-tester")
73
73
  .option("--enable-security")
74
74
  .option("--enable-triage")
75
+ .option("--enable-discover")
75
76
  .option("--enable-serena")
76
77
  .option("--mode <name>")
77
78
  .option("--max-iterations <n>")
package/src/config.js CHANGED
@@ -16,7 +16,8 @@ const DEFAULTS = {
16
16
  researcher: { provider: null, model: null },
17
17
  tester: { provider: null, model: null },
18
18
  security: { provider: null, model: null },
19
- triage: { provider: null, model: null }
19
+ triage: { provider: null, model: null },
20
+ discover: { provider: null, model: null }
20
21
  },
21
22
  pipeline: {
22
23
  planner: { enabled: false },
@@ -25,7 +26,8 @@ const DEFAULTS = {
25
26
  researcher: { enabled: false },
26
27
  tester: { enabled: true },
27
28
  security: { enabled: true },
28
- triage: { enabled: true }
29
+ triage: { enabled: true },
30
+ discover: { enabled: false }
29
31
  },
30
32
  review_mode: "standard",
31
33
  max_iterations: 5,
@@ -245,6 +247,9 @@ export function applyRunOverrides(config, flags) {
245
247
  if (flags.tester) out.roles.tester.provider = flags.tester;
246
248
  if (flags.security) out.roles.security.provider = flags.security;
247
249
  if (flags.triage) out.roles.triage.provider = flags.triage;
250
+ if (flags.discover) out.roles.discover.provider = flags.discover;
251
+ if (flags.discoverModel) out.roles.discover.model = String(flags.discoverModel);
252
+ if (flags.enableDiscover !== undefined) out.pipeline.discover.enabled = Boolean(flags.enableDiscover);
248
253
  if (flags.plannerModel) out.roles.planner.model = String(flags.plannerModel);
249
254
  if (flags.coderModel) {
250
255
  out.roles.coder.model = String(flags.coderModel);
@@ -318,14 +323,14 @@ export function resolveRole(config, role) {
318
323
  let provider = roleConfig.provider ?? null;
319
324
  if (!provider && role === "coder") provider = legacyCoder;
320
325
  if (!provider && role === "reviewer") provider = legacyReviewer;
321
- if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage")) {
326
+ if (!provider && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage" || role === "discover")) {
322
327
  provider = roles.coder?.provider || legacyCoder;
323
328
  }
324
329
 
325
330
  let model = roleConfig.model ?? null;
326
331
  if (!model && role === "coder") model = config?.coder_options?.model ?? null;
327
332
  if (!model && role === "reviewer") model = config?.reviewer_options?.model ?? null;
328
- if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage")) {
333
+ if (!model && (role === "planner" || role === "refactorer" || role === "solomon" || role === "researcher" || role === "tester" || role === "security" || role === "triage" || role === "discover")) {
329
334
  model = config?.coder_options?.model ?? null;
330
335
  }
331
336
 
@@ -344,6 +349,7 @@ function requiredRolesFor(commandName, config) {
344
349
  if (config?.pipeline?.security?.enabled) required.push("security");
345
350
  return required;
346
351
  }
352
+ if (commandName === "discover") return ["discover"];
347
353
  if (commandName === "plan") return ["planner"];
348
354
  if (commandName === "code") return ["coder"];
349
355
  if (commandName === "review") return ["reviewer"];
package/src/mcp/run-kj.js CHANGED
@@ -43,6 +43,7 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
43
43
  normalizeBoolFlag(options.enableTester, "--enable-tester", args);
44
44
  normalizeBoolFlag(options.enableSecurity, "--enable-security", args);
45
45
  normalizeBoolFlag(options.enableTriage, "--enable-triage", args);
46
+ normalizeBoolFlag(options.enableDiscover, "--enable-discover", args);
46
47
  normalizeBoolFlag(options.enableSerena, "--enable-serena", args);
47
48
  normalizeBoolFlag(options.autoCommit, "--auto-commit", args);
48
49
  normalizeBoolFlag(options.autoPush, "--auto-push", args);
@@ -428,6 +428,59 @@ export async function handleReviewDirect(a, server, extra) {
428
428
  return { ok: true, review: parsed || result.output, raw: result.output };
429
429
  }
430
430
 
431
/**
 * Runs the Discover role once, outside the full pipeline, on behalf of the
 * `kj_discover` MCP tool.
 *
 * @param {object} a - Tool arguments. Reads `task`, `mode` (defaults to "gaps"),
 *   `context`, `pgTask` and `pgProject`; the whole object is also handed to `buildConfig`.
 * @param {object} server - MCP server handle, forwarded to the project-dir, emitter
 *   and tracker helpers.
 * @param {object} extra - Forwarded untouched to `buildDirectEmitter`.
 * @returns {Promise<object>} `{ ok: true, ...result.result, summary }` on success.
 * @throws {Error} When the role reports `ok: false`; anything thrown while loading,
 *   initialising or running the role is rethrown after cleanup.
 */
export async function handleDiscoverDirect(a, server, extra) {
  const config = await buildConfig(a, "discover");
  const logger = createLogger(config.output.log_level, "mcp");

  const discoverRole = resolveRole(config, "discover");
  await assertAgentsAvailable([discoverRole.provider]);

  const mode = a.mode || "gaps";
  const projectDir = await resolveProjectDir(server);
  const runLog = createRunLog(projectDir);
  runLog.logText(`[kj_discover] started — mode=${mode}`);
  const emitter = buildDirectEmitter(server, runLog, extra);
  const eventBase = { sessionId: null, iteration: 0, startedAt: Date.now() };
  const onOutput = ({ stream, line }) => {
    emitter.emit("progress", { type: "agent:output", stage: "discover", message: line, detail: { stream, agent: discoverRole.provider } });
  };
  const stallDetector = createStallDetector({
    onOutput, emitter, eventBase, stage: "discover", provider: discoverRole.provider
  });

  // Everything after the stall detector exists runs inside one try block so the
  // detector is stopped and the run log closed even if loading or initialising
  // the role fails (previously only `discover.run` was covered).
  let trackerRunning = false;
  let result;
  try {
    const { DiscoverRole } = await import("../roles/discover-role.js");
    const discover = new DiscoverRole({ config, logger, emitter });
    await discover.init({ task: a.task });

    // Only a textual reference to the Planning Game card is appended here; the
    // card itself is not fetched by this function. Both IDs must be present.
    let context = a.context || null;
    if (a.pgTask && a.pgProject) {
      const pgContext = `Planning Game card: ${a.pgTask} (project: ${a.pgProject})`;
      context = context ? `${context}\n\n${pgContext}` : pgContext;
    }

    sendTrackerLog(server, "discover", "running", discoverRole.provider);
    trackerRunning = true;
    runLog.logText(`[discover] agent launched, waiting for response...`);
    result = await discover.run({ task: a.task, mode, context, onOutput: stallDetector.onOutput });
  } catch (error) {
    // Do not leave the tracker stuck on "running" when the agent call throws.
    if (trackerRunning) sendTrackerLog(server, "discover", "failed");
    throw error;
  } finally {
    stallDetector.stop();
    const stats = stallDetector.stats();
    runLog.logText(`[discover] finished — lines=${stats.lineCount}, bytes=${stats.bytesReceived}, elapsed=${Math.round(stats.elapsedMs / 1000)}s`);
    runLog.close();
  }

  if (!result.ok) {
    sendTrackerLog(server, "discover", "failed");
    throw new Error(result.result?.error || result.summary || "Discovery failed");
  }

  sendTrackerLog(server, "discover", "done");
  return { ok: true, ...result.result, summary: result.summary };
}
483
+
431
484
  export async function handleToolCall(name, args, server, extra) {
432
485
  const a = asObject(args);
433
486
 
@@ -635,5 +688,16 @@ export async function handleToolCall(name, args, server, extra) {
635
688
  return handlePlanDirect(a, server, extra);
636
689
  }
637
690
 
691
+ if (name === "kj_discover") {
692
+ if (!a.task) {
693
+ return failPayload("Missing required field: task");
694
+ }
695
+ const validModes = ["gaps", "momtest", "wendel", "classify", "jtbd"];
696
+ if (a.mode && !validModes.includes(a.mode)) {
697
+ return failPayload(`Invalid mode "${a.mode}". Valid values: ${validModes.join(", ")}`);
698
+ }
699
+ return handleDiscoverDirect(a, server, extra);
700
+ }
701
+
638
702
  return failPayload(`Unknown tool: ${name}`);
639
703
  }
package/src/mcp/tools.js CHANGED
@@ -70,6 +70,7 @@ export const tools = [
70
70
  enableTester: { type: "boolean" },
71
71
  enableSecurity: { type: "boolean" },
72
72
  enableTriage: { type: "boolean" },
73
+ enableDiscover: { type: "boolean" },
73
74
  enableSerena: { type: "boolean" },
74
75
  enableBecaria: { type: "boolean", description: "Enable BecarIA Gateway (early PR + dispatch comments/reviews)" },
75
76
  reviewerFallback: { type: "string" },
@@ -223,5 +224,21 @@ export const tools = [
223
224
  kjHome: { type: "string" }
224
225
  }
225
226
  }
227
+ },
228
+ {
229
+ name: "kj_discover",
230
+ description: "Analyze a task for gaps, ambiguities, and missing information before execution. Returns a verdict (ready/needs_validation) with structured gap list. Can read task details from Planning Game if pgTask is provided.",
231
+ inputSchema: {
232
+ type: "object",
233
+ required: ["task"],
234
+ properties: {
235
+ task: { type: "string", description: "Task description to analyze for gaps" },
236
+ mode: { type: "string", enum: ["gaps", "momtest", "wendel", "classify", "jtbd"], description: "Discovery mode: gaps (default), momtest (Mom Test questions), wendel (behavior change checklist), classify (START/STOP/DIFFERENT), or jtbd (Jobs-to-be-Done)" },
237
+ context: { type: "string", description: "Additional context for the analysis (e.g., research output)" },
238
+ pgTask: { type: "string", description: "Planning Game card ID (e.g., KJC-TSK-0042). If provided, fetches full card details as additional context." },
239
+ pgProject: { type: "string", description: "Planning Game project ID. Required when pgTask is used." },
240
+ kjHome: { type: "string" }
241
+ }
242
+ }
226
243
  }
227
244
  ];
@@ -1,6 +1,7 @@
1
1
  import { TriageRole } from "../roles/triage-role.js";
2
2
  import { ResearcherRole } from "../roles/researcher-role.js";
3
3
  import { PlannerRole } from "../roles/planner-role.js";
4
+ import { DiscoverRole } from "../roles/discover-role.js";
4
5
  import { createAgent } from "../agents/index.js";
5
6
  import { addCheckpoint, markSessionStatus } from "../session-store.js";
6
7
  import { emitProgress, makeEvent } from "../utils/events.js";
@@ -258,3 +259,68 @@ export async function runPlannerStage({ config, logger, emitter, eventBase, sess
258
259
 
259
260
  return { plannedTask, stageResult };
260
261
  }
262
+
263
/**
 * Pre-loop pipeline stage: runs the Discover role against the session task and
 * records the outcome (progress events, budget entry, session checkpoint).
 *
 * @param {object} params
 * @param {object} params.config - Reads `roles.discover.{provider,model}` and `pipeline.discover.mode`.
 * @param {object} params.logger - Must expose `setContext`.
 * @param {object|null} params.emitter - Passed through to `emitProgress` and the stall detector.
 * @param {object} params.eventBase - Base fields spread into every emitted event.
 * @param {object} params.session - Reads `task` and `id`; passed to `addCheckpoint`.
 * @param {object} params.coderRole - Fallback source for `provider` and `model`.
 * @param {Function} params.trackBudget - Called once with the role's output and duration.
 * @returns {Promise<{stageResult: {ok: boolean, verdict: string|null, gaps: Array, mode: string}}>}
 */
export async function runDiscoverStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget }) {
  logger.setContext({ iteration: 0, stage: "discover" });
  emitProgress(
    emitter,
    makeEvent("discover:start", { ...eventBase, stage: "discover" }, {
      message: "Discover analyzing task for gaps"
    })
  );

  // Resolve provider/model once so the budget entry and the checkpoint agree.
  const discoverProvider = config?.roles?.discover?.provider || coderRole.provider;
  const discoverModel = config?.roles?.discover?.model || coderRole.model;

  const discoverOnOutput = ({ stream, line }) => {
    emitProgress(emitter, makeEvent("agent:output", { ...eventBase, stage: "discover" }, {
      message: line,
      detail: { stream, agent: discoverProvider }
    }));
  };
  const discoverStall = createStallDetector({
    onOutput: discoverOnOutput, emitter, eventBase, stage: "discover", provider: discoverProvider
  });

  const mode = config?.pipeline?.discover?.mode || "gaps";
  const discover = new DiscoverRole({ config, logger, emitter });
  await discover.init({ task: session.task, sessionId: session.id, iteration: 0 });
  const discoverStart = Date.now();
  let discoverOutput;
  try {
    discoverOutput = await discover.run({ task: session.task, mode, onOutput: discoverStall.onOutput });
  } finally {
    // Always stop the detector, including when the agent call throws.
    discoverStall.stop();
  }
  trackBudget({
    role: "discover",
    provider: discoverProvider,
    model: discoverModel,
    result: discoverOutput,
    duration_ms: Date.now() - discoverStart
  });

  await addCheckpoint(session, {
    stage: "discover",
    iteration: 0,
    ok: discoverOutput.ok,
    provider: discoverProvider,
    model: discoverModel || null
  });

  const stageResult = {
    ok: discoverOutput.ok,
    verdict: discoverOutput.result?.verdict || null,
    gaps: discoverOutput.result?.gaps || [],
    mode
  };

  // The role's failure summary already starts with "Discovery failed: "; strip
  // that prefix before adding ours so the event does not read
  // "Discovery failed: Discovery failed: …", and never print "undefined".
  const failureReason = String(
    discoverOutput.summary || discoverOutput.result?.error || "unknown error"
  ).replace(/^Discovery failed:\s*/, "");

  emitProgress(
    emitter,
    makeEvent("discover:end", { ...eventBase, stage: "discover" }, {
      status: discoverOutput.ok ? "ok" : "fail",
      message: discoverOutput.ok ? "Discovery completed" : `Discovery failed: ${failureReason}`,
      detail: stageResult
    })
  );

  return { stageResult };
}
@@ -26,7 +26,7 @@ import { applyPolicies } from "./guards/policy-resolver.js";
26
26
  import { resolveReviewProfile } from "./review/profiles.js";
27
27
  import { CoderRole } from "./roles/coder-role.js";
28
28
  import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
29
- import { runTriageStage, runResearcherStage, runPlannerStage } from "./orchestrator/pre-loop-stages.js";
29
+ import { runTriageStage, runResearcherStage, runPlannerStage, runDiscoverStage } from "./orchestrator/pre-loop-stages.js";
30
30
  import { runCoderStage, runRefactorerStage, runTddCheckStage, runSonarStage, runReviewerStage } from "./orchestrator/iteration-stages.js";
31
31
  import { runTesterStage, runSecurityStage } from "./orchestrator/post-loop-stages.js";
32
32
  import { waitForCooldown, MAX_STANDBY_RETRIES } from "./orchestrator/standby.js";
@@ -44,6 +44,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
44
44
  let testerEnabled = Boolean(config.pipeline?.tester?.enabled);
45
45
  let securityEnabled = Boolean(config.pipeline?.security?.enabled);
46
46
  let reviewerEnabled = config.pipeline?.reviewer?.enabled !== false;
47
+ let discoverEnabled = Boolean(config.pipeline?.discover?.enabled);
47
48
  // Triage is always mandatory — it classifies taskType for policy resolution
48
49
  const triageEnabled = true;
49
50
 
@@ -70,6 +71,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
70
71
  refactorer: refactorerRole
71
72
  },
72
73
  pipeline: {
74
+ discover_enabled: discoverEnabled,
73
75
  triage_enabled: triageEnabled,
74
76
  planner_enabled: plannerEnabled,
75
77
  refactorer_enabled: refactorerEnabled,
@@ -213,6 +215,13 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
213
215
  const stageResults = {};
214
216
  const sonarState = { issuesInitial: null, issuesFinal: null };
215
217
 
218
+ // --- Discover (pre-triage, opt-in) ---
219
+ if (flags.enableDiscover !== undefined) discoverEnabled = Boolean(flags.enableDiscover);
220
+ if (discoverEnabled) {
221
+ const discoverResult = await runDiscoverStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
222
+ stageResults.discover = discoverResult.stageResult;
223
+ }
224
+
216
225
  if (triageEnabled) {
217
226
  const triageResult = await runTriageStage({ config, logger, emitter, eventBase, session, coderRole, trackBudget });
218
227
  if (triageResult.roleOverrides.plannerEnabled !== undefined) plannerEnabled = triageResult.roleOverrides.plannerEnabled;
@@ -0,0 +1,227 @@
1
// Preamble placed at the very top of every discover prompt.
const SUBAGENT_PREAMBLE = [
  "IMPORTANT: You are running as a Karajan sub-agent.",
  "Do NOT ask about using Karajan, do NOT mention Karajan, do NOT suggest orchestration.",
  "Do NOT use any MCP tools. Focus only on discovering gaps in the task specification."
].join(" ");

// Discovery modes understood by the prompt builder. Frozen because the array is
// exported and therefore shared by reference with every importer.
export const DISCOVER_MODES = Object.freeze(["gaps", "momtest", "wendel", "classify", "jtbd"]);

// Allowed values used when normalising the agent's JSON output. Frozen so a
// stray mutation cannot silently widen or narrow validation.
const VALID_VERDICTS = Object.freeze(["ready", "needs_validation"]);
const VALID_SEVERITIES = Object.freeze(["critical", "major", "minor"]);
const VALID_WENDEL_STATUSES = Object.freeze(["pass", "fail", "unknown", "not_applicable"]);
const VALID_CLASSIFY_TYPES = Object.freeze(["START", "STOP", "DIFFERENT", "not_applicable"]);
const VALID_ADOPTION_RISKS = Object.freeze(["none", "low", "medium", "high"]);
14
+
15
/**
 * Assembles the prompt sent to the discover agent.
 *
 * Sections are joined with blank lines in this order: sub-agent preamble,
 * optional role instructions, the fixed introduction and gap guidelines, the
 * mode-specific rules (if the mode has any), the JSON output contract, the
 * optional context and finally the task.
 *
 * @param {object} params
 * @param {string} params.task - Task text placed under the "## Task" heading.
 * @param {string} [params.instructions] - Extra instructions; skipped when falsy.
 * @param {string} [params.mode="gaps"] - One of gaps, momtest, wendel, classify, jtbd.
 *   Any other value gets neither extra rules nor extra schema fields.
 * @param {string|null} [params.context=null] - Additional context; skipped when falsy.
 * @returns {string} The full prompt.
 */
export function buildDiscoverPrompt({ task, instructions, mode = "gaps", context = null }) {
  // Per-mode additions: a heading, the rule lines, and the extra schema fragment.
  const modeExtras = new Map([
    ["momtest", {
      heading: "## Mom Test Rules",
      lines: [
        "For each gap, generate questions that follow The Mom Test principles:",
        "- ALWAYS ask about past behavior and real experiences, never hypothetical scenarios",
        "- NEVER ask 'Would you...?', 'Do you think...?', 'Would it be useful if...?'",
        "- ALWAYS ask 'When was the last time...?', 'How do you currently...?', 'What happened when...?'",
        "- Ask about specifics, not generalities",
        "- Each question must have a targetRole (who to ask) and rationale (why this matters)",
        "",
        "Examples of BAD questions (hypothetical/opinion):",
        " - 'Would you use this feature?' -> opinion, not data",
        " - 'Do you think users need this?' -> speculation",
        "",
        "Examples of GOOD questions (past behavior):",
        " - 'When was the last time you had to do X manually?' -> real experience",
        " - 'How are you currently handling Y?' -> current behavior",
        " - 'What happened the last time Z failed?' -> real consequence"
      ],
      schema: ',"momTestQuestions":[{"gapId":string,"question":string,"targetRole":string,"rationale":string}]'
    }],
    ["wendel", {
      heading: "## Wendel Behavior Change Checklist",
      lines: [
        "Evaluate whether the task implies a user behavior change. If it does, assess these 5 conditions:",
        "",
        "1. **CUE** — Is there a clear trigger that will prompt the user to take the new action?",
        "2. **REACTION** — Will the user have a positive emotional reaction when they encounter the cue?",
        "3. **EVALUATION** — Can the user quickly understand the value of the new behavior?",
        "4. **ABILITY** — Does the user have the skill and resources to perform the new behavior?",
        "5. **TIMING** — Is this the right moment to introduce this change?",
        "",
        "For each condition, set status to: pass, fail, unknown, or not_applicable",
        "If the task does NOT imply behavior change (e.g., internal refactor, backend optimization), set ALL conditions to 'not_applicable'",
        "If ANY condition is 'fail', set verdict to 'needs_validation'"
      ],
      schema: ',"wendelChecklist":[{"condition":"CUE|REACTION|EVALUATION|ABILITY|TIMING","status":"pass|fail|unknown|not_applicable","justification":string}]'
    }],
    ["classify", {
      heading: "## Behavior Change Classification",
      lines: [
        "Classify the task by its impact on user behavior:",
        "",
        "- **START**: User must adopt a completely new behavior or workflow",
        "- **STOP**: User must stop doing something they currently do (highest resistance risk)",
        "- **DIFFERENT**: User must do something they already do, but differently",
        "- **not_applicable**: Task has no user behavior impact (internal refactor, backend, infra)",
        "",
        "Assess adoption risk: none (no user impact), low, medium, high",
        "STOP changes carry the highest risk of resistance — always flag them",
        "Provide a frictionEstimate explaining the expected friction"
      ],
      schema: ',"classification":{"type":"START|STOP|DIFFERENT|not_applicable","adoptionRisk":"none|low|medium|high","frictionEstimate":string}'
    }],
    ["jtbd", {
      heading: "## Jobs-to-be-Done Framework",
      lines: [
        "Generate reinforced Jobs-to-be-Done from the task and any provided context (interview notes, field observations).",
        "Each JTBD must include 5 layers:",
        "",
        "- **functional**: The practical job the user is trying to accomplish",
        "- **emotionalPersonal**: How the user wants to feel personally",
        "- **emotionalSocial**: How the user wants to be perceived by others",
        "- **behaviorChange**: Type of change: START, STOP, DIFFERENT, or not_applicable",
        "- **evidence**: Direct quotes or specific references from the context. If no context provided, set to 'not_available' and suggest what context is needed",
        "",
        "CRITICAL: evidence must contain real quotes or references from the provided context, NEVER invented assumptions",
        "If no context is provided, mark evidence as 'not_available'"
      ],
      schema: ',"jtbds":[{"id":string,"functional":string,"emotionalPersonal":string,"emotionalSocial":string,"behaviorChange":"START|STOP|DIFFERENT|not_applicable","evidence":string}]'
    }]
  ]);

  const gapGuidelines = [
    "- Look for missing acceptance criteria or requirements",
    "- Identify implicit assumptions that need explicit confirmation",
    "- Find ambiguities where multiple interpretations exist",
    "- Check for contradictions between different parts of the spec",
    "- Consider edge cases and error scenarios not addressed",
    "- Classify each gap by severity: critical (blocks implementation), major (could cause rework), minor (reasonable default exists)"
  ];

  const extra = modeExtras.get(mode);
  const gapSchema = '{"verdict":"ready|needs_validation","gaps":[{"id":string,"description":string,"severity":"critical|major|minor","suggestedQuestion":string}]';
  const modeSchema = extra ? extra.schema : "";

  const pieces = [
    SUBAGENT_PREAMBLE,
    ...(instructions ? [instructions] : []),
    "You are a task discovery agent for Karajan Code, a multi-agent coding orchestrator.",
    "Analyze the following task and identify gaps, ambiguities, missing information, and implicit assumptions.",
    "## Gap Detection Guidelines",
    gapGuidelines.join("\n"),
    ...(extra ? [extra.heading, extra.lines.join("\n")] : []),
    "Return a single valid JSON object and nothing else.",
    `JSON schema: ${gapSchema}${modeSchema},"summary":string}`,
    ...(context ? [`## Context\n${context}`] : []),
    `## Task\n${task}`
  ];

  return pieces.join("\n\n");
}
145
+
146
// Parses `candidate` as JSON and returns it only if it is a plain (non-array) object.
function tryParseJsonObject(candidate) {
  try {
    const value = JSON.parse(candidate);
    return value !== null && typeof value === "object" && !Array.isArray(value) ? value : null;
  } catch {
    return null;
  }
}

// Returns the index of the `}` that balances the `{` at `start`, skipping braces
// that appear inside double-quoted strings; -1 when the object never closes.
function findClosingBrace(text, start) {
  let depth = 0;
  let inString = false;
  for (let i = start; i < text.length; i += 1) {
    const ch = text[i];
    if (inString) {
      if (ch === "\\") i += 1; // skip the escaped character
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

// Finds the JSON object embedded in free-form agent output. The first-`{`-to-last-`}`
// span is tried first; if that does not parse (for example prose around the JSON
// contains braces) each balanced `{…}` candidate is tried in order of appearance.
function extractJsonObject(text) {
  const greedy = text.match(/\{[\s\S]*\}/);
  if (!greedy) return null;

  const whole = tryParseJsonObject(greedy[0]);
  if (whole) return whole;

  for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
    const end = findClosingBrace(text, start);
    if (end === -1) continue;
    const candidate = tryParseJsonObject(text.slice(start, end + 1));
    if (candidate) return candidate;
  }
  return null;
}

/**
 * Extracts and normalises the discover agent's JSON answer.
 *
 * Entries missing a required field are dropped from each list. Severity, Wendel
 * status, classification type and adoption risk are case-normalised and replaced
 * by a default when unrecognised ("major", "unknown", "not_applicable", "medium").
 *
 * The verdict is case-normalised. When it is missing or unrecognised it is derived
 * from the findings instead of defaulting to "ready": "needs_validation" if any gap
 * survived filtering or any Wendel condition failed, otherwise "ready".
 *
 * @param {string} raw - Raw agent output; may contain text around the JSON object.
 * @returns {{verdict: string, gaps: object[], momTestQuestions: object[],
 *   wendelChecklist: object[], classification: object|null, jtbds: object[],
 *   summary: string}|null} `null` when `raw` is not a string or holds no parseable JSON object.
 */
export function parseDiscoverOutput(raw) {
  const text = typeof raw === "string" ? raw.trim() : "";
  const parsed = extractJsonObject(text);
  if (!parsed) return null;

  const rawGaps = Array.isArray(parsed.gaps) ? parsed.gaps : [];
  const gaps = rawGaps
    .filter((g) => g && g.id && g.description && g.suggestedQuestion)
    .map((g) => ({
      id: g.id,
      description: g.description,
      severity: VALID_SEVERITIES.includes(String(g.severity).toLowerCase())
        ? String(g.severity).toLowerCase()
        : "major",
      suggestedQuestion: g.suggestedQuestion
    }));

  const rawQuestions = Array.isArray(parsed.momTestQuestions) ? parsed.momTestQuestions : [];
  const momTestQuestions = rawQuestions
    .filter((q) => q && q.gapId && q.question && q.targetRole && q.rationale)
    .map((q) => ({
      gapId: q.gapId,
      question: q.question,
      targetRole: q.targetRole,
      rationale: q.rationale
    }));

  const rawChecklist = Array.isArray(parsed.wendelChecklist) ? parsed.wendelChecklist : [];
  const wendelChecklist = rawChecklist
    .filter((c) => c && c.condition && c.justification && c.status)
    .map((c) => ({
      condition: c.condition,
      status: VALID_WENDEL_STATUSES.includes(String(c.status).toLowerCase())
        ? String(c.status).toLowerCase()
        : "unknown",
      justification: c.justification
    }));

  const rawJtbds = Array.isArray(parsed.jtbds) ? parsed.jtbds : [];
  const jtbds = rawJtbds
    .filter((j) => j && j.id && j.functional && j.emotionalPersonal && j.emotionalSocial && j.behaviorChange && j.evidence)
    .map((j) => ({
      id: j.id,
      functional: j.functional,
      emotionalPersonal: j.emotionalPersonal,
      emotionalSocial: j.emotionalSocial,
      behaviorChange: j.behaviorChange,
      evidence: j.evidence
    }));

  let classification = null;
  const rawClassification = parsed.classification;
  // Arrays are objects too; only a real object literal is a usable classification.
  if (rawClassification && typeof rawClassification === "object" && !Array.isArray(rawClassification)) {
    const rawType = String(rawClassification.type || "").toUpperCase();
    const type = rawType === "NOT_APPLICABLE" ? "not_applicable"
      : VALID_CLASSIFY_TYPES.includes(rawType) ? rawType : "not_applicable";
    const rawRisk = String(rawClassification.adoptionRisk || "").toLowerCase();
    classification = {
      type,
      adoptionRisk: VALID_ADOPTION_RISKS.includes(rawRisk) ? rawRisk : "medium",
      frictionEstimate: rawClassification.frictionEstimate || ""
    };
  }

  const statedVerdict = typeof parsed.verdict === "string" ? parsed.verdict.trim().toLowerCase() : "";
  const hasFindings = gaps.length > 0 || wendelChecklist.some((c) => c.status === "fail");
  let verdict;
  if (VALID_VERDICTS.includes(statedVerdict)) {
    verdict = statedVerdict;
  } else {
    // No usable verdict: do not report "ready" while findings are present.
    verdict = hasFindings ? "needs_validation" : "ready";
  }

  return {
    verdict,
    gaps,
    momTestQuestions,
    wendelChecklist,
    classification,
    jtbds,
    summary: parsed.summary || ""
  };
}
@@ -0,0 +1,130 @@
1
+ import { BaseRole } from "./base-role.js";
2
+ import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
+ import { buildDiscoverPrompt, parseDiscoverOutput } from "../prompts/discover.js";
4
+
5
// Picks the agent provider for the discover role: the role's own provider if
// set, otherwise the coder's provider, otherwise "claude".
function resolveProvider(config) {
  const roles = config?.roles;
  const configured = [roles?.discover?.provider, roles?.coder?.provider].find(Boolean);
  return configured || "claude";
}
12
+
13
/**
 * Builds the one-line human-readable summary for a parsed discover result.
 *
 * Returns "Discovery complete: task is ready" when there is nothing to report;
 * otherwise lists the findings followed by the verdict.
 *
 * In classify mode a classification that signals user impact (type other than
 * "not_applicable") is reported even when no gaps were found; previously such a
 * result (for example STOP / high risk) was summarised as "task is ready".
 *
 * @param {object} parsed - Normalised result; reads `gaps`, `verdict`,
 *   `momTestQuestions`, `wendelChecklist`, `classification` and `jtbds`.
 * @param {string} mode - Discovery mode the result was produced in.
 * @returns {string} Summary line.
 */
function buildSummary(parsed, mode) {
  const plural = (count) => (count === 1 ? "" : "s");
  const gapCount = parsed.gaps?.length || 0;
  const parts = [];

  if (gapCount > 0) parts.push(`${gapCount} gap${plural(gapCount)} found`);

  // Mom Test questions refer to gaps, so they are only reported alongside gaps.
  if (mode === "momtest" && gapCount > 0) {
    const questionCount = parsed.momTestQuestions?.length || 0;
    if (questionCount > 0) parts.push(`${questionCount} Mom Test question${plural(questionCount)}`);
  }

  if (mode === "wendel") {
    const failCount = (parsed.wendelChecklist || []).filter((c) => c.status === "fail").length;
    if (failCount > 0) parts.push(`${failCount} Wendel condition${plural(failCount)} failed`);
  }

  if (mode === "classify" && parsed.classification) {
    const { type, adoptionRisk } = parsed.classification;
    if (gapCount > 0 || type !== "not_applicable") {
      parts.push(`type: ${type}, risk: ${adoptionRisk}`);
    }
  }

  if (mode === "jtbd") {
    const jtbdCount = parsed.jtbds?.length || 0;
    if (jtbdCount > 0) parts.push(`${jtbdCount} JTBD${plural(jtbdCount)} generated`);
  }

  if (parts.length === 0) return "Discovery complete: task is ready";
  return `Discovery complete: ${parts.join(", ")} (verdict: ${parsed.verdict})`;
}
37
+
38
/**
 * Pipeline role that asks an agent to find gaps in a task description.
 */
export class DiscoverRole extends BaseRole {
  /**
   * @param {object} params
   * @param {object} params.config - Pipeline configuration, used to pick the provider.
   * @param {object} params.logger - Logger handed to the base role and to the agent factory.
   * @param {object|null} [params.emitter=null] - Optional progress emitter.
   * @param {Function|null} [params.createAgentFn=null] - Agent factory override
   *   (`(provider, config, logger) => agent`); defaults to the project's `createAgent`.
   */
  constructor({ config, logger, emitter = null, createAgentFn = null }) {
    super({ name: "discover", config, logger, emitter });
    this._createAgent = createAgentFn || defaultCreateAgent;
  }

  /**
   * Runs one discovery pass.
   *
   * @param {string|object} input - Either the task text, or an object with
   *   `task`, `mode` (defaults to "gaps"), `context` and `onOutput`.
   * @returns {Promise<{ok: boolean, result: object, summary: string, usage: *}>}
   *   `ok: false` only when the agent itself fails. Output that cannot be parsed
   *   is still reported as `ok: true` with verdict "ready", no gaps, and the raw
   *   text under `result.raw`.
   */
  async execute(input) {
    const options = input !== null && typeof input === "object" ? input : {};
    const task = typeof input === "string"
      ? input
      : options.task || this.context?.task || "";
    const onOutput = options.onOutput || null;
    const mode = options.mode || "gaps";
    const context = options.context || null;

    const provider = resolveProvider(this.config);
    const agent = this._createAgent(provider, this.config, this.logger);

    const prompt = buildDiscoverPrompt({ task, instructions: this.instructions, mode, context });
    const runArgs = { prompt, role: "discover" };
    if (onOutput) runArgs.onOutput = onOutput;
    const result = await agent.runTask(runArgs);

    if (!result.ok) {
      return {
        ok: false,
        result: {
          error: result.error || result.output || "Discovery failed",
          provider,
          mode
        },
        summary: `Discovery failed: ${result.error || "unknown error"}`,
        usage: result.usage
      };
    }

    // Best-effort result used whenever the agent's answer cannot be structured.
    const unstructured = () => ({
      ok: true,
      result: {
        verdict: "ready",
        gaps: [],
        mode,
        raw: result.output,
        provider
      },
      summary: "Discovery complete (unstructured output)",
      usage: result.usage
    });

    try {
      const parsed = parseDiscoverOutput(result.output);
      if (!parsed) return unstructured();

      const resultObj = {
        verdict: parsed.verdict,
        gaps: parsed.gaps,
        mode,
        provider
      };
      // Only the list belonging to the requested mode is exposed.
      if (mode === "momtest") {
        resultObj.momTestQuestions = parsed.momTestQuestions || [];
      }
      if (mode === "wendel") {
        resultObj.wendelChecklist = parsed.wendelChecklist || [];
      }
      if (mode === "classify") {
        resultObj.classification = parsed.classification || null;
      }
      if (mode === "jtbd") {
        resultObj.jtbds = parsed.jtbds || [];
      }

      return {
        ok: true,
        result: resultObj,
        summary: buildSummary(parsed, mode),
        usage: result.usage
      };
    } catch (error) {
      // Still degrade gracefully, but leave a trace instead of hiding the cause.
      // NOTE(review): assumes the logger may expose `warn`; the optional call is
      // a no-op when it does not — confirm against the project logger.
      this.logger?.warn?.(`Discover output could not be processed: ${error?.message || error}`);
      return unstructured();
    }
  }
}
@@ -10,3 +10,4 @@ export { TriageRole } from "./triage-role.js";
10
10
  export { TesterRole } from "./tester-role.js";
11
11
  export { SecurityRole } from "./security-role.js";
12
12
  export { SolomonRole } from "./solomon-role.js";
13
+ export { DiscoverRole } from "./discover-role.js";
@@ -0,0 +1,167 @@
1
+ # Discover Role
2
+
3
+ You are the **Discover** role in a multi-role AI pipeline.
4
+
5
+ Your job is to analyze a task description, ticket, or brief and identify **gaps** — missing information, implicit assumptions, ambiguities, and contradictions that could cause unnecessary iterations during implementation.
6
+
7
+ ## Responsibilities
8
+
9
+ - Detect missing requirements or acceptance criteria
10
+ - Identify implicit assumptions that need explicit confirmation
11
+ - Find ambiguities where multiple interpretations are possible
12
+ - Spot contradictions between different parts of the specification
13
+ - Suggest specific questions that would resolve each gap
14
+
15
+ ## Severity Classification
16
+
17
+ - **critical**: Blocks implementation entirely — cannot proceed without this information
18
+ - **major**: Could lead to significant rework if assumed incorrectly
19
+ - **minor**: Nice to clarify but a reasonable default exists
20
+
21
+ ## Verdict
22
+
23
+ - **ready**: The task is well-defined and can proceed to implementation without further clarification
24
+ - **needs_validation**: One or more gaps were found that should be resolved before implementation
25
+
26
+ ## Output format
27
+
28
+ Return a single valid JSON object and nothing else.
29
+
30
+ ```json
31
+ {
32
+ "verdict": "ready|needs_validation",
33
+ "gaps": [
34
+ {
35
+ "id": "gap-1",
36
+ "description": "What information is missing or ambiguous",
37
+ "severity": "critical|major|minor",
38
+ "suggestedQuestion": "A specific question to resolve this gap"
39
+ }
40
+ ],
41
+ "summary": "Brief human-readable summary of findings"
42
+ }
43
+ ```
44
+
45
+ If the task is well-defined with no gaps, return `verdict: "ready"` with an empty `gaps` array.
46
+
47
+ ## Mom Test Mode
48
+
49
+ When running in **momtest** mode, for each gap generate questions following The Mom Test principles:
50
+
51
+ - Ask about **past behavior** and real experiences, never hypothetical scenarios
52
+ - Ask about **specifics**, not generalities
53
+ - Focus on what people **actually do**, not what they say they would do
54
+
55
+ ### Good vs Bad Questions
56
+
57
+ | Bad (hypothetical/opinion) | Good (past behavior) |
58
+ |---|---|
59
+ | "Would you use a notification system?" | "When was the last time you missed an important update?" |
60
+ | "Do you think users need dark mode?" | "How many support tickets mentioned readability issues?" |
61
+ | "Would it be useful to have X?" | "How are you currently handling X?" |
62
+
63
+ ### Mom Test Output Schema (additional fields for momtest mode)
64
+
65
+ ```json
66
+ {
67
+ "momTestQuestions": [
68
+ {
69
+ "gapId": "gap-1",
70
+ "question": "Past-behavior question to validate this gap",
71
+ "targetRole": "Who should answer (end-user, developer, PM, etc.)",
72
+ "rationale": "Why this question matters for the gap"
73
+ }
74
+ ]
75
+ }
76
+ ```
77
+
78
+ ## Wendel Mode
79
+
80
+ When running in **wendel** mode, evaluate whether the task implies a **user behavior change** and assess 5 adoption conditions:
81
+
82
+ | Condition | Question |
83
+ |-----------|----------|
84
+ | **CUE** | Is there a clear trigger that will prompt the user to take the new action? |
85
+ | **REACTION** | Will the user have a positive emotional reaction when encountering the cue? |
86
+ | **EVALUATION** | Can the user quickly understand the value of the new behavior? |
87
+ | **ABILITY** | Does the user have the skill and resources to perform the new behavior? |
88
+ | **TIMING** | Is this the right moment to introduce this change? |
89
+
90
+ ### Status Values
91
+
92
+ - **pass**: Condition is clearly met based on the task specification
93
+ - **fail**: Condition is NOT met — adoption risk identified
94
+ - **unknown**: Not enough information to evaluate
95
+ - **not_applicable**: Task does not imply user behavior change (e.g., refactor, backend optimization)
96
+
97
+ If the task does NOT imply behavior change, set the `status` of ALL five conditions to `not_applicable` and set the verdict to `ready`.
98
+
99
+ ### Wendel Output Schema (additional fields for wendel mode)
100
+
101
+ ```json
102
+ {
103
+ "wendelChecklist": [
104
+ {
105
+ "condition": "CUE|REACTION|EVALUATION|ABILITY|TIMING",
106
+ "status": "pass|fail|unknown|not_applicable",
107
+ "justification": "Why this condition passes or fails"
108
+ }
109
+ ]
110
+ }
111
+ ```
112
+
113
+ ## Classify Mode
114
+
115
+ When running in **classify** mode, classify the task by its impact on user behavior:
116
+
117
+ | Type | Description | Risk Level |
118
+ |------|-------------|------------|
119
+ | **START** | User must adopt a completely new behavior or workflow | Medium-High |
120
+ | **STOP** | User must stop doing something they currently do | **Highest** resistance risk |
121
+ | **DIFFERENT** | User must do something they already do, but differently | Low-Medium |
122
+ | **not_applicable** | No user behavior impact (internal refactor, backend, infra) | None |
123
+
124
+ ### Classify Output Schema (additional fields for classify mode)
125
+
126
+ ```json
127
+ {
128
+ "classification": {
129
+ "type": "START|STOP|DIFFERENT|not_applicable",
130
+ "adoptionRisk": "none|low|medium|high",
131
+ "frictionEstimate": "Description of expected friction"
132
+ }
133
+ }
134
+ ```
135
+
136
+ ## JTBD Mode
137
+
138
+ When running in **jtbd** mode, generate reinforced Jobs-to-be-Done from the task and provided context (interview notes, field observations).
139
+
140
+ Each JTBD must include 5 layers:
141
+
142
+ | Layer | Description |
143
+ |-------|-------------|
144
+ | **functional** | The practical job the user is trying to accomplish |
145
+ | **emotionalPersonal** | How the user wants to feel personally |
146
+ | **emotionalSocial** | How the user wants to be perceived by others |
147
+ | **behaviorChange** | Type of change: START, STOP, DIFFERENT, or not_applicable |
148
+ | **evidence** | Direct quotes or references from context. Set to `not_available` if no context provided |
149
+
150
+ **CRITICAL**: The `evidence` field must contain real quotes or specific references from the provided context. If no context is provided, set it to `not_available`. Never invent quotes, references, or assumptions.
151
+
152
+ ### JTBD Output Schema (additional fields for jtbd mode)
153
+
154
+ ```json
155
+ {
156
+ "jtbds": [
157
+ {
158
+ "id": "jtbd-1",
159
+ "functional": "The practical job",
160
+ "emotionalPersonal": "How the user wants to feel",
161
+ "emotionalSocial": "How the user wants to be perceived",
162
+ "behaviorChange": "START|STOP|DIFFERENT|not_applicable",
163
+ "evidence": "Direct quote or 'not_available'"
164
+ }
165
+ ]
166
+ }
167
+ ```