karajan-code 1.13.2 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.13.2",
3
+ "version": "1.15.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
package/src/cli.js CHANGED
@@ -87,6 +87,7 @@ program
87
87
  .option("--auto-pr")
88
88
  .option("--enable-becaria", "Enable BecarIA Gateway (early PR + dispatch comments/reviews)")
89
89
  .option("--branch-prefix <prefix>")
90
+ .option("--task-type <type>", "Explicit task type: sw, infra, doc, add-tests, refactor")
90
91
  .option("--methodology <name>")
91
92
  .option("--no-auto-rebase")
92
93
  .option("--no-sonar")
package/src/config.js CHANGED
@@ -97,6 +97,7 @@ const DEFAULTS = {
97
97
  disabled_rules: ["javascript:S1116", "javascript:S3776"]
98
98
  }
99
99
  },
100
+ policies: {},
100
101
  serena: { enabled: false },
101
102
  planning_game: { enabled: false, project_id: null, codeveloper: null },
102
103
  becaria: { enabled: false, review_event: "becaria-review", comment_event: "becaria-comment", comment_prefix: true },
@@ -285,6 +286,7 @@ export function applyRunOverrides(config, flags) {
285
286
  out.development.methodology = methodology;
286
287
  out.development.require_test_changes = methodology === "tdd";
287
288
  }
289
+ if (flags.taskType) out.taskType = String(flags.taskType);
288
290
  if (flags.noSonar || flags.sonar === false) out.sonarqube.enabled = false;
289
291
  out.serena = out.serena || { enabled: false };
290
292
  if (flags.enableSerena !== undefined) out.serena.enabled = Boolean(flags.enableSerena);
@@ -0,0 +1,37 @@
1
/** Task types the pipeline can gate on; any other value resolves to the fallback type. */
export const VALID_TASK_TYPES = ["sw", "infra", "doc", "add-tests", "refactor"];

/**
 * Built-in policy flags for each task type.
 * Each entry states whether TDD, SonarQube, and the reviewer are enabled and
 * whether test changes are required for that kind of task.
 */
export const DEFAULT_POLICIES = {
  sw: { tdd: true, sonar: true, reviewer: true, testsRequired: true },
  infra: { tdd: false, sonar: false, reviewer: true, testsRequired: false },
  doc: { tdd: false, sonar: false, reviewer: true, testsRequired: false },
  "add-tests": { tdd: false, sonar: true, reviewer: true, testsRequired: true },
  refactor: { tdd: true, sonar: true, reviewer: true, testsRequired: false },
};

// "sw" is the strictest profile, so it is the conservative choice for unknown input.
const FALLBACK_TYPE = "sw";

/**
 * Map arbitrary input onto a valid task type.
 * Strings are trimmed and lower-cased first so that "Doc" or " infra " resolve
 * to the intended type instead of silently falling back.
 */
function normalizeTaskType(taskType) {
  if (typeof taskType !== "string") return FALLBACK_TYPE;
  const candidate = taskType.trim().toLowerCase();
  return VALID_TASK_TYPES.includes(candidate) ? candidate : FALLBACK_TYPE;
}

/** True for non-null, non-array objects (the only shape accepted as an override map). */
function isPlainObject(value) {
  return value !== null && typeof value === "object" && !Array.isArray(value);
}

/**
 * Resolve pipeline policies for a given taskType.
 *
 * @param {unknown} taskType - Requested task type. Unknown, null, undefined or
 *   non-string values fall back to "sw" (conservative).
 * @param {object} [configOverrides] - Optional map of taskType -> partial policy
 *   object; the entry for the resolved type is merged over the defaults.
 *   Non-object entries (including arrays) are ignored.
 * @returns {object} A fresh policy object; DEFAULT_POLICIES is never mutated.
 */
export function resolvePolicies(taskType, configOverrides) {
  const resolvedType = normalizeTaskType(taskType);
  const overrides = isPlainObject(configOverrides) ? configOverrides[resolvedType] : undefined;
  return {
    ...DEFAULT_POLICIES[resolvedType],
    ...(isPlainObject(overrides) ? overrides : {}),
  };
}

/**
 * Resolve policies for a taskType and return a flat object with the resolved
 * taskType plus all policy flags. This is the main entry point for the
 * orchestrator to determine which pipeline stages to enable/disable.
 *
 * @param {object} [options]
 * @param {unknown} [options.taskType] - Requested task type (see resolvePolicies).
 * @param {object} [options.policies] - Per-taskType overrides from configuration.
 * @returns {object} `{ taskType, ...policyFlags }` where `taskType` is always the
 *   resolved, valid type.
 */
export function applyPolicies({ taskType, policies } = {}) {
  const resolvedType = normalizeTaskType(taskType);
  const flat = { taskType: resolvedType, ...resolvePolicies(resolvedType, policies) };
  // An override entry must not be able to clobber the resolved type we report.
  flat.taskType = resolvedType;
  return flat;
}
package/src/mcp/run-kj.js CHANGED
@@ -48,6 +48,7 @@ export async function runKjCommand({ command, commandArgs = [], options = {}, en
48
48
  normalizeBoolFlag(options.autoPush, "--auto-push", args);
49
49
  normalizeBoolFlag(options.autoPr, "--auto-pr", args);
50
50
  if (options.autoRebase === false) args.push("--no-auto-rebase");
51
+ addOptionalValue(args, "--task-type", options.taskType);
51
52
  normalizeBoolFlag(options.noSonar, "--no-sonar", args);
52
53
  if (options.smartModels === true) args.push("--smart-models");
53
54
  if (options.smartModels === false) args.push("--no-smart-models");
@@ -565,6 +565,12 @@ export async function handleToolCall(name, args, server, extra) {
565
565
  if (!a.task) {
566
566
  return failPayload("Missing required field: task");
567
567
  }
568
+ if (a.taskType) {
569
+ const validTypes = ["sw", "infra", "doc", "add-tests", "refactor"];
570
+ if (!validTypes.includes(a.taskType)) {
571
+ return failPayload(`Invalid taskType "${a.taskType}". Valid values: ${validTypes.join(", ")}`);
572
+ }
573
+ }
568
574
  if (!isPreflightAcked()) {
569
575
  const { config } = await loadConfig();
570
576
  const { listAgents } = await import("../commands/agents.js");
package/src/mcp/tools.js CHANGED
@@ -88,6 +88,7 @@ export const tools = [
88
88
  branchPrefix: { type: "string" },
89
89
  smartModels: { type: "boolean", description: "Enable/disable smart model selection based on triage complexity" },
90
90
  checkpointInterval: { type: "number", description: "Minutes between interactive checkpoints (default: 5). Set 0 to disable." },
91
+ taskType: { type: "string", enum: ["sw", "infra", "doc", "add-tests", "refactor"], description: "Explicit task type for policy resolution. Overrides triage classification." },
91
92
  noSonar: { type: "boolean" },
92
93
  kjHome: { type: "string" },
93
94
  sonarToken: { type: "string" },
@@ -3,7 +3,7 @@ import { CoderRole } from "../roles/coder-role.js";
3
3
  import { RefactorerRole } from "../roles/refactorer-role.js";
4
4
  import { SonarRole } from "../roles/sonar-role.js";
5
5
  import { addCheckpoint, markSessionStatus, saveSession, pauseSession } from "../session-store.js";
6
- import { generateDiff } from "../review/diff-generator.js";
6
+ import { generateDiff, getUntrackedFiles } from "../review/diff-generator.js";
7
7
  import { evaluateTddPolicy } from "../review/tdd-policy.js";
8
8
  import { validateReviewResult } from "../review/schema.js";
9
9
  import { filterReviewScope, buildDeferredContext } from "../review/scope-filter.js";
@@ -198,7 +198,8 @@ export async function runRefactorerStage({ refactorerRole, config, logger, emitt
198
198
  export async function runTddCheckStage({ config, logger, emitter, eventBase, session, trackBudget, iteration, askQuestion }) {
199
199
  logger.setContext({ iteration, stage: "tdd" });
200
200
  const tddDiff = await generateDiff({ baseRef: session.session_start_sha });
201
- const tddEval = evaluateTddPolicy(tddDiff, config.development);
201
+ const untrackedFiles = await getUntrackedFiles();
202
+ const tddEval = evaluateTddPolicy(tddDiff, config.development, untrackedFiles);
202
203
  await addCheckpoint(session, {
203
204
  stage: "tdd-policy",
204
205
  iteration,
@@ -227,34 +228,42 @@ export async function runTddCheckStage({ config, logger, emitter, eventBase, ses
227
228
  session.repeated_issue_count += 1;
228
229
  await saveSession(session);
229
230
  if (session.repeated_issue_count >= config.session.fail_fast_repeats) {
230
- const question = `TDD policy has failed ${session.repeated_issue_count} times. The coder is not creating tests. How should we proceed? Issue: ${tddEval.reason}`;
231
- if (askQuestion) {
232
- const answer = await askQuestion(question, { iteration, stage: "tdd" });
233
- if (answer) {
234
- session.last_reviewer_feedback += `\nUser guidance: ${answer}`;
235
- session.repeated_issue_count = 0;
236
- await saveSession(session);
237
- return { action: "continue" };
238
- }
239
- }
240
- await pauseSession(session, {
241
- question,
242
- context: {
243
- iteration,
244
- stage: "tdd",
245
- lastFeedback: tddEval.message,
246
- repeatedCount: session.repeated_issue_count
247
- }
248
- });
249
231
  emitProgress(
250
232
  emitter,
251
- makeEvent("question", { ...eventBase, stage: "tdd" }, {
252
- status: "paused",
253
- message: question,
254
- detail: { question, sessionId: session.id }
233
+ makeEvent("solomon:escalate", { ...eventBase, stage: "tdd" }, {
234
+ message: `TDD sub-loop limit reached (${session.repeated_issue_count}/${config.session.fail_fast_repeats})`,
235
+ detail: { subloop: "tdd", retryCount: session.repeated_issue_count, reason: tddEval.reason }
255
236
  })
256
237
  );
257
- return { action: "pause", result: { paused: true, sessionId: session.id, question, context: "tdd_fail_fast" } };
238
+
239
+ const solomonResult = await invokeSolomon({
240
+ config, logger, emitter, eventBase, stage: "tdd", askQuestion, session, iteration,
241
+ conflict: {
242
+ stage: "tdd",
243
+ task: session.task,
244
+ iterationCount: session.repeated_issue_count,
245
+ maxIterations: config.session.fail_fast_repeats,
246
+ reason: tddEval.reason,
247
+ sourceFiles: tddEval.sourceFiles,
248
+ testFiles: tddEval.testFiles,
249
+ history: [{ agent: "tdd-policy", feedback: tddEval.message }]
250
+ }
251
+ });
252
+
253
+ if (solomonResult.action === "pause") {
254
+ return { action: "pause", result: { paused: true, sessionId: session.id, question: solomonResult.question, context: "tdd_fail_fast" } };
255
+ }
256
+ if (solomonResult.action === "continue") {
257
+ if (solomonResult.humanGuidance) {
258
+ session.last_reviewer_feedback += `\nUser guidance: ${solomonResult.humanGuidance}`;
259
+ }
260
+ session.repeated_issue_count = 0;
261
+ await saveSession(session);
262
+ return { action: "continue" };
263
+ }
264
+ if (solomonResult.action === "subtask") {
265
+ return { action: "pause", result: { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "tdd_subtask" } };
266
+ }
258
267
  }
259
268
  return { action: "continue" };
260
269
  }
@@ -56,12 +56,14 @@ export async function runTriageStage({ config, logger, emitter, eventBase, sessi
56
56
  const recommendedRoles = new Set(triageOutput.result?.roles || []);
57
57
  const roleOverrides = {};
58
58
  if (triageOutput.ok) {
59
- roleOverrides.plannerEnabled = recommendedRoles.has("planner");
60
- roleOverrides.researcherEnabled = recommendedRoles.has("researcher");
61
- roleOverrides.refactorerEnabled = recommendedRoles.has("refactorer");
62
- roleOverrides.reviewerEnabled = recommendedRoles.has("reviewer");
63
- roleOverrides.testerEnabled = recommendedRoles.has("tester");
64
- roleOverrides.securityEnabled = recommendedRoles.has("security");
59
+ // Triage can activate roles, but cannot deactivate roles explicitly enabled in pipeline config
60
+ const p = config.pipeline || {};
61
+ roleOverrides.plannerEnabled = recommendedRoles.has("planner") || Boolean(p.planner?.enabled);
62
+ roleOverrides.researcherEnabled = recommendedRoles.has("researcher") || Boolean(p.researcher?.enabled);
63
+ roleOverrides.refactorerEnabled = recommendedRoles.has("refactorer") || Boolean(p.refactorer?.enabled);
64
+ roleOverrides.reviewerEnabled = recommendedRoles.has("reviewer") || Boolean(p.reviewer?.enabled);
65
+ roleOverrides.testerEnabled = recommendedRoles.has("tester") || Boolean(p.tester?.enabled);
66
+ roleOverrides.securityEnabled = recommendedRoles.has("security") || Boolean(p.security?.enabled);
65
67
  }
66
68
 
67
69
  const shouldDecompose = triageOutput.result?.shouldDecompose || false;
@@ -72,6 +74,7 @@ export async function runTriageStage({ config, logger, emitter, eventBase, sessi
72
74
  level: triageOutput.result?.level || null,
73
75
  roles: Array.from(recommendedRoles),
74
76
  reasoning: triageOutput.result?.reasoning || null,
77
+ taskType: triageOutput.result?.taskType || "sw",
75
78
  shouldDecompose,
76
79
  subtasks
77
80
  };
@@ -22,6 +22,7 @@ import {
22
22
  incrementalPush
23
23
  } from "./git/automation.js";
24
24
  import { resolveRoleMdPath, loadFirstExisting } from "./roles/base-role.js";
25
+ import { applyPolicies } from "./guards/policy-resolver.js";
25
26
  import { resolveReviewProfile } from "./review/profiles.js";
26
27
  import { CoderRole } from "./roles/coder-role.js";
27
28
  import { invokeSolomon } from "./orchestrator/solomon-escalation.js";
@@ -43,10 +44,15 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
43
44
  let testerEnabled = Boolean(config.pipeline?.tester?.enabled);
44
45
  let securityEnabled = Boolean(config.pipeline?.security?.enabled);
45
46
  let reviewerEnabled = config.pipeline?.reviewer?.enabled !== false;
46
- const triageEnabled = Boolean(config.pipeline?.triage?.enabled);
47
+ // Triage is always mandatory — it classifies taskType for policy resolution
48
+ const triageEnabled = true;
47
49
 
48
50
  // --- Dry-run: return summary without executing anything ---
49
51
  if (flags.dryRun) {
52
+ const dryRunPolicies = applyPolicies({
53
+ taskType: flags.taskType || config.taskType || null,
54
+ policies: config.policies,
55
+ });
50
56
  const projectDir = config.projectDir || process.cwd();
51
57
  const { rules: reviewRules } = await resolveReviewProfile({ mode: config.review_mode, projectDir });
52
58
  const coderRules = await loadFirstExisting(resolveRoleMdPath("coder", projectDir));
@@ -56,6 +62,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
56
62
  const summary = {
57
63
  dry_run: true,
58
64
  task,
65
+ policies: dryRunPolicies,
59
66
  roles: {
60
67
  planner: plannerRole,
61
68
  coder: coderRole,
@@ -275,6 +282,33 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
275
282
  if (flags.enableTester !== undefined) testerEnabled = Boolean(flags.enableTester);
276
283
  if (flags.enableSecurity !== undefined) securityEnabled = Boolean(flags.enableSecurity);
277
284
 
285
+ // --- Policy resolver: gate stages by taskType ---
286
+ // Priority: explicit flag > config > triage classification > default (sw)
287
+ const resolvedPolicies = applyPolicies({
288
+ taskType: flags.taskType || config.taskType || stageResults.triage?.taskType || null,
289
+ policies: config.policies,
290
+ });
291
+ session.resolved_policies = resolvedPolicies;
292
+
293
+ // Apply policy gates on shallow copies (never mutate the caller's config)
294
+ if (!resolvedPolicies.tdd) {
295
+ config = { ...config, development: { ...config.development, methodology: "standard", require_test_changes: false } };
296
+ }
297
+ if (!resolvedPolicies.sonar) {
298
+ config = { ...config, sonarqube: { ...config.sonarqube, enabled: false } };
299
+ }
300
+ if (!resolvedPolicies.reviewer) {
301
+ reviewerEnabled = false;
302
+ }
303
+
304
+ emitProgress(
305
+ emitter,
306
+ makeEvent("policies:resolved", eventBase, {
307
+ message: `Policies resolved for taskType="${resolvedPolicies.taskType}"`,
308
+ detail: resolvedPolicies
309
+ })
310
+ );
311
+
278
312
  // --- Researcher (pre-planning) ---
279
313
  let researchContext = null;
280
314
  if (researcherEnabled) {
@@ -47,10 +47,10 @@ export function buildTriagePrompt({ task, instructions, availableRoles }) {
47
47
  );
48
48
 
49
49
  sections.push(
50
- "Classify the task complexity, recommend only the necessary pipeline roles, and assess whether the task should be decomposed into smaller subtasks.",
50
+ "Classify the task complexity, determine its taskType, recommend only the necessary pipeline roles, and assess whether the task should be decomposed into smaller subtasks.",
51
51
  "Keep the reasoning short and practical.",
52
52
  "Return a single valid JSON object and nothing else.",
53
- 'JSON schema: {"level":"trivial|simple|medium|complex","roles":["planner|researcher|refactorer|reviewer|tester|security"],"reasoning":string,"shouldDecompose":boolean,"subtasks":string[]}'
53
+ 'JSON schema: {"level":"trivial|simple|medium|complex","roles":["planner|researcher|refactorer|reviewer|tester|security"],"taskType":"sw|infra|doc|add-tests|refactor","reasoning":string,"shouldDecompose":boolean,"subtasks":string[]}'
54
54
  );
55
55
 
56
56
  sections.push(`## Task\n${task}`);
@@ -20,3 +20,9 @@ export async function generateDiff({ baseRef }) {
20
20
  }
21
21
  return result.stdout;
22
22
  }
23
+
24
/**
 * List files that exist in the working tree but are not tracked by git
 * (ignored files are excluded via --exclude-standard).
 *
 * @returns {Promise<string[]>} Untracked file paths, or an empty array when the
 *   git command exits with a non-zero code.
 */
export async function getUntrackedFiles() {
  // -z makes git emit NUL-terminated, unquoted paths. Without it, paths with
  // unusual characters are C-quoted (wrapped in double quotes), which breaks
  // extension matching downstream.
  const result = await runCommand("git", ["ls-files", "--others", "--exclude-standard", "-z"]);
  if (result.exitCode !== 0) return [];
  return String(result.stdout ?? "").split("\0").filter(Boolean);
}
@@ -19,13 +19,15 @@ function isSourceFile(file, extensions = []) {
19
19
  return extensions.some((ext) => file.endsWith(ext));
20
20
  }
21
21
 
22
- export function evaluateTddPolicy(diff, developmentConfig = {}) {
22
+ export function evaluateTddPolicy(diff, developmentConfig = {}, untrackedFiles = []) {
23
23
  const requireTestChanges = developmentConfig.require_test_changes !== false;
24
24
  const patterns = developmentConfig.test_file_patterns || ["/tests/", "/__tests__/", ".test.", ".spec."];
25
25
  const extensions =
26
26
  developmentConfig.source_file_extensions || [".js", ".jsx", ".ts", ".tsx", ".py", ".go", ".java", ".rb", ".php", ".cs"];
27
27
 
28
- const files = extractChangedFiles(diff);
28
+ const diffFiles = extractChangedFiles(diff);
29
+ const extra = Array.isArray(untrackedFiles) ? untrackedFiles : [];
30
+ const files = [...new Set([...diffFiles, ...extra])];
29
31
  const sourceFiles = files.filter((f) => isSourceFile(f, extensions) && !isTestFile(f, patterns));
30
32
  const testFiles = files.filter((f) => isTestFile(f, patterns));
31
33
 
@@ -1,9 +1,11 @@
1
1
  import { BaseRole } from "./base-role.js";
2
2
  import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
3
  import { buildTriagePrompt } from "../prompts/triage.js";
4
+ import { VALID_TASK_TYPES } from "../guards/policy-resolver.js";
4
5
 
5
6
  const VALID_LEVELS = new Set(["trivial", "simple", "medium", "complex"]);
6
7
  const VALID_ROLES = new Set(["planner", "researcher", "refactorer", "reviewer", "tester", "security"]);
8
+ const FALLBACK_TASK_TYPE = "sw";
7
9
 
8
10
  function resolveProvider(config) {
9
11
  return (
@@ -74,6 +76,7 @@ export class TriageRole extends BaseRole {
74
76
  level: "medium",
75
77
  roles: ["reviewer"],
76
78
  reasoning: "Unstructured output, using safe defaults.",
79
+ taskType: FALLBACK_TASK_TYPE,
77
80
  provider,
78
81
  raw: result.output
79
82
  },
@@ -87,11 +90,13 @@ export class TriageRole extends BaseRole {
87
90
  const reasoning = String(parsed.reasoning || "").trim() || "No reasoning provided.";
88
91
  const shouldDecompose = Boolean(parsed.shouldDecompose);
89
92
  const subtasks = normalizeSubtasks(parsed.subtasks);
93
+ const taskType = VALID_TASK_TYPES.includes(parsed.taskType) ? parsed.taskType : FALLBACK_TASK_TYPE;
90
94
 
91
95
  const triageResult = {
92
96
  level,
93
97
  roles,
94
98
  reasoning,
99
+ taskType,
95
100
  provider
96
101
  };
97
102
 
@@ -116,6 +121,7 @@ export class TriageRole extends BaseRole {
116
121
  level: "medium",
117
122
  roles: ["reviewer"],
118
123
  reasoning: "Failed to parse triage output, using safe defaults.",
124
+ taskType: FALLBACK_TASK_TYPE,
119
125
  provider,
120
126
  raw: result.output
121
127
  },
@@ -8,6 +8,7 @@ Return a single valid JSON object and nothing else:
8
8
  ```json
9
9
  {
10
10
  "level": "trivial|simple|medium|complex",
11
+ "taskType": "sw|infra|doc|add-tests|refactor",
11
12
  "roles": ["planner", "researcher", "refactorer", "reviewer", "tester", "security"],
12
13
  "reasoning": "brief practical justification",
13
14
  "shouldDecompose": false,
@@ -15,7 +16,14 @@ Return a single valid JSON object and nothing else:
15
16
  }
16
17
  ```
17
18
 
18
- ## Classification guidance
19
+ ## Task type classification
20
+ - `sw`: writing or modifying business logic, features, APIs, components, services.
21
+ - `infra`: CI/CD, Docker, deploy scripts, build configuration, environment setup.
22
+ - `doc`: documentation, README, CHANGELOG, comments-only changes.
23
+ - `add-tests`: adding tests to existing code without changing functionality.
24
+ - `refactor`: restructuring code without changing external behavior.
25
+
26
+ ## Complexity classification
19
27
  - `trivial`: tiny, low-risk, straightforward. Usually no extra roles.
20
28
  - `simple`: limited scope with low risk. Usually reviewer only.
21
29
  - `medium`: moderate scope/risk. Reviewer required; optional planner/researcher.