karajan-code 1.21.2 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "karajan-code",
3
- "version": "1.21.2",
3
+ "version": "1.23.0",
4
4
  "description": "Local multi-agent coding orchestrator with TDD, SonarQube, and code review pipeline",
5
5
  "type": "module",
6
6
  "license": "AGPL-3.0",
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Host Agent — delegates task execution to the MCP host AI via elicitation.
3
+ *
4
+ * Instead of spawning a subprocess, returns the prompt to the host AI
5
+ * (Claude, Codex, etc.) for direct execution. The host has full access
6
+ * to the codebase and tools — no subprocess overhead.
7
+ *
8
+ * Used when: the MCP host IS the same agent configured for a role.
9
+ */
10
+
11
+ import { BaseAgent } from "./base-agent.js";
12
+
13
/**
 * Agent that hands the task prompt to the MCP host AI through a callback
 * instead of spawning a subprocess.
 */
export class HostAgent extends BaseAgent {
  /**
   * @param {object} config - Forwarded unchanged to BaseAgent.
   * @param {object} logger - Forwarded unchanged to BaseAgent.
   * @param {{ askHost?: ((prompt: string) => Promise<*>) | null }} [options]
   *   `askHost` receives the prompt and resolves with the host's answer.
   *   The options object itself is optional so that a missing callback is
   *   reported by runTask() rather than crashing the constructor.
   */
  constructor(config, logger, { askHost = null } = {}) {
    super("host", config, logger);
    this._askHost = askHost;
  }

  /**
   * Send the task prompt to the host and wrap the answer in a result object.
   *
   * @param {{ prompt: string, onOutput?: Function }} task
   * @returns {Promise<{ ok: boolean, output: *, error?: string, exitCode?: number }>}
   *   `ok: false` when no usable callback is configured or the host returns
   *   a falsy answer; otherwise `ok: true` with the answer as `output`.
   *   Errors thrown by the callback itself are not caught here.
   */
  async runTask(task) {
    const { prompt, onOutput } = task;

    // A non-callable value would otherwise surface as an opaque TypeError below.
    if (typeof this._askHost !== "function") {
      return { ok: false, output: "", error: "Host agent has no askHost callback" };
    }

    if (onOutput) onOutput({ stream: "info", line: "[host-agent] Delegating to host AI..." });

    const answer = await this._askHost(prompt);

    if (!answer) {
      return { ok: false, output: "", error: "Host AI declined or returned no response" };
    }

    if (onOutput) onOutput({ stream: "info", line: "[host-agent] Host AI completed task" });

    return { ok: true, output: answer, exitCode: 0 };
  }
}
@@ -204,6 +204,56 @@ async function scaffoldBecariaGateway(config, flags, logger) {
204
204
  logger.info(" 4. Push the workflow files and enable 'kj run --enable-becaria'");
205
205
  }
206
206
 
207
/**
 * Copy the bundled skill templates (`*.md`) into `<cwd>/.claude/commands`.
 *
 * Files that already exist in the destination are never overwritten. Each
 * file is written with the exclusive-create flag, so the "does it exist?"
 * decision and the write are a single operation rather than a separate
 * check followed by a write.
 *
 * @param {object} logger - Logger providing `info` and `warn`.
 * @param {boolean} interactive - When truthy, ask for confirmation first.
 * @returns {Promise<void>}
 */
async function installSkills(logger, interactive) {
  const projectDir = process.cwd();
  const commandsDir = path.join(projectDir, ".claude", "commands");
  const skillsTemplateDir = path.resolve(import.meta.dirname, "../../templates/skills");

  let doInstall = true;
  if (interactive) {
    const wizard = createWizard();
    try {
      doInstall = await wizard.confirm("Install Karajan skills as slash commands (/kj-code, /kj-review, etc.)?", true);
    } finally {
      // Always release the prompt, even if confirm() throws.
      wizard.close();
    }
  }

  if (!doInstall) {
    logger.info("Skills installation skipped.");
    return;
  }

  await ensureDir(commandsDir);

  let installed = 0;
  try {
    const files = await fs.readdir(skillsTemplateDir);
    for (const file of files) {
      if (!file.endsWith(".md")) continue;
      const src = path.join(skillsTemplateDir, file);
      const dest = path.join(commandsDir, file);
      const content = await fs.readFile(src, "utf8");
      try {
        // "wx" fails with EEXIST instead of overwriting an existing file.
        await fs.writeFile(dest, content, { encoding: "utf8", flag: "wx" });
      } catch (err) {
        if (err.code !== "EEXIST") throw err;
        logger.info(` ${file} already exists — skipping`);
        continue;
      }
      installed += 1;
    }
  } catch (err) {
    // Best effort: a failure here must not abort the rest of init.
    logger.warn(`Could not install skills: ${err.message}`);
    return;
  }

  if (installed > 0) {
    logger.info(`Installed ${installed} Karajan skill(s) in .claude/commands/`);
    logger.info("Available as slash commands: /kj-run, /kj-code, /kj-review, /kj-test, /kj-security, /kj-discover, /kj-architect, /kj-sonar");
  } else {
    logger.info("All skills already installed.");
  }
}
256
+
207
257
  export async function initCommand({ logger, flags = {} }) {
208
258
  const karajanHome = getKarajanHome();
209
259
  await ensureDir(karajanHome);
@@ -219,6 +269,7 @@ export async function initCommand({ logger, flags = {} }) {
219
269
  await handleConfigSetup({ config, configExists, interactive, configPath, logger });
220
270
  await ensureReviewRules(reviewRulesPath, logger);
221
271
  await ensureCoderRules(coderRulesPath, logger);
272
+ await installSkills(logger, interactive);
222
273
  await setupSonarQube(config, logger);
223
274
  await scaffoldBecariaGateway(config, flags, logger);
224
275
  }
@@ -7,6 +7,12 @@ import { resolveRole } from "../config.js";
7
7
  import { parseCardId } from "../planning-game/adapter.js";
8
8
 
9
9
  export async function runCommandHandler({ task, config, logger, flags }) {
10
+ // Best-effort session cleanup before starting
11
+ try {
12
+ const { cleanupExpiredSessions } = await import("../session-cleanup.js");
13
+ await cleanupExpiredSessions({ logger });
14
+ } catch { /* non-blocking */ }
15
+
10
16
  const requiredProviders = [
11
17
  resolveRole(config, "coder").provider,
12
18
  config.reviewer_options?.fallback_reviewer
@@ -22,6 +22,53 @@ export function setupOrphanGuard({ intervalMs = DEFAULT_INTERVAL_MS, exitFn = ()
22
22
  return { timer, parentPid };
23
23
  }
24
24
 
25
const DEFAULT_MEMORY_CHECK_MS = 30_000;
const DEFAULT_WARN_HEAP_MB = 512;
const DEFAULT_CRITICAL_HEAP_MB = 768;

/**
 * Periodically sample heap usage and react to two thresholds.
 *
 * - At or above `criticalHeapMb`: try a forced GC when `global.gc` is
 *   available, re-sample, and if usage is still critical report it, stop
 *   the timer and call `exitFn`.
 * - At or above `warnHeapMb`: report once; the warning re-arms after usage
 *   drops back below the threshold.
 *
 * All decisions and messages use the most recent sample, so a message
 * emitted after a forced GC shows the post-GC numbers.
 *
 * @param {object} [options]
 * @param {number} [options.intervalMs] - Sampling period in milliseconds.
 * @param {number} [options.warnHeapMb] - Warning threshold in MB of heap used.
 * @param {number} [options.criticalHeapMb] - Critical threshold in MB of heap used.
 * @param {((msg: string) => void) | null} [options.onWarn] - Receives the warning text; stderr when null.
 * @param {((msg: string) => void) | null} [options.onCritical] - Receives the critical text; stderr when null.
 * @param {() => void} [options.exitFn] - Called after a critical report; defaults to `process.exit(1)`.
 * @returns {{ timer: NodeJS.Timeout }} The interval handle (already unref'd).
 */
export function setupMemoryWatchdog({
  intervalMs = DEFAULT_MEMORY_CHECK_MS,
  warnHeapMb = DEFAULT_WARN_HEAP_MB,
  criticalHeapMb = DEFAULT_CRITICAL_HEAP_MB,
  onWarn = null,
  onCritical = null,
  exitFn = () => process.exit(1)
} = {}) {
  const bytesPerMb = 1024 * 1024;
  const warnBytes = warnHeapMb * bytesPerMb;
  const criticalBytes = criticalHeapMb * bytesPerMb;
  const toMb = (bytes) => (bytes / bytesPerMb).toFixed(0);
  const report = (handler, msg) => {
    if (handler) handler(msg);
    else process.stderr.write(`[karajan-mcp] ${msg}\n`);
  };
  let warned = false;

  const timer = setInterval(() => {
    let { heapUsed, rss } = process.memoryUsage();

    if (heapUsed >= criticalBytes && typeof global.gc === "function") {
      try { global.gc(); } catch { /* forced GC is best effort */ }
      // Re-sample so both the decision and the message reflect post-GC usage.
      ({ heapUsed, rss } = process.memoryUsage());
    }

    if (heapUsed >= criticalBytes) {
      report(onCritical, `Memory critical: heap ${toMb(heapUsed)}MB / rss ${toMb(rss)}MB — exiting to prevent OOM`);
      clearInterval(timer);
      exitFn();
      return;
    }

    if (heapUsed >= warnBytes) {
      if (!warned) {
        warned = true;
        report(onWarn, `Memory warning: heap ${toMb(heapUsed)}MB / rss ${toMb(rss)}MB (critical at ${criticalHeapMb}MB)`);
      }
    } else {
      warned = false;
    }
  }, intervalMs);
  // Do not let the watchdog alone keep the process alive.
  timer.unref();

  return { timer };
}
71
+
25
72
  export function setupVersionWatcher({ pkgPath, currentVersion, exitFn = () => process.exit(0) } = {}) {
26
73
  if (!pkgPath) return null;
27
74
 
@@ -239,6 +239,12 @@ export async function handleRunDirect(a, server, extra) {
239
239
  await assertNotOnBaseBranch(config);
240
240
  const logger = createLogger(config.output.log_level, "mcp");
241
241
 
242
+ // Best-effort session cleanup before starting
243
+ try {
244
+ const { cleanupExpiredSessions } = await import("../session-cleanup.js");
245
+ await cleanupExpiredSessions({ logger });
246
+ } catch { /* non-blocking */ }
247
+
242
248
  const requiredProviders = [
243
249
  resolveRole(config, "coder").provider,
244
250
  config.reviewer_options?.fallback_reviewer
@@ -287,22 +293,36 @@ export async function handleResumeDirect(a, server, extra) {
287
293
  const config = await buildConfig(a);
288
294
  const logger = createLogger(config.output.log_level, "mcp");
289
295
 
296
+ const projectDir = await resolveProjectDir(server);
297
+ const runLog = createRunLog(projectDir);
298
+ runLog.logText(`[kj_resume] started — session="${a.sessionId}"`);
299
+
290
300
  const emitter = new EventEmitter();
291
301
  emitter.on("progress", buildProgressHandler(server));
302
+ emitter.on("progress", (event) => runLog.logEvent(event));
292
303
  const progressNotifier = buildProgressNotifier(extra);
293
304
  if (progressNotifier) emitter.on("progress", progressNotifier);
294
305
 
295
306
  const askQuestion = buildAskQuestion(server);
296
- const result = await resumeFlow({
297
- sessionId: a.sessionId,
298
- answer: a.answer || null,
299
- config,
300
- logger,
301
- flags: a,
302
- emitter,
303
- askQuestion
304
- });
305
- return { ok: true, ...result };
307
+ try {
308
+ const result = await resumeFlow({
309
+ sessionId: a.sessionId,
310
+ answer: a.answer || null,
311
+ config,
312
+ logger,
313
+ flags: a,
314
+ emitter,
315
+ askQuestion
316
+ });
317
+ const ok = !result.paused && (result.approved !== false);
318
+ runLog.logText(`[kj_resume] finished — ok=${ok}`);
319
+ return { ok, ...result };
320
+ } catch (err) {
321
+ runLog.logText(`[kj_resume] failed: ${err.message}`);
322
+ throw err;
323
+ } finally {
324
+ runLog.close();
325
+ }
306
326
  }
307
327
 
308
328
  function buildDirectEmitter(server, runLog, extra) {
package/src/mcp/server.js CHANGED
@@ -50,9 +50,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
50
50
  });
51
51
 
52
52
  // --- Orphan process protection + version watcher ---
53
- import { setupOrphanGuard, setupVersionWatcher } from "./orphan-guard.js";
53
+ import { setupOrphanGuard, setupVersionWatcher, setupMemoryWatchdog } from "./orphan-guard.js";
54
54
  setupOrphanGuard();
55
55
  setupVersionWatcher({ pkgPath: PKG_PATH, currentVersion: LOADED_VERSION });
56
+ setupMemoryWatchdog();
56
57
 
57
58
  const transport = new StdioServerTransport();
58
59
  await mcpServer.connect(transport);
@@ -958,14 +958,45 @@ async function handleApprovedReview({ config, session, emitter, eventBase, coder
958
958
  return { action: "return", result };
959
959
  }
960
960
 
961
- async function handleMaxIterationsReached({ session, budgetSummary, emitter, eventBase, config, stageResults }) {
961
+ async function handleMaxIterationsReached({ session, budgetSummary, emitter, eventBase, config, stageResults, logger, askQuestion, task }) {
962
+ // Escalate to Solomon / human before giving up
963
+ const solomonResult = await invokeSolomon({
964
+ config, logger, emitter, eventBase, stage: "max_iterations", askQuestion, session,
965
+ iteration: config.max_iterations,
966
+ conflict: {
967
+ stage: "max_iterations",
968
+ task,
969
+ iterationCount: config.max_iterations,
970
+ maxIterations: config.max_iterations,
971
+ history: [{ agent: "pipeline", feedback: session.last_reviewer_feedback || "Max iterations reached without reviewer approval" }]
972
+ }
973
+ });
974
+
975
+ if (solomonResult.action === "continue") {
976
+ if (solomonResult.humanGuidance) {
977
+ session.last_reviewer_feedback = `User guidance: ${solomonResult.humanGuidance}`;
978
+ }
979
+ session.reviewer_retry_count = 0;
980
+ await saveSession(session);
981
+ return { approved: false, sessionId: session.id, reason: "max_iterations_extended", humanGuidance: solomonResult.humanGuidance };
982
+ }
983
+
984
+ if (solomonResult.action === "pause") {
985
+ return { paused: true, sessionId: session.id, question: solomonResult.question, context: "max_iterations" };
986
+ }
987
+
988
+ if (solomonResult.action === "subtask") {
989
+ return { paused: true, sessionId: session.id, subtask: solomonResult.subtask, context: "max_iterations_subtask" };
990
+ }
991
+
992
+ // Solomon also couldn't resolve — fail
962
993
  session.budget = budgetSummary();
963
994
  await markSessionStatus(session, "failed");
964
995
  emitProgress(
965
996
  emitter,
966
997
  makeEvent("session:end", { ...eventBase, stage: "done" }, {
967
998
  status: "fail",
968
- message: "Max iterations reached",
999
+ message: "Max iterations reached (Solomon could not resolve)",
969
1000
  detail: { approved: false, reason: "max_iterations", iterations: config.max_iterations, stages: stageResults, budget: budgetSummary() }
970
1001
  })
971
1002
  );
@@ -978,7 +1009,7 @@ async function initFlowContext({ task, config, logger, emitter, askQuestion, pgT
978
1009
  const refactorerRole = resolveRole(config, "refactorer");
979
1010
  const pipelineFlags = resolvePipelineFlags(config);
980
1011
  const repeatDetector = new RepeatDetector({ threshold: getRepeatThreshold(config) });
981
- const coderRoleInstance = new CoderRole({ config, logger, emitter, createAgentFn: createAgent });
1012
+ const coderRoleInstance = new CoderRole({ config, logger, emitter, createAgentFn: createAgent, askHost: askQuestion });
982
1013
  const startedAt = Date.now();
983
1014
  const eventBase = { sessionId: null, iteration: 0, stage: null, startedAt };
984
1015
  const { budgetTracker, budgetLimit, budgetSummary, trackBudget } = createBudgetManager({ config, emitter, eventBase });
@@ -1109,7 +1140,7 @@ export async function runFlow({ task, config, logger, flags = {}, emitter = null
1109
1140
  if (iterResult.action === "retry") { i -= 1; }
1110
1141
  }
1111
1142
 
1112
- return handleMaxIterationsReached({ session: ctx.session, budgetSummary: ctx.budgetSummary, emitter, eventBase: ctx.eventBase, config, stageResults: ctx.stageResults });
1143
+ return handleMaxIterationsReached({ session: ctx.session, budgetSummary: ctx.budgetSummary, emitter, eventBase: ctx.eventBase, config, stageResults: ctx.stageResults, logger, askQuestion, task });
1113
1144
  }
1114
1145
 
1115
1146
  export async function resumeFlow({ sessionId, answer, config, logger, flags = {}, emitter = null, askQuestion = null }) {
@@ -1162,5 +1193,10 @@ export async function resumeFlow({ sessionId, answer, config, logger, flags = {}
1162
1193
  await saveSession(session);
1163
1194
 
1164
1195
  // Re-run the flow with the existing session context
1165
- return runFlow({ task, config: sessionConfig, logger, flags, emitter, askQuestion });
1196
+ try {
1197
+ return await runFlow({ task, config: sessionConfig, logger, flags, emitter, askQuestion });
1198
+ } catch (err) {
1199
+ await markSessionStatus(session, "failed");
1200
+ throw err;
1201
+ }
1166
1202
  }
@@ -1,6 +1,8 @@
1
1
  import { BaseRole } from "./base-role.js";
2
2
  import { createAgent as defaultCreateAgent } from "../agents/index.js";
3
3
  import { buildCoderPrompt } from "../prompts/coder.js";
4
+ import { isHostAgent } from "../utils/agent-detect.js";
5
+ import { HostAgent } from "../agents/host-agent.js";
4
6
 
5
7
  function resolveProvider(config) {
6
8
  return (
@@ -11,9 +13,10 @@ function resolveProvider(config) {
11
13
  }
12
14
 
13
15
  export class CoderRole extends BaseRole {
14
- constructor({ config, logger, emitter = null, createAgentFn = null }) {
16
+ constructor({ config, logger, emitter = null, createAgentFn = null, askHost = null }) {
15
17
  super({ name: "coder", config, logger, emitter });
16
18
  this._createAgent = createAgentFn || defaultCreateAgent;
19
+ this._askHost = askHost;
17
20
  }
18
21
 
19
22
  async execute(input) {
@@ -22,7 +25,14 @@ export class CoderRole extends BaseRole {
22
25
  : input || {};
23
26
 
24
27
  const provider = resolveProvider(this.config);
25
- const agent = this._createAgent(provider, this.config, this.logger);
28
+ const useHost = this._askHost && isHostAgent(provider);
29
+ const agent = useHost
30
+ ? new HostAgent(this.config, this.logger, { askHost: this._askHost })
31
+ : this._createAgent(provider, this.config, this.logger);
32
+
33
+ if (useHost) {
34
+ this.logger.info(`Host-as-coder: delegating to host AI (skipping ${provider} subprocess)`);
35
+ }
26
36
 
27
37
  const prompt = buildCoderPrompt({
28
38
  task: task || this.context?.task || "",
@@ -1,48 +1,72 @@
1
1
  /**
2
2
  * Automatic cleanup of expired sessions.
3
- * Removes session directories older than session.expiry_days (default: 30).
3
+ *
4
+ * Policy (by status):
5
+ * - failed / stopped: removed after 1 day
6
+ * - approved: removed after 7 days
7
+ * - running (stale): removed after 1 day (assumed crashed without cleanup)
8
+ * - paused: kept (user may want to resume)
9
+ *
10
+ * Runs automatically at the start of every kj_run (best-effort, non-blocking).
4
11
  */
5
12
 
6
13
  import fs from "node:fs/promises";
7
14
  import path from "node:path";
8
15
  import { getSessionRoot } from "./utils/paths.js";
9
16
 
10
- const DEFAULT_EXPIRY_DAYS = 30;
17
+ const ONE_DAY_MS = 24 * 60 * 60 * 1000;
11
18
 
12
- async function tryRemoveOrphan({ sessionDir, dirName, cutoff, removed, errors, logger }) {
13
- const stat = await fs.stat(sessionDir).catch(() => null);
14
- if (!stat || stat.mtimeMs >= cutoff) return;
15
- try {
16
- await fs.rm(sessionDir, { recursive: true, force: true });
17
- removed.push(dirName);
18
- logger?.debug?.(`Orphan session dir removed: ${dirName}`);
19
- } catch (error_) {
20
- errors.push({ session: dirName, error: error_.message });
21
- }
19
// Retention window per session status. A null entry (or a status that is not
// listed) means the session is never removed automatically.
const POLICY = {
  failed: { expiryMs: ONE_DAY_MS },
  stopped: { expiryMs: ONE_DAY_MS },
  running: { expiryMs: ONE_DAY_MS }, // stale — crashed without marking failed
  approved: { expiryMs: 7 * ONE_DAY_MS },
  paused: null // never auto-delete
};

/**
 * Decide whether a parsed session record has outlived its retention window.
 *
 * Age is measured from `updated_at`, falling back to `created_at`. When
 * neither yields a valid date the age is NaN, the comparison is false, and
 * the session is kept.
 *
 * @param {{ status?: string, updated_at?: string, created_at?: string }} session
 * @returns {boolean} true when the session should be deleted.
 */
function shouldRemove(session) {
  const retention = POLICY[session.status || "unknown"];
  if (!retention) {
    return false;
  }

  const lastTouchedMs = new Date(session.updated_at || session.created_at).getTime();
  const ageMs = Date.now() - lastTouchedMs;
  return ageMs > retention.expiryMs;
}
23
35
 
24
/**
 * Delete one session directory and record the outcome.
 * On success the name is appended to `removed` and a debug line is logged;
 * on failure the error message is appended to `errors` and nothing is thrown.
 */
async function removeSessionDir({ sessionDir, dirName, removed, errors, logger, debugMessage }) {
  try {
    await fs.rm(sessionDir, { recursive: true, force: true });
    removed.push(dirName);
    logger?.debug?.(debugMessage);
  } catch (err) {
    errors.push({ session: dirName, error: err.message });
  }
}

/**
 * Apply the cleanup policy to a single session directory.
 *
 * - If `session.json` cannot be read or parsed, the directory is treated as
 *   an orphan and removed once its mtime is more than one day old.
 * - Otherwise the directory is removed when `shouldRemove(session)` is true.
 *
 * @param {object} args
 * @param {string} args.sessionDir - Absolute path of the session directory.
 * @param {string} args.dirName - Directory name, used in reports.
 * @param {string[]} args.removed - Mutated: names of removed directories.
 * @param {{ session: string, error: string }[]} args.errors - Mutated: removal failures.
 * @param {object} [args.logger] - Optional logger; only `debug` is used.
 * @returns {Promise<void>}
 */
async function tryCleanupSession({ sessionDir, dirName, removed, errors, logger }) {
  const sessionFile = path.join(sessionDir, "session.json");
  let session;
  try {
    const raw = await fs.readFile(sessionFile, "utf8");
    session = JSON.parse(raw);
  } catch {
    // Orphan dir without a valid session.json — remove if older than 1 day.
    const stat = await fs.stat(sessionDir).catch(() => null);
    if (stat && Date.now() - stat.mtimeMs > ONE_DAY_MS) {
      await removeSessionDir({
        sessionDir, dirName, removed, errors, logger,
        debugMessage: `Orphan session dir removed: ${dirName}`
      });
    }
    return;
  }

  if (!shouldRemove(session)) return;

  await removeSessionDir({
    sessionDir, dirName, removed, errors, logger,
    debugMessage: `Session cleaned up: ${dirName} (status: ${session.status})`
  });
}
43
67
 
68
+ export async function cleanupExpiredSessions({ logger } = {}) {
44
69
  const sessionRoot = getSessionRoot();
45
- const cutoff = Date.now() - expiryDays * 24 * 60 * 60 * 1000;
46
70
 
47
71
  let entries;
48
72
  try {
@@ -57,7 +81,7 @@ export async function cleanupExpiredSessions({ config, logger } = {}) {
57
81
 
58
82
  for (const dir of dirs) {
59
83
  const sessionDir = path.join(sessionRoot, dir.name);
60
- await tryCleanupSession({ sessionDir, dirName: dir.name, cutoff, removed, errors, logger });
84
+ await tryCleanupSession({ sessionDir, dirName: dir.name, removed, errors, logger });
61
85
  }
62
86
 
63
87
  if (removed.length > 0) {
@@ -83,8 +83,9 @@ export async function loadMostRecentSession() {
83
83
 
84
84
  export async function resumeSessionWithAnswer(sessionId, answer) {
85
85
  const session = await loadSession(sessionId);
86
- if (session.status !== "paused") {
87
- throw new Error(`Session ${sessionId} is not paused (status: ${session.status})`);
86
+ const resumable = new Set(["paused", "running", "failed", "stopped"]);
87
+ if (!resumable.has(session.status)) {
88
+ throw new Error(`Session ${sessionId} cannot be resumed (status: ${session.status})`);
88
89
  }
89
90
  const pausedState = session.paused_state;
90
91
  if (!pausedState) {
@@ -30,4 +30,25 @@ export async function detectAvailableAgents() {
30
30
  return results;
31
31
  }
32
32
 
33
/**
 * Identify the AI agent hosting this process from marker environment variables.
 *
 * A marker counts only when its value is exactly "1". Agents are checked in
 * the order claude, codex, gemini, opencode; the first match wins.
 *
 * @returns {"claude" | "codex" | "gemini" | "opencode" | null}
 *   The host agent name, or null when no marker is set.
 */
export function detectHostAgent() {
  const markersByAgent = [
    ["claude", ["CLAUDECODE", "CLAUDE_CODE"]],
    ["codex", ["CODEX_CLI", "CODEX"]],
    ["gemini", ["GEMINI_CLI"]],
    ["opencode", ["OPENCODE"]]
  ];

  for (const [agent, envNames] of markersByAgent) {
    if (envNames.some((envName) => process.env[envName] === "1")) {
      return agent;
    }
  }
  return null;
}
44
+
45
/**
 * Tell whether `provider` names the agent that is currently hosting this process.
 * A true result lets the caller delegate to the host instead of spawning a subprocess.
 *
 * @param {string} provider - Provider name to compare (exact, case-sensitive match).
 * @returns {boolean} false when no host agent is detected or the names differ.
 */
export function isHostAgent(provider) {
  const currentHost = detectHostAgent();
  if (currentHost === null) {
    return false;
  }
  return currentHost === provider;
}
53
+
33
54
  export { KNOWN_AGENTS };
@@ -0,0 +1,45 @@
1
+ # kj-architect — Architecture Design
2
+
3
+ Analyze the task and propose an architecture before implementation.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Steps
10
+
11
+ 1. Read the task and understand the requirements
12
+ 2. Explore the existing codebase structure (`ls`, `find`, read key files)
13
+ 3. Identify the appropriate architectural approach
14
+ 4. Propose a design with tradeoffs
15
+
16
+ ## What to deliver
17
+
18
+ ### Architecture overview
19
+ - Architecture type (layered, hexagonal, event-driven, etc.)
20
+ - Key components/layers and their responsibilities
21
+ - Data flow between components
22
+
23
+ ### API contracts (if applicable)
24
+ - Endpoints with method, path, request/response schema
25
+ - Error handling strategy
26
+
27
+ ### Data model changes (if applicable)
28
+ - New entities/collections
29
+ - Modified fields
30
+ - Migration strategy
31
+
32
+ ### Tradeoffs
33
+ - For each design decision: what was chosen, why, and what alternatives were considered
34
+ - Constraints that influenced the design
35
+
36
+ ### Clarification questions
37
+ - Any ambiguities that could affect the architecture
38
+ - Decisions that need stakeholder input
39
+
40
+ ## Constraints
41
+
42
+ - Follow existing patterns in the codebase — don't introduce a new architecture without justification
43
+ - Keep it simple — the right amount of complexity is the minimum needed
44
+ - Consider testability in every design decision
45
+ - Do NOT start coding — this is design only
@@ -0,0 +1,51 @@
1
+ # kj-code — Coder with Guardrails
2
+
3
+ Implement the task with TDD methodology and built-in quality checks.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Methodology
10
+
11
+ 1. **Tests first**: Write or update tests BEFORE implementation
12
+ 2. **Implement**: Write minimal, focused code to pass the tests
13
+ 3. **Verify**: Run the test suite (`npm test` or project equivalent)
14
+ 4. **Check diff**: Run `git diff` and verify ONLY intended lines changed
15
+
16
+ ## Guardrails (MANDATORY)
17
+
18
+ After writing code, verify ALL of these before reporting done:
19
+
20
+ ### Security check
21
+ - [ ] No hardcoded credentials, API keys, or secrets in the diff
22
+ - [ ] No `eval()`, `innerHTML` with user input, or SQL string concatenation
23
+ - [ ] User input is validated/sanitized at system boundaries
24
+
25
+ ### Destructive operation check
26
+ - [ ] No `rm -rf /`, `DROP TABLE`, `git push --force`, or similar in the diff
27
+ - [ ] No `fs.rmSync` or `fs.rm` on paths derived from user input
28
+ - [ ] No `process.exit()` in library code
29
+
30
+ ### Performance check
31
+ - [ ] No synchronous file I/O (`readFileSync`, `writeFileSync`) in request handlers
32
+ - [ ] No `document.write()` or layout thrashing patterns
33
+ - [ ] No unbounded loops or missing pagination
34
+
35
+ ### TDD check
36
+ - [ ] Source changes have corresponding test changes
37
+ - [ ] Tests actually run and pass
38
+
39
+ ## File modification safety
40
+
41
+ - NEVER overwrite existing files entirely — make targeted edits
42
+ - After each edit, verify with `git diff` that ONLY intended lines changed
43
+ - If unintended changes detected, revert immediately with `git checkout -- <file>`
44
+
45
+ ## Completeness check
46
+
47
+ Before reporting done:
48
+ - Re-read the task description
49
+ - Check every requirement is addressed
50
+ - Run the test suite
51
+ - Verify no regressions
@@ -0,0 +1,24 @@
1
+ # kj-discover — Gap Detection
2
+
3
+ Analyze the task for gaps, ambiguities, and missing information BEFORE coding.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## What to do
10
+
11
+ 1. Read the task description carefully
12
+ 2. Identify gaps: missing requirements, implicit assumptions, ambiguities, contradictions
13
+ 3. Classify each gap: **critical** (blocks implementation), **major** (risks rework), **minor** (reasonable default exists)
14
+ 4. For each gap, suggest a specific question to resolve it
15
+ 5. Give a verdict: **ready** (no gaps) or **needs_validation** (gaps found)
16
+
17
+ ## Output
18
+
19
+ Present findings clearly:
20
+ - List each gap with severity and suggested question
21
+ - Give your verdict at the end
22
+ - If ready, say so and suggest proceeding to implementation
23
+
24
+ Do NOT start coding. This is analysis only.
@@ -0,0 +1,47 @@
1
+ # kj-review — Code Review with Quality Gates
2
+
3
+ Review the current changes against task requirements and quality standards.
4
+
5
+ ## Your task
6
+
7
+ Review the changes in the current branch: $ARGUMENTS
8
+
9
+ ## Steps
10
+
11
+ 1. Run `git diff main...HEAD` (or appropriate base branch) to see all changes
12
+ 2. Review each changed file against the priorities below
13
+ 3. Report findings clearly
14
+
15
+ ## Review priorities (in order)
16
+
17
+ 1. **Security** — vulnerabilities, exposed secrets, injection vectors
18
+ 2. **Correctness** — logic errors, edge cases, broken tests
19
+ 3. **Tests** — adequate coverage, meaningful assertions
20
+ 4. **Architecture** — patterns, maintainability, SOLID principles
21
+ 5. **Style** — naming, formatting (only flag if egregious)
22
+
23
+ ## Scope constraint
24
+
25
+ - **ONLY review files present in the diff** — do not flag issues in untouched files
26
+ - Report out-of-scope issues as suggestions, never as blocking
27
+
28
+ ## Guardrails (auto-check)
29
+
30
+ Flag as BLOCKING if any of these are detected in the diff:
31
+ - [ ] Hardcoded credentials, API keys, or secrets
32
+ - [ ] Entire file replaced (massive deletions + additions instead of targeted edits)
33
+ - [ ] `eval()`, `innerHTML` with user input, SQL string concatenation
34
+ - [ ] Missing test changes when source files changed (TDD violation)
35
+ - [ ] `rm -rf`, `DROP TABLE`, `git push --force` or similar destructive operations
36
+
37
+ ## Output
38
+
39
+ For each issue found:
40
+ - **File and line** where the issue is
41
+ - **Severity**: critical / major / minor
42
+ - **Description**: what's wrong
43
+ - **Suggested fix**: how to fix it
44
+
45
+ End with a clear verdict:
46
+ - **APPROVED** — no blocking issues found
47
+ - **REQUEST_CHANGES** — blocking issues listed above must be fixed
@@ -0,0 +1,69 @@
1
+ # kj-run — Full Pipeline (Skills Mode)
2
+
3
+ Execute the complete Karajan pipeline as sequential skills.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Pipeline steps (execute in order)
10
+
11
+ ### Step 1 — Discover (optional but recommended)
12
+ Analyze the task for gaps before coding:
13
+ - Identify missing requirements, ambiguities, contradictions
14
+ - If critical gaps found, STOP and ask the user before proceeding
15
+ - If ready, continue
16
+
17
+ ### Step 2 — Code (with guardrails)
18
+ Implement the task:
19
+ 1. **Tests first** (TDD): write/update tests before implementation
20
+ 2. **Implement**: minimal, focused code to fulfill the task
21
+ 3. **Verify**: run the test suite
22
+ 4. **Security check**: no hardcoded secrets, no injection vectors, no destructive ops in the diff
23
+ 5. **Diff check**: run `git diff` and verify only intended lines changed
24
+ 6. If any guardrail fails, fix before proceeding
25
+
26
+ ### Step 3 — Review (self-review)
27
+ Review your own changes against quality standards:
28
+ 1. Run `git diff main...HEAD` (or base branch)
29
+ 2. Check: security, correctness, tests, architecture, style (in that order)
30
+ 3. Flag blocking issues:
31
+ - Hardcoded credentials or secrets
32
+ - Entire files overwritten instead of targeted edits
33
+ - Missing tests for new code
34
+ - SQL injection, XSS, command injection
35
+ - Destructive operations
36
+ 4. If blocking issues found, fix them and re-review
37
+ 5. If clean, proceed
38
+
39
+ ### Step 4 — Test audit
40
+ Verify test quality:
41
+ 1. Every changed source file has corresponding tests
42
+ 2. Run `npm test` (or equivalent) — all must pass
43
+ 3. No skipped tests for changed code
44
+ 4. If tests fail, fix before proceeding
45
+
46
+ ### Step 5 — Security scan
47
+ Quick security audit on the diff:
48
+ 1. Scan the changed files for OWASP Top 10 vulnerabilities
49
+ 2. Check for leaked secrets, injection vectors, missing auth
50
+ 3. If critical/high findings, fix before proceeding
51
+
52
+ ### Step 6 — Sonar (if available)
53
+ If SonarQube is running (`docker ps | grep sonarqube`):
54
+ 1. Run `npx @sonar/scan`
55
+ 2. Check quality gate
56
+ 3. Fix blockers and critical issues
57
+
58
+ ### Step 7 — Commit
59
+ If all steps pass:
60
+ 1. Stage changed files: `git add <specific files>`
61
+ 2. Commit with conventional commit message: `feat:`, `fix:`, `refactor:`, etc.
62
+ 3. Do NOT push unless the user explicitly asks
63
+
64
+ ## Important rules
65
+
66
+ - **Never skip steps** — execute all applicable steps in order
67
+ - **Fix before proceeding** — if a step finds issues, fix them before moving to the next
68
+ - **Report progress** — after each step, briefly state what was done and the result
69
+ - **Stop on critical** — if a critical security or correctness issue can't be fixed, stop and report
@@ -0,0 +1,49 @@
1
+ # kj-security — Security Audit
2
+
3
+ Perform a security audit on the current changes.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Steps
10
+
11
+ 1. Run `git diff main...HEAD` (or appropriate base branch) to see all changes
12
+ 2. Scan for each vulnerability category below
13
+ 3. Report findings with severity and remediation
14
+
15
+ ## Vulnerability categories
16
+
17
+ ### Critical
18
+ - [ ] Hardcoded secrets (API keys, passwords, tokens, connection strings)
19
+ - [ ] SQL injection (string concatenation in queries)
20
+ - [ ] Command injection (`exec`, `spawn` with unsanitized input)
21
+ - [ ] Path traversal (file operations with user-controlled paths)
22
+
23
+ ### High
24
+ - [ ] XSS (Cross-Site Scripting) — `innerHTML`, `dangerouslySetInnerHTML` with user input
25
+ - [ ] Missing authentication/authorization checks on new endpoints
26
+ - [ ] Insecure deserialization
27
+ - [ ] SSRF (Server-Side Request Forgery) — fetch/request with user-controlled URLs
28
+
29
+ ### Medium
30
+ - [ ] Missing input validation at system boundaries
31
+ - [ ] Verbose error messages that leak internal details
32
+ - [ ] Missing CSRF protection on state-changing endpoints
33
+ - [ ] Insecure random number generation for security purposes
34
+
35
+ ### Low
36
+ - [ ] Missing security headers
37
+ - [ ] Dependencies with known vulnerabilities (check `npm audit`)
38
+ - [ ] `console.log` calls that output sensitive data
39
+
40
+ ## Output
41
+
42
+ For each finding:
43
+ - **Severity**: critical / high / medium / low
44
+ - **File and line**: where the issue is
45
+ - **Category**: which vulnerability type
46
+ - **Description**: what's wrong
47
+ - **Remediation**: specific fix
48
+
49
+ End with a summary: total findings by severity, and whether the code is safe to ship.
@@ -0,0 +1,41 @@
1
+ # kj-sonar — Static Analysis
2
+
3
+ Run SonarQube/SonarCloud analysis and fix any issues found.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Steps
10
+
11
+ 1. Check if SonarQube is running: `docker ps | grep sonarqube`
12
+ 2. If running, execute scan:
13
+ ```bash
14
+ npx @sonar/scan -Dsonar.host.url=http://localhost:9000 -Dsonar.projectKey=<project-key>
15
+ ```
16
+ 3. Check quality gate status:
17
+ ```bash
18
+ curl -s -u admin:admin "http://localhost:9000/api/qualitygates/project_status?projectKey=<project-key>"
19
+ ```
20
+ 4. List issues:
21
+ ```bash
22
+ curl -s -u admin:admin "http://localhost:9000/api/issues/search?projectKeys=<project-key>&statuses=OPEN&ps=50"
23
+ ```
24
+
25
+ ## If SonarQube is not available
26
+
27
+ Perform manual static analysis checks:
28
+ - [ ] Cognitive complexity — functions scoring over 15 should be refactored
29
+ - [ ] Duplicated code blocks (3+ lines repeated)
30
+ - [ ] Unused imports and variables
31
+ - [ ] Empty catch blocks without comments
32
+ - [ ] Nested ternary operations
33
+ - [ ] `console.log` left in production code
34
+
35
+ ## Output
36
+
37
+ Report:
38
+ - Quality gate status (passed/failed)
39
+ - Issues found by severity (blocker, critical, major, minor)
40
+ - For each issue: file, line, rule, and suggested fix
41
+ - Fix critical and blocker issues before proceeding
@@ -0,0 +1,40 @@
1
+ # kj-test — Test Quality Audit
2
+
3
+ Evaluate test coverage and quality for the current changes.
4
+
5
+ ## Your task
6
+
7
+ $ARGUMENTS
8
+
9
+ ## Steps
10
+
11
+ 1. Run `git diff main...HEAD` (or appropriate base branch) to identify changed source files
12
+ 2. For each changed source file, find the corresponding test file
13
+ 3. Run the test suite and check results
14
+ 4. Evaluate test quality
15
+
16
+ ## Checks
17
+
18
+ ### Coverage
19
+ - [ ] Every changed source file has a corresponding test file
20
+ - [ ] New functions/methods have at least one test
21
+ - [ ] Edge cases are covered (null, empty, boundary values)
22
+
23
+ ### Quality
24
+ - [ ] Tests have meaningful assertions (not just "no error thrown")
25
+ - [ ] Test descriptions clearly state what is being tested
26
+ - [ ] No tests that always pass (e.g., empty test body, `expect(true).toBe(true)`)
27
+ - [ ] Mocks are minimal — prefer real implementations where feasible
28
+
29
+ ### Execution
30
+ - [ ] Run `npm test` (or project equivalent) and report results
31
+ - [ ] All tests pass
32
+ - [ ] No skipped tests (`.skip`) for the changed code
33
+
34
+ ## Output
35
+
36
+ Report:
37
+ - Test files found/missing for each changed source file
38
+ - Test execution results (pass/fail count)
39
+ - Quality issues found
40
+ - Suggestions for improving coverage