@calltelemetry/openclaw-linear 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +834 -536
  2. package/index.ts +1 -1
  3. package/openclaw.plugin.json +3 -2
  4. package/package.json +1 -1
  5. package/prompts.yaml +46 -6
  6. package/src/__test__/fixtures/linear-responses.ts +75 -0
  7. package/src/__test__/fixtures/webhook-payloads.ts +113 -0
  8. package/src/__test__/helpers.ts +133 -0
  9. package/src/agent/agent.test.ts +192 -0
  10. package/src/agent/agent.ts +26 -1
  11. package/src/api/linear-api.test.ts +93 -1
  12. package/src/api/linear-api.ts +37 -1
  13. package/src/gateway/dispatch-methods.test.ts +409 -0
  14. package/src/infra/cli.ts +176 -1
  15. package/src/infra/commands.test.ts +276 -0
  16. package/src/infra/doctor.test.ts +19 -0
  17. package/src/infra/doctor.ts +30 -25
  18. package/src/infra/multi-repo.test.ts +163 -0
  19. package/src/infra/multi-repo.ts +29 -0
  20. package/src/infra/notify.test.ts +155 -16
  21. package/src/infra/notify.ts +26 -15
  22. package/src/infra/observability.test.ts +85 -0
  23. package/src/pipeline/artifacts.test.ts +26 -3
  24. package/src/pipeline/dispatch-state.ts +1 -0
  25. package/src/pipeline/e2e-dispatch.test.ts +584 -0
  26. package/src/pipeline/e2e-planning.test.ts +478 -0
  27. package/src/pipeline/intent-classify.test.ts +285 -0
  28. package/src/pipeline/intent-classify.ts +259 -0
  29. package/src/pipeline/pipeline.test.ts +69 -0
  30. package/src/pipeline/pipeline.ts +47 -18
  31. package/src/pipeline/planner.test.ts +159 -40
  32. package/src/pipeline/planner.ts +108 -60
  33. package/src/pipeline/tier-assess.test.ts +89 -0
  34. package/src/pipeline/webhook.ts +424 -251
  35. package/src/tools/claude-tool.ts +6 -0
  36. package/src/tools/cli-shared.test.ts +155 -0
  37. package/src/tools/code-tool.test.ts +210 -0
  38. package/src/tools/code-tool.ts +2 -2
  39. package/src/tools/dispatch-history-tool.test.ts +315 -0
  40. package/src/tools/planner-tools.test.ts +1 -1
  41. package/src/tools/planner-tools.ts +10 -2
@@ -60,15 +60,15 @@ interface PromptTemplates {
60
60
 
61
61
  const DEFAULT_PROMPTS: PromptTemplates = {
62
62
  worker: {
63
- system: "You are implementing a Linear issue. Post an implementation summary as a Linear comment when done. DO NOT mark the issue as Done.",
64
- task: "Implement issue {{identifier}}: {{title}}\n\nIssue body:\n{{description}}\n\nWorktree: {{worktreePath}}",
63
+ system: "You are a coding worker implementing a Linear issue. Your ONLY job is to write code and return a text summary. Do NOT attempt to update, close, comment on, or modify the Linear issue. Do NOT mark the issue as Done.",
64
+ task: "Implement issue {{identifier}}: {{title}}\n\nIssue body:\n{{description}}\n\nWorktree: {{worktreePath}}\n\nImplement the solution, run tests, commit your work, and return a text summary.",
65
65
  },
66
66
  audit: {
67
67
  system: "You are an independent auditor. The Linear issue body is the SOURCE OF TRUTH. Worker comments are secondary evidence.",
68
68
  task: 'Audit issue {{identifier}}: {{title}}\n\nIssue body:\n{{description}}\n\nWorktree: {{worktreePath}}\n\nReturn JSON verdict: {"pass": true/false, "criteria": [...], "gaps": [...], "testResults": "..."}',
69
69
  },
70
70
  rework: {
71
- addendum: "PREVIOUS AUDIT FAILED (attempt {{attempt}}). Gaps:\n{{gaps}}\n\nAddress these specific issues.",
71
+ addendum: "PREVIOUS AUDIT FAILED (attempt {{attempt}}). Gaps:\n{{gaps}}\n\nAddress these specific issues. Preserve correct code from prior attempts.",
72
72
  },
73
73
  };
74
74
 
@@ -88,12 +88,11 @@ function mergePromptLayers(base: PromptTemplates, overlay: Partial<PromptTemplat
88
88
  }
89
89
 
90
90
  /**
91
- * Load global prompts (layers 1+2: hardcoded defaults + global promptsPath override).
92
- * Cached after first load.
91
+ * Load and parse the raw prompts YAML file (global promptsPath or sidecar).
92
+ * Returns the parsed object, or null if no file found.
93
+ * Shared by both pipeline and planner prompt loaders.
93
94
  */
94
- function loadGlobalPrompts(pluginConfig?: Record<string, unknown>): PromptTemplates {
95
- if (_cachedGlobalPrompts) return _cachedGlobalPrompts;
96
-
95
+ export function loadRawPromptYaml(pluginConfig?: Record<string, unknown>): Record<string, any> | null {
97
96
  try {
98
97
  const customPath = pluginConfig?.promptsPath as string | undefined;
99
98
  let raw: string;
@@ -108,9 +107,23 @@ function loadGlobalPrompts(pluginConfig?: Record<string, unknown>): PromptTempla
108
107
  raw = readFileSync(join(pluginRoot, "prompts.yaml"), "utf-8");
109
108
  }
110
109
 
111
- const parsed = parseYaml(raw) as Partial<PromptTemplates>;
112
- _cachedGlobalPrompts = mergePromptLayers(DEFAULT_PROMPTS, parsed);
110
+ return parseYaml(raw) as Record<string, any>;
113
111
  } catch {
112
+ return null;
113
+ }
114
+ }
115
+
116
+ /**
117
+ * Load global prompts (layers 1+2: hardcoded defaults + global promptsPath override).
118
+ * Cached after first load.
119
+ */
120
+ function loadGlobalPrompts(pluginConfig?: Record<string, unknown>): PromptTemplates {
121
+ if (_cachedGlobalPrompts) return _cachedGlobalPrompts;
122
+
123
+ const parsed = loadRawPromptYaml(pluginConfig);
124
+ if (parsed) {
125
+ _cachedGlobalPrompts = mergePromptLayers(DEFAULT_PROMPTS, parsed as Partial<PromptTemplates>);
126
+ } else {
114
127
  _cachedGlobalPrompts = DEFAULT_PROMPTS;
115
128
  }
116
129
 
@@ -188,7 +201,7 @@ export function buildWorkerTask(
188
201
  worktreePath,
189
202
  tier: "",
190
203
  attempt: String(opts?.attempt ?? 0),
191
- gaps: opts?.gaps?.join("\n- ") ?? "",
204
+ gaps: opts?.gaps?.length ? "- " + opts.gaps.join("\n- ") : "",
192
205
  };
193
206
 
194
207
  let task = renderTemplate(prompts.worker.task, vars);
@@ -332,7 +345,12 @@ export async function triggerAudit(
332
345
  };
333
346
 
334
347
  // Build audit prompt from YAML templates
335
- const auditPrompt = buildAuditTask(issue, dispatch.worktreePath, pluginConfig);
348
+ // For multi-repo dispatches, render worktreePath as a list of repo→path mappings
349
+ const effectiveAuditPath = dispatch.worktrees
350
+ ? dispatch.worktrees.map(w => `${w.repoName}: ${w.path}`).join("\n")
351
+ : dispatch.worktreePath;
352
+
353
+ const auditPrompt = buildAuditTask(issue, effectiveAuditPath, pluginConfig);
336
354
 
337
355
  // Set Linear label
338
356
  await linearApi.emitActivity(dispatch.agentSessionId ?? "", {
@@ -444,6 +462,11 @@ export async function processVerdict(
444
462
  const verdict = parseVerdict(auditOutput);
445
463
  if (!verdict) {
446
464
  api.logger.warn(`${TAG} could not parse audit verdict from output (${auditOutput.length} chars)`);
465
+ // Post comment so user knows what happened
466
+ await linearApi.createComment(
467
+ dispatch.issueId,
468
+ `## Audit Inconclusive\n\nThe auditor's response couldn't be parsed as a verdict. **Retrying automatically** — this usually resolves itself.\n\n**If it keeps happening:** \`openclaw openclaw-linear prompts validate\`\n\n**Status:** Retrying audit now. No action needed.`,
469
+ ).catch((err) => api.logger.error(`${TAG} failed to post inconclusive comment: ${err}`));
447
470
  // Treat unparseable verdict as failure
448
471
  await handleAuditFail(hookCtx, dispatch, {
449
472
  pass: false,
@@ -527,7 +550,7 @@ async function handleAuditPass(
527
550
  const summaryExcerpt = summary ? `\n\n**Summary:**\n${summary.slice(0, 2000)}` : "";
528
551
  await linearApi.createComment(
529
552
  dispatch.issueId,
530
- `## Audit Passed\n\n**Criteria verified:**\n${criteriaList}\n\n**Tests:** ${verdict.testResults || "N/A"}${summaryExcerpt}\n\n---\n*Attempt ${dispatch.attempt + 1} audit passed. Artifacts: \`${dispatch.worktreePath}/.claw/\`*`,
553
+ `## Done\n\nThis issue has been implemented and verified.\n\n**What was checked:**\n${criteriaList}\n\n**Test results:** ${verdict.testResults || "N/A"}${summaryExcerpt}\n\n---\n*Completed on attempt ${dispatch.attempt + 1}.*\n\n**Next steps:**\n- Review the code: \`cd ${dispatch.worktreePath}\`\n- View artifacts: \`ls ${dispatch.worktreePath}/.claw/\`\n- Create a PR from the worktree branch if one wasn't opened automatically`,
531
554
  ).catch((err) => api.logger.error(`${TAG} failed to post audit pass comment: ${err}`));
532
555
 
533
556
  api.logger.info(`${TAG} audit PASSED — dispatch completed (attempt ${dispatch.attempt})`);
@@ -603,7 +626,7 @@ async function handleAuditFail(
603
626
  const gapsList = verdict.gaps.map((g) => `- ${g}`).join("\n");
604
627
  await linearApi.createComment(
605
628
  dispatch.issueId,
606
- `## Audit Failed — Escalating\n\n**Attempt ${nextAttempt} of ${maxAttempts + 1}**\n\n**Gaps:**\n${gapsList}\n\n**Tests:** ${verdict.testResults || "N/A"}\n\n---\n*Max rework attempts reached. Needs human review. Artifacts: \`${dispatch.worktreePath}/.claw/\`*`,
629
+ `## Needs Your Help\n\nAll ${nextAttempt} attempts failed. The agent couldn't resolve these issues on its own.\n\n**What went wrong:**\n${gapsList}\n\n**Test results:** ${verdict.testResults || "N/A"}\n\n---\n\n**What you can do:**\n1. **Clarify requirements** — update the issue body with more detail, then re-assign to try again\n2. **Fix it manually** — the agent's work is in the worktree: \`cd ${dispatch.worktreePath}\`\n3. **Force retry** — \`/dispatch retry ${dispatch.issueIdentifier}\`\n4. **View logs** — worker output: \`.claw/worker-*.md\`, audit verdicts: \`.claw/audit-*.json\``,
607
630
  ).catch((err) => api.logger.error(`${TAG} failed to post escalation comment: ${err}`));
608
631
 
609
632
  api.logger.warn(`${TAG} audit FAILED ${nextAttempt}x — escalating to human`);
@@ -647,7 +670,7 @@ async function handleAuditFail(
647
670
  const gapsList = verdict.gaps.map((g) => `- ${g}`).join("\n");
648
671
  await linearApi.createComment(
649
672
  dispatch.issueId,
650
- `## Audit FailedRework\n\n**Attempt ${nextAttempt} of ${maxAttempts + 1}**\n\n**Gaps:**\n${gapsList}\n\n**Tests:** ${verdict.testResults || "N/A"}\n\n---\n*Reworking: addressing gaps above.*`,
673
+ `## Needs More Work\n\nThe audit found gaps. **Retrying now** — the worker gets the feedback below as context.\n\n**Attempt ${nextAttempt} of ${maxAttempts + 1}** — ${maxAttempts + 1 - nextAttempt > 0 ? `${maxAttempts + 1 - nextAttempt} more ${maxAttempts + 1 - nextAttempt === 1 ? "retry" : "retries"} if this fails too` : "this is the last attempt"}.\n\n**What needs fixing:**\n${gapsList}\n\n**Test results:** ${verdict.testResults || "N/A"}\n\n**Status:** Worker is restarting with the gaps above as context. No action needed unless all retries fail.`,
651
674
  ).catch((err) => api.logger.error(`${TAG} failed to post rework comment: ${err}`));
652
675
 
653
676
  api.logger.info(`${TAG} audit FAILED — rework attempt ${nextAttempt}/${maxAttempts + 1}`);
@@ -716,7 +739,12 @@ export async function spawnWorker(
716
739
  };
717
740
 
718
741
  // Build worker prompt from YAML templates
719
- const workerPrompt = buildWorkerTask(issue, dispatch.worktreePath, {
742
+ // For multi-repo dispatches, render worktreePath as a list of repo→path mappings
743
+ const effectiveWorkerPath = dispatch.worktrees
744
+ ? dispatch.worktrees.map(w => `${w.repoName}: ${w.path}`).join("\n")
745
+ : dispatch.worktreePath;
746
+
747
+ const workerPrompt = buildWorkerTask(issue, effectiveWorkerPath, {
720
748
  attempt: dispatch.attempt,
721
749
  gaps: opts?.gaps,
722
750
  pluginConfig,
@@ -798,8 +826,9 @@ export async function spawnWorker(
798
826
 
799
827
  await linearApi.createComment(
800
828
  dispatch.issueId,
801
- `## Watchdog Kill\n\nAgent killed by inactivity watchdog (no I/O for ${thresholdSec}s). ` +
802
- `Automatic retry also failed.\n\n---\n*Needs human review. Artifacts: \`${dispatch.worktreePath}/.claw/\`*`,
829
+ `## Agent Timed Out\n\nThe agent stopped responding for over ${thresholdSec}s. It was automatically restarted, but the retry also timed out.\n\n` +
830
+ `**What you can do:**\n1. **Try again** — re-assign this issue or \`/dispatch retry ${dispatch.issueIdentifier}\`\n2. **Break it down** — if it keeps timing out, split into smaller issues\n3. **Increase timeout** — set \`inactivitySec\` higher in your agent profile\n\n` +
831
+ `**Logs:** \`${dispatch.worktreePath}/.claw/log.jsonl\` (look for \`"phase": "watchdog"\`)\n\n**Current status:** Stuck — waiting for you.`,
803
832
  ).catch(() => {});
804
833
 
805
834
  await hookCtx.notify("watchdog_kill", {
@@ -16,6 +16,17 @@ vi.mock("../api/linear-api.js", () => ({}));
16
16
 
17
17
  vi.mock("openclaw/plugin-sdk", () => ({}));
18
18
 
19
+ // Mock CLI tool runners for cross-model review
20
+ vi.mock("../tools/claude-tool.js", () => ({
21
+ runClaude: vi.fn().mockResolvedValue({ success: true, output: "Claude review feedback" }),
22
+ }));
23
+ vi.mock("../tools/codex-tool.js", () => ({
24
+ runCodex: vi.fn().mockResolvedValue({ success: true, output: "Codex review feedback" }),
25
+ }));
26
+ vi.mock("../tools/gemini-tool.js", () => ({
27
+ runGemini: vi.fn().mockResolvedValue({ success: true, output: "Gemini review feedback" }),
28
+ }));
29
+
19
30
  const mockLinearApi = {
20
31
  getProject: vi.fn().mockResolvedValue({
21
32
  id: "proj-1",
@@ -61,7 +72,13 @@ vi.mock("../tools/planner-tools.js", () => ({
61
72
  // Imports (AFTER mocks)
62
73
  // ---------------------------------------------------------------------------
63
74
 
64
- import { initiatePlanningSession, handlePlannerTurn, runPlanAudit } from "./planner.js";
75
+ import {
76
+ initiatePlanningSession,
77
+ handlePlannerTurn,
78
+ runPlanAudit,
79
+ runCrossModelReview,
80
+ resolveReviewModel,
81
+ } from "./planner.js";
65
82
  import {
66
83
  registerPlanningSession,
67
84
  updatePlanningSession,
@@ -73,6 +90,9 @@ import {
73
90
  clearActivePlannerContext,
74
91
  auditPlan,
75
92
  } from "../tools/planner-tools.js";
93
+ import { runClaude } from "../tools/claude-tool.js";
94
+ import { runCodex } from "../tools/codex-tool.js";
95
+ import { runGemini } from "../tools/gemini-tool.js";
76
96
 
77
97
  // ---------------------------------------------------------------------------
78
98
  // Helpers
@@ -230,47 +250,31 @@ describe("handlePlannerTurn", () => {
230
250
  );
231
251
  });
232
252
 
233
- it("detects finalize plan intent and triggers audit instead of regular turn", async () => {
234
- const ctx = createCtx();
235
- const session = createSession();
236
-
237
- await handlePlannerTurn(ctx, session, {
238
- issueId: "issue-1",
239
- commentBody: "finalize plan",
240
- commentorName: "Tester",
241
- });
253
+ // Note: finalize/abandon intent detection has moved to webhook.ts via
254
+ // intent-classify.ts. handlePlannerTurn is now a pure "continue planning"
255
+ // function that always runs the agent.
256
+ });
242
257
 
243
- // Audit path: auditPlan is called, runAgent is NOT called
244
- expect(auditPlan).toHaveBeenCalled();
245
- expect(runAgentMock).not.toHaveBeenCalled();
246
- });
258
+ // ---------------------------------------------------------------------------
259
+ // runPlanAudit
260
+ // ---------------------------------------------------------------------------
247
261
 
248
- it("detects abandon intent and ends session as abandoned", async () => {
262
+ describe("runPlanAudit", () => {
263
+ it("transitions to plan_review on passing audit", async () => {
264
+ vi.mocked(auditPlan).mockReturnValue({ pass: true, problems: [], warnings: [] });
249
265
  const ctx = createCtx();
250
266
  const session = createSession();
251
267
 
252
- await handlePlannerTurn(ctx, session, {
253
- issueId: "issue-1",
254
- commentBody: "abandon",
255
- commentorName: "Tester",
256
- });
268
+ await runPlanAudit(ctx, session);
257
269
 
258
- expect(endPlanningSession).toHaveBeenCalledWith(
270
+ expect(updatePlanningSession).toHaveBeenCalledWith(
259
271
  "proj-1",
260
- "abandoned",
272
+ { status: "plan_review" },
261
273
  undefined,
262
274
  );
263
- // Should NOT run the agent
264
- expect(runAgentMock).not.toHaveBeenCalled();
265
275
  });
266
- });
267
-
268
- // ---------------------------------------------------------------------------
269
- // runPlanAudit
270
- // ---------------------------------------------------------------------------
271
276
 
272
- describe("runPlanAudit", () => {
273
- it("posts success comment on passing audit", async () => {
277
+ it("posts 'Passed Checks' comment on passing audit", async () => {
274
278
  vi.mocked(auditPlan).mockReturnValue({ pass: true, problems: [], warnings: [] });
275
279
  const ctx = createCtx();
276
280
  const session = createSession();
@@ -279,24 +283,46 @@ describe("runPlanAudit", () => {
279
283
 
280
284
  expect(mockLinearApi.createComment).toHaveBeenCalledWith(
281
285
  "issue-1",
282
- expect.stringContaining("Approved"),
286
+ expect.stringContaining("Plan Passed Checks"),
283
287
  );
284
288
  });
285
289
 
286
- it("ends session as approved on pass", async () => {
290
+ it("runs cross-model review automatically on passing audit", async () => {
287
291
  vi.mocked(auditPlan).mockReturnValue({ pass: true, problems: [], warnings: [] });
288
292
  const ctx = createCtx();
289
293
  const session = createSession();
290
294
 
291
295
  await runPlanAudit(ctx, session);
292
296
 
293
- expect(endPlanningSession).toHaveBeenCalledWith(
294
- "proj-1",
295
- "approved",
296
- undefined,
297
+ // Default review model is "gemini" (since no primary model configured)
298
+ expect(runGemini).toHaveBeenCalled();
299
+ });
300
+
301
+ it("runs planner agent with review prompt including cross-model feedback", async () => {
302
+ vi.mocked(auditPlan).mockReturnValue({ pass: true, problems: [], warnings: [] });
303
+ const ctx = createCtx();
304
+ const session = createSession();
305
+
306
+ await runPlanAudit(ctx, session);
307
+
308
+ // Agent should run with a review prompt
309
+ expect(runAgentMock).toHaveBeenCalledWith(
310
+ expect.objectContaining({
311
+ message: expect.stringContaining("Plan Review"),
312
+ }),
297
313
  );
298
314
  });
299
315
 
316
+ it("does NOT end session as approved on passing audit (waits for user approval)", async () => {
317
+ vi.mocked(auditPlan).mockReturnValue({ pass: true, problems: [], warnings: [] });
318
+ const ctx = createCtx();
319
+ const session = createSession();
320
+
321
+ await runPlanAudit(ctx, session);
322
+
323
+ expect(endPlanningSession).not.toHaveBeenCalled();
324
+ });
325
+
300
326
  it("posts problems on failing audit", async () => {
301
327
  vi.mocked(auditPlan).mockReturnValue({
302
328
  pass: false,
@@ -314,7 +340,7 @@ describe("runPlanAudit", () => {
314
340
  );
315
341
  });
316
342
 
317
- it("does NOT end session as approved on fail", async () => {
343
+ it("does NOT transition to plan_review on failing audit", async () => {
318
344
  vi.mocked(auditPlan).mockReturnValue({
319
345
  pass: false,
320
346
  problems: ["No estimates"],
@@ -325,10 +351,103 @@ describe("runPlanAudit", () => {
325
351
 
326
352
  await runPlanAudit(ctx, session);
327
353
 
328
- expect(endPlanningSession).not.toHaveBeenCalledWith(
354
+ expect(updatePlanningSession).not.toHaveBeenCalledWith(
329
355
  "proj-1",
330
- "approved",
356
+ { status: "plan_review" },
331
357
  expect.anything(),
332
358
  );
333
359
  });
360
+
361
+ it("includes warnings in success comment when present", async () => {
362
+ vi.mocked(auditPlan).mockReturnValue({
363
+ pass: true,
364
+ problems: [],
365
+ warnings: ["PROJ-3 has no acceptance criteria"],
366
+ });
367
+ const ctx = createCtx();
368
+ const session = createSession();
369
+
370
+ await runPlanAudit(ctx, session);
371
+
372
+ expect(mockLinearApi.createComment).toHaveBeenCalledWith(
373
+ "issue-1",
374
+ expect.stringContaining("PROJ-3 has no acceptance criteria"),
375
+ );
376
+ });
377
+ });
378
+
379
+ // ---------------------------------------------------------------------------
380
+ // resolveReviewModel
381
+ // ---------------------------------------------------------------------------
382
+
383
+ describe("resolveReviewModel", () => {
384
+ it("returns 'codex' when primary model is claude-based", () => {
385
+ expect(resolveReviewModel({
386
+ agents: { defaults: { model: { primary: "anthropic/claude-sonnet-4" } } },
387
+ } as any)).toBe("codex");
388
+ });
389
+
390
+ it("returns 'gemini' when primary model is codex-based", () => {
391
+ expect(resolveReviewModel({
392
+ agents: { defaults: { model: { primary: "openai/codex-3" } } },
393
+ } as any)).toBe("gemini");
394
+ });
395
+
396
+ it("returns 'codex' when primary model is gemini-based", () => {
397
+ expect(resolveReviewModel({
398
+ agents: { defaults: { model: { primary: "google/gemini-2" } } },
399
+ } as any)).toBe("codex");
400
+ });
401
+
402
+ it("returns 'gemini' when no primary model configured", () => {
403
+ expect(resolveReviewModel({})).toBe("gemini");
404
+ });
405
+
406
+ it("respects explicit plannerReviewModel config override", () => {
407
+ expect(resolveReviewModel({
408
+ plannerReviewModel: "gemini",
409
+ agents: { defaults: { model: { primary: "anthropic/claude-sonnet-4" } } },
410
+ } as any)).toBe("gemini");
411
+ });
412
+ });
413
+
414
+ // ---------------------------------------------------------------------------
415
+ // runCrossModelReview
416
+ // ---------------------------------------------------------------------------
417
+
418
+ describe("runCrossModelReview", () => {
419
+ it("calls the correct CLI runner for the specified model", async () => {
420
+ const api = createApi();
421
+
422
+ await runCrossModelReview(api, "claude", "test snapshot");
423
+ expect(runClaude).toHaveBeenCalled();
424
+
425
+ vi.clearAllMocks();
426
+ await runCrossModelReview(api, "codex", "test snapshot");
427
+ expect(runCodex).toHaveBeenCalled();
428
+
429
+ vi.clearAllMocks();
430
+ await runCrossModelReview(api, "gemini", "test snapshot");
431
+ expect(runGemini).toHaveBeenCalled();
432
+ });
433
+
434
+ it("returns review output on success", async () => {
435
+ const api = createApi();
436
+ const result = await runCrossModelReview(api, "claude", "test snapshot");
437
+ expect(result).toBe("Claude review feedback");
438
+ });
439
+
440
+ it("returns graceful fallback on failure", async () => {
441
+ vi.mocked(runClaude).mockResolvedValueOnce({ success: false, error: "timeout" } as any);
442
+ const api = createApi();
443
+ const result = await runCrossModelReview(api, "claude", "test snapshot");
444
+ expect(result).toContain("review failed");
445
+ });
446
+
447
+ it("returns graceful fallback on exception", async () => {
448
+ vi.mocked(runClaude).mockRejectedValueOnce(new Error("network error"));
449
+ const api = createApi();
450
+ const result = await runCrossModelReview(api, "claude", "test snapshot");
451
+ expect(result).toContain("review unavailable");
452
+ });
334
453
  });
@@ -6,11 +6,8 @@
6
6
  * - handlePlannerTurn: processes each user comment during planning
7
7
  * - runPlanAudit: validates the plan before finalizing
8
8
  */
9
- import { readFileSync } from "node:fs";
10
- import { join, dirname } from "node:path";
11
- import { fileURLToPath } from "node:url";
12
- import { parse as parseYaml } from "yaml";
13
9
  import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
10
+ import { loadRawPromptYaml } from "./pipeline.js";
14
11
  import type { LinearAgentApi } from "../api/linear-api.js";
15
12
  import { runAgent } from "../agent/agent.js";
16
13
  import {
@@ -26,6 +23,9 @@ import {
26
23
  buildPlanSnapshot,
27
24
  auditPlan,
28
25
  } from "../tools/planner-tools.js";
26
+ import { runClaude } from "../tools/claude-tool.js";
27
+ import { runCodex } from "../tools/codex-tool.js";
28
+ import { runGemini } from "../tools/gemini-tool.js";
29
29
 
30
30
  // ---------------------------------------------------------------------------
31
31
  // Types
@@ -42,6 +42,7 @@ interface PlannerPrompts {
42
42
  interview: string;
43
43
  audit_prompt: string;
44
44
  welcome: string;
45
+ review: string;
45
46
  }
46
47
 
47
48
  // ---------------------------------------------------------------------------
@@ -54,32 +55,19 @@ function loadPlannerPrompts(pluginConfig?: Record<string, unknown>): PlannerProm
54
55
  interview: "Project: {{projectName}}\n\nPlan:\n{{planSnapshot}}\n\nUser said: {{userMessage}}\n\nContinue planning.",
55
56
  audit_prompt: "Run audit_plan for {{projectName}}.",
56
57
  welcome: "Entering planning mode for **{{projectName}}**. What are the main feature areas?",
58
+ review: "Plan for {{projectName}} passed checks. {{reviewModel}} recommends:\n{{crossModelFeedback}}\n\nReview and suggest changes, then invite the user to approve.",
57
59
  };
58
60
 
59
- try {
60
- const customPath = pluginConfig?.promptsPath as string | undefined;
61
- let raw: string;
62
-
63
- if (customPath) {
64
- const resolved = customPath.startsWith("~")
65
- ? customPath.replace("~", process.env.HOME ?? "")
66
- : customPath;
67
- raw = readFileSync(resolved, "utf-8");
68
- } else {
69
- const pluginRoot = join(dirname(fileURLToPath(import.meta.url)), "../..");
70
- raw = readFileSync(join(pluginRoot, "prompts.yaml"), "utf-8");
71
- }
72
-
73
- const parsed = parseYaml(raw) as any;
74
- if (parsed?.planner) {
75
- return {
76
- system: parsed.planner.system ?? defaults.system,
77
- interview: parsed.planner.interview ?? defaults.interview,
78
- audit_prompt: parsed.planner.audit_prompt ?? defaults.audit_prompt,
79
- welcome: parsed.planner.welcome ?? defaults.welcome,
80
- };
81
- }
82
- } catch { /* use defaults */ }
61
+ const parsed = loadRawPromptYaml(pluginConfig);
62
+ if (parsed?.planner) {
63
+ return {
64
+ system: parsed.planner.system ?? defaults.system,
65
+ interview: parsed.planner.interview ?? defaults.interview,
66
+ audit_prompt: parsed.planner.audit_prompt ?? defaults.audit_prompt,
67
+ welcome: parsed.planner.welcome ?? defaults.welcome,
68
+ review: parsed.planner.review ?? defaults.review,
69
+ };
70
+ }
83
71
 
84
72
  return defaults;
85
73
  }
@@ -142,35 +130,19 @@ export async function initiatePlanningSession(
142
130
  // Interview turn
143
131
  // ---------------------------------------------------------------------------
144
132
 
145
- const FINALIZE_PATTERN = /\b(finalize\s+plan|finalize|done\s+planning|approve\s+plan|plan\s+looks\s+good)\b/i;
146
- const ABANDON_PATTERN = /\b(abandon|cancel\s+planning|stop\s+planning|exit\s+planning)\b/i;
147
-
133
+ /**
134
+ * Handle a planning conversation turn. Intent detection (finalize/abandon)
135
+ * is handled by the webhook via intent-classify.ts before calling this function.
136
+ * This is a pure "continue planning" function.
137
+ */
148
138
  export async function handlePlannerTurn(
149
139
  ctx: PlannerContext,
150
140
  session: PlanningSession,
151
141
  input: { issueId: string; commentBody: string; commentorName: string },
152
- opts?: { onApproved?: (projectId: string) => void },
153
142
  ): Promise<void> {
154
143
  const { api, linearApi, pluginConfig } = ctx;
155
144
  const configPath = pluginConfig?.planningStatePath as string | undefined;
156
145
 
157
- // Detect finalization intent
158
- if (FINALIZE_PATTERN.test(input.commentBody)) {
159
- await runPlanAudit(ctx, session, { onApproved: opts?.onApproved });
160
- return;
161
- }
162
-
163
- // Detect abandon intent
164
- if (ABANDON_PATTERN.test(input.commentBody)) {
165
- await endPlanningSession(session.projectId, "abandoned", configPath);
166
- await linearApi.createComment(
167
- session.rootIssueId,
168
- `Planning mode ended for **${session.projectName}**. Session abandoned.`,
169
- );
170
- api.logger.info(`Planning: session abandoned for ${session.projectName}`);
171
- return;
172
- }
173
-
174
146
  // Increment turn count
175
147
  const newTurnCount = session.turnCount + 1;
176
148
  await updatePlanningSession(session.projectId, { turnCount: newTurnCount }, configPath);
@@ -204,6 +176,8 @@ export async function handlePlannerTurn(
204
176
  linearApi,
205
177
  projectId: session.projectId,
206
178
  teamId: session.teamId,
179
+ api,
180
+ pluginConfig,
207
181
  });
208
182
 
209
183
  try {
@@ -235,7 +209,6 @@ export async function handlePlannerTurn(
235
209
  export async function runPlanAudit(
236
210
  ctx: PlannerContext,
237
211
  session: PlanningSession,
238
- opts?: { onApproved?: (projectId: string) => void },
239
212
  ): Promise<void> {
240
213
  const { api, linearApi, pluginConfig } = ctx;
241
214
  const configPath = pluginConfig?.planningStatePath as string | undefined;
@@ -247,26 +220,63 @@ export async function runPlanAudit(
247
220
  const result = auditPlan(issues);
248
221
 
249
222
  if (result.pass) {
250
- // Build final summary
223
+ // Transition to plan_review (not approved yet — cross-model review first)
224
+ await updatePlanningSession(session.projectId, { status: "plan_review" }, configPath);
225
+
251
226
  const snapshot = buildPlanSnapshot(issues);
252
227
  const warningsList = result.warnings.length > 0
253
228
  ? `\n\n**Warnings:**\n${result.warnings.map((w) => `- ${w}`).join("\n")}`
254
229
  : "";
255
230
 
231
+ // Determine review model and post "running review" message
232
+ const reviewModel = resolveReviewModel(pluginConfig);
233
+ const reviewModelName = reviewModel.charAt(0).toUpperCase() + reviewModel.slice(1);
234
+
256
235
  await linearApi.createComment(
257
236
  session.rootIssueId,
258
- `## Plan Approved\n\n` +
259
- `The plan for **${session.projectName}** passed all checks.\n\n` +
260
- `**${issues.length} issues** created with valid dependency graph.${warningsList}\n\n` +
261
- `### Final Plan\n${snapshot}\n\n` +
262
- `---\n*Planning mode complete. Project is ready for implementation dispatch.*`,
237
+ `## Plan Passed Checks\n\n` +
238
+ `**${issues.length} issues** with valid dependency graph.${warningsList}\n\n` +
239
+ `Let me have **${reviewModelName}** audit this and make recommendations.`,
263
240
  );
264
241
 
265
- await endPlanningSession(session.projectId, "approved", configPath);
266
- api.logger.info(`Planning: session approved for ${session.projectName}`);
242
+ // Run cross-model review
243
+ const crossReview = await runCrossModelReview(api, reviewModel, snapshot, pluginConfig);
244
+
245
+ // Run planner agent with review prompt + cross-model feedback
246
+ const prompts = loadPlannerPrompts(pluginConfig);
247
+ const reviewPrompt = renderTemplate(prompts.review, {
248
+ projectName: session.projectName,
249
+ planSnapshot: snapshot,
250
+ issueCount: String(issues.length),
251
+ reviewModel: reviewModelName,
252
+ crossModelFeedback: crossReview,
253
+ });
254
+
255
+ const agentId = (pluginConfig?.defaultAgentId as string) ?? "default";
256
+
257
+ setActivePlannerContext({
258
+ linearApi,
259
+ projectId: session.projectId,
260
+ teamId: session.teamId,
261
+ api,
262
+ pluginConfig,
263
+ });
267
264
 
268
- // Trigger DAG-based dispatch if callback provided
269
- opts?.onApproved?.(session.projectId);
265
+ try {
266
+ const agentResult = await runAgent({
267
+ api,
268
+ agentId,
269
+ sessionId: `planner-${session.rootIdentifier}-review`,
270
+ message: `${prompts.system}\n\n${reviewPrompt}`,
271
+ });
272
+ if (agentResult.output) {
273
+ await linearApi.createComment(session.rootIssueId, agentResult.output);
274
+ }
275
+ } finally {
276
+ clearActivePlannerContext();
277
+ }
278
+
279
+ api.logger.info(`Planning: entered plan_review for ${session.projectName} (reviewed by ${reviewModelName})`);
270
280
  } else {
271
281
  // Post problems and keep planning
272
282
  const problemsList = result.problems.map((p) => `- ${p}`).join("\n");
@@ -285,3 +295,41 @@ export async function runPlanAudit(
285
295
  api.logger.info(`Planning: audit failed for ${session.projectName} (${result.problems.length} problems)`);
286
296
  }
287
297
  }
298
+
299
+ // ---------------------------------------------------------------------------
300
+ // Cross-model review
301
+ // ---------------------------------------------------------------------------
302
+
303
+ export async function runCrossModelReview(
304
+ api: OpenClawPluginApi,
305
+ model: "claude" | "codex" | "gemini",
306
+ planSnapshot: string,
307
+ pluginConfig?: Record<string, unknown>,
308
+ ): Promise<string> {
309
+ const prompt = `You are reviewing a project plan. Analyze it and suggest specific improvements.\n\n${planSnapshot}\n\nFocus on: missing acceptance criteria, dependency gaps, estimation accuracy, testability, and edge cases. Reference specific issue identifiers. Be concise and actionable.`;
310
+
311
+ try {
312
+ const runner = model === "claude" ? runClaude
313
+ : model === "codex" ? runCodex
314
+ : runGemini;
315
+ const result = await runner(api, { prompt } as any, pluginConfig);
316
+ return result.success ? (result.output ?? "(no feedback)") : `(${model} review failed: ${result.error})`;
317
+ } catch (err) {
318
+ api.logger.warn(`Cross-model review failed: ${err}`);
319
+ return "(cross-model review unavailable)";
320
+ }
321
+ }
322
+
323
+ export function resolveReviewModel(pluginConfig?: Record<string, unknown>): "claude" | "codex" | "gemini" {
324
+ // User override in config
325
+ const configured = (pluginConfig as any)?.plannerReviewModel as string | undefined;
326
+ if (configured && ["claude", "codex", "gemini"].includes(configured)) {
327
+ return configured as "claude" | "codex" | "gemini";
328
+ }
329
+ // Always the complement of the user's primary model
330
+ const currentModel = (pluginConfig as any)?.agents?.defaults?.model?.primary as string ?? "";
331
+ if (currentModel.includes("claude") || currentModel.includes("anthropic")) return "codex";
332
+ if (currentModel.includes("codex") || currentModel.includes("openai")) return "gemini";
333
+ if (currentModel.includes("gemini") || currentModel.includes("google")) return "codex";
334
+ return "gemini"; // Kimi, Mistral, other, or unconfigured → Gemini reviews
335
+ }