@almightygpt/core 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/adapters/claude.d.ts.map +1 -1
  2. package/dist/adapters/claude.js +3 -1
  3. package/dist/adapters/claude.js.map +1 -1
  4. package/dist/adapters/defaults.d.ts +34 -0
  5. package/dist/adapters/defaults.d.ts.map +1 -0
  6. package/dist/adapters/defaults.js +41 -0
  7. package/dist/adapters/defaults.js.map +1 -0
  8. package/dist/adapters/factory.d.ts +12 -0
  9. package/dist/adapters/factory.d.ts.map +1 -0
  10. package/dist/adapters/factory.js +40 -0
  11. package/dist/adapters/factory.js.map +1 -0
  12. package/dist/adapters/gemini.d.ts.map +1 -1
  13. package/dist/adapters/gemini.js +3 -1
  14. package/dist/adapters/gemini.js.map +1 -1
  15. package/dist/adapters/openai.d.ts.map +1 -1
  16. package/dist/adapters/openai.js +3 -1
  17. package/dist/adapters/openai.js.map +1 -1
  18. package/dist/auth/keychain.d.ts +11 -1
  19. package/dist/auth/keychain.d.ts.map +1 -1
  20. package/dist/auth/keychain.js +13 -4
  21. package/dist/auth/keychain.js.map +1 -1
  22. package/dist/auth/resolver.d.ts.map +1 -1
  23. package/dist/auth/resolver.js +19 -3
  24. package/dist/auth/resolver.js.map +1 -1
  25. package/dist/auth/types.d.ts +18 -5
  26. package/dist/auth/types.d.ts.map +1 -1
  27. package/dist/auth/types.js +8 -6
  28. package/dist/auth/types.js.map +1 -1
  29. package/dist/auth/validator.d.ts +23 -3
  30. package/dist/auth/validator.d.ts.map +1 -1
  31. package/dist/auth/validator.js +126 -21
  32. package/dist/auth/validator.js.map +1 -1
  33. package/dist/index.d.ts +3 -1
  34. package/dist/index.d.ts.map +1 -1
  35. package/dist/index.js +4 -1
  36. package/dist/index.js.map +1 -1
  37. package/dist/plan/run-plan-review.d.ts +40 -0
  38. package/dist/plan/run-plan-review.d.ts.map +1 -0
  39. package/dist/plan/run-plan-review.js +224 -0
  40. package/dist/plan/run-plan-review.js.map +1 -0
  41. package/dist/plan/run-plan.d.ts +42 -0
  42. package/dist/plan/run-plan.d.ts.map +1 -0
  43. package/dist/plan/run-plan.js +193 -0
  44. package/dist/plan/run-plan.js.map +1 -0
  45. package/dist/runs/types.d.ts +1 -1
  46. package/dist/runs/types.d.ts.map +1 -1
  47. package/package.json +1 -1
  48. package/src/adapters/claude.ts +3 -1
  49. package/src/adapters/defaults.ts +44 -0
  50. package/src/adapters/factory.ts +45 -0
  51. package/src/adapters/gemini.ts +3 -1
  52. package/src/adapters/openai.ts +3 -1
  53. package/src/auth/keychain.ts +20 -6
  54. package/src/auth/resolver.ts +23 -3
  55. package/src/auth/types.ts +27 -8
  56. package/src/auth/validator.ts +149 -25
  57. package/src/index.ts +13 -1
  58. package/src/plan/run-plan-review.ts +302 -0
  59. package/src/plan/run-plan.ts +247 -0
  60. package/src/runs/types.ts +3 -1
@@ -13,26 +13,41 @@
13
13
  * - Google: tiny generateContent call against gemini-2.5-flash
14
14
  *
15
15
  * Cost is fractions of a cent per call. Latency is ~1-3 seconds.
16
- * Failure surfaces the provider's exact error message so the user
17
- * can fix it.
16
+ *
17
+ * **Error handling (per Codex v0.8 P2 #6):** failure responses are
18
+ * parsed into a short user-safe message + status code. We never
19
+ * return the raw provider body to the user — those bodies can include
20
+ * request IDs, billing/account state, org/project details, quota
21
+ * metadata, or other account information that's too noisy for CLI
22
+ * logs. The raw body is available via the `rawBody` field for callers
23
+ * that explicitly want it (e.g. debug mode).
24
+ *
25
+ * **Default models (per Codex v0.8 P2 #5):** sourced from
26
+ * `adapters/defaults.ts` — the single source of truth — so the
27
+ * validator can't drift from the adapters.
18
28
  */
19
29
 
20
30
  import type { ProviderId } from "./types.js";
31
+ import { DEFAULT_MODELS } from "../adapters/defaults.js";
21
32
 
22
33
  /**
   * Outcome of one key-validation probe against a single provider.
   *
   * On success only `ok`, `model` and `latencyMs` are populated. On an HTTP
   * failure the validators additionally fill `statusCode`, `error` and
   * `rawBody`. On a thrown (network/timeout) failure only `error` and
   * `latencyMs` are set.
   */
  export interface ValidationResult {
    /** `true` when the provider answered the probe with an OK response. */
    ok: boolean;
    /** Provider-reported model used (e.g. "gpt-4o-2024-08-06"). */
    model?: string;
    /** Short, user-safe message; never includes the raw provider body. */
    error?: string;
    /** HTTP status code from the provider, if applicable. */
    statusCode?: number;
    /** Latency in ms — useful for `auth status --validate` JSON output. */
    latencyMs?: number;
    /**
     * Raw provider response body. Present iff a request was made and
     * returned non-OK. Callers should NOT surface this to users without
     * explicit debug intent; it may contain account metadata.
     */
    rawBody?: string;
  }
29
50
 
30
- const DEFAULT_VALIDATION_MODELS: Record<ProviderId, string> = {
31
- openai: "gpt-4o",
32
- anthropic: "claude-sonnet-4-6",
33
- google: "gemini-2.5-flash",
34
- };
35
-
36
51
  const VALIDATION_TIMEOUT_MS = 15_000;
37
52
 
38
53
  /**
@@ -44,25 +59,33 @@ export async function validateKey(
44
59
  provider: ProviderId,
45
60
  key: string,
46
61
  ): Promise<ValidationResult> {
62
+ const start = Date.now();
47
63
  try {
64
+ let result: ValidationResult;
48
65
  switch (provider) {
49
66
  case "openai":
50
- return await validateOpenAI(key);
67
+ result = await validateOpenAI(key);
68
+ break;
51
69
  case "anthropic":
52
- return await validateAnthropic(key);
70
+ result = await validateAnthropic(key);
71
+ break;
53
72
  case "google":
54
- return await validateGoogle(key);
73
+ result = await validateGoogle(key);
74
+ break;
55
75
  }
76
+ result.latencyMs = Date.now() - start;
77
+ return result;
56
78
  } catch (err) {
57
79
  return {
58
80
  ok: false,
59
- error: err instanceof Error ? err.message : String(err),
81
+ error: friendlyNetworkError(err),
82
+ latencyMs: Date.now() - start,
60
83
  };
61
84
  }
62
85
  }
63
86
 
64
87
  async function validateOpenAI(key: string): Promise<ValidationResult> {
65
- const model = DEFAULT_VALIDATION_MODELS.openai;
88
+ const model = DEFAULT_MODELS.openai;
66
89
  const controller = new AbortController();
67
90
  const timer = setTimeout(() => controller.abort(), VALIDATION_TIMEOUT_MS);
68
91
  try {
@@ -80,7 +103,13 @@ async function validateOpenAI(key: string): Promise<ValidationResult> {
80
103
  signal: controller.signal,
81
104
  });
82
105
  if (!res.ok) {
83
- return { ok: false, error: await res.text().catch(() => res.statusText) };
106
+ const rawBody = await res.text().catch(() => "");
107
+ return {
108
+ ok: false,
109
+ statusCode: res.status,
110
+ error: normalizeOpenAIError(res.status, rawBody),
111
+ rawBody,
112
+ };
84
113
  }
85
114
  const data = (await res.json()) as { model?: string };
86
115
  return { ok: true, model: data.model ?? model };
@@ -90,7 +119,7 @@ async function validateOpenAI(key: string): Promise<ValidationResult> {
90
119
  }
91
120
 
92
121
  async function validateAnthropic(key: string): Promise<ValidationResult> {
93
- const model = DEFAULT_VALIDATION_MODELS.anthropic;
122
+ const model = DEFAULT_MODELS.anthropic;
94
123
  const controller = new AbortController();
95
124
  const timer = setTimeout(() => controller.abort(), VALIDATION_TIMEOUT_MS);
96
125
  try {
@@ -109,7 +138,13 @@ async function validateAnthropic(key: string): Promise<ValidationResult> {
109
138
  signal: controller.signal,
110
139
  });
111
140
  if (!res.ok) {
112
- return { ok: false, error: await res.text().catch(() => res.statusText) };
141
+ const rawBody = await res.text().catch(() => "");
142
+ return {
143
+ ok: false,
144
+ statusCode: res.status,
145
+ error: normalizeAnthropicError(res.status, rawBody),
146
+ rawBody,
147
+ };
113
148
  }
114
149
  const data = (await res.json()) as { model?: string };
115
150
  return { ok: true, model: data.model ?? model };
@@ -119,16 +154,12 @@ async function validateAnthropic(key: string): Promise<ValidationResult> {
119
154
  }
120
155
 
121
156
  async function validateGoogle(key: string): Promise<ValidationResult> {
122
- const model = DEFAULT_VALIDATION_MODELS.google;
157
+ const model = DEFAULT_MODELS.google;
123
158
  const controller = new AbortController();
124
159
  const timer = setTimeout(() => controller.abort(), VALIDATION_TIMEOUT_MS);
125
160
  try {
126
- // Use x-goog-api-key header rather than ?key= URL parameter. URLs
127
- // leak into proxy logs, HTTP tooling, crash diagnostics. The header
128
- // path is supported by all v1beta endpoints. Codex's v0.8 security
129
- // review flagged the URL-key approach as a P1.
130
- const url =
131
- `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
161
+ // x-goog-api-key header (per v0.9.1 fix) — never the URL query.
162
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
132
163
  const res = await fetch(url, {
133
164
  method: "POST",
134
165
  headers: {
@@ -142,10 +173,103 @@ async function validateGoogle(key: string): Promise<ValidationResult> {
142
173
  signal: controller.signal,
143
174
  });
144
175
  if (!res.ok) {
145
- return { ok: false, error: await res.text().catch(() => res.statusText) };
176
+ const rawBody = await res.text().catch(() => "");
177
+ return {
178
+ ok: false,
179
+ statusCode: res.status,
180
+ error: normalizeGoogleError(res.status, rawBody, key),
181
+ rawBody,
182
+ };
146
183
  }
147
184
  return { ok: true, model };
148
185
  } finally {
149
186
  clearTimeout(timer);
150
187
  }
151
188
  }
189
+
190
+ // ─── Error normalization (Codex v0.8 P2 #6) ──────────────────────────
191
+ //
192
+ // Parse known provider JSON error shapes into short, user-safe messages.
193
+ // Never echo the raw key back even by accident (defense in depth: the
194
+ // Google normalizer also redacts the submitted key if it appears in the message).
195
+
196
/**
 * Convert an OpenAI failure response into a short, user-safe message.
 *
 * Expected body shape:
 *   { "error": { "message": "...", "type": "...", "code": "..." } }
 *
 * @param status  HTTP status code of the failed response.
 * @param rawBody Response body text (may be empty or not JSON).
 * @returns `[status] OpenAI: <message>` (message capped at 200 chars) when
 *          the body carries an error message, otherwise the generic
 *          status-based message.
 */
function normalizeOpenAIError(status: number, rawBody: string): string {
  try {
    const { error } = JSON.parse(rawBody) as {
      error?: { message?: string; type?: string; code?: string };
    };
    const detail = error?.message;
    if (detail) {
      return `[${status}] OpenAI: ${truncate(detail, 200)}`;
    }
  } catch {
    // Body was not parseable JSON (or not the expected shape); use the
    // status-only fallback below.
  }
  return statusOnlyMessage("OpenAI", status);
}
209
+
210
/**
 * Convert an Anthropic failure response into a short, user-safe message.
 *
 * Expected body shape:
 *   { "type": "error", "error": { "type": "...", "message": "..." } }
 *
 * @param status  HTTP status code of the failed response.
 * @param rawBody Response body text (may be empty or not JSON).
 * @returns `[status] Anthropic: <message>` (message capped at 200 chars)
 *          when the body carries an error message, otherwise the generic
 *          status-based message.
 */
function normalizeAnthropicError(status: number, rawBody: string): string {
  try {
    const { error } = JSON.parse(rawBody) as {
      error?: { type?: string; message?: string };
    };
    const detail = error?.message;
    if (detail) {
      return `[${status}] Anthropic: ${truncate(detail, 200)}`;
    }
  } catch {
    // Body was not parseable JSON (or not the expected shape); use the
    // status-only fallback below.
  }
  return statusOnlyMessage("Anthropic", status);
}
223
+
224
/**
 * Convert a Google (Generative Language API) failure response into a short,
 * user-safe message.
 *
 * Expected body shape:
 *   { "error": { "code": N, "message": "...", "status": "..." } }
 *
 * @param status       HTTP status code of the failed response.
 * @param rawBody      Response body text (may be empty or not JSON).
 * @param submittedKey The API key that was sent; every occurrence of it in
 *                     the provider message is replaced with `<redacted-key>`.
 * @returns `[status] Google: <message>` (message capped at 200 chars) when
 *          the body carries an error message, otherwise the generic
 *          status-based message.
 */
function normalizeGoogleError(
  status: number,
  rawBody: string,
  submittedKey: string,
): string {
  try {
    const parsed = JSON.parse(rawBody) as {
      error?: { code?: number; message?: string; status?: string };
    };
    let msg = parsed.error?.message ?? "";
    // Belt-and-braces redaction: if the provider ever echoes the submitted
    // key inside its error message, strip it before the message is shown.
    // split/join replaces EVERY occurrence; `String.prototype.replace` with
    // a string pattern would only replace the first one. Redaction runs
    // before truncation so a key straddling the cut-off cannot survive as a
    // partial fragment of an unredacted match.
    if (submittedKey) {
      msg = msg.split(submittedKey).join("<redacted-key>");
    }
    if (msg) return `[${status}] Google: ${truncate(msg, 200)}`;
  } catch {
    // Body was not parseable JSON (or not the expected shape); use the
    // status-only fallback below.
  }
  return statusOnlyMessage("Google", status);
}
247
+
248
/**
 * Build a generic failure message from the HTTP status alone, used when the
 * provider body could not be parsed into a specific error message.
 *
 * @param provider Display name of the provider (e.g. "OpenAI").
 * @param status   HTTP status code of the failed response.
 * @returns A one-line message prefixed with `[status]`.
 */
function statusOnlyMessage(provider: string, status: number): string {
  switch (status) {
    case 401:
    case 403:
      return `[${status}] ${provider} rejected the key (unauthorized).`;
    case 429:
      return `[${status}] ${provider} rate-limited or quota exceeded.`;
    default:
      // Anything 5xx-or-above is treated as a provider-side failure;
      // everything else is reported as unrecognized.
      return status >= 500
        ? `[${status}] ${provider} returned a server error; try again.`
        : `[${status}] ${provider} returned an unrecognized error.`;
  }
}
260
+
261
/**
 * Cap a string at `n` UTF-16 code units, marking the cut with an ellipsis.
 *
 * @param s Text to shorten.
 * @param n Maximum length of the result, in UTF-16 code units.
 * @returns `s` unchanged when it already fits; otherwise a prefix of `s`
 *          followed by "…", never longer than `n`. Returns "" when
 *          `n <= 0` and `s` is non-empty.
 */
function truncate(s: string, n: number): string {
  if (s.length <= n) return s;
  // A non-positive budget leaves no room even for the ellipsis. Without this
  // guard, slice(0, n - 1) would receive a negative index and return almost
  // the whole string.
  if (n <= 0) return "";
  let end = n - 1;
  // Don't cut a surrogate pair in half: if the last kept code unit is a
  // high surrogate, drop it so the result has no lone surrogate.
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return s.slice(0, end) + "…";
}
264
+
265
/**
 * Map a thrown fetch/network failure to a short, user-safe message.
 *
 * Node's `fetch` reports DNS and connection failures as a generic
 * `TypeError: fetch failed` whose underlying errno (`ENOTFOUND`,
 * `ECONNREFUSED`, …) lives on `err.cause`, not in `err.message`. The cause
 * chain is therefore inspected as well as the top-level message.
 *
 * @param err Whatever was thrown while validating (not necessarily an Error).
 * @returns A timeout message for aborted requests, an unreachable-provider
 *          message for DNS/connect failures, otherwise `Network error: …`
 *          with the top-level message capped at 200 characters.
 */
function friendlyNetworkError(err: unknown): string {
  const msg = err instanceof Error ? err.message : String(err);
  const name = err instanceof Error ? err.name : "";

  // AbortController-driven timeouts surface as AbortError (or TimeoutError
  // for AbortSignal.timeout); keep the message check as a fallback.
  if (name === "AbortError" || name === "TimeoutError" || msg.includes("abort")) {
    return "Validation timed out (network or provider too slow).";
  }

  // Collect the message plus any `code` / `message` found along the cause
  // chain. Depth is bounded so a cyclic chain cannot loop forever.
  const details: string[] = [msg];
  let cause: unknown =
    err instanceof Error ? (err as { cause?: unknown }).cause : undefined;
  for (let depth = 0; depth < 5 && cause != null; depth++) {
    if (typeof cause !== "object") {
      details.push(String(cause));
      break;
    }
    const link = cause as { code?: unknown; message?: unknown; cause?: unknown };
    if (typeof link.code === "string") details.push(link.code);
    if (typeof link.message === "string") details.push(link.message);
    cause = link.cause;
  }

  const unreachableCodes = ["ENOTFOUND", "ECONNREFUSED"];
  if (details.some((d) => unreachableCodes.some((code) => d.includes(code)))) {
    return "Could not reach the provider (network issue).";
  }

  // Generic — but never include random stack data; just the message.
  return `Network error: ${truncate(msg, 200)}`;
}
package/src/index.ts CHANGED
@@ -13,7 +13,7 @@
13
13
  * - budget/ ✅ task #14 BudgetTracker + BudgetExceededError
14
14
  */
15
15
 
16
- export const VERSION = "0.9.1";
16
+ export const VERSION = "0.10.0";
17
17
 
18
18
  // MCP server (v0.9.0+) — exposes AlmightyGPT's review surface as MCP tools.
19
19
  export { startMcpServer } from "./mcp/server.js";
@@ -143,6 +143,18 @@ export {
143
143
  type KeychainAdapter,
144
144
  } from "./auth/keychain.js";
145
145
  export { validateKey, type ValidationResult } from "./auth/validator.js";
146
+ // Plan subsystem (v0.10.0+) — Worker plan + Reviewer plan-review
147
+ export {
148
+ runWorkerPlan,
149
+ type PlanOptions,
150
+ type PlanResult,
151
+ } from "./plan/run-plan.js";
152
+ export {
153
+ runPlanReview,
154
+ type PlanReviewOptions,
155
+ type PlanReviewResult,
156
+ } from "./plan/run-plan-review.js";
157
+
146
158
  export {
147
159
  AuthMissingError,
148
160
  PROVIDER_ENV_VARS,
@@ -0,0 +1,302 @@
1
+ /**
2
+ * `almightygpt review --plan <file>` — Reviewer AI critiques a PLAN
3
+ * doc (not a git diff). Same review primitives as runDiffReview but
4
+ * the input is the plan markdown and the framing tells the Reviewer
5
+ * to critique the plan's structure, completeness, and risks rather
6
+ * than line-by-line code.
7
+ *
8
+ * Output lands at `<reviewsDir>/plan-<topic>.md` (prefix distinguishes
9
+ * plan reviews from diff reviews when they share the same topic name).
10
+ */
11
+
12
+ import { readFile } from "node:fs/promises";
13
+ import { join } from "node:path";
14
+ import { loadConfig } from "../config/load.js";
15
+ import { makeAdapter } from "../adapters/factory.js";
16
+ import { AdapterError } from "../adapters/types.js";
17
+ import { assembleMemory } from "../review/memory.js";
18
+ import {
19
+ createRunFolder,
20
+ writeRunMetadata,
21
+ writeRunInput,
22
+ writeAgentOutput,
23
+ collectGitContext,
24
+ } from "../runs/folder.js";
25
+ import {
26
+ writeHumanReviewFile,
27
+ preflightReviewFileCollision,
28
+ } from "../review/write.js";
29
+
30
/** Inputs for {@link runPlanReview}. */
export interface PlanReviewOptions {
  /** Repository root; config is loaded from it and `planPath` is joined onto it. */
  repoRoot: string;
  /** Topic name; the review is filed under the derived topic `plan-<topic>`. */
  topic: string;
  /** Path (relative to repoRoot) to the plan markdown to review. */
  planPath: string;
  /** Reviewer agent name; falls back to `defaults.reviewer` from config when omitted. */
  reviewer?: string;
  /**
   * Forwarded to the review-file collision preflight and to the review
   * writer. Presumably allows overwriting an existing review file — confirm
   * against `review/write.ts`.
   */
  force?: boolean;
}
38
+
39
/** Summary returned by {@link runPlanReview} after the review file is written. */
export interface PlanReviewResult {
  /** Path of the written review file, as reported by the review writer. */
  reviewPath: string;
  /** Size of the written review file, as reported by the review writer. */
  reviewBytes: number;
  /** Name of the reviewer agent that was used. */
  reviewer: string;
  /** Provider id reported by the adapter. */
  provider: string;
  /** Model reported by the adapter for this call. */
  modelUsed: string;
  /** Input token count reported by the adapter. */
  tokensIn: number;
  /** Cached input token count reported by the adapter; 0 when not reported. */
  cachedTokensIn: number;
  /** Output token count reported by the adapter. */
  tokensOut: number;
  /** Cost reported by the adapter. */
  costUsd: number;
  /** Latency reported by the adapter. */
  latencyMs: number;
  /** `sources` list returned by memory assembly (files that went into the system prompt — presumably; confirm in `review/memory.ts`). */
  memorySources: { path: string; bytes: number }[];
  /** `missing` list returned by memory assembly. */
  memoryMissing: string[];
  /** Id of the run folder created for this review. */
  runId: string;
  /** Relative path of the run folder created for this review. */
  runFolder: string;
  /** Set only when the shallow-review heuristic produced a warning. */
  shallowWarning?: string;
}
56
+
57
/**
 * Fixed system-prompt preamble for plan reviews. `runPlanReview` prepends it
 * to the assembled reviewer memory (separated by a blank line).
 *
 * The section list below is part of the output contract: the shallow-review
 * heuristic looks for a `## Concrete Weaknesses` heading in the response.
 *
 * NOTE(review): the prompt hard-codes "at least 3" weaknesses while the
 * shallow check uses the configurable `review.requireConcreteWeaknesses` —
 * confirm whether the two are meant to stay in sync.
 */
const PLAN_REVIEW_SYSTEM_FRAMING = [
  "You are the Reviewer AI in an AlmightyGPT Plan-review stage.",
  "",
  "WHAT YOU ARE REVIEWING: the PLAN markdown supplied below. NOT a git",
  "diff and NOT code — a plan that a Worker AI drafted in response to a",
  "human's requirement. The plan has not been implemented yet. Your job",
  "is to find what's WRONG with the plan: missing steps, hidden",
  "dependencies, risks the Worker didn't surface, ambiguous decisions,",
  "things that will break in production, edge cases the test plan misses.",
  "",
  "WHAT THE ORCHESTRATOR OWNS (do NOT include these in your response):",
  " - The H1 title (orchestrator prepends `# Plan Review: <topic>`).",
  " - The header block with model / tokens / cost.",
  " - The `## Cost and Latency` and `## Appendix: Raw Outputs` sections.",
  "",
  "WHAT YOU MUST PRODUCE — start with `## Decision Required` and emit",
  "ONLY these sections in this order:",
  " ## Decision Required",
  " ## Highest-Risk Findings",
  " ## Concrete Weaknesses",
  " ## Worker Plan Summary",
  " ## Test Plan",
  " ## Human Decision",
  "",
  "ANTI-SYCOPHANCY (non-negotiable):",
  " - Find at least 3 concrete weaknesses with specific file / step / line",
  " references from the plan.",
  " - A finding without a specific anchor is too vague.",
  " - 'Looks good, minor suggestions' is a FAILED review — recalibrate.",
  "",
  "REVIEW LENS — focus on what plans commonly get wrong:",
  " - Steps assume capabilities that don't exist yet",
  " - Risks section underweights production blast radius",
  " - Test plan is generic ('add tests') instead of named cases",
  " - Open questions are missing things the human will actually need to",
  " decide before implementation",
  " - Affected modules list is incomplete (the plan touches more surfaces",
  " than it admits)",
  " - Migration / rollback story is missing or hand-wavy",
].join("\n");
97
+
98
/**
 * Build the user message for a plan review: a header naming the topic, the
 * plan wrapped in a fenced `markdown` block, and the task instruction.
 *
 * The fence is sized to the plan: it is always at least three backticks and
 * always one longer than the longest backtick run inside the plan, so a plan
 * that contains its own fenced code blocks cannot terminate the wrapper
 * early. Plans without such runs produce the same output as a fixed
 * three-backtick fence.
 *
 * @param topic       Topic name shown in the request header.
 * @param planContent Raw plan markdown; surrounding whitespace is trimmed.
 * @returns The complete user message, lines joined with "\n".
 */
function buildPlanReviewUserMessage(
  topic: string,
  planContent: string,
): string {
  const plan = planContent.trim();
  const longestBacktickRun = (plan.match(/`+/g) ?? []).reduce(
    (longest, run) => Math.max(longest, run.length),
    0,
  );
  const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));

  return [
    `# Plan-review request — topic: \`${topic}\``,
    "",
    "## The plan to review",
    "",
    `${fence}markdown`,
    plan,
    fence,
    "",
    "## Your task",
    "",
    "Critique the plan above using the structure in your system prompt.",
    "Start your response with `## Decision Required` (no H1, no preamble).",
  ].join("\n");
}
117
+
118
/**
 * Run a Reviewer-AI critique of a plan markdown file and persist the result.
 *
 * Steps, in order:
 *   1. Load config and resolve the reviewer (explicit option, else the
 *      configured default); fail if it is missing, unknown, or disabled.
 *   2. Build the adapter and fail if it reports itself unavailable.
 *   3. Preflight the review-file collision check for topic `plan-<topic>`.
 *   4. Read the plan file, create a run folder, assemble reviewer memory.
 *   5. Record the user message, call the adapter, record its output.
 *   6. Run the shallow-review heuristic, write the human review file, then
 *      write run metadata with status "completed".
 *
 * Run metadata is only written at the end of the success path; if any step
 * after the run folder is created throws, this function does not reach
 * `writeRunMetadata`.
 *
 * @param opts See {@link PlanReviewOptions}.
 * @returns Paths, adapter metrics, memory info and run identifiers.
 * @throws Error when no reviewer is resolved, the reviewer is not in the
 *         agents map, the reviewer is disabled, or the plan file cannot be
 *         read.
 * @throws AdapterError when the adapter is not available.
 */
export async function runPlanReview(
  opts: PlanReviewOptions,
): Promise<PlanReviewResult> {
  const config = await loadConfig(opts.repoRoot);

  // Explicit --reviewer wins; otherwise fall back to the configured default.
  const reviewerName = opts.reviewer ?? config.defaults.reviewer;
  if (!reviewerName) {
    throw new Error(
      "No reviewer specified. Pass --reviewer <name> or set defaults.reviewer in .almightygpt/config.yaml.",
    );
  }
  const agentConfig = config.agents[reviewerName];
  if (!agentConfig) {
    throw new Error(
      `Reviewer "${reviewerName}" not found in .almightygpt/config.yaml agents map.`,
    );
  }
  if (!agentConfig.enabled) {
    throw new Error(`Reviewer "${reviewerName}" is disabled in .almightygpt/config.yaml.`);
  }

  const adapter = makeAdapter(reviewerName, agentConfig.provider);
  if (!(await adapter.isAvailable())) {
    throw new AdapterError(
      `Adapter "${reviewerName}" (${agentConfig.provider}) is not available. Set the provider's API key.`,
      reviewerName,
    );
  }

  // Plan reviews land at <reviewsDir>/plan-<topic>.md so they don't
  // collide with diff reviews on the same topic.
  const planReviewTopic = `plan-${opts.topic}`;
  // Runs before the plan is read and before the adapter is called.
  await preflightReviewFileCollision(
    opts.repoRoot,
    config.reviewsDir,
    planReviewTopic,
    opts.force ?? false,
  );

  // Load the plan to review.
  // NOTE(review): `join` concatenates, so an absolute `planPath` is treated
  // as relative to repoRoot — confirm that callers only pass relative paths.
  let planContent: string;
  try {
    planContent = await readFile(join(opts.repoRoot, opts.planPath), "utf8");
  } catch (err) {
    throw new Error(
      `Could not read plan file at ${opts.planPath}: ${err instanceof Error ? err.message : String(err)}`,
    );
  }

  const runFolder = await createRunFolder({
    repoRoot: opts.repoRoot,
    runsDir: config.runsDir,
    topic: planReviewTopic,
    type: "review-plan",
  });
  // Captured right after the run folder exists; recorded as the run's createdAt.
  const createdAt = new Date().toISOString();

  // System prompt = fixed plan-review framing + assembled reviewer memory.
  const memory = await assembleMemory(opts.repoRoot, agentConfig.memoryFile);
  const systemPrompt = PLAN_REVIEW_SYSTEM_FRAMING + "\n\n" + memory.text;
  const userMessage = buildPlanReviewUserMessage(opts.topic, planContent);

  // Persist the exact user message before calling the provider.
  await writeRunInput(runFolder.absPath, userMessage);

  const adapterOut = await adapter.execute({
    role: "reviewer",
    systemPrompt,
    userMessage,
  });

  await writeAgentOutput(runFolder.absPath, "reviewer", adapterOut.content);

  // Shallow detection: same rule as diff reviews — need N concrete
  // weaknesses with anchors.
  const shallowWarning = detectShallowPlanReview(
    adapterOut.content,
    config.review.requireConcreteWeaknesses,
  );

  const writeOpts: Parameters<typeof writeHumanReviewFile>[0] = {
    repoRoot: opts.repoRoot,
    reviewsDir: config.reviewsDir,
    topic: planReviewTopic,
    reviewerName,
    reviewerProvider: adapter.provider,
    modelUsed: adapterOut.modelUsed,
    body: adapterOut.content,
    metrics: {
      tokensIn: adapterOut.tokensIn,
      tokensOut: adapterOut.tokensOut,
      costUsd: adapterOut.costUsd,
      latencyMs: adapterOut.latencyMs,
    },
    runFolder: runFolder.relPath,
  };
  // Optional fields are assigned only when set, so the options object never
  // carries explicit `undefined` / `false` values for them.
  if (shallowWarning) writeOpts.shallowWarning = shallowWarning;
  if (opts.force) writeOpts.force = opts.force;
  const written = await writeHumanReviewFile(writeOpts);

  const git = await collectGitContext(opts.repoRoot);
  await writeRunMetadata(runFolder.absPath, {
    id: runFolder.id,
    type: "review-plan",
    createdAt,
    finishedAt: new Date().toISOString(),
    workspacePath: opts.repoRoot,
    topic: planReviewTopic,
    git,
    // NOTE(review): the plan file is recorded with source "requirement-file";
    // confirm this is the intended source label for plan reviews.
    input: { source: "requirement-file", path: opts.planPath },
    agents: [
      {
        name: reviewerName,
        role: "reviewer",
        provider: agentConfig.provider,
        enabled: true,
      },
    ],
    adapterVersions: [],
    status: "completed",
    metrics: [
      {
        agent: reviewerName,
        role: "reviewer",
        provider: adapter.provider,
        model: adapterOut.modelUsed,
        tokensIn: adapterOut.tokensIn,
        cachedTokensIn: adapterOut.cachedTokensIn ?? 0,
        tokensOut: adapterOut.tokensOut,
        costUsd: adapterOut.costUsd,
        latencyMs: adapterOut.latencyMs,
      },
    ],
    // Single-agent run: totals mirror the one reviewer's metrics.
    totals: {
      tokensIn: adapterOut.tokensIn,
      tokensOut: adapterOut.tokensOut,
      costUsd: adapterOut.costUsd,
      latencyMs: adapterOut.latencyMs,
    },
    reviewPath: written.path,
    budget: {
      maxCostPerRunUsd: config.budget.maxCostPerRunUsd,
      maxTokensPerRun: config.budget.maxTokensPerRun,
    },
  });

  const result: PlanReviewResult = {
    reviewPath: written.path,
    reviewBytes: written.bytes,
    reviewer: reviewerName,
    provider: adapter.provider,
    modelUsed: adapterOut.modelUsed,
    tokensIn: adapterOut.tokensIn,
    cachedTokensIn: adapterOut.cachedTokensIn ?? 0,
    tokensOut: adapterOut.tokensOut,
    costUsd: adapterOut.costUsd,
    latencyMs: adapterOut.latencyMs,
    memorySources: memory.sources,
    memoryMissing: memory.missing,
    runId: runFolder.id,
    runFolder: runFolder.relPath,
  };
  if (shallowWarning) result.shallowWarning = shallowWarning;
  return result;
}
281
+
282
/**
 * Heuristic check for a shallow plan review.
 *
 * Two signals are inspected, in this order:
 *   1. Anchored references anywhere in the text — the words file / step /
 *      line / section followed by a token (e.g. `step 3`, `file: src/a.ts`).
 *      Zero anchors yields a warning.
 *   2. The number of list-like lines inside the `## Concrete Weaknesses`
 *      section. The section runs until the next H1/H2 heading or the end of
 *      the text; `###`-and-deeper subheadings stay inside it. Fewer lines
 *      than `requireConcreteWeaknesses` yields a warning.
 *
 * Intended to mirror the diff-review heuristic (per the original comment;
 * that implementation is not visible from here).
 *
 * @param content                   Reviewer response markdown.
 * @param requireConcreteWeaknesses Minimum number of weakness entries.
 * @returns A warning message, or `undefined` when the review passes.
 */
function detectShallowPlanReview(
  content: string,
  requireConcreteWeaknesses: number,
): string | undefined {
  // Allow optional spaces/tabs after the separator so the common
  // "file: path" form counts as an anchor, not only "file:path" / "file path".
  const anchorPattern = /\b(?:file|step|line|section)[:\s][ \t]*[\w\-./]+/gi;
  const anchors = content.match(anchorPattern) ?? [];

  // End the section at the next H1/H2 heading on its own line (or end of
  // input). A bare "##" lookahead would also stop at "###" subheadings and
  // cut the section short.
  const weaknessesSection = content.match(
    /## Concrete Weaknesses([\s\S]*?)(?=\n#{1,2}\s|$)/i,
  );
  // Lines starting with a bullet marker, a digit, or a dot count as entries.
  const weaknessBullets =
    weaknessesSection?.[1]?.match(/^\s*[-*\d.]/gm)?.length ?? 0;

  if (anchors.length === 0) {
    return "Plan review has zero anchored references (file / step / line). The Reviewer may not have engaged with specifics — consider re-running.";
  }
  if (weaknessBullets < requireConcreteWeaknesses) {
    return `Plan review listed ${weaknessBullets} concrete weaknesses, fewer than the configured minimum of ${requireConcreteWeaknesses}. Output may be shallow — consider re-running with a more rigorous Reviewer.`;
  }
  return undefined;
}