@contextrail/code-review-agent 0.1.1 → 0.1.2-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/cli/help.d.ts +2 -0
  2. package/dist/cli/help.js +48 -0
  3. package/dist/cli/index.d.ts +3 -0
  4. package/dist/cli/index.js +2 -0
  5. package/dist/cli/parser.d.ts +3 -0
  6. package/dist/cli/parser.js +144 -0
  7. package/dist/cli/types.d.ts +17 -0
  8. package/dist/cli/types.js +1 -0
  9. package/dist/config/index.d.ts +2 -0
  10. package/dist/config/index.js +16 -0
  11. package/dist/errors/error-utils.d.ts +2 -0
  12. package/dist/errors/error-utils.js +37 -0
  13. package/dist/index.js +40 -578
  14. package/dist/lifecycle.d.ts +4 -0
  15. package/dist/lifecycle.js +52 -0
  16. package/dist/orchestrator/agentic-orchestrator.d.ts +2 -0
  17. package/dist/orchestrator/agentic-orchestrator.js +9 -13
  18. package/dist/orchestrator/writer.js +1 -1
  19. package/dist/output/aggregator.d.ts +2 -1
  20. package/dist/output/aggregator.js +3 -12
  21. package/dist/output/schema.d.ts +86 -86
  22. package/dist/output/summary-logger.d.ts +3 -0
  23. package/dist/output/summary-logger.js +81 -0
  24. package/dist/pipeline.d.ts +25 -0
  25. package/dist/pipeline.js +276 -0
  26. package/dist/prompts/blocks.d.ts +25 -0
  27. package/dist/prompts/blocks.js +129 -0
  28. package/dist/prompts/decision.d.ts +15 -0
  29. package/dist/prompts/decision.js +30 -0
  30. package/dist/prompts/index.d.ts +5 -0
  31. package/dist/prompts/index.js +5 -0
  32. package/dist/{orchestrator/prompts.d.ts → prompts/orchestrator.d.ts} +2 -3
  33. package/dist/{orchestrator/prompts.js → prompts/orchestrator.js} +2 -3
  34. package/dist/prompts/reviewer.d.ts +12 -0
  35. package/dist/prompts/reviewer.js +107 -0
  36. package/dist/{output/prompts.d.ts → prompts/synthesis.d.ts} +1 -16
  37. package/dist/{output/prompts.js → prompts/synthesis.js} +0 -30
  38. package/dist/review-inputs/filtering.d.ts +14 -0
  39. package/dist/review-inputs/filtering.js +97 -8
  40. package/dist/review-inputs/index.js +59 -13
  41. package/dist/reviewers/executor.d.ts +2 -0
  42. package/dist/reviewers/executor.js +1 -8
  43. package/dist/reviewers/prompt.d.ts +2 -28
  44. package/dist/reviewers/prompt.js +5 -235
  45. package/package.json +1 -1
@@ -0,0 +1,3 @@
1
import type { Logger } from '../logging/logger.js';
import type { ReviewResult, ReviewerResult } from './schema.js';
/**
 * Logs a human-readable end-of-run summary: file/reviewer counts, severity
 * tallies, the final decision, per-reviewer pass summaries, per-finding detail
 * (at debug level), and reviewer failures (at warn level).
 */
export declare const logReviewSummary: (log: Logger, result: ReviewResult, reviewerResults: ReviewerResult[]) => void;
@@ -0,0 +1,81 @@
1
+ export const logReviewSummary = (log, result, reviewerResults) => {
2
+ log.info('\n═══════════════════════════════════════════════════════════');
3
+ log.info('Review Summary');
4
+ log.info('═══════════════════════════════════════════════════════════');
5
+ log.info(` Files reviewed: ${result.metadata.fileCount}`);
6
+ log.info(` Reviewers executed: ${reviewerResults.length}`);
7
+ log.info(` Total issue findings: ${result.summary.totalFindings}`);
8
+ log.info(` Critical: ${result.summary.bySeverity.critical}`);
9
+ log.info(` Major: ${result.summary.bySeverity.major}`);
10
+ log.info(` Minor: ${result.summary.bySeverity.minor}`);
11
+ log.info(` Info: ${result.summary.bySeverity.info}`);
12
+ log.info(` Pass signals: ${result.summary.bySeverity.pass}`);
13
+ if (result.summary.totalFindings > 0) {
14
+ log.info('\n Findings by reviewer:');
15
+ for (const [reviewer, count] of Object.entries(result.summary.byReviewer)) {
16
+ log.info(` ${reviewer}: ${count} finding(s)`);
17
+ }
18
+ }
19
+ log.info(`\n Decision: ${result.decision.decision.toUpperCase()}`);
20
+ log.info(` Summary: ${result.decision.summary}`);
21
+ log.info(` Rationale: ${result.decision.rationale}`);
22
+ const passReviewers = reviewerResults
23
+ .map((rr) => ({
24
+ reviewer: rr.reviewer,
25
+ passFinding: rr.findings.find((f) => f.severity === 'pass'),
26
+ notes: rr.notes,
27
+ validated: rr.validated,
28
+ }))
29
+ .filter((rr) => rr.validated && (rr.passFinding || rr.notes));
30
+ if (passReviewers.length > 0) {
31
+ log.info('\n Reviewer pass summaries:');
32
+ for (const rr of passReviewers) {
33
+ const passLine = rr.notes?.trim() ?? rr.passFinding?.title ?? 'PASS';
34
+ log.info(` ${rr.reviewer}: ${passLine.split('\n')[0] ?? passLine}`);
35
+ }
36
+ }
37
+ if (result.synthesis && result.synthesis.findings.length > 0) {
38
+ log.debug('\n Deduplicated findings (synthesis):');
39
+ for (const finding of result.synthesis.findings) {
40
+ log.debug(` [${finding.severity.toUpperCase()}] ${finding.title}`);
41
+ if (finding.file) {
42
+ log.debug(` File: ${finding.file}${finding.line ? `:${finding.line}` : ''}`);
43
+ }
44
+ if (finding.sourceReviewers && finding.sourceReviewers.length > 0) {
45
+ log.debug(` Source reviewers: ${finding.sourceReviewers.join(', ')}`);
46
+ }
47
+ if (finding.contextTitles && finding.contextTitles.length > 0) {
48
+ log.debug(` ContextRail Standards: ${finding.contextTitles.join(', ')}`);
49
+ }
50
+ log.debug(` ${finding.description}`);
51
+ }
52
+ }
53
+ else {
54
+ const hasAnyEntries = reviewerResults.some((rr) => rr.findings.length > 0);
55
+ if (hasAnyEntries) {
56
+ log.debug('\n All findings:');
57
+ for (const rr of reviewerResults) {
58
+ if (rr.findings.length > 0) {
59
+ log.debug(`\n ${rr.reviewer}:`);
60
+ for (const finding of rr.findings) {
61
+ log.debug(` [${finding.severity.toUpperCase()}] ${finding.title}`);
62
+ if (finding.file) {
63
+ log.debug(` File: ${finding.file}${finding.line ? `:${finding.line}` : ''}`);
64
+ }
65
+ if (finding.contextTitles && finding.contextTitles.length > 0) {
66
+ log.debug(` ContextRail Standards: ${finding.contextTitles.join(', ')}`);
67
+ }
68
+ log.debug(` ${finding.description}`);
69
+ }
70
+ }
71
+ }
72
+ }
73
+ }
74
+ if (result.failures && result.failures.length > 0) {
75
+ log.warn('\n Reviewer failures:');
76
+ for (const failure of result.failures) {
77
+ log.warn(` ${failure.reviewer}: ${failure.message}`);
78
+ }
79
+ }
80
+ log.info('═══════════════════════════════════════════════════════════\n');
81
+ };
@@ -0,0 +1,25 @@
1
import type { ValidatedReviewAgentConfig } from './config/index.js';
import type { Logger } from './logging/logger.js';
import { McpClient } from './mcp/client.js';
/**
 * Inputs required to run the full review pipeline.
 */
export type PipelineInput = {
    /** Validated agent configuration (models, MCP server, limits). */
    config: ValidatedReviewAgentConfig;
    /** Path to the repository under review. */
    repoPath: string;
    /** Directory where result.json and token-budget.json are written. */
    outputDir: string;
    /** Explicit file list; when non-empty it takes precedence over from/to diff mode. */
    files?: string[];
    /** Start ref of the diff range (diff mode only). */
    from?: string;
    /** End ref of the diff range (diff mode only). */
    to?: string;
    /** Optional PR description forwarded into orchestrator/reviewer prompts. */
    prDescription?: string;
    /** Logger used for all pipeline output. */
    log: Logger;
    /** Invoked with the MCP client right after construction, before connect(). */
    onMcpClientReady?: (client: McpClient) => void;
};
/**
 * Outcome of a completed pipeline run.
 */
export type PipelineResult = {
    /** Final (normalized) review decision string. */
    decision: string;
    /** Filesystem path of the written result.json. */
    resultPath: string;
    /** Total issue findings reported in the aggregated result. */
    totalFindings: number;
    /** True when at least one reviewer did not validate its findings. */
    hasValidationFailures: boolean;
    /** Per-reviewer failures captured during execution (empty when none). */
    failures: Array<{
        reviewer: string;
        message: string;
    }>;
};
export declare const runReview: (input: PipelineInput) => Promise<PipelineResult>;
@@ -0,0 +1,276 @@
1
+ import path from 'node:path';
2
+ import { DEFAULT_ORCHESTRATOR_MODEL, DEFAULT_REVIEWER_MODEL } from './config/defaults.js';
3
+ import { McpClient } from './mcp/client.js';
4
+ import { createLlmService } from './llm/factory.js';
5
+ import { buildReviewInputs, triagePr } from './review-inputs/index.js';
6
+ import { runOrchestrator } from './orchestrator/agentic-orchestrator.js';
7
+ import { runReviewerLoop } from './reviewers/executor.js';
8
+ import { aggregateResults, writeResult, writeTokenBudgetMetrics } from './output/writer.js';
9
+ import { logReviewSummary } from './output/summary-logger.js';
10
+ import { metadataSchema, reviewerFindingsSchema } from './output/schema.js';
11
+ import { generateReviewDecision, normalizeDecisionWithSynthesis, synthesizeFindings } from './output/aggregator.js';
12
+ import { serializeError, toError } from './errors/error-utils.js';
13
/**
 * Runs the full review pipeline: connect to MCP, build review inputs, triage,
 * orchestrate reviewer selection, run reviewers in parallel, synthesize and
 * decide, then write result.json and token-budget metrics.
 *
 * Reviewer failures degrade to empty unvalidated results; the pipeline only
 * throws for infrastructure errors (e.g. MCP connection failure).
 */
export const runReview = async (input) => {
    const { config, repoPath, outputDir, files, from, to, prDescription, log, onMcpClientReady } = input;
    // Resolve model fallbacks once so every pass (orchestrator, synthesis,
    // decision) agrees on the same effective model.
    const orchestratorModel = config.orchestratorModel ?? DEFAULT_ORCHESTRATOR_MODEL;
    const reviewerModel = config.reviewerModel ?? DEFAULT_REVIEWER_MODEL;
    log.info('Starting code review...');
    log.info(`Repository: ${repoPath}`);
    log.info(`Output: ${outputDir}`);
    if (files && files.length > 0) {
        log.info(`Files: ${files.join(', ')}`);
    }
    else {
        log.info(`From: ${from}`);
        log.info(`To: ${to}`);
    }
    const mcpClient = new McpClient({
        serverUrl: config.mcpServerUrl,
        authToken: config.mcpAuthToken,
        clientName: 'code-review-agent',
        // NOTE(review): hardcoded version lags the package version — consider
        // deriving this from package.json to avoid drift.
        clientVersion: '0.1.0',
        logger: log,
    });
    // Hand the client to the caller before connecting (e.g. for shutdown hooks).
    onMcpClientReady?.(mcpClient);
    // Create a single LLM service instance for the entire pipeline.
    const { service: llmService } = createLlmService({
        openRouterApiKey: config.openRouterApiKey,
        mcpClient,
        logger: log,
    });
    try {
        try {
            await mcpClient.connect();
            log.info('Connected to MCP server');
        }
        catch (error) {
            const connectionError = toError(error);
            log.error({
                msg: 'Failed to connect to MCP server',
                serverUrl: config.mcpServerUrl,
                error: serializeError(connectionError),
            });
            throw new Error(`Failed to connect to MCP server at ${config.mcpServerUrl}: ${connectionError.message}`, {
                cause: connectionError,
            });
        }
        // Build review inputs. An explicit file list takes precedence over a
        // git diff range; both modes share the same surrounding-context limits.
        log.info('Building review inputs...');
        const surroundingContext = {
            enabled: true,
            maxTokensPerFile: config.maxTokensPerFile,
            contextLines: config.contextLines,
        };
        const inputs = files && files.length > 0
            ? await buildReviewInputs({
                mode: 'file-list',
                files,
                basePath: repoPath,
                surroundingContext,
            })
            : await buildReviewInputs({
                mode: 'diff',
                repoPath,
                from,
                to,
                surroundingContext,
            });
        log.info(`Found ${inputs.files.length} files to review`);
        if (inputs.files.length > 0) {
            log.debug(` Files: ${inputs.files.slice(0, 10).join(', ')}${inputs.files.length > 10 ? ` ... and ${inputs.files.length - 10} more` : ''}`);
        }
        // Triage PR to determine if it's trivial.
        const triageResult = triagePr(inputs);
        log.info(`PR triage: ${triageResult.reason}`);
        if (triageResult.isTrivial) {
            log.info(`Trivial PR detected (${triageResult.isDocsOnly ? 'docs-only' : 'small'}). Skipping full reviewer flow.`);
            // For trivial PRs, we still run the orchestrator; it can use this
            // information to select fewer reviewers or skip entirely.
        }
        // Run orchestrator to understand the change and select reviewers.
        log.info('Running orchestrator...');
        if (prDescription) {
            log.debug('PR description provided, will be included in prompts');
        }
        if (config.reviewDomains && config.reviewDomains.length > 0) {
            log.debug(`Review domains provided: ${config.reviewDomains.join(', ')}`);
        }
        const orchestratorOutput = await runOrchestrator(inputs, outputDir, {
            mcpClient,
            llmService,
            config: {
                openRouterApiKey: config.openRouterApiKey,
                orchestratorModel,
                maxSteps: config.maxSteps,
                prDescription,
                reviewDomains: config.reviewDomains,
            },
            logger: log,
        });
        log.info(`Selected reviewers: ${orchestratorOutput.reviewers.join(', ')}`);
        log.debug(`Orchestrator understanding:\n${orchestratorOutput.understanding}`);
        // Run reviewers in parallel with progress logging.
        log.info(`Running ${orchestratorOutput.reviewers.length} reviewer(s) in parallel...`);
        const reviewerFailures = [];
        const reviewerResults = await Promise.all(orchestratorOutput.reviewers.map(async (reviewer) => {
            const startTime = Date.now();
            log.info(`[${reviewer}] Starting review...`);
            try {
                const rawFindings = await runReviewerLoop(reviewer, inputs, orchestratorOutput.understanding, outputDir, {
                    mcpClient,
                    llmService,
                    config: {
                        openRouterApiKey: config.openRouterApiKey,
                        reviewerModel,
                        criticModel: config.criticModel,
                        maxSteps: config.maxSteps,
                        maxIterations: config.maxIterations,
                        prDescription,
                        reviewDomains: config.reviewDomains,
                    },
                    logger: log,
                });
                // Validate the reviewer payload before trusting it downstream.
                const parsedFindings = reviewerFindingsSchema.safeParse(rawFindings);
                if (!parsedFindings.success) {
                    throw new Error(`Reviewer ${reviewer} returned invalid findings payload: ${parsedFindings.error.issues
                        .slice(0, 3)
                        .map((issue) => `${issue.path.join('.') || 'root'}: ${issue.message}`)
                        .join('; ')}`, { cause: parsedFindings.error });
                }
                const findings = parsedFindings.data;
                const result = {
                    reviewer,
                    findings: findings.findings,
                    validated: findings.validated,
                    notes: findings.notes ?? undefined,
                };
                const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                log.info(`[${reviewer}] Completed in ${duration}s`);
                const issueCount = findings.findings.filter((f) => f.severity !== 'pass').length;
                const passCount = findings.findings.filter((f) => f.severity === 'pass').length;
                if (issueCount === 0) {
                    log.info(` ✓ ${reviewer}: Clean pass (pass signals: ${passCount}, validated: ${findings.validated})`);
                }
                else {
                    log.info(` ✓ ${reviewer}: ${issueCount} issue(s) (pass signals: ${passCount}, validated: ${findings.validated})`);
                }
                log.debug(` Findings: ${JSON.stringify(findings.findings, null, 2)}`);
                if (findings.notes) {
                    log.debug(` Notes: ${findings.notes}`);
                }
                return result;
            }
            catch (error) {
                const duration = ((Date.now() - startTime) / 1000).toFixed(1);
                const reviewerError = toError(error);
                const message = reviewerError.message;
                log.error({
                    msg: `[${reviewer}] Failed after ${duration}s`,
                    reviewer,
                    durationSeconds: Number(duration),
                    error: serializeError(reviewerError),
                });
                if (message.includes('No object generated') || message.includes('No output generated')) {
                    log.error({
                        msg: `[${reviewer}] TROUBLESHOOTING`,
                        reviewer,
                        guidance: [
                            'The model may not support structured output well.',
                            'Try using a different model (e.g., anthropic/claude-haiku-4.5).',
                            'Check model compatibility with structured output.',
                            "Review the model's response format requirements.",
                        ],
                    });
                }
                // A failed reviewer degrades to an empty, unvalidated result so
                // the rest of the run can proceed.
                reviewerFailures.push({ reviewer, message });
                return {
                    reviewer,
                    findings: [],
                    validated: false,
                    notes: `Error: ${message}`,
                };
            }
        }));
        // Aggregate results.
        log.info('Aggregating results...');
        log.debug(` Total issue findings to aggregate: ${reviewerResults.reduce((sum, rr) => sum + rr.findings.filter((f) => f.severity !== 'pass').length, 0)}`);
        log.debug(` Total pass signals to aggregate: ${reviewerResults.reduce((sum, rr) => sum + rr.findings.filter((f) => f.severity === 'pass').length, 0)}`);
        log.debug(` Reviewers validated: ${reviewerResults.filter((rr) => rr.validated).length}/${reviewerResults.length}`);
        const metadata = metadataSchema.parse({
            timestamp: new Date().toISOString(),
            mode: inputs.mode,
            fileCount: inputs.files.length,
        });
        // Synthesize findings across reviewers (deduplication, contradictions,
        // compound risks).
        log.info(`Synthesis pass starting (model: ${orchestratorModel}, reviewers: ${reviewerResults.length})`);
        const synthesisResult = await synthesizeFindings(reviewerResults, {
            llmService,
            model: orchestratorModel,
            maxSteps: config.aggregationMaxSteps,
            logger: log,
        });
        log.info(`Synthesis pass complete: ${synthesisResult.synthesis.findings.length} deduped findings, ${synthesisResult.synthesis.contradictions.length} contradictions, ${synthesisResult.synthesis.compoundRisks.length} compound risks`);
        log.debug(` Synthesis: ${synthesisResult.synthesis.findings.length} deduplicated findings`);
        log.debug(` Contradictions: ${synthesisResult.synthesis.contradictions.length}`);
        log.debug(` Compound risks: ${synthesisResult.synthesis.compoundRisks.length}`);
        const synthesisSeverityCounts = synthesisResult.synthesis.findings.reduce((acc, finding) => {
            acc[finding.severity] += 1;
            return acc;
        }, { critical: 0, major: 0, minor: 0, info: 0, pass: 0 });
        log.debug(` Synthesis severity: ${JSON.stringify(synthesisSeverityCounts)}`);
        // Generate the review decision based on synthesized findings.
        log.info(`Decision pass starting (model: ${orchestratorModel}, synthesis findings: ${synthesisResult.synthesis.findings.length})`);
        const decisionResult = await generateReviewDecision(orchestratorOutput.understanding, synthesisResult.synthesis, {
            llmService,
            model: orchestratorModel,
            maxSteps: config.aggregationMaxSteps,
            logger: log,
        });
        // Normalization enforces that the final decision is consistent with the
        // synthesized evidence, overriding the model when necessary.
        const normalizedDecision = normalizeDecisionWithSynthesis(decisionResult.decision, synthesisResult.synthesis);
        const decisionEvidence = {
            findings: synthesisResult.synthesis.findings.length,
            blockingFindings: synthesisResult.synthesis.findings.filter((f) => f.severity === 'critical' || f.severity === 'major').length,
            contradictions: synthesisResult.synthesis.contradictions.length,
            compoundRisks: synthesisResult.synthesis.compoundRisks.length,
            modelDecision: decisionResult.decision.decision,
            normalizedDecision: normalizedDecision.decision,
        };
        log.debug(` Decision evidence: ${JSON.stringify(decisionEvidence)}`);
        if (normalizedDecision.decision !== decisionResult.decision.decision) {
            log.warn(`Decision normalized from ${decisionResult.decision.decision} to ${normalizedDecision.decision} to match synthesized findings.`);
        }
        log.info(`Decision pass complete: ${normalizedDecision.decision}`);
        log.debug(` Aggregation decision: ${normalizedDecision.decision}`);
        log.debug(` Decision summary: ${normalizedDecision.summary}`);
        log.debug(` Decision rationale: ${normalizedDecision.rationale}`);
        const result = aggregateResults(metadata, orchestratorOutput.understanding, normalizedDecision, reviewerResults, reviewerFailures, synthesisResult.synthesis);
        // Write result.json.
        await writeResult(outputDir, result);
        const resultPath = path.join(outputDir, 'result.json');
        log.info(`Results written to ${resultPath}`);
        log.debug(` Full review results available at: ${resultPath}`);
        // Write token budget metrics (aggregation usage only when present).
        const aggregationUsage = [];
        if (synthesisResult.usage) {
            aggregationUsage.push(synthesisResult.usage);
        }
        if (decisionResult.usage) {
            aggregationUsage.push(decisionResult.usage);
        }
        await writeTokenBudgetMetrics(outputDir, orchestratorOutput.reviewers, aggregationUsage.length > 0 ? aggregationUsage : undefined);
        log.info(`Token budget metrics written to ${path.join(outputDir, 'token-budget.json')}`);
        // Print summary.
        logReviewSummary(log, result, reviewerResults);
        return {
            decision: result.decision.decision,
            resultPath,
            totalFindings: result.summary.totalFindings,
            hasValidationFailures: reviewerResults.some((rr) => !rr.validated),
            failures: result.failures ?? [],
        };
    }
    finally {
        // Always release the MCP connection, even on failure paths.
        await mcpClient.close();
    }
};
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Severity calibration block with definitions and examples for each severity level.
3
+ * This block is injected into reviewer system prompts to ensure consistent severity assignment.
4
+ */
5
+ export declare const SEVERITY_CALIBRATION_BLOCK = "\n## Severity Calibration\n\nYou must assign severity levels accurately based on the following definitions and examples:\n\n### Critical\n**Definition**: Blocks deployment, active exploit path, or data corruption risk. Findings that could lead to security breaches, data loss, or system compromise.\n\n**Examples**:\n- SQL injection vulnerability: `const query = `SELECT * FROM users WHERE id = ${userInput}`;` (user input directly interpolated)\n- Authentication bypass: `if (user.role === 'admin' || user.id === 1) { grantAccess(); }` (hardcoded admin check)\n- Missing input validation on sensitive operations: `await db.delete(userId);` (no validation that userId belongs to requester)\n\n**When to use**: Only when there is a clear exploit path or risk of data corruption/integrity violation.\n\n### Major\n**Definition**: Significant bug, breaks functionality, or violates established patterns. Findings that cause incorrect behavior or violate architectural standards.\n\n**Examples**:\n- Missing error handling: `const data = await fetch(url); return data.json();` (no try-catch, will crash on network error)\n- Tight coupling: `import { DatabaseConnection } from './db'; class UserService { private db = new DatabaseConnection(); }` (direct instantiation, violates dependency injection)\n- Race condition: `let count = 0; async function increment() { count++; await save(count); }` (non-atomic increment)\n\n**When to use**: When functionality is broken or architectural patterns are violated, but no immediate security/data risk.\n\n### Minor\n**Definition**: Code quality issue, convention violation, or maintainability concern. Findings that don't break functionality but reduce code quality.\n\n**Examples**:\n- Magic numbers: `if (user.age > 18) { ... }` (should be `const MIN_ADULT_AGE = 18`)\n- Inconsistent naming: `function getUserData() { ... }` but `function fetch_user_info() { ... 
}` (mixed naming conventions)\n- Missing JSDoc: `function calculateTotal(items) { ... }` (no documentation for complex logic)\n\n**When to use**: Code quality, readability, or maintainability issues that don't affect functionality.\n\n### Info\n**Definition**: Observation, suggestion, or educational note. Findings that provide helpful context or suggestions without indicating a problem.\n\n**Examples**:\n- Performance suggestion: `// Consider caching this query result if called frequently`\n- Pattern suggestion: `// This could use the Repository pattern for better testability`\n- Documentation opportunity: `// This algorithm implements the Fisher-Yates shuffle`\n\n**When to use**: Helpful suggestions or observations that don't indicate actual problems.\n";
6
+ /**
7
+ * Severity validation rules for critic pass (iteration 2+).
8
+ * These rules help the critic validate and potentially downgrade severity.
9
+ */
10
+ export declare const SEVERITY_VALIDATION_RULES = "\n## Severity Validation Rules\n\nDuring the critic pass, validate each finding's severity against these rules:\n\n1. **Critical findings MUST have exploit path or data integrity evidence**\n - If a critical finding lacks a clear exploit path or data corruption risk described in the rationale, downgrade to major\n - Example: \"SQL injection\" without showing how user input reaches the query \u2192 downgrade to major\n\n2. **Major findings MUST demonstrate functional impact**\n - If a major finding doesn't show how functionality is broken or patterns violated, downgrade to minor\n - Example: \"Missing error handling\" without showing what breaks \u2192 downgrade to minor\n\n3. **Minor findings MUST indicate code quality impact**\n - If a minor finding is just a style preference without maintainability impact, consider downgrade to info\n - Example: \"Use const instead of let\" without explaining why \u2192 consider info\n\n4. **Downgrade rules**:\n - Critical \u2192 Major: No exploit path or data integrity risk described\n - Major \u2192 Minor: No functional impact demonstrated\n - Minor \u2192 Info: No code quality/maintainability impact shown\n\n5. **Never upgrade severity** - Only downgrade if evidence doesn't support the assigned level\n";
11
+ /**
12
+ * ContextRail tooling workflow requirements.
13
+ * Forces reviewers to ground findings in retrieved contexts instead of guessing IDs/titles.
14
+ */
15
+ export declare const CONTEXTRAIL_TOOLING_BLOCK = "\n## ContextRail Standards Workflow (Required)\n\nWhen you identify potential issues, you MUST ground them in retrieved ContextRail standards:\n\n1. Use `search_contexts` first to discover relevant standards for this change.\n2. Use `get_context` for each context you plan to cite in findings.\n3. Use `resolve_dependencies` when cited contexts have required dependencies.\n4. Perform at least one `search_contexts` call before finalizing your findings.\n\nAttribution rules:\n- NEVER invent context IDs or titles.\n- Only include `contextIdsUsed`, `contextIdsViolated`, and `contextTitles` from contexts you actually retrieved.\n- If no relevant ContextRail standard exists after tool lookup, set those fields to `null` (not empty arrays) and explain that briefly in `rationale`.\n";
16
+ /**
17
+ * Compact output contract optimized for structured-output reliability.
18
+ * Keeps formatting constraints in one place for both standard and critic prompts.
19
+ */
20
+ export declare const OUTPUT_CONTRACT_BLOCK = "\n## Output Contract (Strict)\n\nReturn a JSON object with:\n- `findings`: array\n- `validated`: boolean\n- `notes`: string | null\n\nFor each finding:\n- Required keys: `severity`, `title`, `description`, `rationale`\n- Optional-but-required-by-schema keys: `suggestedFix`, `file`, `line`, `endLine`, `contextIdsUsed`, `contextIdsViolated`, `contextTitles`\n\nSchema compatibility rules:\n- Include all keys (never omit)\n- Use `null` when a value is not available\n- Use `null` (not empty arrays) for context attribution fields when no standards apply\n";
21
+ /**
22
+ * Final guardrail checklist near generation point.
23
+ * Repeats only non-negotiables to reduce mid-prompt loss on long inputs.
24
+ */
25
+ export declare const FINAL_CHECKLIST_BLOCK = "\n## Final Checklist (Must Pass)\n1. Every finding is supported by concrete code/diff evidence.\n2. ContextRail workflow was used (`search_contexts` -> `get_context` -> `resolve_dependencies` as needed).\n3. Context attribution fields only reference retrieved contexts (or are `null`).\n4. Severity is calibrated to evidence.\n5. Output strictly matches the JSON contract, and `notes` summarizes retrieved contexts (or none found).\n";
@@ -0,0 +1,129 @@
1
/**
 * Severity calibration block with definitions and examples for each severity level.
 * This block is injected into reviewer system prompts to ensure consistent severity assignment.
 * Keep in sync with the corresponding declaration in blocks.d.ts.
 */
export const SEVERITY_CALIBRATION_BLOCK = `
## Severity Calibration

You must assign severity levels accurately based on the following definitions and examples:

### Critical
**Definition**: Blocks deployment, active exploit path, or data corruption risk. Findings that could lead to security breaches, data loss, or system compromise.

**Examples**:
- SQL injection vulnerability: \`const query = \`SELECT * FROM users WHERE id = \${userInput}\`;\` (user input directly interpolated)
- Authentication bypass: \`if (user.role === 'admin' || user.id === 1) { grantAccess(); }\` (hardcoded admin check)
- Missing input validation on sensitive operations: \`await db.delete(userId);\` (no validation that userId belongs to requester)

**When to use**: Only when there is a clear exploit path or risk of data corruption/integrity violation.

### Major
**Definition**: Significant bug, breaks functionality, or violates established patterns. Findings that cause incorrect behavior or violate architectural standards.

**Examples**:
- Missing error handling: \`const data = await fetch(url); return data.json();\` (no try-catch, will crash on network error)
- Tight coupling: \`import { DatabaseConnection } from './db'; class UserService { private db = new DatabaseConnection(); }\` (direct instantiation, violates dependency injection)
- Race condition: \`let count = 0; async function increment() { count++; await save(count); }\` (non-atomic increment)

**When to use**: When functionality is broken or architectural patterns are violated, but no immediate security/data risk.

### Minor
**Definition**: Code quality issue, convention violation, or maintainability concern. Findings that don't break functionality but reduce code quality.

**Examples**:
- Magic numbers: \`if (user.age > 18) { ... }\` (should be \`const MIN_ADULT_AGE = 18\`)
- Inconsistent naming: \`function getUserData() { ... }\` but \`function fetch_user_info() { ... }\` (mixed naming conventions)
- Missing JSDoc: \`function calculateTotal(items) { ... }\` (no documentation for complex logic)

**When to use**: Code quality, readability, or maintainability issues that don't affect functionality.

### Info
**Definition**: Observation, suggestion, or educational note. Findings that provide helpful context or suggestions without indicating a problem.

**Examples**:
- Performance suggestion: \`// Consider caching this query result if called frequently\`
- Pattern suggestion: \`// This could use the Repository pattern for better testability\`
- Documentation opportunity: \`// This algorithm implements the Fisher-Yates shuffle\`

**When to use**: Helpful suggestions or observations that don't indicate actual problems.
`;
/**
 * Severity validation rules for critic pass (iteration 2+).
 * These rules help the critic validate and potentially downgrade severity.
 * Note: rules are downgrade-only — the critic never raises a severity.
 */
export const SEVERITY_VALIDATION_RULES = `
## Severity Validation Rules

During the critic pass, validate each finding's severity against these rules:

1. **Critical findings MUST have exploit path or data integrity evidence**
 - If a critical finding lacks a clear exploit path or data corruption risk described in the rationale, downgrade to major
 - Example: "SQL injection" without showing how user input reaches the query → downgrade to major

2. **Major findings MUST demonstrate functional impact**
 - If a major finding doesn't show how functionality is broken or patterns violated, downgrade to minor
 - Example: "Missing error handling" without showing what breaks → downgrade to minor

3. **Minor findings MUST indicate code quality impact**
 - If a minor finding is just a style preference without maintainability impact, consider downgrade to info
 - Example: "Use const instead of let" without explaining why → consider info

4. **Downgrade rules**:
 - Critical → Major: No exploit path or data integrity risk described
 - Major → Minor: No functional impact demonstrated
 - Minor → Info: No code quality/maintainability impact shown

5. **Never upgrade severity** - Only downgrade if evidence doesn't support the assigned level
`;
/**
 * ContextRail tooling workflow requirements.
 * Forces reviewers to ground findings in retrieved contexts instead of guessing IDs/titles.
 */
export const CONTEXTRAIL_TOOLING_BLOCK = `
## ContextRail Standards Workflow (Required)

When you identify potential issues, you MUST ground them in retrieved ContextRail standards:

1. Use \`search_contexts\` first to discover relevant standards for this change.
2. Use \`get_context\` for each context you plan to cite in findings.
3. Use \`resolve_dependencies\` when cited contexts have required dependencies.
4. Perform at least one \`search_contexts\` call before finalizing your findings.

Attribution rules:
- NEVER invent context IDs or titles.
- Only include \`contextIdsUsed\`, \`contextIdsViolated\`, and \`contextTitles\` from contexts you actually retrieved.
- If no relevant ContextRail standard exists after tool lookup, set those fields to \`null\` (not empty arrays) and explain that briefly in \`rationale\`.
`;
/**
 * Compact output contract optimized for structured-output reliability.
 * Keeps formatting constraints in one place for both standard and critic prompts.
 */
export const OUTPUT_CONTRACT_BLOCK = `
## Output Contract (Strict)

Return a JSON object with:
- \`findings\`: array
- \`validated\`: boolean
- \`notes\`: string | null

For each finding:
- Required keys: \`severity\`, \`title\`, \`description\`, \`rationale\`
- Optional-but-required-by-schema keys: \`suggestedFix\`, \`file\`, \`line\`, \`endLine\`, \`contextIdsUsed\`, \`contextIdsViolated\`, \`contextTitles\`

Schema compatibility rules:
- Include all keys (never omit)
- Use \`null\` when a value is not available
- Use \`null\` (not empty arrays) for context attribution fields when no standards apply
`;
/**
 * Final guardrail checklist near generation point.
 * Repeats only non-negotiables to reduce mid-prompt loss on long inputs.
 */
export const FINAL_CHECKLIST_BLOCK = `
## Final Checklist (Must Pass)
1. Every finding is supported by concrete code/diff evidence.
2. ContextRail workflow was used (\`search_contexts\` -> \`get_context\` -> \`resolve_dependencies\` as needed).
3. Context attribution fields only reference retrieved contexts (or are \`null\`).
4. Severity is calibrated to evidence.
5. Output strictly matches the JSON contract, and \`notes\` summarizes retrieved contexts (or none found).
`;
@@ -0,0 +1,15 @@
1
/**
 * Builds the prompt used to make the final approve/request-changes decision
 * from synthesized multi-reviewer output.
 *
 * @param understanding - Plain-text understanding of the change under review.
 * @param synthesisResult - Synthesized reviewer output: findings
 *   (severity/title/description), contradictions (context text), and compound
 *   risks (description, affected findings, severity).
 * @returns The complete decision prompt string.
 */
export declare const buildReviewDecisionPrompt: (understanding: string, synthesisResult: {
    findings: Array<{
        severity: string;
        title: string;
        description: string;
    }>;
    contradictions: Array<{
        context: string;
    }>;
    compoundRisks: Array<{
        description: string;
        affectedFindings: string[];
        severity: string;
    }>;
}) => string;
@@ -0,0 +1,30 @@
1
/**
 * Builds the prompt used to make the final approve/request-changes decision
 * from synthesized multi-reviewer output.
 *
 * @param understanding - Plain-text understanding of the change under review.
 * @param synthesisResult - Synthesized findings, contradictions, and compound risks.
 * @returns The complete decision prompt string.
 */
export const buildReviewDecisionPrompt = (understanding, synthesisResult) => {
    const { findings, contradictions, compoundRisks } = synthesisResult;
    // One bullet per finding; the literal '- None' keeps the section non-empty.
    let findingsBlock = '- None';
    if (findings.length > 0) {
        findingsBlock = findings
            .map((item) => `- [${item.severity.toUpperCase()}] ${item.title}: ${item.description}`)
            .join('\n');
    }
    // The next two sections are optional: emitted only when their source arrays
    // are non-empty, each prefixed by a blank line and a heading.
    let contradictionsBlock = '';
    if (contradictions.length > 0) {
        const bullets = contradictions.map((item) => `- ${item.context}`).join('\n');
        contradictionsBlock = `\n\nContradictions:\n${bullets}`;
    }
    let risksBlock = '';
    if (compoundRisks.length > 0) {
        const bullets = compoundRisks
            .map((item) => `- [${item.severity.toUpperCase()}] ${item.description}`)
            .join('\n');
        risksBlock = `\n\nCompound Risks:\n${bullets}`;
    }
    return `You are making a final review decision based on synthesized findings from multiple reviewers.

Return "approve" only when there are no critical or major findings.
Return "request-changes" when there are any critical/major findings.
Hard rule: if synthesized findings contain zero critical/major items, decision MUST be "approve".

Provide:
- decision: approve | request-changes
- summary: 2-4 sentences, plain language
- rationale: explain why the decision was made, referencing key findings or lack thereof

Review context:
${understanding}

Synthesized findings:
${findingsBlock}${contradictionsBlock}${risksBlock}
`;
};
@@ -0,0 +1,5 @@
1
/**
 * Barrel declarations for the prompts module: shared prompt blocks plus the
 * reviewer, synthesis, decision, and orchestrator prompt builders.
 */
export * from './blocks.js';
export * from './reviewer.js';
export * from './synthesis.js';
export * from './decision.js';
export * from './orchestrator.js';
@@ -0,0 +1,5 @@
1
/**
 * Barrel module for the prompts package: re-exports shared prompt blocks plus
 * the reviewer, synthesis, decision, and orchestrator prompt builders.
 */
export * from './blocks.js';
export * from './reviewer.js';
export * from './synthesis.js';
export * from './decision.js';
export * from './orchestrator.js';
@@ -8,7 +8,7 @@ import type { FilePatterns } from '../review-inputs/file-patterns.js';
8
8
  * @param contextIds - The context IDs.
9
9
  * @returns The activity content.
10
10
  */
11
- declare const generateActivityContentPrompt: (understanding: string, reviewers: string[], toolCalls: Array<{
11
+ export declare const generateActivityContentPrompt: (understanding: string, reviewers: string[], toolCalls: Array<{
12
12
  tool: string;
13
13
  input?: unknown;
14
14
  }>, contextIds: string[]) => string;
@@ -21,5 +21,4 @@ declare const generateActivityContentPrompt: (understanding: string, reviewers:
21
21
  * @param reviewDomains - Optional review focus domains to prioritize.
22
22
  * @returns The user message.
23
23
  */
24
- declare const generateUserMessagePrompt: (inputs: ReviewInputs, availableReviewers: string[], prDescription?: string, reviewerFilePatterns?: Map<string, FilePatterns | undefined>, reviewDomains?: string[]) => string;
25
- export { generateActivityContentPrompt, generateUserMessagePrompt };
24
+ export declare const generateUserMessagePrompt: (inputs: ReviewInputs, availableReviewers: string[], prDescription?: string, reviewerFilePatterns?: Map<string, FilePatterns | undefined>, reviewDomains?: string[]) => string;
@@ -9,7 +9,7 @@ import dedent from 'dedent';
9
9
  * @param contextIds - The context IDs.
10
10
  * @returns The activity content.
11
11
  */
12
- const generateActivityContentPrompt = (understanding, reviewers, toolCalls, contextIds) => dedent(`# Orchestration Activity Log
12
+ export const generateActivityContentPrompt = (understanding, reviewers, toolCalls, contextIds) => dedent(`# Orchestration Activity Log
13
13
 
14
14
  **Phase**: Orchestration
15
15
  **Completed**: ${new Date().toISOString()}
@@ -39,7 +39,7 @@ const generateActivityContentPrompt = (understanding, reviewers, toolCalls, cont
39
39
  * @param reviewDomains - Optional review focus domains to prioritize.
40
40
  * @returns The user message.
41
41
  */
42
- const generateUserMessagePrompt = (inputs, availableReviewers, prDescription, reviewerFilePatterns, reviewDomains) => {
42
+ export const generateUserMessagePrompt = (inputs, availableReviewers, prDescription, reviewerFilePatterns, reviewDomains) => {
43
43
  const diffSummary = generateDiffSummary(inputs);
44
44
  const prDescriptionBlock = prDescription ? `\n\nPR Description:\n${prDescription}\n` : '';
45
45
  // Build reviewer scope information block
@@ -95,4 +95,3 @@ ${availableReviewers
95
95
  - If review focus domains are provided, prioritize reviewers and rationale aligned to those domains.
96
96
  `);
97
97
  };
98
- export { generateActivityContentPrompt, generateUserMessagePrompt };