@contextrail/code-review-agent 0.1.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/LICENSE +26 -0
  2. package/MODEL_RECOMMENDATIONS.md +178 -0
  3. package/README.md +177 -0
  4. package/dist/config/defaults.d.ts +72 -0
  5. package/dist/config/defaults.js +113 -0
  6. package/dist/config/index.d.ts +34 -0
  7. package/dist/config/index.js +89 -0
  8. package/dist/index.d.ts +2 -0
  9. package/dist/index.js +603 -0
  10. package/dist/llm/factory.d.ts +21 -0
  11. package/dist/llm/factory.js +50 -0
  12. package/dist/llm/index.d.ts +3 -0
  13. package/dist/llm/index.js +2 -0
  14. package/dist/llm/service.d.ts +38 -0
  15. package/dist/llm/service.js +191 -0
  16. package/dist/llm/types.d.ts +119 -0
  17. package/dist/llm/types.js +1 -0
  18. package/dist/logging/logger.d.ts +9 -0
  19. package/dist/logging/logger.js +52 -0
  20. package/dist/mcp/client.d.ts +429 -0
  21. package/dist/mcp/client.js +173 -0
  22. package/dist/mcp/mcp-tools.d.ts +292 -0
  23. package/dist/mcp/mcp-tools.js +40 -0
  24. package/dist/mcp/token-validation.d.ts +31 -0
  25. package/dist/mcp/token-validation.js +57 -0
  26. package/dist/mcp/tools-provider.d.ts +18 -0
  27. package/dist/mcp/tools-provider.js +24 -0
  28. package/dist/observability/index.d.ts +2 -0
  29. package/dist/observability/index.js +1 -0
  30. package/dist/observability/metrics.d.ts +48 -0
  31. package/dist/observability/metrics.js +86 -0
  32. package/dist/orchestrator/agentic-orchestrator.d.ts +29 -0
  33. package/dist/orchestrator/agentic-orchestrator.js +136 -0
  34. package/dist/orchestrator/prompts.d.ts +25 -0
  35. package/dist/orchestrator/prompts.js +98 -0
  36. package/dist/orchestrator/validation.d.ts +2 -0
  37. package/dist/orchestrator/validation.js +7 -0
  38. package/dist/orchestrator/writer.d.ts +4 -0
  39. package/dist/orchestrator/writer.js +17 -0
  40. package/dist/output/aggregator.d.ts +30 -0
  41. package/dist/output/aggregator.js +132 -0
  42. package/dist/output/prompts.d.ts +32 -0
  43. package/dist/output/prompts.js +153 -0
  44. package/dist/output/schema.d.ts +1515 -0
  45. package/dist/output/schema.js +224 -0
  46. package/dist/output/writer.d.ts +31 -0
  47. package/dist/output/writer.js +120 -0
  48. package/dist/review-inputs/chunking.d.ts +29 -0
  49. package/dist/review-inputs/chunking.js +113 -0
  50. package/dist/review-inputs/diff-summary.d.ts +52 -0
  51. package/dist/review-inputs/diff-summary.js +83 -0
  52. package/dist/review-inputs/file-patterns.d.ts +40 -0
  53. package/dist/review-inputs/file-patterns.js +182 -0
  54. package/dist/review-inputs/filtering.d.ts +31 -0
  55. package/dist/review-inputs/filtering.js +53 -0
  56. package/dist/review-inputs/git-diff-provider.d.ts +2 -0
  57. package/dist/review-inputs/git-diff-provider.js +42 -0
  58. package/dist/review-inputs/index.d.ts +46 -0
  59. package/dist/review-inputs/index.js +122 -0
  60. package/dist/review-inputs/path-validation.d.ts +10 -0
  61. package/dist/review-inputs/path-validation.js +37 -0
  62. package/dist/review-inputs/surrounding-context.d.ts +35 -0
  63. package/dist/review-inputs/surrounding-context.js +180 -0
  64. package/dist/review-inputs/triage.d.ts +57 -0
  65. package/dist/review-inputs/triage.js +81 -0
  66. package/dist/reviewers/executor.d.ts +41 -0
  67. package/dist/reviewers/executor.js +357 -0
  68. package/dist/reviewers/findings-merge.d.ts +9 -0
  69. package/dist/reviewers/findings-merge.js +131 -0
  70. package/dist/reviewers/iteration.d.ts +17 -0
  71. package/dist/reviewers/iteration.js +95 -0
  72. package/dist/reviewers/persistence.d.ts +17 -0
  73. package/dist/reviewers/persistence.js +55 -0
  74. package/dist/reviewers/progress-tracker.d.ts +115 -0
  75. package/dist/reviewers/progress-tracker.js +194 -0
  76. package/dist/reviewers/prompt.d.ts +42 -0
  77. package/dist/reviewers/prompt.js +246 -0
  78. package/dist/reviewers/tool-call-tracker.d.ts +18 -0
  79. package/dist/reviewers/tool-call-tracker.js +40 -0
  80. package/dist/reviewers/types.d.ts +12 -0
  81. package/dist/reviewers/types.js +1 -0
  82. package/dist/reviewers/validation-rules.d.ts +27 -0
  83. package/dist/reviewers/validation-rules.js +189 -0
  84. package/package.json +79 -0
@@ -0,0 +1,55 @@
1
+ import { writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { appendActivity } from './progress-tracker.js';
4
+ export const logIteration = async (reviewersDir, reviewer, iteration, findings, toolCalls, contextIds) => {
5
+ await appendActivity(reviewersDir, reviewer, `## Iteration ${iteration}
6
+
7
+ **Status**: ${findings.validated ? 'Validated' : 'Needs re-review'}
8
+
9
+ **Findings Count**: ${findings.findings.length}
10
+
11
+ **ContextRail Contexts Used**:
12
+ ${contextIds.length > 0 ? contextIds.map((id) => `- ${id}`).join('\n') : 'None'}
13
+
14
+ **Tool Calls**:
15
+ ${toolCalls.length > 0 ? toolCalls.map((tc) => `- **${tc.tool}**: ${JSON.stringify(tc.input, null, 2)}`).join('\n\n') : 'None'}
16
+
17
+ ${findings.notes ? `**Notes**:\n${findings.notes}\n` : ''}`);
18
+ };
19
+ export const logContinuation = async (reviewersDir, reviewer, nextIteration) => {
20
+ await appendActivity(reviewersDir, reviewer, `**Action**: Findings not validated, continuing to iteration ${nextIteration}`);
21
+ };
22
/**
 * Persist a reviewer's findings as pretty-printed JSON in `findings.json`.
 *
 * @param reviewerDir - Directory in which `findings.json` is written
 * @param findings - Value to serialize (2-space indented JSON)
 */
export const writeFindings = async (reviewerDir, findings) => {
    const targetPath = path.join(reviewerDir, 'findings.json');
    const serialized = JSON.stringify(findings, null, 2);
    await writeFile(targetPath, serialized, 'utf-8');
};
25
+ export const logCompletion = async (reviewersDir, reviewer, iteration, findings) => {
26
+ await appendActivity(reviewersDir, reviewer, `## Completed
27
+
28
+ **Total Iterations**: ${iteration}
29
+ **Final Findings Count**: ${findings.findings.length}
30
+ **Status**: Validated and complete`);
31
+ };
32
/**
 * Write token usage metrics for a reviewer to `token-budget.json`.
 *
 * The file holds the reviewer name, the usage summed over all iterations and
 * the per-iteration `iteration` / `usage` / `chunks` values. Nothing is
 * written when there is no iteration data.
 *
 * @param reviewerDir - Directory in which `token-budget.json` is written
 * @param reviewer - Reviewer name stored in the metrics
 * @param iterationTokenData - Array of `{ iteration, usage, chunks }` records, where
 *   `usage` carries `promptTokens`, `completionTokens` and `totalTokens`
 */
export const writeTokenMetrics = async (reviewerDir, reviewer, iterationTokenData) => {
    if (iterationTokenData.length === 0) {
        // No token usage data to write
        return;
    }
    // Sum one usage counter across every iteration.
    const sumUsage = (field) => iterationTokenData.reduce((acc, entry) => acc + entry.usage[field], 0);
    const metrics = {
        reviewer,
        totalUsage: {
            promptTokens: sumUsage('promptTokens'),
            completionTokens: sumUsage('completionTokens'),
            totalTokens: sumUsage('totalTokens'),
        },
        iterations: iterationTokenData.map((entry) => {
            return { iteration: entry.iteration, usage: entry.usage, chunks: entry.chunks };
        }),
    };
    const targetPath = path.join(reviewerDir, 'token-budget.json');
    await writeFile(targetPath, JSON.stringify(metrics, null, 2), 'utf-8');
};
@@ -0,0 +1,115 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Progress state schema for reviewer execution tracking.
4
+ * Based on context://agentic/execution/progress-tracking
5
+ */
6
+ export declare const progressSchema: z.ZodObject<{
7
+ reviewer: z.ZodString;
8
+ status: z.ZodEnum<["pending", "in-progress", "done", "blocked"]>;
9
+ attempts: z.ZodNumber;
10
+ startedAt: z.ZodOptional<z.ZodString>;
11
+ lastAttempt: z.ZodOptional<z.ZodString>;
12
+ completedAt: z.ZodOptional<z.ZodString>;
13
+ lastFailure: z.ZodOptional<z.ZodObject<{
14
+ gate: z.ZodString;
15
+ message: z.ZodString;
16
+ details: z.ZodOptional<z.ZodString>;
17
+ }, "strip", z.ZodTypeAny, {
18
+ message: string;
19
+ gate: string;
20
+ details?: string | undefined;
21
+ }, {
22
+ message: string;
23
+ gate: string;
24
+ details?: string | undefined;
25
+ }>>;
26
+ evidence: z.ZodArray<z.ZodString, "many">;
27
+ notes: z.ZodOptional<z.ZodString>;
28
+ }, "strip", z.ZodTypeAny, {
29
+ status: "pending" | "in-progress" | "done" | "blocked";
30
+ reviewer: string;
31
+ attempts: number;
32
+ evidence: string[];
33
+ notes?: string | undefined;
34
+ startedAt?: string | undefined;
35
+ lastAttempt?: string | undefined;
36
+ completedAt?: string | undefined;
37
+ lastFailure?: {
38
+ message: string;
39
+ gate: string;
40
+ details?: string | undefined;
41
+ } | undefined;
42
+ }, {
43
+ status: "pending" | "in-progress" | "done" | "blocked";
44
+ reviewer: string;
45
+ attempts: number;
46
+ evidence: string[];
47
+ notes?: string | undefined;
48
+ startedAt?: string | undefined;
49
+ lastAttempt?: string | undefined;
50
+ completedAt?: string | undefined;
51
+ lastFailure?: {
52
+ message: string;
53
+ gate: string;
54
+ details?: string | undefined;
55
+ } | undefined;
56
+ }>;
57
+ export type Progress = z.infer<typeof progressSchema>;
58
+ /**
59
+ * Initialize progress.json for a reviewer.
60
+ * Creates the initial state file with default values.
61
+ *
62
+ * @param outputDir - Base output directory
63
+ * @param reviewer - Reviewer name
64
+ * @returns Initial progress state
65
+ */
66
+ export declare const initProgress: (outputDir: string, reviewer: string) => Promise<Progress>;
67
+ /**
68
+ * Read current progress state for a reviewer.
69
+ * Returns default state if file doesn't exist.
70
+ *
71
+ * @param outputDir - Base output directory
72
+ * @param reviewer - Reviewer name
73
+ * @returns Current progress state
74
+ */
75
+ export declare const readProgress: (outputDir: string, reviewer: string) => Promise<Progress>;
76
+ /**
77
+ * Update progress state atomically.
78
+ * Uses temp file + rename pattern to prevent corruption.
79
+ *
80
+ * @param outputDir - Base output directory
81
+ * @param reviewer - Reviewer name
82
+ * @param updates - Partial progress updates
83
+ * @returns Updated progress state
84
+ */
85
+ export declare const updateProgress: (outputDir: string, reviewer: string, updates: Partial<Progress>) => Promise<Progress>;
86
+ /**
87
+ * Format ISO 8601 timestamp.
88
+ * Utility for consistent timestamp formatting.
89
+ *
90
+ * @returns ISO 8601 timestamp string
91
+ */
92
+ export declare const formatTimestamp: () => string;
93
+ /**
94
+ * Append entry to activity.md log.
95
+ * Creates file if it doesn't exist, appends otherwise.
96
+ *
97
+ * @param outputDir - Base output directory
98
+ * @param reviewer - Reviewer name
99
+ * @param entry - Activity log entry (markdown formatted)
100
+ */
101
+ export declare const appendActivity: (outputDir: string, reviewer: string, entry: string) => Promise<void>;
102
+ /**
103
+ * Append failure entry to failures.md log.
104
+ * Creates file if it doesn't exist, appends otherwise.
105
+ *
106
+ * @param outputDir - Base output directory
107
+ * @param reviewer - Reviewer name
108
+ * @param failure - Failure information object
109
+ */
110
+ export declare const appendFailure: (outputDir: string, reviewer: string, failure: {
111
+ gate: string;
112
+ message: string;
113
+ details?: string;
114
+ timestamp?: string;
115
+ }) => Promise<void>;
@@ -0,0 +1,194 @@
1
+ import { mkdir, readFile, rename, unlink, writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { randomUUID } from 'node:crypto';
4
+ import { z } from 'zod';
5
/**
 * Build an optional ISO datetime string field carrying the given description.
 */
const isoTimestampField = (description) => z.string().datetime().optional().describe(description);
/**
 * Shape of the failure record stored under `lastFailure`.
 */
const lastFailureSchema = z.object({
    gate: z.string().describe('Gate that failed (build|test|lint|types|contextrail)'),
    message: z.string().describe('Error message'),
    details: z.string().optional().describe('Additional error details'),
});
/**
 * Progress state schema for reviewer execution tracking.
 * Based on context://agentic/execution/progress-tracking
 *
 * Required fields: `reviewer`, `status`, `attempts` (non-negative integer)
 * and `evidence`. The timestamps, `lastFailure` and `notes` are optional.
 */
export const progressSchema = z.object({
    reviewer: z.string().describe('Reviewer name'),
    status: z.enum(['pending', 'in-progress', 'done', 'blocked']).describe('Current execution status'),
    attempts: z.number().int().min(0).describe('Number of attempts made'),
    startedAt: isoTimestampField('ISO timestamp when execution started'),
    lastAttempt: isoTimestampField('ISO timestamp of last attempt'),
    completedAt: isoTimestampField('ISO timestamp when execution completed'),
    lastFailure: lastFailureSchema.optional().describe('Last failure information if status is not done'),
    evidence: z.array(z.string()).describe('Evidence links (file paths, test files, etc.)'),
    notes: z.string().optional().describe('Additional notes'),
});
27
+ /**
28
+ * Initialize progress.json for a reviewer.
29
+ * Creates the initial state file with default values.
30
+ *
31
+ * @param outputDir - Base output directory
32
+ * @param reviewer - Reviewer name
33
+ * @returns Initial progress state
34
+ */
35
+ export const initProgress = async (outputDir, reviewer) => {
36
+ const reviewerDir = path.join(outputDir, reviewer);
37
+ await mkdir(reviewerDir, { recursive: true });
38
+ const progress = {
39
+ reviewer,
40
+ status: 'pending',
41
+ attempts: 0,
42
+ evidence: [],
43
+ };
44
+ await writeProgressAtomically(reviewerDir, progress);
45
+ return progress;
46
+ };
47
+ /**
48
+ * Read current progress state for a reviewer.
49
+ * Returns default state if file doesn't exist.
50
+ *
51
+ * @param outputDir - Base output directory
52
+ * @param reviewer - Reviewer name
53
+ * @returns Current progress state
54
+ */
55
+ export const readProgress = async (outputDir, reviewer) => {
56
+ const reviewerDir = path.join(outputDir, reviewer);
57
+ const progressPath = path.join(reviewerDir, 'progress.json');
58
+ try {
59
+ const content = await readFile(progressPath, 'utf-8');
60
+ const parsed = JSON.parse(content);
61
+ return progressSchema.parse(parsed);
62
+ }
63
+ catch (error) {
64
+ // If file doesn't exist or is invalid, return default state
65
+ if (error.code === 'ENOENT') {
66
+ return {
67
+ reviewer,
68
+ status: 'pending',
69
+ attempts: 0,
70
+ evidence: [],
71
+ };
72
+ }
73
+ throw error;
74
+ }
75
+ };
76
+ /**
77
+ * Update progress state atomically.
78
+ * Uses temp file + rename pattern to prevent corruption.
79
+ *
80
+ * @param outputDir - Base output directory
81
+ * @param reviewer - Reviewer name
82
+ * @param updates - Partial progress updates
83
+ * @returns Updated progress state
84
+ */
85
+ export const updateProgress = async (outputDir, reviewer, updates) => {
86
+ const reviewerDir = path.join(outputDir, reviewer);
87
+ await mkdir(reviewerDir, { recursive: true });
88
+ const current = await readProgress(outputDir, reviewer);
89
+ const updated = {
90
+ ...current,
91
+ ...updates,
92
+ reviewer, // Ensure reviewer name is preserved
93
+ };
94
+ // Validate updated state
95
+ const validated = progressSchema.parse(updated);
96
+ await writeProgressAtomically(reviewerDir, validated);
97
+ return validated;
98
+ };
99
+ /**
100
+ * Atomic write pattern: write to temp file, then rename.
101
+ * Prevents corruption if process is interrupted.
102
+ *
103
+ * @param reviewerDir - Reviewer-specific directory
104
+ * @param progress - Progress state to write
105
+ */
106
+ const writeProgressAtomically = async (reviewerDir, progress) => {
107
+ const progressPath = path.join(reviewerDir, 'progress.json');
108
+ const tempPath = `${progressPath}.tmp.${randomUUID()}`;
109
+ try {
110
+ // Write to temp file
111
+ await writeFile(tempPath, JSON.stringify(progress, null, 2), 'utf-8');
112
+ // Atomic rename
113
+ await rename(tempPath, progressPath);
114
+ }
115
+ catch (error) {
116
+ // Clean up temp file on error
117
+ try {
118
+ await unlink(tempPath);
119
+ }
120
+ catch {
121
+ // Temp file doesn't exist or already cleaned up, ignore
122
+ }
123
+ throw error;
124
+ }
125
+ };
126
/**
 * Current time as an ISO 8601 string, as produced by `Date.prototype.toISOString`.
 * Shared helper so every log entry uses the same timestamp format.
 *
 * @returns ISO 8601 timestamp string
 */
export const formatTimestamp = () => new Date().toISOString();
135
+ /**
136
+ * Append entry to activity.md log.
137
+ * Creates file if it doesn't exist, appends otherwise.
138
+ *
139
+ * @param outputDir - Base output directory
140
+ * @param reviewer - Reviewer name
141
+ * @param entry - Activity log entry (markdown formatted)
142
+ */
143
+ export const appendActivity = async (outputDir, reviewer, entry) => {
144
+ const reviewerDir = path.join(outputDir, reviewer);
145
+ await mkdir(reviewerDir, { recursive: true });
146
+ const activityPath = path.join(reviewerDir, 'activity.md');
147
+ const timestamp = formatTimestamp();
148
+ const entryWithTimestamp = `\n## ${timestamp}\n\n${entry}\n`;
149
+ try {
150
+ // Read existing content
151
+ const existing = await readFile(activityPath, 'utf-8');
152
+ await writeFile(activityPath, existing + entryWithTimestamp, 'utf-8');
153
+ }
154
+ catch (error) {
155
+ // File doesn't exist, create new
156
+ if (error.code === 'ENOENT') {
157
+ const header = `# Activity Log\n\n**Reviewer**: ${reviewer}\n**Started**: ${timestamp}\n`;
158
+ await writeFile(activityPath, header + entryWithTimestamp, 'utf-8');
159
+ }
160
+ else {
161
+ throw error;
162
+ }
163
+ }
164
+ };
165
+ /**
166
+ * Append failure entry to failures.md log.
167
+ * Creates file if it doesn't exist, appends otherwise.
168
+ *
169
+ * @param outputDir - Base output directory
170
+ * @param reviewer - Reviewer name
171
+ * @param failure - Failure information object
172
+ */
173
+ export const appendFailure = async (outputDir, reviewer, failure) => {
174
+ const reviewerDir = path.join(outputDir, reviewer);
175
+ await mkdir(reviewerDir, { recursive: true });
176
+ const failuresPath = path.join(reviewerDir, 'failures.md');
177
+ const timestamp = failure.timestamp ?? formatTimestamp();
178
+ const failureEntry = `\n## Failure — ${timestamp}\n\n**Gate Failed**: ${failure.gate}\n\n**Error**:\n\`\`\`\n${failure.message}\n\`\`\`\n\n${failure.details ? `**Details**:\n\n${failure.details}\n\n` : ''}---\n`;
179
+ try {
180
+ // Read existing content
181
+ const existing = await readFile(failuresPath, 'utf-8');
182
+ await writeFile(failuresPath, existing + failureEntry, 'utf-8');
183
+ }
184
+ catch (error) {
185
+ // File doesn't exist, create new
186
+ if (error.code === 'ENOENT') {
187
+ const header = `# Failures Log\n\n**Reviewer**: ${reviewer}\n\n`;
188
+ await writeFile(failuresPath, header + failureEntry, 'utf-8');
189
+ }
190
+ else {
191
+ throw error;
192
+ }
193
+ }
194
+ };
@@ -0,0 +1,42 @@
1
+ import { type ReviewInputs } from '../review-inputs/index.js';
2
+ import type { ReviewerFindings } from '../output/schema.js';
3
+ export type PromptMessage = {
4
+ role: 'system' | 'user' | 'assistant';
5
+ content: string;
6
+ };
7
+ /**
8
+ * Severity calibration block with definitions and examples for each severity level.
9
+ * This block is injected into reviewer system prompts to ensure consistent severity assignment.
10
+ */
11
+ export declare const SEVERITY_CALIBRATION_BLOCK = "\n## Severity Calibration\n\nYou must assign severity levels accurately based on the following definitions and examples:\n\n### Critical\n**Definition**: Blocks deployment, active exploit path, or data corruption risk. Findings that could lead to security breaches, data loss, or system compromise.\n\n**Examples**:\n- SQL injection vulnerability: `const query = `SELECT * FROM users WHERE id = ${userInput}`;` (user input directly interpolated)\n- Authentication bypass: `if (user.role === 'admin' || user.id === 1) { grantAccess(); }` (hardcoded admin check)\n- Missing input validation on sensitive operations: `await db.delete(userId);` (no validation that userId belongs to requester)\n\n**When to use**: Only when there is a clear exploit path or risk of data corruption/integrity violation.\n\n### Major\n**Definition**: Significant bug, breaks functionality, or violates established patterns. Findings that cause incorrect behavior or violate architectural standards.\n\n**Examples**:\n- Missing error handling: `const data = await fetch(url); return data.json();` (no try-catch, will crash on network error)\n- Tight coupling: `import { DatabaseConnection } from './db'; class UserService { private db = new DatabaseConnection(); }` (direct instantiation, violates dependency injection)\n- Race condition: `let count = 0; async function increment() { count++; await save(count); }` (non-atomic increment)\n\n**When to use**: When functionality is broken or architectural patterns are violated, but no immediate security/data risk.\n\n### Minor\n**Definition**: Code quality issue, convention violation, or maintainability concern. Findings that don't break functionality but reduce code quality.\n\n**Examples**:\n- Magic numbers: `if (user.age > 18) { ... }` (should be `const MIN_ADULT_AGE = 18`)\n- Inconsistent naming: `function getUserData() { ... }` but `function fetch_user_info() { ... 
}` (mixed naming conventions)\n- Missing JSDoc: `function calculateTotal(items) { ... }` (no documentation for complex logic)\n\n**When to use**: Code quality, readability, or maintainability issues that don't affect functionality.\n\n### Info\n**Definition**: Observation, suggestion, or educational note. Findings that provide helpful context or suggestions without indicating a problem.\n\n**Examples**:\n- Performance suggestion: `// Consider caching this query result if called frequently`\n- Pattern suggestion: `// This could use the Repository pattern for better testability`\n- Documentation opportunity: `// This algorithm implements the Fisher-Yates shuffle`\n\n**When to use**: Helpful suggestions or observations that don't indicate actual problems.\n";
12
+ /**
13
+ * Severity validation rules for critic pass (iteration 2+).
14
+ * These rules help the critic validate and potentially downgrade severity.
15
+ */
16
+ export declare const SEVERITY_VALIDATION_RULES = "\n## Severity Validation Rules\n\nDuring the critic pass, validate each finding's severity against these rules:\n\n1. **Critical findings MUST have exploit path or data integrity evidence**\n - If a critical finding lacks a clear exploit path or data corruption risk described in the rationale, downgrade to major\n - Example: \"SQL injection\" without showing how user input reaches the query \u2192 downgrade to major\n\n2. **Major findings MUST demonstrate functional impact**\n - If a major finding doesn't show how functionality is broken or patterns violated, downgrade to minor\n - Example: \"Missing error handling\" without showing what breaks \u2192 downgrade to minor\n\n3. **Minor findings MUST indicate code quality impact**\n - If a minor finding is just a style preference without maintainability impact, consider downgrade to info\n - Example: \"Use const instead of let\" without explaining why \u2192 consider info\n\n4. **Downgrade rules**:\n - Critical \u2192 Major: No exploit path or data integrity risk described\n - Major \u2192 Minor: No functional impact demonstrated\n - Minor \u2192 Info: No code quality/maintainability impact shown\n\n5. **Never upgrade severity** - Only downgrade if evidence doesn't support the assigned level\n";
17
+ /**
18
+ * ContextRail tooling workflow requirements.
19
+ * Forces reviewers to ground findings in retrieved contexts instead of guessing IDs/titles.
20
+ */
21
+ export declare const CONTEXTRAIL_TOOLING_BLOCK = "\n## ContextRail Standards Workflow (Required)\n\nWhen you identify potential issues, you MUST ground them in retrieved ContextRail standards:\n\n1. Use `search_contexts` first to discover relevant standards for this change.\n2. Use `get_context` for each context you plan to cite in findings.\n3. Use `resolve_dependencies` when cited contexts have required dependencies.\n4. Perform at least one `search_contexts` call before finalizing your findings.\n\nAttribution rules:\n- NEVER invent context IDs or titles.\n- Only include `contextIdsUsed`, `contextIdsViolated`, and `contextTitles` from contexts you actually retrieved.\n- If no relevant ContextRail standard exists after tool lookup, set those fields to `null` (not empty arrays) and explain that briefly in `rationale`.\n";
22
+ /**
23
+ * Compact output contract optimized for structured-output reliability.
24
+ * Keeps formatting constraints in one place for both standard and critic prompts.
25
+ */
26
+ export declare const OUTPUT_CONTRACT_BLOCK = "\n## Output Contract (Strict)\n\nReturn a JSON object with:\n- `findings`: array\n- `validated`: boolean\n- `notes`: string | null\n\nFor each finding:\n- Required keys: `severity`, `title`, `description`, `rationale`\n- Optional-but-required-by-schema keys: `suggestedFix`, `file`, `line`, `endLine`, `contextIdsUsed`, `contextIdsViolated`, `contextTitles`\n\nSchema compatibility rules:\n- Include all keys (never omit)\n- Use `null` when a value is not available\n- Use `null` (not empty arrays) for context attribution fields when no standards apply\n";
27
+ /**
28
+ * Final guardrail checklist near generation point.
29
+ * Repeats only non-negotiables to reduce mid-prompt loss on long inputs.
30
+ */
31
+ export declare const FINAL_CHECKLIST_BLOCK = "\n## Final Checklist (Must Pass)\n1. Every finding is supported by concrete code/diff evidence.\n2. ContextRail workflow was used (`search_contexts` -> `get_context` -> `resolve_dependencies` as needed).\n3. Context attribution fields only reference retrieved contexts (or are `null`).\n4. Severity is calibrated to evidence.\n5. Output strictly matches the JSON contract, and `notes` summarizes retrieved contexts (or none found).\n";
32
+ export declare const buildPromptMessages: (promptResult: {
33
+ messages: {
34
+ role: string;
35
+ content: {
36
+ type: string;
37
+ text?: string;
38
+ };
39
+ }[];
40
+ metadata?: unknown;
41
+ }) => PromptMessage[];
42
+ export declare const buildReviewerUserMessage: (inputs: ReviewInputs, understanding: string, iteration: number, findings: ReviewerFindings | null, prDescription?: string, reviewDomains?: string[]) => string;
@@ -0,0 +1,246 @@
1
+ import { isDiffInputs } from '../review-inputs/index.js';
2
/**
 * Severity calibration block with definitions and examples for each severity level
 * (Critical, Major, Minor, Info).
 *
 * `buildPromptMessages` appends this block to system prompts that have content,
 * to ensure consistent severity assignment.
 * The text is sent to the model verbatim, so edits here change reviewer behaviour.
 */
export const SEVERITY_CALIBRATION_BLOCK = `
## Severity Calibration

You must assign severity levels accurately based on the following definitions and examples:

### Critical
**Definition**: Blocks deployment, active exploit path, or data corruption risk. Findings that could lead to security breaches, data loss, or system compromise.

**Examples**:
- SQL injection vulnerability: \`const query = \`SELECT * FROM users WHERE id = \${userInput}\`;\` (user input directly interpolated)
- Authentication bypass: \`if (user.role === 'admin' || user.id === 1) { grantAccess(); }\` (hardcoded admin check)
- Missing input validation on sensitive operations: \`await db.delete(userId);\` (no validation that userId belongs to requester)

**When to use**: Only when there is a clear exploit path or risk of data corruption/integrity violation.

### Major
**Definition**: Significant bug, breaks functionality, or violates established patterns. Findings that cause incorrect behavior or violate architectural standards.

**Examples**:
- Missing error handling: \`const data = await fetch(url); return data.json();\` (no try-catch, will crash on network error)
- Tight coupling: \`import { DatabaseConnection } from './db'; class UserService { private db = new DatabaseConnection(); }\` (direct instantiation, violates dependency injection)
- Race condition: \`let count = 0; async function increment() { count++; await save(count); }\` (non-atomic increment)

**When to use**: When functionality is broken or architectural patterns are violated, but no immediate security/data risk.

### Minor
**Definition**: Code quality issue, convention violation, or maintainability concern. Findings that don't break functionality but reduce code quality.

**Examples**:
- Magic numbers: \`if (user.age > 18) { ... }\` (should be \`const MIN_ADULT_AGE = 18\`)
- Inconsistent naming: \`function getUserData() { ... }\` but \`function fetch_user_info() { ... }\` (mixed naming conventions)
- Missing JSDoc: \`function calculateTotal(items) { ... }\` (no documentation for complex logic)

**When to use**: Code quality, readability, or maintainability issues that don't affect functionality.

### Info
**Definition**: Observation, suggestion, or educational note. Findings that provide helpful context or suggestions without indicating a problem.

**Examples**:
- Performance suggestion: \`// Consider caching this query result if called frequently\`
- Pattern suggestion: \`// This could use the Repository pattern for better testability\`
- Documentation opportunity: \`// This algorithm implements the Fisher-Yates shuffle\`

**When to use**: Helpful suggestions or observations that don't indicate actual problems.
`;
51
/**
 * Severity validation rules for critic pass (iteration 2+).
 * These rules help the critic validate and potentially downgrade severity.
 *
 * Interpolated into the critic prompt ahead of the ContextRail tooling block.
 * The rules only ever allow downgrades; rule 5 forbids upgrading a severity.
 */
export const SEVERITY_VALIDATION_RULES = `
## Severity Validation Rules

During the critic pass, validate each finding's severity against these rules:

1. **Critical findings MUST have exploit path or data integrity evidence**
- If a critical finding lacks a clear exploit path or data corruption risk described in the rationale, downgrade to major
- Example: "SQL injection" without showing how user input reaches the query → downgrade to major

2. **Major findings MUST demonstrate functional impact**
- If a major finding doesn't show how functionality is broken or patterns violated, downgrade to minor
- Example: "Missing error handling" without showing what breaks → downgrade to minor

3. **Minor findings MUST indicate code quality impact**
- If a minor finding is just a style preference without maintainability impact, consider downgrade to info
- Example: "Use const instead of let" without explaining why → consider info

4. **Downgrade rules**:
- Critical → Major: No exploit path or data integrity risk described
- Major → Minor: No functional impact demonstrated
- Minor → Info: No code quality/maintainability impact shown

5. **Never upgrade severity** - Only downgrade if evidence doesn't support the assigned level
`;
79
/**
 * Prompt section describing the mandatory ContextRail lookup workflow.
 * Makes reviewers cite only standards they actually retrieved rather than
 * guessing context IDs or titles.
 *
 * Written as one entry per prompt line; the empty first and last entries
 * produce the leading and trailing newline of the block.
 */
export const CONTEXTRAIL_TOOLING_BLOCK = [
    '',
    '## ContextRail Standards Workflow (Required)',
    '',
    'When you identify potential issues, you MUST ground them in retrieved ContextRail standards:',
    '',
    '1. Use `search_contexts` first to discover relevant standards for this change.',
    '2. Use `get_context` for each context you plan to cite in findings.',
    '3. Use `resolve_dependencies` when cited contexts have required dependencies.',
    '4. Perform at least one `search_contexts` call before finalizing your findings.',
    '',
    'Attribution rules:',
    '- NEVER invent context IDs or titles.',
    '- Only include `contextIdsUsed`, `contextIdsViolated`, and `contextTitles` from contexts you actually retrieved.',
    '- If no relevant ContextRail standard exists after tool lookup, set those fields to `null` (not empty arrays) and explain that briefly in `rationale`.',
    '',
].join('\n');
98
/**
 * Prompt section spelling out the JSON shape the reviewer must return.
 * Shared by the standard and critic prompts so the formatting rules live in
 * a single place.
 *
 * Written as one entry per prompt line; the empty first and last entries
 * produce the leading and trailing newline of the block.
 */
export const OUTPUT_CONTRACT_BLOCK = [
    '',
    '## Output Contract (Strict)',
    '',
    'Return a JSON object with:',
    '- `findings`: array',
    '- `validated`: boolean',
    '- `notes`: string | null',
    '',
    'For each finding:',
    '- Required keys: `severity`, `title`, `description`, `rationale`',
    '- Optional-but-required-by-schema keys: `suggestedFix`, `file`, `line`, `endLine`, `contextIdsUsed`, `contextIdsViolated`, `contextTitles`',
    '',
    'Schema compatibility rules:',
    '- Include all keys (never omit)',
    '- Use `null` when a value is not available',
    '- Use `null` (not empty arrays) for context attribution fields when no standards apply',
    '',
].join('\n');
119
/**
 * Closing checklist placed at the very end of reviewer prompts.
 * Restates only the non-negotiable rules so they sit close to the point of
 * generation even when the prompt body is long.
 *
 * Written as one entry per prompt line; the empty first and last entries
 * produce the leading and trailing newline of the block.
 */
export const FINAL_CHECKLIST_BLOCK = [
    '',
    '## Final Checklist (Must Pass)',
    '1. Every finding is supported by concrete code/diff evidence.',
    '2. ContextRail workflow was used (`search_contexts` -> `get_context` -> `resolve_dependencies` as needed).',
    '3. Context attribution fields only reference retrieved contexts (or are `null`).',
    '4. Severity is calibrated to evidence.',
    '5. Output strictly matches the JSON contract, and `notes` summarizes retrieved contexts (or none found).',
    '',
].join('\n');
131
/**
 * Turn the messages of a prompt result into `{ role, content }` chat messages.
 *
 * - `user` and `assistant` roles pass through; every other role becomes `system`.
 * - Only `text` content parts contribute content; anything else becomes `''`.
 * - System messages with non-empty content get the severity calibration block
 *   appended after a blank line.
 *
 * @param promptResult - Object whose `messages` array holds `{ role, content }` entries.
 * @returns A new array with one normalised message per input message.
 */
export const buildPromptMessages = (promptResult) => {
    const toChatRole = (rawRole) => {
        if (rawRole === 'user' || rawRole === 'assistant') {
            return rawRole;
        }
        return 'system';
    };
    const readText = (part) => {
        if (part.type !== 'text') {
            return '';
        }
        return part.text ?? '';
    };
    return promptResult.messages.map(({ role: rawRole, content: part }) => {
        const role = toChatRole(rawRole);
        const text = readText(part);
        // Empty system prompts are left untouched rather than padded with the block.
        if (role !== 'system' || !text) {
            return { role, content: text };
        }
        return { role, content: `${text}\n\n${SEVERITY_CALIBRATION_BLOCK}` };
    });
};
142
+ /**
143
+ * Build a critic prompt that challenges findings from a previous iteration.
144
+ * The critic pass validates findings, removes false positives, and enforces evidence/standards.
145
+ */
146
+ const buildCriticPrompt = (inputs, understanding, findings, prDescription, reviewDomains) => {
147
+ const diffBlock = isDiffInputs(inputs)
148
+ ? `\nDiffs:\n${Object.entries(inputs.diffs)
149
+ .map(([file, diff]) => `\n## ${file}\n\`\`\`diff\n${diff}\n\`\`\``)
150
+ .join('\n')}`
151
+ : '';
152
+ const contextBlock = inputs.context && Object.keys(inputs.context).length > 0
153
+ ? `\n\nSurrounding Code Context:\n${Object.entries(inputs.context)
154
+ .map(([file, context]) => `\n## ${file}\n\`\`\`\n${context}\n\`\`\``)
155
+ .join('\n')}`
156
+ : '';
157
+ const prDescriptionBlock = prDescription ? `\n\nPR Description:\n${prDescription}\n` : '';
158
+ const reviewDomainsBlock = reviewDomains && reviewDomains.length > 0
159
+ ? `\n\nReview Focus Domains:\n${reviewDomains.map((domain) => `- ${domain}`).join('\n')}\n`
160
+ : '';
161
+ return `You are performing a CRITIC PASS to validate and challenge findings from the previous iteration.
162
+
163
+ Your role is to:
164
+ 1. Challenge each finding - require concrete evidence from the code or ContextRail standards
165
+ 2. Remove false positives - findings that lack evidence or don't violate actual standards
166
+ 3. Enforce ContextRail standards - each finding MUST be grounded in retrieved contexts
167
+ 4. Validate severity - ensure severity matches the actual risk level using the severity validation rules below
168
+ 5. Only keep findings that are:
169
+ - Supported by evidence in the code/diffs
170
+ - Linked to specific ContextRail standards (contextIdsUsed or contextIdsViolated)
171
+ - Accurately categorized by severity
172
+
173
+ ${SEVERITY_VALIDATION_RULES}
174
+ ${CONTEXTRAIL_TOOLING_BLOCK}
175
+
176
+ Context:
177
+ ${understanding}
178
+ ${prDescriptionBlock}${reviewDomainsBlock}
179
+ Files:
180
+ ${inputs.files.map((f) => `- ${f}`).join('\n')}
181
+ ${diffBlock}${contextBlock}
182
+
183
+ Previous findings (iteration 1) - CRITICALLY REVIEW THESE:
184
+ ${JSON.stringify(findings.findings, null, 2)}
185
+
186
+ For each finding, ask yourself:
187
+ - Is there concrete evidence in the code/diffs that supports this finding?
188
+ - Does this finding reference specific ContextRail standards (contextIdsUsed or contextIdsViolated)?
189
+ - Is the severity appropriate for the actual risk? (Apply severity validation rules above)
190
+ - Could this be a false positive that should be removed?
191
+ - Should the severity be downgraded based on the validation rules?
192
+
193
+ Output requirements:
194
+ - Remove any findings that lack evidence
195
+ - Remove findings that claim ContextRail impact but do not include retrieved context attribution
196
+ - Keep only findings that are well-supported and properly attributed
197
+ - Validate severity using the rules above - downgrade if evidence doesn't support the assigned level
198
+ - Set validated: true only if all remaining findings meet these criteria
199
+ - Always include notes field: use a string when you have notes, or null when none
200
+ - If you remove findings or downgrade severity, explain why in notes
201
+ - In notes, summarize which ContextRail contexts were retrieved (or state that none were relevant after tool lookup)
202
+
203
+ ${OUTPUT_CONTRACT_BLOCK}
204
+ ${FINAL_CHECKLIST_BLOCK}`;
205
+ };
206
+ /**
207
+ * Build a standard reviewer prompt for initial review (iteration 1).
208
+ */
209
+ const buildStandardReviewPrompt = (inputs, understanding, prDescription, reviewDomains) => {
210
+ const diffBlock = isDiffInputs(inputs)
211
+ ? `\nDiffs:\n${Object.entries(inputs.diffs)
212
+ .map(([file, diff]) => `\n## ${file}\n\`\`\`diff\n${diff}\n\`\`\``)
213
+ .join('\n')}`
214
+ : '';
215
+ const contextBlock = inputs.context && Object.keys(inputs.context).length > 0
216
+ ? `\n\nSurrounding Code Context:\n${Object.entries(inputs.context)
217
+ .map(([file, context]) => `\n## ${file}\n\`\`\`\n${context}\n\`\`\``)
218
+ .join('\n')}`
219
+ : '';
220
+ const prDescriptionBlock = prDescription ? `\n\nPR Description:\n${prDescription}\n` : '';
221
+ const reviewDomainsBlock = reviewDomains && reviewDomains.length > 0
222
+ ? `\n\nReview Focus Domains:\n${reviewDomains.map((domain) => `- ${domain}`).join('\n')}\n`
223
+ : '';
224
+ return `Review these changes using the reviewer prompt guidance.
225
+
226
+ Context:
227
+ ${understanding}
228
+ ${prDescriptionBlock}${reviewDomainsBlock}
229
+ Files:
230
+ ${inputs.files.map((f) => `- ${f}`).join('\n')}
231
+ ${diffBlock}${contextBlock}
232
+
233
+ Please review these changes and provide findings.
234
+
235
+ ${CONTEXTRAIL_TOOLING_BLOCK}
236
+ ${OUTPUT_CONTRACT_BLOCK}
237
+ ${FINAL_CHECKLIST_BLOCK}`;
238
+ };
239
+ export const buildReviewerUserMessage = (inputs, understanding, iteration, findings, prDescription, reviewDomains) => {
240
+ // Use critic pass for iteration 2+
241
+ if (iteration > 1 && findings) {
242
+ return buildCriticPrompt(inputs, understanding, findings, prDescription, reviewDomains);
243
+ }
244
+ // Use standard review for iteration 1
245
+ return buildStandardReviewPrompt(inputs, understanding, prDescription, reviewDomains);
246
+ };
@@ -0,0 +1,18 @@
1
+ import type { ReviewerIterationResult, ToolCallEntry } from './types.js';
2
+ import type { TokenUsage } from '../output/schema.js';
3
/**
 * Collects tracking data from the tool calls made during a model invocation.
 *
 * @param toolCalls - Tool calls as `{ toolName, input }` records, or `undefined`.
 * @returns Normalised tool-call entries together with a list of context IDs.
 *   NOTE(review): presumably the context IDs are read from the tool-call inputs,
 *   and `undefined` yields two empty arrays; confirm against the implementation.
 */
export declare const collectToolCalls: (toolCalls: {
    toolName: string;
    input?: unknown;
}[] | undefined) => {
    toolCalls: ToolCallEntry[];
    contextIds: string[];
};
10
/**
 * Folds one iteration's tracking data into a running accumulator.
 *
 * @param current - Accumulated tool calls and context IDs.
 *   NOTE(review): the `void` return type suggests this object is mutated in
 *   place; confirm against the implementation.
 * @param iteration - Result of the iteration to merge in.
 */
export declare const mergeIterationTracking: (current: {
    toolCalls: ToolCallEntry[];
    contextIds: string[];
}, iteration: ReviewerIterationResult) => void;
14
/**
 * Merge token usage from multiple sources.
 * Sums promptTokens, completionTokens, and totalTokens.
 *
 * @param usages - Usage records to combine; individual entries may be `undefined`.
 * @returns The combined usage, or `undefined`.
 *   NOTE(review): presumably `undefined` is returned when no entry carries
 *   usage data; confirm against the implementation.
 */
export declare const mergeTokenUsage: (usages: (TokenUsage | undefined)[]) => TokenUsage | undefined;