@ryuenn3123/agentic-senior-core 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/blueprints/api-nextjs.md +184 -0
- package/.agent-context/blueprints/aspnet-api.md +247 -0
- package/.agent-context/blueprints/ci-github-actions.md +226 -0
- package/.agent-context/blueprints/ci-gitlab.md +200 -0
- package/.agent-context/blueprints/fastapi-service.md +210 -0
- package/.agent-context/blueprints/go-service.md +217 -0
- package/.agent-context/blueprints/graphql-grpc-api.md +51 -0
- package/.agent-context/blueprints/infrastructure-as-code.md +62 -0
- package/.agent-context/blueprints/kubernetes-manifests.md +76 -0
- package/.agent-context/blueprints/laravel-api.md +223 -0
- package/.agent-context/blueprints/nestjs-logic.md +247 -0
- package/.agent-context/blueprints/observability.md +227 -0
- package/.agent-context/blueprints/spring-boot-api.md +218 -0
- package/.agent-context/policies/llm-judge-threshold.json +20 -0
- package/.agent-context/profiles/platform.md +13 -0
- package/.agent-context/profiles/regulated.md +13 -0
- package/.agent-context/profiles/startup.md +13 -0
- package/.agent-context/prompts/init-project.md +86 -0
- package/.agent-context/prompts/refactor.md +45 -0
- package/.agent-context/prompts/review-code.md +47 -0
- package/.agent-context/review-checklists/architecture-review.md +70 -0
- package/.agent-context/review-checklists/frontend-usability.md +33 -0
- package/.agent-context/review-checklists/performance-audit.md +65 -0
- package/.agent-context/review-checklists/pr-checklist.md +97 -0
- package/.agent-context/review-checklists/release-operations.md +29 -0
- package/.agent-context/review-checklists/security-audit.md +113 -0
- package/.agent-context/rules/api-docs.md +186 -0
- package/.agent-context/rules/architecture.md +198 -0
- package/.agent-context/rules/database-design.md +202 -0
- package/.agent-context/rules/efficiency-vs-hype.md +143 -0
- package/.agent-context/rules/error-handling.md +234 -0
- package/.agent-context/rules/event-driven.md +226 -0
- package/.agent-context/rules/frontend-architecture.md +66 -0
- package/.agent-context/rules/git-workflow.md +200 -0
- package/.agent-context/rules/microservices.md +174 -0
- package/.agent-context/rules/naming-conv.md +141 -0
- package/.agent-context/rules/performance.md +168 -0
- package/.agent-context/rules/realtime.md +47 -0
- package/.agent-context/rules/security.md +195 -0
- package/.agent-context/rules/testing.md +178 -0
- package/.agent-context/stacks/csharp.md +149 -0
- package/.agent-context/stacks/go.md +181 -0
- package/.agent-context/stacks/java.md +135 -0
- package/.agent-context/stacks/php.md +178 -0
- package/.agent-context/stacks/python.md +153 -0
- package/.agent-context/stacks/ruby.md +80 -0
- package/.agent-context/stacks/rust.md +86 -0
- package/.agent-context/stacks/typescript.md +317 -0
- package/.agent-context/state/architecture-map.md +25 -0
- package/.agent-context/state/dependency-map.md +32 -0
- package/.agent-override.md +36 -0
- package/.agents/workflows/init-project.md +29 -0
- package/.agents/workflows/refactor.md +29 -0
- package/.agents/workflows/review-code.md +29 -0
- package/.cursorrules +140 -0
- package/.gemini/instructions.md +97 -0
- package/.github/ISSUE_TEMPLATE/v1.7-frontend-work-item.yml +54 -0
- package/.github/copilot-instructions.md +104 -0
- package/.github/workflows/benchmark-detection.yml +38 -0
- package/.github/workflows/frontend-usability-gate.yml +36 -0
- package/.github/workflows/release-gate.yml +32 -0
- package/.github/workflows/sbom-compliance.yml +32 -0
- package/.windsurfrules +106 -0
- package/AGENTS.md +131 -0
- package/CONTRIBUTING.md +136 -0
- package/LICENSE +21 -0
- package/README.md +239 -0
- package/bin/agentic-senior-core.js +1147 -0
- package/mcp.json +29 -0
- package/package.json +50 -0
- package/scripts/detection-benchmark.mjs +138 -0
- package/scripts/frontend-usability-audit.mjs +87 -0
- package/scripts/generate-sbom.mjs +61 -0
- package/scripts/init-project.ps1 +105 -0
- package/scripts/init-project.sh +131 -0
- package/scripts/llm-judge.mjs +664 -0
- package/scripts/release-gate.mjs +116 -0
- package/scripts/validate.mjs +554 -0
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// @ts-check
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* scripts/llm-judge.mjs
|
|
6
|
+
*
|
|
7
|
+
* LLM-as-a-Judge CI gate — enforces pr-checklist.md on every pull request.
|
|
8
|
+
*
|
|
9
|
+
* Reads the git diff of the current PR, loads the PR checklist, sends both
|
|
10
|
+
* to the first available LLM provider, and exits 1 when CRITICAL findings
|
|
11
|
+
* exist (security gaps, N+1 queries, swallowed errors, hardcoded secrets,
|
|
12
|
+
* layer boundary violations, SQL injection risks).
|
|
13
|
+
*
|
|
14
|
+
* Supported providers (auto-selected by first available env key):
|
|
15
|
+
* OPENAI_API_KEY → gpt-4o-mini (override with LLM_JUDGE_MODEL)
|
|
16
|
+
* ANTHROPIC_API_KEY → claude-3-5-haiku-latest
|
|
17
|
+
* GEMINI_API_KEY → gemini-2.0-flash
|
|
18
|
+
*
|
|
19
|
+
* Usage:
|
|
20
|
+
* node scripts/llm-judge.mjs — auto-detect diff, call LLM
|
|
21
|
+
* node scripts/llm-judge.mjs --dry-run — print prompt, skip LLM call
|
|
22
|
+
*
|
|
23
|
+
* Environment variables:
|
|
24
|
+
* OPENAI_API_KEY OpenAI secret key
|
|
25
|
+
* ANTHROPIC_API_KEY Anthropic secret key
|
|
26
|
+
* GEMINI_API_KEY Google Gemini API key
|
|
27
|
+
* LLM_JUDGE_MODEL Override model name for the selected provider
|
|
28
|
+
* LLM_MAX_DIFF_CHARS Max characters of diff to send (default: 12000)
|
|
29
|
+
* PR_DIFF Inject diff directly (bypasses git commands)
|
|
30
|
+
* GITHUB_BASE_SHA Base commit SHA for GitHub Actions PR diffs
|
|
31
|
+
* GITHUB_HEAD_SHA Head commit SHA for GitHub Actions PR diffs
|
|
32
|
+
* CI_MERGE_REQUEST_DIFF_BASE_SHA Base SHA for GitLab MR diffs
|
|
33
|
+
* CI_COMMIT_SHA Head SHA for GitLab MR diffs
|
|
34
|
+
*
|
|
35
|
+
* Zero external dependencies — uses Node.js built-ins only (Node 18+).
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import { execSync } from 'node:child_process';
|
|
39
|
+
import { readFileSync, existsSync, writeFileSync } from 'node:fs';
|
|
40
|
+
import { resolve, dirname } from 'node:path';
|
|
41
|
+
import { fileURLToPath } from 'node:url';
|
|
42
|
+
|
|
43
|
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// ─── CONSTANTS ────────────────────────────────────────────────────────────────

// Repository root: this script lives in <root>/scripts/.
const REPOSITORY_ROOT = resolve(__dirname, '..');
const PR_CHECKLIST_PATH = resolve(REPOSITORY_ROOT, '.agent-context/review-checklists/pr-checklist.md');
const DEFAULT_MACHINE_REPORT_PATH = resolve(REPOSITORY_ROOT, '.agent-context/state/llm-judge-report.json');

// Guard against a non-numeric or non-positive LLM_MAX_DIFF_CHARS: parseInt
// would yield NaN, and `diff.length > NaN` is always false, which silently
// disables truncation and could blow the provider's token limit.
const parsedMaxDiffChars = Number.parseInt(process.env.LLM_MAX_DIFF_CHARS ?? '12000', 10);
const MAX_DIFF_CHARS = Number.isFinite(parsedMaxDiffChars) && parsedMaxDiffChars > 0 ? parsedMaxDiffChars : 12000;

const IS_DRY_RUN = process.argv.includes('--dry-run');
// JSON report emission is opt-out: anything except the literal 'false' enables it.
const SHOULD_EMIT_MACHINE_REPORT = process.env.LLM_JUDGE_EMIT_JSON !== 'false';
const MACHINE_REPORT_PATH = process.env.LLM_JUDGE_OUTPUT_PATH || DEFAULT_MACHINE_REPORT_PATH;

/** @type {string[]} Source code file extensions to include in the diff */
// NOTE(review): not referenced anywhere in this script — presumably consumed by
// a sibling script or kept for future diff filtering; verify before removing.
const SOURCE_CODE_EXTENSIONS = ['*.ts', '*.tsx', '*.js', '*.mjs', '*.cjs', '*.py', '*.go', '*.java', '*.cs', '*.rb', '*.php'];
|
|
58
|
+
|
|
59
|
+
/**
 * Maps raw severity spellings the LLM may emit onto the four canonical
 * levels (critical / high / medium / low). Frozen so a stray runtime
 * mutation cannot silently change the gate's blocking semantics.
 * @type {Readonly<Record<string, string>>}
 */
const SEVERITY_NORMALIZATION_TABLE = Object.freeze({
  critical: 'critical',
  blocker: 'critical',
  severe: 'critical',
  high: 'high',
  major: 'high',
  medium: 'medium',
  moderate: 'medium',
  low: 'low',
  minor: 'low',
  info: 'low',
  informational: 'low',
});
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* @typedef {{
|
|
76
|
+
* rule: string,
|
|
77
|
+
* problem: string,
|
|
78
|
+
* severity: string,
|
|
79
|
+
* }} Violation
|
|
80
|
+
*/
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* @typedef {{
|
|
84
|
+
* generatedAt: string,
|
|
85
|
+
* schemaVersion: string,
|
|
86
|
+
* profile: string,
|
|
87
|
+
* provider: string,
|
|
88
|
+
* ciProvider: string,
|
|
89
|
+
* blockingSeverities: string[],
|
|
90
|
+
* failDecision: boolean,
|
|
91
|
+
* malformedVerdict: boolean,
|
|
92
|
+
* providerError: boolean,
|
|
93
|
+
* dryRun: boolean,
|
|
94
|
+
* summary: {
|
|
95
|
+
* totalViolations: number,
|
|
96
|
+
* blockingViolations: number,
|
|
97
|
+
* },
|
|
98
|
+
* violations: Violation[],
|
|
99
|
+
* }} MachineReportPayload
|
|
100
|
+
*/
|
|
101
|
+
|
|
102
|
+
/**
 * Identifies which CI system the judge is running under, checking GitHub
 * Actions first, then GitLab CI, and defaulting to 'local'.
 * @returns {string} 'github' | 'gitlab' | 'local'
 */
function detectCiProvider() {
  const runningOnGithub = process.env.GITHUB_ACTIONS === 'true';
  const runningOnGitlab = process.env.GITLAB_CI === 'true';
  return runningOnGithub ? 'github' : runningOnGitlab ? 'gitlab' : 'local';
}
|
|
113
|
+
|
|
114
|
+
/**
 * Collapses an arbitrary severity spelling onto the canonical scale.
 *
 * Uses Object.hasOwn so only the table's OWN keys match: a plain bracket
 * lookup would also hit inherited Object.prototype properties — e.g.
 * normalizeSeverity('constructor') previously returned the Object
 * constructor function instead of a severity string.
 *
 * @param {string | undefined} rawSeverityValue
 * @returns {string} One of 'critical' | 'high' | 'medium' | 'low' ('low' for unknown/missing).
 */
function normalizeSeverity(rawSeverityValue) {
  const normalizedSeverityKey = String(rawSeverityValue || '').trim().toLowerCase();
  return Object.hasOwn(SEVERITY_NORMALIZATION_TABLE, normalizedSeverityKey)
    ? SEVERITY_NORMALIZATION_TABLE[normalizedSeverityKey]
    : 'low';
}
|
|
122
|
+
|
|
123
|
+
/**
 * Renders the machine report as one greppable stdout line so CI wrappers
 * can extract it with a simple `JSON_REPORT: ` prefix match.
 * @param {MachineReportPayload} machineReportPayload
 * @returns {string}
 */
function formatMachineReadableLine(machineReportPayload) {
  const serializedPayload = JSON.stringify(machineReportPayload);
  return `JSON_REPORT: ${serializedPayload}`;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Persists the machine report to disk and mirrors it to stdout, unless
 * report emission has been disabled via LLM_JUDGE_EMIT_JSON=false.
 * @param {MachineReportPayload} machineReportPayload
 */
function emitMachineReadableReport(machineReportPayload) {
  if (!SHOULD_EMIT_MACHINE_REPORT) {
    return;
  }

  const prettyJson = JSON.stringify(machineReportPayload, null, 2);
  writeFileSync(MACHINE_REPORT_PATH, `${prettyJson}\n`, 'utf-8');
  console.log(formatMachineReadableLine(machineReportPayload));
  console.log(`📎 Machine report saved: ${MACHINE_REPORT_PATH}`);
}
|
|
143
|
+
|
|
144
|
+
// ─── GIT DIFF COLLECTION ──────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
/**
 * Collects the pull request diff from the best available source:
 *   1. PR_DIFF env var (direct injection — highest priority)
 *   2. GitHub Actions env vars (GITHUB_BASE_SHA / GITHUB_HEAD_SHA)
 *   3. GitLab CI env vars (CI_MERGE_REQUEST_DIFF_BASE_SHA / CI_COMMIT_SHA)
 *   4. Local fallback: HEAD~1..HEAD
 *
 * SHAs taken from the environment are validated as plain hex revisions before
 * being interpolated into a shell command, so a malicious or corrupted CI
 * variable cannot inject arbitrary shell syntax through execSync.
 *
 * @returns {string} The raw git diff output ('' when no diff can be produced)
 */
function collectPullRequestDiff() {
  if (process.env.PR_DIFF) {
    console.log('   Source: PR_DIFF env variable');
    return process.env.PR_DIFF;
  }

  const execOptions = {
    cwd: REPOSITORY_ROOT,
    encoding: /** @type {'utf-8'} */ ('utf-8'),
    maxBuffer: 1024 * 1024 * 8, // 8 MB
  };

  // Accept only abbreviated/full hex SHAs or the literal HEAD; anything else
  // is treated as absent rather than handed to the shell.
  const isSafeRevision = (revision) => revision === 'HEAD' || /^[0-9a-fA-F]{4,40}$/.test(revision);

  // GitHub Actions: PR event injects base/head SHAs
  const githubBaseSha = process.env.GITHUB_BASE_SHA;
  const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
  if (githubBaseSha && isSafeRevision(githubBaseSha) && isSafeRevision(githubHeadSha)) {
    console.log(`   Source: GitHub Actions diff (${githubBaseSha.slice(0, 8)}...${githubHeadSha.slice(0, 8)})`);
    return execSync(`git diff "${githubBaseSha}...${githubHeadSha}"`, execOptions);
  }

  // GitLab CI: merge request event provides base + head SHAs
  const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
  const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
  if (gitlabBaseSha && isSafeRevision(gitlabBaseSha) && isSafeRevision(gitlabHeadSha)) {
    console.log(`   Source: GitLab CI diff (${gitlabBaseSha.slice(0, 8)}...${gitlabHeadSha.slice(0, 8)})`);
    return execSync(`git diff "${gitlabBaseSha}...${gitlabHeadSha}"`, execOptions);
  }

  // Local / fallback: last commit diff
  console.log('   Source: local HEAD~1..HEAD fallback');
  try {
    return execSync('git diff HEAD~1 HEAD', execOptions);
  } catch {
    try {
      // Initial commit has no parent — diff against git's well-known empty tree
      const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
      return execSync(`git diff "${emptyTreeSha}" HEAD`, execOptions);
    } catch {
      console.warn('   ⚠️ Unable to execute git diff. Defaulting to empty diff.');
      return '';
    }
  }
}
|
|
198
|
+
|
|
199
|
+
// ─── CHECKLIST & THRESHOLDS LOADING ───────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
/**
 * Loads and returns the PR checklist markdown content.
 * @returns {string} The checklist file contents
 * @throws {Error} When the checklist file does not exist.
 */
function loadPrChecklist() {
  if (existsSync(PR_CHECKLIST_PATH)) {
    return readFileSync(PR_CHECKLIST_PATH, 'utf-8');
  }
  throw new Error(`PR checklist not found at: ${PR_CHECKLIST_PATH}`);
}
|
|
212
|
+
|
|
213
|
+
/**
 * Loads the LLM judge threshold policy, falling back to a built-in
 * 'balanced' profile when the policy file is absent.
 * @returns {any} The thresholds object
 */
function loadThresholds() {
  const thresholdsPath = resolve(REPOSITORY_ROOT, '.agent-context/policies/llm-judge-threshold.json');
  if (existsSync(thresholdsPath)) {
    return JSON.parse(readFileSync(thresholdsPath, 'utf-8'));
  }
  // Built-in default mirrors the shipped 'balanced' policy profile.
  return {
    selectedProfile: 'balanced',
    profileThresholds: {
      balanced: { blockingSeverities: ['critical', 'high'], failOnMalformedResponse: true, failOnProviderError: false },
    },
  };
}
|
|
230
|
+
|
|
231
|
+
// ─── PROMPT CONSTRUCTION ─────────────────────────────────────────────────────
|
|
232
|
+
|
|
233
|
+
/**
 * Returns the system-level instruction for the LLM judge role.
 *
 * The mandated trailing `JSON_VERDICT:` line is the machine-readable part of
 * the model's answer; extractVerdict() later locates and parses it, so the
 * exact marker spelling below is load-bearing.
 *
 * @returns {string}
 */
function buildSystemPrompt() {
  // NOTE: the template below is a prompt, not code — keep the wording and the
  // `JSON_VERDICT:` marker stable; the CI gate's parser depends on them.
  return `You are a Senior Software Architect performing an automated code review for a CI/CD pipeline.

Your job: evaluate a git diff against the provided PR checklist and identify violations.
You must categorize each violation with a severity level: critical, high, medium, or low.

## Severity classification:
- critical: Security vulnerabilities (hardcoded secrets, SQL/command injection, missing auth checks, CORS), unvalidated external inputs.
- high: N+1 database queries, swallowed errors (empty catch blocks without re-throw/recovery), layer boundary violations.
- medium: TypeScript \`any\` type used without justification, missing test coverage, bad architectural patterns.
- low: Style preferences, minor naming nitpicks, documentation suggestions, performance micro-optimizations.

## Mandatory output format:
You MUST output your findings in EXACTLY this structure:

\`\`\`
## PR REVIEW RESULTS
━━━━━━━━━━━━━━━━━━━

✅ [Section Name] — Passes
❌ [Section Name] — FAILS
📌 Rule: [rule file and section]
❌ Problem: [exact description of the issue found in the diff]
⚠️ Severity: [critical | high | medium | low]
✅ Fix: [specific actionable fix]

\`\`\`

Rules:
- Then at the absolute LAST line of your response, output a JSON array of the failed checks. Each object should have 'rule', 'problem', 'severity'. If there are no failures, output an empty array [].
- Make sure the JSON array is perfectly valid JSON on a single line starting with \`JSON_VERDICT: \`. For example:
JSON_VERDICT: [{"rule": "Security", "problem": "Hardcoded secret", "severity": "critical"}]
- If the diff is empty, contains only documentation changes, or has no source code changes, output JSON_VERDICT: [] immediately.`;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Builds the user message combining the checklist and the (possibly truncated) diff.
 *
 * Fix: the previous closing instruction asked the model to end with
 * "VERDICT: PASS ✅ or VERDICT: FAIL ❌", contradicting the system prompt's
 * requirement that the absolute LAST line be the JSON_VERDICT array (the only
 * marker extractVerdict() parses). The instruction now matches that contract.
 *
 * @param {string} prChecklistContent - pr-checklist.md contents, quoted verbatim.
 * @param {string} diffContent - Raw `git diff` output.
 * @param {number} [maxDiffChars] - Truncation limit; defaults to MAX_DIFF_CHARS.
 * @returns {string}
 */
function buildUserMessage(prChecklistContent, diffContent, maxDiffChars = MAX_DIFF_CHARS) {
  const truncatedDiff =
    diffContent.length > maxDiffChars
      ? `${diffContent.slice(0, maxDiffChars)}\n\n[DIFF TRUNCATED — ${(diffContent.length - maxDiffChars).toLocaleString()} additional characters omitted to stay within token limits]`
      : diffContent;

  return `## PR Checklist Reference

${prChecklistContent}

---

## Git Diff to Review

\`\`\`diff
${truncatedDiff.trim() || '(empty diff — no source code changes detected)'}
\`\`\`

Review the diff against the checklist. Report your findings in the required format, ending with the single-line JSON_VERDICT described in your instructions.`;
}
|
|
300
|
+
|
|
301
|
+
// ─── LLM PROVIDER IMPLEMENTATIONS ────────────────────────────────────────────
|
|
302
|
+
|
|
303
|
+
/**
 * Calls the OpenAI Chat Completions API with deterministic settings
 * (temperature 0) and returns the assistant's text.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>}
 * @throws {Error} On any non-2xx HTTP status (response body included).
 */
async function callOpenAiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
  const requestBody = {
    model: selectedModel,
    max_tokens: 2048,
    temperature: 0,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userMessage },
    ],
  };

  const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify(requestBody),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ choices: Array<{ message: { content: string } }> }} */
  const responsePayload = await apiResponse.json();
  return responsePayload.choices[0].message.content;
}
|
|
338
|
+
|
|
339
|
+
/**
 * Calls the Anthropic Messages API.
 *
 * Fix: now pins `temperature: 0` like the OpenAI and Gemini providers do —
 * previously this provider alone used the API default temperature, making the
 * judge's verdicts non-deterministic on Anthropic.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>}
 * @throws {Error} On any non-2xx HTTP status (response body included).
 */
async function callAnthropicProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
  const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: selectedModel,
      max_tokens: 2048,
      temperature: 0, // parity with the other providers — deterministic verdicts
      system: systemPrompt,
      messages: [{ role: 'user', content: userMessage }],
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ content: Array<{ text: string }> }} */
  const responsePayload = await apiResponse.json();
  return responsePayload.content[0].text;
}
|
|
372
|
+
|
|
373
|
+
/**
 * Calls the Google Gemini generateContent API.
 *
 * Fix: the API key is now sent via the `x-goog-api-key` request header instead
 * of a `?key=` URL query parameter, so the secret cannot leak into proxy or
 * access logs, browser history, or error messages that echo the URL.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>}
 * @throws {Error} On any non-2xx HTTP status (response body included).
 */
async function callGeminiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
  const apiKey = process.env.GEMINI_API_KEY ?? '';
  const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent`;

  const apiResponse = await fetch(endpointUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey, // header auth — keeps the key out of the URL
    },
    body: JSON.stringify({
      system_instruction: { parts: [{ text: systemPrompt }] },
      contents: [{ role: 'user', parts: [{ text: userMessage }] }],
      generationConfig: { temperature: 0, maxOutputTokens: 2048 },
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ candidates: Array<{ content: { parts: Array<{ text: string }> } }> }} */
  const responsePayload = await apiResponse.json();
  return responsePayload.candidates[0].content.parts[0].text;
}
|
|
404
|
+
|
|
405
|
+
// ─── PROVIDER AUTO-SELECTION ──────────────────────────────────────────────────
|
|
406
|
+
|
|
407
|
+
/**
 * @typedef {{ providerName: string, invokeProvider: (sys: string, usr: string) => Promise<string> }} SelectedProvider
 */

/**
 * Returns the first available LLM provider based on environment keys.
 * Priority: OpenAI → Anthropic → Gemini.
 *
 * @returns {SelectedProvider | null} null when no provider key is configured.
 */
function selectAvailableProvider() {
  const providerCandidates = [
    { envKey: 'OPENAI_API_KEY', providerName: 'OpenAI (gpt-4o-mini)', invokeProvider: callOpenAiProvider },
    { envKey: 'ANTHROPIC_API_KEY', providerName: 'Anthropic (claude-3-5-haiku-latest)', invokeProvider: callAnthropicProvider },
    { envKey: 'GEMINI_API_KEY', providerName: 'Google Gemini (gemini-2.0-flash)', invokeProvider: callGeminiProvider },
  ];

  for (const { envKey, providerName, invokeProvider } of providerCandidates) {
    if (process.env[envKey]) {
      return { providerName, invokeProvider };
    }
  }
  return null;
}
|
|
429
|
+
|
|
430
|
+
// ─── VERDICT PARSING ─────────────────────────────────────────────────────────
|
|
431
|
+
|
|
432
|
+
/**
 * Extracts and parses the JSON verdict from the LLM response.
 *
 * Fix: the prompt instructs the model to put the authoritative verdict on its
 * absolute LAST line, but the previous implementation used `.match()`, which
 * returns the FIRST `JSON_VERDICT:` occurrence — so a marker echoed earlier in
 * the prose report (e.g. when the model quotes its instructions) was parsed
 * instead of the real verdict. We now take the last occurrence.
 *
 * May terminate the process (exit 1) when the response is malformed and the
 * active profile forbids malformed responses.
 *
 * @param {string} llmResponseText
 * @param {boolean} failOnMalformedResponse
 * @returns {Array<{ rule: string, problem: string, severity: string }>}
 */
function extractVerdict(llmResponseText, failOnMalformedResponse) {
  const allMatches = [...llmResponseText.matchAll(/JSON_VERDICT:\s*(\[.*\])/gi)];
  const match = allMatches.length > 0 ? allMatches[allMatches.length - 1] : null;
  if (!match) {
    console.warn('⚠️ LLM response did not include a valid JSON_VERDICT line.');
    if (failOnMalformedResponse) {
      console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
      process.exit(1);
    }
    return [];
  }
  try {
    return JSON.parse(match[1]);
  } catch (err) {
    const parseError = /** @type {Error} */ (err);
    console.error('⚠️ Failed to parse JSON_VERDICT:', parseError.message);
    if (failOnMalformedResponse) {
      process.exit(1);
    }
    return [];
  }
}
|
|
460
|
+
|
|
461
|
+
/**
 * Coerces raw LLM verdict entries into well-formed Violation records,
 * substituting defaults for missing fields and canonicalizing severity.
 * @param {Array<{ rule?: string, problem?: string, severity?: string }>} violations
 * @returns {Violation[]}
 */
function normalizeViolations(violations) {
  const normalizedRecords = [];
  for (const violationItem of violations) {
    normalizedRecords.push({
      rule: String(violationItem.rule || 'Unknown Rule'),
      problem: String(violationItem.problem || 'No problem description provided.'),
      severity: normalizeSeverity(violationItem.severity),
    });
  }
  return normalizedRecords;
}
|
|
472
|
+
|
|
473
|
+
/**
 * Assembles the machine-readable report written to disk and stdout.
 * failDecision is true when any blocking violation exists, or when the
 * verdict was malformed / the provider errored (as flagged by the caller).
 *
 * @param {{
 *   provider: string,
 *   selectedProfile: string,
 *   blockingSeverities: string[],
 *   finalViolations: Violation[],
 *   blockingFound: Violation[],
 *   isDryRun: boolean,
 *   malformedVerdict: boolean,
 *   providerError: boolean,
 * }} payloadInput
 * @returns {MachineReportPayload}
 */
function buildMachineReportPayload(payloadInput) {
  const {
    provider,
    selectedProfile,
    blockingSeverities,
    finalViolations,
    blockingFound,
    isDryRun,
    malformedVerdict,
    providerError,
  } = payloadInput;

  const failDecision = blockingFound.length > 0 || malformedVerdict || providerError;

  return {
    generatedAt: new Date().toISOString(),
    schemaVersion: '1.0',
    profile: selectedProfile,
    provider,
    ciProvider: detectCiProvider(),
    blockingSeverities,
    failDecision,
    malformedVerdict,
    providerError,
    dryRun: isDryRun,
    summary: {
      totalViolations: finalViolations.length,
      blockingViolations: blockingFound.length,
    },
    violations: finalViolations,
  };
}
|
|
514
|
+
|
|
515
|
+
// ─── MAIN ─────────────────────────────────────────────────────────────────────
|
|
516
|
+
|
|
517
|
+
/**
 * CI gate entry point.
 *
 * Pipeline: load checklist + threshold profile → collect the PR diff → build
 * prompts → (optionally dry-run and stop) → select a provider → call the LLM →
 * print its report → parse the JSON_VERDICT → exit 1 iff blocking-severity
 * violations were found. Never returns normally: every path calls
 * process.exit (0 = pass/skip, 1 = blocked).
 */
async function main() {
  console.log('');
  console.log('🔍 LLM Judge — Automated Code Review Gate');
  console.log('════════════════════════════════════════════');
  console.log('');

  // ── Step 1: Load checklist and thresholds ──────────────
  const prChecklistContent = loadPrChecklist();
  const thresholdsObj = loadThresholds();
  const selectedProfile = thresholdsObj.selectedProfile || 'balanced';
  const profileConfig = thresholdsObj.profileThresholds[selectedProfile] || {};
  const blockingSeverities = profileConfig.blockingSeverities || ['critical', 'high'];
  // Malformed LLM responses fail the gate unless the profile explicitly opts out.
  const failOnMalformedResponse = profileConfig.failOnMalformedResponse !== false;
  // Provider (network/API) errors are tolerated by default — opt-in strictness.
  const failOnProviderError = profileConfig.failOnProviderError || false;

  console.log(`✅ PR checklist loaded (${prChecklistContent.length} chars)`);
  console.log(`✅ Threshold profile loaded: ${selectedProfile} (blocking: ${blockingSeverities.join(', ')})`);

  // ── Step 2: Collect diff ────────────────────────────────
  const rawDiff = collectPullRequestDiff();
  console.log(`✅ Git diff collected (${rawDiff.length} chars${rawDiff.length > MAX_DIFF_CHARS ? ` — will truncate to ${MAX_DIFF_CHARS}` : ''})`);

  // ── Step 3: Build prompt ────────────────────────────────
  const systemPrompt = buildSystemPrompt();
  const userMessage = buildUserMessage(prChecklistContent, rawDiff);

  // ── Step 4: Dry run mode — preview prompts, emit an empty report, exit 0 ──
  if (IS_DRY_RUN) {
    console.log('');
    console.log('── DRY RUN MODE ──────────────────────────────────────────');
    console.log('[SYSTEM PROMPT PREVIEW]');
    console.log(systemPrompt.slice(0, 400) + '...');
    console.log('');
    console.log('[USER MESSAGE PREVIEW]');
    console.log(userMessage.slice(0, 400) + '...');
    console.log('─────────────────────────────────────────────────────────');
    console.log('');
    const dryRunReportPayload = buildMachineReportPayload({
      provider: 'dry-run',
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: true,
      malformedVerdict: false,
      providerError: false,
    });
    emitMachineReadableReport(dryRunReportPayload);
    console.log('VERDICT: JSON_VERDICT: [] (dry run — no LLM call made)');
    process.exit(0);
  }

  // ── Step 5: Select provider ─────────────────────────────
  // No key configured → soft-skip (exit 0): the gate never blocks a pipeline
  // merely because LLM review is unavailable.
  const selectedProvider = selectAvailableProvider();
  if (!selectedProvider) {
    console.warn('');
    console.warn('⚠️ No LLM API key detected.');
    console.warn('   Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GEMINI_API_KEY');
    console.warn('   to enable automated code review.');
    console.warn('');
    console.warn('⏭️ Skipping LLM review — pipeline continues (PASS).');
    const skippedReportPayload = buildMachineReportPayload({
      provider: 'none',
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: false,
      malformedVerdict: false,
      providerError: false,
    });
    emitMachineReadableReport(skippedReportPayload);
    process.exit(0);
  }

  console.log(`✅ Provider selected: ${selectedProvider.providerName}`);
  if (process.env.LLM_JUDGE_MODEL) {
    console.log(`   Model override: ${process.env.LLM_JUDGE_MODEL}`);
  }
  console.log('');
  console.log('⏳ Sending diff to LLM for review...');
  console.log('');

  // ── Step 6: Call LLM ────────────────────────────────────
  // Provider failures are soft (exit 0) unless failOnProviderError is set.
  let llmReviewText;
  try {
    llmReviewText = await selectedProvider.invokeProvider(systemPrompt, userMessage);
  } catch (providerCallError) {
    console.warn(`⚠️ LLM call failed: ${/** @type {Error} */ (providerCallError).message}`);
    const providerErrorReportPayload = buildMachineReportPayload({
      provider: selectedProvider.providerName,
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: false,
      malformedVerdict: false,
      providerError: Boolean(failOnProviderError),
    });
    emitMachineReadableReport(providerErrorReportPayload);
    if (failOnProviderError) {
      console.error('❌ Failing pipeline because provider errors are not allowed by the profile.');
      process.exit(1);
    }
    console.warn('   Skipping LLM review — pipeline continues (PASS).');
    process.exit(0);
  }

  // ── Step 7: Print report ────────────────────────────────
  console.log('── LLM Review Report ─────────────────────────────────────');
  console.log('');
  console.log(llmReviewText);
  console.log('');
  console.log('──────────────────────────────────────────────────────────');
  console.log('');

  // ── Step 8: Enforce verdict ─────────────────────────────
  const rawVerdictViolations = extractVerdict(llmReviewText, failOnMalformedResponse);
  const finalViolations = normalizeViolations(rawVerdictViolations);
  // Re-derive malformedness from the raw text: extractVerdict returns []
  // both for "no verdict present" and for a legitimately empty verdict.
  const hasMalformedVerdict = !/JSON_VERDICT:\s*\[/i.test(llmReviewText);

  const blockingFound = finalViolations.filter(v => blockingSeverities.includes(v.severity.toLowerCase()));
  const machineReportPayload = buildMachineReportPayload({
    provider: selectedProvider.providerName,
    selectedProfile,
    blockingSeverities,
    finalViolations,
    blockingFound,
    isDryRun: false,
    malformedVerdict: hasMalformedVerdict,
    providerError: false,
  });
  emitMachineReadableReport(machineReportPayload);

  if (blockingFound.length > 0) {
    console.error(`❌ LLM Judge: ${blockingFound.length} blocking violations found (severities: ${blockingSeverities.join(', ')}). Pipeline FAILED.`);
    console.error('   Fix the issues listed above before merging.');
    process.exit(1);
  }

  console.log('✅ LLM Judge: No blocking violations. Pipeline PASSED.');
  process.exit(0);
}
|
|
660
|
+
|
|
661
|
+
// Top-level invocation: any error not handled inside main() (checklist I/O,
// threshold parsing, etc.) fails the CI gate explicitly with exit code 1
// instead of surfacing as an unhandled promise rejection.
main().catch((unexpectedError) => {
  console.error('❌ Unexpected error in llm-judge:', unexpectedError);
  process.exit(1);
});
|