@ryuenn3123/agentic-senior-core 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/.agent-context/blueprints/api-nextjs.md +184 -0
  2. package/.agent-context/blueprints/aspnet-api.md +247 -0
  3. package/.agent-context/blueprints/ci-github-actions.md +226 -0
  4. package/.agent-context/blueprints/ci-gitlab.md +200 -0
  5. package/.agent-context/blueprints/fastapi-service.md +210 -0
  6. package/.agent-context/blueprints/go-service.md +217 -0
  7. package/.agent-context/blueprints/graphql-grpc-api.md +51 -0
  8. package/.agent-context/blueprints/infrastructure-as-code.md +62 -0
  9. package/.agent-context/blueprints/kubernetes-manifests.md +76 -0
  10. package/.agent-context/blueprints/laravel-api.md +223 -0
  11. package/.agent-context/blueprints/nestjs-logic.md +247 -0
  12. package/.agent-context/blueprints/observability.md +227 -0
  13. package/.agent-context/blueprints/spring-boot-api.md +218 -0
  14. package/.agent-context/policies/llm-judge-threshold.json +20 -0
  15. package/.agent-context/profiles/platform.md +13 -0
  16. package/.agent-context/profiles/regulated.md +13 -0
  17. package/.agent-context/profiles/startup.md +13 -0
  18. package/.agent-context/prompts/init-project.md +86 -0
  19. package/.agent-context/prompts/refactor.md +45 -0
  20. package/.agent-context/prompts/review-code.md +47 -0
  21. package/.agent-context/review-checklists/architecture-review.md +70 -0
  22. package/.agent-context/review-checklists/frontend-usability.md +33 -0
  23. package/.agent-context/review-checklists/performance-audit.md +65 -0
  24. package/.agent-context/review-checklists/pr-checklist.md +97 -0
  25. package/.agent-context/review-checklists/release-operations.md +29 -0
  26. package/.agent-context/review-checklists/security-audit.md +113 -0
  27. package/.agent-context/rules/api-docs.md +186 -0
  28. package/.agent-context/rules/architecture.md +198 -0
  29. package/.agent-context/rules/database-design.md +202 -0
  30. package/.agent-context/rules/efficiency-vs-hype.md +143 -0
  31. package/.agent-context/rules/error-handling.md +234 -0
  32. package/.agent-context/rules/event-driven.md +226 -0
  33. package/.agent-context/rules/frontend-architecture.md +66 -0
  34. package/.agent-context/rules/git-workflow.md +200 -0
  35. package/.agent-context/rules/microservices.md +174 -0
  36. package/.agent-context/rules/naming-conv.md +141 -0
  37. package/.agent-context/rules/performance.md +168 -0
  38. package/.agent-context/rules/realtime.md +47 -0
  39. package/.agent-context/rules/security.md +195 -0
  40. package/.agent-context/rules/testing.md +178 -0
  41. package/.agent-context/stacks/csharp.md +149 -0
  42. package/.agent-context/stacks/go.md +181 -0
  43. package/.agent-context/stacks/java.md +135 -0
  44. package/.agent-context/stacks/php.md +178 -0
  45. package/.agent-context/stacks/python.md +153 -0
  46. package/.agent-context/stacks/ruby.md +80 -0
  47. package/.agent-context/stacks/rust.md +86 -0
  48. package/.agent-context/stacks/typescript.md +317 -0
  49. package/.agent-context/state/architecture-map.md +25 -0
  50. package/.agent-context/state/dependency-map.md +32 -0
  51. package/.agent-override.md +36 -0
  52. package/.agents/workflows/init-project.md +29 -0
  53. package/.agents/workflows/refactor.md +29 -0
  54. package/.agents/workflows/review-code.md +29 -0
  55. package/.cursorrules +140 -0
  56. package/.gemini/instructions.md +97 -0
  57. package/.github/ISSUE_TEMPLATE/v1.7-frontend-work-item.yml +54 -0
  58. package/.github/copilot-instructions.md +104 -0
  59. package/.github/workflows/benchmark-detection.yml +38 -0
  60. package/.github/workflows/frontend-usability-gate.yml +36 -0
  61. package/.github/workflows/release-gate.yml +32 -0
  62. package/.github/workflows/sbom-compliance.yml +32 -0
  63. package/.windsurfrules +106 -0
  64. package/AGENTS.md +131 -0
  65. package/CONTRIBUTING.md +136 -0
  66. package/LICENSE +21 -0
  67. package/README.md +239 -0
  68. package/bin/agentic-senior-core.js +1147 -0
  69. package/mcp.json +29 -0
  70. package/package.json +50 -0
  71. package/scripts/detection-benchmark.mjs +138 -0
  72. package/scripts/frontend-usability-audit.mjs +87 -0
  73. package/scripts/generate-sbom.mjs +61 -0
  74. package/scripts/init-project.ps1 +105 -0
  75. package/scripts/init-project.sh +131 -0
  76. package/scripts/llm-judge.mjs +664 -0
  77. package/scripts/release-gate.mjs +116 -0
  78. package/scripts/validate.mjs +554 -0
@@ -0,0 +1,664 @@
1
+ #!/usr/bin/env node
2
+ // @ts-check
3
+
4
+ /**
5
+ * scripts/llm-judge.mjs
6
+ *
7
+ * LLM-as-a-Judge CI gate — enforces pr-checklist.md on every pull request.
8
+ *
9
+ * Reads the git diff of the current PR, loads the PR checklist, sends both
10
+ * to the first available LLM provider, and exits 1 when CRITICAL findings
11
+ * exist (security gaps, N+1 queries, swallowed errors, hardcoded secrets,
12
+ * layer boundary violations, SQL injection risks).
13
+ *
14
+ * Supported providers (auto-selected by first available env key):
15
+ * OPENAI_API_KEY → gpt-4o-mini (override with LLM_JUDGE_MODEL)
16
+ * ANTHROPIC_API_KEY → claude-3-5-haiku-latest
17
+ * GEMINI_API_KEY → gemini-2.0-flash
18
+ *
19
+ * Usage:
20
+ * node scripts/llm-judge.mjs — auto-detect diff, call LLM
21
+ * node scripts/llm-judge.mjs --dry-run — print prompt, skip LLM call
22
+ *
23
+ * Environment variables:
24
+ * OPENAI_API_KEY OpenAI secret key
25
+ * ANTHROPIC_API_KEY Anthropic secret key
26
+ * GEMINI_API_KEY Google Gemini API key
27
+ * LLM_JUDGE_MODEL Override model name for the selected provider
28
+ * LLM_MAX_DIFF_CHARS Max characters of diff to send (default: 12000)
29
+ * PR_DIFF Inject diff directly (bypasses git commands)
30
+ * GITHUB_BASE_SHA Base commit SHA for GitHub Actions PR diffs
31
+ * GITHUB_HEAD_SHA Head commit SHA for GitHub Actions PR diffs
32
+ * CI_MERGE_REQUEST_DIFF_BASE_SHA Base SHA for GitLab MR diffs
33
+ * CI_COMMIT_SHA Head SHA for GitLab MR diffs
34
+ *
35
+ * Zero external dependencies — uses Node.js built-ins only (Node 18+).
36
+ */
37
+
38
+ import { execSync } from 'node:child_process';
39
+ import { readFileSync, existsSync, writeFileSync } from 'node:fs';
40
+ import { resolve, dirname } from 'node:path';
41
+ import { fileURLToPath } from 'node:url';
42
+
43
+ const __filename = fileURLToPath(import.meta.url);
44
+ const __dirname = dirname(__filename);
45
+
46
// ─── CONSTANTS ────────────────────────────────────────────────────────────────

const REPOSITORY_ROOT = resolve(__dirname, '..');
const PR_CHECKLIST_PATH = resolve(REPOSITORY_ROOT, '.agent-context/review-checklists/pr-checklist.md');
const DEFAULT_MACHINE_REPORT_PATH = resolve(REPOSITORY_ROOT, '.agent-context/state/llm-judge-report.json');

// Parse LLM_MAX_DIFF_CHARS defensively: a non-numeric or non-positive value
// would previously produce NaN, and every `length > MAX_DIFF_CHARS` comparison
// against NaN is false — silently disabling diff truncation and risking
// oversized prompts. Fall back to the 12000-char default instead.
const PARSED_MAX_DIFF_CHARS = Number.parseInt(process.env.LLM_MAX_DIFF_CHARS ?? '12000', 10);
const MAX_DIFF_CHARS =
  Number.isFinite(PARSED_MAX_DIFF_CHARS) && PARSED_MAX_DIFF_CHARS > 0 ? PARSED_MAX_DIFF_CHARS : 12000;

const IS_DRY_RUN = process.argv.includes('--dry-run');
const SHOULD_EMIT_MACHINE_REPORT = process.env.LLM_JUDGE_EMIT_JSON !== 'false';
const MACHINE_REPORT_PATH = process.env.LLM_JUDGE_OUTPUT_PATH || DEFAULT_MACHINE_REPORT_PATH;

/** @type {string[]} Source code file extensions to include in the diff */
// NOTE(review): this list is not referenced anywhere else in this file —
// presumably intended as a pathspec filter for `git diff`; confirm before removing.
const SOURCE_CODE_EXTENSIONS = ['*.ts', '*.tsx', '*.js', '*.mjs', '*.cjs', '*.py', '*.go', '*.java', '*.cs', '*.rb', '*.php'];

/**
 * Maps free-form severity labels the LLM may emit onto the four canonical
 * levels (critical/high/medium/low) consumed by the blocking-severity gate.
 * @type {Record<string, string>}
 */
const SEVERITY_NORMALIZATION_TABLE = {
  critical: 'critical',
  blocker: 'critical',
  severe: 'critical',
  high: 'high',
  major: 'high',
  medium: 'medium',
  moderate: 'medium',
  low: 'low',
  minor: 'low',
  info: 'low',
  informational: 'low',
};
73
+
74
+ /**
75
+ * @typedef {{
76
+ * rule: string,
77
+ * problem: string,
78
+ * severity: string,
79
+ * }} Violation
80
+ */
81
+
82
+ /**
83
+ * @typedef {{
84
+ * generatedAt: string,
85
+ * schemaVersion: string,
86
+ * profile: string,
87
+ * provider: string,
88
+ * ciProvider: string,
89
+ * blockingSeverities: string[],
90
+ * failDecision: boolean,
91
+ * malformedVerdict: boolean,
92
+ * providerError: boolean,
93
+ * dryRun: boolean,
94
+ * summary: {
95
+ * totalViolations: number,
96
+ * blockingViolations: number,
97
+ * },
98
+ * violations: Violation[],
99
+ * }} MachineReportPayload
100
+ */
101
+
102
/**
 * Determines which CI system the script is running under, using the
 * well-known boolean environment flags each platform sets.
 *
 * @returns {'github' | 'gitlab' | 'local'}
 */
function detectCiProvider() {
  if (process.env.GITHUB_ACTIONS === 'true') return 'github';
  if (process.env.GITLAB_CI === 'true') return 'gitlab';
  return 'local';
}
113
+
114
/**
 * Maps a raw severity label from the LLM onto one of the four canonical
 * levels via SEVERITY_NORMALIZATION_TABLE. Unknown, empty, or missing
 * labels degrade to 'low' so they never block the pipeline by accident.
 *
 * @param {string | undefined} rawSeverityValue
 * @returns {string}
 */
function normalizeSeverity(rawSeverityValue) {
  const lookupKey = String(rawSeverityValue || '').trim().toLowerCase();
  const canonicalSeverity = SEVERITY_NORMALIZATION_TABLE[lookupKey];
  return canonicalSeverity || 'low';
}
122
+
123
/**
 * Serializes the report payload as a single greppable stdout line so CI
 * wrappers can extract it without parsing the full console output.
 *
 * @param {MachineReportPayload} machineReportPayload
 * @returns {string}
 */
function formatMachineReadableLine(machineReportPayload) {
  const serializedPayload = JSON.stringify(machineReportPayload);
  return `JSON_REPORT: ${serializedPayload}`;
}
130
+
131
/**
 * Persists the machine-readable report to MACHINE_REPORT_PATH and echoes
 * the greppable JSON_REPORT line to stdout. Emission can be disabled
 * entirely with LLM_JUDGE_EMIT_JSON=false.
 *
 * @param {MachineReportPayload} machineReportPayload
 */
function emitMachineReadableReport(machineReportPayload) {
  if (!SHOULD_EMIT_MACHINE_REPORT) {
    return;
  }

  const prettyJson = JSON.stringify(machineReportPayload, null, 2);
  writeFileSync(MACHINE_REPORT_PATH, `${prettyJson}\n`, 'utf-8');

  console.log(formatMachineReadableLine(machineReportPayload));
  console.log(`📎 Machine report saved: ${MACHINE_REPORT_PATH}`);
}
143
+
144
+ // ─── GIT DIFF COLLECTION ──────────────────────────────────────────────────────
145
+
146
/** Accepts 'HEAD' or an abbreviated/full hex commit SHA — nothing a shell could interpret. */
const SAFE_GIT_REF_PATTERN = /^(HEAD|[0-9a-f]{4,40})$/i;

/**
 * Collects the pull request diff from the best available source:
 * 1. PR_DIFF env var (direct injection — highest priority)
 * 2. GitHub Actions env vars (GITHUB_BASE_SHA / GITHUB_HEAD_SHA)
 * 3. GitLab CI env vars (CI_MERGE_REQUEST_DIFF_BASE_SHA / CI_COMMIT_SHA)
 * 4. Local fallback: HEAD~1..HEAD
 *
 * CI-supplied SHAs are validated against SAFE_GIT_REF_PATTERN before being
 * interpolated into the command line: execSync runs through a shell, so an
 * unvalidated environment value could smuggle in shell metacharacters
 * (command injection). Invalid values fall through to the local fallback.
 *
 * @returns {string} The raw git diff output
 */
function collectPullRequestDiff() {
  if (process.env.PR_DIFF) {
    console.log(' Source: PR_DIFF env variable');
    return process.env.PR_DIFF;
  }

  const execOptions = {
    cwd: REPOSITORY_ROOT,
    encoding: /** @type {'utf-8'} */ ('utf-8'),
    maxBuffer: 1024 * 1024 * 8, // 8 MB
  };

  // GitHub Actions: PR event injects base/head SHAs.
  const githubBaseSha = process.env.GITHUB_BASE_SHA;
  const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
  if (githubBaseSha) {
    if (SAFE_GIT_REF_PATTERN.test(githubBaseSha) && SAFE_GIT_REF_PATTERN.test(githubHeadSha)) {
      console.log(` Source: GitHub Actions diff (${githubBaseSha.slice(0, 8)}...${githubHeadSha.slice(0, 8)})`);
      return execSync(`git diff "${githubBaseSha}...${githubHeadSha}"`, execOptions);
    }
    console.warn(' ⚠️ Ignoring GITHUB_BASE_SHA/GITHUB_HEAD_SHA: not a valid commit SHA.');
  }

  // GitLab CI: merge request event provides base + head SHAs.
  const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
  const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
  if (gitlabBaseSha) {
    if (SAFE_GIT_REF_PATTERN.test(gitlabBaseSha) && SAFE_GIT_REF_PATTERN.test(gitlabHeadSha)) {
      console.log(` Source: GitLab CI diff (${gitlabBaseSha.slice(0, 8)}...${gitlabHeadSha.slice(0, 8)})`);
      return execSync(`git diff "${gitlabBaseSha}...${gitlabHeadSha}"`, execOptions);
    }
    console.warn(' ⚠️ Ignoring CI_MERGE_REQUEST_DIFF_BASE_SHA/CI_COMMIT_SHA: not a valid commit SHA.');
  }

  // Local / fallback: last commit diff.
  console.log(' Source: local HEAD~1..HEAD fallback');
  try {
    return execSync('git diff HEAD~1 HEAD', execOptions);
  } catch {
    try {
      // Initial commit has no parent — diff against git's well-known empty tree object.
      const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
      return execSync(`git diff "${emptyTreeSha}" HEAD`, execOptions);
    } catch {
      console.warn(' ⚠️ Unable to execute git diff. Defaulting to empty diff.');
      return '';
    }
  }
}
198
+
199
+ // ─── CHECKLIST & THRESHOLDS LOADING ───────────────────────────────────────────
200
+
201
/**
 * Reads the PR checklist markdown from .agent-context and returns it verbatim.
 *
 * @returns {string} The checklist file contents
 * @throws {Error} When the checklist file is missing from the repository.
 */
function loadPrChecklist() {
  if (existsSync(PR_CHECKLIST_PATH)) {
    return readFileSync(PR_CHECKLIST_PATH, 'utf-8');
  }
  throw new Error(`PR checklist not found at: ${PR_CHECKLIST_PATH}`);
}
212
+
213
/**
 * Loads the judge threshold policy from .agent-context/policies, falling
 * back to a built-in "balanced" profile when the policy file is absent.
 *
 * @returns {any} The thresholds object
 */
function loadThresholds() {
  const thresholdsPath = resolve(REPOSITORY_ROOT, '.agent-context/policies/llm-judge-threshold.json');
  if (existsSync(thresholdsPath)) {
    return JSON.parse(readFileSync(thresholdsPath, 'utf-8'));
  }

  // Built-in default: block on critical/high findings, treat malformed
  // verdicts as failures, tolerate provider outages.
  return {
    selectedProfile: 'balanced',
    profileThresholds: {
      balanced: { blockingSeverities: ['critical', 'high'], failOnMalformedResponse: true, failOnProviderError: false },
    },
  };
}
230
+
231
+ // ─── PROMPT CONSTRUCTION ─────────────────────────────────────────────────────
232
+
233
+ /**
234
+ * Returns the system-level instruction for the LLM judge role.
+ *
+ * The prompt pins down three contracts the rest of this script relies on:
+ * the four-level severity taxonomy (critical/high/medium/low) that
+ * normalizeSeverity() expects, the human-readable report layout printed in
+ * main(), and the machine-readable `JSON_VERDICT:` last line that
+ * extractVerdict() parses. Edit the template with care — any wording drift
+ * here can break verdict parsing downstream.
235
+ *
236
+ * @returns {string}
237
+ */
238
+ function buildSystemPrompt() {
239
+ return `You are a Senior Software Architect performing an automated code review for a CI/CD pipeline.
240
+
241
+ Your job: evaluate a git diff against the provided PR checklist and identify violations.
242
+ You must categorize each violation with a severity level: critical, high, medium, or low.
243
+
244
+ ## Severity classification:
245
+ - critical: Security vulnerabilities (hardcoded secrets, SQL/command injection, missing auth checks, CORS), unvalidated external inputs.
246
+ - high: N+1 database queries, swallowed errors (empty catch blocks without re-throw/recovery), layer boundary violations.
247
+ - medium: TypeScript \`any\` type used without justification, missing test coverage, bad architectural patterns.
248
+ - low: Style preferences, minor naming nitpicks, documentation suggestions, performance micro-optimizations.
249
+
250
+ ## Mandatory output format:
251
+ You MUST output your findings in EXACTLY this structure:
252
+
253
+ \`\`\`
254
+ ## PR REVIEW RESULTS
255
+ ━━━━━━━━━━━━━━━━━━━
256
+
257
+ ✅ [Section Name] — Passes
258
+ ❌ [Section Name] — FAILS
259
+ 📌 Rule: [rule file and section]
260
+ ❌ Problem: [exact description of the issue found in the diff]
261
+ ⚠️ Severity: [critical | high | medium | low]
262
+ ✅ Fix: [specific actionable fix]
263
+
264
+ \`\`\`
265
+
266
+ Rules:
267
+ - Then at the absolute LAST line of your response, output a JSON array of the failed checks. Each object should have 'rule', 'problem', 'severity'. If there are no failures, output an empty array [].
268
+ - Make sure the JSON array is perfectly valid JSON on a single line starting with \`JSON_VERDICT: \`. For example:
269
+ JSON_VERDICT: [{"rule": "Security", "problem": "Hardcoded secret", "severity": "critical"}]
270
+ - If the diff is empty, contains only documentation changes, or has no source code changes, output JSON_VERDICT: [] immediately.`;
271
+ }
272
+
273
/**
 * Builds the user message combining the checklist and the (possibly truncated) diff.
 *
 * The closing instruction asks the model to end with the `JSON_VERDICT:` line.
 * (Previously it demanded "VERDICT: PASS ✅ or VERDICT: FAIL ❌", which
 * contradicted the system prompt's mandatory format and the regex in
 * extractVerdict() — a conflicting instruction that invited malformed verdicts.)
 *
 * @param {string} prChecklistContent
 * @param {string} diffContent
 * @returns {string}
 */
function buildUserMessage(prChecklistContent, diffContent) {
  // Truncate oversized diffs so the request stays within provider token limits.
  const truncatedDiff =
    diffContent.length > MAX_DIFF_CHARS
      ? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED — ${(diffContent.length - MAX_DIFF_CHARS).toLocaleString()} additional characters omitted to stay within token limits]`
      : diffContent;

  return `## PR Checklist Reference

${prChecklistContent}

---

## Git Diff to Review

\`\`\`diff
${truncatedDiff.trim() || '(empty diff — no source code changes detected)'}
\`\`\`

Review the diff against the checklist. Report your findings in the required format, ending with the \`JSON_VERDICT: [...]\` line as the absolute last line of your response.`;
}
300
+
301
+ // ─── LLM PROVIDER IMPLEMENTATIONS ────────────────────────────────────────────
302
+
303
/**
 * Calls the OpenAI Chat Completions API and returns the assistant's text.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>} The review text produced by the model
 * @throws {Error} On a non-2xx HTTP status or an unexpected response shape.
 */
async function callOpenAiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
  const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: selectedModel,
      max_tokens: 2048,
      temperature: 0, // deterministic reviews: same diff → same verdict
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userMessage },
      ],
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ choices?: Array<{ message?: { content?: string } }> }} */
  const responsePayload = await apiResponse.json();
  // Guard the payload shape: an empty `choices` array or missing content
  // would otherwise surface as an opaque TypeError instead of a clear
  // provider error that main() can report and handle per the profile.
  const messageContent = responsePayload.choices?.[0]?.message?.content;
  if (typeof messageContent !== 'string') {
    throw new Error('OpenAI API response did not include message content.');
  }
  return messageContent;
}
338
+
339
/**
 * Calls the Anthropic Messages API and returns the assistant's text.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>} The review text produced by the model
 * @throws {Error} On a non-2xx HTTP status or an unexpected response shape.
 */
async function callAnthropicProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
  const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: selectedModel,
      max_tokens: 2048,
      system: systemPrompt,
      messages: [{ role: 'user', content: userMessage }],
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ content?: Array<{ text?: string }> }} */
  const responsePayload = await apiResponse.json();
  // Guard the payload shape: an empty `content` array would otherwise
  // surface as an opaque TypeError instead of a clear provider error.
  const messageText = responsePayload.content?.[0]?.text;
  if (typeof messageText !== 'string') {
    throw new Error('Anthropic API response did not include message text.');
  }
  return messageText;
}
372
+
373
/**
 * Calls the Google Gemini generateContent API and returns the model's text.
 *
 * The API key is sent via the documented `x-goog-api-key` header rather than
 * as a URL query parameter: query strings routinely end up in proxy/server
 * logs and error messages, which would leak the secret.
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @returns {Promise<string>} The review text produced by the model
 * @throws {Error} On a non-2xx HTTP status or an unexpected response shape.
 */
async function callGeminiProvider(systemPrompt, userMessage) {
  const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
  const apiKey = process.env.GEMINI_API_KEY ?? '';
  const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent`;

  const apiResponse = await fetch(endpointUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      system_instruction: { parts: [{ text: systemPrompt }] },
      contents: [{ role: 'user', parts: [{ text: userMessage }] }],
      generationConfig: { temperature: 0, maxOutputTokens: 2048 },
    }),
  });

  if (!apiResponse.ok) {
    const errorBody = await apiResponse.text();
    throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
  }

  /** @type {{ candidates?: Array<{ content?: { parts?: Array<{ text?: string }> } }> }} */
  const responsePayload = await apiResponse.json();
  // Guard the payload shape: safety-filtered responses can arrive with no
  // candidates/parts; fail with a clear provider error, not a TypeError.
  const messageText = responsePayload.candidates?.[0]?.content?.parts?.[0]?.text;
  if (typeof messageText !== 'string') {
    throw new Error('Gemini API response did not include candidate text.');
  }
  return messageText;
}
404
+
405
+ // ─── PROVIDER AUTO-SELECTION ──────────────────────────────────────────────────
406
+
407
+ /**
408
+ * @typedef {{ providerName: string, invokeProvider: (sys: string, usr: string) => Promise<string> }} SelectedProvider
409
+ */
410
+
411
/**
 * Returns the first available LLM provider based on environment keys.
 * Priority: OpenAI → Anthropic → Gemini.
 *
 * @returns {SelectedProvider | null}
 */
function selectAvailableProvider() {
  // Ordered catalog: the first entry whose env key is set wins.
  const providerCatalog = [
    { envKey: 'OPENAI_API_KEY', providerName: 'OpenAI (gpt-4o-mini)', invokeProvider: callOpenAiProvider },
    { envKey: 'ANTHROPIC_API_KEY', providerName: 'Anthropic (claude-3-5-haiku-latest)', invokeProvider: callAnthropicProvider },
    { envKey: 'GEMINI_API_KEY', providerName: 'Google Gemini (gemini-2.0-flash)', invokeProvider: callGeminiProvider },
  ];

  for (const { envKey, providerName, invokeProvider } of providerCatalog) {
    if (process.env[envKey]) {
      return { providerName, invokeProvider };
    }
  }
  return null;
}
429
+
430
+ // ─── VERDICT PARSING ─────────────────────────────────────────────────────────
431
+
432
/**
 * Extracts and parses the JSON verdict from the LLM response.
 *
 * Looks for the single-line `JSON_VERDICT: [...]` marker mandated by the
 * system prompt. A missing or unparseable verdict either terminates the
 * process (strict profiles) or degrades to an empty violation list.
 *
 * @param {string} llmResponseText
 * @param {boolean} failOnMalformedResponse
 * @returns {Array<{ rule: string, problem: string, severity: string }>}
 */
function extractVerdict(llmResponseText, failOnMalformedResponse) {
  const verdictMatch = llmResponseText.match(/JSON_VERDICT:\s*(\[.*\])/i);

  if (verdictMatch === null) {
    console.warn('⚠️ LLM response did not include a valid JSON_VERDICT line.');
    if (failOnMalformedResponse) {
      console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
      process.exit(1);
    }
    return [];
  }

  try {
    return JSON.parse(verdictMatch[1]);
  } catch (parseError) {
    console.error('⚠️ Failed to parse JSON_VERDICT:', /** @type {Error} */ (parseError).message);
    if (failOnMalformedResponse) {
      process.exit(1);
    }
    return [];
  }
}
460
+
461
/**
 * Coerces raw verdict entries into fully-populated Violation records:
 * missing fields get placeholder text and severities are canonicalized.
 *
 * @param {Array<{ rule?: string, problem?: string, severity?: string }>} violations
 * @returns {Violation[]}
 */
function normalizeViolations(violations) {
  return violations.map(({ rule, problem, severity }) => ({
    rule: String(rule || 'Unknown Rule'),
    problem: String(problem || 'No problem description provided.'),
    severity: normalizeSeverity(severity),
  }));
}
472
+
473
/**
 * Assembles the machine-readable report consumed by emitMachineReadableReport.
 * `failDecision` is derived here: blocking violations, a malformed verdict,
 * or a (blocking) provider error all mark the run as failed.
 *
 * @param {{
 *   provider: string,
 *   selectedProfile: string,
 *   blockingSeverities: string[],
 *   finalViolations: Violation[],
 *   blockingFound: Violation[],
 *   isDryRun: boolean,
 *   malformedVerdict: boolean,
 *   providerError: boolean,
 * }} payloadInput
 * @returns {MachineReportPayload}
 */
function buildMachineReportPayload(payloadInput) {
  const {
    provider,
    selectedProfile,
    blockingSeverities,
    finalViolations,
    blockingFound,
    isDryRun,
    malformedVerdict,
    providerError,
  } = payloadInput;

  const hasBlockingViolations = blockingFound.length > 0;

  return {
    generatedAt: new Date().toISOString(),
    schemaVersion: '1.0',
    profile: selectedProfile,
    provider,
    ciProvider: detectCiProvider(),
    blockingSeverities,
    failDecision: hasBlockingViolations || malformedVerdict || providerError,
    malformedVerdict,
    providerError,
    dryRun: isDryRun,
    summary: {
      totalViolations: finalViolations.length,
      blockingViolations: blockingFound.length,
    },
    violations: finalViolations,
  };
}
514
+
515
+ // ─── MAIN ─────────────────────────────────────────────────────────────────────
516
+
517
/**
 * Entry point: loads policy + checklist, collects the PR diff, asks the
 * selected LLM provider to review it, and converts the verdict into an
 * exit code for CI (0 = pass, 1 = blocked).
 *
 * Exit paths:
 *   --dry-run            → prints prompt previews, exits 0 without any LLM call
 *   no API key           → skips review, exits 0 (soft gate)
 *   provider error       → exits 1 only when the profile sets failOnProviderError
 *   blocking violations  → exits 1
 */
async function main() {
  console.log('');
  console.log('🔍 LLM Judge — Automated Code Review Gate');
  console.log('════════════════════════════════════════════');
  console.log('');

  // ── Step 1: Load checklist and thresholds ──────────────
  const prChecklistContent = loadPrChecklist();
  const thresholdsObj = loadThresholds();
  const selectedProfile = thresholdsObj.selectedProfile || 'balanced';
  const profileConfig = thresholdsObj.profileThresholds[selectedProfile] || {};
  const blockingSeverities = profileConfig.blockingSeverities || ['critical', 'high'];
  const failOnMalformedResponse = profileConfig.failOnMalformedResponse !== false;
  const failOnProviderError = profileConfig.failOnProviderError || false;

  console.log(`✅ PR checklist loaded (${prChecklistContent.length} chars)`);
  console.log(`✅ Threshold profile loaded: ${selectedProfile} (blocking: ${blockingSeverities.join(', ')})`);

  // ── Step 2: Collect diff ────────────────────────────────
  const rawDiff = collectPullRequestDiff();
  console.log(`✅ Git diff collected (${rawDiff.length} chars${rawDiff.length > MAX_DIFF_CHARS ? ` — will truncate to ${MAX_DIFF_CHARS}` : ''})`);

  // ── Step 3: Build prompt ────────────────────────────────
  const systemPrompt = buildSystemPrompt();
  const userMessage = buildUserMessage(prChecklistContent, rawDiff);

  // ── Step 4: Dry run mode ────────────────────────────────
  if (IS_DRY_RUN) {
    console.log('');
    console.log('── DRY RUN MODE ──────────────────────────────────────────');
    console.log('[SYSTEM PROMPT PREVIEW]');
    console.log(systemPrompt.slice(0, 400) + '...');
    console.log('');
    console.log('[USER MESSAGE PREVIEW]');
    console.log(userMessage.slice(0, 400) + '...');
    console.log('─────────────────────────────────────────────────────────');
    console.log('');
    const dryRunReportPayload = buildMachineReportPayload({
      provider: 'dry-run',
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: true,
      malformedVerdict: false,
      providerError: false,
    });
    emitMachineReadableReport(dryRunReportPayload);
    console.log('VERDICT: JSON_VERDICT: [] (dry run — no LLM call made)');
    process.exit(0);
  }

  // ── Step 5: Select provider ─────────────────────────────
  const selectedProvider = selectAvailableProvider();
  if (!selectedProvider) {
    console.warn('');
    console.warn('⚠️ No LLM API key detected.');
    console.warn(' Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GEMINI_API_KEY');
    console.warn(' to enable automated code review.');
    console.warn('');
    console.warn('⏭️ Skipping LLM review — pipeline continues (PASS).');
    const skippedReportPayload = buildMachineReportPayload({
      provider: 'none',
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: false,
      malformedVerdict: false,
      providerError: false,
    });
    emitMachineReadableReport(skippedReportPayload);
    process.exit(0);
  }

  console.log(`✅ Provider selected: ${selectedProvider.providerName}`);
  if (process.env.LLM_JUDGE_MODEL) {
    console.log(` Model override: ${process.env.LLM_JUDGE_MODEL}`);
  }
  console.log('');
  console.log('⏳ Sending diff to LLM for review...');
  console.log('');

  // ── Step 6: Call LLM ────────────────────────────────────
  let llmReviewText;
  try {
    llmReviewText = await selectedProvider.invokeProvider(systemPrompt, userMessage);
  } catch (providerCallError) {
    console.warn(`⚠️ LLM call failed: ${/** @type {Error} */ (providerCallError).message}`);
    // providerError is only flagged when the profile blocks on it, so the
    // report's failDecision matches the actual exit code below.
    const providerErrorReportPayload = buildMachineReportPayload({
      provider: selectedProvider.providerName,
      selectedProfile,
      blockingSeverities,
      finalViolations: [],
      blockingFound: [],
      isDryRun: false,
      malformedVerdict: false,
      providerError: Boolean(failOnProviderError),
    });
    emitMachineReadableReport(providerErrorReportPayload);
    if (failOnProviderError) {
      console.error('❌ Failing pipeline because provider errors are not allowed by the profile.');
      process.exit(1);
    }
    console.warn(' Skipping LLM review — pipeline continues (PASS).');
    process.exit(0);
  }

  // ── Step 7: Print report ────────────────────────────────
  console.log('── LLM Review Report ─────────────────────────────────────');
  console.log('');
  console.log(llmReviewText);
  console.log('');
  console.log('──────────────────────────────────────────────────────────');
  console.log('');

  // ── Step 8: Enforce verdict ─────────────────────────────
  const rawVerdictViolations = extractVerdict(llmReviewText, failOnMalformedResponse);
  const finalViolations = normalizeViolations(rawVerdictViolations);
  // Fix: a malformed verdict only contributes to the report's failDecision
  // when the profile actually blocks on it. Previously, a lenient profile
  // (failOnMalformedResponse: false) plus a malformed verdict produced a
  // report claiming failDecision: true while the pipeline exited 0 — the
  // gating below mirrors the failOnProviderError handling in Step 6.
  // (With a strict profile, extractVerdict has already exited by this point.)
  const hasMalformedVerdict = !/JSON_VERDICT:\s*\[/i.test(llmReviewText);

  const blockingFound = finalViolations.filter(v => blockingSeverities.includes(v.severity.toLowerCase()));
  const machineReportPayload = buildMachineReportPayload({
    provider: selectedProvider.providerName,
    selectedProfile,
    blockingSeverities,
    finalViolations,
    blockingFound,
    isDryRun: false,
    malformedVerdict: hasMalformedVerdict && failOnMalformedResponse,
    providerError: false,
  });
  emitMachineReadableReport(machineReportPayload);

  if (blockingFound.length > 0) {
    console.error(`❌ LLM Judge: ${blockingFound.length} blocking violations found (severities: ${blockingSeverities.join(', ')}). Pipeline FAILED.`);
    console.error(' Fix the issues listed above before merging.');
    process.exit(1);
  }

  console.log('✅ LLM Judge: No blocking violations. Pipeline PASSED.');
  process.exit(0);
}
660
+
661
// Top-level entry point: any uncaught failure must surface as a non-zero
// exit code so CI treats it as a gate failure rather than a silent pass.
main().catch((fatalError) => {
  console.error('❌ Unexpected error in llm-judge:', fatalError);
  process.exit(1);
});