@ryuenn3123/agentic-senior-core 3.0.49 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/prompts/bootstrap-design.md +2 -1
- package/.agent-context/review-checklists/pr-checklist.md +1 -0
- package/.agent-context/rules/api-docs.md +63 -45
- package/.agent-context/rules/architecture.md +133 -118
- package/.agent-context/rules/database-design.md +36 -16
- package/.agent-context/rules/docker-runtime.md +66 -43
- package/.agent-context/rules/efficiency-vs-hype.md +38 -17
- package/.agent-context/rules/error-handling.md +35 -14
- package/.agent-context/rules/event-driven.md +35 -18
- package/.agent-context/rules/frontend-architecture.md +103 -74
- package/.agent-context/rules/git-workflow.md +81 -197
- package/.agent-context/rules/microservices.md +42 -41
- package/.agent-context/rules/naming-conv.md +27 -6
- package/.agent-context/rules/performance.md +32 -10
- package/.agent-context/rules/realtime.md +26 -9
- package/.agent-context/rules/security.md +39 -19
- package/.agent-context/rules/testing.md +36 -15
- package/AGENTS.md +9 -9
- package/README.md +10 -1
- package/lib/cli/commands/init.mjs +1 -0
- package/lib/cli/compiler.mjs +1 -0
- package/lib/cli/detector/constants.mjs +135 -0
- package/lib/cli/detector/design-evidence/collector.mjs +256 -0
- package/lib/cli/detector/design-evidence/constants.mjs +39 -0
- package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
- package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
- package/lib/cli/detector/design-evidence/summary.mjs +109 -0
- package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
- package/lib/cli/detector/design-evidence.mjs +25 -610
- package/lib/cli/detector/stack-detection.mjs +243 -0
- package/lib/cli/detector/ui-signals.mjs +150 -0
- package/lib/cli/detector/workspace-scan.mjs +177 -0
- package/lib/cli/detector.mjs +20 -688
- package/lib/cli/memory-continuity.mjs +1 -0
- package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
- package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +116 -0
- package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
- package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
- package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
- package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +222 -0
- package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
- package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
- package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
- package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
- package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
- package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
- package/lib/cli/project-scaffolder/design-contract/validation.mjs +59 -896
- package/lib/cli/project-scaffolder/design-contract.mjs +147 -557
- package/mcp.json +30 -9
- package/package.json +17 -2
- package/scripts/audit-cache-layer-contract.mjs +258 -0
- package/scripts/audit-caching-scope-hygiene.mjs +263 -0
- package/scripts/audit-file-size.mjs +219 -0
- package/scripts/audit-reflection-citations.mjs +163 -0
- package/scripts/audit-release-bundle.mjs +170 -0
- package/scripts/audit-rule-id-uniqueness.mjs +313 -0
- package/scripts/benchmark-evidence-bundle.mjs +1 -0
- package/scripts/build-release-benchmark-bundle.mjs +204 -0
- package/scripts/context-triggered-audit.mjs +1 -0
- package/scripts/documentation-boundary-audit.mjs +1 -0
- package/scripts/explain-on-demand-audit.mjs +2 -1
- package/scripts/frontend-usability-audit.mjs +10 -10
- package/scripts/llm-judge/checklist-loader.mjs +45 -0
- package/scripts/llm-judge/constants.mjs +66 -0
- package/scripts/llm-judge/diff-collection.mjs +74 -0
- package/scripts/llm-judge/prompting.mjs +78 -0
- package/scripts/llm-judge/providers.mjs +111 -0
- package/scripts/llm-judge/verdict.mjs +134 -0
- package/scripts/llm-judge.mjs +21 -482
- package/scripts/mcp-server/tool-registry.mjs +55 -0
- package/scripts/mcp-server/tools.mjs +137 -1
- package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
- package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
- package/scripts/migrate-rule-format/render-new.mjs +169 -0
- package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
- package/scripts/migrate-rule-format.mjs +192 -0
- package/scripts/release-gate/constants.mjs +1 -1
- package/scripts/release-gate/static-checks.mjs +1 -1
- package/scripts/rules-guardian-audit.mjs +5 -2
- package/scripts/single-source-lazy-loading-audit.mjs +2 -1
- package/scripts/ui-design-judge/git-input.mjs +3 -0
- package/scripts/validate/config.mjs +3 -2
- package/scripts/validate/coverage-checks.mjs +1 -1
- package/scripts/validate.mjs +93 -1
package/scripts/llm-judge.mjs
CHANGED
|
@@ -4,21 +4,24 @@
|
|
|
4
4
|
/**
|
|
5
5
|
* scripts/llm-judge.mjs
|
|
6
6
|
*
|
|
7
|
-
* LLM-as-a-Judge CI gate
|
|
7
|
+
* LLM-as-a-Judge CI gate. Enforces pr-checklist.md on every pull request.
|
|
8
8
|
*
|
|
9
9
|
* Reads the git diff of the current PR, loads the PR checklist, sends both
|
|
10
10
|
* to the first available LLM provider, and exits 1 when CRITICAL findings
|
|
11
11
|
* exist (security gaps, N+1 queries, swallowed errors, hardcoded secrets,
|
|
12
12
|
* layer boundary violations, SQL injection risks).
|
|
13
13
|
*
|
|
14
|
+
* Implementation is split per concern under scripts/llm-judge/. This entry
|
|
15
|
+
* file owns the CLI orchestration only.
|
|
16
|
+
*
|
|
14
17
|
* Supported providers (auto-selected by first available env key):
|
|
15
|
-
* OPENAI_API_KEY
|
|
16
|
-
* ANTHROPIC_API_KEY
|
|
17
|
-
* GEMINI_API_KEY
|
|
18
|
+
* OPENAI_API_KEY -> gpt-4o-mini (override with LLM_JUDGE_MODEL)
|
|
19
|
+
* ANTHROPIC_API_KEY -> claude-3-5-haiku-latest
|
|
20
|
+
* GEMINI_API_KEY -> gemini-2.0-flash
|
|
18
21
|
*
|
|
19
22
|
* Usage:
|
|
20
|
-
* node scripts/llm-judge.mjs
|
|
21
|
-
* node scripts/llm-judge.mjs --dry-run
|
|
23
|
+
* node scripts/llm-judge.mjs (auto-detect diff, call LLM)
|
|
24
|
+
* node scripts/llm-judge.mjs --dry-run (print prompt, skip LLM call)
|
|
22
25
|
*
|
|
23
26
|
* Environment variables:
|
|
24
27
|
* OPENAI_API_KEY OpenAI secret key
|
|
@@ -35,481 +38,17 @@
|
|
|
35
38
|
* Zero external dependencies — uses Node.js built-ins only (Node 18+).
|
|
36
39
|
*/
|
|
37
40
|
|
|
38
|
-
import {
|
|
39
|
-
import {
|
|
40
|
-
import {
|
|
41
|
-
import {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
const PR_CHECKLIST_PATH = resolve(REPOSITORY_ROOT, '.agent-context/review-checklists/pr-checklist.md');
|
|
50
|
-
const DEFAULT_MACHINE_REPORT_PATH = resolve(REPOSITORY_ROOT, '.agent-context/state/llm-judge-report.json');
|
|
51
|
-
const MAX_DIFF_CHARS = parseInt(process.env.LLM_MAX_DIFF_CHARS ?? '12000', 10);
|
|
52
|
-
const IS_DRY_RUN = process.argv.includes('--dry-run');
|
|
53
|
-
const SHOULD_EMIT_MACHINE_REPORT = process.env.LLM_JUDGE_EMIT_JSON !== 'false';
|
|
54
|
-
const MACHINE_REPORT_PATH = process.env.LLM_JUDGE_OUTPUT_PATH || DEFAULT_MACHINE_REPORT_PATH;
|
|
55
|
-
|
|
56
|
-
/** @type {Record<string, string>} */
|
|
57
|
-
const SEVERITY_NORMALIZATION_TABLE = {
|
|
58
|
-
critical: 'critical',
|
|
59
|
-
blocker: 'critical',
|
|
60
|
-
severe: 'critical',
|
|
61
|
-
high: 'high',
|
|
62
|
-
major: 'high',
|
|
63
|
-
medium: 'medium',
|
|
64
|
-
moderate: 'medium',
|
|
65
|
-
low: 'low',
|
|
66
|
-
minor: 'low',
|
|
67
|
-
info: 'low',
|
|
68
|
-
informational: 'low',
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
/**
|
|
72
|
-
* @typedef {{
|
|
73
|
-
* rule: string,
|
|
74
|
-
* problem: string,
|
|
75
|
-
* severity: string,
|
|
76
|
-
* }} Violation
|
|
77
|
-
*/
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* @typedef {{
|
|
81
|
-
* generatedAt: string,
|
|
82
|
-
* schemaVersion: string,
|
|
83
|
-
* profile: string,
|
|
84
|
-
* provider: string,
|
|
85
|
-
* ciProvider: string,
|
|
86
|
-
* blockingSeverities: string[],
|
|
87
|
-
* failDecision: boolean,
|
|
88
|
-
* malformedVerdict: boolean,
|
|
89
|
-
* providerError: boolean,
|
|
90
|
-
* dryRun: boolean,
|
|
91
|
-
* summary: {
|
|
92
|
-
* totalViolations: number,
|
|
93
|
-
* blockingViolations: number,
|
|
94
|
-
* },
|
|
95
|
-
* violations: Violation[],
|
|
96
|
-
* }} MachineReportPayload
|
|
97
|
-
*/
|
|
98
|
-
|
|
99
|
-
function detectCiProvider() {
|
|
100
|
-
if (process.env.GITHUB_ACTIONS === 'true') {
|
|
101
|
-
return 'github';
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
if (process.env.GITLAB_CI === 'true') {
|
|
105
|
-
return 'gitlab';
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
return 'local';
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* @param {string | undefined} rawSeverityValue
|
|
113
|
-
* @returns {string}
|
|
114
|
-
*/
|
|
115
|
-
function normalizeSeverity(rawSeverityValue) {
|
|
116
|
-
const normalizedSeverityKey = String(rawSeverityValue || '').trim().toLowerCase();
|
|
117
|
-
return SEVERITY_NORMALIZATION_TABLE[normalizedSeverityKey] || 'low';
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
/**
|
|
121
|
-
* @param {MachineReportPayload} machineReportPayload
|
|
122
|
-
* @returns {string}
|
|
123
|
-
*/
|
|
124
|
-
function formatMachineReadableLine(machineReportPayload) {
|
|
125
|
-
return `JSON_REPORT: ${JSON.stringify(machineReportPayload)}`;
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
/**
|
|
129
|
-
* @param {MachineReportPayload} machineReportPayload
|
|
130
|
-
*/
|
|
131
|
-
function emitMachineReadableReport(machineReportPayload) {
|
|
132
|
-
if (!SHOULD_EMIT_MACHINE_REPORT) {
|
|
133
|
-
return;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
writeFileSync(MACHINE_REPORT_PATH, `${JSON.stringify(machineReportPayload, null, 2)}\n`, 'utf-8');
|
|
137
|
-
console.log(formatMachineReadableLine(machineReportPayload));
|
|
138
|
-
console.log(`📎 Machine report saved: ${MACHINE_REPORT_PATH}`);
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// ─── GIT DIFF COLLECTION ──────────────────────────────────────────────────────
|
|
142
|
-
|
|
143
|
-
/**
|
|
144
|
-
* Collects the pull request diff from the best available source:
|
|
145
|
-
* 1. PR_DIFF env var (direct injection — highest priority)
|
|
146
|
-
* 2. GitHub Actions env vars (GITHUB_BASE_SHA / GITHUB_HEAD_SHA)
|
|
147
|
-
* 3. GitLab CI env vars (CI_MERGE_REQUEST_DIFF_BASE_SHA / CI_COMMIT_SHA)
|
|
148
|
-
* 4. Local fallback: HEAD~1..HEAD
|
|
149
|
-
*
|
|
150
|
-
* @returns {string} The raw git diff output
|
|
151
|
-
*/
|
|
152
|
-
function collectPullRequestDiff() {
|
|
153
|
-
if (process.env.PR_DIFF) {
|
|
154
|
-
console.log(' Source: PR_DIFF env variable');
|
|
155
|
-
return process.env.PR_DIFF;
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
const execOptions = {
|
|
159
|
-
cwd: REPOSITORY_ROOT,
|
|
160
|
-
encoding: /** @type {'utf-8'} */ ('utf-8'),
|
|
161
|
-
maxBuffer: 1024 * 1024 * 8, // 8 MB
|
|
162
|
-
};
|
|
163
|
-
|
|
164
|
-
// GitHub Actions: PR event injects base/head SHAs
|
|
165
|
-
const githubBaseSha = process.env.GITHUB_BASE_SHA;
|
|
166
|
-
const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
|
|
167
|
-
if (githubBaseSha) {
|
|
168
|
-
console.log(` Source: GitHub Actions diff (${githubBaseSha.slice(0, 8)}...${githubHeadSha.slice(0, 8)})`);
|
|
169
|
-
return execSync(`git diff "${githubBaseSha}...${githubHeadSha}"`, execOptions);
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
// GitLab CI: merge request event provides base + head SHAs
|
|
173
|
-
const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
|
|
174
|
-
const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
|
|
175
|
-
if (gitlabBaseSha) {
|
|
176
|
-
console.log(` Source: GitLab CI diff (${gitlabBaseSha.slice(0, 8)}...${gitlabHeadSha.slice(0, 8)})`);
|
|
177
|
-
return execSync(`git diff "${gitlabBaseSha}...${gitlabHeadSha}"`, execOptions);
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
// Local / fallback: last commit diff
|
|
181
|
-
console.log(' Source: local HEAD~1..HEAD fallback');
|
|
182
|
-
try {
|
|
183
|
-
return execSync('git diff HEAD~1 HEAD', execOptions);
|
|
184
|
-
} catch {
|
|
185
|
-
try {
|
|
186
|
-
// Initial commit has no parent — diff against empty tree
|
|
187
|
-
const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
|
|
188
|
-
return execSync(`git diff "${emptyTreeSha}" HEAD`, execOptions);
|
|
189
|
-
} catch {
|
|
190
|
-
console.warn(' ⚠️ Unable to execute git diff. Defaulting to empty diff.');
|
|
191
|
-
return '';
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
// ─── CHECKLIST & THRESHOLDS LOADING ───────────────────────────────────────────
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* Loads and returns the PR checklist markdown content.
|
|
200
|
-
*
|
|
201
|
-
* @returns {string} The checklist file contents
|
|
202
|
-
*/
|
|
203
|
-
function loadPrChecklist() {
|
|
204
|
-
if (!existsSync(PR_CHECKLIST_PATH)) {
|
|
205
|
-
throw new Error(`PR checklist not found at: ${PR_CHECKLIST_PATH}`);
|
|
206
|
-
}
|
|
207
|
-
return readFileSync(PR_CHECKLIST_PATH, 'utf-8');
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
/**
|
|
211
|
-
* Loads the LLM judge thresholds.
|
|
212
|
-
*
|
|
213
|
-
* @returns {any} The thresholds object
|
|
214
|
-
*/
|
|
215
|
-
function loadThresholds() {
|
|
216
|
-
const thresholdsPath = resolve(REPOSITORY_ROOT, '.agent-context/policies/llm-judge-threshold.json');
|
|
217
|
-
if (!existsSync(thresholdsPath)) {
|
|
218
|
-
return {
|
|
219
|
-
selectedProfile: 'balanced',
|
|
220
|
-
profileThresholds: {
|
|
221
|
-
balanced: { blockingSeverities: ['critical', 'high'], failOnMalformedResponse: true, failOnProviderError: false }
|
|
222
|
-
}
|
|
223
|
-
};
|
|
224
|
-
}
|
|
225
|
-
return JSON.parse(readFileSync(thresholdsPath, 'utf-8'));
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
// ─── PROMPT CONSTRUCTION ─────────────────────────────────────────────────────
|
|
229
|
-
|
|
230
|
-
/**
|
|
231
|
-
* Returns the system-level instruction for the LLM judge role.
|
|
232
|
-
*
|
|
233
|
-
* @returns {string}
|
|
234
|
-
*/
|
|
235
|
-
function buildSystemPrompt() {
|
|
236
|
-
return `You are a Senior Software Architect performing an automated code review for a CI/CD pipeline.
|
|
237
|
-
|
|
238
|
-
Your job: evaluate a git diff against the provided PR checklist and identify violations.
|
|
239
|
-
You must categorize each violation with a severity level: critical, high, medium, or low.
|
|
240
|
-
|
|
241
|
-
## Severity classification:
|
|
242
|
-
- critical: Security vulnerabilities (hardcoded secrets, SQL/command injection, missing auth checks, CORS), unvalidated external inputs.
|
|
243
|
-
- high: N+1 database queries, swallowed errors (empty catch blocks without re-throw/recovery), layer boundary violations.
|
|
244
|
-
- medium: TypeScript \`any\` type used without justification, missing test coverage, bad architectural patterns.
|
|
245
|
-
- low: Style preferences, minor naming nitpicks, documentation nitpicks, performance micro-optimizations.
|
|
246
|
-
|
|
247
|
-
## Mandatory output format:
|
|
248
|
-
You MUST output your findings in EXACTLY this structure:
|
|
249
|
-
|
|
250
|
-
\`\`\`
|
|
251
|
-
## PR REVIEW RESULTS
|
|
252
|
-
━━━━━━━━━━━━━━━━━━━
|
|
253
|
-
|
|
254
|
-
✅ [Section Name] — Passes
|
|
255
|
-
❌ [Section Name] — FAILS
|
|
256
|
-
📌 Rule: [rule file and section]
|
|
257
|
-
❌ Problem: [exact description of the issue found in the diff]
|
|
258
|
-
⚠️ Severity: [critical | high | medium | low]
|
|
259
|
-
✅ Fix: [specific actionable fix]
|
|
260
|
-
|
|
261
|
-
\`\`\`
|
|
262
|
-
|
|
263
|
-
Rules:
|
|
264
|
-
- Then at the absolute LAST line of your response, output a JSON array of the failed checks. Each object should have 'rule', 'problem', 'severity'. If there are no failures, output an empty array [].
|
|
265
|
-
- Make sure the JSON array is perfectly valid JSON on a single line starting with \`JSON_VERDICT: \`. For example:
|
|
266
|
-
JSON_VERDICT: [{"rule": "Security", "problem": "Hardcoded secret", "severity": "critical"}]
|
|
267
|
-
- If the diff is empty, contains only documentation changes, or has no source code changes, output JSON_VERDICT: [] immediately.`;
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
/**
|
|
271
|
-
* Builds the user message combining the checklist and the (possibly truncated) diff.
|
|
272
|
-
*
|
|
273
|
-
* @param {string} prChecklistContent
|
|
274
|
-
* @param {string} diffContent
|
|
275
|
-
* @returns {string}
|
|
276
|
-
*/
|
|
277
|
-
function buildUserMessage(prChecklistContent, diffContent) {
|
|
278
|
-
const truncatedDiff =
|
|
279
|
-
diffContent.length > MAX_DIFF_CHARS
|
|
280
|
-
? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED — ${(diffContent.length - MAX_DIFF_CHARS).toLocaleString()} additional characters omitted to stay within token limits]`
|
|
281
|
-
: diffContent;
|
|
282
|
-
|
|
283
|
-
return `## PR Checklist Reference
|
|
284
|
-
|
|
285
|
-
${prChecklistContent}
|
|
286
|
-
|
|
287
|
-
---
|
|
288
|
-
|
|
289
|
-
## Git Diff to Review
|
|
290
|
-
|
|
291
|
-
\`\`\`diff
|
|
292
|
-
${truncatedDiff.trim() || '(empty diff — no source code changes detected)'}
|
|
293
|
-
\`\`\`
|
|
294
|
-
|
|
295
|
-
Review the diff against the checklist. Report your findings in the required format, ending with VERDICT: PASS ✅ or VERDICT: FAIL ❌.`;
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
// ─── LLM PROVIDER IMPLEMENTATIONS ────────────────────────────────────────────
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
* Calls the OpenAI Chat Completions API.
|
|
302
|
-
*
|
|
303
|
-
* @param {string} systemPrompt
|
|
304
|
-
* @param {string} userMessage
|
|
305
|
-
* @returns {Promise<string>}
|
|
306
|
-
*/
|
|
307
|
-
async function callOpenAiProvider(systemPrompt, userMessage) {
|
|
308
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
|
|
309
|
-
const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
310
|
-
method: 'POST',
|
|
311
|
-
headers: {
|
|
312
|
-
'Content-Type': 'application/json',
|
|
313
|
-
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
|
|
314
|
-
},
|
|
315
|
-
body: JSON.stringify({
|
|
316
|
-
model: selectedModel,
|
|
317
|
-
max_tokens: 2048,
|
|
318
|
-
temperature: 0,
|
|
319
|
-
messages: [
|
|
320
|
-
{ role: 'system', content: systemPrompt },
|
|
321
|
-
{ role: 'user', content: userMessage },
|
|
322
|
-
],
|
|
323
|
-
}),
|
|
324
|
-
});
|
|
325
|
-
|
|
326
|
-
if (!apiResponse.ok) {
|
|
327
|
-
const errorBody = await apiResponse.text();
|
|
328
|
-
throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
/** @type {{ choices: Array<{ message: { content: string } }> }} */
|
|
332
|
-
const responsePayload = await apiResponse.json();
|
|
333
|
-
return responsePayload.choices[0].message.content;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
/**
|
|
337
|
-
* Calls the Anthropic Messages API.
|
|
338
|
-
*
|
|
339
|
-
* @param {string} systemPrompt
|
|
340
|
-
* @param {string} userMessage
|
|
341
|
-
* @returns {Promise<string>}
|
|
342
|
-
*/
|
|
343
|
-
async function callAnthropicProvider(systemPrompt, userMessage) {
|
|
344
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
|
|
345
|
-
const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
|
|
346
|
-
method: 'POST',
|
|
347
|
-
headers: {
|
|
348
|
-
'Content-Type': 'application/json',
|
|
349
|
-
'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
|
|
350
|
-
'anthropic-version': '2023-06-01',
|
|
351
|
-
},
|
|
352
|
-
body: JSON.stringify({
|
|
353
|
-
model: selectedModel,
|
|
354
|
-
max_tokens: 2048,
|
|
355
|
-
system: systemPrompt,
|
|
356
|
-
messages: [{ role: 'user', content: userMessage }],
|
|
357
|
-
}),
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
if (!apiResponse.ok) {
|
|
361
|
-
const errorBody = await apiResponse.text();
|
|
362
|
-
throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
/** @type {{ content: Array<{ text: string }> }} */
|
|
366
|
-
const responsePayload = await apiResponse.json();
|
|
367
|
-
return responsePayload.content[0].text;
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
/**
|
|
371
|
-
* Calls the Google Gemini generateContent API.
|
|
372
|
-
*
|
|
373
|
-
* @param {string} systemPrompt
|
|
374
|
-
* @param {string} userMessage
|
|
375
|
-
* @returns {Promise<string>}
|
|
376
|
-
*/
|
|
377
|
-
async function callGeminiProvider(systemPrompt, userMessage) {
|
|
378
|
-
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
|
|
379
|
-
const apiKey = process.env.GEMINI_API_KEY ?? '';
|
|
380
|
-
const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent?key=${apiKey}`;
|
|
381
|
-
|
|
382
|
-
const apiResponse = await fetch(endpointUrl, {
|
|
383
|
-
method: 'POST',
|
|
384
|
-
headers: { 'Content-Type': 'application/json' },
|
|
385
|
-
body: JSON.stringify({
|
|
386
|
-
system_instruction: { parts: [{ text: systemPrompt }] },
|
|
387
|
-
contents: [{ role: 'user', parts: [{ text: userMessage }] }],
|
|
388
|
-
generationConfig: { temperature: 0, maxOutputTokens: 2048 },
|
|
389
|
-
}),
|
|
390
|
-
});
|
|
391
|
-
|
|
392
|
-
if (!apiResponse.ok) {
|
|
393
|
-
const errorBody = await apiResponse.text();
|
|
394
|
-
throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
/** @type {{ candidates: Array<{ content: { parts: Array<{ text: string }> } }> }} */
|
|
398
|
-
const responsePayload = await apiResponse.json();
|
|
399
|
-
return responsePayload.candidates[0].content.parts[0].text;
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
// ─── PROVIDER AUTO-SELECTION ──────────────────────────────────────────────────
|
|
403
|
-
|
|
404
|
-
/**
|
|
405
|
-
* @typedef {{ providerName: string, invokeProvider: (sys: string, usr: string) => Promise<string> }} SelectedProvider
|
|
406
|
-
*/
|
|
407
|
-
|
|
408
|
-
/**
|
|
409
|
-
* Returns the first available LLM provider based on environment keys.
|
|
410
|
-
* Priority: OpenAI → Anthropic → Gemini.
|
|
411
|
-
*
|
|
412
|
-
* @returns {SelectedProvider | null}
|
|
413
|
-
*/
|
|
414
|
-
function selectAvailableProvider() {
|
|
415
|
-
if (process.env.OPENAI_API_KEY) {
|
|
416
|
-
return { providerName: 'OpenAI (gpt-4o-mini)', invokeProvider: callOpenAiProvider };
|
|
417
|
-
}
|
|
418
|
-
if (process.env.ANTHROPIC_API_KEY) {
|
|
419
|
-
return { providerName: 'Anthropic (claude-3-5-haiku-latest)', invokeProvider: callAnthropicProvider };
|
|
420
|
-
}
|
|
421
|
-
if (process.env.GEMINI_API_KEY) {
|
|
422
|
-
return { providerName: 'Google Gemini (gemini-2.0-flash)', invokeProvider: callGeminiProvider };
|
|
423
|
-
}
|
|
424
|
-
return null;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
// ─── VERDICT PARSING ─────────────────────────────────────────────────────────
|
|
428
|
-
|
|
429
|
-
/**
|
|
430
|
-
* Extracts and parses the JSON verdict from the LLM response.
|
|
431
|
-
*
|
|
432
|
-
* @param {string} llmResponseText
|
|
433
|
-
* @param {boolean} failOnMalformedResponse
|
|
434
|
-
* @returns {Array<{ rule: string, problem: string, severity: string }>}
|
|
435
|
-
*/
|
|
436
|
-
function extractVerdict(llmResponseText, failOnMalformedResponse) {
|
|
437
|
-
const match = llmResponseText.match(/JSON_VERDICT:\s*(\[.*\])/i);
|
|
438
|
-
if (!match) {
|
|
439
|
-
console.warn('⚠️ LLM response did not include a valid JSON_VERDICT line.');
|
|
440
|
-
if (failOnMalformedResponse) {
|
|
441
|
-
console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
|
|
442
|
-
process.exit(1);
|
|
443
|
-
}
|
|
444
|
-
return [];
|
|
445
|
-
}
|
|
446
|
-
try {
|
|
447
|
-
return JSON.parse(match[1]);
|
|
448
|
-
} catch (err) {
|
|
449
|
-
const parseError = /** @type {Error} */ (err);
|
|
450
|
-
console.error('⚠️ Failed to parse JSON_VERDICT:', parseError.message);
|
|
451
|
-
if (failOnMalformedResponse) {
|
|
452
|
-
process.exit(1);
|
|
453
|
-
}
|
|
454
|
-
return [];
|
|
455
|
-
}
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
/**
|
|
459
|
-
* @param {Array<{ rule?: string, problem?: string, severity?: string }>} violations
|
|
460
|
-
* @returns {Violation[]}
|
|
461
|
-
*/
|
|
462
|
-
function normalizeViolations(violations) {
|
|
463
|
-
return violations.map((violationItem) => ({
|
|
464
|
-
rule: String(violationItem.rule || 'Unknown Rule'),
|
|
465
|
-
problem: String(violationItem.problem || 'No problem description provided.'),
|
|
466
|
-
severity: normalizeSeverity(violationItem.severity),
|
|
467
|
-
}));
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
/**
|
|
471
|
-
* @param {{
|
|
472
|
-
* provider: string,
|
|
473
|
-
* selectedProfile: string,
|
|
474
|
-
* blockingSeverities: string[],
|
|
475
|
-
* finalViolations: Violation[],
|
|
476
|
-
* blockingFound: Violation[],
|
|
477
|
-
* isDryRun: boolean,
|
|
478
|
-
* malformedVerdict: boolean,
|
|
479
|
-
* providerError: boolean,
|
|
480
|
-
* }} payloadInput
|
|
481
|
-
* @returns {MachineReportPayload}
|
|
482
|
-
*/
|
|
483
|
-
function buildMachineReportPayload({
|
|
484
|
-
provider,
|
|
485
|
-
selectedProfile,
|
|
486
|
-
blockingSeverities,
|
|
487
|
-
finalViolations,
|
|
488
|
-
blockingFound,
|
|
489
|
-
isDryRun,
|
|
490
|
-
malformedVerdict,
|
|
491
|
-
providerError,
|
|
492
|
-
}) {
|
|
493
|
-
return {
|
|
494
|
-
generatedAt: new Date().toISOString(),
|
|
495
|
-
schemaVersion: '1.0',
|
|
496
|
-
profile: selectedProfile,
|
|
497
|
-
provider,
|
|
498
|
-
ciProvider: detectCiProvider(),
|
|
499
|
-
blockingSeverities,
|
|
500
|
-
failDecision: blockingFound.length > 0 || malformedVerdict || providerError,
|
|
501
|
-
malformedVerdict,
|
|
502
|
-
providerError,
|
|
503
|
-
dryRun: isDryRun,
|
|
504
|
-
summary: {
|
|
505
|
-
totalViolations: finalViolations.length,
|
|
506
|
-
blockingViolations: blockingFound.length,
|
|
507
|
-
},
|
|
508
|
-
violations: finalViolations,
|
|
509
|
-
};
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
// ─── MAIN ─────────────────────────────────────────────────────────────────────
|
|
41
|
+
import { IS_DRY_RUN, MAX_DIFF_CHARS } from './llm-judge/constants.mjs';
|
|
42
|
+
import { collectPullRequestDiff } from './llm-judge/diff-collection.mjs';
|
|
43
|
+
import { loadPrChecklist, loadThresholds } from './llm-judge/checklist-loader.mjs';
|
|
44
|
+
import { buildSystemPrompt, buildUserMessage } from './llm-judge/prompting.mjs';
|
|
45
|
+
import { selectAvailableProvider } from './llm-judge/providers.mjs';
|
|
46
|
+
import {
|
|
47
|
+
buildMachineReportPayload,
|
|
48
|
+
emitMachineReadableReport,
|
|
49
|
+
extractVerdict,
|
|
50
|
+
normalizeViolations,
|
|
51
|
+
} from './llm-judge/verdict.mjs';
|
|
513
52
|
|
|
514
53
|
async function main() {
|
|
515
54
|
console.log('');
|
|
@@ -632,7 +171,7 @@ async function main() {
|
|
|
632
171
|
const finalViolations = normalizeViolations(rawVerdictViolations);
|
|
633
172
|
const hasMalformedVerdict = !/JSON_VERDICT:\s*\[/i.test(llmReviewText);
|
|
634
173
|
|
|
635
|
-
const blockingFound = finalViolations.filter(v => blockingSeverities.includes(v.severity.toLowerCase()));
|
|
174
|
+
const blockingFound = finalViolations.filter((v) => blockingSeverities.includes(v.severity.toLowerCase()));
|
|
636
175
|
const machineReportPayload = buildMachineReportPayload({
|
|
637
176
|
provider: selectedProvider.providerName,
|
|
638
177
|
selectedProfile,
|
|
package/scripts/mcp-server/tool-registry.mjs
CHANGED
|
@@ -61,6 +61,61 @@ export function buildToolDefinitions() {
|
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
toolDefinitions.push(
|
|
64
|
+
{
|
|
65
|
+
name: 'lookup_rule',
|
|
66
|
+
description: 'Look up a canonical .agent-context rule section by stable rule ID.',
|
|
67
|
+
inputSchema: {
|
|
68
|
+
type: 'object',
|
|
69
|
+
properties: {
|
|
70
|
+
ruleId: {
|
|
71
|
+
type: 'string',
|
|
72
|
+
description: 'Stable rule section ID, such as ARCH-003 or API-001.',
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
required: ['ruleId'],
|
|
76
|
+
additionalProperties: false,
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
name: 'validate_against_rules',
|
|
81
|
+
description: 'Validate that cited rule IDs resolve to canonical rule sections.',
|
|
82
|
+
inputSchema: {
|
|
83
|
+
type: 'object',
|
|
84
|
+
properties: {
|
|
85
|
+
ruleIds: {
|
|
86
|
+
type: 'array',
|
|
87
|
+
items: { type: 'string' },
|
|
88
|
+
description: 'Rule IDs cited by a response, plan, or review.',
|
|
89
|
+
},
|
|
90
|
+
summary: {
|
|
91
|
+
type: 'string',
|
|
92
|
+
description: 'Optional one-line context for the validation request.',
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
required: ['ruleIds'],
|
|
96
|
+
additionalProperties: false,
|
|
97
|
+
},
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
name: 'audit_compliance',
|
|
101
|
+
description: 'Run a lightweight compliance audit over cited rule IDs and scope labels.',
|
|
102
|
+
inputSchema: {
|
|
103
|
+
type: 'object',
|
|
104
|
+
properties: {
|
|
105
|
+
ruleIds: {
|
|
106
|
+
type: 'array',
|
|
107
|
+
items: { type: 'string' },
|
|
108
|
+
description: 'Rule IDs used as the compliance basis.',
|
|
109
|
+
},
|
|
110
|
+
scope: {
|
|
111
|
+
type: 'string',
|
|
112
|
+
description: 'Optional changed scope label, such as api, security, testing, architecture, ui, or release.',
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
required: ['ruleIds'],
|
|
116
|
+
additionalProperties: false,
|
|
117
|
+
},
|
|
118
|
+
},
|
|
64
119
|
{
|
|
65
120
|
name: 'research_fetch',
|
|
66
121
|
description: 'Fetch external documentation/news content and return query-focused excerpts with citation metadata.',
|