@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/.agent-context/review-checklists/pr-checklist.md +1 -0
  2. package/.agent-context/rules/api-docs.md +63 -47
  3. package/.agent-context/rules/architecture.md +133 -120
  4. package/.agent-context/rules/database-design.md +36 -18
  5. package/.agent-context/rules/docker-runtime.md +66 -43
  6. package/.agent-context/rules/efficiency-vs-hype.md +38 -17
  7. package/.agent-context/rules/error-handling.md +35 -16
  8. package/.agent-context/rules/event-driven.md +35 -18
  9. package/.agent-context/rules/frontend-architecture.md +103 -76
  10. package/.agent-context/rules/git-workflow.md +81 -197
  11. package/.agent-context/rules/microservices.md +42 -41
  12. package/.agent-context/rules/naming-conv.md +27 -8
  13. package/.agent-context/rules/performance.md +32 -12
  14. package/.agent-context/rules/realtime.md +26 -9
  15. package/.agent-context/rules/security.md +39 -20
  16. package/.agent-context/rules/testing.md +36 -16
  17. package/AGENTS.md +9 -9
  18. package/README.md +10 -1
  19. package/lib/cli/commands/init.mjs +1 -0
  20. package/lib/cli/compiler.mjs +1 -0
  21. package/lib/cli/detector/constants.mjs +135 -0
  22. package/lib/cli/detector/design-evidence/collector.mjs +256 -0
  23. package/lib/cli/detector/design-evidence/constants.mjs +39 -0
  24. package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
  25. package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
  26. package/lib/cli/detector/design-evidence/summary.mjs +109 -0
  27. package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
  28. package/lib/cli/detector/design-evidence.mjs +25 -610
  29. package/lib/cli/detector/stack-detection.mjs +243 -0
  30. package/lib/cli/detector/ui-signals.mjs +150 -0
  31. package/lib/cli/detector/workspace-scan.mjs +177 -0
  32. package/lib/cli/detector.mjs +20 -688
  33. package/lib/cli/memory-continuity.mjs +1 -0
  34. package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
  35. package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +116 -0
  36. package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
  37. package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
  38. package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
  39. package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +222 -0
  40. package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
  41. package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
  42. package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
  43. package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
  44. package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
  45. package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
  46. package/lib/cli/project-scaffolder/design-contract/validation.mjs +59 -896
  47. package/lib/cli/project-scaffolder/design-contract.mjs +147 -557
  48. package/mcp.json +30 -9
  49. package/package.json +17 -2
  50. package/scripts/audit-cache-layer-contract.mjs +258 -0
  51. package/scripts/audit-caching-scope-hygiene.mjs +263 -0
  52. package/scripts/audit-file-size.mjs +219 -0
  53. package/scripts/audit-reflection-citations.mjs +163 -0
  54. package/scripts/audit-release-bundle.mjs +170 -0
  55. package/scripts/audit-rule-id-uniqueness.mjs +313 -0
  56. package/scripts/benchmark-evidence-bundle.mjs +1 -0
  57. package/scripts/build-release-benchmark-bundle.mjs +204 -0
  58. package/scripts/context-triggered-audit.mjs +1 -0
  59. package/scripts/documentation-boundary-audit.mjs +1 -0
  60. package/scripts/explain-on-demand-audit.mjs +2 -1
  61. package/scripts/frontend-usability-audit.mjs +10 -10
  62. package/scripts/llm-judge/checklist-loader.mjs +45 -0
  63. package/scripts/llm-judge/constants.mjs +66 -0
  64. package/scripts/llm-judge/diff-collection.mjs +74 -0
  65. package/scripts/llm-judge/prompting.mjs +78 -0
  66. package/scripts/llm-judge/providers.mjs +111 -0
  67. package/scripts/llm-judge/verdict.mjs +134 -0
  68. package/scripts/llm-judge.mjs +21 -482
  69. package/scripts/mcp-server/tool-registry.mjs +55 -0
  70. package/scripts/mcp-server/tools.mjs +137 -1
  71. package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
  72. package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
  73. package/scripts/migrate-rule-format/render-new.mjs +169 -0
  74. package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
  75. package/scripts/migrate-rule-format.mjs +192 -0
  76. package/scripts/release-gate/constants.mjs +1 -1
  77. package/scripts/release-gate/static-checks.mjs +1 -1
  78. package/scripts/rules-guardian-audit.mjs +5 -2
  79. package/scripts/single-source-lazy-loading-audit.mjs +2 -1
  80. package/scripts/ui-design-judge/git-input.mjs +3 -0
  81. package/scripts/validate/config.mjs +3 -2
  82. package/scripts/validate/coverage-checks.mjs +1 -1
  83. package/scripts/validate.mjs +93 -1
@@ -4,21 +4,24 @@
4
4
  /**
5
5
  * scripts/llm-judge.mjs
6
6
  *
7
- * LLM-as-a-Judge CI gate enforces pr-checklist.md on every pull request.
7
+ * LLM-as-a-Judge CI gate. Enforces pr-checklist.md on every pull request.
8
8
  *
9
9
  * Reads the git diff of the current PR, loads the PR checklist, sends both
10
10
  * to the first available LLM provider, and exits 1 when CRITICAL findings
11
11
  * exist (security gaps, N+1 queries, swallowed errors, hardcoded secrets,
12
12
  * layer boundary violations, SQL injection risks).
13
13
  *
14
+ * Implementation is split per concern under scripts/llm-judge/. This entry
15
+ * file owns the CLI orchestration only.
16
+ *
14
17
  * Supported providers (auto-selected by first available env key):
15
- * OPENAI_API_KEY gpt-4o-mini (override with LLM_JUDGE_MODEL)
16
- * ANTHROPIC_API_KEY claude-3-5-haiku-latest
17
- * GEMINI_API_KEY gemini-2.0-flash
18
+ * OPENAI_API_KEY -> gpt-4o-mini (override with LLM_JUDGE_MODEL)
19
+ * ANTHROPIC_API_KEY -> claude-3-5-haiku-latest
20
+ * GEMINI_API_KEY -> gemini-2.0-flash
18
21
  *
19
22
  * Usage:
20
- * node scripts/llm-judge.mjs auto-detect diff, call LLM
21
- * node scripts/llm-judge.mjs --dry-run print prompt, skip LLM call
23
+ * node scripts/llm-judge.mjs (auto-detect diff, call LLM)
24
+ * node scripts/llm-judge.mjs --dry-run (print prompt, skip LLM call)
22
25
  *
23
26
  * Environment variables:
24
27
  * OPENAI_API_KEY OpenAI secret key
@@ -35,481 +38,17 @@
35
38
  * Zero external dependencies — uses Node.js built-ins only (Node 18+).
36
39
  */
37
40
 
38
- import { execSync } from 'node:child_process';
39
- import { readFileSync, existsSync, writeFileSync } from 'node:fs';
40
- import { resolve, dirname } from 'node:path';
41
- import { fileURLToPath } from 'node:url';
42
-
43
- const __filename = fileURLToPath(import.meta.url);
44
- const __dirname = dirname(__filename);
45
-
46
- // ─── CONSTANTS ────────────────────────────────────────────────────────────────
47
-
48
- const REPOSITORY_ROOT = resolve(__dirname, '..');
49
- const PR_CHECKLIST_PATH = resolve(REPOSITORY_ROOT, '.agent-context/review-checklists/pr-checklist.md');
50
- const DEFAULT_MACHINE_REPORT_PATH = resolve(REPOSITORY_ROOT, '.agent-context/state/llm-judge-report.json');
51
- const MAX_DIFF_CHARS = parseInt(process.env.LLM_MAX_DIFF_CHARS ?? '12000', 10);
52
- const IS_DRY_RUN = process.argv.includes('--dry-run');
53
- const SHOULD_EMIT_MACHINE_REPORT = process.env.LLM_JUDGE_EMIT_JSON !== 'false';
54
- const MACHINE_REPORT_PATH = process.env.LLM_JUDGE_OUTPUT_PATH || DEFAULT_MACHINE_REPORT_PATH;
55
-
56
- /** @type {Record<string, string>} */
57
- const SEVERITY_NORMALIZATION_TABLE = {
58
- critical: 'critical',
59
- blocker: 'critical',
60
- severe: 'critical',
61
- high: 'high',
62
- major: 'high',
63
- medium: 'medium',
64
- moderate: 'medium',
65
- low: 'low',
66
- minor: 'low',
67
- info: 'low',
68
- informational: 'low',
69
- };
70
-
71
- /**
72
- * @typedef {{
73
- * rule: string,
74
- * problem: string,
75
- * severity: string,
76
- * }} Violation
77
- */
78
-
79
- /**
80
- * @typedef {{
81
- * generatedAt: string,
82
- * schemaVersion: string,
83
- * profile: string,
84
- * provider: string,
85
- * ciProvider: string,
86
- * blockingSeverities: string[],
87
- * failDecision: boolean,
88
- * malformedVerdict: boolean,
89
- * providerError: boolean,
90
- * dryRun: boolean,
91
- * summary: {
92
- * totalViolations: number,
93
- * blockingViolations: number,
94
- * },
95
- * violations: Violation[],
96
- * }} MachineReportPayload
97
- */
98
-
99
- function detectCiProvider() {
100
- if (process.env.GITHUB_ACTIONS === 'true') {
101
- return 'github';
102
- }
103
-
104
- if (process.env.GITLAB_CI === 'true') {
105
- return 'gitlab';
106
- }
107
-
108
- return 'local';
109
- }
110
-
111
- /**
112
- * @param {string | undefined} rawSeverityValue
113
- * @returns {string}
114
- */
115
- function normalizeSeverity(rawSeverityValue) {
116
- const normalizedSeverityKey = String(rawSeverityValue || '').trim().toLowerCase();
117
- return SEVERITY_NORMALIZATION_TABLE[normalizedSeverityKey] || 'low';
118
- }
119
-
120
- /**
121
- * @param {MachineReportPayload} machineReportPayload
122
- * @returns {string}
123
- */
124
- function formatMachineReadableLine(machineReportPayload) {
125
- return `JSON_REPORT: ${JSON.stringify(machineReportPayload)}`;
126
- }
127
-
128
- /**
129
- * @param {MachineReportPayload} machineReportPayload
130
- */
131
- function emitMachineReadableReport(machineReportPayload) {
132
- if (!SHOULD_EMIT_MACHINE_REPORT) {
133
- return;
134
- }
135
-
136
- writeFileSync(MACHINE_REPORT_PATH, `${JSON.stringify(machineReportPayload, null, 2)}\n`, 'utf-8');
137
- console.log(formatMachineReadableLine(machineReportPayload));
138
- console.log(`📎 Machine report saved: ${MACHINE_REPORT_PATH}`);
139
- }
140
-
141
- // ─── GIT DIFF COLLECTION ──────────────────────────────────────────────────────
142
-
143
- /**
144
- * Collects the pull request diff from the best available source:
145
- * 1. PR_DIFF env var (direct injection — highest priority)
146
- * 2. GitHub Actions env vars (GITHUB_BASE_SHA / GITHUB_HEAD_SHA)
147
- * 3. GitLab CI env vars (CI_MERGE_REQUEST_DIFF_BASE_SHA / CI_COMMIT_SHA)
148
- * 4. Local fallback: HEAD~1..HEAD
149
- *
150
- * @returns {string} The raw git diff output
151
- */
152
- function collectPullRequestDiff() {
153
- if (process.env.PR_DIFF) {
154
- console.log(' Source: PR_DIFF env variable');
155
- return process.env.PR_DIFF;
156
- }
157
-
158
- const execOptions = {
159
- cwd: REPOSITORY_ROOT,
160
- encoding: /** @type {'utf-8'} */ ('utf-8'),
161
- maxBuffer: 1024 * 1024 * 8, // 8 MB
162
- };
163
-
164
- // GitHub Actions: PR event injects base/head SHAs
165
- const githubBaseSha = process.env.GITHUB_BASE_SHA;
166
- const githubHeadSha = process.env.GITHUB_HEAD_SHA ?? 'HEAD';
167
- if (githubBaseSha) {
168
- console.log(` Source: GitHub Actions diff (${githubBaseSha.slice(0, 8)}...${githubHeadSha.slice(0, 8)})`);
169
- return execSync(`git diff "${githubBaseSha}...${githubHeadSha}"`, execOptions);
170
- }
171
-
172
- // GitLab CI: merge request event provides base + head SHAs
173
- const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
174
- const gitlabHeadSha = process.env.CI_COMMIT_SHA ?? 'HEAD';
175
- if (gitlabBaseSha) {
176
- console.log(` Source: GitLab CI diff (${gitlabBaseSha.slice(0, 8)}...${gitlabHeadSha.slice(0, 8)})`);
177
- return execSync(`git diff "${gitlabBaseSha}...${gitlabHeadSha}"`, execOptions);
178
- }
179
-
180
- // Local / fallback: last commit diff
181
- console.log(' Source: local HEAD~1..HEAD fallback');
182
- try {
183
- return execSync('git diff HEAD~1 HEAD', execOptions);
184
- } catch {
185
- try {
186
- // Initial commit has no parent — diff against empty tree
187
- const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
188
- return execSync(`git diff "${emptyTreeSha}" HEAD`, execOptions);
189
- } catch {
190
- console.warn(' ⚠️ Unable to execute git diff. Defaulting to empty diff.');
191
- return '';
192
- }
193
- }
194
- }
195
-
196
- // ─── CHECKLIST & THRESHOLDS LOADING ───────────────────────────────────────────
197
-
198
- /**
199
- * Loads and returns the PR checklist markdown content.
200
- *
201
- * @returns {string} The checklist file contents
202
- */
203
- function loadPrChecklist() {
204
- if (!existsSync(PR_CHECKLIST_PATH)) {
205
- throw new Error(`PR checklist not found at: ${PR_CHECKLIST_PATH}`);
206
- }
207
- return readFileSync(PR_CHECKLIST_PATH, 'utf-8');
208
- }
209
-
210
- /**
211
- * Loads the LLM judge thresholds.
212
- *
213
- * @returns {any} The thresholds object
214
- */
215
- function loadThresholds() {
216
- const thresholdsPath = resolve(REPOSITORY_ROOT, '.agent-context/policies/llm-judge-threshold.json');
217
- if (!existsSync(thresholdsPath)) {
218
- return {
219
- selectedProfile: 'balanced',
220
- profileThresholds: {
221
- balanced: { blockingSeverities: ['critical', 'high'], failOnMalformedResponse: true, failOnProviderError: false }
222
- }
223
- };
224
- }
225
- return JSON.parse(readFileSync(thresholdsPath, 'utf-8'));
226
- }
227
-
228
- // ─── PROMPT CONSTRUCTION ─────────────────────────────────────────────────────
229
-
230
- /**
231
- * Returns the system-level instruction for the LLM judge role.
232
- *
233
- * @returns {string}
234
- */
235
- function buildSystemPrompt() {
236
- return `You are a Senior Software Architect performing an automated code review for a CI/CD pipeline.
237
-
238
- Your job: evaluate a git diff against the provided PR checklist and identify violations.
239
- You must categorize each violation with a severity level: critical, high, medium, or low.
240
-
241
- ## Severity classification:
242
- - critical: Security vulnerabilities (hardcoded secrets, SQL/command injection, missing auth checks, CORS), unvalidated external inputs.
243
- - high: N+1 database queries, swallowed errors (empty catch blocks without re-throw/recovery), layer boundary violations.
244
- - medium: TypeScript \`any\` type used without justification, missing test coverage, bad architectural patterns.
245
- - low: Style preferences, minor naming nitpicks, documentation nitpicks, performance micro-optimizations.
246
-
247
- ## Mandatory output format:
248
- You MUST output your findings in EXACTLY this structure:
249
-
250
- \`\`\`
251
- ## PR REVIEW RESULTS
252
- ━━━━━━━━━━━━━━━━━━━
253
-
254
- ✅ [Section Name] — Passes
255
- ❌ [Section Name] — FAILS
256
- 📌 Rule: [rule file and section]
257
- ❌ Problem: [exact description of the issue found in the diff]
258
- ⚠️ Severity: [critical | high | medium | low]
259
- ✅ Fix: [specific actionable fix]
260
-
261
- \`\`\`
262
-
263
- Rules:
264
- - Then at the absolute LAST line of your response, output a JSON array of the failed checks. Each object should have 'rule', 'problem', 'severity'. If there are no failures, output an empty array [].
265
- - Make sure the JSON array is perfectly valid JSON on a single line starting with \`JSON_VERDICT: \`. For example:
266
- JSON_VERDICT: [{"rule": "Security", "problem": "Hardcoded secret", "severity": "critical"}]
267
- - If the diff is empty, contains only documentation changes, or has no source code changes, output JSON_VERDICT: [] immediately.`;
268
- }
269
-
270
- /**
271
- * Builds the user message combining the checklist and the (possibly truncated) diff.
272
- *
273
- * @param {string} prChecklistContent
274
- * @param {string} diffContent
275
- * @returns {string}
276
- */
277
- function buildUserMessage(prChecklistContent, diffContent) {
278
- const truncatedDiff =
279
- diffContent.length > MAX_DIFF_CHARS
280
- ? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED — ${(diffContent.length - MAX_DIFF_CHARS).toLocaleString()} additional characters omitted to stay within token limits]`
281
- : diffContent;
282
-
283
- return `## PR Checklist Reference
284
-
285
- ${prChecklistContent}
286
-
287
- ---
288
-
289
- ## Git Diff to Review
290
-
291
- \`\`\`diff
292
- ${truncatedDiff.trim() || '(empty diff — no source code changes detected)'}
293
- \`\`\`
294
-
295
- Review the diff against the checklist. Report your findings in the required format, ending with VERDICT: PASS ✅ or VERDICT: FAIL ❌.`;
296
- }
297
-
298
- // ─── LLM PROVIDER IMPLEMENTATIONS ────────────────────────────────────────────
299
-
300
- /**
301
- * Calls the OpenAI Chat Completions API.
302
- *
303
- * @param {string} systemPrompt
304
- * @param {string} userMessage
305
- * @returns {Promise<string>}
306
- */
307
- async function callOpenAiProvider(systemPrompt, userMessage) {
308
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
309
- const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
310
- method: 'POST',
311
- headers: {
312
- 'Content-Type': 'application/json',
313
- Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
314
- },
315
- body: JSON.stringify({
316
- model: selectedModel,
317
- max_tokens: 2048,
318
- temperature: 0,
319
- messages: [
320
- { role: 'system', content: systemPrompt },
321
- { role: 'user', content: userMessage },
322
- ],
323
- }),
324
- });
325
-
326
- if (!apiResponse.ok) {
327
- const errorBody = await apiResponse.text();
328
- throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
329
- }
330
-
331
- /** @type {{ choices: Array<{ message: { content: string } }> }} */
332
- const responsePayload = await apiResponse.json();
333
- return responsePayload.choices[0].message.content;
334
- }
335
-
336
- /**
337
- * Calls the Anthropic Messages API.
338
- *
339
- * @param {string} systemPrompt
340
- * @param {string} userMessage
341
- * @returns {Promise<string>}
342
- */
343
- async function callAnthropicProvider(systemPrompt, userMessage) {
344
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
345
- const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
346
- method: 'POST',
347
- headers: {
348
- 'Content-Type': 'application/json',
349
- 'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
350
- 'anthropic-version': '2023-06-01',
351
- },
352
- body: JSON.stringify({
353
- model: selectedModel,
354
- max_tokens: 2048,
355
- system: systemPrompt,
356
- messages: [{ role: 'user', content: userMessage }],
357
- }),
358
- });
359
-
360
- if (!apiResponse.ok) {
361
- const errorBody = await apiResponse.text();
362
- throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
363
- }
364
-
365
- /** @type {{ content: Array<{ text: string }> }} */
366
- const responsePayload = await apiResponse.json();
367
- return responsePayload.content[0].text;
368
- }
369
-
370
- /**
371
- * Calls the Google Gemini generateContent API.
372
- *
373
- * @param {string} systemPrompt
374
- * @param {string} userMessage
375
- * @returns {Promise<string>}
376
- */
377
- async function callGeminiProvider(systemPrompt, userMessage) {
378
- const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
379
- const apiKey = process.env.GEMINI_API_KEY ?? '';
380
- const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent?key=${apiKey}`;
381
-
382
- const apiResponse = await fetch(endpointUrl, {
383
- method: 'POST',
384
- headers: { 'Content-Type': 'application/json' },
385
- body: JSON.stringify({
386
- system_instruction: { parts: [{ text: systemPrompt }] },
387
- contents: [{ role: 'user', parts: [{ text: userMessage }] }],
388
- generationConfig: { temperature: 0, maxOutputTokens: 2048 },
389
- }),
390
- });
391
-
392
- if (!apiResponse.ok) {
393
- const errorBody = await apiResponse.text();
394
- throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
395
- }
396
-
397
- /** @type {{ candidates: Array<{ content: { parts: Array<{ text: string }> } }> }} */
398
- const responsePayload = await apiResponse.json();
399
- return responsePayload.candidates[0].content.parts[0].text;
400
- }
401
-
402
- // ─── PROVIDER AUTO-SELECTION ──────────────────────────────────────────────────
403
-
404
- /**
405
- * @typedef {{ providerName: string, invokeProvider: (sys: string, usr: string) => Promise<string> }} SelectedProvider
406
- */
407
-
408
- /**
409
- * Returns the first available LLM provider based on environment keys.
410
- * Priority: OpenAI → Anthropic → Gemini.
411
- *
412
- * @returns {SelectedProvider | null}
413
- */
414
- function selectAvailableProvider() {
415
- if (process.env.OPENAI_API_KEY) {
416
- return { providerName: 'OpenAI (gpt-4o-mini)', invokeProvider: callOpenAiProvider };
417
- }
418
- if (process.env.ANTHROPIC_API_KEY) {
419
- return { providerName: 'Anthropic (claude-3-5-haiku-latest)', invokeProvider: callAnthropicProvider };
420
- }
421
- if (process.env.GEMINI_API_KEY) {
422
- return { providerName: 'Google Gemini (gemini-2.0-flash)', invokeProvider: callGeminiProvider };
423
- }
424
- return null;
425
- }
426
-
427
- // ─── VERDICT PARSING ─────────────────────────────────────────────────────────
428
-
429
- /**
430
- * Extracts and parses the JSON verdict from the LLM response.
431
- *
432
- * @param {string} llmResponseText
433
- * @param {boolean} failOnMalformedResponse
434
- * @returns {Array<{ rule: string, problem: string, severity: string }>}
435
- */
436
- function extractVerdict(llmResponseText, failOnMalformedResponse) {
437
- const match = llmResponseText.match(/JSON_VERDICT:\s*(\[.*\])/i);
438
- if (!match) {
439
- console.warn('⚠️ LLM response did not include a valid JSON_VERDICT line.');
440
- if (failOnMalformedResponse) {
441
- console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
442
- process.exit(1);
443
- }
444
- return [];
445
- }
446
- try {
447
- return JSON.parse(match[1]);
448
- } catch (err) {
449
- const parseError = /** @type {Error} */ (err);
450
- console.error('⚠️ Failed to parse JSON_VERDICT:', parseError.message);
451
- if (failOnMalformedResponse) {
452
- process.exit(1);
453
- }
454
- return [];
455
- }
456
- }
457
-
458
- /**
459
- * @param {Array<{ rule?: string, problem?: string, severity?: string }>} violations
460
- * @returns {Violation[]}
461
- */
462
- function normalizeViolations(violations) {
463
- return violations.map((violationItem) => ({
464
- rule: String(violationItem.rule || 'Unknown Rule'),
465
- problem: String(violationItem.problem || 'No problem description provided.'),
466
- severity: normalizeSeverity(violationItem.severity),
467
- }));
468
- }
469
-
470
- /**
471
- * @param {{
472
- * provider: string,
473
- * selectedProfile: string,
474
- * blockingSeverities: string[],
475
- * finalViolations: Violation[],
476
- * blockingFound: Violation[],
477
- * isDryRun: boolean,
478
- * malformedVerdict: boolean,
479
- * providerError: boolean,
480
- * }} payloadInput
481
- * @returns {MachineReportPayload}
482
- */
483
- function buildMachineReportPayload({
484
- provider,
485
- selectedProfile,
486
- blockingSeverities,
487
- finalViolations,
488
- blockingFound,
489
- isDryRun,
490
- malformedVerdict,
491
- providerError,
492
- }) {
493
- return {
494
- generatedAt: new Date().toISOString(),
495
- schemaVersion: '1.0',
496
- profile: selectedProfile,
497
- provider,
498
- ciProvider: detectCiProvider(),
499
- blockingSeverities,
500
- failDecision: blockingFound.length > 0 || malformedVerdict || providerError,
501
- malformedVerdict,
502
- providerError,
503
- dryRun: isDryRun,
504
- summary: {
505
- totalViolations: finalViolations.length,
506
- blockingViolations: blockingFound.length,
507
- },
508
- violations: finalViolations,
509
- };
510
- }
511
-
512
- // ─── MAIN ─────────────────────────────────────────────────────────────────────
41
+ import { IS_DRY_RUN, MAX_DIFF_CHARS } from './llm-judge/constants.mjs';
42
+ import { collectPullRequestDiff } from './llm-judge/diff-collection.mjs';
43
+ import { loadPrChecklist, loadThresholds } from './llm-judge/checklist-loader.mjs';
44
+ import { buildSystemPrompt, buildUserMessage } from './llm-judge/prompting.mjs';
45
+ import { selectAvailableProvider } from './llm-judge/providers.mjs';
46
+ import {
47
+ buildMachineReportPayload,
48
+ emitMachineReadableReport,
49
+ extractVerdict,
50
+ normalizeViolations,
51
+ } from './llm-judge/verdict.mjs';
513
52
 
514
53
  async function main() {
515
54
  console.log('');
@@ -632,7 +171,7 @@ async function main() {
632
171
  const finalViolations = normalizeViolations(rawVerdictViolations);
633
172
  const hasMalformedVerdict = !/JSON_VERDICT:\s*\[/i.test(llmReviewText);
634
173
 
635
- const blockingFound = finalViolations.filter(v => blockingSeverities.includes(v.severity.toLowerCase()));
174
+ const blockingFound = finalViolations.filter((v) => blockingSeverities.includes(v.severity.toLowerCase()));
636
175
  const machineReportPayload = buildMachineReportPayload({
637
176
  provider: selectedProvider.providerName,
638
177
  selectedProfile,
@@ -61,6 +61,61 @@ export function buildToolDefinitions() {
61
61
  }
62
62
 
63
63
  toolDefinitions.push(
64
+ {
65
+ name: 'lookup_rule',
66
+ description: 'Look up a canonical .agent-context rule section by stable rule ID.',
67
+ inputSchema: {
68
+ type: 'object',
69
+ properties: {
70
+ ruleId: {
71
+ type: 'string',
72
+ description: 'Stable rule section ID, such as ARCH-003 or API-001.',
73
+ },
74
+ },
75
+ required: ['ruleId'],
76
+ additionalProperties: false,
77
+ },
78
+ },
79
+ {
80
+ name: 'validate_against_rules',
81
+ description: 'Validate that cited rule IDs resolve to canonical rule sections.',
82
+ inputSchema: {
83
+ type: 'object',
84
+ properties: {
85
+ ruleIds: {
86
+ type: 'array',
87
+ items: { type: 'string' },
88
+ description: 'Rule IDs cited by a response, plan, or review.',
89
+ },
90
+ summary: {
91
+ type: 'string',
92
+ description: 'Optional one-line context for the validation request.',
93
+ },
94
+ },
95
+ required: ['ruleIds'],
96
+ additionalProperties: false,
97
+ },
98
+ },
99
+ {
100
+ name: 'audit_compliance',
101
+ description: 'Run a lightweight compliance audit over cited rule IDs and scope labels.',
102
+ inputSchema: {
103
+ type: 'object',
104
+ properties: {
105
+ ruleIds: {
106
+ type: 'array',
107
+ items: { type: 'string' },
108
+ description: 'Rule IDs used as the compliance basis.',
109
+ },
110
+ scope: {
111
+ type: 'string',
112
+ description: 'Optional changed scope label, such as api, security, testing, architecture, ui, or release.',
113
+ },
114
+ },
115
+ required: ['ruleIds'],
116
+ additionalProperties: false,
117
+ },
118
+ },
64
119
  {
65
120
  name: 'research_fetch',
66
121
  description: 'Fetch external documentation/news content and return query-focused excerpts with citation metadata.',