@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/prompts/bootstrap-design.md +3 -1
- package/.agent-context/prompts/research-design.md +165 -0
- package/.agent-context/review-checklists/pr-checklist.md +1 -0
- package/.agent-context/rules/api-docs.md +63 -47
- package/.agent-context/rules/architecture.md +133 -120
- package/.agent-context/rules/database-design.md +36 -18
- package/.agent-context/rules/docker-runtime.md +66 -43
- package/.agent-context/rules/efficiency-vs-hype.md +38 -17
- package/.agent-context/rules/error-handling.md +35 -16
- package/.agent-context/rules/event-driven.md +35 -18
- package/.agent-context/rules/frontend-architecture.md +103 -76
- package/.agent-context/rules/git-workflow.md +81 -197
- package/.agent-context/rules/microservices.md +42 -41
- package/.agent-context/rules/naming-conv.md +27 -8
- package/.agent-context/rules/performance.md +32 -12
- package/.agent-context/rules/realtime.md +26 -9
- package/.agent-context/rules/security.md +39 -20
- package/.agent-context/rules/testing.md +36 -16
- package/AGENTS.md +21 -20
- package/README.md +10 -1
- package/lib/cli/commands/init.mjs +12 -0
- package/lib/cli/commands/upgrade.mjs +11 -0
- package/lib/cli/compiler.mjs +1 -0
- package/lib/cli/detector/constants.mjs +135 -0
- package/lib/cli/detector/design-evidence/collector.mjs +256 -0
- package/lib/cli/detector/design-evidence/constants.mjs +39 -0
- package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
- package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
- package/lib/cli/detector/design-evidence/summary.mjs +109 -0
- package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
- package/lib/cli/detector/design-evidence.mjs +25 -610
- package/lib/cli/detector/stack-detection.mjs +243 -0
- package/lib/cli/detector/ui-signals.mjs +150 -0
- package/lib/cli/detector/workspace-scan.mjs +177 -0
- package/lib/cli/detector.mjs +20 -688
- package/lib/cli/memory-continuity.mjs +1 -0
- package/lib/cli/project-scaffolder/design-contract/research-dossier-migration.mjs +165 -0
- package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
- package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +233 -0
- package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
- package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
- package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
- package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +456 -0
- package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
- package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
- package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
- package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
- package/lib/cli/project-scaffolder/design-contract/validation/research-dossier-validators.mjs +104 -0
- package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
- package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
- package/lib/cli/project-scaffolder/design-contract/validation.mjs +61 -896
- package/lib/cli/project-scaffolder/design-contract.mjs +151 -556
- package/lib/cli/project-scaffolder/prompt-builders.mjs +9 -0
- package/mcp.json +30 -9
- package/package.json +17 -2
- package/scripts/audit-cache-layer-contract.mjs +258 -0
- package/scripts/audit-caching-scope-hygiene.mjs +263 -0
- package/scripts/audit-file-size.mjs +219 -0
- package/scripts/audit-reflection-citations.mjs +163 -0
- package/scripts/audit-release-bundle.mjs +170 -0
- package/scripts/audit-rule-id-uniqueness.mjs +313 -0
- package/scripts/benchmark-evidence-bundle.mjs +1 -0
- package/scripts/build-release-benchmark-bundle.mjs +204 -0
- package/scripts/context-triggered-audit.mjs +1 -0
- package/scripts/documentation-boundary-audit.mjs +1 -0
- package/scripts/explain-on-demand-audit.mjs +2 -1
- package/scripts/frontend-usability-audit.mjs +10 -10
- package/scripts/llm-judge/checklist-loader.mjs +45 -0
- package/scripts/llm-judge/constants.mjs +66 -0
- package/scripts/llm-judge/diff-collection.mjs +74 -0
- package/scripts/llm-judge/prompting.mjs +78 -0
- package/scripts/llm-judge/providers.mjs +111 -0
- package/scripts/llm-judge/verdict.mjs +134 -0
- package/scripts/llm-judge.mjs +21 -482
- package/scripts/mcp-server/tool-registry.mjs +55 -0
- package/scripts/mcp-server/tools.mjs +137 -1
- package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
- package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
- package/scripts/migrate-rule-format/render-new.mjs +169 -0
- package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
- package/scripts/migrate-rule-format.mjs +192 -0
- package/scripts/release-gate/constants.mjs +1 -1
- package/scripts/release-gate/static-checks.mjs +1 -1
- package/scripts/rules-guardian-audit.mjs +5 -2
- package/scripts/single-source-lazy-loading-audit.mjs +2 -1
- package/scripts/ui-design-judge/git-input.mjs +3 -0
- package/scripts/validate/config.mjs +27 -2
- package/scripts/validate/coverage-checks.mjs +1 -1
- package/scripts/validate.mjs +94 -1
|
@@ -26,7 +26,7 @@ const SUPPORTED_MODES = new Set([DEFAULT_MODE, 'diagnostic']);
|
|
|
26
26
|
const DEFAULT_WORKFLOW = 'standard';
|
|
27
27
|
|
|
28
28
|
const REQUIRED_ARCHITECTURE_RULE_SNIPPETS = [
|
|
29
|
-
'## Invisible State Management with Explain-on-Demand',
|
|
29
|
+
'## ARCH-006: Invisible State Management with Explain-on-Demand',
|
|
30
30
|
'Default responses must avoid unnecessary state-file internals.',
|
|
31
31
|
'State internals are exposed only on explicit user request.',
|
|
32
32
|
'Diagnostic mode explains relevant state decisions when needed.',
|
|
@@ -80,6 +80,7 @@ function runGitFileQuery(commandArguments) {
|
|
|
80
80
|
cwd: REPOSITORY_ROOT,
|
|
81
81
|
encoding: 'utf8',
|
|
82
82
|
maxBuffer: 1024 * 1024,
|
|
83
|
+
stdio: ['ignore', 'pipe', 'ignore'],
|
|
83
84
|
});
|
|
84
85
|
|
|
85
86
|
return parseGitFileList(rawOutput);
|
|
@@ -55,32 +55,32 @@ const REQUIRED_ARCHITECTURE_CHECKLIST_SNIPPETS = [
|
|
|
55
55
|
const REQUIRED_FRONTEND_RULE_SNIPPETS = [
|
|
56
56
|
'Frontend Design and Interaction Boundaries',
|
|
57
57
|
'Load this rule for UI-facing work. Keep the loaded surface small.',
|
|
58
|
-
'## Activation',
|
|
59
|
-
'## Authority',
|
|
58
|
+
'## FE-001: Activation',
|
|
59
|
+
'## FE-002: Authority',
|
|
60
60
|
'Treat `.agent-context/` as design governance authority.',
|
|
61
61
|
'Treat `README.md` as public and developer overview, setup, usage, and user-facing context only',
|
|
62
62
|
'Do not choose final style, framework, palette, typography, layout paradigm, or animation library offline.',
|
|
63
63
|
'Keep design continuity opt-in.',
|
|
64
64
|
'Repo evidence outranks memory residue.',
|
|
65
|
-
'## Required Design Contract',
|
|
66
|
-
'## Anti-Generic UI Gate',
|
|
65
|
+
'## FE-003: Required Design Contract',
|
|
66
|
+
'## FE-004: Anti-Generic UI Gate',
|
|
67
67
|
'Do not ship interchangeable dashboard chrome',
|
|
68
68
|
'Do not let repeated surfaces share one visual treatment by habit',
|
|
69
69
|
'Use the rename test:',
|
|
70
70
|
'decorative geometry are invalid as wallpaper',
|
|
71
|
-
'## Dynamic Anchor Gate',
|
|
72
|
-
'## Motion, Palette, and 3D',
|
|
71
|
+
'## FE-005: Dynamic Anchor Gate',
|
|
72
|
+
'## FE-006: Motion, Palette, and 3D',
|
|
73
73
|
'Treat motion, 3D, WebGL, canvas, scroll choreography, and animation libraries as first-class options.',
|
|
74
74
|
'Prefer visually exploratory, product-derived palettes while preserving WCAG contrast and status clarity.',
|
|
75
|
-
'## Responsive Mutation',
|
|
75
|
+
'## FE-008: Responsive Mutation',
|
|
76
76
|
'Responsive quality is not scale-only.',
|
|
77
77
|
'container queries',
|
|
78
|
-
'## Accessibility',
|
|
78
|
+
'## FE-009: Accessibility',
|
|
79
79
|
'WCAG 2.2 AA is the hard floor.',
|
|
80
80
|
'APCA is advisory perceptual tuning only.',
|
|
81
|
-
'## CSS Production Hardening',
|
|
81
|
+
'## FE-010: CSS Production Hardening',
|
|
82
82
|
'overflow, wrapping, truncation',
|
|
83
|
-
'## Implementation Boundaries',
|
|
83
|
+
'## FE-011: Implementation Boundaries',
|
|
84
84
|
'Do not hardcode Zustand, React Query, smart/dumb component doctrine',
|
|
85
85
|
];
|
|
86
86
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Loaders for the PR checklist content and the threshold profile JSON. The
|
|
5
|
+
* thresholds loader returns a safe balanced-profile default when the policy
|
|
6
|
+
* file is absent so the gate stays functional in fresh checkouts.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
10
|
+
|
|
11
|
+
import { PR_CHECKLIST_PATH, THRESHOLDS_PATH } from './constants.mjs';
|
|
12
|
+
|
|
13
|
+
/**
 * Reads the PR checklist markdown from PR_CHECKLIST_PATH.
 *
 * @returns {string} The checklist file contents decoded as UTF-8
 * @throws {Error} When no file exists at PR_CHECKLIST_PATH
 */
export function loadPrChecklist() {
  const checklistIsPresent = existsSync(PR_CHECKLIST_PATH);
  if (checklistIsPresent) {
    return readFileSync(PR_CHECKLIST_PATH, 'utf-8');
  }

  throw new Error(`PR checklist not found at: ${PR_CHECKLIST_PATH}`);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Reads the LLM judge threshold policy from THRESHOLDS_PATH.
 *
 * When no file exists at that path, a built-in policy is returned instead:
 * the `balanced` profile, blocking on `critical` and `high`, failing on
 * malformed responses but not on provider errors.
 *
 * @returns {any} The parsed policy JSON, or the built-in fallback policy
 */
export function loadThresholds() {
  if (existsSync(THRESHOLDS_PATH)) {
    const rawPolicy = readFileSync(THRESHOLDS_PATH, 'utf-8');
    return JSON.parse(rawPolicy);
  }

  const balancedProfile = {
    blockingSeverities: ['critical', 'high'],
    failOnMalformedResponse: true,
    failOnProviderError: false,
  };

  return {
    selectedProfile: 'balanced',
    profileThresholds: {
      balanced: balancedProfile,
    },
  };
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Static configuration for the LLM judge CI gate. Centralizes paths, env-driven
|
|
5
|
+
* settings, and severity normalization so the rest of the pipeline reads from a
|
|
6
|
+
* single config surface.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { resolve, dirname } from 'node:path';
|
|
10
|
+
import { fileURLToPath } from 'node:url';
|
|
11
|
+
|
|
12
|
+
// Location of this module on disk, derived from its ES-module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

/** Repository root: two directory levels above this module's directory. */
export const REPOSITORY_ROOT = resolve(__dirname, '..', '..');

/**
 * Resolves a path relative to REPOSITORY_ROOT.
 *
 * @param {string} relativePath
 * @returns {string}
 */
const fromRepositoryRoot = (relativePath) => resolve(REPOSITORY_ROOT, relativePath);

/** PR checklist markdown consumed by the judge. */
export const PR_CHECKLIST_PATH = fromRepositoryRoot('.agent-context/review-checklists/pr-checklist.md');

/** Threshold policy JSON consumed by the judge. */
export const THRESHOLDS_PATH = fromRepositoryRoot('.agent-context/policies/llm-judge-threshold.json');

/** Machine report destination used when no override is configured. */
export const DEFAULT_MACHINE_REPORT_PATH = fromRepositoryRoot('.agent-context/state/llm-judge-report.json');
|
|
19
|
+
|
|
20
|
+
/** Diff size limit applied when LLM_MAX_DIFF_CHARS is unset or unusable. */
const DEFAULT_MAX_DIFF_CHARS = 12000;

/**
 * Parses the raw LLM_MAX_DIFF_CHARS setting into a usable character limit.
 *
 * An unset, empty, non-numeric, or negative value falls back to the default.
 * Without the fallback, `parseInt` yields NaN for such input, and a
 * `length > NaN` comparison is always false, which silently disables
 * truncation altogether.
 *
 * @param {string | undefined} rawValue The raw environment value
 * @returns {number} A non-negative integer character limit
 */
function resolveMaxDiffChars(rawValue) {
  const parsedValue = Number.parseInt(rawValue ?? '', 10);
  if (Number.isNaN(parsedValue) || parsedValue < 0) {
    return DEFAULT_MAX_DIFF_CHARS;
  }
  return parsedValue;
}

/** Maximum number of diff characters forwarded to the model. */
export const MAX_DIFF_CHARS = resolveMaxDiffChars(process.env.LLM_MAX_DIFF_CHARS);
|
|
21
|
+
/** True when the process was started with the `--dry-run` CLI argument. */
export const IS_DRY_RUN = process.argv.indexOf('--dry-run') !== -1;

/** Machine-report emission stays on unless LLM_JUDGE_EMIT_JSON is exactly 'false'. */
export const SHOULD_EMIT_MACHINE_REPORT = !(process.env.LLM_JUDGE_EMIT_JSON === 'false');

/** Report destination: LLM_JUDGE_OUTPUT_PATH when set and non-empty, otherwise the default path. */
export const MACHINE_REPORT_PATH = process.env.LLM_JUDGE_OUTPUT_PATH
  ? process.env.LLM_JUDGE_OUTPUT_PATH
  : DEFAULT_MACHINE_REPORT_PATH;
|
|
24
|
+
|
|
25
|
+
/**
 * Maps accepted severity labels (canonical names and their synonyms) to one of
 * the four canonical levels: critical, high, medium, low. Entries are grouped
 * by canonical level.
 *
 * @type {Record<string, string>}
 */
export const SEVERITY_NORMALIZATION_TABLE = Object.fromEntries([
  ...['critical', 'blocker', 'severe'].map((label) => [label, 'critical']),
  ...['high', 'major'].map((label) => [label, 'high']),
  ...['medium', 'moderate'].map((label) => [label, 'medium']),
  ...['low', 'minor', 'info', 'informational'].map((label) => [label, 'low']),
]);
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* @typedef {{
|
|
42
|
+
* rule: string,
|
|
43
|
+
* problem: string,
|
|
44
|
+
* severity: string,
|
|
45
|
+
* }} Violation
|
|
46
|
+
*/
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* @typedef {{
|
|
50
|
+
* generatedAt: string,
|
|
51
|
+
* schemaVersion: string,
|
|
52
|
+
* profile: string,
|
|
53
|
+
* provider: string,
|
|
54
|
+
* ciProvider: string,
|
|
55
|
+
* blockingSeverities: string[],
|
|
56
|
+
* failDecision: boolean,
|
|
57
|
+
* malformedVerdict: boolean,
|
|
58
|
+
* providerError: boolean,
|
|
59
|
+
* dryRun: boolean,
|
|
60
|
+
* summary: {
|
|
61
|
+
* totalViolations: number,
|
|
62
|
+
* blockingViolations: number,
|
|
63
|
+
* },
|
|
64
|
+
* violations: Violation[],
|
|
65
|
+
* }} MachineReportPayload
|
|
66
|
+
*/
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Git diff collection for the LLM judge. Selects the best available diff source
|
|
5
|
+
* based on environment signals (PR_DIFF override, GitHub Actions, GitLab CI),
|
|
6
|
+
* with a local HEAD~1..HEAD fallback that handles the initial-commit edge case
|
|
7
|
+
* by diffing against Git's empty-tree object.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { execSync } from 'node:child_process';
|
|
11
|
+
|
|
12
|
+
import { REPOSITORY_ROOT } from './constants.mjs';
|
|
13
|
+
|
|
14
|
+
/**
 * Identifies the CI environment from environment flags.
 *
 * GITHUB_ACTIONS is checked before GITLAB_CI; a flag only counts when its
 * value is exactly the string 'true'.
 *
 * @returns {string} 'github', 'gitlab', or 'local' when neither flag is set
 */
export function detectCiProvider() {
  const flagToProvider = [
    ['GITHUB_ACTIONS', 'github'],
    ['GITLAB_CI', 'gitlab'],
  ];

  for (const [flagName, providerId] of flagToProvider) {
    if (process.env[flagName] === 'true') {
      return providerId;
    }
  }

  return 'local';
}
|
|
25
|
+
|
|
26
|
+
/**
 * Rejects revision strings that are unsafe to interpolate into a shell command.
 *
 * Only characters that appear in SHAs and ordinary ref expressions are
 * accepted. Quotes, `$`, backticks, backslashes, whitespace, and a leading `-`
 * are refused so the value can neither break out of the quoted argument nor be
 * read by git as an option.
 *
 * @param {string} revision The revision read from the environment
 * @param {string} sourceName Name of the environment variable, for the error message
 * @returns {string} The unchanged revision when it is safe
 * @throws {Error} When the revision contains unsafe characters
 */
function assertSafeGitRevision(revision, sourceName) {
  if (!/^[A-Za-z0-9_][A-Za-z0-9._\/~^@-]*$/.test(revision)) {
    throw new Error(`Refusing to run git diff: ${sourceName} contains unsafe characters.`);
  }
  return revision;
}

/**
 * Collects the pull request diff from the best available source:
 * 1. PR_DIFF env var (direct injection, highest priority)
 * 2. GITHUB_BASE_SHA / GITHUB_HEAD_SHA env vars
 * 3. CI_MERGE_REQUEST_DIFF_BASE_SHA / CI_COMMIT_SHA env vars
 * 4. Local fallback: HEAD~1..HEAD, then the empty tree if that diff fails
 *
 * Revisions taken from the environment are validated before they are placed
 * into the git command line. A missing or empty head revision means HEAD.
 *
 * @returns {string} The raw git diff output ('' when the local fallback cannot run git)
 * @throws {Error} When an environment-supplied revision is unsafe, or when the
 *   GitHub/GitLab git diff command itself fails
 */
export function collectPullRequestDiff() {
  if (process.env.PR_DIFF) {
    console.log(' Source: PR_DIFF env variable');
    return process.env.PR_DIFF;
  }

  const execOptions = {
    cwd: REPOSITORY_ROOT,
    encoding: /** @type {'utf-8'} */ ('utf-8'),
    maxBuffer: 1024 * 1024 * 8,
    // stdin is not needed and git's stderr is discarded; only stdout is captured.
    stdio: /** @type {['ignore', 'pipe', 'ignore']} */ (['ignore', 'pipe', 'ignore']),
  };

  const githubBaseSha = process.env.GITHUB_BASE_SHA;
  if (githubBaseSha) {
    const baseRevision = assertSafeGitRevision(githubBaseSha, 'GITHUB_BASE_SHA');
    const headRevision = assertSafeGitRevision(process.env.GITHUB_HEAD_SHA || 'HEAD', 'GITHUB_HEAD_SHA');
    console.log(` Source: GitHub Actions diff (${baseRevision.slice(0, 8)}...${headRevision.slice(0, 8)})`);
    return execSync(`git diff "${baseRevision}...${headRevision}"`, execOptions);
  }

  const gitlabBaseSha = process.env.CI_MERGE_REQUEST_DIFF_BASE_SHA;
  if (gitlabBaseSha) {
    const baseRevision = assertSafeGitRevision(gitlabBaseSha, 'CI_MERGE_REQUEST_DIFF_BASE_SHA');
    const headRevision = assertSafeGitRevision(process.env.CI_COMMIT_SHA || 'HEAD', 'CI_COMMIT_SHA');
    console.log(` Source: GitLab CI diff (${baseRevision.slice(0, 8)}...${headRevision.slice(0, 8)})`);
    return execSync(`git diff "${baseRevision}...${headRevision}"`, execOptions);
  }

  console.log(' Source: local HEAD~1..HEAD fallback');
  try {
    return execSync('git diff HEAD~1 HEAD', execOptions);
  } catch {
    // HEAD~1 does not exist on an initial commit; diff against Git's empty tree instead.
    try {
      const emptyTreeSha = '4b825dc642cb6eb9a060e54bf8d69288fbee4904';
      return execSync(`git diff "${emptyTreeSha}" HEAD`, execOptions);
    } catch {
      console.warn(' ⚠️ Unable to execute git diff. Defaulting to empty diff.');
      return '';
    }
  }
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Prompt construction for the LLM judge. Keeps the system role and the
|
|
5
|
+
* user-message diff packaging in one place so the contract surface (severity
|
|
6
|
+
* scheme + JSON_VERDICT format) stays inspectable.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { MAX_DIFF_CHARS } from './constants.mjs';
|
|
10
|
+
|
|
11
|
+
/**
 * Produces the fixed system prompt that defines the judge role: the severity
 * scheme, the human-readable report layout, and the trailing single-line
 * `JSON_VERDICT:` array.
 *
 * @returns {string} The system prompt text
 */
export function buildSystemPrompt() {
  const judgeInstructions = `You are a Senior Software Architect performing an automated code review for a CI/CD pipeline.

Your job: evaluate a git diff against the provided PR checklist and identify violations.
You must categorize each violation with a severity level: critical, high, medium, or low.

## Severity classification:
- critical: Security vulnerabilities (hardcoded secrets, SQL/command injection, missing auth checks, CORS), unvalidated external inputs.
- high: N+1 database queries, swallowed errors (empty catch blocks without re-throw/recovery), layer boundary violations.
- medium: TypeScript \`any\` type used without justification, missing test coverage, bad architectural patterns.
- low: Style preferences, minor naming nitpicks, documentation nitpicks, performance micro-optimizations.

## Mandatory output format:
You MUST output your findings in EXACTLY this structure:

\`\`\`
## PR REVIEW RESULTS
━━━━━━━━━━━━━━━━━━━

✅ [Section Name] — Passes
❌ [Section Name] — FAILS
📌 Rule: [rule file and section]
❌ Problem: [exact description of the issue found in the diff]
⚠️ Severity: [critical | high | medium | low]
✅ Fix: [specific actionable fix]

\`\`\`

Rules:
- Then at the absolute LAST line of your response, output a JSON array of the failed checks. Each object should have 'rule', 'problem', 'severity'. If there are no failures, output an empty array [].
- Make sure the JSON array is perfectly valid JSON on a single line starting with \`JSON_VERDICT: \`. For example:
JSON_VERDICT: [{"rule": "Security", "problem": "Hardcoded secret", "severity": "critical"}]
- If the diff is empty, contains only documentation changes, or has no source code changes, output JSON_VERDICT: [] immediately.`;

  return judgeInstructions;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Builds the user message combining the checklist and the (possibly truncated)
 * diff. A diff longer than MAX_DIFF_CHARS is cut at that length and followed by
 * a note stating how many characters were omitted, so the model knows the diff
 * is partial. A blank diff is replaced by an explicit placeholder.
 *
 * The closing instruction asks for the `JSON_VERDICT:` line as the final line
 * of the response, matching the system prompt's output contract.
 *
 * @param {string} prChecklistContent The PR checklist markdown
 * @param {string} diffContent The raw git diff
 * @returns {string} The user message text
 */
export function buildUserMessage(prChecklistContent, diffContent) {
  const truncatedDiff =
    diffContent.length > MAX_DIFF_CHARS
      ? `${diffContent.slice(0, MAX_DIFF_CHARS)}\n\n[DIFF TRUNCATED — ${(diffContent.length - MAX_DIFF_CHARS).toLocaleString()} additional characters omitted to stay within token limits]`
      : diffContent;

  return `## PR Checklist Reference

${prChecklistContent}

---

## Git Diff to Review

\`\`\`diff
${truncatedDiff.trim() || '(empty diff — no source code changes detected)'}
\`\`\`

Review the diff against the checklist. Report your findings in the required format. The absolute last line of your response must be the single-line JSON_VERDICT: [...] array.`;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* LLM provider invocations and selection logic. Each provider sticks to its
|
|
5
|
+
* native API contract; the selection helper picks the first one whose env key
|
|
6
|
+
* is set so callers do not need provider-specific glue code.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* @typedef {{ providerName: string, invokeProvider: (sys: string, usr: string) => Promise<string> }} SelectedProvider
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
async function callOpenAiProvider(systemPrompt, userMessage) {
|
|
14
|
+
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gpt-4o-mini';
|
|
15
|
+
const apiResponse = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
16
|
+
method: 'POST',
|
|
17
|
+
headers: {
|
|
18
|
+
'Content-Type': 'application/json',
|
|
19
|
+
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
|
|
20
|
+
},
|
|
21
|
+
body: JSON.stringify({
|
|
22
|
+
model: selectedModel,
|
|
23
|
+
max_tokens: 2048,
|
|
24
|
+
temperature: 0,
|
|
25
|
+
messages: [
|
|
26
|
+
{ role: 'system', content: systemPrompt },
|
|
27
|
+
{ role: 'user', content: userMessage },
|
|
28
|
+
],
|
|
29
|
+
}),
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
if (!apiResponse.ok) {
|
|
33
|
+
const errorBody = await apiResponse.text();
|
|
34
|
+
throw new Error(`OpenAI API returned ${apiResponse.status}: ${errorBody}`);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** @type {{ choices: Array<{ message: { content: string } }> }} */
|
|
38
|
+
const responsePayload = await apiResponse.json();
|
|
39
|
+
return responsePayload.choices[0].message.content;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
async function callAnthropicProvider(systemPrompt, userMessage) {
|
|
43
|
+
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'claude-3-5-haiku-latest';
|
|
44
|
+
const apiResponse = await fetch('https://api.anthropic.com/v1/messages', {
|
|
45
|
+
method: 'POST',
|
|
46
|
+
headers: {
|
|
47
|
+
'Content-Type': 'application/json',
|
|
48
|
+
'x-api-key': process.env.ANTHROPIC_API_KEY ?? '',
|
|
49
|
+
'anthropic-version': '2023-06-01',
|
|
50
|
+
},
|
|
51
|
+
body: JSON.stringify({
|
|
52
|
+
model: selectedModel,
|
|
53
|
+
max_tokens: 2048,
|
|
54
|
+
system: systemPrompt,
|
|
55
|
+
messages: [{ role: 'user', content: userMessage }],
|
|
56
|
+
}),
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
if (!apiResponse.ok) {
|
|
60
|
+
const errorBody = await apiResponse.text();
|
|
61
|
+
throw new Error(`Anthropic API returned ${apiResponse.status}: ${errorBody}`);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/** @type {{ content: Array<{ text: string }> }} */
|
|
65
|
+
const responsePayload = await apiResponse.json();
|
|
66
|
+
return responsePayload.content[0].text;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
async function callGeminiProvider(systemPrompt, userMessage) {
|
|
70
|
+
const selectedModel = process.env.LLM_JUDGE_MODEL ?? 'gemini-2.0-flash';
|
|
71
|
+
const apiKey = process.env.GEMINI_API_KEY ?? '';
|
|
72
|
+
const endpointUrl = `https://generativelanguage.googleapis.com/v1beta/models/${selectedModel}:generateContent?key=${apiKey}`;
|
|
73
|
+
|
|
74
|
+
const apiResponse = await fetch(endpointUrl, {
|
|
75
|
+
method: 'POST',
|
|
76
|
+
headers: { 'Content-Type': 'application/json' },
|
|
77
|
+
body: JSON.stringify({
|
|
78
|
+
system_instruction: { parts: [{ text: systemPrompt }] },
|
|
79
|
+
contents: [{ role: 'user', parts: [{ text: userMessage }] }],
|
|
80
|
+
generationConfig: { temperature: 0, maxOutputTokens: 2048 },
|
|
81
|
+
}),
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
if (!apiResponse.ok) {
|
|
85
|
+
const errorBody = await apiResponse.text();
|
|
86
|
+
throw new Error(`Gemini API returned ${apiResponse.status}: ${errorBody}`);
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** @type {{ candidates: Array<{ content: { parts: Array<{ text: string }> } }> }} */
|
|
90
|
+
const responsePayload = await apiResponse.json();
|
|
91
|
+
return responsePayload.candidates[0].content.parts[0].text;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Returns the first available LLM provider based on environment keys.
|
|
96
|
+
* Priority: OpenAI, then Anthropic, then Gemini.
|
|
97
|
+
*
|
|
98
|
+
* @returns {SelectedProvider | null}
|
|
99
|
+
*/
|
|
100
|
+
export function selectAvailableProvider() {
|
|
101
|
+
if (process.env.OPENAI_API_KEY) {
|
|
102
|
+
return { providerName: 'OpenAI (gpt-4o-mini)', invokeProvider: callOpenAiProvider };
|
|
103
|
+
}
|
|
104
|
+
if (process.env.ANTHROPIC_API_KEY) {
|
|
105
|
+
return { providerName: 'Anthropic (claude-3-5-haiku-latest)', invokeProvider: callAnthropicProvider };
|
|
106
|
+
}
|
|
107
|
+
if (process.env.GEMINI_API_KEY) {
|
|
108
|
+
return { providerName: 'Google Gemini (gemini-2.0-flash)', invokeProvider: callGeminiProvider };
|
|
109
|
+
}
|
|
110
|
+
return null;
|
|
111
|
+
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Verdict parsing and machine-readable report assembly for the LLM judge. Keeps
|
|
5
|
+
* the pipeline output deterministic so downstream gates and CI dashboards can
|
|
6
|
+
* consume a stable schema.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { writeFileSync } from 'node:fs';
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
MACHINE_REPORT_PATH,
|
|
13
|
+
SEVERITY_NORMALIZATION_TABLE,
|
|
14
|
+
SHOULD_EMIT_MACHINE_REPORT,
|
|
15
|
+
} from './constants.mjs';
|
|
16
|
+
import { detectCiProvider } from './diff-collection.mjs';
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* @typedef {import('./constants.mjs').MachineReportPayload} MachineReportPayload
|
|
20
|
+
* @typedef {import('./constants.mjs').Violation} Violation
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @param {string | undefined} rawSeverityValue
|
|
25
|
+
* @returns {string}
|
|
26
|
+
*/
|
|
27
|
+
export function normalizeSeverity(rawSeverityValue) {
|
|
28
|
+
const normalizedSeverityKey = String(rawSeverityValue || '').trim().toLowerCase();
|
|
29
|
+
return SEVERITY_NORMALIZATION_TABLE[normalizedSeverityKey] || 'low';
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* @param {MachineReportPayload} machineReportPayload
|
|
34
|
+
* @returns {string}
|
|
35
|
+
*/
|
|
36
|
+
export function formatMachineReadableLine(machineReportPayload) {
|
|
37
|
+
return `JSON_REPORT: ${JSON.stringify(machineReportPayload)}`;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
 * Writes the report to MACHINE_REPORT_PATH as pretty-printed JSON with a
 * trailing newline, then logs the single-line form and the saved path. Does
 * nothing when SHOULD_EMIT_MACHINE_REPORT is false.
 *
 * @param {MachineReportPayload} machineReportPayload The report to emit
 */
export function emitMachineReadableReport(machineReportPayload) {
  if (SHOULD_EMIT_MACHINE_REPORT) {
    const prettyReport = JSON.stringify(machineReportPayload, null, 2);
    writeFileSync(MACHINE_REPORT_PATH, `${prettyReport}\n`, 'utf-8');

    console.log(formatMachineReadableLine(machineReportPayload));
    console.log(`📎 Machine report saved: ${MACHINE_REPORT_PATH}`);
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Extracts and parses the JSON verdict from the LLM response.
 *
 * The verdict is the LAST `JSON_VERDICT: [...]` line in the response. Using the
 * last occurrence matters because the model is instructed to put the verdict
 * on its final line and may echo the example verdict from its instructions
 * earlier in the text.
 *
 * When no verdict line is present, or its JSON cannot be parsed, the process
 * exits with code 1 if failOnMalformedResponse is true; otherwise an empty
 * array is returned.
 *
 * @param {string} llmResponseText The full text returned by the model
 * @param {boolean} failOnMalformedResponse Whether a missing or unparsable verdict ends the process
 * @returns {Array<{ rule: string, problem: string, severity: string }>} The parsed verdict entries
 */
export function extractVerdict(llmResponseText, failOnMalformedResponse) {
  const verdictMatches = Array.from(llmResponseText.matchAll(/JSON_VERDICT:\s*(\[.*\])/gi));
  const lastVerdictMatch = verdictMatches[verdictMatches.length - 1];

  if (!lastVerdictMatch) {
    console.warn('⚠️ LLM response did not include a valid JSON_VERDICT line.');
    if (failOnMalformedResponse) {
      console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
      process.exit(1);
    }
    return [];
  }

  try {
    return JSON.parse(lastVerdictMatch[1]);
  } catch (err) {
    const parseMessage = err instanceof Error ? err.message : String(err);
    console.error('⚠️ Failed to parse JSON_VERDICT:', parseMessage);
    if (failOnMalformedResponse) {
      console.error('❌ Failing pipeline because malformed responses are not allowed by the profile.');
      process.exit(1);
    }
    return [];
  }
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* @param {Array<{ rule?: string, problem?: string, severity?: string }>} violations
|
|
84
|
+
* @returns {Violation[]}
|
|
85
|
+
*/
|
|
86
|
+
export function normalizeViolations(violations) {
|
|
87
|
+
return violations.map((violationItem) => ({
|
|
88
|
+
rule: String(violationItem.rule || 'Unknown Rule'),
|
|
89
|
+
problem: String(violationItem.problem || 'No problem description provided.'),
|
|
90
|
+
severity: normalizeSeverity(violationItem.severity),
|
|
91
|
+
}));
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
 * Assembles the machine-readable report (schema version '1.0') from the judge
 * run's results. The fail decision is true when any blocking violation was
 * found, the verdict was malformed, or the provider errored. The timestamp is
 * the current time in ISO format and the CI provider comes from
 * detectCiProvider().
 *
 * @param {{
 * provider: string,
 * selectedProfile: string,
 * blockingSeverities: string[],
 * finalViolations: Violation[],
 * blockingFound: Violation[],
 * isDryRun: boolean,
 * malformedVerdict: boolean,
 * providerError: boolean,
 * }} payloadInput
 * @returns {MachineReportPayload}
 */
export function buildMachineReportPayload(payloadInput) {
  const {
    provider,
    selectedProfile,
    blockingSeverities,
    finalViolations,
    blockingFound,
    isDryRun,
    malformedVerdict,
    providerError,
  } = payloadInput;

  const generatedAt = new Date().toISOString();
  const ciProvider = detectCiProvider();
  const blockingViolationCount = blockingFound.length;
  const failDecision = blockingViolationCount > 0 || malformedVerdict || providerError;
  const summary = {
    totalViolations: finalViolations.length,
    blockingViolations: blockingViolationCount,
  };

  return {
    generatedAt,
    schemaVersion: '1.0',
    profile: selectedProfile,
    provider,
    ciProvider,
    blockingSeverities,
    failDecision,
    malformedVerdict,
    providerError,
    dryRun: isDryRun,
    summary,
    violations: finalViolations,
  };
}
|