@ryuenn3123/agentic-senior-core 1.8.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/blueprints/mobile-app.md +21 -0
- package/.agent-context/review-checklists/frontend-skill-parity.md +28 -0
- package/.agent-context/skills/README.md +63 -0
- package/.agent-context/skills/backend/README.md +68 -0
- package/.agent-context/skills/backend/architecture.md +361 -0
- package/.agent-context/skills/backend/data-access.md +231 -0
- package/.agent-context/skills/backend/errors.md +138 -0
- package/.agent-context/skills/backend/validation.md +117 -0
- package/.agent-context/skills/backend.md +29 -0
- package/.agent-context/skills/cli/README.md +50 -0
- package/.agent-context/skills/cli/init.md +38 -0
- package/.agent-context/skills/cli/output.md +36 -0
- package/.agent-context/skills/cli/upgrade.md +38 -0
- package/.agent-context/skills/cli.md +29 -0
- package/.agent-context/skills/distribution/README.md +19 -0
- package/.agent-context/skills/distribution/compatibility.md +32 -0
- package/.agent-context/skills/distribution/publish.md +37 -0
- package/.agent-context/skills/distribution/rollback.md +32 -0
- package/.agent-context/skills/distribution.md +29 -0
- package/.agent-context/skills/frontend/README.md +36 -0
- package/.agent-context/skills/frontend/accessibility.md +107 -0
- package/.agent-context/skills/frontend/motion.md +67 -0
- package/.agent-context/skills/frontend/performance.md +63 -0
- package/.agent-context/skills/frontend/ui-architecture.md +128 -0
- package/.agent-context/skills/frontend.md +30 -0
- package/.agent-context/skills/fullstack/README.md +19 -0
- package/.agent-context/skills/fullstack/contracts.md +53 -0
- package/.agent-context/skills/fullstack/end-to-end.md +42 -0
- package/.agent-context/skills/fullstack/feature-slicing.md +65 -0
- package/.agent-context/skills/fullstack.md +27 -0
- package/.agent-context/skills/index.json +107 -0
- package/.agent-context/skills/review-quality/README.md +19 -0
- package/.agent-context/skills/review-quality/benchmark.md +30 -0
- package/.agent-context/skills/review-quality/planning.md +38 -0
- package/.agent-context/skills/review-quality/security.md +34 -0
- package/.agent-context/skills/review-quality.md +28 -0
- package/.agent-context/stacks/flutter.md +16 -0
- package/.agent-context/stacks/react-native.md +16 -0
- package/.agent-context/state/benchmark-analysis.json +431 -0
- package/.agent-context/state/benchmark-thresholds.json +10 -0
- package/.agent-context/state/benchmark-watchlist.json +19 -0
- package/.agent-context/state/skill-platform.json +38 -0
- package/.cursorrules +1 -1
- package/.github/workflows/benchmark-intelligence.yml +50 -0
- package/.windsurfrules +1 -1
- package/README.md +81 -2
- package/bin/agentic-senior-core.js +412 -3
- package/package.json +4 -2
- package/scripts/benchmark-gate.mjs +121 -0
- package/scripts/benchmark-intelligence.mjs +140 -0
- package/scripts/skill-tier-policy.mjs +76 -0
- package/scripts/validate.mjs +82 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* benchmark-gate.mjs
|
|
5
|
+
*
|
|
6
|
+
* Anti-regression gate for benchmark quality signals.
|
|
7
|
+
* Fails when benchmark metrics drop below configured thresholds.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
+
import { dirname, join, resolve } from 'node:path';
|
|
12
|
+
import { fileURLToPath } from 'node:url';
|
|
13
|
+
import { execFileSync } from 'node:child_process';
|
|
14
|
+
|
|
15
|
+
// Absolute filesystem path of this script, derived from the ES-module URL.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
// Directory that contains this script.
const SCRIPT_DIR = dirname(SCRIPT_FILE_PATH);
// The repository root is the parent of the script directory.
const REPOSITORY_ROOT = resolve(SCRIPT_DIR, '..');
// Optional JSON file holding the gate thresholds.
const BENCHMARK_THRESHOLD_PATH = resolve(
  REPOSITORY_ROOT,
  '.agent-context',
  'state',
  'benchmark-thresholds.json',
);
// Script that is executed to produce the benchmark metrics.
const DETECTION_BENCHMARK_PATH = resolve(REPOSITORY_ROOT, 'scripts', 'detection-benchmark.mjs');
|
|
20
|
+
|
|
21
|
+
/**
 * Loads the benchmark gate thresholds.
 *
 * When the thresholds file exists, its parsed JSON content is returned as-is
 * (no merging with defaults). When it does not exist, a built-in default
 * configuration is returned instead.
 *
 * @returns {object} Threshold configuration object.
 * @throws {SyntaxError} If the thresholds file exists but is not valid JSON.
 */
function readThresholdConfiguration() {
  if (existsSync(BENCHMARK_THRESHOLD_PATH)) {
    const rawThresholdJson = readFileSync(BENCHMARK_THRESHOLD_PATH, 'utf8');
    return JSON.parse(rawThresholdJson);
  }

  // Fallback used only when no thresholds file is present.
  const fallbackBaseline = {
    top1Accuracy: 0.9167,
    manualCorrectionRate: 0.0833,
  };

  return {
    minimumTop1Accuracy: 0.9,
    maximumManualCorrectionRate: 0.12,
    maximumTop1AccuracyDrop: 0.02,
    maximumManualCorrectionIncrease: 0.03,
    previousReleaseBaseline: fallbackBaseline,
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Executes the detection benchmark script in a child process and returns its
 * parsed JSON output.
 *
 * The child is started with `process.execPath` (the Node.js binary running
 * this script) rather than whatever `node` happens to be first on PATH, so
 * the benchmark always runs on the same runtime as the gate and still works
 * when `node` is not on PATH.
 *
 * @returns {object} The JSON document printed on stdout by the benchmark.
 * @throws {Error} If the child process fails (propagated from execFileSync)
 *   or if its stdout is not valid JSON.
 */
function runDetectionBenchmark() {
  const benchmarkRawOutput = execFileSync(process.execPath, [DETECTION_BENCHMARK_PATH], {
    cwd: REPOSITORY_ROOT,
    encoding: 'utf8',
  });

  try {
    return JSON.parse(benchmarkRawOutput);
  } catch (parseError) {
    // Surface which script produced the unparsable output instead of a bare SyntaxError.
    throw new Error(
      `detection-benchmark.mjs did not print valid JSON: ${parseError.message}`,
      { cause: parseError },
    );
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Packs the outcome of a single gate check into a plain record.
 *
 * @param {string} checkName - Identifier of the check.
 * @param {boolean} passed - Whether the check succeeded.
 * @param {string} details - Human-readable description of the compared values.
 * @returns {{checkName: string, passed: boolean, details: string}}
 */
function buildCheckResult(checkName, passed, details) {
  const checkRecord = { checkName, passed, details };
  return checkRecord;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Runs the benchmark anti-regression gate.
 *
 * Loads the threshold configuration, runs the detection benchmark, evaluates
 * the absolute thresholds and (when a previous-release baseline object is
 * configured) the regression thresholds, prints a JSON report to stdout and
 * sets the process exit status: 0 when every check passed, 1 otherwise.
 *
 * The exit status is set through `process.exitCode` instead of calling
 * `process.exit()`, so pending stdout writes are flushed before the process
 * terminates and the report cannot be truncated when stdout is asynchronous.
 *
 * Comparisons against missing (undefined/NaN) metrics evaluate to false, so a
 * benchmark that omits a metric fails the corresponding check.
 *
 * @returns {void}
 */
function runBenchmarkGate() {
  const thresholdConfiguration = readThresholdConfiguration();
  const benchmarkResult = runDetectionBenchmark();
  const benchmarkChecks = [];

  const top1AccuracyPassed = benchmarkResult.top1Accuracy >= thresholdConfiguration.minimumTop1Accuracy;
  benchmarkChecks.push(
    buildCheckResult(
      'minimum-top1-accuracy',
      top1AccuracyPassed,
      `top1Accuracy=${benchmarkResult.top1Accuracy} minimum=${thresholdConfiguration.minimumTop1Accuracy}`,
    ),
  );

  const manualCorrectionPassed = benchmarkResult.manualCorrectionRate <= thresholdConfiguration.maximumManualCorrectionRate;
  benchmarkChecks.push(
    buildCheckResult(
      'maximum-manual-correction-rate',
      manualCorrectionPassed,
      `manualCorrectionRate=${benchmarkResult.manualCorrectionRate} maximum=${thresholdConfiguration.maximumManualCorrectionRate}`,
    ),
  );

  // Regression checks only run when a baseline object is configured.
  const previousReleaseBaseline = thresholdConfiguration.previousReleaseBaseline;
  if (previousReleaseBaseline && typeof previousReleaseBaseline === 'object') {
    // Round to 4 decimals so floating-point noise does not flip a comparison.
    const top1AccuracyDrop = Number((previousReleaseBaseline.top1Accuracy - benchmarkResult.top1Accuracy).toFixed(4));
    const manualCorrectionIncrease = Number((benchmarkResult.manualCorrectionRate - previousReleaseBaseline.manualCorrectionRate).toFixed(4));

    const top1AccuracyDropPassed = top1AccuracyDrop <= thresholdConfiguration.maximumTop1AccuracyDrop;
    benchmarkChecks.push(
      buildCheckResult(
        'maximum-top1-accuracy-drop',
        top1AccuracyDropPassed,
        `drop=${top1AccuracyDrop} maximum=${thresholdConfiguration.maximumTop1AccuracyDrop}`,
      ),
    );

    const manualCorrectionIncreasePassed = manualCorrectionIncrease <= thresholdConfiguration.maximumManualCorrectionIncrease;
    benchmarkChecks.push(
      buildCheckResult(
        'maximum-manual-correction-increase',
        manualCorrectionIncreasePassed,
        `increase=${manualCorrectionIncrease} maximum=${thresholdConfiguration.maximumManualCorrectionIncrease}`,
      ),
    );
  }

  const failedCheckCount = benchmarkChecks.filter((benchmarkCheck) => !benchmarkCheck.passed).length;
  const benchmarkGateReport = {
    generatedAt: new Date().toISOString(),
    gateName: 'benchmark-gate',
    passed: failedCheckCount === 0,
    failureCount: failedCheckCount,
    benchmarkResult: {
      fixtureCount: benchmarkResult.fixtureCount,
      top1Accuracy: benchmarkResult.top1Accuracy,
      manualCorrectionRate: benchmarkResult.manualCorrectionRate,
    },
    thresholds: thresholdConfiguration,
    results: benchmarkChecks,
  };

  console.log(JSON.stringify(benchmarkGateReport, null, 2));
  // Let the event loop drain (flushing stdout) before exiting with this status.
  process.exitCode = benchmarkGateReport.passed ? 0 : 1;
}
|
|
120
|
+
|
|
121
|
+
runBenchmarkGate();
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* benchmark-intelligence.mjs
|
|
5
|
+
*
|
|
6
|
+
* Competitive intelligence cadence validator.
|
|
7
|
+
* Ensures benchmark watchlist coverage and review freshness.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
11
|
+
import { dirname, join, resolve } from 'node:path';
|
|
12
|
+
import { fileURLToPath } from 'node:url';
|
|
13
|
+
|
|
14
|
+
// Absolute filesystem path of this script, derived from the ES-module URL.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
// Directory that contains this script.
const SCRIPT_DIR = dirname(SCRIPT_FILE_PATH);
// The repository root is the parent of the script directory.
const REPOSITORY_ROOT = resolve(SCRIPT_DIR, '..');
// JSON file listing the repositories that are being tracked.
const WATCHLIST_PATH = resolve(
  REPOSITORY_ROOT,
  '.agent-context',
  'state',
  'benchmark-watchlist.json',
);
// Maximum allowed age, in days, of a watchlist entry's last review.
const REVIEW_SLA_DAYS = 14;
// Repositories that must appear in the watchlist for validation to pass.
const REQUIRED_BENCHMARK_REPOSITORIES = new Set([
  'sickn33/antigravity-awesome-skills',
  'github/awesome-copilot',
  'MiniMax-AI/skills',
]);
|
|
24
|
+
|
|
25
|
+
/**
 * Parses a strict `YYYY-MM-DD` string into a Date at UTC midnight.
 *
 * Some engines silently roll impossible calendar days over into the next
 * month when parsing ISO strings (e.g. `2024-02-31`), so the parsed value is
 * formatted back and compared with the input; any mismatch is treated as an
 * invalid date.
 *
 * @param {unknown} rawDateValue - Candidate date value.
 * @returns {Date|null} The parsed date, or null when the value is not a
 *   string, is not in `YYYY-MM-DD` form, or is not a real calendar date.
 */
function parseDateOrNull(rawDateValue) {
  if (typeof rawDateValue !== 'string') {
    return null;
  }

  if (!/^\d{4}-\d{2}-\d{2}$/.test(rawDateValue)) {
    return null;
  }

  const parsedDate = new Date(`${rawDateValue}T00:00:00.000Z`);
  if (Number.isNaN(parsedDate.getTime())) {
    return null;
  }

  // Reject dates that were normalized by the engine (day/month overflow).
  const roundTrippedDate = parsedDate.toISOString().slice(0, 10);
  return roundTrippedDate === rawDateValue ? parsedDate : null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Returns the number of whole days between two dates, rounded down.
 *
 * The result is negative when `targetDate` is later than `referenceDate`.
 *
 * @param {Date} referenceDate - The later point in time (typically "now").
 * @param {Date} targetDate - The earlier point in time.
 * @returns {number} Whole days from `targetDate` to `referenceDate`.
 */
function calculateAgeInDays(referenceDate, targetDate) {
  const MILLISECONDS_PER_DAY = 24 * 60 * 60 * 1000;
  const elapsedMilliseconds = referenceDate.getTime() - targetDate.getTime();
  return Math.floor(elapsedMilliseconds / MILLISECONDS_PER_DAY);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Loads the benchmark watchlist configuration.
 *
 * @returns {object} The parsed JSON content of the watchlist file, or
 *   `{ repositories: [] }` when the file does not exist.
 * @throws {SyntaxError} If the watchlist file exists but is not valid JSON.
 */
function loadWatchlistConfiguration() {
  return existsSync(WATCHLIST_PATH)
    ? JSON.parse(readFileSync(WATCHLIST_PATH, 'utf8'))
    : { repositories: [] };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Validates the benchmark watchlist and prints a JSON report.
 *
 * Checks performed:
 *  - every repository in REQUIRED_BENCHMARK_REPOSITORIES is listed;
 *  - every entry has a non-blank string `owner`;
 *  - every entry has a valid `YYYY-MM-DD` `lastReviewedAt`;
 *  - every valid review date is no older than REVIEW_SLA_DAYS and is not
 *    in the future.
 *
 * A review date more than one day ahead of "now" fails the SLA check; the
 * one-day tolerance covers dates written in a local timezone ahead of UTC.
 * Without this, a far-future date would yield a negative age and satisfy
 * the freshness gate indefinitely.
 *
 * Watchlist entries that are not objects are treated as empty entries, so
 * they are reported as failed checks instead of crashing the validator.
 *
 * The exit status (0 when all checks pass, 1 otherwise) is set through
 * `process.exitCode` rather than `process.exit()`, so the report is fully
 * flushed to stdout before the process ends.
 *
 * @returns {void}
 */
function runIntelligenceValidation() {
  const watchlistConfiguration = loadWatchlistConfiguration();
  const watchlistEntries = Array.isArray(watchlistConfiguration?.repositories)
    ? watchlistConfiguration.repositories.map((rawEntry) =>
        (rawEntry !== null && typeof rawEntry === 'object' ? rawEntry : {}))
    : [];
  const validationResults = [];
  const currentDate = new Date();

  const trackedRepositoryNames = new Set(
    watchlistEntries.map((watchlistEntry) => watchlistEntry.repository),
  );

  for (const requiredRepositoryName of REQUIRED_BENCHMARK_REPOSITORIES) {
    const hasRequiredRepository = trackedRepositoryNames.has(requiredRepositoryName);
    validationResults.push({
      checkName: 'required-benchmark-repository',
      repository: requiredRepositoryName,
      passed: hasRequiredRepository,
      details: hasRequiredRepository
        ? `${requiredRepositoryName} is present in watchlist`
        : `${requiredRepositoryName} is missing from watchlist`,
    });
  }

  const watchlistReport = [];
  for (const watchlistEntry of watchlistEntries) {
    const repositoryName = watchlistEntry.repository;
    const repositoryOwner = watchlistEntry.owner;
    const lastReviewedDate = parseDateOrNull(watchlistEntry.lastReviewedAt);

    const hasOwner = typeof repositoryOwner === 'string' && repositoryOwner.trim().length > 0;
    validationResults.push({
      checkName: 'watchlist-owner-defined',
      repository: repositoryName,
      passed: hasOwner,
      details: hasOwner ? `Owner ${repositoryOwner} is defined` : 'Owner is missing',
    });

    if (!lastReviewedDate) {
      validationResults.push({
        checkName: 'review-date-format',
        repository: repositoryName,
        passed: false,
        details: `Invalid or missing lastReviewedAt: ${String(watchlistEntry.lastReviewedAt)}`,
      });

      // Without a usable date the age is unknown; report the entry as stale.
      watchlistReport.push({
        repository: repositoryName,
        owner: repositoryOwner,
        lastReviewedAt: watchlistEntry.lastReviewedAt,
        ageInDays: null,
        stale: true,
      });
      continue;
    }

    const reviewAgeInDays = calculateAgeInDays(currentDate, lastReviewedDate);
    // An age of -1 can legitimately occur for "today" in timezones ahead of UTC.
    const reviewDateInFuture = reviewAgeInDays < -1;
    const reviewWithinSla = !reviewDateInFuture && reviewAgeInDays <= REVIEW_SLA_DAYS;
    const slaDetails = `ageInDays=${reviewAgeInDays} slaDays=${REVIEW_SLA_DAYS}`;

    validationResults.push({
      checkName: 'review-sla-compliance',
      repository: repositoryName,
      passed: reviewWithinSla,
      details: reviewDateInFuture
        ? `${slaDetails} (lastReviewedAt is in the future)`
        : slaDetails,
    });

    watchlistReport.push({
      repository: repositoryName,
      owner: repositoryOwner,
      lastReviewedAt: watchlistEntry.lastReviewedAt,
      ageInDays: reviewAgeInDays,
      stale: !reviewWithinSla,
    });
  }

  const failedCheckCount = validationResults.filter((validationResult) => !validationResult.passed).length;
  const intelligenceReport = {
    generatedAt: new Date().toISOString(),
    reportName: 'benchmark-intelligence',
    passed: failedCheckCount === 0,
    failureCount: failedCheckCount,
    reviewSlaDays: REVIEW_SLA_DAYS,
    watchlist: watchlistReport,
    results: validationResults,
  };

  console.log(JSON.stringify(intelligenceReport, null, 2));
  // Let the event loop drain (flushing stdout) before exiting with this status.
  process.exitCode = intelligenceReport.passed ? 0 : 1;
}
|
|
139
|
+
|
|
140
|
+
runIntelligenceValidation();
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Minimum content requirements per skill tier, keyed by tier name.
 * Each rule set gives the minimum number of words, section headings,
 * checklist items and fenced code blocks a topic document must contain.
 */
export const SKILL_TIER_MINIMUMS = {
  standard: {
    minWords: 60,
    minHeadings: 1,
    minChecklistItems: 0,
    minCodeBlocks: 0,
  },
  advance: {
    minWords: 100,
    minHeadings: 2,
    minChecklistItems: 1,
    minCodeBlocks: 0,
  },
  expert: {
    minWords: 130,
    minHeadings: 3,
    minChecklistItems: 1,
    minCodeBlocks: 0,
  },
  above: {
    minWords: 240,
    minHeadings: 3,
    minChecklistItems: 1,
    minCodeBlocks: 1,
  },
};
|
|
7
|
+
|
|
8
|
+
/**
 * Counts the prose words in a markdown document.
 *
 * Fenced code blocks are removed first, then every character other than
 * ASCII letters, digits, `_`, `-` and whitespace is replaced by a space.
 * A token only counts as a word when it contains at least one letter or
 * digit, so list bullets (`-`), horizontal rules (`---`), table separators
 * and stray underscores do not inflate the count. Hyphenated terms such as
 * `end-to-end` count as a single word.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of words outside fenced code blocks.
 */
export function countWords(markdownContent) {
  return markdownContent
    .replace(/```[\s\S]*?```/g, ' ')
    .replace(/[^A-Za-z0-9_\-\s]/g, ' ')
    .split(/\s+/)
    .filter((token) => /[A-Za-z0-9]/.test(token)).length;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Counts section headings in a markdown document.
 *
 * A heading is a line that starts with two to six `#` characters followed
 * by whitespace; top-level (`#`) headings are not counted.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of level 2-6 headings.
 */
export function countMarkdownHeadings(markdownContent) {
  const sectionHeadingPattern = /^#{2,6}\s+/gm;
  const foundHeadings = markdownContent.match(sectionHeadingPattern) ?? [];
  return foundHeadings.length;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Counts markdown task-list items (`- [ ]`, `- [x]`, `* [X]`, optionally
 * indented) in a document.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of checklist items found.
 */
export function countChecklistItems(markdownContent) {
  const checklistItemPattern = /^\s*[-*]\s+\[[ xX]\]\s+/gm;
  return [...markdownContent.matchAll(checklistItemPattern)].length;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Counts fenced code blocks in a markdown document.
 *
 * Every occurrence of a triple-backtick fence is counted and each pair of
 * fences is treated as one block; an unpaired trailing fence is ignored.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of complete fence pairs.
 */
export function countCodeBlocks(markdownContent) {
  // Splitting on the fence yields one more segment than there are fences.
  const fenceCount = markdownContent.split('```').length - 1;
  return (fenceCount - (fenceCount % 2)) / 2;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Extracts the declared skill tier from a markdown document.
 *
 * Bold markers (`**`) are stripped before matching, then the first
 * `Tier: <name>` declaration is looked up (case-insensitive, with the name
 * optionally wrapped in backticks). Only the tier names `standard`,
 * `advance`, `expert` and `above` are recognized.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {string|null} The lower-cased tier name, or null when no
 *   recognized tier declaration is present.
 */
export function extractSkillTier(markdownContent) {
  const tierDeclarationPattern = /\bTier\s*:\s*`?(standard|advance|expert|above)`?\b/i;
  const contentWithoutBold = markdownContent.replace(/\*\*/g, '');
  const declaration = tierDeclarationPattern.exec(contentWithoutBold);

  if (declaration === null) {
    return null;
  }

  return declaration[1].toLowerCase();
}
|
|
41
|
+
|
|
42
|
+
/**
 * Validates a skill topic document against the minimums of its declared tier.
 *
 * The checks run in a fixed order (words, headings, checklist items, code
 * blocks) and the first one that falls short determines the failure reason.
 *
 * @param {string} markdownContent - Raw markdown text of the topic.
 * @returns {object} On success: `{ isValid: true, detectedTier, wordCount,
 *   headingCount, checklistCount, codeBlockCount, minimumRules }`.
 *   On failure: `{ isValid: false, reason, ... }` where `reason` is one of
 *   `missing-tier`, `unsupported-tier`, `word-count`, `heading-count`,
 *   `checklist-count` or `code-block-count`; metric failures also carry the
 *   detected tier, the offending count and the applied `minimumRules`.
 */
export function validateSkillTopicContent(markdownContent) {
  const detectedTier = extractSkillTier(markdownContent);
  if (!detectedTier) {
    return { isValid: false, reason: 'missing-tier' };
  }

  const minimumRules = SKILL_TIER_MINIMUMS[detectedTier];
  if (!minimumRules) {
    return { isValid: false, reason: 'unsupported-tier', detectedTier };
  }

  const measurements = {
    wordCount: countWords(markdownContent),
    headingCount: countMarkdownHeadings(markdownContent),
    checklistCount: countChecklistItems(markdownContent),
    codeBlockCount: countCodeBlocks(markdownContent),
  };

  // [failure reason, measured property, rule property], in evaluation order.
  const orderedRequirements = [
    ['word-count', 'wordCount', 'minWords'],
    ['heading-count', 'headingCount', 'minHeadings'],
    ['checklist-count', 'checklistCount', 'minChecklistItems'],
    ['code-block-count', 'codeBlockCount', 'minCodeBlocks'],
  ];

  for (const [reason, measuredKey, ruleKey] of orderedRequirements) {
    const measuredValue = measurements[measuredKey];
    if (measuredValue < minimumRules[ruleKey]) {
      return { isValid: false, reason, detectedTier, [measuredKey]: measuredValue, minimumRules };
    }
  }

  return { isValid: true, detectedTier, ...measurements, minimumRules };
}
|
package/scripts/validate.mjs
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import { readdir, readFile, stat } from 'node:fs/promises';
|
|
17
17
|
import { dirname, join, relative, resolve } from 'node:path';
|
|
18
18
|
import { fileURLToPath } from 'node:url';
|
|
19
|
+
import { validateSkillTopicContent } from './skill-tier-policy.mjs';
|
|
19
20
|
|
|
20
21
|
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
|
|
21
22
|
const ROOT_DIR = resolve(dirname(SCRIPT_FILE_PATH), '..');
|
|
@@ -25,6 +26,7 @@ const CHANGELOG_PATH = join(ROOT_DIR, 'CHANGELOG.md');
|
|
|
25
26
|
const README_PATH = join(ROOT_DIR, 'README.md');
|
|
26
27
|
const POLICY_FILE_PATH = join(ROOT_DIR, '.agent-context', 'policies', 'llm-judge-threshold.json');
|
|
27
28
|
const OVERRIDE_FILE_PATH = join(ROOT_DIR, '.agent-override.md');
|
|
29
|
+
const SKILLS_DIR = join(AGENT_CONTEXT_DIR, 'skills');
|
|
28
30
|
const GENERATED_RULE_FILES = ['.cursorrules', '.windsurfrules'];
|
|
29
31
|
const ALLOWED_SEVERITIES = new Set(['critical', 'high', 'medium', 'low']);
|
|
30
32
|
const OVERRIDE_WARNING_WINDOW_DAYS = 30;
|
|
@@ -101,6 +103,8 @@ async function validateRequiredFiles() {
|
|
|
101
103
|
'scripts/validate.mjs',
|
|
102
104
|
'scripts/llm-judge.mjs',
|
|
103
105
|
'scripts/detection-benchmark.mjs',
|
|
106
|
+
'scripts/benchmark-gate.mjs',
|
|
107
|
+
'scripts/benchmark-intelligence.mjs',
|
|
104
108
|
'scripts/frontend-usability-audit.mjs',
|
|
105
109
|
'scripts/release-gate.mjs',
|
|
106
110
|
'scripts/generate-sbom.mjs',
|
|
@@ -121,8 +125,13 @@ async function validateRequiredFiles() {
|
|
|
121
125
|
'docs/v1.7-execution-playbook.md',
|
|
122
126
|
'docs/v1.7-issue-breakdown.md',
|
|
123
127
|
'docs/v1.8-operations-playbook.md',
|
|
128
|
+
'docs/v2-upgrade-playbook.md',
|
|
129
|
+
'.agent-context/state/benchmark-watchlist.json',
|
|
130
|
+
'.agent-context/state/skill-platform.json',
|
|
131
|
+
'.agent-context/skills/index.json',
|
|
124
132
|
'.github/workflows/release-gate.yml',
|
|
125
133
|
'.github/workflows/sbom-compliance.yml',
|
|
134
|
+
'.github/workflows/benchmark-intelligence.yml',
|
|
126
135
|
'tests/cli-smoke.test.mjs',
|
|
127
136
|
'tests/llm-judge.test.mjs',
|
|
128
137
|
'tests/enterprise-ops.test.mjs',
|
|
@@ -204,10 +213,24 @@ async function validateRuleFiles() {
|
|
|
204
213
|
'profiles/platform.md',
|
|
205
214
|
'review-checklists/pr-checklist.md',
|
|
206
215
|
'review-checklists/frontend-usability.md',
|
|
216
|
+
'review-checklists/frontend-skill-parity.md',
|
|
207
217
|
'review-checklists/release-operations.md',
|
|
208
218
|
'review-checklists/security-audit.md',
|
|
209
219
|
'review-checklists/performance-audit.md',
|
|
210
220
|
'review-checklists/architecture-review.md',
|
|
221
|
+
'skills/README.md',
|
|
222
|
+
'skills/frontend/README.md',
|
|
223
|
+
'skills/backend/README.md',
|
|
224
|
+
'skills/fullstack/README.md',
|
|
225
|
+
'skills/cli/README.md',
|
|
226
|
+
'skills/distribution/README.md',
|
|
227
|
+
'skills/review-quality/README.md',
|
|
228
|
+
'skills/frontend.md',
|
|
229
|
+
'skills/backend.md',
|
|
230
|
+
'skills/fullstack.md',
|
|
231
|
+
'skills/cli.md',
|
|
232
|
+
'skills/distribution.md',
|
|
233
|
+
'skills/review-quality.md',
|
|
211
234
|
'state/architecture-map.md',
|
|
212
235
|
'state/dependency-map.md',
|
|
213
236
|
];
|
|
@@ -230,6 +253,63 @@ async function validateRuleFiles() {
|
|
|
230
253
|
}
|
|
231
254
|
}
|
|
232
255
|
|
|
256
|
+
/**
 * Applies the skill tier quality gate to every skill topic document.
 *
 * Markdown files under SKILLS_DIR are collected; files whose path ends in
 * `README.md` and files located directly in SKILLS_DIR (no subdirectory in
 * their relative path) are excluded. Each remaining file is validated with
 * validateSkillTopicContent, and the outcome is reported through `pass` or
 * `fail` with a message that names the file relative to ROOT_DIR.
 *
 * NOTE(review): `collectFiles`, `readTextFile`, `pass` and `fail` are defined
 * elsewhere in this file; their exact behavior is not visible here.
 *
 * @returns {Promise<void>}
 */
async function validateSkillTierQuality() {
  console.log('\nChecking skill tier quality...');

  // Translates a failed validation outcome into its report message.
  const describeFailure = (topicPath, outcome) => {
    const { reason, detectedTier, minimumRules } = outcome;

    switch (reason) {
      case 'missing-tier':
        return `${topicPath} is missing explicit Tier metadata`;
      case 'unsupported-tier':
        return `${topicPath} has unsupported tier: ${detectedTier}`;
      case 'word-count':
        return `${topicPath} tier ${detectedTier} must include at least ${minimumRules.minWords} words (found ${outcome.wordCount})`;
      case 'heading-count':
        return `${topicPath} tier ${detectedTier} must include at least ${minimumRules.minHeadings} section headings (found ${outcome.headingCount})`;
      case 'checklist-count':
        return `${topicPath} tier ${detectedTier} must include at least ${minimumRules.minChecklistItems} checklist item(s) (found ${outcome.checklistCount})`;
      case 'code-block-count':
        return `${topicPath} tier ${detectedTier} must include at least ${minimumRules.minCodeBlocks} code block(s) (found ${outcome.codeBlockCount})`;
      default:
        return `${topicPath} failed tier validation`;
    }
  };

  const allSkillMarkdown = await collectFiles(SKILLS_DIR, (fileName) => fileName.endsWith('.md'));

  // Keep only topic files: not a README and nested inside a domain directory.
  const topicFilePaths = allSkillMarkdown.filter(
    (candidatePath) =>
      !candidatePath.endsWith('README.md') && /[\\/]/.test(relative(SKILLS_DIR, candidatePath)),
  );

  for (const topicFilePath of topicFilePaths) {
    const topicContent = await readTextFile(topicFilePath);
    const displayPath = relative(ROOT_DIR, topicFilePath);
    const outcome = validateSkillTopicContent(topicContent);

    if (outcome.isValid) {
      pass(`${displayPath} tier ${outcome.detectedTier} quality gate passed`);
    } else {
      fail(describeFailure(displayPath, outcome));
    }
  }
}
|
|
312
|
+
|
|
233
313
|
/**
 * Removes every fenced (triple-backtick) code block from markdown text.
 *
 * @param {string} markdownText - Raw markdown text.
 * @returns {string} The text with all complete fenced blocks deleted.
 */
function stripMarkdownCodeBlocks(markdownText) {
  const fencedBlockPattern = /```[\s\S]*?```/;
  // Cutting the text at each fenced block and re-joining drops the blocks.
  return markdownText.split(fencedBlockPattern).join('');
}
|
|
@@ -484,6 +564,7 @@ async function validateDocumentationFlow() {
|
|
|
484
564
|
'npm run validate',
|
|
485
565
|
'docs/faq.md',
|
|
486
566
|
'docs/deep-dive.md',
|
|
567
|
+
'docs/v2-upgrade-playbook.md',
|
|
487
568
|
];
|
|
488
569
|
|
|
489
570
|
for (const requiredReadmeSnippet of requiredReadmeSnippets) {
|
|
@@ -523,6 +604,7 @@ async function main() {
|
|
|
523
604
|
await validateRequiredFiles();
|
|
524
605
|
await validateMarkdownFiles();
|
|
525
606
|
await validateRuleFiles();
|
|
607
|
+
await validateSkillTierQuality();
|
|
526
608
|
await validateOverrideGovernance();
|
|
527
609
|
await validateAgentsManifest();
|
|
528
610
|
await validateCrossReferences();
|