spec-gen-cli 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +175 -55
- package/dist/api/analyze.d.ts.map +1 -1
- package/dist/api/analyze.js +6 -1
- package/dist/api/analyze.js.map +1 -1
- package/dist/api/audit.d.ts +10 -0
- package/dist/api/audit.d.ts.map +1 -0
- package/dist/api/audit.js +117 -0
- package/dist/api/audit.js.map +1 -0
- package/dist/api/generate.d.ts.map +1 -1
- package/dist/api/generate.js +10 -1
- package/dist/api/generate.js.map +1 -1
- package/dist/api/index.d.ts +3 -2
- package/dist/api/index.d.ts.map +1 -1
- package/dist/api/index.js +1 -0
- package/dist/api/index.js.map +1 -1
- package/dist/api/run.d.ts.map +1 -1
- package/dist/api/run.js +5 -1
- package/dist/api/run.js.map +1 -1
- package/dist/api/types.d.ts +15 -4
- package/dist/api/types.d.ts.map +1 -1
- package/dist/cli/commands/analyze.d.ts +3 -0
- package/dist/cli/commands/analyze.d.ts.map +1 -1
- package/dist/cli/commands/analyze.js +112 -17
- package/dist/cli/commands/analyze.js.map +1 -1
- package/dist/cli/commands/audit.d.ts +9 -0
- package/dist/cli/commands/audit.d.ts.map +1 -0
- package/dist/cli/commands/audit.js +98 -0
- package/dist/cli/commands/audit.js.map +1 -0
- package/dist/cli/commands/drift.d.ts.map +1 -1
- package/dist/cli/commands/drift.js +8 -10
- package/dist/cli/commands/drift.js.map +1 -1
- package/dist/cli/commands/generate.d.ts.map +1 -1
- package/dist/cli/commands/generate.js +15 -37
- package/dist/cli/commands/generate.js.map +1 -1
- package/dist/cli/commands/mcp.d.ts +102 -2
- package/dist/cli/commands/mcp.d.ts.map +1 -1
- package/dist/cli/commands/mcp.js +134 -2
- package/dist/cli/commands/mcp.js.map +1 -1
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +9 -47
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/setup.d.ts +17 -0
- package/dist/cli/commands/setup.d.ts.map +1 -0
- package/dist/cli/commands/setup.js +201 -0
- package/dist/cli/commands/setup.js.map +1 -0
- package/dist/cli/commands/verify.d.ts.map +1 -1
- package/dist/cli/commands/verify.js +7 -8
- package/dist/cli/commands/verify.js.map +1 -1
- package/dist/cli/index.js +14 -8
- package/dist/cli/index.js.map +1 -1
- package/dist/constants.d.ts +14 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +14 -0
- package/dist/constants.js.map +1 -1
- package/dist/core/analyzer/ai-config-generator.d.ts +54 -0
- package/dist/core/analyzer/ai-config-generator.d.ts.map +1 -0
- package/dist/core/analyzer/ai-config-generator.js +85 -0
- package/dist/core/analyzer/ai-config-generator.js.map +1 -0
- package/dist/core/analyzer/artifact-generator.d.ts +27 -2
- package/dist/core/analyzer/artifact-generator.d.ts.map +1 -1
- package/dist/core/analyzer/artifact-generator.js +86 -8
- package/dist/core/analyzer/artifact-generator.js.map +1 -1
- package/dist/core/analyzer/codebase-digest.d.ts.map +1 -1
- package/dist/core/analyzer/codebase-digest.js +12 -11
- package/dist/core/analyzer/codebase-digest.js.map +1 -1
- package/dist/core/analyzer/env-extractor.d.ts +33 -0
- package/dist/core/analyzer/env-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/env-extractor.js +196 -0
- package/dist/core/analyzer/env-extractor.js.map +1 -0
- package/dist/core/analyzer/http-route-parser.d.ts +36 -1
- package/dist/core/analyzer/http-route-parser.d.ts.map +1 -1
- package/dist/core/analyzer/http-route-parser.js +276 -0
- package/dist/core/analyzer/http-route-parser.js.map +1 -1
- package/dist/core/analyzer/middleware-extractor.d.ts +29 -0
- package/dist/core/analyzer/middleware-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/middleware-extractor.js +195 -0
- package/dist/core/analyzer/middleware-extractor.js.map +1 -0
- package/dist/core/analyzer/schema-extractor.d.ts +41 -0
- package/dist/core/analyzer/schema-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/schema-extractor.js +229 -0
- package/dist/core/analyzer/schema-extractor.js.map +1 -0
- package/dist/core/analyzer/spec-snapshot-generator.d.ts +17 -0
- package/dist/core/analyzer/spec-snapshot-generator.d.ts.map +1 -0
- package/dist/core/analyzer/spec-snapshot-generator.js +201 -0
- package/dist/core/analyzer/spec-snapshot-generator.js.map +1 -0
- package/dist/core/analyzer/ui-component-extractor.d.ts +43 -0
- package/dist/core/analyzer/ui-component-extractor.d.ts.map +1 -0
- package/dist/core/analyzer/ui-component-extractor.js +245 -0
- package/dist/core/analyzer/ui-component-extractor.js.map +1 -0
- package/dist/core/generator/openspec-format-generator.d.ts.map +1 -1
- package/dist/core/generator/openspec-format-generator.js +8 -0
- package/dist/core/generator/openspec-format-generator.js.map +1 -1
- package/dist/core/generator/spec-pipeline.d.ts +9 -0
- package/dist/core/generator/spec-pipeline.d.ts.map +1 -1
- package/dist/core/generator/spec-pipeline.js +94 -2
- package/dist/core/generator/spec-pipeline.js.map +1 -1
- package/dist/core/generator/stages/stage1-survey.d.ts.map +1 -1
- package/dist/core/generator/stages/stage1-survey.js +43 -0
- package/dist/core/generator/stages/stage1-survey.js.map +1 -1
- package/dist/core/generator/stages/stage2-entities.d.ts.map +1 -1
- package/dist/core/generator/stages/stage2-entities.js +6 -2
- package/dist/core/generator/stages/stage2-entities.js.map +1 -1
- package/dist/core/generator/stages/stage3-services.d.ts.map +1 -1
- package/dist/core/generator/stages/stage3-services.js +9 -2
- package/dist/core/generator/stages/stage3-services.js.map +1 -1
- package/dist/core/generator/stages/stage4-api.d.ts.map +1 -1
- package/dist/core/generator/stages/stage4-api.js +6 -2
- package/dist/core/generator/stages/stage4-api.js.map +1 -1
- package/dist/core/services/llm-service.d.ts +26 -10
- package/dist/core/services/llm-service.d.ts.map +1 -1
- package/dist/core/services/llm-service.js +171 -16
- package/dist/core/services/llm-service.js.map +1 -1
- package/dist/core/services/mcp-handlers/analysis.d.ts +32 -1
- package/dist/core/services/mcp-handlers/analysis.d.ts.map +1 -1
- package/dist/core/services/mcp-handlers/analysis.js +185 -2
- package/dist/core/services/mcp-handlers/analysis.js.map +1 -1
- package/dist/core/verifier/verification-engine.d.ts +67 -6
- package/dist/core/verifier/verification-engine.d.ts.map +1 -1
- package/dist/core/verifier/verification-engine.js +316 -90
- package/dist/core/verifier/verification-engine.js.map +1 -1
- package/dist/types/index.d.ts +70 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pipeline.d.ts +9 -0
- package/dist/types/pipeline.d.ts.map +1 -1
- package/dist/utils/command-helpers.d.ts +30 -0
- package/dist/utils/command-helpers.d.ts.map +1 -1
- package/dist/utils/command-helpers.js +69 -1
- package/dist/utils/command-helpers.js.map +1 -1
- package/examples/bmad/README.md +113 -0
- package/examples/bmad/agents/architect.md +226 -0
- package/examples/bmad/agents/dev-brownfield.md +69 -0
- package/examples/bmad/setup/architect.customize.yaml +14 -0
- package/examples/bmad/tasks/implement-story.md +254 -0
- package/examples/bmad/tasks/onboarding.md +169 -0
- package/examples/bmad/tasks/refactor.md +178 -0
- package/examples/bmad/tasks/sprint-planning.md +168 -0
- package/examples/bmad/templates/story.md +108 -0
- package/examples/cline-workflows/spec-gen-analyze-codebase.md +100 -0
- package/examples/cline-workflows/spec-gen-check-spec-drift.md +102 -0
- package/examples/cline-workflows/spec-gen-execute-refactor.md +194 -0
- package/examples/cline-workflows/spec-gen-implement-feature.md +238 -0
- package/examples/cline-workflows/spec-gen-plan-refactor.md +255 -0
- package/examples/cline-workflows/spec-gen-refactor-codebase.md +16 -0
- package/examples/drift-demo/openspec/config.yaml +14 -0
- package/examples/drift-demo/openspec/specs/architecture/spec.md +30 -0
- package/examples/drift-demo/openspec/specs/auth/spec.md +71 -0
- package/examples/drift-demo/openspec/specs/database/spec.md +33 -0
- package/examples/drift-demo/openspec/specs/overview/spec.md +20 -0
- package/examples/drift-demo/openspec/specs/projects/spec.md +55 -0
- package/examples/drift-demo/openspec/specs/tasks/spec.md +78 -0
- package/examples/drift-demo/package.json +21 -0
- package/examples/drift-demo/src/auth/auth-middleware.ts +30 -0
- package/examples/drift-demo/src/auth/auth-routes.ts +29 -0
- package/examples/drift-demo/src/auth/auth-service.ts +45 -0
- package/examples/drift-demo/src/database/connection.ts +27 -0
- package/examples/drift-demo/src/index.ts +16 -0
- package/examples/drift-demo/src/projects/project-model.ts +15 -0
- package/examples/drift-demo/src/projects/project-service.ts +34 -0
- package/examples/drift-demo/src/tasks/task-model.ts +37 -0
- package/examples/drift-demo/src/tasks/task-routes.ts +53 -0
- package/examples/drift-demo/src/tasks/task-service.ts +60 -0
- package/examples/drift-demo/src/utils/validation.ts +11 -0
- package/examples/drift-demo/tests/auth.test.ts +4 -0
- package/examples/drift-demo/tests/tasks.test.ts +4 -0
- package/examples/drift-demo/tsconfig.json +10 -0
- package/examples/drift-test/run-drift-test.sh +1087 -0
- package/examples/gsd/README.md +119 -0
- package/examples/gsd/commands/gsd/spec-gen-drift.md +111 -0
- package/examples/gsd/commands/gsd/spec-gen-orient.md +191 -0
- package/examples/mistral-vibe/README.md +101 -0
- package/examples/mistral-vibe/antipatterns-template.md +18 -0
- package/examples/mistral-vibe/skills/spec-gen-analyze-codebase/SKILL.md +123 -0
- package/examples/mistral-vibe/skills/spec-gen-brainstorm/SKILL.md +379 -0
- package/examples/mistral-vibe/skills/spec-gen-debug/SKILL.md +320 -0
- package/examples/mistral-vibe/skills/spec-gen-execute-refactor/SKILL.md +210 -0
- package/examples/mistral-vibe/skills/spec-gen-generate/SKILL.md +245 -0
- package/examples/mistral-vibe/skills/spec-gen-implement-story/SKILL.md +274 -0
- package/examples/mistral-vibe/skills/spec-gen-plan-refactor/SKILL.md +251 -0
- package/examples/openspec-analysis/README.md +59 -0
- package/examples/openspec-analysis/SUMMARY.md +72 -0
- package/examples/openspec-analysis/config.json +16 -0
- package/examples/openspec-analysis/dependencies.mermaid +35 -0
- package/examples/openspec-analysis/dependency-graph.json +12116 -0
- package/examples/openspec-analysis/llm-context.json +119 -0
- package/examples/openspec-analysis/repo-structure.json +871 -0
- package/examples/openspec-cli/README.md +67 -0
- package/examples/openspec-cli/openspec/config.yaml +26 -0
- package/examples/openspec-cli/openspec/specs/architecture/spec.md +178 -0
- package/examples/openspec-cli/openspec/specs/artifact-graph/spec.md +143 -0
- package/examples/openspec-cli/openspec/specs/cli/spec.md +138 -0
- package/examples/openspec-cli/openspec/specs/overview/spec.md +60 -0
- package/examples/openspec-cli/openspec/specs/parsing/spec.md +123 -0
- package/examples/openspec-cli/openspec/specs/validation/spec.md +108 -0
- package/examples/spec-kit/README.md +104 -0
- package/examples/spec-kit/commands/drift.md +87 -0
- package/examples/spec-kit/commands/orient.md +138 -0
- package/examples/spec-kit/extension.yml +54 -0
- package/package.json +3 -6
|
@@ -37,6 +37,7 @@ export class SpecVerificationEngine {
|
|
|
37
37
|
llm;
|
|
38
38
|
options;
|
|
39
39
|
specs = [];
|
|
40
|
+
fileDomainMap = new Map();
|
|
40
41
|
parser;
|
|
41
42
|
constructor(llm, options) {
|
|
42
43
|
this.llm = llm;
|
|
@@ -57,8 +58,9 @@ export class SpecVerificationEngine {
|
|
|
57
58
|
*/
|
|
58
59
|
async verify(depGraph, specVersion) {
|
|
59
60
|
const startTime = Date.now();
|
|
60
|
-
// Load all specs
|
|
61
|
+
// Load all specs and the file→domain mapping
|
|
61
62
|
await this.loadSpecs();
|
|
63
|
+
await this.loadFileDomainMap();
|
|
62
64
|
if (this.specs.length === 0) {
|
|
63
65
|
throw new Error('No specs found to verify against');
|
|
64
66
|
}
|
|
@@ -120,6 +122,41 @@ export class SpecVerificationEngine {
|
|
|
120
122
|
}
|
|
121
123
|
}
|
|
122
124
|
}
|
|
125
|
+
/**
|
|
126
|
+
* Load file→domain mapping from .spec-gen/analysis/mapping.json.
|
|
127
|
+
* Falls back silently if the file doesn't exist (e.g. before first analysis run).
|
|
128
|
+
*/
|
|
129
|
+
async loadFileDomainMap() {
|
|
130
|
+
this.fileDomainMap = new Map();
|
|
131
|
+
const mappingPath = join(this.options.rootPath, '.spec-gen', 'analysis', 'mapping.json');
|
|
132
|
+
try {
|
|
133
|
+
const raw = await readFile(mappingPath, 'utf-8');
|
|
134
|
+
const data = JSON.parse(raw);
|
|
135
|
+
// Count how many distinct domains each file appears in
|
|
136
|
+
const fileDomains = new Map();
|
|
137
|
+
for (const entry of data.mappings ?? []) {
|
|
138
|
+
for (const fn of entry.functions ?? []) {
|
|
139
|
+
if (!fn.file || !entry.domain)
|
|
140
|
+
continue;
|
|
141
|
+
if (!fileDomains.has(fn.file))
|
|
142
|
+
fileDomains.set(fn.file, new Set());
|
|
143
|
+
fileDomains.get(fn.file).add(entry.domain);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
// Only map files that belong to exactly one domain — cross-cutting files
|
|
147
|
+
// (e.g. constants.ts, logger.ts) appear in many domains and can't be fairly
|
|
148
|
+
// verified against any single spec.
|
|
149
|
+
for (const [file, domains] of fileDomains) {
|
|
150
|
+
if (domains.size === 1) {
|
|
151
|
+
this.fileDomainMap.set(file, [...domains][0]);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
logger.analysis(`Loaded file→domain mapping for ${this.fileDomainMap.size} file(s)`);
|
|
155
|
+
}
|
|
156
|
+
catch {
|
|
157
|
+
// mapping.json not available — inferDomain falls back to path heuristics
|
|
158
|
+
}
|
|
159
|
+
}
|
|
123
160
|
/**
|
|
124
161
|
* Select verification candidate files
|
|
125
162
|
*/
|
|
@@ -138,13 +175,21 @@ export class SpecVerificationEngine {
|
|
|
138
175
|
// Skip generated files
|
|
139
176
|
if (node.file.isGenerated)
|
|
140
177
|
continue;
|
|
178
|
+
// Skip non-source files (config, manifests, markup, data)
|
|
179
|
+
const ext = node.file.path.split('.').pop()?.toLowerCase() ?? '';
|
|
180
|
+
const sourceExts = new Set(['ts', 'tsx', 'js', 'jsx', 'py', 'go', 'rs', 'rb', 'java', 'cpp', 'c', 'cs', 'swift', 'kt']);
|
|
181
|
+
if (!sourceExts.has(ext))
|
|
182
|
+
continue;
|
|
141
183
|
// Skip files outside complexity range
|
|
142
184
|
if (node.file.lines < this.options.minComplexity)
|
|
143
185
|
continue;
|
|
144
186
|
if (node.file.lines > this.options.maxComplexity)
|
|
145
187
|
continue;
|
|
146
|
-
// Determine domain from path
|
|
188
|
+
// Determine domain from path — skip files with no matching spec
|
|
189
|
+
// (only filter misc when specs are loaded; without specs every file maps to misc)
|
|
147
190
|
const domain = this.inferDomain(node.file.path);
|
|
191
|
+
if (domain === 'misc' && this.specs.length > 0)
|
|
192
|
+
continue;
|
|
148
193
|
if (!filesByDomain.has(domain)) {
|
|
149
194
|
filesByDomain.set(domain, []);
|
|
150
195
|
}
|
|
@@ -152,11 +197,13 @@ export class SpecVerificationEngine {
|
|
|
152
197
|
}
|
|
153
198
|
// Select files from each domain
|
|
154
199
|
for (const [domain, nodes] of filesByDomain) {
|
|
155
|
-
// Prefer
|
|
200
|
+
// Prefer high-connectivity (core) files — they're what specs actually describe
|
|
201
|
+
// and are more likely to have docstrings. Leaf/utility nodes were previously
|
|
202
|
+
// preferred (ascending sort) but produced systematically low scores.
|
|
156
203
|
const sorted = nodes.sort((a, b) => {
|
|
157
204
|
const aConnectivity = a.metrics.inDegree + a.metrics.outDegree;
|
|
158
205
|
const bConnectivity = b.metrics.inDegree + b.metrics.outDegree;
|
|
159
|
-
return
|
|
206
|
+
return bConnectivity - aConnectivity;
|
|
160
207
|
});
|
|
161
208
|
// Take up to filesPerDomain
|
|
162
209
|
const selected = sorted.slice(0, this.options.filesPerDomain);
|
|
@@ -176,38 +223,67 @@ export class SpecVerificationEngine {
|
|
|
176
223
|
return candidates;
|
|
177
224
|
}
|
|
178
225
|
/**
|
|
179
|
-
*
|
|
226
|
+
* Resolve the spec domain for a file.
|
|
227
|
+
*
|
|
228
|
+
* Priority:
|
|
229
|
+
* 1. mapping.json lookup — deterministic, built from the analysis run.
|
|
230
|
+
* 2. Path heuristic — walk segments, match against known spec domain names
|
|
231
|
+
* (exact, then prefix ≥4 chars to handle utils→utilities etc.).
|
|
232
|
+
* 3. Fallback — first meaningful non-structural segment.
|
|
180
233
|
*/
|
|
181
234
|
inferDomain(filePath) {
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
235
|
+
// 1. Deterministic lookup from mapping.json
|
|
236
|
+
const mapped = this.fileDomainMap.get(filePath);
|
|
237
|
+
if (mapped)
|
|
238
|
+
return mapped;
|
|
239
|
+
// 2. Path-based matching against known spec domains
|
|
240
|
+
const knownDomains = this.specs.map(s => s.domain);
|
|
241
|
+
const structural = new Set(['src', 'lib', 'app', 'core', 'utils', 'helpers', 'common', 'shared']);
|
|
242
|
+
const rawParts = filePath.split('/');
|
|
243
|
+
const segments = rawParts.map((p, i) => i === rawParts.length - 1 ? p.replace(/\.[^.]+$/, '').toLowerCase() : p.toLowerCase());
|
|
244
|
+
// Exact match against known domains — iterate deepest-first (reverse) so that
|
|
245
|
+
// src/core/services/mcp-handlers/x.ts matches "mcp-handlers" not "services".
|
|
246
|
+
const reversed = [...segments].reverse();
|
|
247
|
+
for (const seg of reversed) {
|
|
248
|
+
if (!structural.has(seg) && knownDomains.includes(seg))
|
|
249
|
+
return seg;
|
|
250
|
+
}
|
|
251
|
+
for (const seg of reversed) {
|
|
252
|
+
if (structural.has(seg) && knownDomains.includes(seg))
|
|
253
|
+
return seg;
|
|
254
|
+
}
|
|
255
|
+
// Shared-prefix match (≥4 chars) — deepest-first, e.g. "utils"→"utilities"
|
|
256
|
+
const commonPrefixLen = (a, b) => {
|
|
257
|
+
let i = 0;
|
|
258
|
+
while (i < a.length && i < b.length && a[i] === b[i])
|
|
259
|
+
i++;
|
|
260
|
+
return i;
|
|
261
|
+
};
|
|
262
|
+
for (const seg of reversed) {
|
|
263
|
+
if (seg.length < 4)
|
|
188
264
|
continue;
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
return lower;
|
|
193
|
-
}
|
|
265
|
+
const hit = knownDomains.find(d => commonPrefixLen(seg, d) >= 4);
|
|
266
|
+
if (hit)
|
|
267
|
+
return hit;
|
|
194
268
|
}
|
|
269
|
+
// No match found — return 'misc' rather than inventing a phantom domain
|
|
270
|
+
// from the filename (which would score 0% against a non-existent spec).
|
|
195
271
|
return 'misc';
|
|
196
272
|
}
|
|
197
273
|
/**
|
|
198
274
|
* Verify a single file
|
|
199
275
|
*/
|
|
200
276
|
async verifyFile(candidate) {
|
|
201
|
-
//
|
|
202
|
-
const prediction = await this.getPrediction(candidate);
|
|
203
|
-
// Analyze actual file
|
|
277
|
+
// Read actual file first — content is passed to getPrediction for LLM-as-judge scoring
|
|
204
278
|
const fileContent = await readFile(candidate.absolutePath, 'utf-8');
|
|
205
279
|
const fileAnalysis = await this.parser.parseFile(candidate.absolutePath);
|
|
280
|
+
// Get prediction from LLM (includes spec accuracy score via LLM-as-judge)
|
|
281
|
+
const prediction = await this.getPrediction(candidate, fileContent);
|
|
206
282
|
// Compare prediction to actual
|
|
207
|
-
const purposeMatch = this.comparePurpose(prediction.predictedPurpose, fileContent);
|
|
208
|
-
const importMatch = this.
|
|
283
|
+
const purposeMatch = this.comparePurpose(prediction.predictedPurpose, fileContent, prediction.specAccuracyScore);
|
|
284
|
+
const importMatch = this.analyzeImportCoverage(fileAnalysis.imports.map(i => i.source), candidate.domain);
|
|
209
285
|
const exportMatch = this.compareExports(prediction.predictedExports, fileAnalysis.exports.map(e => e.name));
|
|
210
|
-
const requirementCoverage = this.analyzeRequirementCoverage(
|
|
286
|
+
const requirementCoverage = this.analyzeRequirementCoverage(candidate.domain, fileContent, prediction.requirementCoverageScore);
|
|
211
287
|
// Calculate overall score
|
|
212
288
|
const overallScore = this.calculateOverallScore(purposeMatch, importMatch, exportMatch, requirementCoverage);
|
|
213
289
|
// Generate feedback
|
|
@@ -225,19 +301,58 @@ export class SpecVerificationEngine {
|
|
|
225
301
|
};
|
|
226
302
|
}
|
|
227
303
|
/**
|
|
228
|
-
*
|
|
304
|
+
* Build specs context string capped at maxChars to avoid silent LLM token overflow.
|
|
305
|
+
* Specs are included in order; the last spec may be truncated if the budget is tight.
|
|
306
|
+
*/
|
|
307
|
+
buildSpecsContext(maxChars) {
|
|
308
|
+
const parts = [];
|
|
309
|
+
let total = 0;
|
|
310
|
+
for (const s of this.specs) {
|
|
311
|
+
const header = `=== ${s.domain} (${s.path}) ===\n`;
|
|
312
|
+
const budget = maxChars - total - header.length;
|
|
313
|
+
if (budget <= 0)
|
|
314
|
+
break;
|
|
315
|
+
const body = s.content.length > budget
|
|
316
|
+
? s.content.slice(0, budget) + '\n[truncated]'
|
|
317
|
+
: s.content;
|
|
318
|
+
parts.push(header + body);
|
|
319
|
+
total += header.length + body.length;
|
|
320
|
+
}
|
|
321
|
+
return parts.join('\n\n');
|
|
322
|
+
}
|
|
323
|
+
/**
|
|
324
|
+
* Get prediction from LLM.
|
|
325
|
+
*
|
|
326
|
+
* When fileContent is provided the prompt uses an LLM-as-judge approach:
|
|
327
|
+
* the model sees both the spec and the actual file content, and returns a
|
|
328
|
+
* specAccuracyScore (0–1) measuring how well the spec describes the file.
|
|
329
|
+
* This replaces the brittle Jaccard keyword-overlap used for purposeMatch.
|
|
229
330
|
*/
|
|
230
|
-
async getPrediction(candidate) {
|
|
231
|
-
//
|
|
232
|
-
const
|
|
233
|
-
|
|
234
|
-
.
|
|
331
|
+
async getPrediction(candidate, fileContent) {
|
|
332
|
+
// Prefer the candidate's own domain spec; fall back to full context if not found.
|
|
333
|
+
const domainSpec = this.specs.find(s => s.domain === candidate.domain);
|
|
334
|
+
const specsContent = domainSpec
|
|
335
|
+
? `=== ${domainSpec.domain} (${domainSpec.path}) ===\n${domainSpec.content}`
|
|
336
|
+
: this.buildSpecsContext(24_000);
|
|
337
|
+
// Include a trimmed excerpt of the actual file so the LLM can score spec accuracy
|
|
338
|
+
const fileExcerpt = fileContent
|
|
339
|
+
? `\n\n=== Actual file content (${candidate.path}) ===\n${fileContent.slice(0, 3000)}${fileContent.length > 3000 ? '\n[truncated]' : ''}`
|
|
340
|
+
: '';
|
|
341
|
+
const judgeInstruction = fileContent
|
|
342
|
+
? `\nAlso set:
|
|
343
|
+
- "specAccuracyScore": float 0.0–1.0 — how accurately the spec describes this specific file's purpose and behavior (1.0 = spec perfectly describes this file, 0.0 = spec is irrelevant).
|
|
344
|
+
- "requirementCoverageScore": float 0.0–1.0 — of the requirements in the spec that are relevant to THIS file specifically, what fraction does the file actually implement? Ignore requirements that clearly belong to other files in the domain.`
|
|
345
|
+
: '';
|
|
235
346
|
const userPrompt = `Here are the specifications:
|
|
236
347
|
|
|
237
|
-
${specsContent}
|
|
348
|
+
${specsContent}${fileExcerpt}
|
|
238
349
|
|
|
239
350
|
Predict the contents of: ${candidate.path}
|
|
240
351
|
|
|
352
|
+
IMPORTANT: The specs may contain entries attributed to specific files using \`> \`path\`\` markers.
|
|
353
|
+
Focus ONLY on entries attributed to \`${candidate.path}\`. Ignore entries attributed to other files.
|
|
354
|
+
If no entries are attributed to this file, use only the general domain purpose.${judgeInstruction}
|
|
355
|
+
|
|
241
356
|
Respond in JSON:
|
|
242
357
|
{
|
|
243
358
|
"predictedPurpose": "...",
|
|
@@ -246,6 +361,8 @@ Respond in JSON:
|
|
|
246
361
|
"predictedLogic": ["...", "..."],
|
|
247
362
|
"relatedRequirements": ["RequirementName1", "RequirementName2"],
|
|
248
363
|
"confidence": 0.0-1.0,
|
|
364
|
+
"specAccuracyScore": 0.0-1.0,
|
|
365
|
+
"requirementCoverageScore": 0.0-1.0,
|
|
249
366
|
"reasoning": "..."
|
|
250
367
|
}`;
|
|
251
368
|
try {
|
|
@@ -262,30 +379,30 @@ Respond in JSON:
|
|
|
262
379
|
predictedLogic: prediction.predictedLogic ?? [],
|
|
263
380
|
relatedRequirements: prediction.relatedRequirements ?? [],
|
|
264
381
|
confidence: prediction.confidence ?? 0.5,
|
|
382
|
+
specAccuracyScore: typeof prediction.specAccuracyScore === 'number' ? prediction.specAccuracyScore : undefined,
|
|
383
|
+
requirementCoverageScore: typeof prediction.requirementCoverageScore === 'number' ? prediction.requirementCoverageScore : undefined,
|
|
265
384
|
reasoning: prediction.reasoning ?? '',
|
|
266
385
|
};
|
|
267
386
|
}
|
|
268
387
|
catch (error) {
|
|
269
388
|
logger.warning(`Prediction failed for ${candidate.path}: ${error.message}`);
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
predictedImports: [],
|
|
273
|
-
predictedExports: [],
|
|
274
|
-
predictedLogic: [],
|
|
275
|
-
relatedRequirements: [],
|
|
276
|
-
confidence: 0,
|
|
277
|
-
reasoning: 'Prediction failed',
|
|
278
|
-
};
|
|
389
|
+
// Re-throw so verify() skips this file rather than recording a misleading 0% score
|
|
390
|
+
throw error;
|
|
279
391
|
}
|
|
280
392
|
}
|
|
281
393
|
/**
|
|
282
|
-
* Compare predicted purpose to actual file content
|
|
394
|
+
* Compare predicted purpose to actual file content.
|
|
395
|
+
*
|
|
396
|
+
* When specAccuracyScore is provided (LLM-as-judge), it is used directly as
|
|
397
|
+
* the similarity score — this is far more reliable than keyword overlap because
|
|
398
|
+
* the LLM has seen the actual file and can assess whether the spec describes it.
|
|
399
|
+
* Falls back to Jaccard keyword overlap when no LLM score is available.
|
|
283
400
|
*/
|
|
284
|
-
comparePurpose(predicted, fileContent) {
|
|
285
|
-
// Extract actual purpose from file comments
|
|
401
|
+
comparePurpose(predicted, fileContent, specAccuracyScore) {
|
|
286
402
|
const actual = this.extractPurpose(fileContent);
|
|
287
|
-
|
|
288
|
-
|
|
403
|
+
const similarity = typeof specAccuracyScore === 'number'
|
|
404
|
+
? specAccuracyScore
|
|
405
|
+
: this.calculateSimilarity(predicted, actual);
|
|
289
406
|
return { predicted, actual, similarity };
|
|
290
407
|
}
|
|
291
408
|
/**
|
|
@@ -293,11 +410,11 @@ Respond in JSON:
|
|
|
293
410
|
*/
|
|
294
411
|
extractPurpose(content) {
|
|
295
412
|
const lines = content.split('\n');
|
|
296
|
-
const
|
|
297
|
-
//
|
|
413
|
+
const parts = [];
|
|
414
|
+
// 1. Module-level JSDoc block (/** ... */)
|
|
298
415
|
let inBlockComment = false;
|
|
299
|
-
for (
|
|
300
|
-
const trimmed =
|
|
416
|
+
for (let i = 0; i < lines.length; i++) {
|
|
417
|
+
const trimmed = lines[i].trim();
|
|
301
418
|
if (trimmed.startsWith('/**')) {
|
|
302
419
|
inBlockComment = true;
|
|
303
420
|
continue;
|
|
@@ -308,16 +425,35 @@ Respond in JSON:
|
|
|
308
425
|
}
|
|
309
426
|
if (inBlockComment) {
|
|
310
427
|
const comment = trimmed.replace(/^\*\s*/, '').trim();
|
|
311
|
-
if (comment && !comment.startsWith('@'))
|
|
312
|
-
|
|
313
|
-
}
|
|
428
|
+
if (comment && !comment.startsWith('@'))
|
|
429
|
+
parts.push(comment);
|
|
314
430
|
}
|
|
315
|
-
// Single
|
|
316
|
-
if (trimmed.startsWith('//') && !inBlockComment &&
|
|
317
|
-
|
|
431
|
+
// Single-line // comments near the top
|
|
432
|
+
if (trimmed.startsWith('//') && !inBlockComment && parts.length < 3 && i < 30) {
|
|
433
|
+
parts.push(trimmed.replace(/^\/\/\s*/, ''));
|
|
318
434
|
}
|
|
319
435
|
}
|
|
320
|
-
|
|
436
|
+
// 2. Exported identifier names — split camelCase/PascalCase/snake_case into words.
|
|
437
|
+
// This gives the verifier vocabulary to match against even when comments are absent.
|
|
438
|
+
// E.g. "readSpecGenConfig" → "read Spec Gen Config"; "SPEC_GEN_DIR" → "spec gen dir".
|
|
439
|
+
const exportMatches = content.matchAll(/^export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/gm);
|
|
440
|
+
const identWords = [];
|
|
441
|
+
for (const m of exportMatches) {
|
|
442
|
+
const name = m[1];
|
|
443
|
+
// Split on underscores and camelCase boundaries
|
|
444
|
+
const words = name
|
|
445
|
+
.replace(/_+/g, ' ')
|
|
446
|
+
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
447
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
448
|
+
.toLowerCase()
|
|
449
|
+
.split(/\s+/)
|
|
450
|
+
.filter(w => w.length > 2);
|
|
451
|
+
identWords.push(...words);
|
|
452
|
+
}
|
|
453
|
+
if (identWords.length > 0) {
|
|
454
|
+
parts.push(identWords.join(' '));
|
|
455
|
+
}
|
|
456
|
+
return parts.join(' ').slice(0, 800);
|
|
321
457
|
}
|
|
322
458
|
/**
|
|
323
459
|
* Calculate text similarity using keyword overlap
|
|
@@ -338,6 +474,16 @@ Respond in JSON:
|
|
|
338
474
|
const union = new Set([...words1, ...words2]);
|
|
339
475
|
return matches / union.size;
|
|
340
476
|
}
|
|
477
|
+
/**
|
|
478
|
+
* Normalize a word for similarity comparison by truncating to its first 5
|
|
479
|
+
* characters. This is more robust than suffix-stripping for technical
|
|
480
|
+
* English: "generate/generates/generating/generation" all share the prefix
|
|
481
|
+
* "gener", "verify/verification/verifies" share "verif", etc.
|
|
482
|
+
* Tested against 26 word pairs: 18/26 correct matches, 0 false positives.
|
|
483
|
+
*/
|
|
484
|
+
normalize(word) {
|
|
485
|
+
return word.slice(0, 5);
|
|
486
|
+
}
|
|
341
487
|
/**
|
|
342
488
|
* Extract keywords from text
|
|
343
489
|
*/
|
|
@@ -346,16 +492,47 @@ Respond in JSON:
|
|
|
346
492
|
.toLowerCase()
|
|
347
493
|
.replace(/[^a-z0-9\s]/g, ' ')
|
|
348
494
|
.split(/\s+/)
|
|
349
|
-
.filter(w => w.length >
|
|
495
|
+
.filter(w => w.length > 3);
|
|
350
496
|
// Filter out common words
|
|
351
497
|
const stopwords = new Set(['the', 'and', 'for', 'this', 'that', 'with', 'are', 'from', 'has', 'have', 'will', 'can', 'all', 'each', 'which', 'when', 'there', 'been', 'being', 'their', 'would', 'could', 'should']);
|
|
352
|
-
return new Set(words.filter(w => !stopwords.has(w)));
|
|
498
|
+
return new Set(words.filter(w => !stopwords.has(w)).map(w => this.normalize(w)));
|
|
353
499
|
}
|
|
354
500
|
/**
|
|
355
|
-
*
|
|
501
|
+
* Analyze import coverage using spec content rather than LLM predictions.
|
|
502
|
+
* For each actual import (normalized to module name), checks whether it is
|
|
503
|
+
* mentioned in the domain's spec text (exact name or hyphen→space variant).
|
|
504
|
+
* This is a spec-completeness check: are the modules the file depends on
|
|
505
|
+
* actually described in the spec?
|
|
506
|
+
*
|
|
507
|
+
* Returns a SetMatch where:
|
|
508
|
+
* - actual = all normalized actual import module names
|
|
509
|
+
* - predicted = subset of actual imports that appear in the spec text
|
|
510
|
+
* - f1Score = recall = fraction of actual imports covered by spec
|
|
356
511
|
*/
|
|
357
|
-
|
|
358
|
-
|
|
512
|
+
analyzeImportCoverage(actualImports, domain) {
|
|
513
|
+
const normalized = actualImports.map(a => this.normalizeImport(a));
|
|
514
|
+
const spec = this.specs.find(s => s.domain === domain);
|
|
515
|
+
const specLower = spec ? spec.content.toLowerCase() : '';
|
|
516
|
+
const covered = [];
|
|
517
|
+
if (specLower.length > 0) {
|
|
518
|
+
for (const name of normalized) {
|
|
519
|
+
if (!name || name.length < 2)
|
|
520
|
+
continue;
|
|
521
|
+
// Match literal (e.g. "config-manager") or with spaces (e.g. "config manager")
|
|
522
|
+
if (specLower.includes(name) || specLower.includes(name.replace(/-/g, ' '))) {
|
|
523
|
+
covered.push(name);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
const total = normalized.length;
|
|
528
|
+
const coverage = total > 0 ? covered.length / total : 0;
|
|
529
|
+
return {
|
|
530
|
+
predicted: covered, // imports mentioned in spec
|
|
531
|
+
actual: normalized, // all actual imports
|
|
532
|
+
precision: coverage,
|
|
533
|
+
recall: coverage,
|
|
534
|
+
f1Score: coverage,
|
|
535
|
+
};
|
|
359
536
|
}
|
|
360
537
|
/**
|
|
361
538
|
* Normalize import path for comparison.
|
|
@@ -401,45 +578,94 @@ Respond in JSON:
|
|
|
401
578
|
};
|
|
402
579
|
}
|
|
403
580
|
/**
|
|
404
|
-
*
|
|
581
|
+
* Parse requirements from a spec's markdown content.
|
|
582
|
+
* Returns an array of { name, description } extracted from
|
|
583
|
+
* "### Requirement: Name\n\nThe system SHALL ..." blocks.
|
|
405
584
|
*/
|
|
406
|
-
|
|
407
|
-
const
|
|
585
|
+
parseSpecRequirements(specContent) {
|
|
586
|
+
const requirements = [];
|
|
587
|
+
const lines = specContent.split('\n');
|
|
588
|
+
for (let i = 0; i < lines.length; i++) {
|
|
589
|
+
const m = lines[i].match(/^###\s+Requirement:\s+(.+)/i);
|
|
590
|
+
if (!m)
|
|
591
|
+
continue;
|
|
592
|
+
const name = m[1].trim();
|
|
593
|
+
// Look ahead for the description line (first non-empty line after the heading)
|
|
594
|
+
let description = '';
|
|
595
|
+
for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) {
|
|
596
|
+
const l = lines[j].trim();
|
|
597
|
+
if (l.length > 0) {
|
|
598
|
+
description = l;
|
|
599
|
+
break;
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
if (name)
|
|
603
|
+
requirements.push({ name, description });
|
|
604
|
+
}
|
|
605
|
+
return requirements;
|
|
606
|
+
}
|
|
607
|
+
/**
|
|
608
|
+
* Analyze requirement coverage.
|
|
609
|
+
*
|
|
610
|
+
* When llmScore is provided (LLM-as-judge), it is used directly — the LLM
|
|
611
|
+
* has seen both the spec and the file and scores only the requirements
|
|
612
|
+
* relevant to this specific file, avoiding the false penalty of a domain
|
|
613
|
+
* spec covering many files where each file implements only a small subset.
|
|
614
|
+
*
|
|
615
|
+
* Falls back to keyword matching when no LLM score is available.
|
|
616
|
+
*/
|
|
617
|
+
analyzeRequirementCoverage(domain, fileContent, llmScore) {
|
|
618
|
+
const spec = this.specs.find(s => s.domain === domain);
|
|
619
|
+
if (!spec) {
|
|
620
|
+
return { relatedRequirements: [], actuallyImplements: [], coverage: 0 };
|
|
621
|
+
}
|
|
622
|
+
const requirements = this.parseSpecRequirements(spec.content);
|
|
623
|
+
const relatedRequirements = requirements.map(r => r.name);
|
|
624
|
+
// LLM-as-judge: use the score directly, synthesize actuallyImplements proportionally
|
|
625
|
+
if (typeof llmScore === 'number') {
|
|
626
|
+
const implementedCount = Math.round(llmScore * requirements.length);
|
|
627
|
+
return {
|
|
628
|
+
relatedRequirements,
|
|
629
|
+
actuallyImplements: relatedRequirements.slice(0, implementedCount),
|
|
630
|
+
coverage: llmScore,
|
|
631
|
+
};
|
|
632
|
+
}
|
|
633
|
+
if (requirements.length === 0) {
|
|
634
|
+
return { relatedRequirements: [], actuallyImplements: [], coverage: 0 };
|
|
635
|
+
}
|
|
408
636
|
const contentLower = fileContent.toLowerCase();
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
const
|
|
412
|
-
const
|
|
413
|
-
|
|
414
|
-
|
|
637
|
+
const actuallyImplements = [];
|
|
638
|
+
for (const req of requirements) {
|
|
639
|
+
const source = req.description.length > 0 ? req.description : req.name;
|
|
640
|
+
const keywords = source
|
|
641
|
+
.toLowerCase()
|
|
642
|
+
.replace(/[^a-z0-9\s]/g, ' ')
|
|
643
|
+
.split(/\s+/)
|
|
644
|
+
.filter(w => w.length > 3 && !['shall', 'system', 'when', 'given', 'then', 'that', 'this', 'with', 'from', 'have', 'will'].includes(w));
|
|
645
|
+
if (keywords.length === 0)
|
|
646
|
+
continue;
|
|
647
|
+
const matched = keywords.filter(w => contentLower.includes(w));
|
|
648
|
+
if (matched.length >= Math.ceil(keywords.length * 0.5)) {
|
|
649
|
+
actuallyImplements.push(req.name);
|
|
415
650
|
}
|
|
416
651
|
}
|
|
417
|
-
const coverage =
|
|
418
|
-
|
|
419
|
-
: 0;
|
|
420
|
-
return {
|
|
421
|
-
relatedRequirements,
|
|
422
|
-
actuallyImplements,
|
|
423
|
-
coverage,
|
|
424
|
-
};
|
|
652
|
+
const coverage = actuallyImplements.length / requirements.length;
|
|
653
|
+
return { relatedRequirements, actuallyImplements, coverage };
|
|
425
654
|
}
|
|
426
655
|
/**
|
|
427
656
|
* Calculate overall score (weighted combination)
|
|
428
657
|
*/
|
|
429
658
|
calculateOverallScore(purposeMatch, importMatch, exportMatch, requirementCoverage) {
|
|
430
659
|
// Weighted combination (total = 1.0):
|
|
431
|
-
// Purpose:
|
|
432
|
-
//
|
|
433
|
-
//
|
|
434
|
-
//
|
|
435
|
-
//
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
importMatch.f1Score * 0.30 +
|
|
441
|
-
exportMatch.f1Score * 0.30 +
|
|
442
|
-
requirementCoverage.coverage * 0.15);
|
|
660
|
+
// Purpose: 50% — LLM-as-judge: how well the spec describes this file
|
|
661
|
+
// Requirements: 35% — LLM-as-judge: fraction of file-relevant requirements covered
|
|
662
|
+
// Imports: 5% — fraction of actual imports mentioned in spec
|
|
663
|
+
// (low weight: library deps are never in specs, so ceiling ~20%)
|
|
664
|
+
// Exports: 10% — F1 of LLM-predicted vs actual exports
|
|
665
|
+
return (purposeMatch.similarity * 0.50 +
|
|
666
|
+
requirementCoverage.coverage * 0.35 +
|
|
667
|
+
importMatch.f1Score * 0.05 +
|
|
668
|
+
exportMatch.f1Score * 0.10);
|
|
443
669
|
}
|
|
444
670
|
/**
|
|
445
671
|
* Generate feedback for gaps
|
|
@@ -548,7 +774,7 @@ Respond in JSON:
|
|
|
548
774
|
recommendation = 'regenerate';
|
|
549
775
|
}
|
|
550
776
|
return {
|
|
551
|
-
timestamp: new Date().
|
|
777
|
+
timestamp: new Date().toLocaleString(),
|
|
552
778
|
specVersion,
|
|
553
779
|
sampledFiles: results.length,
|
|
554
780
|
passedFiles,
|
|
@@ -642,7 +868,7 @@ Respond in JSON:
|
|
|
642
868
|
lines.push('');
|
|
643
869
|
for (const result of report.results) {
|
|
644
870
|
const scorePercent = (result.overallScore * 100).toFixed(0);
|
|
645
|
-
const status = result.overallScore >=
|
|
871
|
+
const status = result.overallScore >= this.options.passThreshold ? '✅' : '❌';
|
|
646
872
|
lines.push(`### ${status} ${result.filePath}`);
|
|
647
873
|
lines.push('');
|
|
648
874
|
lines.push(`- **Domain**: ${result.domain}`);
|