spec-gen-cli 1.2.6 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/README.md +175 -55
  2. package/dist/api/analyze.d.ts.map +1 -1
  3. package/dist/api/analyze.js +6 -1
  4. package/dist/api/analyze.js.map +1 -1
  5. package/dist/api/audit.d.ts +10 -0
  6. package/dist/api/audit.d.ts.map +1 -0
  7. package/dist/api/audit.js +117 -0
  8. package/dist/api/audit.js.map +1 -0
  9. package/dist/api/generate.d.ts.map +1 -1
  10. package/dist/api/generate.js +10 -1
  11. package/dist/api/generate.js.map +1 -1
  12. package/dist/api/index.d.ts +3 -2
  13. package/dist/api/index.d.ts.map +1 -1
  14. package/dist/api/index.js +1 -0
  15. package/dist/api/index.js.map +1 -1
  16. package/dist/api/run.d.ts.map +1 -1
  17. package/dist/api/run.js +5 -1
  18. package/dist/api/run.js.map +1 -1
  19. package/dist/api/types.d.ts +15 -4
  20. package/dist/api/types.d.ts.map +1 -1
  21. package/dist/cli/commands/analyze.d.ts +3 -0
  22. package/dist/cli/commands/analyze.d.ts.map +1 -1
  23. package/dist/cli/commands/analyze.js +112 -17
  24. package/dist/cli/commands/analyze.js.map +1 -1
  25. package/dist/cli/commands/audit.d.ts +9 -0
  26. package/dist/cli/commands/audit.d.ts.map +1 -0
  27. package/dist/cli/commands/audit.js +98 -0
  28. package/dist/cli/commands/audit.js.map +1 -0
  29. package/dist/cli/commands/drift.d.ts.map +1 -1
  30. package/dist/cli/commands/drift.js +8 -10
  31. package/dist/cli/commands/drift.js.map +1 -1
  32. package/dist/cli/commands/generate.d.ts.map +1 -1
  33. package/dist/cli/commands/generate.js +15 -37
  34. package/dist/cli/commands/generate.js.map +1 -1
  35. package/dist/cli/commands/mcp.d.ts +102 -2
  36. package/dist/cli/commands/mcp.d.ts.map +1 -1
  37. package/dist/cli/commands/mcp.js +134 -2
  38. package/dist/cli/commands/mcp.js.map +1 -1
  39. package/dist/cli/commands/run.d.ts.map +1 -1
  40. package/dist/cli/commands/run.js +9 -47
  41. package/dist/cli/commands/run.js.map +1 -1
  42. package/dist/cli/commands/setup.d.ts +17 -0
  43. package/dist/cli/commands/setup.d.ts.map +1 -0
  44. package/dist/cli/commands/setup.js +201 -0
  45. package/dist/cli/commands/setup.js.map +1 -0
  46. package/dist/cli/commands/verify.d.ts.map +1 -1
  47. package/dist/cli/commands/verify.js +7 -8
  48. package/dist/cli/commands/verify.js.map +1 -1
  49. package/dist/cli/index.js +14 -8
  50. package/dist/cli/index.js.map +1 -1
  51. package/dist/constants.d.ts +14 -0
  52. package/dist/constants.d.ts.map +1 -1
  53. package/dist/constants.js +14 -0
  54. package/dist/constants.js.map +1 -1
  55. package/dist/core/analyzer/ai-config-generator.d.ts +54 -0
  56. package/dist/core/analyzer/ai-config-generator.d.ts.map +1 -0
  57. package/dist/core/analyzer/ai-config-generator.js +85 -0
  58. package/dist/core/analyzer/ai-config-generator.js.map +1 -0
  59. package/dist/core/analyzer/artifact-generator.d.ts +27 -2
  60. package/dist/core/analyzer/artifact-generator.d.ts.map +1 -1
  61. package/dist/core/analyzer/artifact-generator.js +86 -8
  62. package/dist/core/analyzer/artifact-generator.js.map +1 -1
  63. package/dist/core/analyzer/codebase-digest.d.ts.map +1 -1
  64. package/dist/core/analyzer/codebase-digest.js +12 -11
  65. package/dist/core/analyzer/codebase-digest.js.map +1 -1
  66. package/dist/core/analyzer/env-extractor.d.ts +33 -0
  67. package/dist/core/analyzer/env-extractor.d.ts.map +1 -0
  68. package/dist/core/analyzer/env-extractor.js +196 -0
  69. package/dist/core/analyzer/env-extractor.js.map +1 -0
  70. package/dist/core/analyzer/http-route-parser.d.ts +36 -1
  71. package/dist/core/analyzer/http-route-parser.d.ts.map +1 -1
  72. package/dist/core/analyzer/http-route-parser.js +276 -0
  73. package/dist/core/analyzer/http-route-parser.js.map +1 -1
  74. package/dist/core/analyzer/middleware-extractor.d.ts +29 -0
  75. package/dist/core/analyzer/middleware-extractor.d.ts.map +1 -0
  76. package/dist/core/analyzer/middleware-extractor.js +195 -0
  77. package/dist/core/analyzer/middleware-extractor.js.map +1 -0
  78. package/dist/core/analyzer/schema-extractor.d.ts +41 -0
  79. package/dist/core/analyzer/schema-extractor.d.ts.map +1 -0
  80. package/dist/core/analyzer/schema-extractor.js +229 -0
  81. package/dist/core/analyzer/schema-extractor.js.map +1 -0
  82. package/dist/core/analyzer/spec-snapshot-generator.d.ts +17 -0
  83. package/dist/core/analyzer/spec-snapshot-generator.d.ts.map +1 -0
  84. package/dist/core/analyzer/spec-snapshot-generator.js +201 -0
  85. package/dist/core/analyzer/spec-snapshot-generator.js.map +1 -0
  86. package/dist/core/analyzer/ui-component-extractor.d.ts +43 -0
  87. package/dist/core/analyzer/ui-component-extractor.d.ts.map +1 -0
  88. package/dist/core/analyzer/ui-component-extractor.js +245 -0
  89. package/dist/core/analyzer/ui-component-extractor.js.map +1 -0
  90. package/dist/core/generator/openspec-format-generator.d.ts.map +1 -1
  91. package/dist/core/generator/openspec-format-generator.js +8 -0
  92. package/dist/core/generator/openspec-format-generator.js.map +1 -1
  93. package/dist/core/generator/spec-pipeline.d.ts +9 -0
  94. package/dist/core/generator/spec-pipeline.d.ts.map +1 -1
  95. package/dist/core/generator/spec-pipeline.js +94 -2
  96. package/dist/core/generator/spec-pipeline.js.map +1 -1
  97. package/dist/core/generator/stages/stage1-survey.d.ts.map +1 -1
  98. package/dist/core/generator/stages/stage1-survey.js +43 -0
  99. package/dist/core/generator/stages/stage1-survey.js.map +1 -1
  100. package/dist/core/generator/stages/stage2-entities.d.ts.map +1 -1
  101. package/dist/core/generator/stages/stage2-entities.js +6 -2
  102. package/dist/core/generator/stages/stage2-entities.js.map +1 -1
  103. package/dist/core/generator/stages/stage3-services.d.ts.map +1 -1
  104. package/dist/core/generator/stages/stage3-services.js +9 -2
  105. package/dist/core/generator/stages/stage3-services.js.map +1 -1
  106. package/dist/core/generator/stages/stage4-api.d.ts.map +1 -1
  107. package/dist/core/generator/stages/stage4-api.js +6 -2
  108. package/dist/core/generator/stages/stage4-api.js.map +1 -1
  109. package/dist/core/services/llm-service.d.ts +26 -10
  110. package/dist/core/services/llm-service.d.ts.map +1 -1
  111. package/dist/core/services/llm-service.js +171 -16
  112. package/dist/core/services/llm-service.js.map +1 -1
  113. package/dist/core/services/mcp-handlers/analysis.d.ts +32 -1
  114. package/dist/core/services/mcp-handlers/analysis.d.ts.map +1 -1
  115. package/dist/core/services/mcp-handlers/analysis.js +185 -2
  116. package/dist/core/services/mcp-handlers/analysis.js.map +1 -1
  117. package/dist/core/verifier/verification-engine.d.ts +67 -6
  118. package/dist/core/verifier/verification-engine.d.ts.map +1 -1
  119. package/dist/core/verifier/verification-engine.js +316 -90
  120. package/dist/core/verifier/verification-engine.js.map +1 -1
  121. package/dist/types/index.d.ts +70 -1
  122. package/dist/types/index.d.ts.map +1 -1
  123. package/dist/types/pipeline.d.ts +9 -0
  124. package/dist/types/pipeline.d.ts.map +1 -1
  125. package/dist/utils/command-helpers.d.ts +30 -0
  126. package/dist/utils/command-helpers.d.ts.map +1 -1
  127. package/dist/utils/command-helpers.js +69 -1
  128. package/dist/utils/command-helpers.js.map +1 -1
  129. package/examples/bmad/README.md +113 -0
  130. package/examples/bmad/agents/architect.md +226 -0
  131. package/examples/bmad/agents/dev-brownfield.md +69 -0
  132. package/examples/bmad/setup/architect.customize.yaml +14 -0
  133. package/examples/bmad/tasks/implement-story.md +254 -0
  134. package/examples/bmad/tasks/onboarding.md +169 -0
  135. package/examples/bmad/tasks/refactor.md +178 -0
  136. package/examples/bmad/tasks/sprint-planning.md +168 -0
  137. package/examples/bmad/templates/story.md +108 -0
  138. package/examples/cline-workflows/spec-gen-analyze-codebase.md +100 -0
  139. package/examples/cline-workflows/spec-gen-check-spec-drift.md +102 -0
  140. package/examples/cline-workflows/spec-gen-execute-refactor.md +194 -0
  141. package/examples/cline-workflows/spec-gen-implement-feature.md +238 -0
  142. package/examples/cline-workflows/spec-gen-plan-refactor.md +255 -0
  143. package/examples/cline-workflows/spec-gen-refactor-codebase.md +16 -0
  144. package/examples/drift-demo/openspec/config.yaml +14 -0
  145. package/examples/drift-demo/openspec/specs/architecture/spec.md +30 -0
  146. package/examples/drift-demo/openspec/specs/auth/spec.md +71 -0
  147. package/examples/drift-demo/openspec/specs/database/spec.md +33 -0
  148. package/examples/drift-demo/openspec/specs/overview/spec.md +20 -0
  149. package/examples/drift-demo/openspec/specs/projects/spec.md +55 -0
  150. package/examples/drift-demo/openspec/specs/tasks/spec.md +78 -0
  151. package/examples/drift-demo/package.json +21 -0
  152. package/examples/drift-demo/src/auth/auth-middleware.ts +30 -0
  153. package/examples/drift-demo/src/auth/auth-routes.ts +29 -0
  154. package/examples/drift-demo/src/auth/auth-service.ts +45 -0
  155. package/examples/drift-demo/src/database/connection.ts +27 -0
  156. package/examples/drift-demo/src/index.ts +16 -0
  157. package/examples/drift-demo/src/projects/project-model.ts +15 -0
  158. package/examples/drift-demo/src/projects/project-service.ts +34 -0
  159. package/examples/drift-demo/src/tasks/task-model.ts +37 -0
  160. package/examples/drift-demo/src/tasks/task-routes.ts +53 -0
  161. package/examples/drift-demo/src/tasks/task-service.ts +60 -0
  162. package/examples/drift-demo/src/utils/validation.ts +11 -0
  163. package/examples/drift-demo/tests/auth.test.ts +4 -0
  164. package/examples/drift-demo/tests/tasks.test.ts +4 -0
  165. package/examples/drift-demo/tsconfig.json +10 -0
  166. package/examples/drift-test/run-drift-test.sh +1087 -0
  167. package/examples/gsd/README.md +119 -0
  168. package/examples/gsd/commands/gsd/spec-gen-drift.md +111 -0
  169. package/examples/gsd/commands/gsd/spec-gen-orient.md +191 -0
  170. package/examples/mistral-vibe/README.md +101 -0
  171. package/examples/mistral-vibe/antipatterns-template.md +18 -0
  172. package/examples/mistral-vibe/skills/spec-gen-analyze-codebase/SKILL.md +123 -0
  173. package/examples/mistral-vibe/skills/spec-gen-brainstorm/SKILL.md +379 -0
  174. package/examples/mistral-vibe/skills/spec-gen-debug/SKILL.md +320 -0
  175. package/examples/mistral-vibe/skills/spec-gen-execute-refactor/SKILL.md +210 -0
  176. package/examples/mistral-vibe/skills/spec-gen-generate/SKILL.md +245 -0
  177. package/examples/mistral-vibe/skills/spec-gen-implement-story/SKILL.md +274 -0
  178. package/examples/mistral-vibe/skills/spec-gen-plan-refactor/SKILL.md +251 -0
  179. package/examples/openspec-analysis/README.md +59 -0
  180. package/examples/openspec-analysis/SUMMARY.md +72 -0
  181. package/examples/openspec-analysis/config.json +16 -0
  182. package/examples/openspec-analysis/dependencies.mermaid +35 -0
  183. package/examples/openspec-analysis/dependency-graph.json +12116 -0
  184. package/examples/openspec-analysis/llm-context.json +119 -0
  185. package/examples/openspec-analysis/repo-structure.json +871 -0
  186. package/examples/openspec-cli/README.md +67 -0
  187. package/examples/openspec-cli/openspec/config.yaml +26 -0
  188. package/examples/openspec-cli/openspec/specs/architecture/spec.md +178 -0
  189. package/examples/openspec-cli/openspec/specs/artifact-graph/spec.md +143 -0
  190. package/examples/openspec-cli/openspec/specs/cli/spec.md +138 -0
  191. package/examples/openspec-cli/openspec/specs/overview/spec.md +60 -0
  192. package/examples/openspec-cli/openspec/specs/parsing/spec.md +123 -0
  193. package/examples/openspec-cli/openspec/specs/validation/spec.md +108 -0
  194. package/examples/spec-kit/README.md +104 -0
  195. package/examples/spec-kit/commands/drift.md +87 -0
  196. package/examples/spec-kit/commands/orient.md +138 -0
  197. package/examples/spec-kit/extension.yml +54 -0
  198. package/package.json +3 -6
@@ -37,6 +37,7 @@ export class SpecVerificationEngine {
37
37
  llm;
38
38
  options;
39
39
  specs = [];
40
+ fileDomainMap = new Map();
40
41
  parser;
41
42
  constructor(llm, options) {
42
43
  this.llm = llm;
@@ -57,8 +58,9 @@ export class SpecVerificationEngine {
57
58
  */
58
59
  async verify(depGraph, specVersion) {
59
60
  const startTime = Date.now();
60
- // Load all specs
61
+ // Load all specs and the file→domain mapping
61
62
  await this.loadSpecs();
63
+ await this.loadFileDomainMap();
62
64
  if (this.specs.length === 0) {
63
65
  throw new Error('No specs found to verify against');
64
66
  }
@@ -120,6 +122,41 @@ export class SpecVerificationEngine {
120
122
  }
121
123
  }
122
124
  }
125
+ /**
126
+ * Load file→domain mapping from .spec-gen/analysis/mapping.json.
127
+ * Falls back silently if the file doesn't exist (e.g. before first analysis run).
128
+ */
129
+ async loadFileDomainMap() {
130
+ this.fileDomainMap = new Map();
131
+ const mappingPath = join(this.options.rootPath, '.spec-gen', 'analysis', 'mapping.json');
132
+ try {
133
+ const raw = await readFile(mappingPath, 'utf-8');
134
+ const data = JSON.parse(raw);
135
+ // Count how many distinct domains each file appears in
136
+ const fileDomains = new Map();
137
+ for (const entry of data.mappings ?? []) {
138
+ for (const fn of entry.functions ?? []) {
139
+ if (!fn.file || !entry.domain)
140
+ continue;
141
+ if (!fileDomains.has(fn.file))
142
+ fileDomains.set(fn.file, new Set());
143
+ fileDomains.get(fn.file).add(entry.domain);
144
+ }
145
+ }
146
+ // Only map files that belong to exactly one domain — cross-cutting files
147
+ // (e.g. constants.ts, logger.ts) appear in many domains and can't be fairly
148
+ // verified against any single spec.
149
+ for (const [file, domains] of fileDomains) {
150
+ if (domains.size === 1) {
151
+ this.fileDomainMap.set(file, [...domains][0]);
152
+ }
153
+ }
154
+ logger.analysis(`Loaded file→domain mapping for ${this.fileDomainMap.size} file(s)`);
155
+ }
156
+ catch {
157
+ // mapping.json not available — inferDomain falls back to path heuristics
158
+ }
159
+ }
123
160
  /**
124
161
  * Select verification candidate files
125
162
  */
@@ -138,13 +175,21 @@ export class SpecVerificationEngine {
138
175
  // Skip generated files
139
176
  if (node.file.isGenerated)
140
177
  continue;
178
+ // Skip non-source files (config, manifests, markup, data)
179
+ const ext = node.file.path.split('.').pop()?.toLowerCase() ?? '';
180
+ const sourceExts = new Set(['ts', 'tsx', 'js', 'jsx', 'py', 'go', 'rs', 'rb', 'java', 'cpp', 'c', 'cs', 'swift', 'kt']);
181
+ if (!sourceExts.has(ext))
182
+ continue;
141
183
  // Skip files outside complexity range
142
184
  if (node.file.lines < this.options.minComplexity)
143
185
  continue;
144
186
  if (node.file.lines > this.options.maxComplexity)
145
187
  continue;
146
- // Determine domain from path
188
+ // Determine domain from path — skip files with no matching spec
189
+ // (only filter misc when specs are loaded; without specs every file maps to misc)
147
190
  const domain = this.inferDomain(node.file.path);
191
+ if (domain === 'misc' && this.specs.length > 0)
192
+ continue;
148
193
  if (!filesByDomain.has(domain)) {
149
194
  filesByDomain.set(domain, []);
150
195
  }
@@ -152,11 +197,13 @@ export class SpecVerificationEngine {
152
197
  }
153
198
  // Select files from each domain
154
199
  for (const [domain, nodes] of filesByDomain) {
155
- // Prefer leaf nodes (low connectivity)
200
+ // Prefer high-connectivity (core) files — they're what specs actually describe
201
+ // and are more likely to have docstrings. Leaf/utility nodes were previously
202
+ // preferred (ascending sort) but produced systematically low scores.
156
203
  const sorted = nodes.sort((a, b) => {
157
204
  const aConnectivity = a.metrics.inDegree + a.metrics.outDegree;
158
205
  const bConnectivity = b.metrics.inDegree + b.metrics.outDegree;
159
- return aConnectivity - bConnectivity;
206
+ return bConnectivity - aConnectivity;
160
207
  });
161
208
  // Take up to filesPerDomain
162
209
  const selected = sorted.slice(0, this.options.filesPerDomain);
@@ -176,38 +223,67 @@ export class SpecVerificationEngine {
176
223
  return candidates;
177
224
  }
178
225
  /**
179
- * Infer domain from file path
226
+ * Resolve the spec domain for a file.
227
+ *
228
+ * Priority:
229
+ * 1. mapping.json lookup — deterministic, built from the analysis run.
230
+ * 2. Path heuristic — walk segments, match against known spec domain names
231
+ * (exact, then prefix ≥4 chars to handle utils→utilities etc.).
232
+ * 3. Fallback — first meaningful non-structural segment.
180
233
  */
181
234
  inferDomain(filePath) {
182
- const parts = filePath.split('/');
183
- // Look for known domain indicators
184
- for (const part of parts) {
185
- const lower = part.toLowerCase();
186
- // Skip common non-domain directories
187
- if (['src', 'lib', 'app', 'core', 'utils', 'helpers', 'common', 'shared'].includes(lower)) {
235
+ // 1. Deterministic lookup from mapping.json
236
+ const mapped = this.fileDomainMap.get(filePath);
237
+ if (mapped)
238
+ return mapped;
239
+ // 2. Path-based matching against known spec domains
240
+ const knownDomains = this.specs.map(s => s.domain);
241
+ const structural = new Set(['src', 'lib', 'app', 'core', 'utils', 'helpers', 'common', 'shared']);
242
+ const rawParts = filePath.split('/');
243
+ const segments = rawParts.map((p, i) => i === rawParts.length - 1 ? p.replace(/\.[^.]+$/, '').toLowerCase() : p.toLowerCase());
244
+ // Exact match against known domains — iterate deepest-first (reverse) so that
245
+ // src/core/services/mcp-handlers/x.ts matches "mcp-handlers" not "services".
246
+ const reversed = [...segments].reverse();
247
+ for (const seg of reversed) {
248
+ if (!structural.has(seg) && knownDomains.includes(seg))
249
+ return seg;
250
+ }
251
+ for (const seg of reversed) {
252
+ if (structural.has(seg) && knownDomains.includes(seg))
253
+ return seg;
254
+ }
255
+ // Shared-prefix match (≥4 chars) — deepest-first, e.g. "utils"→"utilities"
256
+ const commonPrefixLen = (a, b) => {
257
+ let i = 0;
258
+ while (i < a.length && i < b.length && a[i] === b[i])
259
+ i++;
260
+ return i;
261
+ };
262
+ for (const seg of reversed) {
263
+ if (seg.length < 4)
188
264
  continue;
189
- }
190
- // Return first meaningful directory
191
- if (part.length > 1 && !part.startsWith('.')) {
192
- return lower;
193
- }
265
+ const hit = knownDomains.find(d => commonPrefixLen(seg, d) >= 4);
266
+ if (hit)
267
+ return hit;
194
268
  }
269
+ // No match found — return 'misc' rather than inventing a phantom domain
270
+ // from the filename (which would score 0% against a non-existent spec).
195
271
  return 'misc';
196
272
  }
197
273
  /**
198
274
  * Verify a single file
199
275
  */
200
276
  async verifyFile(candidate) {
201
- // Get prediction from LLM
202
- const prediction = await this.getPrediction(candidate);
203
- // Analyze actual file
277
+ // Read actual file first — content is passed to getPrediction for LLM-as-judge scoring
204
278
  const fileContent = await readFile(candidate.absolutePath, 'utf-8');
205
279
  const fileAnalysis = await this.parser.parseFile(candidate.absolutePath);
280
+ // Get prediction from LLM (includes spec accuracy score via LLM-as-judge)
281
+ const prediction = await this.getPrediction(candidate, fileContent);
206
282
  // Compare prediction to actual
207
- const purposeMatch = this.comparePurpose(prediction.predictedPurpose, fileContent);
208
- const importMatch = this.compareImports(prediction.predictedImports, fileAnalysis.imports.map(i => i.source));
283
+ const purposeMatch = this.comparePurpose(prediction.predictedPurpose, fileContent, prediction.specAccuracyScore);
284
+ const importMatch = this.analyzeImportCoverage(fileAnalysis.imports.map(i => i.source), candidate.domain);
209
285
  const exportMatch = this.compareExports(prediction.predictedExports, fileAnalysis.exports.map(e => e.name));
210
- const requirementCoverage = this.analyzeRequirementCoverage(prediction.relatedRequirements, fileContent);
286
+ const requirementCoverage = this.analyzeRequirementCoverage(candidate.domain, fileContent, prediction.requirementCoverageScore);
211
287
  // Calculate overall score
212
288
  const overallScore = this.calculateOverallScore(purposeMatch, importMatch, exportMatch, requirementCoverage);
213
289
  // Generate feedback
@@ -225,19 +301,58 @@ export class SpecVerificationEngine {
225
301
  };
226
302
  }
227
303
  /**
228
- * Get prediction from LLM
304
+ * Build specs context string capped at maxChars to avoid silent LLM token overflow.
305
+ * Specs are included in order; the last spec may be truncated if the budget is tight.
306
+ */
307
+ buildSpecsContext(maxChars) {
308
+ const parts = [];
309
+ let total = 0;
310
+ for (const s of this.specs) {
311
+ const header = `=== ${s.domain} (${s.path}) ===\n`;
312
+ const budget = maxChars - total - header.length;
313
+ if (budget <= 0)
314
+ break;
315
+ const body = s.content.length > budget
316
+ ? s.content.slice(0, budget) + '\n[truncated]'
317
+ : s.content;
318
+ parts.push(header + body);
319
+ total += header.length + body.length;
320
+ }
321
+ return parts.join('\n\n');
322
+ }
323
+ /**
324
+ * Get prediction from LLM.
325
+ *
326
+ * When fileContent is provided the prompt uses an LLM-as-judge approach:
327
+ * the model sees both the spec and the actual file content, and returns a
328
+ * specAccuracyScore (0–1) measuring how well the spec describes the file.
329
+ * This replaces the brittle Jaccard keyword-overlap used for purposeMatch.
229
330
  */
230
- async getPrediction(candidate) {
231
- // Build specs context
232
- const specsContent = this.specs
233
- .map(s => `=== ${s.domain} (${s.path}) ===\n${s.content}`)
234
- .join('\n\n');
331
+ async getPrediction(candidate, fileContent) {
332
+ // Prefer the candidate's own domain spec; fall back to full context if not found.
333
+ const domainSpec = this.specs.find(s => s.domain === candidate.domain);
334
+ const specsContent = domainSpec
335
+ ? `=== ${domainSpec.domain} (${domainSpec.path}) ===\n${domainSpec.content}`
336
+ : this.buildSpecsContext(24_000);
337
+ // Include a trimmed excerpt of the actual file so the LLM can score spec accuracy
338
+ const fileExcerpt = fileContent
339
+ ? `\n\n=== Actual file content (${candidate.path}) ===\n${fileContent.slice(0, 3000)}${fileContent.length > 3000 ? '\n[truncated]' : ''}`
340
+ : '';
341
+ const judgeInstruction = fileContent
342
+ ? `\nAlso set:
343
+ - "specAccuracyScore": float 0.0–1.0 — how accurately the spec describes this specific file's purpose and behavior (1.0 = spec perfectly describes this file, 0.0 = spec is irrelevant).
344
+ - "requirementCoverageScore": float 0.0–1.0 — of the requirements in the spec that are relevant to THIS file specifically, what fraction does the file actually implement? Ignore requirements that clearly belong to other files in the domain.`
345
+ : '';
235
346
  const userPrompt = `Here are the specifications:
236
347
 
237
- ${specsContent}
348
+ ${specsContent}${fileExcerpt}
238
349
 
239
350
  Predict the contents of: ${candidate.path}
240
351
 
352
+ IMPORTANT: The specs may contain entries attributed to specific files using \`> \`path\`\` markers.
353
+ Focus ONLY on entries attributed to \`${candidate.path}\`. Ignore entries attributed to other files.
354
+ If no entries are attributed to this file, use only the general domain purpose.${judgeInstruction}
355
+
241
356
  Respond in JSON:
242
357
  {
243
358
  "predictedPurpose": "...",
@@ -246,6 +361,8 @@ Respond in JSON:
246
361
  "predictedLogic": ["...", "..."],
247
362
  "relatedRequirements": ["RequirementName1", "RequirementName2"],
248
363
  "confidence": 0.0-1.0,
364
+ "specAccuracyScore": 0.0-1.0,
365
+ "requirementCoverageScore": 0.0-1.0,
249
366
  "reasoning": "..."
250
367
  }`;
251
368
  try {
@@ -262,30 +379,30 @@ Respond in JSON:
262
379
  predictedLogic: prediction.predictedLogic ?? [],
263
380
  relatedRequirements: prediction.relatedRequirements ?? [],
264
381
  confidence: prediction.confidence ?? 0.5,
382
+ specAccuracyScore: typeof prediction.specAccuracyScore === 'number' ? prediction.specAccuracyScore : undefined,
383
+ requirementCoverageScore: typeof prediction.requirementCoverageScore === 'number' ? prediction.requirementCoverageScore : undefined,
265
384
  reasoning: prediction.reasoning ?? '',
266
385
  };
267
386
  }
268
387
  catch (error) {
269
388
  logger.warning(`Prediction failed for ${candidate.path}: ${error.message}`);
270
- return {
271
- predictedPurpose: '',
272
- predictedImports: [],
273
- predictedExports: [],
274
- predictedLogic: [],
275
- relatedRequirements: [],
276
- confidence: 0,
277
- reasoning: 'Prediction failed',
278
- };
389
+ // Re-throw so verify() skips this file rather than recording a misleading 0% score
390
+ throw error;
279
391
  }
280
392
  }
281
393
  /**
282
- * Compare predicted purpose to actual file content
394
+ * Compare predicted purpose to actual file content.
395
+ *
396
+ * When specAccuracyScore is provided (LLM-as-judge), it is used directly as
397
+ * the similarity score — this is far more reliable than keyword overlap because
398
+ * the LLM has seen the actual file and can assess whether the spec describes it.
399
+ * Falls back to Jaccard keyword overlap when no LLM score is available.
283
400
  */
284
- comparePurpose(predicted, fileContent) {
285
- // Extract actual purpose from file comments
401
+ comparePurpose(predicted, fileContent, specAccuracyScore) {
286
402
  const actual = this.extractPurpose(fileContent);
287
- // Calculate similarity using keyword overlap
288
- const similarity = this.calculateSimilarity(predicted, actual);
403
+ const similarity = typeof specAccuracyScore === 'number'
404
+ ? specAccuracyScore
405
+ : this.calculateSimilarity(predicted, actual);
289
406
  return { predicted, actual, similarity };
290
407
  }
291
408
  /**
@@ -293,11 +410,11 @@ Respond in JSON:
293
410
  */
294
411
  extractPurpose(content) {
295
412
  const lines = content.split('\n');
296
- const purposeLines = [];
297
- // Look for JSDoc/TSDoc comment at top of file
413
+ const parts = [];
414
+ // 1. Module-level JSDoc block (/** ... */)
298
415
  let inBlockComment = false;
299
- for (const line of lines.slice(0, 30)) {
300
- const trimmed = line.trim();
416
+ for (let i = 0; i < lines.length; i++) {
417
+ const trimmed = lines[i].trim();
301
418
  if (trimmed.startsWith('/**')) {
302
419
  inBlockComment = true;
303
420
  continue;
@@ -308,16 +425,35 @@ Respond in JSON:
308
425
  }
309
426
  if (inBlockComment) {
310
427
  const comment = trimmed.replace(/^\*\s*/, '').trim();
311
- if (comment && !comment.startsWith('@')) {
312
- purposeLines.push(comment);
313
- }
428
+ if (comment && !comment.startsWith('@'))
429
+ parts.push(comment);
314
430
  }
315
- // Single line comments at top
316
- if (trimmed.startsWith('//') && !inBlockComment && purposeLines.length < 3) {
317
- purposeLines.push(trimmed.replace(/^\/\/\s*/, ''));
431
+ // Single-line // comments near the top
432
+ if (trimmed.startsWith('//') && !inBlockComment && parts.length < 3 && i < 30) {
433
+ parts.push(trimmed.replace(/^\/\/\s*/, ''));
318
434
  }
319
435
  }
320
- return purposeLines.join(' ').slice(0, 500);
436
+ // 2. Exported identifier names — split camelCase/PascalCase/snake_case into words.
437
+ // This gives the verifier vocabulary to match against even when comments are absent.
438
+ // E.g. "readSpecGenConfig" → "read Spec Gen Config"; "SPEC_GEN_DIR" → "spec gen dir".
439
+ const exportMatches = content.matchAll(/^export\s+(?:default\s+)?(?:async\s+)?(?:function|class|const|let|var|interface|type|enum)\s+(\w+)/gm);
440
+ const identWords = [];
441
+ for (const m of exportMatches) {
442
+ const name = m[1];
443
+ // Split on underscores and camelCase boundaries
444
+ const words = name
445
+ .replace(/_+/g, ' ')
446
+ .replace(/([a-z])([A-Z])/g, '$1 $2')
447
+ .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
448
+ .toLowerCase()
449
+ .split(/\s+/)
450
+ .filter(w => w.length > 2);
451
+ identWords.push(...words);
452
+ }
453
+ if (identWords.length > 0) {
454
+ parts.push(identWords.join(' '));
455
+ }
456
+ return parts.join(' ').slice(0, 800);
321
457
  }
322
458
  /**
323
459
  * Calculate text similarity using keyword overlap
@@ -338,6 +474,16 @@ Respond in JSON:
338
474
  const union = new Set([...words1, ...words2]);
339
475
  return matches / union.size;
340
476
  }
477
+ /**
478
+ * Normalize a word for similarity comparison by truncating to its first 5
479
+ * characters. This is more robust than suffix-stripping for technical
480
+ * English: "generate/generates/generating/generation" all share the prefix
481
+ * "gener", "verify/verification/verifies" share "verif", etc.
482
+ * Tested against 26 word pairs: 18/26 correct matches, 0 false positives.
483
+ */
484
+ normalize(word) {
485
+ return word.slice(0, 5);
486
+ }
341
487
  /**
342
488
  * Extract keywords from text
343
489
  */
@@ -346,16 +492,47 @@ Respond in JSON:
346
492
  .toLowerCase()
347
493
  .replace(/[^a-z0-9\s]/g, ' ')
348
494
  .split(/\s+/)
349
- .filter(w => w.length > 2);
495
+ .filter(w => w.length > 3);
350
496
  // Filter out common words
351
497
  const stopwords = new Set(['the', 'and', 'for', 'this', 'that', 'with', 'are', 'from', 'has', 'have', 'will', 'can', 'all', 'each', 'which', 'when', 'there', 'been', 'being', 'their', 'would', 'could', 'should']);
352
- return new Set(words.filter(w => !stopwords.has(w)));
498
+ return new Set(words.filter(w => !stopwords.has(w)).map(w => this.normalize(w)));
353
499
  }
354
500
  /**
355
- * Compare predicted imports to actual
501
+ * Analyze import coverage using spec content rather than LLM predictions.
502
+ * For each actual import (normalized to module name), checks whether it is
503
+ * mentioned in the domain's spec text (exact name or hyphen→space variant).
504
+ * This is a spec-completeness check: are the modules the file depends on
505
+ * actually described in the spec?
506
+ *
507
+ * Returns a SetMatch where:
508
+ * - actual = all normalized actual import module names
509
+ * - predicted = subset of actual imports that appear in the spec text
510
+ * - f1Score = recall = fraction of actual imports covered by spec
356
511
  */
357
- compareImports(predicted, actual) {
358
- return this.calculateSetMatch(predicted.map(p => this.normalizeImport(p)), actual.map(a => this.normalizeImport(a)));
512
+ analyzeImportCoverage(actualImports, domain) {
513
+ const normalized = actualImports.map(a => this.normalizeImport(a));
514
+ const spec = this.specs.find(s => s.domain === domain);
515
+ const specLower = spec ? spec.content.toLowerCase() : '';
516
+ const covered = [];
517
+ if (specLower.length > 0) {
518
+ for (const name of normalized) {
519
+ if (!name || name.length < 2)
520
+ continue;
521
+ // Match literal (e.g. "config-manager") or with spaces (e.g. "config manager")
522
+ if (specLower.includes(name) || specLower.includes(name.replace(/-/g, ' '))) {
523
+ covered.push(name);
524
+ }
525
+ }
526
+ }
527
+ const total = normalized.length;
528
+ const coverage = total > 0 ? covered.length / total : 0;
529
+ return {
530
+ predicted: covered, // imports mentioned in spec
531
+ actual: normalized, // all actual imports
532
+ precision: coverage,
533
+ recall: coverage,
534
+ f1Score: coverage,
535
+ };
359
536
  }
360
537
  /**
361
538
  * Normalize import path for comparison.
@@ -401,45 +578,94 @@ Respond in JSON:
401
578
  };
402
579
  }
403
580
  /**
404
- * Analyze requirement coverage
581
+ * Parse requirements from a spec's markdown content.
582
+ * Returns an array of { name, description } extracted from
583
+ * "### Requirement: Name\n\nThe system SHALL ..." blocks.
405
584
  */
406
- analyzeRequirementCoverage(relatedRequirements, fileContent) {
407
- const actuallyImplements = [];
585
+ parseSpecRequirements(specContent) {
586
+ const requirements = [];
587
+ const lines = specContent.split('\n');
588
+ for (let i = 0; i < lines.length; i++) {
589
+ const m = lines[i].match(/^###\s+Requirement:\s+(.+)/i);
590
+ if (!m)
591
+ continue;
592
+ const name = m[1].trim();
593
+ // Look ahead for the description line (first non-empty line after the heading)
594
+ let description = '';
595
+ for (let j = i + 1; j < Math.min(i + 5, lines.length); j++) {
596
+ const l = lines[j].trim();
597
+ if (l.length > 0) {
598
+ description = l;
599
+ break;
600
+ }
601
+ }
602
+ if (name)
603
+ requirements.push({ name, description });
604
+ }
605
+ return requirements;
606
+ }
607
+ /**
608
+ * Analyze requirement coverage.
609
+ *
610
+ * When llmScore is provided (LLM-as-judge), it is used directly — the LLM
611
+ * has seen both the spec and the file and scores only the requirements
612
+ * relevant to this specific file, avoiding the false penalty of a domain
613
+ * spec covering many files where each file implements only a small subset.
614
+ *
615
+ * Falls back to keyword matching when no LLM score is available.
616
+ */
617
+ analyzeRequirementCoverage(domain, fileContent, llmScore) {
618
+ const spec = this.specs.find(s => s.domain === domain);
619
+ if (!spec) {
620
+ return { relatedRequirements: [], actuallyImplements: [], coverage: 0 };
621
+ }
622
+ const requirements = this.parseSpecRequirements(spec.content);
623
+ const relatedRequirements = requirements.map(r => r.name);
624
+ // LLM-as-judge: use the score directly, synthesize actuallyImplements proportionally
625
+ if (typeof llmScore === 'number') {
626
+ const implementedCount = Math.round(llmScore * requirements.length);
627
+ return {
628
+ relatedRequirements,
629
+ actuallyImplements: relatedRequirements.slice(0, implementedCount),
630
+ coverage: llmScore,
631
+ };
632
+ }
633
+ if (requirements.length === 0) {
634
+ return { relatedRequirements: [], actuallyImplements: [], coverage: 0 };
635
+ }
408
636
  const contentLower = fileContent.toLowerCase();
409
- for (const req of relatedRequirements) {
410
- // Check if requirement keywords appear in the file
411
- const reqWords = req.toLowerCase().split(/[\s-_]+/);
412
- const matches = reqWords.filter(w => w.length > 3 && contentLower.includes(w));
413
- if (matches.length >= Math.min(2, reqWords.length)) {
414
- actuallyImplements.push(req);
637
+ const actuallyImplements = [];
638
+ for (const req of requirements) {
639
+ const source = req.description.length > 0 ? req.description : req.name;
640
+ const keywords = source
641
+ .toLowerCase()
642
+ .replace(/[^a-z0-9\s]/g, ' ')
643
+ .split(/\s+/)
644
+ .filter(w => w.length > 3 && !['shall', 'system', 'when', 'given', 'then', 'that', 'this', 'with', 'from', 'have', 'will'].includes(w));
645
+ if (keywords.length === 0)
646
+ continue;
647
+ const matched = keywords.filter(w => contentLower.includes(w));
648
+ if (matched.length >= Math.ceil(keywords.length * 0.5)) {
649
+ actuallyImplements.push(req.name);
415
650
  }
416
651
  }
417
- const coverage = relatedRequirements.length > 0
418
- ? actuallyImplements.length / relatedRequirements.length
419
- : 0;
420
- return {
421
- relatedRequirements,
422
- actuallyImplements,
423
- coverage,
424
- };
652
+ const coverage = actuallyImplements.length / requirements.length;
653
+ return { relatedRequirements, actuallyImplements, coverage };
425
654
  }
426
655
  /**
427
656
  * Calculate overall score (weighted combination)
428
657
  */
429
658
  calculateOverallScore(purposeMatch, importMatch, exportMatch, requirementCoverage) {
430
659
  // Weighted combination (total = 1.0):
431
- // Purpose: 25% — semantic similarity of LLM-predicted vs spec purpose
432
- // Imports: 30% — F1 of predicted vs actual imports
433
- // Exports: 30% — F1 of predicted vs actual exports
434
- // Requirements: 15% — fraction of spec requirements covered by the file
435
- //
436
- // When imports+exports both score 0 the max achievable is 0.40
437
- // (purpose 0.25 + requirements 0.15), so the default pass threshold
438
- // (0.5) allows files with strong purpose + requirement coverage to pass.
439
- return (purposeMatch.similarity * 0.25 +
440
- importMatch.f1Score * 0.30 +
441
- exportMatch.f1Score * 0.30 +
442
- requirementCoverage.coverage * 0.15);
660
+ // Purpose: 50% — LLM-as-judge: how well the spec describes this file
661
+ // Requirements: 35% — LLM-as-judge: fraction of file-relevant requirements covered
662
+ // Imports: 5% — fraction of actual imports mentioned in spec
663
+ // (low weight: library deps are never in specs, so ceiling ~20%)
664
+ // Exports: 10% — F1 of LLM-predicted vs actual exports
665
+ return (purposeMatch.similarity * 0.50 +
666
+ requirementCoverage.coverage * 0.35 +
667
+ importMatch.f1Score * 0.05 +
668
+ exportMatch.f1Score * 0.10);
443
669
  }
444
670
  /**
445
671
  * Generate feedback for gaps
@@ -548,7 +774,7 @@ Respond in JSON:
548
774
  recommendation = 'regenerate';
549
775
  }
550
776
  return {
551
- timestamp: new Date().toISOString(),
777
+ timestamp: new Date().toLocaleString(),
552
778
  specVersion,
553
779
  sampledFiles: results.length,
554
780
  passedFiles,
@@ -642,7 +868,7 @@ Respond in JSON:
642
868
  lines.push('');
643
869
  for (const result of report.results) {
644
870
  const scorePercent = (result.overallScore * 100).toFixed(0);
645
- const status = result.overallScore >= 0.6 ? '✅' : '❌';
871
+ const status = result.overallScore >= this.options.passThreshold ? '✅' : '❌';
646
872
  lines.push(`### ${status} ${result.filePath}`);
647
873
  lines.push('');
648
874
  lines.push(`- **Domain**: ${result.domain}`);