@girardmedia/bootspring 2.0.37 → 2.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/analyze.js +312 -0
- package/cli/generate.js +182 -1
- package/cli/visualize.js +171 -1
- package/core/coherence/index.js +15 -1
- package/core/coherence/semantic-analyzer.js +836 -0
- package/generators/visual-doc-generator.js +910 -0
- package/intelligence/index.js +14 -1
- package/intelligence/model-context-optimizer.js +704 -0
- package/mcp/contracts/mcp-contract.v1.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,836 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Document Analyzer
|
|
3
|
+
*
|
|
4
|
+
* AI-powered document understanding for extracting concepts,
|
|
5
|
+
* analyzing terminology, finding contradictions, measuring
|
|
6
|
+
* specificity, detecting gaps, and validating cross-references.
|
|
7
|
+
*
|
|
8
|
+
* @package bootspring
|
|
9
|
+
* @module core/coherence/semantic-analyzer
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs').promises;
|
|
13
|
+
const path = require('path');
|
|
14
|
+
|
|
15
|
+
/**
 * Generic software-engineering vocabulary looked up in the analyzed
 * documents. Entries are lowercase; multi-word and punctuated entries
 * ('unit test', 'ci/cd') are matched as written.
 */
const TECHNICAL_TERMS = [
  'api',
  'database',
  'authentication',
  'authorization',
  'cache',
  'component',
  'service',
  'model',
  'controller',
  'middleware',
  'endpoint',
  'route',
  'schema',
  'migration',
  'deployment',
  'testing',
  'integration',
  'unit test',
  'e2e',
  'ci/cd',
  'microservice',
  'monolith',
  'serverless',
  'container',
];
|
|
25
|
+
|
|
26
|
+
/**
 * Vague language patterns.
 *
 * Each entry pairs a global, case-insensitive regex with the kind of
 * vagueness it signals. The patterns carry the `g` flag, so use them with
 * `String.prototype.match` / `matchAll`; calling `.test()` or `.exec()` on
 * these shared instances would leak `lastIndex` state between calls.
 */
const VAGUE_PATTERNS = [
  // Unquantified amounts: "several users", "many requests".
  { pattern: /\b(various|several|some|many|few)\s+\w+s?\b/gi, type: 'quantity' },
  // Open-ended timing.
  { pattern: /\b(soon|later|eventually|sometime)\b/gi, type: 'timeline' },
  // Hedging verbs and adverbs. "may" directly followed by a number is the
  // month in a date ("May 2025", "May 5"), not a hedge, so it is excluded.
  { pattern: /\b(might|could|may(?!\s+\d)|possibly|perhaps)\b/gi, type: 'uncertainty' },
  // Judgements with no measurable criterion.
  { pattern: /\b(good|better|best|nice|great)\b/gi, type: 'subjective' },
  // Lists that trail off.
  { pattern: /\b(etc|and so on|and more|things like)\b/gi, type: 'incomplete' }
];
|
|
36
|
+
|
|
37
|
+
/**
 * Concrete language patterns.
 *
 * Each entry pairs a global regex with the kind of concrete detail it
 * signals. As with any `g`-flagged regex, use these with
 * `String.prototype.match` / `matchAll` rather than `.test()` / `.exec()`,
 * which keep `lastIndex` state on the shared instance.
 */
const CONCRETE_PATTERNS = [
  { pattern: /\d+\s*(ms|seconds?|minutes?|hours?|days?|weeks?|months?)/gi, type: 'duration' },
  { pattern: /\d+(\.\d+)?%/g, type: 'percentage' },
  { pattern: /\$[\d,]+(\.\d{2})?/g, type: 'currency' },
  { pattern: /\b\d{4}-\d{2}-\d{2}\b/g, type: 'date' },
  // Dotted numbers such as "v2.0.38" or "18.2". The lookbehind stops a match
  // from starting inside a larger number or a currency amount ("$1,234.56"),
  // and the lookahead rejects the decimal part of a percentage ("99.5%"), so
  // those tokens are counted once by their own pattern instead of twice.
  { pattern: /(?<![\d.,$])v?\d+\.\d+(?:\.\d+)?(?![\d.]*%)/g, type: 'version' },
  { pattern: /\b(must|shall|will|requires?)\b/gi, type: 'requirement' }
];
|
|
48
|
+
|
|
49
|
+
/**
 * Capitalized words that normally just open a sentence. A capitalized match
 * that is exactly one of these is not treated as a domain concept.
 */
const COMMON_CAPITALIZED_WORDS = new Set([
  'The', 'This', 'That', 'These', 'Those', 'When', 'Where', 'What',
  'How', 'Why', 'Which', 'Who', 'For', 'With', 'From', 'Into',
  'About', 'After', 'Before', 'Between', 'During', 'Without',
  'Through', 'Against', 'Within', 'Among', 'However', 'Therefore',
  'Furthermore', 'Additionally', 'Moreover', 'Finally', 'First',
  'Second', 'Third', 'Next', 'Last', 'Also', 'Only', 'Just',
  'Now', 'Then', 'Here', 'There', 'Each', 'Every', 'Some',
  'Any', 'Most', 'Many', 'Few', 'All', 'Both', 'Either', 'Neither'
]);

/**
 * Negation markers, matched as whole words against lowercased text so that
 * "know" or "notification" are not mistaken for "no" / "not".
 */
const NEGATION_PATTERN = /\b(?:not|cannot|never|no|none|nothing|neither|nor|without)\b|n['’]t\b/;

/** Matches link targets that carry a URL scheme (https:, mailto:, ...). */
const URL_SCHEME_PATTERN = /^[a-z][a-z\d+.-]*:/i;

/**
 * Escape regex metacharacters so arbitrary text can be embedded in a pattern.
 * @param {string} text - Literal text
 * @returns {string} Text safe to interpolate into `new RegExp(...)`
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * SemanticDocumentAnalyzer class
 *
 * Heuristic, regex-based analysis of a set of markdown planning documents:
 * concept extraction, terminology consistency, contradiction detection,
 * specificity scoring, gap detection and cross-reference validation.
 */
class SemanticDocumentAnalyzer {
  /**
   * @param {Object} options - Configuration options
   * @param {string} [options.projectRoot] - Project root; defaults to the current working directory
   * @param {string} [options.planningDir] - Directory holding the documents; defaults to `<projectRoot>/planning`
   */
  constructor(options = {}) {
    this.projectRoot = options.projectRoot || process.cwd();
    this.planningDir = options.planningDir || path.join(this.projectRoot, 'planning');
  }

  /**
   * Analyze all documents
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} Results of every analysis pass plus a `summary`
   */
  async analyze(documents) {
    const analysis = {
      concepts: await this.extractConcepts(documents),
      terminology: await this.analyzeTerminology(documents),
      contradictions: await this.findContradictions(documents),
      specificity: await this.measureSpecificity(documents),
      gaps: await this.detectGaps(documents),
      crossReferences: await this.validateReferences(documents),
      summary: null
    };

    // The summary is derived from the other sections, so it is filled in last.
    analysis.summary = this.generateSummary(analysis);

    return analysis;
  }

  /**
   * Load documents from planning directory
   * @returns {Promise<Object>} Map of lowercased file name (without `.md`) to file content
   */
  async loadDocuments() {
    const documents = {};

    try {
      const files = await fs.readdir(this.planningDir);

      for (const file of files.filter((name) => name.endsWith('.md'))) {
        const content = await fs.readFile(path.join(this.planningDir, file), 'utf-8');
        // basename strips only the trailing extension; a plain
        // replace('.md', '') would remove the first ".md" anywhere in the name.
        documents[path.basename(file, '.md').toLowerCase()] = content;
      }
    } catch {
      // Best effort: a missing or unreadable planning directory yields
      // whatever was loaded so far (normally an empty map).
    }

    return documents;
  }

  /**
   * Extract core concepts from documents
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `technical` (term -> stats), `domain` (top capitalized terms) and `totalUniqueConcepts`
   */
  async extractConcepts(documents) {
    const allContent = Object.values(documents).join('\n');

    // Known technical vocabulary.
    const technical = {};
    for (const term of TECHNICAL_TERMS) {
      const occurrences = allContent.match(new RegExp(`\\b${escapeRegExp(term)}\\b`, 'gi')) || [];

      if (occurrences.length > 0) {
        technical[term] = {
          frequency: occurrences.length,
          documents: this.findDocumentsContaining(documents, term)
        };
      }
    }

    // Capitalized words and phrases are treated as likely domain concepts.
    const tally = new Map();
    for (const candidate of allContent.match(/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g) || []) {
      if (this.isLikelyDomainConcept(candidate)) {
        tally.set(candidate, (tally.get(candidate) || 0) + 1);
      }
    }

    // Keep the 20 most frequent concepts that occur at least twice.
    const domain = [...tally.entries()]
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .map(([term, count]) => ({
        term,
        frequency: count,
        documents: this.findDocumentsContaining(documents, term)
      }));

    return {
      technical,
      domain,
      totalUniqueConcepts: Object.keys(technical).length + domain.length
    };
  }

  /**
   * Check if term is likely a domain concept
   * @param {string} term - Term to check
   * @returns {boolean} True when the term is longer than two characters and not a common sentence word
   */
  isLikelyDomainConcept(term) {
    return !COMMON_CAPITALIZED_WORDS.has(term) && term.length > 2;
  }

  /**
   * Find documents containing a term
   * @param {Object} documents - Map of document name to content
   * @param {string} term - Term to search for (matched literally, whole word, case-insensitive)
   * @returns {string[]} Names of the documents that mention the term
   */
  findDocumentsContaining(documents, term) {
    // No `g` flag: a global regex keeps `lastIndex` between `.test()` calls,
    // which made a hit in one document hide the term in the next one.
    const matcher = new RegExp(`\\b${escapeRegExp(term)}\\b`, 'i');

    return Object.entries(documents)
      .filter(([, content]) => matcher.test(content))
      .map(([docName]) => docName);
  }

  /**
   * Analyze terminology consistency
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} Count of `terms`, count of `documented` terms and the `inconsistencies` found
   */
  async analyzeTerminology(documents) {
    // A Map keeps term names from colliding with Object.prototype members.
    const usagesByTerm = new Map();

    for (const [docName, content] of Object.entries(documents)) {
      for (const { name, context, definition } of this.extractTermDefinitions(content)) {
        if (!usagesByTerm.has(name)) {
          usagesByTerm.set(name, []);
        }
        usagesByTerm.get(name).push({ document: docName, context, definition });
      }
    }

    const inconsistencies = [];
    let documented = 0;

    for (const [term, usages] of usagesByTerm) {
      const definitions = usages.map((u) => u.definition).filter(Boolean);
      if (definitions.length > 0) {
        documented += 1;
      }

      // Definitions that differ after case/whitespace normalization conflict.
      const distinct = new Set(definitions.map((d) => d.toLowerCase().trim()));
      if (distinct.size > 1) {
        inconsistencies.push({
          term,
          issue: 'Inconsistent definitions across documents',
          usages: usages.map((u) => ({
            document: u.document,
            definition: u.definition
          }))
        });
      }
    }

    return {
      terms: usagesByTerm.size,
      documented,
      inconsistencies
    };
  }

  /**
   * Extract term definitions from content
   * @param {string} content - Document content
   * @returns {Array<{name: string, definition: string, context: string}>} Definitions in order of discovery
   */
  extractTermDefinitions(content) {
    const toTerm = (match) => ({
      name: match[1].trim(),
      definition: match[2].trim(),
      context: this.getContext(content, match.index)
    });

    // Pattern: "Term: definition" or "**Term**: definition"
    const colonPattern = /(?:\*\*)?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:\*\*)?:\s*([^.\n]+\.?)/g;
    const terms = Array.from(content.matchAll(colonPattern), toTerm);

    // Pattern: "Term is/are definition" - only for names not recorded yet.
    const isPattern = /([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|are)\s+([^.\n]+\.)/g;
    const seen = new Set(terms.map((t) => t.name));

    for (const match of content.matchAll(isPattern)) {
      const name = match[1].trim();
      if (!seen.has(name)) {
        seen.add(name);
        terms.push(toTerm(match));
      }
    }

    return terms;
  }

  /**
   * Get surrounding context for a match
   * @param {string} content - Full content
   * @param {number} index - Match index
   * @returns {string} Up to 50 characters before and 100 after the index, newlines replaced by spaces
   */
  getContext(content, index) {
    const start = Math.max(0, index - 50);
    const end = Math.min(content.length, index + 100);
    return content.slice(start, end).replace(/\n/g, ' ').trim();
  }

  /**
   * Find contradictions between documents
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Array<Object>>} Negation, numeric and timeline conflicts
   */
  async findContradictions(documents) {
    // Group statements by topic. A Map is required here: with a plain object
    // the topic "constructor" resolves to Object.prototype.constructor and
    // the subsequent push() throws.
    const byTopic = new Map();
    for (const statement of this.extractStatements(documents)) {
      const topic = this.extractTopic(statement.text);
      if (topic) {
        if (!byTopic.has(topic)) {
          byTopic.set(topic, []);
        }
        byTopic.get(topic).push(statement);
      }
    }

    const contradictions = [];
    for (const group of byTopic.values()) {
      if (group.length > 1) {
        contradictions.push(...this.findConflicts(group));
      }
    }

    contradictions.push(...this.findTimelineConflicts(documents));

    return contradictions;
  }

  /**
   * Extract factual statements from documents
   * @param {Object} documents - Map of document name to content
   * @returns {Array<{text: string, document: string}>} Statements tagged with their source document
   */
  extractStatements(documents) {
    return Object.entries(documents).flatMap(([docName, content]) =>
      content
        .split(/[.!?]+/)
        .map((sentence) => sentence.trim())
        .filter((sentence) => this.isFactualStatement(sentence))
        .map((text) => ({ text, document: docName }))
    );
  }

  /**
   * Check if sentence is a factual statement
   * @param {string} sentence - Sentence to check
   * @returns {boolean} True for sentences of at least 20 characters containing an assertive verb
   */
  isFactualStatement(sentence) {
    const factualIndicators = /\b(is|are|will|must|shall|requires?|uses?|supports?|provides?|enables?)\b/i;
    const minLength = 20;

    return sentence.length >= minLength && factualIndicators.test(sentence);
  }

  /**
   * Extract main topic from statement
   * @param {string} text - Statement text
   * @returns {string|null} Lowercased first one or two words (after an optional "The"), or null
   */
  extractTopic(text) {
    const match = text.match(/^(?:The\s+)?([A-Za-z]+(?:\s+[A-Za-z]+)?)/i);
    return match ? match[1].toLowerCase() : null;
  }

  /**
   * Find conflicts between statements
   * @param {Array} statements - Statements on same topic
   * @returns {Array<Object>} One entry per conflicting pair
   */
  findConflicts(statements) {
    const conflicts = [];

    // Compare each unordered pair once.
    statements.forEach((first, i) => {
      for (const second of statements.slice(i + 1)) {
        const conflict = this.detectConflict(first, second);
        if (conflict) {
          conflicts.push(conflict);
        }
      }
    });

    return conflicts;
  }

  /**
   * Detect if two statements conflict
   * @param {Object} stmt1 - First statement (`text`, `document`)
   * @param {Object} stmt2 - Second statement (`text`, `document`)
   * @returns {Object|null} A `negation` or `numeric` conflict record, or null
   */
  detectConflict(stmt1, stmt2) {
    const text1 = stmt1.text.toLowerCase();
    const text2 = stmt2.text.toLowerCase();

    // Negation is detected on whole words; substring checks flagged nearly
    // every sentence ("know", "notification", "another" all contain "no").
    const negated1 = NEGATION_PATTERN.test(text1);
    const negated2 = NEGATION_PATTERN.test(text2);

    // Exactly one side negated while sharing key terms: possible conflict.
    if (negated1 !== negated2) {
      const words1 = new Set(text1.match(/\b\w{4,}\b/g) || []);
      const words2 = new Set(text2.match(/\b\w{4,}\b/g) || []);
      const overlap = [...words1].filter((w) => words2.has(w));

      if (overlap.length >= 3) {
        return {
          type: 'negation',
          severity: 'medium',
          statements: [
            { document: stmt1.document, text: stmt1.text.slice(0, 100) },
            { document: stmt2.document, text: stmt2.text.slice(0, 100) }
          ],
          sharedTerms: overlap.slice(0, 5)
        };
      }
    }

    // Numeric conflict: similar wording around different leading numbers.
    const nums1 = text1.match(/\d+/g) || [];
    const nums2 = text2.match(/\d+/g) || [];

    if (nums1.length > 0 && nums2.length > 0) {
      // Mask the numbers so only the surrounding wording is compared.
      const context1 = text1.replace(/\d+/g, 'NUM').slice(0, 50);
      const context2 = text2.replace(/\d+/g, 'NUM').slice(0, 50);

      if (this.contextSimilarity(context1, context2) > 0.6 && nums1[0] !== nums2[0]) {
        return {
          type: 'numeric',
          severity: 'high',
          statements: [
            { document: stmt1.document, text: stmt1.text.slice(0, 100), value: nums1[0] },
            { document: stmt2.document, text: stmt2.text.slice(0, 100), value: nums2[0] }
          ]
        };
      }
    }

    return null;
  }

  /**
   * Calculate simple context similarity
   * @param {string} ctx1 - First context
   * @param {string} ctx2 - Second context
   * @returns {number} Jaccard similarity (0..1) of the two whitespace-split word sets
   */
  contextSimilarity(ctx1, ctx2) {
    const words1 = new Set(ctx1.toLowerCase().split(/\s+/));
    const words2 = new Set(ctx2.toLowerCase().split(/\s+/));
    const shared = [...words1].filter((w) => words2.has(w));
    const union = new Set([...words1, ...words2]);

    return shared.length / union.size;
  }

  /**
   * Find timeline conflicts
   * @param {Object} documents - Map of document name to content
   * @returns {Array<Object>} One `timeline` conflict per topic mentioned with differing dates
   */
  findTimelineConflicts(documents) {
    // Map instead of a plain object for the same prototype-key reason as in
    // findContradictions.
    const timelines = new Map();

    for (const [docName, content] of Object.entries(documents)) {
      const dateMatches = content.matchAll(/(\w+\s+\d{4}|\d{4}-\d{2}(?:-\d{2})?|Q[1-4]\s+\d{4})/gi);

      for (const match of dateMatches) {
        const context = this.getContext(content, match.index);
        const topic = this.extractTopic(context);

        if (topic) {
          if (!timelines.has(topic)) {
            timelines.set(topic, []);
          }
          timelines.get(topic).push({ document: docName, date: match[0], context });
        }
      }
    }

    const conflicts = [];
    for (const [topic, entries] of timelines) {
      const distinctDates = new Set(entries.map((e) => e.date));

      if (distinctDates.size > 1) {
        conflicts.push({
          type: 'timeline',
          topic,
          severity: 'medium',
          entries: entries.map((e) => ({
            document: e.document,
            date: e.date
          }))
        });
      }
    }

    return conflicts;
  }

  /**
   * Measure document specificity
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} Per-document results (`byDocument`) and an `overall` score, counts and rating
   */
  async measureSpecificity(documents) {
    const byDocument = {};
    let totalVague = 0;
    let totalConcrete = 0;
    let totalWords = 0;

    for (const [docName, content] of Object.entries(documents)) {
      const docResult = this.measureDocumentSpecificity(content);
      byDocument[docName] = docResult;

      totalVague += docResult.vagueCount;
      totalConcrete += docResult.concreteCount;
      totalWords += docResult.wordCount;
    }

    // Score starts at 50 for neutral text and moves with the share of vague
    // and concrete matches per word; Math.max avoids division by zero.
    const wordBase = Math.max(totalWords, 1);
    const score = Math.round((1 - totalVague / wordBase + totalConcrete / wordBase) * 50);

    return {
      byDocument,
      overall: {
        score: Math.min(100, Math.max(0, score)),
        vagueTerms: totalVague,
        concreteTerms: totalConcrete,
        rating: score >= 70 ? 'specific' : score >= 50 ? 'moderate' : 'vague'
      }
    };
  }

  /**
   * Measure specificity of a single document
   * @param {string} content - Document content
   * @returns {Object} Word count, vague/concrete match counts, up to five examples of each, and the vague-to-concrete ratio
   */
  measureDocumentSpecificity(content) {
    // Count matches for a pattern list and keep the first hit of each
    // matching pattern as an example (at most five examples).
    const tally = (patterns) => {
      let count = 0;
      const examples = [];
      for (const { pattern, type } of patterns) {
        const hits = content.match(pattern) || [];
        count += hits.length;
        if (hits.length > 0 && examples.length < 5) {
          examples.push({ type, example: hits[0] });
        }
      }
      return { count, examples };
    };

    const vague = tally(VAGUE_PATTERNS);
    const concrete = tally(CONCRETE_PATTERNS);
    const wordCount = (content.match(/\b\w+\b/g) || []).length;

    return {
      wordCount,
      vagueCount: vague.count,
      concreteCount: concrete.count,
      vagueExamples: vague.examples,
      concreteExamples: concrete.examples,
      ratio: vague.count / Math.max(concrete.count, 1)
    };
  }

  /**
   * Detect gaps in documentation
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `mentionedButNotDefined` (max 10), `referencedDocuments` and `missingDetails` (max 10)
   */
  async detectGaps(documents) {
    const allContent = Object.values(documents).join('\n');

    // Terms that are used in requirement-style sentences but never defined.
    const definedTerms = new Set(
      Object.values(documents)
        .flatMap((content) => this.extractTermDefinitions(content))
        .map((t) => t.name.toLowerCase())
    );
    const mentionedButNotDefined = this.extractMentionedConcepts(allContent)
      .filter((term) => !definedTerms.has(term.toLowerCase()));

    // Referenced documents that are not part of the analyzed set. The name is
    // read from the capture groups; re-matching the whole hit always yielded
    // the leading keyword ("see" / "refer") instead of the document name.
    const existingDocs = new Set(Object.keys(documents).map((d) => d.toLowerCase()));
    const referencePattern = /see\s+([A-Z][A-Z_]+)\.md|refer\s+to\s+([A-Z][A-Z_]+)/gi;
    const referencedDocuments = [];

    for (const [, seeTarget, referTarget] of allContent.matchAll(referencePattern)) {
      // "refer to <word>" also matches ordinary prose ("refer to the ..."),
      // so without a .md suffix only all-caps names count as documents.
      const name = seeTarget ?? (referTarget === referTarget.toUpperCase() ? referTarget : null);
      if (name && !existingDocs.has(name.toLowerCase())) {
        referencedDocuments.push(name.toLowerCase());
      }
    }

    // Placeholder markers left in the text.
    const todoPattern = /(?:TODO|TBD|FIXME|XXX|WIP)[\s:]+([^\n]+)/gi;
    const missingDetails = Array.from(allContent.matchAll(todoPattern), (match) => ({
      type: match[0].split(/[\s:]/)[0],
      detail: match[1].trim().slice(0, 100)
    }));

    return {
      mentionedButNotDefined: [...new Set(mentionedButNotDefined)].slice(0, 10),
      referencedDocuments: [...new Set(referencedDocuments)],
      missingDetails: missingDetails.slice(0, 10)
    };
  }

  /**
   * Extract concepts that are mentioned but might need definition
   * @param {string} content - Content to analyze
   * @returns {string[]} Terms found directly before should/must/will/need(s)/require(s), duplicates included
   */
  extractMentionedConcepts(content) {
    const needsDefinitionPattern = /(?:the\s+)?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:should|must|will|needs?|requires?)/gi;

    return Array.from(content.matchAll(needsDefinitionPattern), (match) => match[1])
      .filter((term) => this.isLikelyDomainConcept(term));
  }

  /**
   * Validate cross-references between documents
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `valid` links, `broken` links and up to 10 `suggestions` for unlinked mentions
   */
  async validateReferences(documents) {
    const docNames = Object.keys(documents);
    const knownDocs = new Set(docNames.map((name) => name.toLowerCase()));
    const references = { valid: [], broken: [], suggestions: [] };

    for (const [docName, content] of Object.entries(documents)) {
      // Markdown links and "see NAME.md" mentions.
      const linkPattern = /\[([^\]]+)\]\(([^)]+)\)|see\s+([A-Z][A-Z_]+\.md)/gi;
      const linkedFromHere = new Set();

      for (const match of content.matchAll(linkPattern)) {
        const target = match[2] || match[3];

        // Only local markdown files are document references; external URLs
        // ending in .md are not planning documents and must not be "broken".
        if (!target || !target.endsWith('.md') || URL_SCHEME_PATTERN.test(target)) {
          continue;
        }

        // Drop the extension and a leading "./" so "./prd.md" resolves to "prd".
        const targetName = target.slice(0, -'.md'.length).replace(/^\.\//, '').toLowerCase();

        if (knownDocs.has(targetName)) {
          linkedFromHere.add(targetName);
          references.valid.push({
            from: docName,
            to: targetName,
            text: match[1] || target
          });
        } else {
          references.broken.push({
            from: docName,
            to: targetName,
            suggestion: this.findSimilarDoc(targetName, docNames)
          });
        }
      }

      // Implicit references: another document's name appears in the text
      // although no explicit link to it exists in this document.
      for (const otherDoc of docNames) {
        if (otherDoc === docName || linkedFromHere.has(otherDoc.toLowerCase())) {
          continue;
        }

        // Document names come from file names, so escape them before use.
        const mention = new RegExp(`\\b${escapeRegExp(otherDoc)}\\b`, 'i');
        if (mention.test(content)) {
          references.suggestions.push({
            from: docName,
            to: otherDoc,
            suggestion: `Consider adding explicit link to ${otherDoc.toUpperCase()}.md`
          });
        }
      }
    }

    // Each (from, to) pair is visited once, so only the size cap is needed.
    references.suggestions = references.suggestions.slice(0, 10);

    return references;
  }

  /**
   * Find similar document name
   * @param {string} name - Document name to match
   * @param {Array} docNames - Available document names
   * @returns {string|null} A "Did you mean ...?" hint for the first name containing or contained in `name`, else null
   */
  findSimilarDoc(name, docNames) {
    const candidate = docNames.find((doc) => doc.includes(name) || name.includes(doc));
    return candidate === undefined ? null : `Did you mean ${candidate.toUpperCase()}.md?`;
  }

  /**
   * Generate analysis summary
   * @param {Object} analysis - Full analysis results
   * @returns {Object} Health `score` (0-100), `rating`, the `issues` list, `conceptCount` and `documentedTerms`
   */
  generateSummary(analysis) {
    const issues = [];
    const report = (type, severity, count, message) => {
      issues.push({ type, severity, count, message });
    };

    const inconsistencyCount = analysis.terminology.inconsistencies.length;
    if (inconsistencyCount > 0) {
      report('terminology', 'medium', inconsistencyCount,
        `${inconsistencyCount} terminology inconsistencies found`);
    }

    const contradictionCount = analysis.contradictions.length;
    if (contradictionCount > 0) {
      report('contradiction', 'high', contradictionCount,
        `${contradictionCount} potential contradictions found`);
    }

    if (analysis.specificity.overall.rating === 'vague') {
      report('specificity', 'medium', analysis.specificity.overall.vagueTerms,
        'Documents contain excessive vague language');
    }

    const totalGaps = analysis.gaps.mentionedButNotDefined.length +
      analysis.gaps.referencedDocuments.length +
      analysis.gaps.missingDetails.length;
    if (totalGaps > 0) {
      report('gaps', 'low', totalGaps, `${totalGaps} documentation gaps detected`);
    }

    const brokenCount = analysis.crossReferences.broken.length;
    if (brokenCount > 0) {
      report('references', 'medium', brokenCount,
        `${brokenCount} broken document references`);
    }

    // Health score: start at 100 and subtract a per-occurrence penalty by
    // severity (high 15, medium 8, anything else 3), clamped to 0..100.
    const penalties = { high: 15, medium: 8 };
    const deducted = issues.reduce(
      (sum, issue) => sum + issue.count * (penalties[issue.severity] ?? 3),
      0
    );
    const score = Math.max(0, Math.min(100, 100 - deducted));

    return {
      score,
      rating: score >= 80 ? 'good' : score >= 60 ? 'fair' : 'needs improvement',
      issues,
      conceptCount: analysis.concepts.totalUniqueConcepts,
      documentedTerms: analysis.terminology.documented
    };
  }
}
|
|
830
|
+
|
|
831
|
+
module.exports = {
|
|
832
|
+
SemanticDocumentAnalyzer,
|
|
833
|
+
TECHNICAL_TERMS,
|
|
834
|
+
VAGUE_PATTERNS,
|
|
835
|
+
CONCRETE_PATTERNS
|
|
836
|
+
};
|