@girardmedia/bootspring 2.0.37 → 2.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,836 @@
1
+ /**
2
+ * Semantic Document Analyzer
3
+ *
4
+ * AI-powered document understanding for extracting concepts,
5
+ * analyzing terminology, finding contradictions, measuring
6
+ * specificity, detecting gaps, and validating cross-references.
7
+ *
8
+ * @package bootspring
9
+ * @module core/coherence/semantic-analyzer
10
+ */
11
+
12
+ const fs = require('fs').promises;
13
+ const path = require('path');
14
+
15
/**
 * Technical vocabulary whose occurrences are counted when extracting
 * concepts. Entries are lowercase; the order is significant only in that it
 * determines the key order of the resulting `technical` concept map.
 */
const TECHNICAL_TERMS = [
  'api',
  'database',
  'authentication',
  'authorization',
  'cache',
  'component',
  'service',
  'model',
  'controller',
  'middleware',
  'endpoint',
  'route',
  'schema',
  'migration',
  'deployment',
  'testing',
  'integration',
  'unit test',
  'e2e',
  'ci/cd',
  'microservice',
  'monolith',
  'serverless',
  'container'
];
25
+
26
/**
 * Patterns that flag imprecise wording. Each entry pairs a global,
 * case-insensitive regular expression with the category of vagueness it
 * detects.
 */
const VAGUE_PATTERNS = [
  ['quantity', /\b(various|several|some|many|few)\s+\w+s?\b/gi],
  ['timeline', /\b(soon|later|eventually|sometime)\b/gi],
  ['uncertainty', /\b(might|could|may|possibly|perhaps)\b/gi],
  ['subjective', /\b(good|better|best|nice|great)\b/gi],
  ['incomplete', /\b(etc|and so on|and more|things like)\b/gi]
].map(([type, pattern]) => ({ pattern, type }));
36
+
37
/**
 * Patterns that flag precise, measurable wording. Each entry pairs a global
 * regular expression with the kind of concrete detail it detects.
 */
const CONCRETE_PATTERNS = [
  ['duration', /\d+\s*(ms|seconds?|minutes?|hours?|days?|weeks?|months?)/gi],
  ['percentage', /\d+(\.\d+)?%/g],
  ['currency', /\$[\d,]+(\.\d{2})?/g],
  ['date', /\b\d{4}-\d{2}-\d{2}\b/g],
  ['version', /v?\d+\.\d+(\.\d+)?/g],
  ['requirement', /\b(must|shall|will|requires?)\b/gi]
].map(([type, pattern]) => ({ pattern, type }));
48
+
49
/**
 * Escape a string so it can be embedded literally in a RegExp source.
 * @param {string} text - Raw text (e.g. a term or a document name)
 * @returns {string} Text with regex metacharacters backslash-escaped
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Capitalized words that start sentences so often that they are never
 * treated as domain concepts. Built once instead of on every lookup.
 */
const COMMON_CAPITALIZED_WORDS = new Set([
  'The', 'This', 'That', 'These', 'Those', 'When', 'Where', 'What',
  'How', 'Why', 'Which', 'Who', 'For', 'With', 'From', 'Into',
  'About', 'After', 'Before', 'Between', 'During', 'Without',
  'Through', 'Against', 'Within', 'Among', 'However', 'Therefore',
  'Furthermore', 'Additionally', 'Moreover', 'Finally', 'First',
  'Second', 'Third', 'Next', 'Last', 'Also', 'Only', 'Just',
  'Now', 'Then', 'Here', 'There', 'Each', 'Every', 'Some',
  'Any', 'Most', 'Many', 'Few', 'All', 'Both', 'Either', 'Neither'
]);

/**
 * SemanticDocumentAnalyzer
 *
 * Heuristic (regex-based) analysis of a set of markdown documents: concept
 * extraction, terminology consistency, contradiction detection, specificity
 * scoring, gap detection and cross-reference validation.
 *
 * All analysis methods take a `documents` map of document name to content.
 */
class SemanticDocumentAnalyzer {
  /**
   * @param {Object} options - Configuration options
   * @param {string} [options.projectRoot] - Project root; defaults to `process.cwd()`
   * @param {string} [options.planningDir] - Directory holding the markdown
   *   documents; defaults to `<projectRoot>/planning`
   */
  constructor(options = {}) {
    this.projectRoot = options.projectRoot || process.cwd();
    this.planningDir = options.planningDir || path.join(this.projectRoot, 'planning');
  }

  /**
   * Run every analysis pass and attach a summary.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} Object with `concepts`, `terminology`,
   *   `contradictions`, `specificity`, `gaps`, `crossReferences`, `summary`
   */
  async analyze(documents) {
    const analysis = {
      concepts: await this.extractConcepts(documents),
      terminology: await this.analyzeTerminology(documents),
      contradictions: await this.findContradictions(documents),
      specificity: await this.measureSpecificity(documents),
      gaps: await this.detectGaps(documents),
      crossReferences: await this.validateReferences(documents),
      summary: null
    };

    // The summary is derived from the other sections, so it is filled in last.
    analysis.summary = this.generateSummary(analysis);

    return analysis;
  }

  /**
   * Load every `*.md` file directly inside the planning directory.
   * @returns {Promise<Object>} Map of lowercased file name (without the
   *   `.md` extension) to file content; empty when nothing could be read
   */
  async loadDocuments() {
    const documents = {};
    const extension = '.md';

    try {
      const files = await fs.readdir(this.planningDir);

      for (const file of files) {
        if (!file.endsWith(extension)) continue;

        const content = await fs.readFile(path.join(this.planningDir, file), 'utf-8');
        // Strip only the trailing extension; `replace('.md', '')` would remove
        // the first ".md" anywhere in the name (e.g. "api.md.notes.md").
        const docName = file.slice(0, -extension.length).toLowerCase();
        documents[docName] = content;
      }
    } catch {
      // Deliberately best-effort: a missing or unreadable planning directory
      // yields whatever was loaded so far (usually nothing).
    }

    return documents;
  }

  /**
   * Extract core concepts from documents.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `{ technical, domain, totalUniqueConcepts }`
   *   where `technical` maps each found TECHNICAL_TERMS entry to its
   *   frequency and containing documents, and `domain` lists up to 20
   *   capitalized terms that occur at least twice
   */
  async extractConcepts(documents) {
    const allContent = Object.values(documents).join('\n');

    // Known technical vocabulary
    const technical = {};
    for (const term of TECHNICAL_TERMS) {
      const occurrences = allContent.match(new RegExp(`\\b${escapeRegExp(term)}\\b`, 'gi')) || [];

      if (occurrences.length > 0) {
        technical[term] = {
          frequency: occurrences.length,
          documents: this.findDocumentsContaining(documents, term)
        };
      }
    }

    // Capitalized word runs are treated as candidate domain concepts
    const candidateCounts = new Map();
    for (const candidate of allContent.match(/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b/g) || []) {
      if (this.isLikelyDomainConcept(candidate)) {
        candidateCounts.set(candidate, (candidateCounts.get(candidate) || 0) + 1);
      }
    }

    const domain = [...candidateCounts]
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .map(([term, count]) => ({
        term,
        frequency: count,
        documents: this.findDocumentsContaining(documents, term)
      }));

    return {
      technical,
      domain,
      totalUniqueConcepts: Object.keys(technical).length + domain.length
    };
  }

  /**
   * Check if term is likely a domain concept.
   * @param {string} term - Term to check
   * @returns {boolean} True when the term is longer than two characters and
   *   is not one of the common capitalized words
   */
  isLikelyDomainConcept(term) {
    return !COMMON_CAPITALIZED_WORDS.has(term) && term.length > 2;
  }

  /**
   * Find documents containing a term (whole-word, case-insensitive).
   * @param {Object} documents - Map of document name to content
   * @param {string} term - Term to search for (matched literally)
   * @returns {string[]} Names of the documents that contain the term
   */
  findDocumentsContaining(documents, term) {
    // No `g` flag: a global regex keeps `lastIndex` between `.test()` calls,
    // which made every document after a hit start searching mid-string.
    const matcher = new RegExp(`\\b${escapeRegExp(term)}\\b`, 'i');

    return Object.entries(documents)
      .filter(([, content]) => matcher.test(content))
      .map(([docName]) => docName);
  }

  /**
   * Analyze terminology consistency.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `{ terms, documented, inconsistencies }`:
   *   number of distinct terms, number of terms with at least one definition,
   *   and the terms whose definitions differ
   */
  async analyzeTerminology(documents) {
    // term name -> list of { document, context, definition }
    const usagesByTerm = new Map();

    for (const [docName, content] of Object.entries(documents)) {
      for (const { name, context, definition } of this.extractTermDefinitions(content)) {
        if (!usagesByTerm.has(name)) {
          usagesByTerm.set(name, []);
        }
        usagesByTerm.get(name).push({ document: docName, context, definition });
      }
    }

    const inconsistencies = [];
    let documented = 0;

    for (const [termName, usages] of usagesByTerm) {
      const definitions = usages.map(u => u.definition).filter(Boolean);

      if (definitions.length > 0) {
        documented += 1;
      }
      if (definitions.length < 2) continue;

      // Definitions are compared after lowercasing and trimming only
      const distinct = new Set(definitions.map(d => d.toLowerCase().trim()));
      if (distinct.size > 1) {
        inconsistencies.push({
          term: termName,
          issue: 'Inconsistent definitions across documents',
          usages: usages.map(u => ({
            document: u.document,
            definition: u.definition
          }))
        });
      }
    }

    return {
      terms: usagesByTerm.size,
      documented,
      inconsistencies
    };
  }

  /**
   * Extract term definitions from content.
   * @param {string} content - Document content
   * @returns {Array<{name: string, definition: string, context: string}>}
   *   Every "Term: definition" match, followed by "Term is/are definition"
   *   matches for names not already collected
   */
  extractTermDefinitions(content) {
    const toTerm = (match) => ({
      name: match[1].trim(),
      definition: match[2].trim(),
      context: this.getContext(content, match.index)
    });

    // Pattern: "Term: definition" or "**Term**: definition"
    const definitionPattern = /(?:\*\*)?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)(?:\*\*)?:\s*([^.\n]+\.?)/g;
    const terms = [...content.matchAll(definitionPattern)].map(toTerm);

    // Pattern: "Term is/are definition" - only for names not seen yet
    const isPattern = /([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|are)\s+([^.\n]+\.)/g;
    const seenNames = new Set(terms.map(t => t.name));

    for (const match of content.matchAll(isPattern)) {
      const term = toTerm(match);
      if (seenNames.has(term.name)) continue;
      seenNames.add(term.name);
      terms.push(term);
    }

    return terms;
  }

  /**
   * Get surrounding context for a match.
   * @param {string} content - Full content
   * @param {number} index - Match index
   * @returns {string} Up to 50 characters before and 100 after the index,
   *   with newlines replaced by spaces and the result trimmed
   */
  getContext(content, index) {
    const start = Math.max(0, index - 50);
    const end = Math.min(content.length, index + 100);
    return content.slice(start, end).replace(/\n/g, ' ').trim();
  }

  /**
   * Find contradictions between documents.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Array>} Negation, numeric and timeline conflicts
   */
  async findContradictions(documents) {
    // A Map (not a plain object) because topics come from document text:
    // a topic such as "constructor" would otherwise resolve to
    // Object.prototype.constructor and break the `.push` below.
    const statementsByTopic = new Map();

    for (const statement of this.extractStatements(documents)) {
      const topic = this.extractTopic(statement.text);
      if (!topic) continue;

      if (!statementsByTopic.has(topic)) {
        statementsByTopic.set(topic, []);
      }
      statementsByTopic.get(topic).push(statement);
    }

    const contradictions = [];
    for (const group of statementsByTopic.values()) {
      if (group.length > 1) {
        contradictions.push(...this.findConflicts(group));
      }
    }

    contradictions.push(...this.findTimelineConflicts(documents));

    return contradictions;
  }

  /**
   * Extract factual statements from documents.
   * @param {Object} documents - Map of document name to content
   * @returns {Array<{text: string, document: string}>} Sentences (split on
   *   `.`, `!`, `?`) that pass `isFactualStatement`
   */
  extractStatements(documents) {
    const statements = [];

    for (const [docName, content] of Object.entries(documents)) {
      for (const sentence of content.split(/[.!?]+/)) {
        const text = sentence.trim();
        if (text && this.isFactualStatement(text)) {
          statements.push({ text, document: docName });
        }
      }
    }

    return statements;
  }

  /**
   * Check if sentence is a factual statement.
   * @param {string} sentence - Sentence to check
   * @returns {boolean} True when the sentence has at least 20 characters and
   *   contains one of the indicator verbs
   */
  isFactualStatement(sentence) {
    const factualIndicators = /\b(is|are|will|must|shall|requires?|uses?|supports?|provides?|enables?)\b/i;
    const minLength = 20;

    return sentence.length >= minLength && factualIndicators.test(sentence);
  }

  /**
   * Extract main topic from statement.
   * @param {string} text - Statement text
   * @returns {string|null} The first one or two words (after an optional
   *   leading "The"), lowercased, or null when the text does not start with
   *   a letter
   */
  extractTopic(text) {
    const match = text.match(/^(?:The\s+)?([A-Za-z]+(?:\s+[A-Za-z]+)?)/i);
    return match ? match[1].toLowerCase() : null;
  }

  /**
   * Find conflicts between statements.
   * @param {Array} statements - Statements on same topic
   * @returns {Array} One conflict object per conflicting pair
   */
  findConflicts(statements) {
    const conflicts = [];

    for (let i = 0; i < statements.length; i++) {
      for (let j = i + 1; j < statements.length; j++) {
        const conflict = this.detectConflict(statements[i], statements[j]);
        if (conflict) {
          conflicts.push(conflict);
        }
      }
    }

    return conflicts;
  }

  /**
   * Detect if two statements conflict.
   * @param {Object} stmt1 - First statement (`{ text, document }`)
   * @param {Object} stmt2 - Second statement (`{ text, document }`)
   * @returns {Object|null} A `negation` or `numeric` conflict, or null
   */
  detectConflict(stmt1, stmt2) {
    const text1 = stmt1.text.toLowerCase();
    const text2 = stmt2.text.toLowerCase();

    // Whole-word negation check. Plain substring matching treated words such
    // as "know", "another" or "technology" as negations. "cannot" is listed
    // explicitly because `\bnot\b` does not match inside it.
    const negation = /\b(?:not|cannot|never|no|none|without)\b|n't/;
    const hasNegation1 = negation.test(text1);
    const hasNegation2 = negation.test(text2);

    // Exactly one side negated + enough shared vocabulary => possible conflict
    if (hasNegation1 !== hasNegation2) {
      const words1 = new Set(text1.match(/\b\w{4,}\b/g) || []);
      const words2 = new Set(text2.match(/\b\w{4,}\b/g) || []);
      const overlap = [...words1].filter(w => words2.has(w));

      if (overlap.length >= 3) {
        return {
          type: 'negation',
          severity: 'medium',
          statements: [
            { document: stmt1.document, text: stmt1.text.slice(0, 100) },
            { document: stmt2.document, text: stmt2.text.slice(0, 100) }
          ],
          sharedTerms: overlap.slice(0, 5)
        };
      }
    }

    // Numeric conflict: similar wording around the numbers, different first number
    const nums1 = text1.match(/\d+/g) || [];
    const nums2 = text2.match(/\d+/g) || [];

    if (nums1.length > 0 && nums2.length > 0) {
      const context1 = text1.replace(/\d+/g, 'NUM').slice(0, 50);
      const context2 = text2.replace(/\d+/g, 'NUM').slice(0, 50);

      if (this.contextSimilarity(context1, context2) > 0.6 && nums1[0] !== nums2[0]) {
        return {
          type: 'numeric',
          severity: 'high',
          statements: [
            { document: stmt1.document, text: stmt1.text.slice(0, 100), value: nums1[0] },
            { document: stmt2.document, text: stmt2.text.slice(0, 100), value: nums2[0] }
          ]
        };
      }
    }

    return null;
  }

  /**
   * Calculate simple context similarity (shared words / all words).
   * @param {string} ctx1 - First context
   * @param {string} ctx2 - Second context
   * @returns {number} Ratio between 0 and 1 over whitespace-separated,
   *   lowercased tokens
   */
  contextSimilarity(ctx1, ctx2) {
    const words1 = new Set(ctx1.toLowerCase().split(/\s+/));
    const words2 = new Set(ctx2.toLowerCase().split(/\s+/));
    const intersection = [...words1].filter(w => words2.has(w));
    const union = new Set([...words1, ...words2]);

    // `split` always yields at least one token, so `union.size` is never 0.
    return intersection.length / union.size;
  }

  /**
   * Find timeline conflicts.
   * @param {Object} documents - Map of document name to content
   * @returns {Array} One `timeline` conflict per topic that is associated
   *   with more than one distinct date string
   */
  findTimelineConflicts(documents) {
    // Map for the same reason as in findContradictions: topics are derived
    // from document text and may collide with Object.prototype members.
    const mentionsByTopic = new Map();

    for (const [docName, content] of Object.entries(documents)) {
      const dateMatches = content.matchAll(/(\w+\s+\d{4}|\d{4}-\d{2}(?:-\d{2})?|Q[1-4]\s+\d{4})/gi);

      for (const match of dateMatches) {
        const context = this.getContext(content, match.index);
        const topic = this.extractTopic(context);
        if (!topic) continue;

        if (!mentionsByTopic.has(topic)) {
          mentionsByTopic.set(topic, []);
        }
        mentionsByTopic.get(topic).push({ document: docName, date: match[0], context });
      }
    }

    const conflicts = [];
    for (const [topic, entries] of mentionsByTopic) {
      const distinctDates = new Set(entries.map(e => e.date));

      if (entries.length > 1 && distinctDates.size > 1) {
        conflicts.push({
          type: 'timeline',
          topic,
          severity: 'medium',
          entries: entries.map(e => ({
            document: e.document,
            date: e.date
          }))
        });
      }
    }

    return conflicts;
  }

  /**
   * Measure document specificity.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `{ byDocument, overall }` where `overall`
   *   holds a 0-100 score, the vague/concrete totals and a rating
   */
  async measureSpecificity(documents) {
    const byDocument = {};
    let totalVague = 0;
    let totalConcrete = 0;
    let totalWords = 0;

    for (const [docName, content] of Object.entries(documents)) {
      const docResult = this.measureDocumentSpecificity(content);
      byDocument[docName] = docResult;

      totalVague += docResult.vagueCount;
      totalConcrete += docResult.concreteCount;
      totalWords += docResult.wordCount;
    }

    // 50 is the neutral score; vague wording lowers it, concrete wording raises it
    const denominator = Math.max(totalWords, 1);
    const rawScore = Math.round((1 - totalVague / denominator + totalConcrete / denominator) * 50);
    const score = Math.min(100, Math.max(0, rawScore));

    return {
      byDocument,
      overall: {
        score,
        vagueTerms: totalVague,
        concreteTerms: totalConcrete,
        rating: score >= 70 ? 'specific' : score >= 50 ? 'moderate' : 'vague'
      }
    };
  }

  /**
   * Measure specificity of a single document.
   * @param {string} content - Document content
   * @returns {Object} `{ wordCount, vagueCount, concreteCount,
   *   vagueExamples, concreteExamples, ratio }`; `ratio` is vague matches per
   *   concrete match (denominator floored at 1)
   */
  measureDocumentSpecificity(content) {
    // Counts all matches of a pattern list and keeps the first match of up to
    // five pattern types as examples.
    const tally = (patterns) => {
      let count = 0;
      const examples = [];

      for (const { pattern, type } of patterns) {
        const matches = content.match(pattern) || [];
        count += matches.length;
        if (matches.length > 0 && examples.length < 5) {
          examples.push({ type, example: matches[0] });
        }
      }

      return { count, examples };
    };

    const vague = tally(VAGUE_PATTERNS);
    const concrete = tally(CONCRETE_PATTERNS);
    const wordCount = (content.match(/\b\w+\b/g) || []).length;

    return {
      wordCount,
      vagueCount: vague.count,
      concreteCount: concrete.count,
      vagueExamples: vague.examples,
      concreteExamples: concrete.examples,
      ratio: vague.count / Math.max(concrete.count, 1)
    };
  }

  /**
   * Detect gaps in documentation.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `{ mentionedButNotDefined, referencedDocuments,
   *   missingDetails }`; the first and last lists are capped at 10 entries
   */
  async detectGaps(documents) {
    const allContent = Object.values(documents).join('\n');

    // Concepts that are mentioned but never defined anywhere
    const definedTerms = new Set(
      Object.values(documents)
        .flatMap(content => this.extractTermDefinitions(content))
        .map(t => t.name.toLowerCase())
    );
    const mentionedButNotDefined = this.extractMentionedConcepts(allContent)
      .filter(term => !definedTerms.has(term.toLowerCase()));

    // References to documents that are not part of the set.
    // The name is read from the capture groups; re-matching the whole
    // reference text always returned the keyword ("see" / "refer") instead.
    const existingDocs = new Set(Object.keys(documents).map(d => d.toLowerCase()));
    const referencePattern = /\bsee\s+([A-Z][A-Z_]+)\.md|\brefer\s+to\s+([A-Z][A-Z_]+)\b/gi;
    const referencedDocuments = [];

    for (const [, seeTarget, referTarget] of allContent.matchAll(referencePattern)) {
      // "refer to X" has no ".md" suffix to disambiguate, so X must be
      // written in upper case; otherwise "refer to the ..." would match.
      if (referTarget !== undefined && referTarget !== referTarget.toUpperCase()) continue;

      const docName = (seeTarget ?? referTarget).toLowerCase();
      if (!existingDocs.has(docName)) {
        referencedDocuments.push(docName);
      }
    }

    // Open markers such as TODO / TBD
    const todoPattern = /(TODO|TBD|FIXME|XXX|WIP)[\s:]+([^\n]+)/gi;
    const missingDetails = [];
    for (const [, marker, detail] of allContent.matchAll(todoPattern)) {
      missingDetails.push({
        type: marker,
        detail: detail.trim().slice(0, 100)
      });
    }

    return {
      mentionedButNotDefined: [...new Set(mentionedButNotDefined)].slice(0, 10),
      referencedDocuments: [...new Set(referencedDocuments)],
      missingDetails: missingDetails.slice(0, 10)
    };
  }

  /**
   * Extract concepts that are mentioned but might need definition.
   * @param {string} content - Content to analyze
   * @returns {string[]} Capitalized terms (optionally preceded by "the")
   *   that are directly followed by should/must/will/need(s)/require(s)
   */
  extractMentionedConcepts(content) {
    // Case-sensitive on purpose: with the `i` flag `[A-Z][a-z]+` matched any
    // word, so whole lowercase word runs were captured as "concepts".
    const needsDefinitionPattern = /(?:[Tt]he\s+)?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:should|must|will|needs?|requires?)/g;

    return [...content.matchAll(needsDefinitionPattern)]
      .map(match => match[1])
      .filter(term => this.isLikelyDomainConcept(term));
  }

  /**
   * Validate cross-references between documents.
   * @param {Object} documents - Map of document name to content
   * @returns {Promise<Object>} `{ valid, broken, suggestions }`; suggestions
   *   (documents mentioned by name without a link) are capped at 10
   */
  async validateReferences(documents) {
    const references = {
      valid: [],
      broken: [],
      suggestions: []
    };

    const extension = '.md';
    const docNames = Object.keys(documents);
    const knownDocs = new Set(docNames);
    // Markdown links, or "see NAME.md"
    const linkPattern = /\[([^\]]+)\]\(([^)]+)\)|see\s+([A-Z][A-Z_]+\.md)/gi;

    for (const [docName, content] of Object.entries(documents)) {
      const linkedTargets = new Set();

      for (const match of content.matchAll(linkPattern)) {
        const target = match[2] || match[3];
        if (!target || !target.endsWith(extension)) continue;

        // Drop a leading "./" and only the trailing extension
        const targetName = target
          .replace(/^\.\//, '')
          .slice(0, -extension.length)
          .toLowerCase();
        linkedTargets.add(targetName);

        if (knownDocs.has(targetName)) {
          references.valid.push({
            from: docName,
            to: targetName,
            text: match[1] || target
          });
        } else {
          references.broken.push({
            from: docName,
            to: targetName,
            suggestion: this.findSimilarDoc(targetName, docNames)
          });
        }
      }

      // Implicit references: another document is named but not linked
      for (const otherDoc of docNames) {
        if (otherDoc === docName || linkedTargets.has(otherDoc)) continue;

        // Document names come from file names, so escape them before use
        const mention = new RegExp(`\\b${escapeRegExp(otherDoc)}\\b`, 'i');
        if (mention.test(content)) {
          references.suggestions.push({
            from: docName,
            to: otherDoc,
            suggestion: `Consider adding explicit link to ${otherDoc.toUpperCase()}.md`
          });
        }
      }
    }

    // Each (from, to) pair is visited once, so no de-duplication is needed.
    references.suggestions = references.suggestions.slice(0, 10);

    return references;
  }

  /**
   * Find similar document name.
   * @param {string} name - Document name to match
   * @param {Array} docNames - Available document names
   * @returns {string|null} A "Did you mean ...?" hint for the first document
   *   whose name contains, or is contained in, `name`; null when none does
   */
  findSimilarDoc(name, docNames) {
    const candidate = docNames.find(doc => doc.includes(name) || name.includes(doc));
    return candidate === undefined ? null : `Did you mean ${candidate.toUpperCase()}.md?`;
  }

  /**
   * Generate analysis summary.
   * @param {Object} analysis - Full analysis results
   * @returns {Object} `{ score, rating, issues, conceptCount, documentedTerms }`
   */
  generateSummary(analysis) {
    const issues = [];
    const addIssue = (type, severity, count, message) => {
      issues.push({ type, severity, count, message });
    };

    const inconsistencyCount = analysis.terminology.inconsistencies.length;
    if (inconsistencyCount > 0) {
      addIssue('terminology', 'medium', inconsistencyCount,
        `${inconsistencyCount} terminology inconsistencies found`);
    }

    const contradictionCount = analysis.contradictions.length;
    if (contradictionCount > 0) {
      addIssue('contradiction', 'high', contradictionCount,
        `${contradictionCount} potential contradictions found`);
    }

    if (analysis.specificity.overall.rating === 'vague') {
      addIssue('specificity', 'medium', analysis.specificity.overall.vagueTerms,
        'Documents contain excessive vague language');
    }

    const totalGaps = analysis.gaps.mentionedButNotDefined.length +
      analysis.gaps.referencedDocuments.length +
      analysis.gaps.missingDetails.length;
    if (totalGaps > 0) {
      addIssue('gaps', 'low', totalGaps, `${totalGaps} documentation gaps detected`);
    }

    const brokenCount = analysis.crossReferences.broken.length;
    if (brokenCount > 0) {
      addIssue('references', 'medium', brokenCount,
        `${brokenCount} broken document references`);
    }

    // Health score: start at 100 and subtract a per-item penalty by severity
    const penaltyBySeverity = { high: 15, medium: 8 };
    let score = 100;
    for (const issue of issues) {
      score -= issue.count * (penaltyBySeverity[issue.severity] ?? 3);
    }
    score = Math.max(0, Math.min(100, score));

    return {
      score,
      rating: score >= 80 ? 'good' : score >= 60 ? 'fair' : 'needs improvement',
      issues,
      conceptCount: analysis.concepts.totalUniqueConcepts,
      documentedTerms: analysis.terminology.documented
    };
  }
}
830
+
831
// Public API: the analyzer class plus the pattern tables it is driven by.
module.exports = { SemanticDocumentAnalyzer, TECHNICAL_TERMS, VAGUE_PATTERNS, CONCRETE_PATTERNS };