preflight-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ import fs from 'node:fs/promises';
2
+ import fsSync from 'node:fs';
3
+ import path from 'node:path';
4
+ import Database from 'better-sqlite3';
5
// Make sure a directory exists, creating any missing parent directories.
async function ensureDir(dirPath) {
    await fs.mkdir(dirPath, { recursive: true });
}
8
/**
 * Probe an index database for incremental-update support.
 *
 * Opens the database read-only and looks for the `file_meta` table in
 * sqlite_master; its presence means the index records per-file hashes.
 * Any failure (missing file, corrupt database, ...) counts as "not supported".
 *
 * @param {string} dbPath - Path to the SQLite index database.
 * @returns {boolean} True when the `file_meta` table exists.
 */
export function supportsIncrementalIndex(dbPath) {
    try {
        const db = new Database(dbPath, { readonly: true });
        try {
            const found = db
                .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='file_meta'`)
                .get();
            return Boolean(found);
        }
        finally {
            db.close();
        }
    }
    catch {
        return false;
    }
}
27
/**
 * Load the path -> sha256 map recorded by a previous index build.
 *
 * @param {object} db - Open index database (better-sqlite3 handle).
 * @returns {Map<string, string>} Stored hashes keyed by bundle-relative path;
 *   empty when the `file_meta` table does not exist yet.
 */
function getStoredFileMeta(db) {
    try {
        const rows = db.prepare('SELECT path, sha256 FROM file_meta').all();
        return new Map(rows.map((row) => [row.path, row.sha256]));
    }
    catch {
        // Table doesn't exist yet (index predates incremental support).
        return new Map();
    }
}
43
/**
 * Perform an incremental index update based on file SHA256 hashes.
 *
 * Compares each candidate file's hash against the hash recorded in the
 * `file_meta` table and only re-indexes files that were added or changed;
 * files no longer present are removed from the index. Falls back to a full
 * rebuild when the database is missing or predates incremental support.
 *
 * @param {string} dbPath - Path to the SQLite index database.
 * @param {Array<object>} files - Candidate files; each entry carries
 *   bundleNormRelativePath, bundleNormAbsPath, sha256, repoId and kind.
 * @param {{includeDocs: boolean, includeCode: boolean}} opts - Kind filter.
 * @returns {Promise<{added: number, updated: number, removed: number,
 *   unchanged: number, totalIndexed: number}>} Statistics about the update.
 */
export async function incrementalIndexUpdate(dbPath, files, opts) {
    // Same eligibility filter rebuildIndex applies when inserting lines.
    const isEligible = (f) => !(f.kind === 'doc' && !opts.includeDocs) &&
        !(f.kind === 'code' && !opts.includeCode);
    // Check if database exists and supports incremental updates.
    const dbExists = await fs.access(dbPath).then(() => true).catch(() => false);
    if (!dbExists || !supportsIncrementalIndex(dbPath)) {
        // Fall back to a full rebuild. BUGFIX: only count files the rebuild
        // will actually index — rebuildIndex skips files excluded by the
        // doc/code filter, so `files.length` would overcount.
        await rebuildIndex(dbPath, files, opts);
        const indexed = files.filter(isEligible).length;
        return {
            added: indexed,
            updated: 0,
            removed: 0,
            unchanged: 0,
            totalIndexed: indexed,
        };
    }
    const db = new Database(dbPath);
    try {
        db.pragma('journal_mode = WAL');
        db.pragma('synchronous = NORMAL');
        // Hashes recorded by the previous build.
        const storedMeta = getStoredFileMeta(db);
        // NOTE(review): currentPaths deliberately includes files excluded by
        // the kind filter, so previously-indexed entries of a now-filtered
        // kind are retained rather than removed — confirm this is intended.
        const currentPaths = new Set(files.map((f) => f.bundleNormRelativePath));
        // Prepared statements shared by all phases (previously `DELETE FROM
        // lines` was prepared twice).
        const deleteLines = db.prepare('DELETE FROM lines WHERE path = ?');
        const deleteMeta = db.prepare('DELETE FROM file_meta WHERE path = ?');
        const insertLine = db.prepare('INSERT INTO lines (content, path, repo, kind, lineNo) VALUES (?, ?, ?, ?, ?)');
        const upsertMeta = db.prepare('INSERT OR REPLACE INTO file_meta (path, sha256, indexed_at) VALUES (?, ?, ?)');
        // Phase 1: remove files that are in the index but no longer present.
        const pathsToRemove = [...storedMeta.keys()].filter((p) => !currentPaths.has(p));
        if (pathsToRemove.length > 0) {
            db.transaction((paths) => {
                for (const p of paths) {
                    deleteLines.run(p);
                    deleteMeta.run(p);
                }
            })(pathsToRemove);
        }
        const removed = pathsToRemove.length;
        // Phase 2: categorize eligible files as new, changed, or unchanged.
        const filesToIndex = [];
        const filesToUpdate = [];
        let unchanged = 0;
        for (const f of files) {
            if (!isEligible(f))
                continue;
            const storedSha = storedMeta.get(f.bundleNormRelativePath);
            if (!storedSha)
                filesToIndex.push(f); // new file
            else if (storedSha !== f.sha256)
                filesToUpdate.push(f); // changed file
            else
                unchanged++;
        }
        // Index one file: insert its non-empty lines and record its hash.
        // Reads synchronously inside the transaction for performance.
        const indexFile = (f, now) => {
            const text = fsSync.readFileSync(f.bundleNormAbsPath, 'utf8');
            const lines = text.split('\n');
            for (let i = 0; i < lines.length; i++) {
                const line = lines[i] ?? '';
                if (!line.trim())
                    continue; // skip empty lines to keep the index smaller
                insertLine.run(line, f.bundleNormRelativePath, f.repoId, f.kind, i + 1);
            }
            upsertMeta.run(f.bundleNormRelativePath, f.sha256, now);
        };
        // Phase 3: changed files — drop their old lines, then re-index.
        if (filesToUpdate.length > 0) {
            db.transaction((changed) => {
                const now = new Date().toISOString();
                for (const f of changed) {
                    deleteLines.run(f.bundleNormRelativePath);
                    indexFile(f, now);
                }
            })(filesToUpdate);
        }
        // Phase 4: brand-new files — just index them.
        if (filesToIndex.length > 0) {
            db.transaction((fresh) => {
                const now = new Date().toISOString();
                for (const f of fresh) {
                    indexFile(f, now);
                }
            })(filesToIndex);
        }
        const added = filesToIndex.length;
        const updated = filesToUpdate.length;
        return {
            added,
            updated,
            removed,
            unchanged,
            totalIndexed: added + updated + unchanged,
        };
    }
    finally {
        db.close();
    }
}
176
// Note: rebuildIndex signature changed
// Old callers should use the wrapper below
/**
 * Rebuild the full-text index from scratch.
 *
 * Deletes any existing database (plus WAL/SHM sidecars), recreates the FTS5
 * `lines` table and the `file_meta` hash table, then indexes every eligible
 * file's non-empty lines inside a single transaction.
 *
 * Accepts both historical argument orders for backward compatibility:
 *   rebuildIndex(dbPath, files, opts)  — old
 *   rebuildIndex(files, dbPath, opts)  — new
 *
 * @param {string|Array<object>} dbPathOrFiles - dbPath (old) or files (new).
 * @param {Array<object>|string} filesOrDbPath - files (old) or dbPath (new).
 * @param {{includeDocs: boolean, includeCode: boolean}} opts - Kind filter.
 */
export async function rebuildIndex(dbPathOrFiles, filesOrDbPath, opts) {
    // Handle both old and new signatures for backward compatibility.
    // A string first argument means the old (dbPath-first) call order.
    let dbPath;
    let files;
    if (typeof dbPathOrFiles === 'string') {
        // Old signature: rebuildIndex(dbPath, files, opts)
        dbPath = dbPathOrFiles;
        files = filesOrDbPath;
    }
    else {
        // New signature: rebuildIndex(files, dbPath, opts)
        files = dbPathOrFiles;
        dbPath = filesOrDbPath;
    }
    await ensureDir(path.dirname(dbPath));
    // Start from a clean slate: remove the database file itself...
    await fs.rm(dbPath, { force: true });
    // ...and also remove WAL/SHM files if present, so no stale journal state
    // can leak into the fresh database.
    await fs.rm(dbPath + '-wal', { force: true });
    await fs.rm(dbPath + '-shm', { force: true });
    const db = new Database(dbPath);
    try {
        db.pragma('journal_mode = WAL');
        db.pragma('synchronous = NORMAL');
        // Create FTS5 table for full-text search. Only `content` is tokenized;
        // the remaining columns are stored payload (UNINDEXED).
        db.exec(`
      CREATE VIRTUAL TABLE lines USING fts5(
        content,
        path UNINDEXED,
        repo UNINDEXED,
        kind UNINDEXED,
        lineNo UNINDEXED,
        tokenize='unicode61'
      );
    `);
        // Create file_meta table for incremental indexing support
        db.exec(`
      CREATE TABLE IF NOT EXISTS file_meta (
        path TEXT PRIMARY KEY,
        sha256 TEXT NOT NULL,
        indexed_at TEXT NOT NULL
      );
    `);
        const insertLine = db.prepare(`INSERT INTO lines (content, path, repo, kind, lineNo) VALUES (?, ?, ?, ?, ?)`);
        const insertMeta = db.prepare(`INSERT INTO file_meta (path, sha256, indexed_at) VALUES (?, ?, ?)`);
        const insertMany = db.transaction((fileList) => {
            const now = new Date().toISOString();
            for (const f of fileList) {
                // Respect the doc/code kind filter from opts.
                if (f.kind === 'doc' && !opts.includeDocs)
                    continue;
                if (f.kind === 'code' && !opts.includeCode)
                    continue;
                // Read file synchronously inside transaction for better performance.
                const text = fsSync.readFileSync(f.bundleNormAbsPath, 'utf8');
                const lines = text.split('\n');
                for (let i = 0; i < lines.length; i++) {
                    const line = lines[i] ?? '';
                    // Skip empty lines to keep the index smaller.
                    if (!line.trim())
                        continue;
                    insertLine.run(line, f.bundleNormRelativePath, f.repoId, f.kind, i + 1);
                }
                // Store file metadata for incremental updates
                insertMeta.run(f.bundleNormRelativePath, f.sha256, now);
            }
        });
        insertMany(files);
    }
    finally {
        db.close();
    }
}
249
/**
 * Split free-form text into at most 12 FTS-safe tokens.
 *
 * A token is a run of two or more unicode letters, digits, underscores,
 * dots, slashes, or dashes; everything else is treated as a separator.
 *
 * @param {string} input - Raw user text.
 * @returns {string[]} Up to 12 tokens; empty for blank input.
 */
function tokenizeForSafeQuery(input) {
    const trimmed = input.trim();
    if (!trimmed)
        return [];
    // Include unicode letters/digits/underscore/dot/slash/dash.
    const matches = trimmed.match(/[\p{L}\p{N}_.\/-]{2,}/gu) ?? [];
    return matches.slice(0, 12);
}
265
/**
 * Turn a user query into a syntactically safe FTS5 MATCH expression.
 *
 * A `fts:` prefix passes the remainder through verbatim (power-user escape
 * hatch). Otherwise each extracted token is double-quoted and OR-ed together;
 * when no tokens can be extracted, the whole input is quoted as a single
 * phrase on a best-effort basis.
 *
 * @param {string} input - Raw user query.
 * @returns {string} An FTS5 query string.
 */
export function buildFtsQuery(input) {
    const trimmed = input.trim();
    if (trimmed.startsWith('fts:')) {
        // Trust the caller's raw FTS5 syntax.
        return trimmed.slice(4).trim();
    }
    // Double internal quotes so each piece stays a valid quoted FTS term.
    const quote = (term) => `"${term.replaceAll('"', '""')}"`;
    const tokens = tokenizeForSafeQuery(trimmed);
    if (tokens.length === 0) {
        // Quote the whole thing to avoid syntax errors.
        return quote(trimmed);
    }
    return tokens.map(quote).join(' OR ');
}
279
/**
 * Run a full-text search over the line index.
 *
 * @param {string} dbPath - Path to the SQLite index database.
 * @param {string} query - User query; converted via buildFtsQuery.
 * @param {string} scope - 'docs' or 'code' to narrow by kind; anything else
 *   searches both.
 * @param {number} limit - Maximum number of hits to return.
 * @returns {Array<{path: string, repo: string, kind: string, lineNo: number,
 *   snippet: string}>} Hits ranked by bm25 relevance.
 */
export function searchIndex(dbPath, query, scope, limit) {
    const db = new Database(dbPath, { readonly: true });
    try {
        const ftsQuery = buildFtsQuery(query);
        // The kind filter is assembled from fixed string fragments only, so
        // this interpolation is injection-safe; the query itself is bound.
        let kindFilter = '';
        if (scope === 'docs')
            kindFilter = `kind = 'doc' AND`;
        else if (scope === 'code')
            kindFilter = `kind = 'code' AND`;
        const rows = db.prepare(`
      SELECT
        path,
        repo,
        kind,
        lineNo,
        snippet(lines, 0, '[', ']', '…', 10) AS snippet
      FROM lines
      WHERE ${kindFilter} lines MATCH ?
      ORDER BY bm25(lines)
      LIMIT ?
    `).all(ftsQuery, limit);
        return rows.map((row) => ({
            path: row.path,
            repo: row.repo,
            kind: row.kind,
            lineNo: row.lineNo,
            snippet: row.snippet,
        }));
    }
    finally {
        db.close();
    }
}
309
// Negation patterns that might indicate contradiction of a claim:
// explicit negators, lifecycle words implying something no longer holds,
// and contrastive phrases.
const NEGATION_PATTERNS = [
    /\b(not|no|never|cannot|can't|won't|doesn't|don't|isn't|aren't|wasn't|weren't|hasn't|haven't|hadn't)\b/i,
    /\b(deprecated|removed|obsolete|discontinued|unsupported|disabled)\b/i,
    /\b(instead of|rather than|unlike|contrary to|in contrast)\b/i,
];
// Affirmation patterns that might indicate support for a claim:
// copulas/auxiliaries, capability verbs, and endorsement adjectives.
const AFFIRMATION_PATTERNS = [
    /\b(is|are|was|were|has|have|does|do|can|will|should|must)\b/i,
    /\b(supports?|enables?|provides?|allows?|includes?)\b/i,
    /\b(recommended|required|default|standard|official)\b/i,
];
/**
 * Classify an evidence snippet against a claim as supporting, contradicting,
 * or merely related, with a heuristic relevance score in [0, 1].
 *
 * @param {string} snippet - Matched text from the index.
 * @param {string[]} claimTokens - Tokens extracted from the claim.
 * @returns {{type: 'supporting'|'contradicting'|'related', score: number}}
 */
function classifyEvidence(snippet, claimTokens) {
    const lowerSnippet = snippet.toLowerCase();
    // Fraction of claim tokens that literally appear in the snippet.
    const tokenMatches = claimTokens.filter((t) => lowerSnippet.includes(t.toLowerCase())).length;
    const tokenRatio = claimTokens.length > 0 ? tokenMatches / claimTokens.length : 0;
    const hasNegation = NEGATION_PATTERNS.some((p) => p.test(snippet));
    const hasAffirmation = AFFIRMATION_PATTERNS.some((p) => p.test(snippet));
    // Base score from token overlap, mapped onto the 0.3-1.0 range.
    let score = tokenRatio * 0.7 + 0.3;
    let type;
    if (tokenRatio >= 0.5) {
        // High token match — directly relevant. Pure negation language likely
        // contradicts the claim; anything else supports it.
        // (A third "related" fallback in the original was unreachable dead
        // code: when `hasNegation && !hasAffirmation` is false,
        // `hasAffirmation || !hasNegation` is necessarily true.)
        if (hasNegation && !hasAffirmation) {
            type = 'contradicting';
            score *= 0.9; // slightly lower confidence for contradictions
        }
        else {
            type = 'supporting';
        }
    }
    else if (tokenRatio >= 0.25) {
        // Moderate token match — probably related.
        type = 'related';
        score *= 0.7;
    }
    else {
        // Low token match — tangentially related.
        type = 'related';
        score *= 0.5;
    }
    return { type, score: Math.min(1, Math.max(0, score)) };
}
364
/**
 * Derive an overall confidence score and label from classified evidence.
 *
 * Supporting hits count at full weight, contradicting hits at 0.8, related
 * hits at 0.3. When contradictions outweigh support, confidence is capped
 * low; otherwise it is the net supported fraction. A small quantity bonus
 * (up to +0.2) rewards having more evidence overall.
 *
 * @param {Array<{relevanceScore: number}>} supporting
 * @param {Array<{relevanceScore: number}>} contradicting
 * @param {Array<{relevanceScore: number}>} related
 * @returns {{confidence: number, label: 'high'|'medium'|'low'|'none'}}
 */
function calculateConfidence(supporting, contradicting, related) {
    const totalEvidence = supporting.length + contradicting.length + related.length;
    if (totalEvidence === 0)
        return { confidence: 0, label: 'none' };
    // Weight each bucket by its relevance scores and type factor.
    const sumScores = (hits, factor) => hits.reduce((acc, hit) => acc + hit.relevanceScore * factor, 0);
    const supportingWeight = sumScores(supporting, 1);
    const contradictingWeight = sumScores(contradicting, 0.8);
    const relatedWeight = sumScores(related, 0.3);
    const totalWeight = supportingWeight + contradictingWeight + relatedWeight;
    let confidence;
    if (totalWeight === 0) {
        confidence = 0;
    }
    else if (contradictingWeight > supportingWeight) {
        // Contradictions dominate: keep confidence near zero.
        confidence = 0.2 * (supportingWeight / totalWeight);
    }
    else {
        // Support dominates: net supported fraction.
        confidence = (supportingWeight - contradictingWeight * 0.5) / totalWeight;
    }
    // Quantity bonus: more evidence means more confidence, up to a point.
    confidence = Math.min(1, confidence + Math.min(0.2, totalEvidence * 0.02));
    let label = 'none';
    if (confidence >= 0.7)
        label = 'high';
    else if (confidence >= 0.4)
        label = 'medium';
    else if (confidence > 0)
        label = 'low';
    return { confidence, label };
}
405
/**
 * Build a one-line, human-readable summary of a verification result.
 *
 * @param {string} claim - The claim that was checked (truncated to 50 chars
 *   in the "no evidence" message).
 * @param {Array} supporting - Supporting evidence hits.
 * @param {Array} contradicting - Contradicting evidence hits.
 * @param {Array} related - Related-but-inconclusive evidence hits.
 * @param {number} confidence - Numeric confidence (accepted for signature
 *   stability; only `label` appears in the text).
 * @param {string} label - Confidence label shown in the summary.
 * @returns {string} Semicolon-separated summary line.
 */
function generateVerificationSummary(claim, supporting, contradicting, related, confidence, label) {
    const total = supporting.length + contradicting.length + related.length;
    if (total === 0) {
        const shortClaim = claim.length > 50 ? `${claim.slice(0, 50)}...` : claim;
        return `No evidence found for: "${shortClaim}"`;
    }
    const segments = [`Found ${total} piece(s) of evidence (confidence: ${label})`];
    if (supporting.length > 0)
        segments.push(`${supporting.length} supporting`);
    if (contradicting.length > 0)
        segments.push(`${contradicting.length} potentially contradicting`);
    // Mention related hits only when nothing conclusive was found.
    if (related.length > 0 && supporting.length + contradicting.length === 0)
        segments.push(`${related.length} related but inconclusive`);
    return segments.join('; ');
}
426
/**
 * Verify a claim against the search index.
 *
 * Unlike plain searchIndex, this (1) classifies each hit as supporting,
 * contradicting, or related, (2) computes an overall confidence score, and
 * (3) produces a human-readable summary.
 *
 * @param {string} dbPath - Path to the SQLite index database.
 * @param {string} claim - Claim text to verify.
 * @param {string} scope - 'docs', 'code', or anything else for both.
 * @param {number} limit - Maximum number of hits to consider.
 * @returns {object} Verification result with classified, score-sorted
 *   evidence buckets.
 */
export function verifyClaimInIndex(dbPath, claim, scope, limit) {
    const rawHits = searchIndex(dbPath, claim, scope, limit);
    const claimTokens = tokenizeForSafeQuery(claim);
    // Bucket every hit by its evidence type, annotated with its score.
    const buckets = { supporting: [], contradicting: [], related: [] };
    for (const hit of rawHits) {
        const { type, score } = classifyEvidence(hit.snippet, claimTokens);
        buckets[type].push({ ...hit, evidenceType: type, relevanceScore: score });
    }
    // Most relevant evidence first within each bucket.
    const byScoreDesc = (a, b) => b.relevanceScore - a.relevanceScore;
    const supporting = buckets.supporting.sort(byScoreDesc);
    const contradicting = buckets.contradicting.sort(byScoreDesc);
    const related = buckets.related.sort(byScoreDesc);
    const { confidence, label } = calculateConfidence(supporting, contradicting, related);
    const summary = generateVerificationSummary(claim, supporting, contradicting, related, confidence, label);
    return {
        claim,
        found: rawHits.length > 0,
        confidence,
        confidenceLabel: label,
        summary,
        supporting,
        contradicting,
        related,
    };
}