preflight-mcp 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,170 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import crypto from 'node:crypto';
4
+ import Database from 'better-sqlite3';
5
/**
 * Hex-encoded SHA-256 digest of a UTF-8 string.
 * @param {string} text - Text to hash.
 * @returns {string} 64-character lowercase hex digest.
 */
function sha256Hex(text) {
  const hasher = crypto.createHash('sha256');
  hasher.update(text, 'utf8');
  return hasher.digest('hex');
}
8
/**
 * Stable identifier for a trace edge: the caller-supplied id (trimmed) when
 * non-blank, otherwise `tr_` plus the first 24 hex chars of a SHA-256 over
 * the edge's (source, type, target) tuple — so the same logical edge always
 * maps to the same id.
 * @param {{ id?: string, type: string,
 *           source: { type: string, id: string },
 *           target: { type: string, id: string } }} e
 * @returns {string}
 */
function edgeDeterministicId(e) {
  const explicit = e.id && e.id.trim();
  if (explicit) {
    return explicit;
  }
  const key = [e.source.type, e.source.id, e.type, e.target.type, e.target.id].join('|');
  const digest = crypto.createHash('sha256').update(key, 'utf8').digest('hex');
  return `tr_${digest.slice(0, 24)}`;
}
13
/**
 * Ensure that directory `dirPath` exists, creating any missing parents.
 * Idempotent: succeeds silently when the directory is already present.
 * @param {string} dirPath
 * @returns {Promise<void>}
 */
async function ensureDir(dirPath) {
  await fs.mkdir(dirPath, { recursive: true });
}
16
/**
 * Create the trace database file (and its parent directory) if needed, and
 * make sure the `trace_edges` table plus its lookup indexes exist.
 *
 * Opens a short-lived connection of its own and always closes it in the
 * `finally` block, even if the DDL fails.
 *
 * @param {string} traceDbPath - Filesystem path of the SQLite database file.
 * @returns {Promise<void>}
 */
export async function ensureTraceDb(traceDbPath) {
  await ensureDir(path.dirname(traceDbPath));
  const db = new Database(traceDbPath);
  try {
    // WAL + NORMAL: allows concurrent readers and reduces fsync overhead.
    db.pragma('journal_mode = WAL');
    db.pragma('synchronous = NORMAL');
    // Indexes match the three filters used by queryTraceEdges
    // (source_type+source_id, target_type+target_id, edge_type).
    db.exec(`
      CREATE TABLE IF NOT EXISTS trace_edges (
        id TEXT PRIMARY KEY,
        source_type TEXT NOT NULL,
        source_id TEXT NOT NULL,
        target_type TEXT NOT NULL,
        target_id TEXT NOT NULL,
        edge_type TEXT NOT NULL,
        confidence REAL NOT NULL,
        method TEXT NOT NULL,
        sources_json TEXT NOT NULL,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_trace_edges_source ON trace_edges(source_type, source_id);
      CREATE INDEX IF NOT EXISTS idx_trace_edges_target ON trace_edges(target_type, target_id);
      CREATE INDEX IF NOT EXISTS idx_trace_edges_edge_type ON trace_edges(edge_type);
    `);
  }
  finally {
    db.close();
  }
}
46
/**
 * Insert or update trace edges in the SQLite trace database.
 *
 * Edges are keyed by a deterministic id (see edgeDeterministicId), so a
 * repeated upsert of the same logical edge updates the existing row in place.
 * `created_at` is set only on first insert: the ON CONFLICT set-list below
 * deliberately omits it, so SQLite preserves the original value on updates —
 * no pre-read of the existing row is needed for that.
 *
 * @param {string} traceDbPath - Path to the SQLite database (created if missing).
 * @param {Array<object>} edges - Edges shaped like
 *   { id?, type, source: {type, id}, target: {type, id}, confidence?, method?, sources? }.
 * @returns {Promise<{ upserted: number, ids: string[] }>} Count and ids of written edges.
 */
export async function upsertTraceEdges(traceDbPath, edges) {
  await ensureTraceDb(traceDbPath);
  const db = new Database(traceDbPath);
  try {
    db.pragma('journal_mode = WAL');
    db.pragma('synchronous = NORMAL');
    const upsert = db.prepare(`
      INSERT INTO trace_edges (
        id, source_type, source_id, target_type, target_id, edge_type,
        confidence, method, sources_json, created_at, updated_at
      ) VALUES (
        @id, @source_type, @source_id, @target_type, @target_id, @edge_type,
        @confidence, @method, @sources_json, @created_at, @updated_at
      )
      ON CONFLICT(id) DO UPDATE SET
        source_type=excluded.source_type,
        source_id=excluded.source_id,
        target_type=excluded.target_type,
        target_id=excluded.target_id,
        edge_type=excluded.edge_type,
        confidence=excluded.confidence,
        method=excluded.method,
        sources_json=excluded.sources_json,
        updated_at=excluded.updated_at;
    `);
    const now = new Date().toISOString();
    // All rows written atomically: either every edge lands or none do.
    const tx = db.transaction((items) => {
      const ids = [];
      for (const e of items) {
        const id = edgeDeterministicId(e);
        // Clamp confidence into [0, 1]; default 0.5 when absent or not a
        // finite number (NaN would otherwise violate the NOT NULL REAL column).
        const confidence =
          typeof e.confidence === 'number' && Number.isFinite(e.confidence)
            ? Math.max(0, Math.min(1, e.confidence))
            : 0.5;
        // Anything other than an explicit 'exact' is treated as heuristic.
        const method = e.method === 'exact' ? 'exact' : 'heuristic';
        const sourcesJson = JSON.stringify(e.sources ?? []);
        upsert.run({
          id,
          source_type: e.source.type,
          source_id: e.source.id,
          target_type: e.target.type,
          target_id: e.target.id,
          edge_type: e.type,
          confidence,
          method,
          sources_json: sourcesJson,
          // Used only on insert; the conflict clause never overwrites created_at.
          created_at: now,
          updated_at: now,
        });
        ids.push(id);
      }
      return ids;
    });
    const ids = tx(edges);
    return { upserted: ids.length, ids };
  }
  finally {
    db.close();
  }
}
107
/**
 * Query trace edges, optionally filtered by source node, target node and/or
 * edge type, newest first.
 *
 * Opens the database read-only and always closes it before returning.
 *
 * @param {string} traceDbPath - Path to an existing SQLite trace database.
 * @param {{ source?: {type: string, id: string},
 *           target?: {type: string, id: string},
 *           edgeType?: string,
 *           limit?: number }} params - Filters; limit is clamped to [1, 500], default 50.
 * @returns {Array<object>} Edges as { id, source, target, type, confidence,
 *   method, sources, createdAt, updatedAt }.
 */
export function queryTraceEdges(traceDbPath, params) {
  const db = new Database(traceDbPath, { readonly: true });
  try {
    const where = [];
    const bind = {};
    if (params.source) {
      where.push('source_type = @source_type AND source_id = @source_id');
      bind.source_type = params.source.type;
      bind.source_id = params.source.id;
    }
    if (params.target) {
      where.push('target_type = @target_type AND target_id = @target_id');
      bind.target_type = params.target.type;
      bind.target_id = params.target.id;
    }
    if (params.edgeType) {
      where.push('edge_type = @edge_type');
      bind.edge_type = params.edgeType;
    }
    const sqlWhere = where.length ? `WHERE ${where.join(' AND ')}` : '';
    // Sanitize the limit to a finite integer in [1, 500] and bind it as a
    // parameter instead of interpolating it into the SQL text — a NaN or
    // fractional value would otherwise produce an invalid LIMIT clause.
    const requested = Number.isFinite(params.limit) ? Math.trunc(params.limit) : 50;
    bind.limit = Math.min(500, Math.max(1, requested));
    const stmt = db.prepare(`
      SELECT
        id,
        source_type,
        source_id,
        target_type,
        target_id,
        edge_type,
        confidence,
        method,
        sources_json,
        created_at,
        updated_at
      FROM trace_edges
      ${sqlWhere}
      ORDER BY updated_at DESC
      LIMIT @limit
    `);
    const rows = stmt.all(bind);
    return rows.map((r) => {
      // Corrupt or non-array sources_json degrades to an empty list rather
      // than failing the whole query.
      let sources = [];
      try {
        const parsed = JSON.parse(r.sources_json);
        if (Array.isArray(parsed)) {
          sources = parsed;
        }
      }
      catch {
        // ignore malformed JSON in the row
      }
      return {
        id: r.id,
        source: { type: r.source_type, id: r.source_id },
        target: { type: r.target_type, id: r.target_id },
        type: r.edge_type,
        confidence: r.confidence,
        method: r.method === 'exact' ? 'exact' : 'heuristic',
        sources,
        createdAt: r.created_at,
        updatedAt: r.updated_at,
      };
    });
  }
  finally {
    db.close();
  }
}
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "preflight-mcp",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "MCP server that creates evidence-based preflight bundles for GitHub repositories and library docs.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "author": "preflight-mcp contributors",
8
8
  "repository": {
9
9
  "type": "git",
10
- "url": "https://github.com/jonnyhoo/preflight-mcp.git"
10
+ "url": "git+https://github.com/jonnyhoo/preflight-mcp.git"
11
11
  },
12
12
  "bugs": {
13
13
  "url": "https://github.com/jonnyhoo/preflight-mcp/issues"
@@ -48,10 +48,12 @@
48
48
  },
49
49
  "dependencies": {
50
50
  "@modelcontextprotocol/sdk": "^1.25.1",
51
+ "@vscode/tree-sitter-wasm": "^0.3.0",
51
52
  "adm-zip": "^0.5.16",
52
53
  "better-sqlite3": "^12.5.0",
53
54
  "ignore": "^7.0.5",
54
55
  "node-cron": "^4.2.1",
56
+ "web-tree-sitter": "^0.26.3",
55
57
  "zod": "^4.2.1"
56
58
  },
57
59
  "devDependencies": {
@@ -1,206 +0,0 @@
1
- import crypto from 'node:crypto';
2
- import fs from 'node:fs/promises';
3
- import path from 'node:path';
4
- function nowIso() {
5
- return new Date().toISOString();
6
- }
7
- function toPosix(p) {
8
- return p.replaceAll('\\', '/');
9
- }
10
- function sha256Hex(text) {
11
- return crypto.createHash('sha256').update(text, 'utf8').digest('hex');
12
- }
13
- async function ensureDir(p) {
14
- await fs.mkdir(p, { recursive: true });
15
- }
16
- /**
17
- * Parse DeepWiki URL to extract owner/repo.
18
- * Supports formats like:
19
- * - https://deepwiki.com/owner/repo
20
- * - https://deepwiki.com/owner/repo/path/to/doc
21
- */
22
- function parseDeepWikiUrl(url) {
23
- try {
24
- const parsed = new URL(url);
25
- if (!parsed.hostname.includes('deepwiki.com'))
26
- return null;
27
- const parts = parsed.pathname.split('/').filter(Boolean);
28
- if (parts.length < 2)
29
- return null;
30
- return { owner: parts[0], repo: parts[1] };
31
- }
32
- catch {
33
- return null;
34
- }
35
- }
36
- /**
37
- * Fetch a DeepWiki page and extract its content.
38
- * Returns the page content as Markdown.
39
- */
40
- async function fetchDeepWikiPage(url, timeoutMs = 30000) {
41
- const controller = new AbortController();
42
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
43
- try {
44
- const res = await fetch(url, {
45
- headers: {
46
- 'User-Agent': 'preflight-mcp/0.1.1',
47
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
48
- },
49
- signal: controller.signal,
50
- });
51
- if (!res.ok) {
52
- throw new Error(`HTTP ${res.status}: ${res.statusText}`);
53
- }
54
- const html = await res.text();
55
- // Extract main content - DeepWiki typically renders docs in a main content area.
56
- // This is a best-effort extraction; real implementation would need more sophisticated parsing.
57
- const content = extractMarkdownFromHtml(html);
58
- const title = extractTitle(html);
59
- return { content, title };
60
- }
61
- finally {
62
- clearTimeout(timeoutId);
63
- }
64
- }
65
- /**
66
- * Simple HTML to Markdown-ish text extraction.
67
- * This is a best-effort converter for documentation pages.
68
- */
69
- function extractMarkdownFromHtml(html) {
70
- // Remove script and style tags
71
- let text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
72
- text = text.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
73
- // Convert common HTML elements to Markdown-ish format
74
- text = text.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, '\n# $1\n');
75
- text = text.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, '\n## $1\n');
76
- text = text.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, '\n### $1\n');
77
- text = text.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, '\n#### $1\n');
78
- text = text.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, '\n##### $1\n');
79
- text = text.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, '\n###### $1\n');
80
- // Code blocks
81
- text = text.replace(/<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, '\n```\n$1\n```\n');
82
- text = text.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, '\n```\n$1\n```\n');
83
- text = text.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
84
- // Lists
85
- text = text.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
86
- text = text.replace(/<ul[^>]*>/gi, '\n');
87
- text = text.replace(/<\/ul>/gi, '\n');
88
- text = text.replace(/<ol[^>]*>/gi, '\n');
89
- text = text.replace(/<\/ol>/gi, '\n');
90
- // Paragraphs and line breaks
91
- text = text.replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, '\n$1\n');
92
- text = text.replace(/<br\s*\/?>/gi, '\n');
93
- text = text.replace(/<hr\s*\/?>/gi, '\n---\n');
94
- // Bold and italic
95
- text = text.replace(/<strong[^>]*>([\s\S]*?)<\/strong>/gi, '**$1**');
96
- text = text.replace(/<b[^>]*>([\s\S]*?)<\/b>/gi, '**$1**');
97
- text = text.replace(/<em[^>]*>([\s\S]*?)<\/em>/gi, '*$1*');
98
- text = text.replace(/<i[^>]*>([\s\S]*?)<\/i>/gi, '*$1*');
99
- // Links
100
- text = text.replace(/<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
101
- // Remove remaining HTML tags
102
- text = text.replace(/<[^>]+>/g, '');
103
- // Decode HTML entities
104
- text = text.replace(/&amp;/g, '&');
105
- text = text.replace(/&lt;/g, '<');
106
- text = text.replace(/&gt;/g, '>');
107
- text = text.replace(/&quot;/g, '"');
108
- text = text.replace(/&#39;/g, "'");
109
- text = text.replace(/&nbsp;/g, ' ');
110
- // Clean up whitespace
111
- text = text.replace(/\n{3,}/g, '\n\n');
112
- text = text.trim();
113
- return text;
114
- }
115
- function extractTitle(html) {
116
- const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
117
- if (match?.[1]) {
118
- return match[1].replace(/\s*[-|]\s*DeepWiki.*$/i, '').trim();
119
- }
120
- return undefined;
121
- }
122
- function clipUtf8(text, maxBytes) {
123
- const normalized = text.replace(/\r\n/g, '\n');
124
- const buf = Buffer.from(normalized, 'utf8');
125
- if (buf.length <= maxBytes)
126
- return { text: normalized, truncated: false };
127
- const clipped = buf.subarray(0, maxBytes).toString('utf8');
128
- return { text: `${clipped}\n\n[TRUNCATED]\n`, truncated: true };
129
- }
130
- export async function ingestDeepWikiRepo(params) {
131
- const parsed = parseDeepWikiUrl(params.url);
132
- if (!parsed) {
133
- return {
134
- files: [],
135
- summary: {
136
- kind: 'deepwiki',
137
- url: params.url,
138
- repoId: params.url,
139
- fetchedAt: nowIso(),
140
- notes: ['Invalid DeepWiki URL format'],
141
- },
142
- };
143
- }
144
- const repoId = `${parsed.owner}/${parsed.repo}`;
145
- const fetchedAt = nowIso();
146
- const notes = [];
147
- const files = [];
148
- const fileRelPaths = [];
149
- // Create deepwiki directory structure
150
- const deepwikiDir = path.join(params.bundlePaths.rootDir, 'deepwiki', parsed.owner, parsed.repo);
151
- const normDir = path.join(deepwikiDir, 'norm');
152
- await ensureDir(normDir);
153
- try {
154
- const { content, title } = await fetchDeepWikiPage(params.url);
155
- if (!content.trim()) {
156
- notes.push('DeepWiki page returned empty content');
157
- }
158
- else {
159
- const clipped = clipUtf8(content, params.cfg.maxFileBytes);
160
- if (clipped.truncated) {
161
- notes.push(`Content truncated to maxFileBytes=${params.cfg.maxFileBytes}`);
162
- }
163
- // Add header with source info
164
- const header = `# ${title || repoId} (DeepWiki)\n\nSource: ${params.url}\nFetched: ${fetchedAt}\n\n---\n\n`;
165
- const finalContent = header + clipped.text;
166
- const fileName = 'index.md';
167
- const absDocPath = path.join(normDir, fileName);
168
- await fs.writeFile(absDocPath, finalContent, 'utf8');
169
- const bundleRelPosix = toPosix(path.relative(params.bundlePaths.rootDir, absDocPath));
170
- fileRelPaths.push(bundleRelPosix);
171
- files.push({
172
- repoId: `deepwiki:${repoId}`,
173
- kind: 'doc',
174
- repoRelativePath: fileName,
175
- bundleNormRelativePath: bundleRelPosix,
176
- bundleNormAbsPath: absDocPath,
177
- sha256: sha256Hex(finalContent),
178
- bytes: Buffer.byteLength(finalContent, 'utf8'),
179
- });
180
- }
181
- }
182
- catch (err) {
183
- notes.push(`Failed to fetch DeepWiki page: ${err instanceof Error ? err.message : String(err)}`);
184
- }
185
- // Write meta.json
186
- const metaPath = path.join(deepwikiDir, 'meta.json');
187
- await fs.writeFile(metaPath, JSON.stringify({
188
- kind: 'deepwiki',
189
- url: params.url,
190
- repoId,
191
- fetchedAt,
192
- files: fileRelPaths,
193
- notes: notes.length > 0 ? notes : undefined,
194
- }, null, 2) + '\n', 'utf8');
195
- return {
196
- files,
197
- summary: {
198
- kind: 'deepwiki',
199
- url: params.url,
200
- repoId,
201
- fetchedAt,
202
- files: fileRelPaths.length > 0 ? fileRelPaths : undefined,
203
- notes: notes.length > 0 ? notes : undefined,
204
- },
205
- };
206
- }