preflight-mcp 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,7 @@ async function fetchDeepWikiPage(url, timeoutMs = 30000) {
43
43
  try {
44
44
  const res = await fetch(url, {
45
45
  headers: {
46
- 'User-Agent': 'preflight-mcp/0.1.0',
46
+ 'User-Agent': 'preflight-mcp/0.1.1',
47
47
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
48
48
  },
49
49
  signal: controller.signal,
@@ -1,8 +1,7 @@
1
- import { execFile } from 'node:child_process';
1
+ import { spawn } from 'node:child_process';
2
2
  import fs from 'node:fs/promises';
3
3
  import path from 'node:path';
4
- import { promisify } from 'node:util';
5
- const execFileAsync = promisify(execFile);
4
+ import { logger } from '../logging/logger.js';
6
5
  export function parseOwnerRepo(input) {
7
6
  const trimmed = input.trim().replace(/^https?:\/\/github\.com\//i, '');
8
7
  const parts = trimmed.split('/').filter(Boolean);
@@ -15,18 +14,76 @@ export function toCloneUrl(ref) {
15
14
  return `https://github.com/${ref.owner}/${ref.repo}.git`;
16
15
  }
17
16
  async function runGit(args, opts) {
18
- const { stdout, stderr } = await execFileAsync('git', args, {
19
- cwd: opts?.cwd,
20
- timeout: opts?.timeoutMs ?? 5 * 60_000,
21
- env: {
22
- ...process.env,
23
- GIT_TERMINAL_PROMPT: '0',
24
- },
25
- windowsHide: true,
26
- encoding: 'utf8',
27
- maxBuffer: 10 * 1024 * 1024,
17
+ const timeoutMs = opts?.timeoutMs ?? 5 * 60_000;
18
+ return new Promise((resolve, reject) => {
19
+ const child = spawn('git', args, {
20
+ cwd: opts?.cwd,
21
+ env: {
22
+ ...process.env,
23
+ GIT_TERMINAL_PROMPT: '0',
24
+ },
25
+ windowsHide: true,
26
+ });
27
+ let stdout = '';
28
+ let stderr = '';
29
+ let timedOut = false;
30
+ let cleanedUp = false;
31
+ const cleanup = () => {
32
+ if (cleanedUp)
33
+ return;
34
+ cleanedUp = true;
35
+ if (timeoutHandle) {
36
+ clearTimeout(timeoutHandle);
37
+ }
38
+ };
39
+ const forceKill = () => {
40
+ if (child.killed)
41
+ return;
42
+ try {
43
+ // Try SIGKILL for forceful termination
44
+ child.kill('SIGKILL');
45
+ }
46
+ catch (err) {
47
+ logger.warn('Failed to kill git process', err instanceof Error ? err : undefined);
48
+ }
49
+ };
50
+ // Set up timeout
51
+ const timeoutHandle = setTimeout(() => {
52
+ timedOut = true;
53
+ logger.warn(`Git command timed out after ${timeoutMs}ms`, { args });
54
+ // Try graceful termination first
55
+ try {
56
+ child.kill('SIGTERM');
57
+ }
58
+ catch (err) {
59
+ logger.warn('Failed to send SIGTERM to git process', err instanceof Error ? err : undefined);
60
+ }
61
+ // Force kill after 5 seconds if still running
62
+ setTimeout(forceKill, 5000);
63
+ }, timeoutMs);
64
+ child.stdout?.on('data', (data) => {
65
+ stdout += data.toString('utf8');
66
+ });
67
+ child.stderr?.on('data', (data) => {
68
+ stderr += data.toString('utf8');
69
+ });
70
+ child.on('error', (err) => {
71
+ cleanup();
72
+ reject(err);
73
+ });
74
+ child.on('close', (code, signal) => {
75
+ cleanup();
76
+ if (timedOut) {
77
+ reject(new Error(`Git command timed out after ${timeoutMs}ms: git ${args.join(' ')}`));
78
+ }
79
+ else if (code !== 0) {
80
+ reject(new Error(`Git command failed with code ${code}: ${stderr || stdout}`));
81
+ }
82
+ else {
83
+ resolve({ stdout, stderr });
84
+ }
85
+ });
28
86
  });
29
- return { stdout, stderr };
30
87
  }
31
88
  export async function getRemoteHeadSha(cloneUrl) {
32
89
  const { stdout } = await runGit(['ls-remote', cloneUrl, 'HEAD'], { timeoutMs: 60_000 });
@@ -38,16 +95,44 @@ export async function getRemoteHeadSha(cloneUrl) {
38
95
  throw new Error(`Could not parse remote sha from: ${line}`);
39
96
  return sha;
40
97
  }
98
/**
 * Validate a git ref before it is passed to a git command line.
 *
 * Accepts only ASCII letters, digits, `_`, `.`, `/` and `-`, at most 256
 * characters. On top of the character whitelist it rejects shapes that git
 * itself refuses as ref names: a leading `-` (would be parsed as an option),
 * `..`, empty path components (leading/trailing `/` or `//`), a trailing `.`,
 * and components that start with `.` or end with `.lock`.
 *
 * @param {string} ref - Branch or tag name to validate.
 * @returns {void}
 * @throws {TypeError} If `ref` is a non-empty value that is not a string.
 * @throws {Error} If `ref` is empty, too long, or not a safe ref name.
 */
function validateGitRef(ref) {
  if (!ref) {
    throw new Error('Git ref cannot be empty');
  }
  // Arrays and other objects would otherwise be coerced by the regex test
  // and then fail later with an unrelated error.
  if (typeof ref !== 'string') {
    throw new TypeError('Git ref must be a string');
  }
  if (ref.length > 256) {
    throw new Error('Git ref too long (max 256 characters)');
  }
  const safeRefPattern = /^[a-zA-Z0-9_.\/-]+$/;
  if (!safeRefPattern.test(ref)) {
    throw new Error(`Invalid git ref: contains unsafe characters. Ref: ${ref}`);
  }
  // A leading dash could be interpreted as a git option.
  if (ref.startsWith('-')) {
    throw new Error('Invalid git ref: cannot start with hyphen');
  }
  if (ref.includes('..')) {
    throw new Error('Invalid git ref: cannot contain ".."');
  }
  const components = ref.split('/');
  if (components.some((part) => part.length === 0)) {
    throw new Error('Invalid git ref: cannot start or end with "/" or contain "//"');
  }
  if (components.some((part) => part.startsWith('.') || part.endsWith('.lock'))) {
    throw new Error('Invalid git ref: path components cannot start with "." or end with ".lock"');
  }
  if (ref.endsWith('.')) {
    throw new Error('Invalid git ref: cannot end with "."');
  }
}
41
124
  export async function shallowClone(cloneUrl, destDir, opts) {
42
125
  await fs.mkdir(path.dirname(destDir), { recursive: true });
43
126
  // Clean dest if exists.
44
127
  await fs.rm(destDir, { recursive: true, force: true });
45
128
  const args = ['-c', 'core.autocrlf=false', 'clone', '--depth', '1', '--no-tags', '--single-branch'];
46
129
  if (opts?.ref) {
130
+ // Validate ref before using it in git command
131
+ validateGitRef(opts.ref);
47
132
  args.push('--branch', opts.ref);
48
133
  }
49
134
  args.push(cloneUrl, destDir);
50
- await runGit(args, { timeoutMs: 15 * 60_000 });
135
+ await runGit(args, { timeoutMs: opts?.timeoutMs ?? 15 * 60_000 });
51
136
  }
52
137
  export async function getLocalHeadSha(repoDir) {
53
138
  const { stdout } = await runGit(['-C', repoDir, 'rev-parse', 'HEAD']);
@@ -0,0 +1,82 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import AdmZip from 'adm-zip';
4
/**
 * Current time as an ISO-8601 UTC string.
 *
 * @returns {string} Timestamp such as `2024-01-01T00:00:00.000Z`.
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
7
/**
 * Build the HTTP headers sent with GitHub API requests.
 *
 * @param {{ githubToken?: string }} cfg - Configuration; when `githubToken`
 *   is truthy it is sent as a Bearer token.
 * @returns {Record<string, string>} Header map with `User-Agent`, `Accept`,
 *   and `Authorization` when a token is configured.
 */
function githubHeaders(cfg) {
  const authHeader = cfg.githubToken ? { Authorization: `Bearer ${cfg.githubToken}` } : {};
  return {
    'User-Agent': 'preflight-mcp/0.1.1',
    Accept: 'application/vnd.github+json',
    ...authHeader,
  };
}
17
/**
 * Create a directory, including missing parents. Does nothing if it already exists.
 *
 * @param {string} p - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(p) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(p, mkdirOptions);
}
20
/**
 * GET a URL and parse the response body as JSON.
 *
 * @param {string} url - URL to request.
 * @param {Record<string, string>} headers - Request headers.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} If the response status is not OK.
 */
async function fetchJson(url, headers) {
  const response = await fetch(url, { headers });
  if (response.ok) {
    const payload = await response.json();
    return payload;
  }
  throw new Error(`GitHub API error ${response.status}: ${response.statusText}`);
}
27
/**
 * Download a URL to a file, following redirects.
 *
 * The response body is streamed to disk when it is either a Node.js readable
 * stream or a Web ReadableStream (what the global `fetch` returns). Only when
 * no stream is available is the body buffered in memory before writing.
 * The parent directory of `destPath` is created if needed.
 *
 * @param {string} url - URL to download.
 * @param {Record<string, string>} headers - Request headers.
 * @param {string} destPath - File path to write.
 * @returns {Promise<void>}
 * @throws {Error} If the response status is not OK, or reading/writing fails.
 */
async function downloadToFile(url, headers, destPath) {
  const res = await fetch(url, { headers, redirect: 'follow' });
  if (!res.ok) {
    throw new Error(`Download error ${res.status}: ${res.statusText}`);
  }
  const body = res.body;
  await ensureDir(path.dirname(destPath));

  let source = null;
  if (body && typeof body.pipe === 'function') {
    // Already a Node.js readable stream.
    source = body;
  } else if (body && typeof body.getReader === 'function') {
    // Web ReadableStream: adapt it so it can be piped to a file.
    const { Readable } = await import('node:stream');
    source = Readable.fromWeb(body);
  }

  if (source) {
    const { createWriteStream } = await import('node:fs');
    const { pipeline } = await import('node:stream/promises');
    // pipeline() propagates errors from either side and destroys both streams,
    // so a failed download does not leave an open file handle behind.
    await pipeline(source, createWriteStream(destPath));
    return;
  }

  // No stream support: buffer the whole body.
  const buf = Buffer.from(await res.arrayBuffer());
  await fs.writeFile(destPath, buf);
}
51
/**
 * Extract every entry of a zip archive into `destDir`, overwriting existing files.
 *
 * @param {string} zipPath - Path to the zip archive.
 * @param {string} destDir - Directory to extract into; created if missing.
 * @returns {Promise<void>}
 */
async function extractZip(zipPath, destDir) {
  await ensureDir(destDir);
  const overwriteExisting = true;
  const archive = new AdmZip(zipPath);
  archive.extractAllTo(destDir, overwriteExisting);
}
56
/**
 * Return the only sub-directory of `root` when there is exactly one,
 * otherwise return `root` itself. Non-directory entries are ignored.
 *
 * @param {string} root - Directory to inspect.
 * @returns {Promise<string>} Path of the single sub-directory, or `root`.
 */
async function findSingleTopLevelDir(root) {
  const dirents = await fs.readdir(root, { withFileTypes: true });
  const subdirPaths = [];
  for (const dirent of dirents) {
    if (dirent.isDirectory()) {
      subdirPaths.push(path.join(root, dirent.name));
    }
  }
  return subdirPaths.length === 1 ? subdirPaths[0] : root;
}
63
/**
 * Download a repository snapshot from the GitHub zipball API and extract it.
 *
 * When no ref is given, the repository's default branch is looked up through
 * the API (falling back to `HEAD`). The zip is written under `params.destDir`,
 * extracted into a fresh `extracted-<timestamp>` sub-directory, and then
 * removed; the extracted tree is left in place for the caller.
 *
 * @param {{ cfg: object, owner: string, repo: string, ref?: string, destDir: string }} params
 *   `cfg` is passed to `githubHeaders`; `destDir` is the working directory.
 * @returns {Promise<{ repoRoot: string, refUsed: string, fetchedAt: string }>}
 *   Path of the extracted repository root, the ref that was fetched, and an
 *   ISO timestamp.
 * @throws {Error} If an API call, the download, or the extraction fails.
 */
export async function downloadAndExtractGitHubArchive(params) {
  const headers = githubHeaders(params.cfg);
  // Escape owner/repo so unexpected characters cannot change the URL path.
  const repoApiUrl = `https://api.github.com/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}`;

  // Resolve ref if not provided.
  let refUsed = (params.ref ?? '').trim();
  if (!refUsed) {
    const repoInfo = await fetchJson(repoApiUrl, headers);
    refUsed = repoInfo.default_branch || 'HEAD';
  }

  // Keep path separators and other odd characters out of the temp filename
  // so the zip always lands directly inside destDir.
  const safeName = `${params.owner}-${params.repo}`.replace(/[^a-zA-Z0-9_.-]/g, '_');
  const zipPath = path.join(params.destDir, `github-zipball-${safeName}-${Date.now()}.zip`);
  // Use the API zipball endpoint so ref can be branch/tag/SHA (including slashes via URL-encoding).
  const zipballUrl = `${repoApiUrl}/zipball/${encodeURIComponent(refUsed)}`;

  await ensureDir(params.destDir);
  let repoRoot;
  try {
    await downloadToFile(zipballUrl, headers, zipPath);
    const extractDir = path.join(params.destDir, `extracted-${Date.now()}`);
    await extractZip(zipPath, extractDir);
    repoRoot = await findSingleTopLevelDir(extractDir);
  } finally {
    // Best-effort cleanup, also on failure: remove the zip file
    // (the extracted tree is kept for the caller to consume).
    await fs.rm(zipPath, { force: true }).catch(() => undefined);
  }
  return { repoRoot, refUsed, fetchedAt: nowIso() };
}
@@ -32,7 +32,7 @@ function isProbablyBinary(buf) {
32
32
  }
33
33
  return false;
34
34
  }
35
- function classifyKind(repoRelativePathPosix) {
35
+ export function classifyIngestedFileKind(repoRelativePathPosix) {
36
36
  const base = path.posix.basename(repoRelativePathPosix).toLowerCase();
37
37
  const ext = path.posix.extname(repoRelativePathPosix).toLowerCase();
38
38
  if (base === 'readme' ||
@@ -135,7 +135,7 @@ export async function ingestRepoToBundle(params) {
135
135
  await fs.mkdir(path.dirname(normDest), { recursive: true });
136
136
  await fs.writeFile(normDest, normalized, 'utf8');
137
137
  totalBytes += st.size;
138
- const kind = classifyKind(f.relPosix);
138
+ const kind = classifyIngestedFileKind(f.relPosix);
139
139
  const sha256 = sha256Hex(Buffer.from(normalized, 'utf8'));
140
140
  const bundleNormRelativePath = `${params.bundleNormPrefixPosix}/${f.relPosix}`;
141
141
  files.push({
@@ -1,5 +1,28 @@
1
1
  import path from 'node:path';
2
/**
 * Validate a bundle ID so it is safe to use as a single path component.
 *
 * Only ASCII letters, digits, hyphens and underscores are allowed, at most
 * 128 characters. Because dots and slashes are excluded, an accepted ID can
 * be neither a relative path like `..` nor a dot-file name.
 *
 * @param {string} bundleId - Bundle identifier to validate.
 * @returns {void}
 * @throws {TypeError} If `bundleId` is a non-empty value that is not a string.
 * @throws {Error} If `bundleId` is empty, too long, or has other characters.
 */
export function validateBundleId(bundleId) {
  if (!bundleId) {
    throw new Error('Bundle ID cannot be empty');
  }
  // Arrays and other objects would otherwise be coerced to a string by the
  // regex test and slip through.
  if (typeof bundleId !== 'string') {
    throw new TypeError('Bundle ID must be a string');
  }
  if (bundleId.length > 128) {
    throw new Error('Bundle ID too long (max 128 characters)');
  }
  // Allow only alphanumeric, hyphen, and underscore (no dots or slashes).
  const safeIdPattern = /^[a-zA-Z0-9_-]+$/;
  if (!safeIdPattern.test(bundleId)) {
    throw new Error(`Invalid bundle ID: contains unsafe characters. ID: ${bundleId}`);
  }
}
2
23
  export function getBundlePaths(storageDir, bundleId) {
24
+ // Validate bundle ID to prevent path traversal
25
+ validateBundleId(bundleId);
3
26
  const rootDir = path.join(storageDir, bundleId);
4
27
  const indexesDir = path.join(rootDir, 'indexes');
5
28
  return {