preflight-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -0
- package/README.zh-CN.md +406 -0
- package/dist/bundle/analysis.js +91 -0
- package/dist/bundle/context7.js +301 -0
- package/dist/bundle/deepwiki.js +206 -0
- package/dist/bundle/facts.js +296 -0
- package/dist/bundle/github.js +55 -0
- package/dist/bundle/guides.js +65 -0
- package/dist/bundle/ingest.js +152 -0
- package/dist/bundle/manifest.js +14 -0
- package/dist/bundle/overview.js +222 -0
- package/dist/bundle/paths.js +29 -0
- package/dist/bundle/service.js +803 -0
- package/dist/bundle/tagging.js +206 -0
- package/dist/config.js +65 -0
- package/dist/context7/client.js +30 -0
- package/dist/context7/tools.js +58 -0
- package/dist/core/scheduler.js +166 -0
- package/dist/errors.js +150 -0
- package/dist/index.js +7 -0
- package/dist/jobs/bundle-auto-update-job.js +71 -0
- package/dist/jobs/health-check-job.js +172 -0
- package/dist/jobs/storage-cleanup-job.js +148 -0
- package/dist/logging/logger.js +311 -0
- package/dist/mcp/uris.js +45 -0
- package/dist/search/sqliteFts.js +481 -0
- package/dist/server/optimized-server.js +255 -0
- package/dist/server.js +778 -0
- package/dist/storage/compression.js +249 -0
- package/dist/storage/storage-adapter.js +316 -0
- package/dist/utils/index.js +100 -0
- package/package.json +44 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
/**
 * Detect programming languages from file extensions.
 *
 * Counts each code file exactly once in a single pass; the previous
 * implementation re-filtered the entire file list for every detected
 * language, which was O(files * languages).
 *
 * @param {Array<{kind: string, repoRelativePath: string}>} files
 * @returns {Array<{language: string, fileCount: number, extensions: string[]}>}
 *   Sorted by descending fileCount (ties keep first-seen order).
 */
function detectLanguages(files) {
    const extToLang = {
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript',
        '.js': 'JavaScript',
        '.jsx': 'JavaScript',
        '.mjs': 'JavaScript',
        '.cjs': 'JavaScript',
        '.py': 'Python',
        '.go': 'Go',
        '.rs': 'Rust',
        '.java': 'Java',
        '.rb': 'Ruby',
        '.php': 'PHP',
        '.c': 'C',
        '.cpp': 'C++',
        '.cc': 'C++',
        '.h': 'C/C++',
        '.hpp': 'C++',
        '.cs': 'C#',
        '.swift': 'Swift',
        '.kt': 'Kotlin',
        '.scala': 'Scala',
    };
    // language -> { exts: Set<string>, count: number }
    const langMap = new Map();
    for (const file of files) {
        if (file.kind !== 'code')
            continue;
        const ext = path.extname(file.repoRelativePath).toLowerCase();
        const lang = extToLang[ext] || 'Other';
        let entry = langMap.get(lang);
        if (!entry) {
            entry = { exts: new Set(), count: 0 };
            langMap.set(lang, entry);
        }
        entry.exts.add(ext);
        entry.count += 1;
    }
    return Array.from(langMap.entries())
        .map(([language, entry]) => ({
            language,
            fileCount: entry.count,
            extensions: Array.from(entry.exts).sort(),
        }))
        .sort((a, b) => b.fileCount - a.fileCount);
}
|
|
49
|
+
/**
 * Find entry points in the repository.
 *
 * Looks at package.json (`main` and `bin` fields) and a fixed list of
 * conventional entry-file names. Every entry carries an evidence pointer
 * into the bundle.
 *
 * @param {Array<object>} files - Ingested file records for one repo.
 * @param {string} bundleRoot - Bundle root (currently unused; kept for interface stability).
 * @param {string} repoId - "owner/repo" identifier.
 * @returns {Promise<Array<{type: string, file: string, evidence: string}>>}
 */
async function findEntryPoints(files, bundleRoot, repoId) {
    const found = [];
    const [owner, repo] = repoId.split('/');
    const byRepoPath = (p) => files.find((f) => f.repoRelativePath === p);

    // Node.js projects: honor package.json "main" and "bin".
    const pkgJson = byRepoPath('package.json');
    if (pkgJson) {
        try {
            const pkg = JSON.parse(await fs.readFile(pkgJson.bundleNormAbsPath, 'utf8'));
            const evidence = `${pkgJson.bundleNormRelativePath}:1`;
            if (pkg.main) {
                found.push({
                    type: 'package-main',
                    file: `repos/${owner}/${repo}/norm/${pkg.main}`,
                    evidence,
                });
            }
            if (pkg.bin) {
                // "bin" may be a bare string (binary named after the repo) or a map.
                const binEntries = typeof pkg.bin === 'string' ? { [repo]: pkg.bin } : pkg.bin;
                for (const binPath of Object.values(binEntries)) {
                    found.push({
                        type: 'package-bin',
                        file: `repos/${owner}/${repo}/norm/${binPath}`,
                        evidence,
                    });
                }
            }
        }
        catch {
            // Ignore parsing errors
        }
    }

    // Conventional entry-file names, checked in fixed order.
    const commonEntries = ['index.ts', 'index.js', 'main.ts', 'main.js', 'src/index.ts', 'src/main.ts'];
    for (const entry of commonEntries) {
        const match = byRepoPath(entry);
        if (!match)
            continue;
        found.push({
            type: entry.includes('index') ? 'index-file' : 'main-file',
            file: match.bundleNormRelativePath,
            evidence: `${match.bundleNormRelativePath}:1`,
        });
    }
    return found;
}
|
|
97
|
+
/**
 * Extract dependency information from well-known manifest files
 * (package.json, requirements.txt, go.mod).
 *
 * Fixes over the previous version:
 * - go.mod: skip non-dependency directives (`module`, `go`, `toolchain`,
 *   `replace`, `exclude`, `retract`) — previously `go 1.21` was reported
 *   as a dependency named "go".
 * - go.mod: handle single-line `require foo v1.2.3` (previously missed,
 *   because the keyword consumed the first token).
 * - requirements.txt: skip option lines such as `-r other.txt` or
 *   `--index-url ...` (previously captured as packages).
 *
 * @param {Array<object>} files - Ingested file records (all repos).
 * @param {string} bundleRoot - Bundle root (unused; kept for interface stability).
 * @returns {Promise<{runtime: Array, dev: Array, manager: string}>}
 *   Note: when several manifests exist, `manager` reflects the last one
 *   checked (npm -> pip -> go), matching previous behavior.
 */
async function extractDependencies(files, bundleRoot) {
    const result = {
        runtime: [],
        dev: [],
        manager: 'unknown',
    };
    // Node.js - package.json
    const pkgJson = files.find((f) => f.repoRelativePath === 'package.json');
    if (pkgJson) {
        result.manager = 'npm';
        try {
            const pkg = JSON.parse(await fs.readFile(pkgJson.bundleNormAbsPath, 'utf8'));
            const evidence = `${pkgJson.bundleNormRelativePath}:1`;
            for (const [name, version] of Object.entries(pkg.dependencies ?? {})) {
                result.runtime.push({ name, version: String(version), evidence });
            }
            for (const [name, version] of Object.entries(pkg.devDependencies ?? {})) {
                result.dev.push({ name, version: String(version), evidence });
            }
        }
        catch {
            // Ignore parsing errors
        }
    }
    // Python - requirements.txt
    const reqTxt = files.find((f) => f.repoRelativePath === 'requirements.txt');
    if (reqTxt) {
        result.manager = 'pip';
        try {
            const content = await fs.readFile(reqTxt.bundleNormAbsPath, 'utf8');
            const lines = content.split('\n');
            for (let i = 0; i < lines.length; i++) {
                const line = lines[i]?.trim() ?? '';
                // Skip blanks, comments, and pip option lines (-r, --index-url, ...).
                if (!line || line.startsWith('#') || line.startsWith('-'))
                    continue;
                const match = line.match(/^([a-zA-Z0-9_-]+)(==|>=|<=|~=)?(.+)?/);
                if (match) {
                    result.runtime.push({
                        name: match[1],
                        version: match[3],
                        evidence: `${reqTxt.bundleNormRelativePath}:${i + 1}`,
                    });
                }
            }
        }
        catch {
            // Ignore errors
        }
    }
    // Go - go.mod
    const goMod = files.find((f) => f.repoRelativePath === 'go.mod');
    if (goMod) {
        result.manager = 'go';
        try {
            const content = await fs.readFile(goMod.bundleNormAbsPath, 'utf8');
            const lines = content.split('\n');
            for (let i = 0; i < lines.length; i++) {
                let line = lines[i]?.trim() ?? '';
                // Skip go.mod directives that are not dependencies, and block closers.
                if (/^(module|go|toolchain|replace|exclude|retract)\b/.test(line) || line === ')')
                    continue;
                // Single-line `require foo v1.2.3`: drop the keyword, keep the dependency.
                // (`require (` keeps its keyword and simply fails the match below.)
                line = line.replace(/^require\s+(?!\()/, '');
                const match = line.match(/^\s*([^\s]+)\s+v?([0-9.]+)/);
                if (match) {
                    result.runtime.push({
                        name: match[1],
                        version: match[2],
                        evidence: `${goMod.bundleNormRelativePath}:${i + 1}`,
                    });
                }
            }
        }
        catch {
            // Ignore errors
        }
    }
    return result;
}
|
|
186
|
+
/**
 * Analyze file structure: counts by kind, top-level directory names,
 * and simple heuristics for the presence of tests and config files.
 *
 * @param {Array<{kind: string, repoRelativePath: string}>} files
 * @returns {{totalFiles: number, totalDocs: number, totalCode: number,
 *            topLevelDirs: string[], hasTests: boolean, hasConfig: boolean}}
 */
function analyzeFileStructure(files) {
    const topLevelDirs = new Set();
    for (const { repoRelativePath } of files) {
        const slashIndex = repoRelativePath.indexOf('/');
        // Only paths with a non-empty leading segment contribute a directory.
        if (slashIndex > 0) {
            topLevelDirs.add(repoRelativePath.slice(0, slashIndex));
        }
    }
    const testMarkers = ['/test/', '/tests/', '/__tests__/', '.test.', '.spec.'];
    const hasTests = files.some((f) => testMarkers.some((marker) => f.repoRelativePath.includes(marker)));
    const hasConfig = files.some((f) => {
        const p = f.repoRelativePath;
        return p.includes('config')
            || p.endsWith('.config.js')
            || p.endsWith('.config.ts')
            || p.endsWith('.json');
    });
    const totalDocs = files.filter((f) => f.kind === 'doc').length;
    const totalCode = files.filter((f) => f.kind === 'code').length;
    return {
        totalFiles: files.length,
        totalDocs,
        totalCode,
        topLevelDirs: Array.from(topLevelDirs).sort(),
        hasTests,
        hasConfig,
    };
}
|
|
215
|
+
/**
 * Detect frameworks from dependency names.
 *
 * Fix: NestJS and Angular publish under npm scopes ("@nestjs/core",
 * "@angular/core"), so a plain name match essentially never fired for
 * them; scoped-prefix detection is added while keeping the old bare-name
 * checks for backward compatibility.
 *
 * @param {{runtime: Array<{name: string}>, dev: Array<{name: string}>}} deps
 * @param {Array<object>} files - Unused; kept for interface stability.
 * @returns {string[]} Sorted framework names.
 */
function detectFrameworks(deps, files) {
    const frameworks = new Set();
    const allDeps = [...deps.runtime, ...deps.dev].map((d) => d.name.toLowerCase());
    const has = (name) => allDeps.includes(name);
    const hasScoped = (scope) => allDeps.some((d) => d.startsWith(`${scope}/`));
    // JavaScript/TypeScript frameworks
    if (has('react'))
        frameworks.add('React');
    if (has('vue'))
        frameworks.add('Vue');
    if (has('angular') || hasScoped('@angular'))
        frameworks.add('Angular');
    if (has('next'))
        frameworks.add('Next.js');
    if (has('nuxt'))
        frameworks.add('Nuxt');
    if (has('express'))
        frameworks.add('Express');
    if (has('fastify'))
        frameworks.add('Fastify');
    if (has('nestjs') || hasScoped('@nestjs'))
        frameworks.add('NestJS');
    // Python frameworks
    if (has('django'))
        frameworks.add('Django');
    if (has('flask'))
        frameworks.add('Flask');
    if (has('fastapi'))
        frameworks.add('FastAPI');
    // Test frameworks
    if (has('jest'))
        frameworks.add('Jest');
    if (has('vitest'))
        frameworks.add('Vitest');
    if (has('pytest'))
        frameworks.add('Pytest');
    return Array.from(frameworks).sort();
}
|
|
254
|
+
/**
 * Extract all facts from a bundle: languages, entry points, dependencies,
 * a file-structure summary, and detected frameworks.
 *
 * @param {{repos: Array<{repoId: string, files: Array}>, bundleRoot: string}} params
 * @returns {Promise<object>} Versioned, timestamped facts object.
 */
export async function extractBundleFacts(params) {
    // Flatten per-repo file lists for bundle-wide analyses.
    const allFiles = params.repos.flatMap((r) => r.files);
    const languages = detectLanguages(allFiles);
    // Entry points are per-repo; resolve them in parallel.
    const entryPointsArrays = await Promise.all(
        params.repos.map((r) => findEntryPoints(r.files, params.bundleRoot, r.repoId)));
    const entryPoints = entryPointsArrays.flat();
    const dependencies = await extractDependencies(allFiles, params.bundleRoot);
    const fileStructure = analyzeFileStructure(allFiles);
    const frameworks = detectFrameworks(dependencies, allFiles);
    return {
        version: '1.0',
        timestamp: new Date().toISOString(),
        languages,
        entryPoints,
        dependencies,
        fileStructure,
        frameworks,
    };
}
|
|
278
|
+
/**
 * Write facts to a JSON file (pretty-printed, trailing newline),
 * creating parent directories as needed.
 *
 * @param {string} factsPath - Destination file path.
 * @param {object} facts - Facts object to serialize.
 */
export async function writeFacts(factsPath, facts) {
    const parentDir = path.dirname(factsPath);
    await fs.mkdir(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(facts, null, 2)}\n`;
    await fs.writeFile(factsPath, serialized, 'utf8');
}
|
|
285
|
+
/**
 * Read facts from a JSON file.
 *
 * @param {string} factsPath - Path to the facts JSON file.
 * @returns {Promise<object|null>} Parsed facts, or null when the file is
 *   missing, unreadable, or not valid JSON.
 */
export async function readFacts(factsPath) {
    let raw;
    try {
        raw = await fs.readFile(factsPath, 'utf8');
    }
    catch {
        return null;
    }
    try {
        return JSON.parse(raw);
    }
    catch {
        return null;
    }
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { promisify } from 'node:util';
|
|
5
|
+
const execFileAsync = promisify(execFile);
|
|
6
|
+
/**
 * Parse "owner/repo" (or a github.com URL) into its components.
 * A trailing ".git" on the repo name is stripped.
 *
 * @param {string} input - e.g. "owner/repo" or "https://github.com/owner/repo.git".
 * @returns {{owner: string, repo: string}}
 * @throws {Error} When fewer than two path segments are present.
 */
export function parseOwnerRepo(input) {
    const withoutHost = input.trim().replace(/^https?:\/\/github\.com\//i, '');
    const segments = withoutHost.split('/').filter(Boolean);
    if (segments.length < 2) {
        throw new Error(`Invalid GitHub repo identifier: ${input} (expected owner/repo)`);
    }
    const [owner, repoSegment] = segments;
    return { owner, repo: repoSegment.replace(/\.git$/i, '') };
}
|
|
14
|
+
/**
 * Build the HTTPS clone URL for an owner/repo reference.
 *
 * @param {{owner: string, repo: string}} ref
 * @returns {string} e.g. "https://github.com/owner/repo.git"
 */
export function toCloneUrl(ref) {
    const { owner, repo } = ref;
    return `https://github.com/${owner}/${repo}.git`;
}
|
|
17
|
+
/**
 * Run a git command and capture its output.
 *
 * @param {string[]} args - Arguments passed to the `git` executable.
 * @param {{cwd?: string, timeoutMs?: number}} [opts]
 * @returns {Promise<{stdout: string, stderr: string}>}
 */
async function runGit(args, opts) {
    const execOptions = {
        cwd: opts?.cwd,
        // Default 5 minutes; callers override for quick probes or long clones.
        timeout: opts?.timeoutMs ?? 5 * 60_000,
        // GIT_TERMINAL_PROMPT=0 keeps git from hanging on credential prompts.
        env: {
            ...process.env,
            GIT_TERMINAL_PROMPT: '0',
        },
        windowsHide: true,
        encoding: 'utf8',
        maxBuffer: 10 * 1024 * 1024,
    };
    const { stdout, stderr } = await execFileAsync('git', args, execOptions);
    return { stdout, stderr };
}
|
|
31
|
+
/**
 * Resolve the remote HEAD commit sha via `git ls-remote`.
 *
 * @param {string} cloneUrl - HTTPS clone URL of the repository.
 * @returns {Promise<string>} Full commit sha of the remote HEAD.
 * @throws {Error} When ls-remote output is empty or unparsable.
 */
export async function getRemoteHeadSha(cloneUrl) {
    const { stdout } = await runGit(['ls-remote', cloneUrl, 'HEAD'], { timeoutMs: 60_000 });
    // ls-remote prints "<sha>\tHEAD"; only the first line matters.
    const [line] = stdout.trim().split(/\r?\n/);
    if (!line) {
        throw new Error(`git ls-remote returned empty output for ${cloneUrl}`);
    }
    const sha = line.split(/\s+/)[0];
    if (!sha || sha.length < 8) {
        throw new Error(`Could not parse remote sha from: ${line}`);
    }
    return sha;
}
|
|
41
|
+
/**
 * Shallow-clone a repository into destDir (depth 1, no tags, single branch).
 * Any existing checkout at destDir is removed first.
 *
 * @param {string} cloneUrl - HTTPS clone URL.
 * @param {string} destDir - Destination directory for the checkout.
 * @param {{ref?: string}} [opts] - Optional branch/tag to clone.
 */
export async function shallowClone(cloneUrl, destDir, opts) {
    // Ensure the parent exists, then drop any stale checkout at destDir.
    await fs.mkdir(path.dirname(destDir), { recursive: true });
    await fs.rm(destDir, { recursive: true, force: true });
    // core.autocrlf=false keeps checked-out bytes as stored in the repo.
    const args = ['-c', 'core.autocrlf=false', 'clone', '--depth', '1', '--no-tags', '--single-branch'];
    const ref = opts?.ref;
    if (ref) {
        args.push('--branch', ref);
    }
    args.push(cloneUrl, destDir);
    await runGit(args, { timeoutMs: 15 * 60_000 });
}
|
|
52
|
+
/**
 * Read the HEAD commit sha of a local checkout.
 *
 * @param {string} repoDir - Path to the local git repository.
 * @returns {Promise<string>} Trimmed full commit sha.
 */
export async function getLocalHeadSha(repoDir) {
    const result = await runGit(['-C', repoDir, 'rev-parse', 'HEAD']);
    return result.stdout.trim();
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
/**
 * Write AGENTS.md into the bundle: the ground rules an agent must follow
 * when answering from this bundle (evidence-only answers, explicit
 * citations, no guessing). The markdown below is emitted verbatim.
 *
 * @param {string} targetPath - Absolute path of the AGENTS.md file to write.
 */
export async function writeAgentsMd(targetPath) {
    const content = `# AGENTS.md - Rules for using this Preflight Bundle

This bundle is an **evidence pack**. You must stay factual.

## Non-negotiable rules
- Only use evidence **inside this bundle**.
- Every factual claim in your answer must include an **Evidence pointer**:
  - file path (within this bundle) + line range
  - or a direct quote snippet with a pointer
- If you cannot find evidence, you must say: **"Not found in this bundle"** and suggest next steps:
  - run preflight_search_bundle
  - run preflight_update_bundle
  - expand bundle scope and rebuild

## Forbidden behavior
- Do not guess.
- Do not invent APIs, commands, file paths, or architecture.
- Avoid words like "probably", "likely", "should" unless you attach evidence.

## How to cite evidence
Use this format:
- (evidence: <bundle-relative-path>:<startLine>-<endLine>)

Example:
- The project uses TypeScript. (evidence: repos/foo/bar/norm/package.json:1-40)
`;
    await fs.writeFile(targetPath, content, 'utf8');
}
|
|
31
|
+
/**
 * Write START_HERE.md: the bundle's front page, listing included repos
 * (with pinned commit when known) and library doc sources, and pointing
 * the reader at AGENTS.md / OVERVIEW.md and the search/update tools.
 *
 * @param {object} params
 * @param {string} params.bundleId - Bundle identifier shown in the title.
 * @param {string} params.targetPath - Destination path for START_HERE.md.
 * @param {Array<{id: string, headSha?: string}>} params.repos
 * @param {Array<{kind: string, input: string, id?: string}>} [params.libraries]
 */
export async function writeStartHereMd(params) {
    // One bullet per repo, appending "@ <sha>" when the commit is pinned.
    const repoLines = params.repos
        .map((r) => `- ${r.id}${r.headSha ? ` @ ${r.headSha}` : ''}`)
        .join('\n');
    // One bullet per library source, appending "-> <id>" when resolved.
    const libraryLines = (params.libraries ?? [])
        .map((l) => {
            const resolved = l.id ? ` -> ${l.id}` : '';
            return `- ${l.kind}: ${l.input}${resolved}`;
        })
        .join('\n');
    const content = `# START_HERE.md - Preflight Bundle ${params.bundleId}

## What this is
This bundle is a local snapshot of selected repositories (and optionally library docs) for **evidence-based** development.

## Repositories included
${repoLines || '(none)'}

## Library docs included
${libraryLines || '(none)'}

## How to use
1) Read AGENTS.md first and follow its rules.
2) Read OVERVIEW.md for a quick, evidence-linked map.
3) Use search to find exact evidence:
- tool: preflight_search_bundle
4) If the repo may have changed, refresh:
- tool: preflight_update_bundle

## Tips
- Prefer quoting exact file content over paraphrasing.
- When unsure, open the referenced file resource and verify.
`;
    await fs.writeFile(params.targetPath, content, 'utf8');
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import crypto from 'node:crypto';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import ignore from 'ignore';
|
|
5
|
+
// Patterns always excluded from ingestion, on top of the repo's own
// .gitignore: VCS metadata, dependency installs, build output, caches,
// virtualenvs, and editor settings. Gitignore-style syntax (consumed by
// the `ignore` package); the trailing '/' targets directories.
const DEFAULT_IGNORE = [
    '.git/',
    'node_modules/',
    'dist/',
    'build/',
    'out/',
    '.next/',
    '.turbo/',
    '.cache/',
    'coverage/',
    '__pycache__/',
    '.venv/',
    'venv/',
    '.idea/',
    '.vscode/',
];
|
|
21
|
+
/** Convert a platform path to POSIX form (backslashes become forward slashes). */
function toPosix(p) {
    return p.split('\\').join('/');
}
|
|
24
|
+
/** Hex-encoded SHA-256 digest of a buffer (or string). */
function sha256Hex(buf) {
    const hash = crypto.createHash('sha256');
    hash.update(buf);
    return hash.digest('hex');
}
|
|
27
|
+
/**
 * Heuristic binary detection: a NUL byte anywhere in the first 8 KiB
 * marks the buffer as binary.
 *
 * @param {Buffer} buf
 * @returns {boolean}
 */
function isProbablyBinary(buf) {
    const sampleLength = Math.min(buf.length, 8192);
    return buf.subarray(0, sampleLength).includes(0);
}
|
|
35
|
+
/**
 * Classify a repo-relative POSIX path as 'doc' or 'code'.
 *
 * Fix: repo-relative paths have no leading slash, so `includes('/docs/')`
 * never matched files in a *top-level* docs/ or doc/ directory; those are
 * now matched explicitly with startsWith.
 *
 * @param {string} repoRelativePathPosix - POSIX-style path relative to the repo root.
 * @returns {'doc'|'code'}
 */
function classifyKind(repoRelativePathPosix) {
    const base = path.posix.basename(repoRelativePathPosix).toLowerCase();
    const ext = path.posix.extname(repoRelativePathPosix).toLowerCase();
    // Well-known documentation basenames (extensionless or .md variants).
    const docBasenames = new Set([
        'readme',
        'readme.md',
        'license',
        'contributing',
        'contributing.md',
        'code_of_conduct',
        'code_of_conduct.md',
        'security',
        'security.md',
        'changelog',
        'changelog.md',
        'llms.txt',
    ]);
    if (docBasenames.has(base) ||
        base.startsWith('readme.') ||
        base.startsWith('license.')) {
        return 'doc';
    }
    // Anything under a docs/doc directory (top-level or nested) is documentation.
    if (repoRelativePathPosix.startsWith('docs/') ||
        repoRelativePathPosix.startsWith('doc/') ||
        repoRelativePathPosix.includes('/docs/') ||
        repoRelativePathPosix.includes('/doc/')) {
        return 'doc';
    }
    if (['.md', '.markdown', '.rst', '.txt', '.adoc'].includes(ext))
        return 'doc';
    return 'code';
}
|
|
61
|
+
/**
 * Build the ignore matcher for a repo: built-in defaults layered with the
 * repo's own .gitignore when one exists.
 *
 * @param {string} repoRoot - Absolute path of the checked-out repo.
 * @returns {Promise<object>} An `ignore` matcher instance.
 */
async function buildIgnore(repoRoot) {
    const ig = ignore();
    ig.add(DEFAULT_IGNORE);
    const gitignorePath = path.join(repoRoot, '.gitignore');
    try {
        ig.add(await fs.readFile(gitignorePath, 'utf8'));
    }
    catch {
        // No .gitignore (or unreadable): defaults alone apply.
    }
    return ig;
}
|
|
75
|
+
/**
 * Async generator yielding every non-ignored regular file under repoRoot.
 * Directories are pruned by the same ignore rules as files, so ignored
 * trees are never descended into.
 *
 * @param {string} repoRoot - Absolute root to walk.
 * @param {object} ig - Matcher with an `ignores(relPosixPath)` method.
 * @yields {{absPath: string, relPosix: string}}
 */
async function* walkFiles(repoRoot, ig) {
    const pending = [repoRoot];
    while (pending.length > 0) {
        const currentDir = pending.pop();
        const entries = await fs.readdir(currentDir, { withFileTypes: true });
        for (const entry of entries) {
            const absPath = path.join(currentDir, entry.name);
            const relPosix = toPosix(path.relative(repoRoot, absPath));
            // Ignore rules apply to files and directories alike.
            if (ig.ignores(relPosix)) {
                continue;
            }
            if (entry.isDirectory()) {
                pending.push(absPath);
            }
            else if (entry.isFile()) {
                // Symlinks and other special entries are skipped.
                yield { absPath, relPosix };
            }
        }
    }
}
|
|
98
|
+
/**
 * Ingest a checked-out repository into the bundle.
 *
 * For every non-ignored, text, UTF-8 file within the size budgets, writes
 * two copies: the raw bytes under rawDestRoot and an LF-normalized text
 * copy under normDestRoot. Oversized, binary, and non-UTF-8 files are
 * recorded in `skipped`; hitting the total-byte budget stops ingestion.
 *
 * @param {object} params
 * @param {string} params.repoId - "owner/repo" identifier recorded per file.
 * @param {string} params.repoRoot - Absolute path of the checkout to ingest.
 * @param {string} params.rawDestRoot - Destination root for raw copies.
 * @param {string} params.normDestRoot - Destination root for normalized copies.
 * @param {string} params.bundleNormPrefixPosix - Bundle-relative prefix for normalized paths.
 * @param {{maxFileBytes: number, maxTotalBytes: number}} params.options
 * @returns {Promise<{files: Array<object>, totalBytes: number, skipped: string[]}>}
 */
export async function ingestRepoToBundle(params) {
    const ig = await buildIgnore(params.repoRoot);
    let totalBytes = 0;
    const files = [];
    const skipped = [];
    // fatal:true makes decode() throw on invalid UTF-8, which routes
    // non-UTF-8 files into the skipped list below.
    const decoder = new TextDecoder('utf-8', { fatal: true });
    for await (const f of walkFiles(params.repoRoot, ig)) {
        // ignore check already done in walkFiles
        const st = await fs.stat(f.absPath);
        // Per-file budget: skip and keep going.
        if (st.size > params.options.maxFileBytes) {
            skipped.push(`${f.relPosix} (too large: ${st.size} bytes)`);
            continue;
        }
        // Total budget: stop ingesting entirely once it would be exceeded.
        if (totalBytes + st.size > params.options.maxTotalBytes) {
            skipped.push(`(bundle maxTotalBytes reached) stopped before: ${f.relPosix}`);
            break;
        }
        const buf = await fs.readFile(f.absPath);
        if (isProbablyBinary(buf)) {
            skipped.push(`${f.relPosix} (binary)`);
            continue;
        }
        let text;
        try {
            text = decoder.decode(buf);
        }
        catch {
            skipped.push(`${f.relPosix} (non-utf8)`);
            continue;
        }
        // Write raw bytes (as checked out by git).
        const rawDest = path.join(params.rawDestRoot, f.relPosix.split('/').join(path.sep));
        await fs.mkdir(path.dirname(rawDest), { recursive: true });
        await fs.writeFile(rawDest, buf);
        // Write normalized text with LF.
        const normalized = text.replace(/\r\n/g, '\n');
        const normDest = path.join(params.normDestRoot, f.relPosix.split('/').join(path.sep));
        await fs.mkdir(path.dirname(normDest), { recursive: true });
        await fs.writeFile(normDest, normalized, 'utf8');
        // Budget counts the on-disk (raw) size, not the normalized size.
        totalBytes += st.size;
        const kind = classifyKind(f.relPosix);
        // Hash the normalized content so CRLF-only differences don't change the sha.
        const sha256 = sha256Hex(Buffer.from(normalized, 'utf8'));
        const bundleNormRelativePath = `${params.bundleNormPrefixPosix}/${f.relPosix}`;
        files.push({
            repoId: params.repoId,
            kind,
            repoRelativePath: f.relPosix,
            bundleNormRelativePath,
            bundleNormAbsPath: normDest,
            sha256,
            bytes: st.size,
        });
    }
    return { files, totalBytes, skipped };
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
/**
 * Read and validate a bundle manifest.
 *
 * @param {string} manifestPath - Path to the manifest JSON file.
 * @returns {Promise<object>} The parsed manifest.
 * @throws {Error} When schemaVersion is not exactly 1 (also propagates
 *   read/parse errors).
 */
export async function readManifest(manifestPath) {
    const raw = await fs.readFile(manifestPath, 'utf8');
    const manifest = JSON.parse(raw);
    const { schemaVersion } = manifest;
    if (schemaVersion !== 1) {
        throw new Error(`Unsupported manifest schemaVersion: ${String(schemaVersion)}`);
    }
    return manifest;
}
|
|
11
|
+
/**
 * Write a bundle manifest as pretty-printed JSON (trailing newline),
 * creating parent directories as needed.
 *
 * @param {string} manifestPath - Destination file path.
 * @param {object} manifest - Manifest object to serialize.
 */
export async function writeManifest(manifestPath, manifest) {
    const parentDir = path.dirname(manifestPath);
    await fs.mkdir(parentDir, { recursive: true });
    const body = `${JSON.stringify(manifest, null, 2)}\n`;
    await fs.writeFile(manifestPath, body, 'utf8');
}
|