@apmantza/greedysearch-pi 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +107 -89
- package/LICENSE +21 -21
- package/README.md +73 -262
- package/{cdp.mjs → bin/cdp.mjs} +1004 -1004
- package/{coding-task.mjs → bin/coding-task.mjs} +392 -392
- package/{launch.mjs → bin/launch.mjs} +288 -288
- package/{search.mjs → bin/search.mjs} +1482 -1436
- package/extractors/bing-copilot.mjs +167 -167
- package/extractors/common.mjs +237 -237
- package/extractors/consent.mjs +273 -273
- package/extractors/google-ai.mjs +156 -156
- package/extractors/perplexity.mjs +141 -141
- package/extractors/selectors.mjs +52 -52
- package/index.ts +18 -18
- package/package.json +46 -49
- package/skills/greedy-search/SKILL.md +117 -117
- package/src/fetcher.mjs +589 -589
- package/src/formatters/coding.ts +68 -68
- package/src/formatters/sources.ts +116 -116
- package/src/formatters/synthesis.ts +91 -91
- package/src/github.mjs +323 -323
- package/src/utils/content.mjs +56 -56
- package/src/utils/helpers.ts +40 -40
package/src/github.mjs
CHANGED
|
@@ -1,323 +1,323 @@
|
|
|
1
|
-
// src/github.mjs - GitHub repo cloning for better code extraction
|
|
2
|
-
|
|
3
|
-
import { execFile } from "node:child_process";
|
|
4
|
-
import {
|
|
5
|
-
existsSync,
|
|
6
|
-
mkdtempSync,
|
|
7
|
-
readdirSync,
|
|
8
|
-
readFileSync,
|
|
9
|
-
statSync,
|
|
10
|
-
} from "node:fs";
|
|
11
|
-
import { tmpdir } from "node:os";
|
|
12
|
-
import { join, relative } from "node:path";
|
|
13
|
-
|
|
14
|
-
// Finished clones, keyed by "<owner>/<repo>@<ref>", mapped to the local checkout path.
const CLONE_CACHE = new Map();

// Entry limit getRepoTree() applies when the caller does not pass one.
const DEFAULT_MAX_FILES = 50;

// readRepoFile() does not load files larger than this (1 MiB).
const MAX_FILE_SIZE_BYTES = 1024 ** 2;
|
|
17
|
-
|
|
18
|
-
/**
 * Parse a GitHub web URL into its components.
 *
 * Recognised shapes:
 *   - https://github.com/<owner>/<repo>                      -> type "root"
 *   - https://github.com/<owner>/<repo>/blob/<ref>[/<path>]  -> type "blob"
 *   - https://github.com/<owner>/<repo>/tree/<ref>[/<path>]  -> type "tree"
 *
 * The ref is read as a single path segment, so branch names that contain "/"
 * cannot be distinguished from the file path and are not supported.
 *
 * @param {string} url
 * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
 *   The parsed components, or null when the input is not a supported GitHub URL.
 */
export function parseGitHubUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return null; // Not a parseable absolute URL
  }

  // Require github.com itself or a genuine subdomain. A bare suffix test
  // would also accept look-alike hosts such as "evilgithub.com".
  const host = parsed.hostname.toLowerCase();
  if (host !== "github.com" && !host.endsWith(".github.com")) {
    return null;
  }

  const parts = parsed.pathname.split("/").filter(Boolean);
  if (parts.length < 2) {
    return null; // Need at least owner/repo
  }

  const [owner, rawRepo] = parts;

  // Clone-style URLs end in ".git"; drop the suffix so that callers who build
  // "<repo>.git" themselves do not end up with "<repo>.git.git".
  const repo =
    rawRepo.length > 4 && rawRepo.endsWith(".git")
      ? rawRepo.slice(0, -4)
      : rawRepo;

  // Root: github.com/owner/repo
  if (parts.length === 2) {
    return { owner, repo, type: "root" };
  }

  // With type: github.com/owner/repo/blob|tree/ref/path
  const [, , type, ref, ...rest] = parts;
  if (ref !== undefined && (type === "blob" || type === "tree")) {
    return { owner, repo, type, ref, path: rest.join("/") };
  }

  return null;
}
|
|
55
|
-
|
|
56
|
-
/**
 * Check whether the git CLI can be executed.
 *
 * `execFile` reports its outcome through a callback and returns a
 * ChildProcess, not a promise, so it is wrapped in a Promise here; awaiting
 * the bare call would resolve immediately without learning the result.
 *
 * @returns {Promise<boolean>} true when `git --version` exits successfully.
 */
function checkGitAvailable() {
  return new Promise((resolve) => {
    try {
      // The timeout keeps a wedged git binary from stalling the caller forever.
      execFile("git", ["--version"], { timeout: 10000 }, (error) => {
        resolve(!error);
      });
    } catch {
      // Synchronous spawn failure: treat as "not available".
      resolve(false);
    }
  });
}
|
|
67
|
-
|
|
68
|
-
/**
 * Run `git` with the given arguments and wait for it to exit.
 * `execFile` is callback-based, so it is adapted to a promise here.
 * @param {string[]} args - Arguments passed to the git executable
 * @param {object} [options] - Options forwarded to execFile
 * @returns {Promise<string>} Resolves with stdout; rejects with the execFile error.
 */
function runGit(args, options = {}) {
  return new Promise((resolve, reject) => {
    execFile("git", args, options, (error, stdout) => {
      if (error) {
        reject(error);
      } else {
        resolve(stdout);
      }
    });
  });
}

/**
 * Shallow-clone a GitHub repo into a fresh temp directory and return its path.
 *
 * Successful clones are remembered in CLONE_CACHE under "<owner>/<repo>@<ref>"
 * and reused for as long as the cloned directory still exists on disk.
 * Failures are reported through the `error` field rather than thrown.
 *
 * @param {string} owner - Repo owner
 * @param {string} repo - Repo name
 * @param {string} [ref] - Branch or tag name. The default "HEAD" clones the
 *   remote's default branch, whatever it is called. Commit SHAs are not
 *   accepted by `git clone --branch`.
 * @returns {Promise<{path: string, cached: boolean, error?: string}>}
 */
export async function cloneGitHubRepo(owner, repo, ref = "HEAD") {
  const cacheKey = `${owner}/${repo}@${ref}`;

  // Check cache
  const cachedPath = CLONE_CACHE.get(cacheKey);
  if (cachedPath !== undefined) {
    if (existsSync(cachedPath)) {
      return { path: cachedPath, cached: true };
    }
    // Cache stale, remove
    CLONE_CACHE.delete(cacheKey);
  }

  // Check git available
  if (!(await checkGitAvailable())) {
    return { path: "", cached: false, error: "git CLI not available" };
  }

  // Create temp directory
  let clonePath;
  try {
    const tempBase = mkdtempSync(join(tmpdir(), `github-${owner}-${repo}-`));
    clonePath = join(tempBase, "repo");
  } catch (error) {
    return { path: "", cached: false, error: error.message };
  }

  // Shallow clone. Without --branch git checks out the remote's default
  // branch, so there is no need to guess between "main" and "master".
  const cloneArgs = ["clone", "--depth", "1", "--single-branch"];
  if (ref !== "HEAD") {
    cloneArgs.push("--branch", ref);
  }
  cloneArgs.push("--", `https://github.com/${owner}/${repo}.git`, clonePath);

  try {
    await runGit(cloneArgs, { timeout: 60000 });
  } catch (error) {
    // NOTE(review): the temp directory created above is left behind on
    // failure; removing it would need an fs removal helper that this block
    // does not have in scope.
    return { path: "", cached: false, error: error.message };
  }

  // Cache only after git has actually finished successfully
  CLONE_CACHE.set(cacheKey, clonePath);
  return { path: clonePath, cached: false };
}
|
|
147
|
-
|
|
148
|
-
/**
 * Read a file from a cloned repo.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} filePath - Relative path within repo
 * @returns {{content: string, size: number} | null}
 *   The UTF-8 content and byte size; a "[File too large: …KB]" placeholder as
 *   content when the file exceeds MAX_FILE_SIZE_BYTES; null when the path
 *   leaves the repo, does not exist, is a directory, or cannot be read.
 */
export function readRepoFile(repoPath, filePath) {
  const fullPath = join(repoPath, filePath);

  // Security: ensure path is within repo. Compare path segments instead of
  // string prefixes: "/tmp/repo-evil" starts with "/tmp/repo" but is outside.
  const rel = relative(repoPath, fullPath);
  if (/^\.\.(?:[\\/]|$)/.test(rel)) {
    return null;
  }

  // NOTE(review): statSync/readFileSync follow symlinks, so a symlink stored
  // in the repo may still resolve to a location outside it.
  try {
    // statSync throws for a missing path, which covers the "not found" case.
    const stats = statSync(fullPath);
    if (stats.isDirectory()) {
      return null;
    }

    if (stats.size > MAX_FILE_SIZE_BYTES) {
      return {
        content: `[File too large: ${(stats.size / 1024).toFixed(1)}KB]`,
        size: stats.size,
      };
    }

    return { content: readFileSync(fullPath, "utf8"), size: stats.size };
  } catch {
    // Missing, unreadable, or removed between stat and read
    return null;
  }
}
|
|
185
|
-
|
|
186
|
-
/**
 * Get a depth-first directory tree listing.
 *
 * Entries whose name starts with "." and directories/files named
 * "node_modules" or "vendor" are skipped. Directories count toward the limit.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} [subPath] - Subdirectory to list
 * @param {number} [maxFiles] - Max entries (files and directories) to return
 * @returns {Array<{path: string, type: 'file'|'dir', size?: number}>}
 *   Entry paths are `subPath` joined with the position below it. Empty when
 *   the target leaves the repo, does not exist, or cannot be listed.
 */
export function getRepoTree(
  repoPath,
  subPath = "",
  maxFiles = DEFAULT_MAX_FILES,
) {
  const targetPath = join(repoPath, subPath);

  // Security: ensure within repo. Compare path segments instead of string
  // prefixes: "/tmp/repo-evil" starts with "/tmp/repo" but is outside.
  if (/^\.\.(?:[\\/]|$)/.test(relative(repoPath, targetPath))) {
    return [];
  }

  if (!existsSync(targetPath)) {
    return [];
  }

  const results = [];

  function walk(dir, relativePath) {
    if (results.length >= maxFiles) return;

    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Not a directory, or not readable: nothing to list here
      return;
    }

    for (const entry of entries) {
      if (results.length >= maxFiles) break;

      // Skip hidden and common non-source dirs
      if (
        entry.name.startsWith(".") ||
        entry.name === "node_modules" ||
        entry.name === "vendor"
      ) {
        continue;
      }

      const entryRelPath = join(relativePath, entry.name);

      if (entry.isDirectory()) {
        results.push({ path: entryRelPath, type: "dir" });
        walk(join(dir, entry.name), entryRelPath);
      } else if (entry.isFile()) {
        try {
          const { size } = statSync(join(dir, entry.name));
          results.push({ path: entryRelPath, type: "file", size });
        } catch {
          // One unreadable file must not hide the rest of the directory
        }
      }
    }
  }

  walk(targetPath, subPath);
  return results;
}
|
|
247
|
-
|
|
248
|
-
/**
 * Resolve a GitHub URL to local content by cloning the repository.
 *
 * Depending on the URL this yields a directory listing (tree URL with a
 * path), a single file (blob URL with a path), or a README excerpt plus a
 * top-level tree (repo root, or tree URL without a path).
 *
 * @param {string} url - GitHub URL (blob, tree, or root)
 * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, localPath?: string, tree?: Array}>}
 */
export async function fetchGitHubContent(url) {
  const target = parseGitHubUrl(url);
  if (!target) {
    return { ok: false, error: "Not a valid GitHub URL" };
  }

  const { owner, repo, type, ref, path } = target;

  // Make sure a local checkout exists
  const { path: repoPath, error: cloneError } = await cloneGitHubRepo(
    owner,
    repo,
    ref,
  );
  if (cloneError) {
    return { ok: false, error: `Clone failed: ${cloneError}` };
  }

  const hasPath = Boolean(path);

  // Directory inside the repo: list its entries
  if (type === "tree" && hasPath) {
    const listing = getRepoTree(repoPath, path, 50);
    const rows = [];
    for (const node of listing) {
      const icon = node.type === "dir" ? "📁" : "📄";
      rows.push(` ${icon} ${node.path}`);
    }

    return {
      ok: true,
      title: `${owner}/${repo}/${path}`,
      content: `[Directory: ${path}]\n\nFiles:\n${rows.join("\n")}`,
      localPath: join(repoPath, path),
      tree: listing,
    };
  }

  // Single file inside the repo
  if (type === "blob" && hasPath) {
    const file = readRepoFile(repoPath, path);
    if (file) {
      return {
        ok: true,
        title: `${owner}/${repo}: ${path}`,
        content: file.content,
        localPath: join(repoPath, path),
      };
    }
    return { ok: false, error: `File not found: ${path}` };
  }

  // Repo root (a tree URL that reaches this point has no path)
  if (type === "root" || type === "tree") {
    const topLevel = getRepoTree(repoPath, "", 50);

    // Take the first README spelling that can be read
    const candidates = ["README.md", "Readme.md", "readme.md", "README.MD"];
    let readme = null;
    for (let i = 0; i < candidates.length && !readme; i += 1) {
      readme = readRepoFile(repoPath, candidates[i]);
    }
    // Only the first 5000 characters of the README are returned
    const excerpt = readme ? readme.content.slice(0, 5000) : "";

    return {
      ok: true,
      title: `${owner}/${repo}`,
      content: excerpt || `[Repository: ${owner}/${repo}]`,
      localPath: repoPath,
      tree: topLevel.slice(0, 30),
    };
  }

  return { ok: false, error: "Unsupported GitHub URL type" };
}
|
|
1
|
+
// src/github.mjs - GitHub repo cloning for better code extraction
|
|
2
|
+
|
|
3
|
+
import { execFile } from "node:child_process";
|
|
4
|
+
import {
|
|
5
|
+
existsSync,
|
|
6
|
+
mkdtempSync,
|
|
7
|
+
readdirSync,
|
|
8
|
+
readFileSync,
|
|
9
|
+
statSync,
|
|
10
|
+
} from "node:fs";
|
|
11
|
+
import { tmpdir } from "node:os";
|
|
12
|
+
import { join, relative } from "node:path";
|
|
13
|
+
|
|
14
|
+
// Finished clones, keyed by "<owner>/<repo>@<ref>", mapped to the local checkout path.
const CLONE_CACHE = new Map();

// Entry limit getRepoTree() applies when the caller does not pass one.
const DEFAULT_MAX_FILES = 50;

// readRepoFile() does not load files larger than this (1 MiB).
const MAX_FILE_SIZE_BYTES = 1024 ** 2;
|
|
17
|
+
|
|
18
|
+
/**
 * Parse a GitHub web URL into its components.
 *
 * Recognised shapes:
 *   - https://github.com/<owner>/<repo>                      -> type "root"
 *   - https://github.com/<owner>/<repo>/blob/<ref>[/<path>]  -> type "blob"
 *   - https://github.com/<owner>/<repo>/tree/<ref>[/<path>]  -> type "tree"
 *
 * The ref is read as a single path segment, so branch names that contain "/"
 * cannot be distinguished from the file path and are not supported.
 *
 * @param {string} url
 * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
 *   The parsed components, or null when the input is not a supported GitHub URL.
 */
export function parseGitHubUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return null; // Not a parseable absolute URL
  }

  // Require github.com itself or a genuine subdomain. A bare suffix test
  // would also accept look-alike hosts such as "evilgithub.com".
  const host = parsed.hostname.toLowerCase();
  if (host !== "github.com" && !host.endsWith(".github.com")) {
    return null;
  }

  const parts = parsed.pathname.split("/").filter(Boolean);
  if (parts.length < 2) {
    return null; // Need at least owner/repo
  }

  const [owner, rawRepo] = parts;

  // Clone-style URLs end in ".git"; drop the suffix so that callers who build
  // "<repo>.git" themselves do not end up with "<repo>.git.git".
  const repo =
    rawRepo.length > 4 && rawRepo.endsWith(".git")
      ? rawRepo.slice(0, -4)
      : rawRepo;

  // Root: github.com/owner/repo
  if (parts.length === 2) {
    return { owner, repo, type: "root" };
  }

  // With type: github.com/owner/repo/blob|tree/ref/path
  const [, , type, ref, ...rest] = parts;
  if (ref !== undefined && (type === "blob" || type === "tree")) {
    return { owner, repo, type, ref, path: rest.join("/") };
  }

  return null;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Check whether the git CLI can be executed.
 *
 * `execFile` reports its outcome through a callback and returns a
 * ChildProcess, not a promise, so it is wrapped in a Promise here; awaiting
 * the bare call would resolve immediately without learning the result.
 *
 * @returns {Promise<boolean>} true when `git --version` exits successfully.
 */
function checkGitAvailable() {
  return new Promise((resolve) => {
    try {
      // The timeout keeps a wedged git binary from stalling the caller forever.
      execFile("git", ["--version"], { timeout: 10000 }, (error) => {
        resolve(!error);
      });
    } catch {
      // Synchronous spawn failure: treat as "not available".
      resolve(false);
    }
  });
}
|
|
67
|
+
|
|
68
|
+
/**
 * Run `git` with the given arguments and wait for it to exit.
 * `execFile` is callback-based, so it is adapted to a promise here.
 * @param {string[]} args - Arguments passed to the git executable
 * @param {object} [options] - Options forwarded to execFile
 * @returns {Promise<string>} Resolves with stdout; rejects with the execFile error.
 */
function runGit(args, options = {}) {
  return new Promise((resolve, reject) => {
    execFile("git", args, options, (error, stdout) => {
      if (error) {
        reject(error);
      } else {
        resolve(stdout);
      }
    });
  });
}

/**
 * Shallow-clone a GitHub repo into a fresh temp directory and return its path.
 *
 * Successful clones are remembered in CLONE_CACHE under "<owner>/<repo>@<ref>"
 * and reused for as long as the cloned directory still exists on disk.
 * Failures are reported through the `error` field rather than thrown.
 *
 * @param {string} owner - Repo owner
 * @param {string} repo - Repo name
 * @param {string} [ref] - Branch or tag name. The default "HEAD" clones the
 *   remote's default branch, whatever it is called. Commit SHAs are not
 *   accepted by `git clone --branch`.
 * @returns {Promise<{path: string, cached: boolean, error?: string}>}
 */
export async function cloneGitHubRepo(owner, repo, ref = "HEAD") {
  const cacheKey = `${owner}/${repo}@${ref}`;

  // Check cache
  const cachedPath = CLONE_CACHE.get(cacheKey);
  if (cachedPath !== undefined) {
    if (existsSync(cachedPath)) {
      return { path: cachedPath, cached: true };
    }
    // Cache stale, remove
    CLONE_CACHE.delete(cacheKey);
  }

  // Check git available
  if (!(await checkGitAvailable())) {
    return { path: "", cached: false, error: "git CLI not available" };
  }

  // Create temp directory
  let clonePath;
  try {
    const tempBase = mkdtempSync(join(tmpdir(), `github-${owner}-${repo}-`));
    clonePath = join(tempBase, "repo");
  } catch (error) {
    return { path: "", cached: false, error: error.message };
  }

  // Shallow clone. Without --branch git checks out the remote's default
  // branch, so there is no need to guess between "main" and "master".
  const cloneArgs = ["clone", "--depth", "1", "--single-branch"];
  if (ref !== "HEAD") {
    cloneArgs.push("--branch", ref);
  }
  cloneArgs.push("--", `https://github.com/${owner}/${repo}.git`, clonePath);

  try {
    await runGit(cloneArgs, { timeout: 60000 });
  } catch (error) {
    // NOTE(review): the temp directory created above is left behind on
    // failure; removing it would need an fs removal helper that this block
    // does not have in scope.
    return { path: "", cached: false, error: error.message };
  }

  // Cache only after git has actually finished successfully
  CLONE_CACHE.set(cacheKey, clonePath);
  return { path: clonePath, cached: false };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Read a file from a cloned repo.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} filePath - Relative path within repo
 * @returns {{content: string, size: number} | null}
 *   The UTF-8 content and byte size; a "[File too large: …KB]" placeholder as
 *   content when the file exceeds MAX_FILE_SIZE_BYTES; null when the path
 *   leaves the repo, does not exist, is a directory, or cannot be read.
 */
export function readRepoFile(repoPath, filePath) {
  const fullPath = join(repoPath, filePath);

  // Security: ensure path is within repo. Compare path segments instead of
  // string prefixes: "/tmp/repo-evil" starts with "/tmp/repo" but is outside.
  const rel = relative(repoPath, fullPath);
  if (/^\.\.(?:[\\/]|$)/.test(rel)) {
    return null;
  }

  // NOTE(review): statSync/readFileSync follow symlinks, so a symlink stored
  // in the repo may still resolve to a location outside it.
  try {
    // statSync throws for a missing path, which covers the "not found" case.
    const stats = statSync(fullPath);
    if (stats.isDirectory()) {
      return null;
    }

    if (stats.size > MAX_FILE_SIZE_BYTES) {
      return {
        content: `[File too large: ${(stats.size / 1024).toFixed(1)}KB]`,
        size: stats.size,
      };
    }

    return { content: readFileSync(fullPath, "utf8"), size: stats.size };
  } catch {
    // Missing, unreadable, or removed between stat and read
    return null;
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Get a depth-first directory tree listing.
 *
 * Entries whose name starts with "." and directories/files named
 * "node_modules" or "vendor" are skipped. Directories count toward the limit.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} [subPath] - Subdirectory to list
 * @param {number} [maxFiles] - Max entries (files and directories) to return
 * @returns {Array<{path: string, type: 'file'|'dir', size?: number}>}
 *   Entry paths are `subPath` joined with the position below it. Empty when
 *   the target leaves the repo, does not exist, or cannot be listed.
 */
export function getRepoTree(
  repoPath,
  subPath = "",
  maxFiles = DEFAULT_MAX_FILES,
) {
  const targetPath = join(repoPath, subPath);

  // Security: ensure within repo. Compare path segments instead of string
  // prefixes: "/tmp/repo-evil" starts with "/tmp/repo" but is outside.
  if (/^\.\.(?:[\\/]|$)/.test(relative(repoPath, targetPath))) {
    return [];
  }

  if (!existsSync(targetPath)) {
    return [];
  }

  const results = [];

  function walk(dir, relativePath) {
    if (results.length >= maxFiles) return;

    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Not a directory, or not readable: nothing to list here
      return;
    }

    for (const entry of entries) {
      if (results.length >= maxFiles) break;

      // Skip hidden and common non-source dirs
      if (
        entry.name.startsWith(".") ||
        entry.name === "node_modules" ||
        entry.name === "vendor"
      ) {
        continue;
      }

      const entryRelPath = join(relativePath, entry.name);

      if (entry.isDirectory()) {
        results.push({ path: entryRelPath, type: "dir" });
        walk(join(dir, entry.name), entryRelPath);
      } else if (entry.isFile()) {
        try {
          const { size } = statSync(join(dir, entry.name));
          results.push({ path: entryRelPath, type: "file", size });
        } catch {
          // One unreadable file must not hide the rest of the directory
        }
      }
    }
  }

  walk(targetPath, subPath);
  return results;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Resolve a GitHub URL to local content by cloning the repository.
 *
 * Depending on the URL this yields a directory listing (tree URL with a
 * path), a single file (blob URL with a path), or a README excerpt plus a
 * top-level tree (repo root, or tree URL without a path).
 *
 * @param {string} url - GitHub URL (blob, tree, or root)
 * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, localPath?: string, tree?: Array}>}
 */
export async function fetchGitHubContent(url) {
  const target = parseGitHubUrl(url);
  if (!target) {
    return { ok: false, error: "Not a valid GitHub URL" };
  }

  const { owner, repo, type, ref, path } = target;

  // Make sure a local checkout exists
  const { path: repoPath, error: cloneError } = await cloneGitHubRepo(
    owner,
    repo,
    ref,
  );
  if (cloneError) {
    return { ok: false, error: `Clone failed: ${cloneError}` };
  }

  const hasPath = Boolean(path);

  // Directory inside the repo: list its entries
  if (type === "tree" && hasPath) {
    const listing = getRepoTree(repoPath, path, 50);
    const rows = [];
    for (const node of listing) {
      const icon = node.type === "dir" ? "📁" : "📄";
      rows.push(` ${icon} ${node.path}`);
    }

    return {
      ok: true,
      title: `${owner}/${repo}/${path}`,
      content: `[Directory: ${path}]\n\nFiles:\n${rows.join("\n")}`,
      localPath: join(repoPath, path),
      tree: listing,
    };
  }

  // Single file inside the repo
  if (type === "blob" && hasPath) {
    const file = readRepoFile(repoPath, path);
    if (file) {
      return {
        ok: true,
        title: `${owner}/${repo}: ${path}`,
        content: file.content,
        localPath: join(repoPath, path),
      };
    }
    return { ok: false, error: `File not found: ${path}` };
  }

  // Repo root (a tree URL that reaches this point has no path)
  if (type === "root" || type === "tree") {
    const topLevel = getRepoTree(repoPath, "", 50);

    // Take the first README spelling that can be read
    const candidates = ["README.md", "Readme.md", "readme.md", "README.MD"];
    let readme = null;
    for (let i = 0; i < candidates.length && !readme; i += 1) {
      readme = readRepoFile(repoPath, candidates[i]);
    }
    // Only the first 5000 characters of the README are returned
    const excerpt = readme ? readme.content.slice(0, 5000) : "";

    return {
      ok: true,
      title: `${owner}/${repo}`,
      content: excerpt || `[Repository: ${owner}/${repo}]`,
      localPath: repoPath,
      tree: topLevel.slice(0, 30),
    };
  }

  return { ok: false, error: "Unsupported GitHub URL type" };
}
|