@apmantza/greedysearch-pi 1.7.6 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/github.mjs CHANGED
@@ -1,323 +1,237 @@
1
- // src/github.mjs - GitHub repo cloning for better code extraction
2
-
3
- import { execFile } from "node:child_process";
4
- import {
5
- existsSync,
6
- mkdtempSync,
7
- readdirSync,
8
- readFileSync,
9
- statSync,
10
- } from "node:fs";
11
- import { tmpdir } from "node:os";
12
- import { join, relative } from "node:path";
13
-
14
- const CLONE_CACHE = new Map(); // repo key -> path
15
- const DEFAULT_MAX_FILES = 50;
16
- const MAX_FILE_SIZE_BYTES = 1024 * 1024; // 1MB per file
17
-
18
- /**
19
- * Parse a GitHub URL into components
20
- * @param {string} url
21
- * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
22
- */
23
- export function parseGitHubUrl(url) {
24
- try {
25
- const parsed = new URL(url);
26
- if (!parsed.hostname.endsWith("github.com")) {
27
- return null;
28
- }
29
-
30
- const parts = parsed.pathname.split("/").filter(Boolean);
31
- if (parts.length < 2) {
32
- return null; // Need at least owner/repo
33
- }
34
-
35
- const [owner, repo] = parts;
36
-
37
- // Root: github.com/owner/repo
38
- if (parts.length === 2) {
39
- return { owner, repo, type: "root" };
40
- }
41
-
42
- // With type: github.com/owner/repo/blob|tree/ref/path
43
- if (parts.length >= 4 && (parts[2] === "blob" || parts[2] === "tree")) {
44
- const type = parts[2];
45
- const ref = parts[3];
46
- const path = parts.slice(4).join("/");
47
- return { owner, repo, type, ref, path };
48
- }
49
-
50
- return null;
51
- } catch {
52
- return null;
53
- }
54
- }
55
-
56
- /**
57
- * Check if git CLI is available
58
- */
59
- async function checkGitAvailable() {
60
- try {
61
- await execFile("git", ["--version"]);
62
- return true;
63
- } catch {
64
- return false;
65
- }
66
- }
67
-
68
- /**
69
- * Clone a GitHub repo and return local path
70
- * @param {string} owner - Repo owner
71
- * @param {string} repo - Repo name
72
- * @param {string} [ref] - Branch/tag/commit (default: main/master)
73
- * @returns {Promise<{path: string, cached: boolean, error?: string}>}
74
- */
75
- export async function cloneGitHubRepo(owner, repo, ref = "HEAD") {
76
- const cacheKey = `${owner}/${repo}@${ref}`;
77
-
78
- // Check cache
79
- if (CLONE_CACHE.has(cacheKey)) {
80
- const cachedPath = CLONE_CACHE.get(cacheKey);
81
- if (existsSync(cachedPath)) {
82
- return { path: cachedPath, cached: true };
83
- }
84
- // Cache stale, remove
85
- CLONE_CACHE.delete(cacheKey);
86
- }
87
-
88
- // Check git available
89
- if (!(await checkGitAvailable())) {
90
- return { path: "", cached: false, error: "git CLI not available" };
91
- }
92
-
93
- // Create temp directory
94
- const tempBase = mkdtempSync(join(tmpdir(), `github-${owner}-${repo}-`));
95
- const clonePath = join(tempBase, "repo");
96
-
97
- try {
98
- // Shallow clone
99
- await execFile(
100
- "git",
101
- [
102
- "clone",
103
- "--depth",
104
- "1",
105
- "--single-branch",
106
- "--branch",
107
- ref === "HEAD" ? "main" : ref,
108
- `https://github.com/${owner}/${repo}.git`,
109
- clonePath,
110
- ],
111
- { timeout: 60000 },
112
- );
113
-
114
- // Cache result
115
- CLONE_CACHE.set(cacheKey, clonePath);
116
-
117
- return { path: clonePath, cached: false };
118
- } catch (error) {
119
- // Try 'master' if 'main' failed
120
- if (ref === "HEAD") {
121
- try {
122
- await execFile(
123
- "git",
124
- [
125
- "clone",
126
- "--depth",
127
- "1",
128
- "--single-branch",
129
- "--branch",
130
- "master",
131
- `https://github.com/${owner}/${repo}.git`,
132
- clonePath,
133
- ],
134
- { timeout: 60000 },
135
- );
136
-
137
- CLONE_CACHE.set(cacheKey, clonePath);
138
- return { path: clonePath, cached: false };
139
- } catch {
140
- // Fall through to error
141
- }
142
- }
143
-
144
- return { path: "", cached: false, error: error.message };
145
- }
146
- }
147
-
148
- /**
149
- * Read a file from cloned repo
150
- * @param {string} repoPath - Local repo path
151
- * @param {string} filePath - Relative path within repo
152
- * @returns {{content: string, size: number} | null}
153
- */
154
- export function readRepoFile(repoPath, filePath) {
155
- const fullPath = join(repoPath, filePath);
156
-
157
- // Security: ensure path is within repo
158
- if (!fullPath.startsWith(repoPath)) {
159
- return null;
160
- }
161
-
162
- if (!existsSync(fullPath)) {
163
- return null;
164
- }
165
-
166
- const stats = statSync(fullPath);
167
- if (stats.isDirectory()) {
168
- return null;
169
- }
170
-
171
- if (stats.size > MAX_FILE_SIZE_BYTES) {
172
- return {
173
- content: `[File too large: ${(stats.size / 1024).toFixed(1)}KB]`,
174
- size: stats.size,
175
- };
176
- }
177
-
178
- try {
179
- const content = readFileSync(fullPath, "utf8");
180
- return { content, size: stats.size };
181
- } catch {
182
- return null;
183
- }
184
- }
185
-
186
- /**
187
- * Get directory tree listing
188
- * @param {string} repoPath - Local repo path
189
- * @param {string} [subPath] - Subdirectory to list
190
- * @param {number} [maxFiles] - Max files to return
191
- * @returns {Array<{path: string, type: 'file'|'dir', size?: number}>}
192
- */
193
- export function getRepoTree(
194
- repoPath,
195
- subPath = "",
196
- maxFiles = DEFAULT_MAX_FILES,
197
- ) {
198
- const targetPath = join(repoPath, subPath);
199
-
200
- // Security: ensure within repo
201
- if (!targetPath.startsWith(repoPath)) {
202
- return [];
203
- }
204
-
205
- if (!existsSync(targetPath)) {
206
- return [];
207
- }
208
-
209
- const results = [];
210
-
211
- function walk(dir, relativePath) {
212
- if (results.length >= maxFiles) return;
213
-
214
- try {
215
- const entries = readdirSync(dir, { withFileTypes: true });
216
-
217
- for (const entry of entries) {
218
- if (results.length >= maxFiles) break;
219
-
220
- // Skip hidden and common non-source dirs
221
- if (
222
- entry.name.startsWith(".") ||
223
- entry.name === "node_modules" ||
224
- entry.name === "vendor"
225
- ) {
226
- continue;
227
- }
228
-
229
- const entryRelPath = join(relativePath, entry.name);
230
-
231
- if (entry.isDirectory()) {
232
- results.push({ path: entryRelPath, type: "dir" });
233
- walk(join(dir, entry.name), entryRelPath);
234
- } else if (entry.isFile()) {
235
- const stats = statSync(join(dir, entry.name));
236
- results.push({ path: entryRelPath, type: "file", size: stats.size });
237
- }
238
- }
239
- } catch {
240
- // Ignore permission errors
241
- }
242
- }
243
-
244
- walk(targetPath, subPath);
245
- return results;
246
- }
247
-
248
- /**
249
- * Fetch GitHub content by cloning repo
250
- * @param {string} url - GitHub URL (blob, tree, or root)
251
- * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, localPath?: string, tree?: Array}>}
252
- */
253
- export async function fetchGitHubContent(url) {
254
- const parsed = parseGitHubUrl(url);
255
- if (!parsed) {
256
- return { ok: false, error: "Not a valid GitHub URL" };
257
- }
258
-
259
- const { owner, repo, type, ref, path } = parsed;
260
-
261
- // Clone repo
262
- const cloneResult = await cloneGitHubRepo(owner, repo, ref);
263
- if (cloneResult.error) {
264
- return { ok: false, error: `Clone failed: ${cloneResult.error}` };
265
- }
266
-
267
- const repoPath = cloneResult.path;
268
-
269
- // Handle different URL types
270
- if (type === "root" || (type === "tree" && !path)) {
271
- // Return README + tree
272
- const tree = getRepoTree(repoPath, "", 50);
273
-
274
- // Try to find README
275
- const readmeNames = ["README.md", "Readme.md", "readme.md", "README.MD"];
276
- let readmeContent = "";
277
- for (const name of readmeNames) {
278
- const readme = readRepoFile(repoPath, name);
279
- if (readme) {
280
- readmeContent = readme.content.slice(0, 5000); // First 5KB of README
281
- break;
282
- }
283
- }
284
-
285
- return {
286
- ok: true,
287
- title: `${owner}/${repo}`,
288
- content: readmeContent || `[Repository: ${owner}/${repo}]`,
289
- localPath: repoPath,
290
- tree: tree.slice(0, 30),
291
- };
292
- }
293
-
294
- if (type === "blob" && path) {
295
- // Return specific file
296
- const file = readRepoFile(repoPath, path);
297
- if (!file) {
298
- return { ok: false, error: `File not found: ${path}` };
299
- }
300
-
301
- return {
302
- ok: true,
303
- title: `${owner}/${repo}: ${path}`,
304
- content: file.content,
305
- localPath: join(repoPath, path),
306
- };
307
- }
308
-
309
- if (type === "tree" && path) {
310
- // Return directory listing
311
- const tree = getRepoTree(repoPath, path, 50);
312
-
313
- return {
314
- ok: true,
315
- title: `${owner}/${repo}/${path}`,
316
- content: `[Directory: ${path}]\n\nFiles:\n${tree.map((t) => ` ${t.type === "dir" ? "📁" : "📄"} ${t.path}`).join("\n")}`,
317
- localPath: join(repoPath, path),
318
- tree,
319
- };
320
- }
321
-
322
- return { ok: false, error: "Unsupported GitHub URL type" };
323
- }
1
+ // src/github.mjs - GitHub content fetching via REST API
2
+
/** Base URL of the GitHub REST API; request paths are appended to it verbatim. */
const GITHUB_API = "https://api.github.com";

/**
 * Headers sent with every GitHub REST API request.
 *
 * The object is shared by reference between all requests, so it is frozen to
 * guarantee that no call site can accidentally alter the headers used by
 * later requests.
 */
const DEFAULT_HEADERS = Object.freeze({
  "user-agent": "GreedySearch/1.0",
  accept: "application/vnd.github+json",
  "x-github-api-version": "2022-11-28",
});
9
+
/**
 * Parse a GitHub web URL into its components.
 *
 * Only `github.com` and `www.github.com` are accepted. A suffix match on the
 * hostname would also accept look-alike domains (e.g. `evilgithub.com`) and
 * non-repository subdomains (e.g. `gist.github.com`), whose paths do not
 * follow the `owner/repo` layout.
 *
 * Path segments are returned exactly as they appear in the URL path (that is,
 * still percent-encoded), so they can be spliced back into request URLs.
 *
 * @param {string} url
 * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
 *   Parsed components, or `null` when the input is not a supported GitHub URL.
 */
export function parseGitHubUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not parseable as an absolute URL at all.
    return null;
  }

  // URL lowercases the hostname, so exact comparison is sufficient.
  const host = parsed.hostname;
  if (host !== "github.com" && host !== "www.github.com") {
    return null;
  }

  const parts = parsed.pathname.split("/").filter(Boolean);
  if (parts.length < 2) {
    return null; // Need at least owner/repo
  }

  const owner = parts[0];
  // Clone-style URLs end in ".git"; the repository itself is named without it.
  const repo = parts[1].replace(/\.git$/, "");
  if (!repo) {
    return null;
  }

  // Root: github.com/owner/repo
  if (parts.length === 2) {
    return { owner, repo, type: "root" };
  }

  // With type: github.com/owner/repo/blob|tree/ref/path
  if (parts.length >= 4 && (parts[2] === "blob" || parts[2] === "tree")) {
    return {
      owner,
      repo,
      type: parts[2],
      ref: parts[3],
      path: parts.slice(4).join("/"),
    };
  }

  // Any other page (issues, pulls, wiki, ...) is not supported.
  return null;
}
47
+
/**
 * Fetch and decode JSON from the GitHub REST API with an overall timeout.
 *
 * The timeout covers the whole exchange, including reading the response
 * body: the timer is only cleared in `finally`, after `res.json()` has
 * settled, so a response that stalls mid-body is still aborted.
 *
 * @param {string} path - API path starting with "/", appended to GITHUB_API.
 * @param {number} [timeoutMs=10000] - Abort the request after this many ms.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {Error} `GitHub API <status>: <path>` on a non-2xx response,
 *   `GitHub API timeout after <ms>ms: <path>` when the timeout fires (the
 *   original abort error is attached as `cause`), or the underlying
 *   network/parse error otherwise.
 */
async function apiGet(path, timeoutMs = 10000) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(`${GITHUB_API}${path}`, {
      headers: DEFAULT_HEADERS,
      signal: controller.signal,
    });
    if (!res.ok) {
      throw new Error(`GitHub API ${res.status}: ${path}`);
    }
    return await res.json();
  } catch (err) {
    // Only this function can abort the controller, so an aborted signal
    // always means our timeout fired; report that instead of a bare abort.
    if (controller.signal.aborted) {
      throw new Error(`GitHub API timeout after ${timeoutMs}ms: ${path}`, {
        cause: err,
      });
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
69
+
/**
 * Fetch a repository's README as plain text.
 *
 * Best-effort: any failure (missing README, API error, unexpected payload)
 * yields an empty string rather than an exception, so callers can treat the
 * README as optional.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} [ref] - Branch, tag or commit to read the README from.
 *   When omitted (or "HEAD") no `ref` is sent and the API's default applies.
 *   May be percent-encoded as it appears in a URL path.
 * @returns {Promise<string>} Decoded README text, or "" when unavailable.
 */
async function fetchReadme(owner, repo, ref) {
  let query = "";
  if (ref && ref !== "HEAD") {
    let refName = ref;
    try {
      // Refs taken from a URL path are already percent-encoded; normalise
      // first so they are not double-encoded below.
      refName = decodeURIComponent(ref);
    } catch {
      // A literal "%" that is not a valid escape: keep the ref as given.
    }
    query = `?ref=${encodeURIComponent(refName)}`;
  }

  try {
    const data = await apiGet(`/repos/${owner}/${repo}/readme${query}`);
    if (data.content && data.encoding === "base64") {
      return Buffer.from(data.content, "base64").toString("utf8");
    }
    return "";
  } catch {
    // Deliberately swallowed: the README is optional for callers.
    return "";
  }
}
84
+
/**
 * List the entries of one repository directory (non-recursive).
 *
 * Uses the repository Contents API, which resolves branches, tags and commit
 * SHAs alike and lists the requested directory itself. This avoids two
 * problems of resolving `heads/<ref>` and filtering the root tree: a ref
 * that is not a branch silently listing `master` instead, and a nested
 * `subPath` never matching anything because the root tree only contains
 * top-level names.
 *
 * Best-effort: any API failure, or a `subPath` that is not a directory,
 * yields an empty array.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} [ref="HEAD"] - Branch, tag or commit. "HEAD" (or empty)
 *   sends no `ref`, so the API's default applies. May be percent-encoded as
 *   it appears in a URL path.
 * @param {string} [subPath=""] - Directory path inside the repository;
 *   empty for the repository root.
 * @returns {Promise<Array<{path: string, type: 'file'|'dir', size?: number}>>}
 *   At most 50 entries; `path` is relative to the repository root and `size`
 *   is only set for non-directory entries.
 */
async function fetchTree(owner, repo, ref = "HEAD", subPath = "") {
  // Normalise "src/", "/src" and "src" to the same directory.
  const dir = subPath.replace(/^\/+|\/+$/g, "");

  let query = "";
  if (ref && ref !== "HEAD") {
    let refName = ref;
    try {
      // Refs taken from a URL path are already percent-encoded; normalise
      // first so they are not double-encoded below.
      refName = decodeURIComponent(ref);
    } catch {
      // A literal "%" that is not a valid escape: keep the ref as given.
    }
    query = `?ref=${encodeURIComponent(refName)}`;
  }

  const endpoint = `/repos/${owner}/${repo}/contents${dir ? `/${dir}` : ""}${query}`;

  try {
    const entries = await apiGet(endpoint);
    // The endpoint returns a single object (not an array) when the path
    // names a file rather than a directory.
    if (!Array.isArray(entries)) {
      return [];
    }

    return entries.slice(0, 50).map((entry) => {
      const isDir = entry.type === "dir";
      return {
        path: entry.path,
        type: isDir ? "dir" : "file",
        size: isDir ? undefined : entry.size,
      };
    });
  } catch {
    // Deliberately swallowed: the listing is optional for callers.
    return [];
  }
}
124
+
/**
 * Fetch a single file's text via raw.githubusercontent.com.
 *
 * When the caller names an explicit ref, only that ref is tried: falling
 * back to another branch would silently return a different version of the
 * file. Only when no ref is given (undefined, empty or "HEAD") are the
 * conventional default branch names "main" and then "master" attempted.
 *
 * The timeout of each attempt also covers reading the response body.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} [ref] - Branch, tag or commit, as it appears in a URL path.
 * @param {string} filePath - File path inside the repository, as it appears
 *   in a URL path.
 * @param {number} [timeoutMs=10000] - Per-attempt timeout in milliseconds.
 * @returns {Promise<string|null>} File contents, or `null` when no attempt
 *   succeeded (not found, network error or timeout).
 */
async function fetchRawFile(owner, repo, ref, filePath, timeoutMs = 10000) {
  const candidateRefs = ref && ref !== "HEAD" ? [ref] : ["main", "master"];

  for (const candidate of candidateRefs) {
    const url = `https://raw.githubusercontent.com/${owner}/${repo}/${candidate}/${filePath}`;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const res = await fetch(url, {
        headers: { "user-agent": DEFAULT_HEADERS["user-agent"] },
        signal: controller.signal,
      });
      if (res.ok) {
        return await res.text();
      }
    } catch {
      // Network failure or timeout: move on to the next candidate, if any.
    } finally {
      clearTimeout(timer);
    }
  }
  return null;
}
153
+
/**
 * Render tree entries as an icon-prefixed listing, one entry per line.
 * @param {Array<{path: string, type: string}>} items
 * @returns {string}
 */
function formatTreeListing(items) {
  return items
    .map((t) => ` ${t.type === "dir" ? "📁" : "📄"} ${t.path}`)
    .join("\n");
}

/**
 * Fetch GitHub content via API
 *
 * Dispatches on the kind of URL:
 * - repository root, or a tree URL without a path: repository summary line,
 *   followed by the README (first 6000 characters) or, when there is no
 *   README, a file listing; plus up to 30 tree entries;
 * - blob URL: the file's text;
 * - tree URL with a path: a listing of that directory.
 *
 * Never rejects: every failure is reported as `{ ok: false, error }`.
 *
 * @param {string} url - GitHub URL (blob, tree, or root)
 * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, tree?: Array}>}
 */
export async function fetchGitHubContent(url) {
  const parsed = parseGitHubUrl(url);
  if (!parsed) {
    return { ok: false, error: "Not a valid GitHub URL" };
  }

  const { owner, repo, type, ref, path } = parsed;

  try {
    if (type === "root" || (type === "tree" && !path)) {
      // Fetch repo info + README + top-level tree in parallel. The URL's ref
      // (if any) is forwarded to both the README and the tree request so
      // they describe the same revision.
      const [repoInfo, readme, tree] = await Promise.allSettled([
        apiGet(`/repos/${owner}/${repo}`),
        fetchReadme(owner, repo, ref),
        fetchTree(owner, repo, ref || "HEAD"),
      ]);

      // If repo info failed (e.g. 404 — repo doesn't exist), bail out
      if (repoInfo.status === "rejected") {
        return { ok: false, error: repoInfo.reason?.message || "Repo not found" };
      }

      const info = repoInfo.value;
      const readmeText = readme.status === "fulfilled" ? readme.value : "";
      const treeItems = tree.status === "fulfilled" ? tree.value : [];

      const description = info?.description ? `\n\n> ${info.description}` : "";
      // `!= null` on purpose: a star count of 0 should still be shown.
      const stars = info?.stargazers_count != null ? ` ⭐ ${info.stargazers_count}` : "";
      const language = info?.language ? ` · ${info.language}` : "";

      let content = `# ${owner}/${repo}${stars}${language}${description}\n\n`;

      if (readmeText) {
        content += readmeText.slice(0, 6000);
      } else {
        content += `[No README found]\n\nFiles:\n${formatTreeListing(treeItems)}`;
      }

      return {
        ok: true,
        title: `${owner}/${repo}`,
        content,
        tree: treeItems.slice(0, 30),
      };
    }

    if (type === "blob" && path) {
      // Fetch specific file via raw URL
      const content = await fetchRawFile(owner, repo, ref, path);
      if (content === null) {
        return { ok: false, error: `File not found: ${path}` };
      }
      return {
        ok: true,
        title: `${owner}/${repo}: ${path}`,
        content,
      };
    }

    if (type === "tree" && path) {
      // Directory listing via API tree
      const treeItems = await fetchTree(owner, repo, ref || "HEAD", path);

      // Git cannot store an empty directory, so an empty listing means the
      // directory does not exist or the lookup failed. Report that instead
      // of returning a successful result with nothing in it.
      if (treeItems.length === 0) {
        return { ok: false, error: `Directory not found or empty: ${path}` };
      }

      return {
        ok: true,
        title: `${owner}/${repo}/${path}`,
        content: `[Directory: ${path}]\n\nFiles:\n${formatTreeListing(treeItems)}`,
        tree: treeItems,
      };
    }

    return { ok: false, error: "Unsupported GitHub URL type" };
  } catch (err) {
    return { ok: false, error: err.message };
  }
}