@apmantza/greedysearch-pi 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/github.mjs CHANGED
@@ -1,323 +1,323 @@
1
- // src/github.mjs - GitHub repo cloning for better code extraction
2
-
3
- import { execFile } from "node:child_process";
4
- import {
5
- existsSync,
6
- mkdtempSync,
7
- readdirSync,
8
- readFileSync,
9
- statSync,
10
- } from "node:fs";
11
- import { tmpdir } from "node:os";
12
- import { join, relative } from "node:path";
13
-
14
- const CLONE_CACHE = new Map(); // repo key -> path
15
- const DEFAULT_MAX_FILES = 50;
16
- const MAX_FILE_SIZE_BYTES = 1024 * 1024; // 1MB per file
17
-
18
- /**
19
- * Parse a GitHub URL into components
20
- * @param {string} url
21
- * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
22
- */
23
- export function parseGitHubUrl(url) {
24
- try {
25
- const parsed = new URL(url);
26
- if (!parsed.hostname.endsWith("github.com")) {
27
- return null;
28
- }
29
-
30
- const parts = parsed.pathname.split("/").filter(Boolean);
31
- if (parts.length < 2) {
32
- return null; // Need at least owner/repo
33
- }
34
-
35
- const [owner, repo] = parts;
36
-
37
- // Root: github.com/owner/repo
38
- if (parts.length === 2) {
39
- return { owner, repo, type: "root" };
40
- }
41
-
42
- // With type: github.com/owner/repo/blob|tree/ref/path
43
- if (parts.length >= 4 && (parts[2] === "blob" || parts[2] === "tree")) {
44
- const type = parts[2];
45
- const ref = parts[3];
46
- const path = parts.slice(4).join("/");
47
- return { owner, repo, type, ref, path };
48
- }
49
-
50
- return null;
51
- } catch {
52
- return null;
53
- }
54
- }
55
-
56
- /**
57
- * Check if git CLI is available
58
- */
59
- async function checkGitAvailable() {
60
- try {
61
- await execFile("git", ["--version"]);
62
- return true;
63
- } catch {
64
- return false;
65
- }
66
- }
67
-
68
- /**
69
- * Clone a GitHub repo and return local path
70
- * @param {string} owner - Repo owner
71
- * @param {string} repo - Repo name
72
- * @param {string} [ref] - Branch/tag/commit (default: main/master)
73
- * @returns {Promise<{path: string, cached: boolean, error?: string}>}
74
- */
75
- export async function cloneGitHubRepo(owner, repo, ref = "HEAD") {
76
- const cacheKey = `${owner}/${repo}@${ref}`;
77
-
78
- // Check cache
79
- if (CLONE_CACHE.has(cacheKey)) {
80
- const cachedPath = CLONE_CACHE.get(cacheKey);
81
- if (existsSync(cachedPath)) {
82
- return { path: cachedPath, cached: true };
83
- }
84
- // Cache stale, remove
85
- CLONE_CACHE.delete(cacheKey);
86
- }
87
-
88
- // Check git available
89
- if (!(await checkGitAvailable())) {
90
- return { path: "", cached: false, error: "git CLI not available" };
91
- }
92
-
93
- // Create temp directory
94
- const tempBase = mkdtempSync(join(tmpdir(), `github-${owner}-${repo}-`));
95
- const clonePath = join(tempBase, "repo");
96
-
97
- try {
98
- // Shallow clone
99
- await execFile(
100
- "git",
101
- [
102
- "clone",
103
- "--depth",
104
- "1",
105
- "--single-branch",
106
- "--branch",
107
- ref === "HEAD" ? "main" : ref,
108
- `https://github.com/${owner}/${repo}.git`,
109
- clonePath,
110
- ],
111
- { timeout: 60000 },
112
- );
113
-
114
- // Cache result
115
- CLONE_CACHE.set(cacheKey, clonePath);
116
-
117
- return { path: clonePath, cached: false };
118
- } catch (error) {
119
- // Try 'master' if 'main' failed
120
- if (ref === "HEAD") {
121
- try {
122
- await execFile(
123
- "git",
124
- [
125
- "clone",
126
- "--depth",
127
- "1",
128
- "--single-branch",
129
- "--branch",
130
- "master",
131
- `https://github.com/${owner}/${repo}.git`,
132
- clonePath,
133
- ],
134
- { timeout: 60000 },
135
- );
136
-
137
- CLONE_CACHE.set(cacheKey, clonePath);
138
- return { path: clonePath, cached: false };
139
- } catch {
140
- // Fall through to error
141
- }
142
- }
143
-
144
- return { path: "", cached: false, error: error.message };
145
- }
146
- }
147
-
148
- /**
149
- * Read a file from cloned repo
150
- * @param {string} repoPath - Local repo path
151
- * @param {string} filePath - Relative path within repo
152
- * @returns {{content: string, size: number} | null}
153
- */
154
- export function readRepoFile(repoPath, filePath) {
155
- const fullPath = join(repoPath, filePath);
156
-
157
- // Security: ensure path is within repo
158
- if (!fullPath.startsWith(repoPath)) {
159
- return null;
160
- }
161
-
162
- if (!existsSync(fullPath)) {
163
- return null;
164
- }
165
-
166
- const stats = statSync(fullPath);
167
- if (stats.isDirectory()) {
168
- return null;
169
- }
170
-
171
- if (stats.size > MAX_FILE_SIZE_BYTES) {
172
- return {
173
- content: `[File too large: ${(stats.size / 1024).toFixed(1)}KB]`,
174
- size: stats.size,
175
- };
176
- }
177
-
178
- try {
179
- const content = readFileSync(fullPath, "utf8");
180
- return { content, size: stats.size };
181
- } catch {
182
- return null;
183
- }
184
- }
185
-
186
- /**
187
- * Get directory tree listing
188
- * @param {string} repoPath - Local repo path
189
- * @param {string} [subPath] - Subdirectory to list
190
- * @param {number} [maxFiles] - Max files to return
191
- * @returns {Array<{path: string, type: 'file'|'dir', size?: number}>}
192
- */
193
- export function getRepoTree(
194
- repoPath,
195
- subPath = "",
196
- maxFiles = DEFAULT_MAX_FILES,
197
- ) {
198
- const targetPath = join(repoPath, subPath);
199
-
200
- // Security: ensure within repo
201
- if (!targetPath.startsWith(repoPath)) {
202
- return [];
203
- }
204
-
205
- if (!existsSync(targetPath)) {
206
- return [];
207
- }
208
-
209
- const results = [];
210
-
211
- function walk(dir, relativePath) {
212
- if (results.length >= maxFiles) return;
213
-
214
- try {
215
- const entries = readdirSync(dir, { withFileTypes: true });
216
-
217
- for (const entry of entries) {
218
- if (results.length >= maxFiles) break;
219
-
220
- // Skip hidden and common non-source dirs
221
- if (
222
- entry.name.startsWith(".") ||
223
- entry.name === "node_modules" ||
224
- entry.name === "vendor"
225
- ) {
226
- continue;
227
- }
228
-
229
- const entryRelPath = join(relativePath, entry.name);
230
-
231
- if (entry.isDirectory()) {
232
- results.push({ path: entryRelPath, type: "dir" });
233
- walk(join(dir, entry.name), entryRelPath);
234
- } else if (entry.isFile()) {
235
- const stats = statSync(join(dir, entry.name));
236
- results.push({ path: entryRelPath, type: "file", size: stats.size });
237
- }
238
- }
239
- } catch {
240
- // Ignore permission errors
241
- }
242
- }
243
-
244
- walk(targetPath, subPath);
245
- return results;
246
- }
247
-
248
- /**
249
- * Fetch GitHub content by cloning repo
250
- * @param {string} url - GitHub URL (blob, tree, or root)
251
- * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, localPath?: string, tree?: Array}>}
252
- */
253
- export async function fetchGitHubContent(url) {
254
- const parsed = parseGitHubUrl(url);
255
- if (!parsed) {
256
- return { ok: false, error: "Not a valid GitHub URL" };
257
- }
258
-
259
- const { owner, repo, type, ref, path } = parsed;
260
-
261
- // Clone repo
262
- const cloneResult = await cloneGitHubRepo(owner, repo, ref);
263
- if (cloneResult.error) {
264
- return { ok: false, error: `Clone failed: ${cloneResult.error}` };
265
- }
266
-
267
- const repoPath = cloneResult.path;
268
-
269
- // Handle different URL types
270
- if (type === "root" || (type === "tree" && !path)) {
271
- // Return README + tree
272
- const tree = getRepoTree(repoPath, "", 50);
273
-
274
- // Try to find README
275
- const readmeNames = ["README.md", "Readme.md", "readme.md", "README.MD"];
276
- let readmeContent = "";
277
- for (const name of readmeNames) {
278
- const readme = readRepoFile(repoPath, name);
279
- if (readme) {
280
- readmeContent = readme.content.slice(0, 5000); // First 5KB of README
281
- break;
282
- }
283
- }
284
-
285
- return {
286
- ok: true,
287
- title: `${owner}/${repo}`,
288
- content: readmeContent || `[Repository: ${owner}/${repo}]`,
289
- localPath: repoPath,
290
- tree: tree.slice(0, 30),
291
- };
292
- }
293
-
294
- if (type === "blob" && path) {
295
- // Return specific file
296
- const file = readRepoFile(repoPath, path);
297
- if (!file) {
298
- return { ok: false, error: `File not found: ${path}` };
299
- }
300
-
301
- return {
302
- ok: true,
303
- title: `${owner}/${repo}: ${path}`,
304
- content: file.content,
305
- localPath: join(repoPath, path),
306
- };
307
- }
308
-
309
- if (type === "tree" && path) {
310
- // Return directory listing
311
- const tree = getRepoTree(repoPath, path, 50);
312
-
313
- return {
314
- ok: true,
315
- title: `${owner}/${repo}/${path}`,
316
- content: `[Directory: ${path}]\n\nFiles:\n${tree.map((t) => ` ${t.type === "dir" ? "📁" : "📄"} ${t.path}`).join("\n")}`,
317
- localPath: join(repoPath, path),
318
- tree,
319
- };
320
- }
321
-
322
- return { ok: false, error: "Unsupported GitHub URL type" };
323
- }
1
+ // src/github.mjs - GitHub repo cloning for better code extraction
2
+
3
+ import { execFile } from "node:child_process";
4
+ import {
5
+ existsSync,
6
+ mkdtempSync,
7
+ readdirSync,
8
+ readFileSync,
9
+ statSync,
10
+ } from "node:fs";
11
+ import { tmpdir } from "node:os";
12
+ import { join, relative } from "node:path";
13
+
14
+ const CLONE_CACHE = new Map(); // repo key -> path
15
+ const DEFAULT_MAX_FILES = 50;
16
+ const MAX_FILE_SIZE_BYTES = 1024 * 1024; // 1MB per file
17
+
18
/**
 * Parse a GitHub URL into its components.
 *
 * Supported shapes:
 *   - https://github.com/<owner>/<repo>[.git]                    -> type "root"
 *   - https://github.com/<owner>/<repo>/blob|tree/<ref>[/<path>] -> type "blob" / "tree"
 *
 * The ref is taken to be the single path segment that follows blob/tree, so
 * a ref that itself contains "/" (e.g. "feature/x") cannot be distinguished
 * from the start of the path.
 *
 * @param {string} url
 * @returns {{owner: string, repo: string, type: 'blob'|'tree'|'root', ref?: string, path?: string} | null}
 *   The parsed components, or null when the input is not a supported GitHub URL.
 */
export function parseGitHubUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }

  // Match the host exactly or as a real subdomain. A bare suffix test would
  // also accept look-alike hosts such as "evilgithub.com".
  const { hostname } = parsed;
  if (hostname !== "github.com" && !hostname.endsWith(".github.com")) {
    return null;
  }

  const parts = parsed.pathname.split("/").filter(Boolean);
  if (parts.length < 2) {
    return null; // Need at least owner/repo
  }

  const [owner, rawRepo, type, rawRef, ...rawPath] = parts;

  // Clone-style URLs end in ".git"; drop it so the clone URL built from
  // `repo` does not end up as "<repo>.git.git".
  const repo = rawRepo.endsWith(".git") ? rawRepo.slice(0, -4) : rawRepo;
  if (repo === "") {
    return null;
  }

  // Root: github.com/owner/repo
  if (parts.length === 2) {
    return { owner, repo, type: "root" };
  }

  // With type: github.com/owner/repo/blob|tree/ref/path
  if (rawRef !== undefined && (type === "blob" || type === "tree")) {
    // URL.pathname stays percent-encoded; decode so that names like
    // "my%20file.md" match the file on disk. Malformed escapes are kept as-is.
    const decodeSegment = (segment) => {
      try {
        return decodeURIComponent(segment);
      } catch {
        return segment;
      }
    };

    return {
      owner,
      repo,
      type,
      ref: decodeSegment(rawRef),
      path: rawPath.map(decodeSegment).join("/"),
    };
  }

  return null;
}
55
+
56
/**
 * Check whether the `git` CLI can be executed.
 *
 * `execFile` is callback-based and returns a ChildProcess, not a promise, so
 * awaiting the bare call would resolve immediately and always report success.
 * It is wrapped in a Promise here so the result reflects the actual exit
 * status of `git --version`.
 *
 * @returns {Promise<boolean>} true when `git --version` runs successfully,
 *   false when it cannot be spawned, exits with an error, or times out.
 */
async function checkGitAvailable() {
  try {
    await new Promise((resolve, reject) => {
      execFile("git", ["--version"], { timeout: 10000 }, (error) => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
    return true;
  } catch {
    return false;
  }
}
67
+
68
/**
 * Run `git` with the given arguments and settle once the process has exited.
 *
 * `execFile` reports completion through its callback, so it is adapted to a
 * promise here; awaiting the bare call would not wait for the process.
 *
 * @param {string[]} args - Arguments passed to the git executable
 * @param {object} options - Options forwarded to `execFile`
 * @returns {Promise<{stdout: string, stderr: string}>}
 */
function runGitClone(args, options) {
  return new Promise((resolve, reject) => {
    execFile("git", args, options, (error, stdout, stderr) => {
      if (error) {
        reject(error);
      } else {
        resolve({ stdout, stderr });
      }
    });
  });
}

/**
 * Shallow-clone a GitHub repo into a fresh temp directory and return its path.
 *
 * Successful clones are remembered in CLONE_CACHE under "owner/repo@ref" and
 * reused for as long as the directory still exists on disk.
 *
 * @param {string} owner - Repo owner
 * @param {string} repo - Repo name
 * @param {string} [ref] - Branch or tag to check out. The default "HEAD"
 *   clones whatever branch the remote marks as its default, which covers
 *   main, master and any other default-branch name.
 * @returns {Promise<{path: string, cached: boolean, error?: string}>}
 *   On failure `path` is "" and `error` carries the failure message; the
 *   function does not throw for clone failures.
 */
export async function cloneGitHubRepo(owner, repo, ref = "HEAD") {
  const cacheKey = `${owner}/${repo}@${ref}`;

  // Check cache
  const cachedPath = CLONE_CACHE.get(cacheKey);
  if (cachedPath !== undefined) {
    if (existsSync(cachedPath)) {
      return { path: cachedPath, cached: true };
    }
    // Cache stale, remove
    CLONE_CACHE.delete(cacheKey);
  }

  // Check git available
  if (!(await checkGitAvailable())) {
    return { path: "", cached: false, error: "git CLI not available" };
  }

  const cloneArgs = ["clone", "--depth", "1", "--single-branch"];
  if (ref !== "HEAD") {
    // "--branch=<ref>" keeps the ref attached to its option even when the
    // ref begins with "-".
    cloneArgs.push(`--branch=${ref}`);
  }

  try {
    // Create temp directory
    const tempBase = mkdtempSync(join(tmpdir(), `github-${owner}-${repo}-`));
    const clonePath = join(tempBase, "repo");

    await runGitClone(
      // "--" ends option parsing before the positional URL and target path.
      [...cloneArgs, "--", `https://github.com/${owner}/${repo}.git`, clonePath],
      {
        timeout: 60000,
        // Fail fast instead of waiting on an interactive credential prompt
        // (e.g. for private or non-existent repositories).
        env: { ...process.env, GIT_TERMINAL_PROMPT: "0" },
      },
    );

    // Cache only after the clone has actually completed.
    CLONE_CACHE.set(cacheKey, clonePath);
    return { path: clonePath, cached: false };
  } catch (error) {
    // NOTE: the temp directory created above is left behind on failure.
    return { path: "", cached: false, error: error.message };
  }
}
147
+
148
/**
 * Read a text file from a cloned repo.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} filePath - Relative path within repo
 * @returns {{content: string, size: number} | null}
 *   The UTF-8 content and byte size of the file. Files larger than
 *   MAX_FILE_SIZE_BYTES yield a short "[File too large: …]" placeholder as
 *   content. Returns null when the path escapes the repo, does not exist, is
 *   not a regular file, or cannot be read.
 */
export function readRepoFile(repoPath, filePath) {
  const fullPath = join(repoPath, filePath);

  // Security: ensure path is within repo. Compare path components rather
  // than string prefixes: a prefix test lets "<repo>-other/…" through and
  // rejects valid paths when repoPath is not normalized.
  // NOTE: this check is lexical only; statSync/readFileSync below still
  // follow symlinks that exist inside the repo.
  const relPath = relative(repoPath, fullPath);
  if (relPath.split(/[\\/]/)[0] === "..") {
    return null;
  }

  let stats;
  try {
    stats = statSync(fullPath);
  } catch {
    // Missing or inaccessible path.
    return null;
  }

  // Directories and special files (FIFOs, sockets, …) are not readable content.
  if (!stats.isFile()) {
    return null;
  }

  if (stats.size > MAX_FILE_SIZE_BYTES) {
    return {
      content: `[File too large: ${(stats.size / 1024).toFixed(1)}KB]`,
      size: stats.size,
    };
  }

  try {
    const content = readFileSync(fullPath, "utf8");
    return { content, size: stats.size };
  } catch {
    return null;
  }
}
185
+
186
/**
 * Get a recursive directory listing of a cloned repo (or a subdirectory).
 *
 * Entries are collected depth-first: a directory entry is followed by its
 * contents. Names starting with "." and the directories "node_modules" and
 * "vendor" are skipped. Both files and directories count towards `maxFiles`.
 *
 * @param {string} repoPath - Local repo path
 * @param {string} [subPath] - Subdirectory to list
 * @param {number} [maxFiles] - Max entries to return
 * @returns {Array<{path: string, type: 'file'|'dir', size?: number}>}
 *   Entry paths are `subPath` joined with the path below it. Returns an
 *   empty array when the target escapes the repo, does not exist, or cannot
 *   be read as a directory.
 */
export function getRepoTree(
  repoPath,
  subPath = "",
  maxFiles = DEFAULT_MAX_FILES,
) {
  const targetPath = join(repoPath, subPath);

  // Security: ensure within repo. Compare path components, not string
  // prefixes, so a sibling such as "<repo>-other" is not treated as inside.
  const relTarget = relative(repoPath, targetPath);
  if (relTarget.split(/[\\/]/)[0] === "..") {
    return [];
  }

  if (!existsSync(targetPath)) {
    return [];
  }

  const results = [];

  function walk(dir, relativePath) {
    if (results.length >= maxFiles) return;

    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable (or not a directory): skip this subtree.
      return;
    }

    for (const entry of entries) {
      if (results.length >= maxFiles) break;

      // Skip hidden and common non-source dirs
      if (
        entry.name.startsWith(".") ||
        entry.name === "node_modules" ||
        entry.name === "vendor"
      ) {
        continue;
      }

      const entryRelPath = join(relativePath, entry.name);
      const entryFullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        results.push({ path: entryRelPath, type: "dir" });
        walk(entryFullPath, entryRelPath);
      } else if (entry.isFile()) {
        try {
          const { size } = statSync(entryFullPath);
          results.push({ path: entryRelPath, type: "file", size });
        } catch {
          // A single unreadable file should not hide its siblings.
        }
      }
    }
  }

  walk(targetPath, subPath);
  return results;
}
247
+
248
/**
 * Fetch GitHub content by cloning the repo and reading from the local copy.
 *
 * Depending on the URL shape the result is:
 *   - root, or tree without a path: the start of the README plus a file tree
 *   - blob with a path: the content of that file
 *   - tree with a path: a listing of that directory
 *
 * @param {string} url - GitHub URL (blob, tree, or root)
 * @returns {Promise<{ok: boolean, content?: string, title?: string, error?: string, localPath?: string, tree?: Array}>}
 *   `ok: false` with an `error` message when the URL is unsupported, the
 *   clone fails, or the requested file/directory does not exist.
 */
export async function fetchGitHubContent(url) {
  const parsed = parseGitHubUrl(url);
  if (!parsed) {
    return { ok: false, error: "Not a valid GitHub URL" };
  }

  const { owner, repo, type, ref, path } = parsed;

  // Clone repo
  const cloneResult = await cloneGitHubRepo(owner, repo, ref);
  if (cloneResult.error) {
    return { ok: false, error: `Clone failed: ${cloneResult.error}` };
  }

  const repoPath = cloneResult.path;

  // Repository overview: README + tree
  if (type === "root" || (type === "tree" && !path)) {
    // Only 30 entries are returned, so ask for exactly that many.
    const tree = getRepoTree(repoPath, "", 30);

    // Try to find README
    const readmeNames = ["README.md", "Readme.md", "readme.md", "README.MD"];
    let readmeContent = "";
    for (const name of readmeNames) {
      const readme = readRepoFile(repoPath, name);
      if (readme) {
        readmeContent = readme.content.slice(0, 5000); // First 5000 characters
        break;
      }
    }

    return {
      ok: true,
      title: `${owner}/${repo}`,
      content: readmeContent || `[Repository: ${owner}/${repo}]`,
      localPath: repoPath,
      tree,
    };
  }

  // Specific file
  if (type === "blob" && path) {
    const file = readRepoFile(repoPath, path);
    if (!file) {
      return { ok: false, error: `File not found: ${path}` };
    }

    return {
      ok: true,
      title: `${owner}/${repo}: ${path}`,
      content: file.content,
      localPath: join(repoPath, path),
    };
  }

  // Directory listing
  if (type === "tree" && path) {
    const dirPath = join(repoPath, path);

    // Report a missing directory as an error rather than as a successful,
    // empty listing (mirrors the "File not found" handling for blobs).
    if (!existsSync(dirPath)) {
      return { ok: false, error: `Directory not found: ${path}` };
    }

    const tree = getRepoTree(repoPath, path, 50);
    const listing = tree
      .map((t) => ` ${t.type === "dir" ? "📁" : "📄"} ${t.path}`)
      .join("\n");

    return {
      ok: true,
      title: `${owner}/${repo}/${path}`,
      content: `[Directory: ${path}]\n\nFiles:\n${listing}`,
      localPath: dirPath,
      tree,
    };
  }

  return { ok: false, error: "Unsupported GitHub URL type" };
}