voyageai-cli 1.30.1 → 1.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +4 -4
  2. package/package.json +1 -1
  3. package/src/cli.js +2 -0
  4. package/src/commands/about.js +3 -3
  5. package/src/commands/code-search.js +751 -0
  6. package/src/commands/doctor.js +1 -1
  7. package/src/commands/index-workspace.js +9 -5
  8. package/src/commands/playground.js +9 -1
  9. package/src/commands/quickstart.js +4 -4
  10. package/src/commands/workflow.js +132 -65
  11. package/src/lib/catalog.js +4 -2
  12. package/src/lib/code-search.js +315 -0
  13. package/src/lib/codegen.js +1 -1
  14. package/src/lib/explanations.js +3 -3
  15. package/src/lib/github.js +226 -0
  16. package/src/lib/template-engine.js +154 -20
  17. package/src/lib/workflow-builder.js +753 -0
  18. package/src/lib/workflow-formatters.js +454 -0
  19. package/src/lib/workflow-input-cache.js +111 -0
  20. package/src/lib/workflow-scaffold.js +1 -1
  21. package/src/lib/workflow.js +91 -1
  22. package/src/mcp/schemas/index.js +130 -0
  23. package/src/mcp/server.js +17 -4
  24. package/src/mcp/tools/authoring.js +662 -0
  25. package/src/mcp/tools/code-search.js +620 -0
  26. package/src/mcp/tools/ingest.js +2 -5
  27. package/src/mcp/tools/retrieval.js +2 -15
  28. package/src/mcp/tools/workspace.js +1 -12
  29. package/src/mcp/utils.js +20 -0
  30. package/src/playground/help/workflow-nodes.js +127 -2
  31. package/src/playground/index.html +1366 -24
  32. package/src/workflows/code-review.json +110 -0
  33. package/src/workflows/cost-analysis.json +5 -0
  34. package/src/workflows/tests/code-review.fresh-index.test.json +83 -0
  35. package/src/workflows/tests/code-review.happy-path.test.json +121 -0
  36. package/src/workflows/tests/code-review.no-question.test.json +70 -0
  37. package/src/workflows/tests/smart-ingest.duplicate-detected.test.json +2 -2
@@ -0,0 +1,315 @@
1
+ 'use strict';
2
+
3
+ const path = require('path');
4
+ const fs = require('fs');
5
+ const { loadProject } = require('./project');
6
+
7
/** Embedding model used when neither the caller nor the project config names one. */
const DEFAULT_CODE_MODEL = 'voyage-code-3';

/** Database name used when neither the caller nor the project config names one. */
const DEFAULT_DB = 'vai_code_search';

/**
 * Lowercase file extensions (with leading dot) that count as source code.
 * The `.mjs`, `.cjs` and `.mts` variants are listed because the boundary and
 * symbol language maps in this module already recognise them; without them
 * here such files would never be picked up by the file walker.
 */
const CODE_EXTENSIONS = [
  '.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs', '.java', '.c', '.cpp',
  '.h', '.hpp', '.cs', '.rb', '.php', '.swift', '.kt', '.scala', '.ex',
  '.exs', '.clj', '.hs', '.ml', '.fs', '.vue', '.svelte', '.sh', '.bash',
  '.mjs', '.cjs', '.mts',
];

/** Lowercase file extensions (with leading dot) that count as documentation. */
const DOC_EXTENSIONS = ['.md', '.rst', '.txt', '.adoc', '.rdoc'];

/**
 * Ignore rules applied to every workspace in addition to its .gitignore.
 * Entries beginning with `*` are filename-suffix rules; all other entries are
 * literal directory or file names.
 */
const DEFAULT_IGNORE = [
  'node_modules', '.git', '.svn', '.hg', 'dist', 'build', 'out', 'target',
  '__pycache__', '.cache', '.next', '.nuxt', 'coverage', '.nyc_output',
  'vendor', 'venv', '.venv', 'env', '.idea', '.vscode',
  'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Cargo.lock',
  '*.min.js', '*.min.css', '*.map', '*.chunk.js',
];
25
+
26
/**
 * Language-aware function/class boundary patterns.
 * Each regex is anchored at the start of a line and is meant to be tested
 * against a single source line; none carries the `g` flag, so `.test()` is
 * stateless and the same instance can be reused safely.
 */
const BOUNDARY_PATTERNS = {
  js: /^(?:(?:export\s+)?(?:async\s+)?function\s+\w+|(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?(?:function|\()|(?:export\s+)?class\s+\w+|module\.exports)/m,
  ts: /^(?:(?:export\s+)?(?:async\s+)?function\s+\w+|(?:export\s+)?(?:const|let)\s+\w+\s*[=:]|(?:export\s+)?(?:class|interface|type|enum)\s+\w+)/m,
  py: /^(?:def\s+|async\s+def\s+|class\s+)/m,
  go: /^(?:func\s+|type\s+\w+\s+(?:struct|interface))/m,
  rs: /^(?:(?:pub\s+)?fn\s+|(?:pub\s+)?(?:struct|enum|trait|impl)\s+)/m,
  java: /^(?:\s*(?:public|private|protected)\s+(?:static\s+)?(?:class|interface|void|\w+)\s+\w+)/m,
  rb: /^(?:def\s+|class\s+|module\s+)/m,
  php: /^(?:\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+|class\s+)/m,
};

/**
 * Bare extension (no dot, lowercase) -> key into BOUNDARY_PATTERNS.
 * A Map is used instead of an object literal so that extensions such as
 * "constructor" or "toString" cannot resolve to inherited Object members.
 */
const BOUNDARY_LANG_BY_EXT = new Map([
  ['js', 'js'], ['jsx', 'js'], ['mjs', 'js'], ['cjs', 'js'],
  ['ts', 'ts'], ['tsx', 'ts'], ['mts', 'ts'],
  ['py', 'py'],
  ['go', 'go'],
  ['rs', 'rs'],
  ['java', 'java'], ['kt', 'java'], ['scala', 'java'],
  ['rb', 'rb'],
  ['php', 'php'],
]);

/**
 * Get the boundary pattern for a file extension.
 * @param {string} ext - Extension with or without the leading dot; case-insensitive.
 * @returns {RegExp|null} The shared pattern instance, or null for unsupported extensions.
 */
function getBoundaryPattern(ext) {
  const lang = ext.replace('.', '').toLowerCase();
  const key = BOUNDARY_LANG_BY_EXT.get(lang);
  return key ? BOUNDARY_PATTERNS[key] : null;
}
60
+
61
+ /**
62
+ * Smart chunk code: try splitting by function/class boundaries first,
63
+ * fall back to recursive character-based chunking.
64
+ * @param {string} content
65
+ * @param {string} filePath
66
+ * @param {object} opts
67
+ * @returns {Array<{text: string, startLine: number, endLine: number, type: string}>}
68
+ */
69
+ function smartChunkCode(content, filePath, opts = {}) {
70
+ const { chunk } = require('./chunker');
71
+ const ext = path.extname(filePath).toLowerCase();
72
+ const pattern = getBoundaryPattern(ext);
73
+ const chunkSize = opts.chunkSize || 512;
74
+ const chunkOverlap = opts.chunkOverlap || 50;
75
+ const lines = content.split('\n');
76
+
77
+ // Try boundary-based splitting
78
+ if (pattern) {
79
+ const boundaries = [];
80
+ for (let i = 0; i < lines.length; i++) {
81
+ if (pattern.test(lines[i])) {
82
+ boundaries.push(i);
83
+ }
84
+ }
85
+
86
+ if (boundaries.length > 1) {
87
+ const chunks = [];
88
+ for (let i = 0; i < boundaries.length; i++) {
89
+ const start = boundaries[i];
90
+ const end = i + 1 < boundaries.length ? boundaries[i + 1] : lines.length;
91
+ const text = lines.slice(start, end).join('\n').trim();
92
+ if (text.length >= 20) {
93
+ if (text.length > chunkSize * 2) {
94
+ const subChunks = chunk(text, { strategy: 'recursive', size: chunkSize, overlap: chunkOverlap });
95
+ let lineOffset = start;
96
+ for (const sc of subChunks) {
97
+ const scLines = sc.split('\n').length;
98
+ chunks.push({ text: sc, startLine: lineOffset + 1, endLine: lineOffset + scLines, type: 'boundary' });
99
+ lineOffset += scLines;
100
+ }
101
+ } else {
102
+ chunks.push({ text, startLine: start + 1, endLine: end, type: 'boundary' });
103
+ }
104
+ }
105
+ }
106
+ if (boundaries[0] > 0) {
107
+ const preamble = lines.slice(0, boundaries[0]).join('\n').trim();
108
+ if (preamble.length >= 20) {
109
+ chunks.unshift({ text: preamble, startLine: 1, endLine: boundaries[0], type: 'preamble' });
110
+ }
111
+ }
112
+ if (chunks.length > 0) return chunks;
113
+ }
114
+ }
115
+
116
+ // Fallback: recursive chunking with line number tracking
117
+ const { chunk: chunkFn } = require('./chunker');
118
+ const textChunks = chunkFn(content, { strategy: 'recursive', size: chunkSize, overlap: chunkOverlap });
119
+ const result = [];
120
+ let searchFrom = 0;
121
+ for (const tc of textChunks) {
122
+ const firstLine = tc.split('\n')[0];
123
+ let startLine = searchFrom;
124
+ for (let i = searchFrom; i < lines.length; i++) {
125
+ if (lines[i].includes(firstLine.trim().slice(0, 40))) {
126
+ startLine = i;
127
+ break;
128
+ }
129
+ }
130
+ const chunkLines = tc.split('\n').length;
131
+ result.push({ text: tc, startLine: startLine + 1, endLine: startLine + chunkLines, type: 'character' });
132
+ searchFrom = startLine + 1;
133
+ }
134
+ return result;
135
+ }
136
+
137
+ /**
138
+ * Extract symbol names from code.
139
+ * @param {string} content
140
+ * @param {string} filePath
141
+ * @returns {string[]}
142
+ */
143
+ function extractSymbols(content, filePath) {
144
+ const ext = path.extname(filePath).toLowerCase().slice(1);
145
+ const patterns = {
146
+ js: [/(?:function\s+|const\s+|let\s+|var\s+)(\w+)\s*(?:=\s*(?:async\s+)?(?:function|\(|=>)|\()/g, /class\s+(\w+)/g],
147
+ ts: [/(?:function\s+|const\s+|let\s+)(\w+)\s*(?:=\s*(?:async\s+)?(?:function|\(|=>)|[<(])/g, /(?:class|interface|type)\s+(\w+)/g],
148
+ py: [/(?:def|async def)\s+(\w+)\s*\(/g, /class\s+(\w+)/g],
149
+ go: [/func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/g, /type\s+(\w+)\s+struct/g],
150
+ rs: [/fn\s+(\w+)\s*[<(]/g, /(?:struct|enum|trait)\s+(\w+)/g],
151
+ java: [/(?:public|private|protected)?\s*(?:static)?\s*\w+\s+(\w+)\s*\(/g, /class\s+(\w+)/g],
152
+ rb: [/def\s+(\w+)/g, /class\s+(\w+)/g],
153
+ php: [/function\s+(\w+)/g, /class\s+(\w+)/g],
154
+ };
155
+ const langMap = { jsx: 'js', mjs: 'js', cjs: 'js', tsx: 'ts', mts: 'ts', kt: 'java', scala: 'java' };
156
+ const lang = langMap[ext] || ext;
157
+ const langPatterns = patterns[lang] || patterns.js;
158
+ const symbols = [];
159
+ for (const p of langPatterns) {
160
+ let m;
161
+ while ((m = p.exec(content)) !== null) {
162
+ if (m[1] && !symbols.includes(m[1])) symbols.push(m[1]);
163
+ }
164
+ }
165
+ return symbols.slice(0, 50);
166
+ }
167
+
168
/**
 * Read the ignore rules from `<dirPath>/.gitignore`.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are dropped.
 * Rules are returned verbatim otherwise (no glob or negation processing).
 * A missing or unreadable file is treated as "no rules".
 *
 * @param {string} dirPath - Directory expected to contain the .gitignore.
 * @returns {string[]} Rules in file order, or an empty array.
 */
function loadGitignore(dirPath) {
  let raw;
  try {
    raw = fs.readFileSync(path.join(dirPath, '.gitignore'), 'utf-8');
  } catch {
    return [];
  }

  const rules = [];
  for (const line of raw.split('\n')) {
    const rule = line.trim();
    if (rule === '' || rule.startsWith('#')) continue;
    rules.push(rule);
  }
  return rules;
}
185
+
186
/**
 * Check if a path should be ignored.
 *
 * Two kinds of rule are understood:
 *  - rules starting with `*` are suffix rules tested against the file name
 *    (`*.min.js` matches `app.min.js`);
 *  - every other rule is a literal name or slash-separated path that must
 *    match one or more WHOLE path segments. `build` therefore matches
 *    `pkg/build/x.js` but not `workflow-builder.js`, and `out` no longer
 *    matches `layout.js`.
 * Leading/trailing slashes in a rule are ignored (`dist/` behaves like `dist`);
 * a rule that is empty after removing slashes never matches.
 *
 * @param {string} filePath - Path to test; `/` and `\` both separate segments.
 * @param {string[]} patterns - Ignore rules.
 * @returns {boolean} True when any rule matches.
 */
function shouldIgnore(filePath, patterns) {
  const separator = /[\\/]+/;
  const basename = path.basename(filePath);
  const segments = filePath.split(separator).filter(Boolean);

  for (const pattern of patterns) {
    if (pattern.startsWith('*')) {
      if (basename.endsWith(pattern.slice(1))) return true;
      continue;
    }

    const wanted = pattern.split(separator).filter(Boolean);
    if (wanted.length === 0) continue;

    // Look for `wanted` as a contiguous run of whole segments.
    const lastStart = segments.length - wanted.length;
    for (let i = 0; i <= lastStart; i++) {
      if (wanted.every((part, j) => segments[i + j] === part)) return true;
    }
  }
  return false;
}
203
+
204
/**
 * Recursively find code files respecting .gitignore.
 *
 * Walks `dirPath` depth-first and collects files whose extension is in
 * CODE_EXTENSIONS. Ignore rules are DEFAULT_IGNORE plus the rules of the
 * .gitignore in `dirPath` itself (nested .gitignore files are not read).
 * Rules are evaluated against each entry's path RELATIVE to `dirPath`, so the
 * names of directories above the workspace (e.g. a checkout living under
 * `~/build/`) can never cause the whole workspace to be skipped.
 * Unreadable directories and files that cannot be stat'ed are skipped.
 *
 * @param {string} dirPath - Workspace root to scan.
 * @param {object} opts
 * @param {number} [opts.maxFiles=5000] - Stop after collecting this many files.
 * @param {number} [opts.maxFileSize=100000] - Skip files larger than this many bytes.
 * @returns {Promise<string[]>} Paths formed by joining `dirPath` with each entry.
 */
async function findCodeFiles(dirPath, opts = {}) {
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || 100000;
  const allPatterns = [...DEFAULT_IGNORE, ...loadGitignore(dirPath)];
  const files = [];

  async function walk(dir) {
    if (files.length >= maxFiles) return;
    let entries;
    try {
      entries = await fs.promises.readdir(dir, { withFileTypes: true });
    } catch {
      return; // unreadable directory: best-effort scan, keep going elsewhere
    }
    for (const entry of entries) {
      if (files.length >= maxFiles) break;
      const fullPath = path.join(dir, entry.name);
      // Match rules against the in-workspace path only.
      if (shouldIgnore(path.relative(dirPath, fullPath), allPatterns)) continue;

      if (entry.isDirectory()) {
        await walk(fullPath);
      } else if (entry.isFile()) {
        const ext = path.extname(entry.name).toLowerCase();
        if (!CODE_EXTENSIONS.includes(ext)) continue;
        try {
          const stats = await fs.promises.stat(fullPath);
          // Empty files carry nothing to index; oversized ones are skipped.
          if (stats.size > maxFileSize || stats.size === 0) continue;
        } catch {
          continue; // file vanished or is unreadable
        }
        files.push(fullPath);
      }
    }
  }

  await walk(dirPath);
  return files;
}
244
+
245
/**
 * Build a collection name for a workspace directory.
 *
 * Uses the `name` field of `<dirPath>/package.json` when that file can be
 * read and parsed and the field is set; otherwise uses the directory's own
 * name. In both cases every character outside `[a-zA-Z0-9_-]` becomes `_`
 * and `_code` is appended.
 *
 * @param {string} dirPath - Workspace directory.
 * @returns {string} Sanitised name ending in `_code`.
 */
function deriveCollectionName(dirPath) {
  const toCollectionName = (raw) => `${raw.replace(/[^a-zA-Z0-9_-]/g, '_')}_code`;

  try {
    const manifestPath = path.join(dirPath, 'package.json');
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
    if (manifest.name) return toCollectionName(manifest.name);
  } catch { /* ignore */ }

  const dirName = path.basename(path.resolve(dirPath));
  return toCollectionName(dirName);
}
257
+
258
/**
 * Work out which database, collection and model a code-search run should use.
 *
 * Precedence for each setting, first truthy value wins:
 *  - db:         opts.db, project `codeSearch.db`, project `db`, DEFAULT_DB
 *  - collection: opts.collection, project `codeSearch.collection`, a name
 *                derived from the workspace path (or the current working
 *                directory when no path is given)
 *  - model:      opts.model, project `codeSearch.model`, DEFAULT_CODE_MODEL
 *
 * @param {object} opts - Caller-supplied overrides.
 * @param {string} [workspacePath] - Workspace whose project config is loaded.
 * @returns {{db: string, collection: string, model: string, projectConfig: object}}
 */
function resolveConfig(opts, workspacePath) {
  const projectConfig = loadProject(workspacePath).config;
  const codeSearch = projectConfig.codeSearch || {};

  return {
    db: opts.db || codeSearch.db || projectConfig.db || DEFAULT_DB,
    // The derived name is only computed when nothing explicit is configured.
    collection:
      opts.collection ||
      codeSearch.collection ||
      deriveCollectionName(workspacePath || process.cwd()),
    model: opts.model || codeSearch.model || DEFAULT_CODE_MODEL,
    projectConfig,
  };
}
272
+
273
/**
 * Pick an embedding model from the mix of file types being indexed.
 *
 * A model set in the project's `codeSearch.model` always wins. Otherwise the
 * share of code files and of documentation files (by extension) decides:
 * at least 70% code gives the code model, at least 70% docs gives the general
 * model, and anything else (including an empty list) gives the code model.
 *
 * @param {string[]} files - Array of file paths
 * @param {object} [projectConfig] - Project config from .vai.json
 * @returns {string} Model name.
 */
function selectCodeModel(files, projectConfig) {
  const override = projectConfig?.codeSearch?.model;
  if (override) return override;

  const total = files.length;
  if (total === 0) return DEFAULT_CODE_MODEL;

  // Single pass: classify each file by its lowercase extension.
  let codeCount = 0;
  let docCount = 0;
  for (const file of files) {
    const ext = path.extname(file).toLowerCase();
    if (CODE_EXTENSIONS.includes(ext)) codeCount += 1;
    if (DOC_EXTENSIONS.includes(ext)) docCount += 1;
  }

  if (codeCount / total >= 0.7) return 'voyage-code-3';
  return docCount / total >= 0.7 ? 'voyage-4-large' : 'voyage-code-3';
}
298
+
299
+ module.exports = {
300
+ DEFAULT_CODE_MODEL,
301
+ DEFAULT_DB,
302
+ CODE_EXTENSIONS,
303
+ DOC_EXTENSIONS,
304
+ DEFAULT_IGNORE,
305
+ BOUNDARY_PATTERNS,
306
+ getBoundaryPattern,
307
+ smartChunkCode,
308
+ extractSymbols,
309
+ loadGitignore,
310
+ shouldIgnore,
311
+ findCodeFiles,
312
+ deriveCollectionName,
313
+ resolveConfig,
314
+ selectCodeModel,
315
+ };
@@ -302,7 +302,7 @@ function renderTemplate(target, name, context) {
302
302
  function buildContext(project, options = {}) {
303
303
  const context = {
304
304
  // Core config
305
- model: options.model || project.model || 'voyage-3-large',
305
+ model: options.model || project.model || 'voyage-4-large',
306
306
  db: options.db || project.db || 'myapp',
307
307
  collection: options.collection || project.collection || 'documents',
308
308
  field: options.field || project.field || 'embedding',
@@ -549,7 +549,7 @@ const concepts = {
549
549
  ``,
550
550
  `${pc.bold('In practice:')} You don't need to do anything special to use MoE — the API`,
551
551
  `interface is identical. The architecture difference shows up in quality and cost:`,
552
- ` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens better quality than voyage-3-large ($0.18/1M)`,
552
+ ` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens, best quality via MoE architecture`,
553
553
  ` ${pc.dim('•')} 40% cheaper than comparable dense models at the same quality tier`,
554
554
  ].join('\n'),
555
555
  links: [
@@ -616,9 +616,9 @@ const concepts = {
616
616
  ``,
617
617
  `${pc.bold('Current standings (Jan 2026):')}`,
618
618
  ` ${pc.cyan('voyage-4-large')} ${pc.bold('71.41')} ${pc.dim('— SOTA, MoE architecture')}`,
619
- ` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('— near voyage-3-large quality')}`,
619
+ ` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('— balanced quality/cost')}`,
620
620
  ` ${pc.cyan('Gemini Embedding 001')} ${pc.bold('68.66')} ${pc.dim('— Google')}`,
621
- ` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('— near voyage-3.5 quality')}`,
621
+ ` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('— best budget option')}`,
622
622
  ` ${pc.cyan('Cohere Embed v4')} ${pc.bold('65.75')} ${pc.dim('— Cohere')}`,
623
623
  ` ${pc.cyan('OpenAI v3 Large')} ${pc.bold('62.57')} ${pc.dim('— OpenAI')}`,
624
624
  ``,
@@ -0,0 +1,226 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * GitHub API fetcher for remote repository indexing.
5
+ * Uses native fetch (Node 18+) — no axios.
6
+ */
7
+
8
+ /**
9
+ * Get GitHub auth token from env or vai config.
10
+ * @returns {string|null}
11
+ */
12
+ function getAuthToken() {
13
+ if (process.env.GITHUB_TOKEN) return process.env.GITHUB_TOKEN;
14
+ try {
15
+ const { getConfigValue } = require('./config');
16
+ return getConfigValue('github.token') || null;
17
+ } catch {
18
+ return null;
19
+ }
20
+ }
21
+
22
/**
 * Check if a source string is a GitHub URL or shorthand.
 *
 * Accepts anything containing `github.com`, or an `owner/repo` shorthand made
 * of exactly two segments of letters, digits, `_`, `.` and `-`. Shorthand
 * whose owner or repo is `.` or `..` is rejected: those are relative
 * filesystem paths (`./src`, `../lib`), not repositories.
 *
 * @param {string} source - Candidate source; non-strings yield false.
 * @returns {boolean}
 */
function isGitHubUrl(source) {
  if (!source || typeof source !== 'string') return false;
  if (source.includes('github.com')) return true;

  // owner/repo shorthand (exactly one slash, no spaces)
  const shorthand = /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/.exec(source);
  if (!shorthand) return false;

  const isDotSegment = (segment) => segment === '.' || segment === '..';
  return !isDotSegment(shorthand[1]) && !isDotSegment(shorthand[2]);
}
34
+
35
/**
 * Parse a GitHub URL into owner and repo.
 * Supports: https://github.com/owner/repo, github.com/owner/repo,
 * git@github.com:owner/repo and the owner/repo shorthand. Extra URL parts
 * after the repo (trailing slash, `/tree/...`) are ignored, and a `.git`
 * suffix on the repo name is removed wherever the repo appears in the input.
 *
 * @param {string} source - URL or shorthand; surrounding whitespace is ignored.
 * @returns {{ owner: string, repo: string }}
 * @throws {Error} When `source` is empty or cannot be parsed.
 */
function parseGitHubUrl(source) {
  if (!source) throw new Error('Empty GitHub source');

  const input = source.trim();
  const match =
    // Full URL or SSH form
    input.match(/github\.com[/:]([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)/) ||
    // owner/repo shorthand
    input.match(/^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/);

  if (match) {
    // Strip `.git` from the captured repo rather than from the end of the
    // whole string, so `.../repo.git/` and `.../repo.git#readme` work too.
    const repo = match[2].replace(/\.git$/, '');
    if (repo) return { owner: match[1], repo };
  }

  throw new Error(`Cannot parse GitHub URL: ${source}`);
}
61
+
62
/**
 * Make a GitHub API request with optional auth and backoff.
 *
 * Retry policy (at most `retries` retries after the first attempt):
 *  - 403/429 with `x-ratelimit-remaining: 0`: wait until the advertised reset
 *    time (plus one second) if that is under two minutes away, else fail.
 *  - 403/429 with a `retry-after` header: wait that many seconds if under two
 *    minutes, then retry.
 *  - 429 without guidance and any 5xx: exponential backoff (1s, 2s, 4s, ...).
 *  - Every other non-2xx status (401, 403 without rate-limit headers, 422,
 *    ...) fails immediately; repeating such a request cannot succeed.
 *
 * @param {string} url - Absolute API URL.
 * @param {string|null} token - Sent as a Bearer token when truthy.
 * @param {number} [retries=3]
 * @returns {Promise<object>} Parsed JSON body of the successful response.
 * @throws {Error} On 404, exhausted rate limit, or any unrecovered API error.
 */
async function githubFetch(url, token, retries = 3) {
  const MAX_WAIT_MS = 120000;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const headers = { 'Accept': 'application/vnd.github.v3+json' };
  if (token) headers['Authorization'] = `Bearer ${token}`;

  for (let attempt = 0; attempt <= retries; attempt++) {
    const res = await fetch(url, { headers });
    if (res.ok) return res.json();

    const canRetry = attempt < retries;

    if (res.status === 403 || res.status === 429) {
      const remaining = res.headers.get('x-ratelimit-remaining');
      // Unix seconds; NaN when the header is absent or malformed.
      const resetAtSec = Number.parseInt(res.headers.get('x-ratelimit-reset'), 10);
      if (remaining === '0' && Number.isFinite(resetAtSec)) {
        const waitMs = Math.max(0, resetAtSec * 1000 - Date.now()) + 1000;
        if (canRetry && waitMs < MAX_WAIT_MS) {
          await sleep(waitMs);
          continue;
        }
        throw new Error(`GitHub rate limit exceeded. Resets at ${new Date(resetAtSec * 1000).toISOString()}`);
      }

      const retryAfterSec = Number.parseInt(res.headers.get('retry-after'), 10);
      if (canRetry && Number.isFinite(retryAfterSec) && retryAfterSec >= 0 && retryAfterSec * 1000 < MAX_WAIT_MS) {
        await sleep(retryAfterSec * 1000);
        continue;
      }
    }

    if (res.status === 404) {
      throw new Error(`GitHub resource not found: ${url}. Is the repo public or do you have a valid GITHUB_TOKEN?`);
    }

    const isTransient = res.status === 429 || res.status >= 500;
    if (isTransient && canRetry) {
      await sleep(Math.pow(2, attempt) * 1000);
      continue;
    }
    throw new Error(`GitHub API error ${res.status}: ${await res.text()}`);
  }
}
104
+
105
/**
 * List every file (git blob) in a repository at a given branch.
 *
 * Requests the tree with `recursive=1` and keeps only entries whose type is
 * `blob`; a missing `size` is reported as 0.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} branch - Interpolated into the URL as the tree reference.
 * @param {string|null} token
 * @returns {Promise<Array<{path: string, size: number, sha: string}>>}
 * @throws {Error} When the response carries no `tree` field.
 */
async function fetchRepoTree(owner, repo, branch, token) {
  const endpoint = `https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`;
  const response = await githubFetch(endpoint, token);

  const entries = response.tree;
  if (!entries) throw new Error('No tree data returned from GitHub');

  const isBlob = (entry) => entry.type === 'blob';
  const toFileInfo = (entry) => ({ path: entry.path, size: entry.size || 0, sha: entry.sha });
  return entries.filter(isBlob).map(toFileInfo);
}
125
+
126
/**
 * Fetch file contents from a GitHub repo.
 *
 * Each segment of `filePath` and the `branch` ref are percent-encoded before
 * being placed in the URL, so names containing `#`, `?`, `%` or spaces are
 * requested correctly. Only base64-encoded responses are accepted; an empty
 * base64 payload (an empty file) decodes to the empty string.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} filePath - Repository-relative path using `/` separators.
 * @param {string} branch - Branch, tag or commit to read from.
 * @param {string|null} token
 * @returns {Promise<string>} File contents decoded as UTF-8.
 * @throws {Error} When the response is not base64-encoded content.
 */
async function fetchFileContents(owner, repo, filePath, branch, token) {
  // Encode per segment so the `/` separators themselves survive.
  const encodedPath = filePath
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const data = await githubFetch(
    `https://api.github.com/repos/${owner}/${repo}/contents/${encodedPath}?ref=${encodeURIComponent(branch)}`,
    token
  );

  // Check the type rather than truthiness: '' is valid content.
  if (data.encoding === 'base64' && typeof data.content === 'string') {
    return Buffer.from(data.content, 'base64').toString('utf-8');
  }

  throw new Error(`Unexpected encoding for ${filePath}: ${data.encoding}`);
}
147
+
148
/**
 * List the files that differ between two commits.
 *
 * Calls the compare endpoint for `baseSha...headSha` and reduces each entry
 * of the response's `files` array to its name and status. A response without
 * `files` yields an empty list.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} baseSha
 * @param {string} headSha
 * @param {string|null} token
 * @returns {Promise<Array<{filename: string, status: string}>>}
 */
async function fetchChangedFiles(owner, repo, baseSha, headSha, token) {
  const compareUrl = `https://api.github.com/repos/${owner}/${repo}/compare/${baseSha}...${headSha}`;
  const comparison = await githubFetch(compareUrl, token);

  const changed = comparison.files || [];
  return changed.map((file) => {
    const { filename, status } = file;
    return { filename, status };
  });
}
165
+
166
/**
 * Fetch multiple files concurrently with a pool limit.
 *
 * A fixed number of workers pull the next unclaimed index from a shared
 * counter until every path has been attempted. A failure for one file is
 * recorded as `{ path, error }` and does not stop the others. Results are
 * positioned to match `filePaths`, regardless of completion order.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string[]} filePaths
 * @param {string} branch
 * @param {string|null} token
 * @param {number} [concurrency=5] - Maximum simultaneous requests. Values
 *   below 1 (or NaN) are treated as 1 so that every file is still fetched.
 * @returns {Promise<Array<{path: string, content: string}|{path: string, error: string}>>}
 */
async function fetchFilesBatch(owner, repo, filePaths, branch, token, concurrency = 5) {
  const results = new Array(filePaths.length);
  let next = 0;

  async function worker() {
    // Claiming an index is synchronous, so two workers never take the same one.
    while (next < filePaths.length) {
      const idx = next++;
      const fp = filePaths[idx];
      try {
        const content = await fetchFileContents(owner, repo, fp, branch, token);
        results[idx] = { path: fp, content };
      } catch (err) {
        results[idx] = { path: fp, error: err.message };
      }
    }
  }

  // Never more workers than files, never fewer than one when there is work.
  const capped = Math.min(concurrency, filePaths.length);
  const poolSize = capped >= 1 ? Math.ceil(capped) : Math.min(1, filePaths.length);

  await Promise.all(Array.from({ length: poolSize }, () => worker()));
  return results;
}
200
+
201
/**
 * Look up the commit a branch currently points at.
 *
 * Requests the commits endpoint with the branch name as the reference and
 * returns the `sha` field of the response.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} branch
 * @param {string|null} token
 * @returns {Promise<string>} commit SHA
 */
async function resolveCommitSha(owner, repo, branch, token) {
  const commitUrl = `https://api.github.com/repos/${owner}/${repo}/commits/${branch}`;
  const commit = await githubFetch(commitUrl, token);
  return commit.sha;
}
216
+
217
+ module.exports = {
218
+ getAuthToken,
219
+ isGitHubUrl,
220
+ parseGitHubUrl,
221
+ fetchRepoTree,
222
+ fetchFileContents,
223
+ fetchChangedFiles,
224
+ fetchFilesBatch,
225
+ resolveCommitSha,
226
+ };