voyageai-cli 1.30.1 → 1.30.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +1 -1
- package/src/cli.js +2 -0
- package/src/commands/about.js +3 -3
- package/src/commands/code-search.js +751 -0
- package/src/commands/doctor.js +1 -1
- package/src/commands/index-workspace.js +9 -5
- package/src/commands/playground.js +9 -1
- package/src/commands/quickstart.js +4 -4
- package/src/commands/workflow.js +132 -65
- package/src/lib/catalog.js +4 -2
- package/src/lib/code-search.js +315 -0
- package/src/lib/codegen.js +1 -1
- package/src/lib/explanations.js +3 -3
- package/src/lib/github.js +226 -0
- package/src/lib/template-engine.js +154 -20
- package/src/lib/workflow-builder.js +753 -0
- package/src/lib/workflow-formatters.js +454 -0
- package/src/lib/workflow-input-cache.js +111 -0
- package/src/lib/workflow-scaffold.js +1 -1
- package/src/lib/workflow.js +91 -1
- package/src/mcp/schemas/index.js +130 -0
- package/src/mcp/server.js +17 -4
- package/src/mcp/tools/authoring.js +662 -0
- package/src/mcp/tools/code-search.js +620 -0
- package/src/mcp/tools/ingest.js +2 -5
- package/src/mcp/tools/retrieval.js +2 -15
- package/src/mcp/tools/workspace.js +1 -12
- package/src/mcp/utils.js +20 -0
- package/src/playground/help/workflow-nodes.js +127 -2
- package/src/playground/index.html +1366 -24
- package/src/workflows/code-review.json +110 -0
- package/src/workflows/cost-analysis.json +5 -0
- package/src/workflows/tests/code-review.fresh-index.test.json +83 -0
- package/src/workflows/tests/code-review.happy-path.test.json +121 -0
- package/src/workflows/tests/code-review.no-question.test.json +70 -0
- package/src/workflows/tests/smart-ingest.duplicate-detected.test.json +2 -2
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const { loadProject } = require('./project');
|
|
6
|
+
|
|
7
|
+
const DEFAULT_CODE_MODEL = 'voyage-code-3';
|
|
8
|
+
const DEFAULT_DB = 'vai_code_search';
|
|
9
|
+
|
|
10
|
+
const CODE_EXTENSIONS = [
|
|
11
|
+
'.js', '.ts', '.jsx', '.tsx', '.py', '.go', '.rs', '.java', '.c', '.cpp',
|
|
12
|
+
'.h', '.hpp', '.cs', '.rb', '.php', '.swift', '.kt', '.scala', '.ex',
|
|
13
|
+
'.exs', '.clj', '.hs', '.ml', '.fs', '.vue', '.svelte', '.sh', '.bash',
|
|
14
|
+
];
|
|
15
|
+
|
|
16
|
+
const DOC_EXTENSIONS = ['.md', '.rst', '.txt', '.adoc', '.rdoc'];
|
|
17
|
+
|
|
18
|
+
const DEFAULT_IGNORE = [
|
|
19
|
+
'node_modules', '.git', '.svn', '.hg', 'dist', 'build', 'out', 'target',
|
|
20
|
+
'__pycache__', '.cache', '.next', '.nuxt', 'coverage', '.nyc_output',
|
|
21
|
+
'vendor', 'venv', '.venv', 'env', '.idea', '.vscode',
|
|
22
|
+
'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Cargo.lock',
|
|
23
|
+
'*.min.js', '*.min.css', '*.map', '*.chunk.js',
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Language-aware function/class boundary patterns.
|
|
28
|
+
*/
|
|
29
|
+
const BOUNDARY_PATTERNS = {
|
|
30
|
+
js: /^(?:(?:export\s+)?(?:async\s+)?function\s+\w+|(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?(?:function|\()|(?:export\s+)?class\s+\w+|module\.exports)/m,
|
|
31
|
+
ts: /^(?:(?:export\s+)?(?:async\s+)?function\s+\w+|(?:export\s+)?(?:const|let)\s+\w+\s*[=:]|(?:export\s+)?(?:class|interface|type|enum)\s+\w+)/m,
|
|
32
|
+
py: /^(?:def\s+|async\s+def\s+|class\s+)/m,
|
|
33
|
+
go: /^(?:func\s+|type\s+\w+\s+(?:struct|interface))/m,
|
|
34
|
+
rs: /^(?:(?:pub\s+)?fn\s+|(?:pub\s+)?(?:struct|enum|trait|impl)\s+)/m,
|
|
35
|
+
java: /^(?:\s*(?:public|private|protected)\s+(?:static\s+)?(?:class|interface|void|\w+)\s+\w+)/m,
|
|
36
|
+
rb: /^(?:def\s+|class\s+|module\s+)/m,
|
|
37
|
+
php: /^(?:\s*(?:public|private|protected)?\s*(?:static\s+)?function\s+|class\s+)/m,
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
/**
 * Bare file extension (no leading dot) -> key into BOUNDARY_PATTERNS.
 * A Map is used instead of an object literal so that inherited property
 * names such as "constructor" or "__proto__" can never resolve to a value.
 */
const BOUNDARY_LANGUAGE_BY_EXTENSION = new Map([
  ['js', 'js'], ['jsx', 'js'], ['mjs', 'js'], ['cjs', 'js'],
  ['ts', 'ts'], ['tsx', 'ts'], ['mts', 'ts'],
  ['py', 'py'],
  ['go', 'go'],
  ['rs', 'rs'],
  ['java', 'java'], ['kt', 'java'], ['scala', 'java'],
  ['rb', 'rb'],
  ['php', 'php'],
]);

/**
 * Get the boundary pattern for a file extension.
 *
 * The lookup is case-sensitive; callers are expected to pass a lower-cased
 * extension (as `smartChunkCode` does).
 *
 * @param {string} ext - File extension, with or without the leading dot (e.g. ".tsx").
 * @returns {RegExp|null} The matching entry of BOUNDARY_PATTERNS, or null when
 *   the extension has no associated pattern.
 */
function getBoundaryPattern(ext) {
  // Only the first "." is removed, mirroring what path.extname() produces.
  const lang = ext.replace('.', '');
  const key = BOUNDARY_LANGUAGE_BY_EXTENSION.get(lang);
  return key ? BOUNDARY_PATTERNS[key] : null;
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Smart chunk code: try splitting by function/class boundaries first,
|
|
63
|
+
* fall back to recursive character-based chunking.
|
|
64
|
+
* @param {string} content
|
|
65
|
+
* @param {string} filePath
|
|
66
|
+
* @param {object} opts
|
|
67
|
+
* @returns {Array<{text: string, startLine: number, endLine: number, type: string}>}
|
|
68
|
+
*/
|
|
69
|
+
function smartChunkCode(content, filePath, opts = {}) {
|
|
70
|
+
const { chunk } = require('./chunker');
|
|
71
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
72
|
+
const pattern = getBoundaryPattern(ext);
|
|
73
|
+
const chunkSize = opts.chunkSize || 512;
|
|
74
|
+
const chunkOverlap = opts.chunkOverlap || 50;
|
|
75
|
+
const lines = content.split('\n');
|
|
76
|
+
|
|
77
|
+
// Try boundary-based splitting
|
|
78
|
+
if (pattern) {
|
|
79
|
+
const boundaries = [];
|
|
80
|
+
for (let i = 0; i < lines.length; i++) {
|
|
81
|
+
if (pattern.test(lines[i])) {
|
|
82
|
+
boundaries.push(i);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (boundaries.length > 1) {
|
|
87
|
+
const chunks = [];
|
|
88
|
+
for (let i = 0; i < boundaries.length; i++) {
|
|
89
|
+
const start = boundaries[i];
|
|
90
|
+
const end = i + 1 < boundaries.length ? boundaries[i + 1] : lines.length;
|
|
91
|
+
const text = lines.slice(start, end).join('\n').trim();
|
|
92
|
+
if (text.length >= 20) {
|
|
93
|
+
if (text.length > chunkSize * 2) {
|
|
94
|
+
const subChunks = chunk(text, { strategy: 'recursive', size: chunkSize, overlap: chunkOverlap });
|
|
95
|
+
let lineOffset = start;
|
|
96
|
+
for (const sc of subChunks) {
|
|
97
|
+
const scLines = sc.split('\n').length;
|
|
98
|
+
chunks.push({ text: sc, startLine: lineOffset + 1, endLine: lineOffset + scLines, type: 'boundary' });
|
|
99
|
+
lineOffset += scLines;
|
|
100
|
+
}
|
|
101
|
+
} else {
|
|
102
|
+
chunks.push({ text, startLine: start + 1, endLine: end, type: 'boundary' });
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
if (boundaries[0] > 0) {
|
|
107
|
+
const preamble = lines.slice(0, boundaries[0]).join('\n').trim();
|
|
108
|
+
if (preamble.length >= 20) {
|
|
109
|
+
chunks.unshift({ text: preamble, startLine: 1, endLine: boundaries[0], type: 'preamble' });
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
if (chunks.length > 0) return chunks;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// Fallback: recursive chunking with line number tracking
|
|
117
|
+
const { chunk: chunkFn } = require('./chunker');
|
|
118
|
+
const textChunks = chunkFn(content, { strategy: 'recursive', size: chunkSize, overlap: chunkOverlap });
|
|
119
|
+
const result = [];
|
|
120
|
+
let searchFrom = 0;
|
|
121
|
+
for (const tc of textChunks) {
|
|
122
|
+
const firstLine = tc.split('\n')[0];
|
|
123
|
+
let startLine = searchFrom;
|
|
124
|
+
for (let i = searchFrom; i < lines.length; i++) {
|
|
125
|
+
if (lines[i].includes(firstLine.trim().slice(0, 40))) {
|
|
126
|
+
startLine = i;
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
const chunkLines = tc.split('\n').length;
|
|
131
|
+
result.push({ text: tc, startLine: startLine + 1, endLine: startLine + chunkLines, type: 'character' });
|
|
132
|
+
searchFrom = startLine + 1;
|
|
133
|
+
}
|
|
134
|
+
return result;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
 * Extract symbol names (functions, classes, types, ...) from code using
 * per-language regular expressions.
 *
 * The language is chosen from the file extension; unknown extensions fall
 * back to the JavaScript patterns. Symbols are returned in discovery order
 * (all matches of the first pattern, then the second), de-duplicated, and
 * capped at 50 entries.
 *
 * @param {string} content - Source text to scan.
 * @param {string} filePath - Path used only to derive the file extension.
 * @returns {string[]} Up to 50 unique symbol names.
 */
function extractSymbols(content, filePath) {
  const MAX_SYMBOLS = 50;
  const ext = path.extname(filePath).toLowerCase().slice(1);
  const patterns = {
    js: [/(?:function\s+|const\s+|let\s+|var\s+)(\w+)\s*(?:=\s*(?:async\s+)?(?:function|\(|=>)|\()/g, /class\s+(\w+)/g],
    ts: [/(?:function\s+|const\s+|let\s+)(\w+)\s*(?:=\s*(?:async\s+)?(?:function|\(|=>)|[<(])/g, /(?:class|interface|type)\s+(\w+)/g],
    py: [/(?:def|async def)\s+(\w+)\s*\(/g, /class\s+(\w+)/g],
    go: [/func\s+(?:\([^)]+\)\s+)?(\w+)\s*\(/g, /type\s+(\w+)\s+struct/g],
    rs: [/fn\s+(\w+)\s*[<(]/g, /(?:struct|enum|trait)\s+(\w+)/g],
    java: [/(?:public|private|protected)?\s*(?:static)?\s*\w+\s+(\w+)\s*\(/g, /class\s+(\w+)/g],
    rb: [/def\s+(\w+)/g, /class\s+(\w+)/g],
    php: [/function\s+(\w+)/g, /class\s+(\w+)/g],
  };
  const langMap = { jsx: 'js', mjs: 'js', cjs: 'js', tsx: 'ts', mts: 'ts', kt: 'java', scala: 'java' };

  // Own-property lookups only, so extensions like "constructor" cannot pick
  // up values inherited from Object.prototype.
  const own = (table, key) =>
    (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
  const lang = own(langMap, ext) || ext;
  const langPatterns = own(patterns, lang) || patterns.js;

  // A Set gives O(1) de-duplication while preserving insertion order.
  const symbols = new Set();
  for (const pattern of langPatterns) {
    for (const match of content.matchAll(pattern)) {
      if (match[1]) symbols.add(match[1]);
      // Stopping here yields the same first 50 names as scanning everything
      // and slicing afterwards, without the extra work on large files.
      if (symbols.size >= MAX_SYMBOLS) return [...symbols];
    }
  }
  return [...symbols];
}
|
|
167
|
+
|
|
168
|
+
/**
 * Read the `.gitignore` that sits directly inside `dirPath` and return its
 * patterns.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are dropped.
 * Any failure to read the file (missing, unreadable, ...) yields an empty list.
 *
 * @param {string} dirPath - Directory expected to contain a `.gitignore`.
 * @returns {string[]} Raw pattern lines, in file order.
 */
function loadGitignore(dirPath) {
  let raw;
  try {
    raw = fs.readFileSync(path.join(dirPath, '.gitignore'), 'utf-8');
  } catch {
    // Best effort: no readable .gitignore simply means no extra patterns.
    return [];
  }

  const patterns = [];
  for (const rawLine of raw.split('\n')) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;
    patterns.push(line);
  }
  return patterns;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Check if a path should be ignored.
 *
 * Two pattern forms are understood:
 *  - `*suffix` (e.g. `*.min.js`): ignored when the file's basename ends with `suffix`.
 *  - anything else (e.g. `node_modules`, `src/generated`, `dist/`): ignored when
 *    the pattern's path segments appear as a consecutive run of whole segments
 *    in `filePath`.
 *
 * Matching is done on whole path segments rather than raw substrings, so a
 * pattern such as `out` ignores an `out/` directory but not `about.js` or
 * `layout.js`. Other gitignore syntax (negation, `**`, mid-pattern wildcards)
 * is not interpreted.
 *
 * @param {string} filePath - Path to test; `/` and `\` are both treated as separators.
 * @param {string[]} patterns - Ignore patterns.
 * @returns {boolean} True when any pattern matches.
 */
function shouldIgnore(filePath, patterns) {
  const basename = path.basename(filePath);
  const segments = filePath.split(/[\\/]+/).filter(Boolean);

  for (const pattern of patterns) {
    if (pattern.startsWith('*')) {
      if (basename.endsWith(pattern.slice(1))) return true;
      continue;
    }
    const wanted = pattern.split(/[\\/]+/).filter(Boolean);
    if (wanted.length > 0 && containsSegmentRun(segments, wanted)) return true;
  }
  return false;
}

/**
 * Tell whether `wanted` occurs as a contiguous run inside `segments`.
 *
 * @param {string[]} segments - Path split into segments.
 * @param {string[]} wanted - Non-empty list of segments to look for.
 * @returns {boolean}
 */
function containsSegmentRun(segments, wanted) {
  for (let i = 0; i + wanted.length <= segments.length; i++) {
    if (wanted.every((segment, offset) => segments[i + offset] === segment)) return true;
  }
  return false;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Recursively find code files under `dirPath`, skipping DEFAULT_IGNORE entries
 * and the patterns from the root `.gitignore`.
 *
 * Ignore patterns are evaluated against each entry's path relative to
 * `dirPath` (rooted with a leading `/` and using `/` separators), so the
 * names of directories *above* the workspace never cause files to be skipped
 * and anchored patterns such as `/build` keep working.
 *
 * Only regular files whose extension is in CODE_EXTENSIONS are returned;
 * empty files and files larger than `maxFileSize` bytes are skipped.
 * Unreadable directories and files are skipped silently. Nested `.gitignore`
 * files are not consulted.
 *
 * @param {string} dirPath - Root directory to scan.
 * @param {object} [opts]
 * @param {number} [opts.maxFiles=5000] - Stop after collecting this many files.
 * @param {number} [opts.maxFileSize=100000] - Maximum file size in bytes.
 * @returns {Promise<string[]>} Paths built by joining `dirPath` with each entry name.
 */
async function findCodeFiles(dirPath, opts = {}) {
  const maxFiles = opts.maxFiles || 5000;
  const maxFileSize = opts.maxFileSize || 100000;
  const allPatterns = [...DEFAULT_IGNORE, ...loadGitignore(dirPath)];
  const files = [];

  // Path of an entry as seen from the workspace root, e.g. "/src/lib/a.js".
  const toRootedPath = (fullPath) =>
    `/${path.relative(dirPath, fullPath).split(path.sep).join('/')}`;

  async function walk(dir) {
    if (files.length >= maxFiles) return;
    let entries;
    try {
      entries = await fs.promises.readdir(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: skip it rather than abort the whole scan.
      return;
    }
    for (const entry of entries) {
      if (files.length >= maxFiles) break;
      const fullPath = path.join(dir, entry.name);
      if (shouldIgnore(toRootedPath(fullPath), allPatterns)) continue;

      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      if (!entry.isFile()) continue;

      const ext = path.extname(entry.name).toLowerCase();
      if (!CODE_EXTENSIONS.includes(ext)) continue;
      try {
        const stats = await fs.promises.stat(fullPath);
        if (stats.size > maxFileSize || stats.size === 0) continue;
      } catch {
        // File vanished or is unreadable: skip it.
        continue;
      }
      files.push(fullPath);
    }
  }

  await walk(dirPath);
  return files;
}
|
|
244
|
+
|
|
245
|
+
/**
 * Derive a collection name from a directory path.
 *
 * Prefers the `name` field of `<dirPath>/package.json`; when that file is
 * missing, unparsable, or has no usable name, the directory's own basename is
 * used instead. Every character outside `[a-zA-Z0-9_-]` becomes `_`, and
 * `_code` is appended.
 *
 * @param {string} dirPath - Project directory.
 * @returns {string} Sanitised name ending in `_code`.
 */
function deriveCollectionName(dirPath) {
  const toCollectionName = (raw) => `${raw.replace(/[^a-zA-Z0-9_-]/g, '_')}_code`;

  try {
    const manifestPath = path.join(dirPath, 'package.json');
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
    if (manifest.name) return toCollectionName(manifest.name);
  } catch {
    /* ignore: fall through to the directory name */
  }

  const folderName = path.basename(path.resolve(dirPath));
  return toCollectionName(folderName);
}
|
|
257
|
+
|
|
258
|
+
/**
 * Resolve db/collection/model for code search.
 *
 * Precedence, highest first:
 *  - db:         opts.db, codeSearch.db, project db, DEFAULT_DB
 *  - collection: opts.collection, codeSearch.collection, a name derived from
 *                `workspacePath` (or the current working directory)
 *  - model:      opts.model, codeSearch.model, DEFAULT_CODE_MODEL
 *
 * `codeSearch` is the `codeSearch` section of the config returned by
 * `loadProject(workspacePath)`.
 *
 * @param {object} opts - Explicit overrides (`db`, `collection`, `model`).
 * @param {string} [workspacePath] - Workspace root passed to `loadProject`.
 * @returns {{db: string, collection: string, model: string, projectConfig: object}}
 */
function resolveConfig(opts, workspacePath) {
  const projectConfig = loadProject(workspacePath).config;
  const codeSearch = projectConfig.codeSearch || {};

  const db = opts.db || codeSearch.db || projectConfig.db || DEFAULT_DB;

  let collection = opts.collection || codeSearch.collection;
  if (!collection) {
    // Only derive a name when nothing was configured.
    collection = deriveCollectionName(workspacePath || process.cwd());
  }

  const model = opts.model || codeSearch.model || DEFAULT_CODE_MODEL;

  return { db, collection, model, projectConfig };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Auto-select the best embedding model based on file types.
 *
 * Rules, in order:
 *  1. A `codeSearch.model` set in the project config always wins.
 *  2. When at least 70% of `files` have a DOC_EXTENSIONS extension, the
 *     general-purpose text model `voyage-4-large` is used.
 *  3. Everything else (code-heavy, mixed, or an empty list) uses
 *     DEFAULT_CODE_MODEL.
 *
 * @param {string[]} files - Array of file paths
 * @param {object} [projectConfig] - Project config from .vai.json
 * @returns {string} Model identifier.
 */
function selectCodeModel(files, projectConfig) {
  const DOC_HEAVY_MODEL = 'voyage-4-large';
  const DOC_HEAVY_RATIO = 0.7;

  // User override always wins
  const override = projectConfig?.codeSearch?.model;
  if (override) return override;

  const total = files.length;
  if (total === 0) return DEFAULT_CODE_MODEL;

  const docCount = files.filter((file) =>
    DOC_EXTENSIONS.includes(path.extname(file).toLowerCase())).length;

  // Code-heavy and mixed repositories both resolve to the code model, so only
  // the documentation-heavy case needs an explicit check.
  return docCount / total >= DOC_HEAVY_RATIO ? DOC_HEAVY_MODEL : DEFAULT_CODE_MODEL;
}
|
|
298
|
+
|
|
299
|
+
module.exports = {
|
|
300
|
+
DEFAULT_CODE_MODEL,
|
|
301
|
+
DEFAULT_DB,
|
|
302
|
+
CODE_EXTENSIONS,
|
|
303
|
+
DOC_EXTENSIONS,
|
|
304
|
+
DEFAULT_IGNORE,
|
|
305
|
+
BOUNDARY_PATTERNS,
|
|
306
|
+
getBoundaryPattern,
|
|
307
|
+
smartChunkCode,
|
|
308
|
+
extractSymbols,
|
|
309
|
+
loadGitignore,
|
|
310
|
+
shouldIgnore,
|
|
311
|
+
findCodeFiles,
|
|
312
|
+
deriveCollectionName,
|
|
313
|
+
resolveConfig,
|
|
314
|
+
selectCodeModel,
|
|
315
|
+
};
|
package/src/lib/codegen.js
CHANGED
|
@@ -302,7 +302,7 @@ function renderTemplate(target, name, context) {
|
|
|
302
302
|
function buildContext(project, options = {}) {
|
|
303
303
|
const context = {
|
|
304
304
|
// Core config
|
|
305
|
-
model: options.model || project.model || 'voyage-
|
|
305
|
+
model: options.model || project.model || 'voyage-4-large',
|
|
306
306
|
db: options.db || project.db || 'myapp',
|
|
307
307
|
collection: options.collection || project.collection || 'documents',
|
|
308
308
|
field: options.field || project.field || 'embedding',
|
package/src/lib/explanations.js
CHANGED
|
@@ -549,7 +549,7 @@ const concepts = {
|
|
|
549
549
|
``,
|
|
550
550
|
`${pc.bold('In practice:')} You don't need to do anything special to use MoE — the API`,
|
|
551
551
|
`interface is identical. The architecture difference shows up in quality and cost:`,
|
|
552
|
-
` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens
|
|
552
|
+
` ${pc.dim('•')} voyage-4-large: $0.12/1M tokens, best quality via MoE architecture`,
|
|
553
553
|
` ${pc.dim('•')} 40% cheaper than comparable dense models at the same quality tier`,
|
|
554
554
|
].join('\n'),
|
|
555
555
|
links: [
|
|
@@ -616,9 +616,9 @@ const concepts = {
|
|
|
616
616
|
``,
|
|
617
617
|
`${pc.bold('Current standings (Jan 2026):')}`,
|
|
618
618
|
` ${pc.cyan('voyage-4-large')} ${pc.bold('71.41')} ${pc.dim('— SOTA, MoE architecture')}`,
|
|
619
|
-
` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('—
|
|
619
|
+
` ${pc.cyan('voyage-4')} ${pc.bold('70.07')} ${pc.dim('— balanced quality/cost')}`,
|
|
620
620
|
` ${pc.cyan('Gemini Embedding 001')} ${pc.bold('68.66')} ${pc.dim('— Google')}`,
|
|
621
|
-
` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('—
|
|
621
|
+
` ${pc.cyan('voyage-4-lite')} ${pc.bold('68.10')} ${pc.dim('— best budget option')}`,
|
|
622
622
|
` ${pc.cyan('Cohere Embed v4')} ${pc.bold('65.75')} ${pc.dim('— Cohere')}`,
|
|
623
623
|
` ${pc.cyan('OpenAI v3 Large')} ${pc.bold('62.57')} ${pc.dim('— OpenAI')}`,
|
|
624
624
|
``,
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* GitHub API fetcher for remote repository indexing.
|
|
5
|
+
* Uses native fetch (Node 18+) — no axios.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Look up a GitHub auth token.
 *
 * The `GITHUB_TOKEN` environment variable takes priority. Otherwise the
 * `github.token` value is read through `getConfigValue` from `./config`.
 * Any failure while loading or querying the config is treated as "no token".
 *
 * @returns {string|null} The token, or null when none is available.
 */
function getAuthToken() {
  const fromEnv = process.env.GITHUB_TOKEN;
  if (fromEnv) return fromEnv;

  let stored;
  try {
    // Loaded lazily so the config module is only touched when needed.
    const { getConfigValue } = require('./config');
    stored = getConfigValue('github.token');
  } catch {
    return null;
  }
  return stored || null;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Check if a source string is a GitHub URL or `owner/repo` shorthand.
 *
 * Any string containing `github.com` is accepted. Otherwise the string must
 * be exactly two segments of `[a-zA-Z0-9_.-]` separated by one slash, and
 * neither segment may be `.` or `..`, so relative paths such as `./src` or
 * `../lib` are not mistaken for repositories.
 *
 * NOTE(review): a two-segment relative path like `src/lib` is still
 * indistinguishable from shorthand here; callers that also accept local
 * paths presumably need to check the filesystem first.
 *
 * @param {string} source
 * @returns {boolean}
 */
function isGitHubUrl(source) {
  if (!source || typeof source !== 'string') return false;
  if (source.includes('github.com')) return true;

  // owner/repo shorthand (exactly one slash, no spaces, no "." / ".." segments)
  const shorthand = /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/.exec(source);
  if (!shorthand) return false;

  const isDotSegment = (segment) => segment === '.' || segment === '..';
  return !isDotSegment(shorthand[1]) && !isDotSegment(shorthand[2]);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Parse a GitHub URL into owner and repo.
 *
 * Supports: https://github.com/owner/repo, github.com/owner/repo,
 * git@github.com:owner/repo.git and owner/repo shorthand. Surrounding
 * whitespace, trailing slashes and a `.git` suffix on the repository name are
 * removed; extra path segments after the repo (e.g. `/tree/main`) are ignored.
 *
 * @param {string} source
 * @returns {{ owner: string, repo: string }}
 * @throws {Error} When `source` is empty or cannot be interpreted as a repository.
 */
function parseGitHubUrl(source) {
  if (!source) throw new Error('Empty GitHub source');

  const stripGitSuffix = (name) => name.replace(/\.git$/, '');
  const isDotSegment = (segment) => segment === '.' || segment === '..';
  // Work on a cleaned copy; the original value is kept for the error message.
  const cleaned = String(source).trim().replace(/\/+$/, '');

  // Full URL (https, bare host, or scp-like "git@github.com:owner/repo")
  const urlMatch = cleaned.match(/github\.com[/:]([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)/);
  if (urlMatch) {
    // The character class allows ".", so "repo.git" is captured whole whenever
    // something follows it (trailing slash, fragment, path); strip it here.
    const repo = stripGitSuffix(urlMatch[2]);
    if (repo) return { owner: urlMatch[1], repo };
  }

  // owner/repo shorthand
  const shortMatch = cleaned.match(/^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/);
  if (shortMatch) {
    const owner = shortMatch[1];
    const repo = stripGitSuffix(shortMatch[2]);
    // "./src" and "../lib" are relative paths, not repositories.
    if (repo && !isDotSegment(owner) && !isDotSegment(repo)) {
      return { owner, repo };
    }
  }

  throw new Error(`Cannot parse GitHub URL: ${source}`);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Make a GitHub API request with optional auth and backoff.
 *
 * Behaviour per response:
 *  - 2xx: the parsed JSON body is returned.
 *  - 403 with `x-ratelimit-remaining: 0`: waits until the advertised reset
 *    time (plus one second) and retries, provided the wait is under two
 *    minutes and attempts remain; otherwise throws a rate-limit error.
 *  - 404: throws immediately.
 *  - 408, 429, 5xx, or 403 carrying a `retry-after` header: treated as
 *    transient and retried with exponential backoff (1s, 2s, 4s, ...), never
 *    sooner than `retry-after` asks for.
 *  - any other non-2xx status (401, 422, plain 403, ...): throws immediately,
 *    because repeating the same request cannot succeed.
 *
 * Network-level failures (a rejected `fetch`) are not retried and propagate
 * to the caller.
 *
 * @param {string} url - Absolute API URL.
 * @param {string|null} token - Bearer token, or null for anonymous access.
 * @param {number} [retries=3] - Maximum number of retries after the first attempt.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} On rate limiting, 404, or any other non-2xx response that is
 *   not (or no longer) retried.
 */
async function githubFetch(url, token, retries = 3) {
  const MAX_WAIT_MS = 120000;
  const headers = { 'Accept': 'application/vnd.github.v3+json' };
  if (token) headers['Authorization'] = `Bearer ${token}`;
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  for (let attempt = 0; attempt <= retries; attempt++) {
    const res = await fetch(url, { headers });

    if (res.status === 403) {
      const remaining = res.headers.get('x-ratelimit-remaining');
      const resetAt = res.headers.get('x-ratelimit-reset');
      if (remaining === '0' && resetAt) {
        // The header holds epoch seconds.
        const resetMs = Number.parseInt(resetAt, 10) * 1000;
        const waitMs = Math.max(0, resetMs - Date.now()) + 1000;
        if (attempt < retries && waitMs < MAX_WAIT_MS) {
          await sleep(waitMs);
          continue;
        }
        // Guard against an unparsable header: Date#toISOString throws on NaN.
        const resetText = Number.isFinite(resetMs) ? new Date(resetMs).toISOString() : 'an unknown time';
        throw new Error(`GitHub rate limit exceeded. Resets at ${resetText}`);
      }
    }

    if (res.status === 404) {
      throw new Error(`GitHub resource not found: ${url}. Is the repo public or do you have a valid GITHUB_TOKEN?`);
    }

    if (!res.ok) {
      const retryAfterSec = Number.parseInt(res.headers.get('retry-after') ?? '', 10);
      const retryAfterMs = Number.isFinite(retryAfterSec) && retryAfterSec > 0 ? retryAfterSec * 1000 : 0;
      const transient = res.status >= 500
        || res.status === 408
        || res.status === 429
        || (res.status === 403 && retryAfterMs > 0);
      const waitMs = Math.max(2 ** attempt * 1000, retryAfterMs);

      if (transient && attempt < retries && waitMs < MAX_WAIT_MS) {
        await sleep(waitMs);
        continue;
      }
      throw new Error(`GitHub API error ${res.status}: ${await res.text()}`);
    }

    return res.json();
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Fetch the recursive file tree for a repo and keep only its files.
 *
 * Calls the git trees endpoint with `recursive=1` and returns the entries
 * whose `type` is `blob`. A missing `size` is reported as 0.
 *
 * NOTE(review): the response is not checked for a truncation marker; for very
 * large repositories the listing may be incomplete. Confirm against the
 * GitHub git trees documentation.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} branch - Ref interpolated into the tree URL.
 * @param {string|null} token
 * @returns {Promise<Array<{path: string, size: number, sha: string}>>}
 * @throws {Error} When the response carries no `tree` field.
 */
async function fetchRepoTree(owner, repo, branch, token) {
  const endpoint = `https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`;
  const { tree } = await githubFetch(endpoint, token);

  if (!tree) throw new Error('No tree data returned from GitHub');

  const blobs = [];
  for (const entry of tree) {
    // Directories ("tree") and submodules ("commit") are skipped.
    if (entry.type !== 'blob') continue;
    blobs.push({ path: entry.path, size: entry.size || 0, sha: entry.sha });
  }
  return blobs;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Fetch file contents from a GitHub repo via the contents endpoint.
 *
 * Every segment of `filePath` and the `branch` ref are percent-encoded, so
 * names containing spaces, `#`, `?` or `%` produce a valid URL; paths made of
 * unreserved characters yield the same URL as plain interpolation would.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} filePath - Repository-relative path using `/` separators.
 * @param {string} branch - Ref sent as the `ref` query parameter.
 * @param {string|null} token
 * @returns {Promise<string>} File contents decoded as UTF-8 (an empty string
 *   for an empty file).
 * @throws {Error} When the response is not base64-encoded file content, e.g.
 *   when the path resolves to something other than a single file.
 */
async function fetchFileContents(owner, repo, filePath, branch, token) {
  const encodedPath = String(filePath)
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const data = await githubFetch(
    `https://api.github.com/repos/${owner}/${repo}/contents/${encodedPath}?ref=${encodeURIComponent(branch)}`,
    token
  );

  // Test the type rather than truthiness: an empty file has `content: ""`.
  if (data.encoding === 'base64' && typeof data.content === 'string') {
    return Buffer.from(data.content, 'base64').toString('utf-8');
  }

  throw new Error(`Unexpected encoding for ${filePath}: ${data.encoding}`);
}
|
|
147
|
+
|
|
148
|
+
/**
 * Fetch changed files between two commits using the compare endpoint.
 *
 * Only `filename` and `status` are kept from each entry of the response's
 * `files` array; a response without `files` yields an empty list.
 *
 * NOTE(review): only the `files` of this single response are read, so very
 * large comparisons may be incomplete if the API paginates them. Confirm
 * against the GitHub compare documentation.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} baseSha
 * @param {string} headSha
 * @param {string|null} token
 * @returns {Promise<Array<{filename: string, status: string}>>}
 */
async function fetchChangedFiles(owner, repo, baseSha, headSha, token) {
  const endpoint = `https://api.github.com/repos/${owner}/${repo}/compare/${baseSha}...${headSha}`;
  const comparison = await githubFetch(endpoint, token);

  const changed = comparison.files || [];
  return changed.map((file) => {
    const { filename, status } = file;
    return { filename, status };
  });
}
|
|
165
|
+
|
|
166
|
+
/**
 * Fetch multiple files concurrently with a pool limit.
 *
 * A fixed number of workers pull paths from a shared cursor, so at most
 * `concurrency` requests are in flight at once. A failure for one file is
 * recorded as `{ path, error }` and does not abort the batch. Results are
 * returned in the same order as `filePaths`.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string[]} filePaths
 * @param {string} branch
 * @param {string|null} token
 * @param {number} [concurrency=5] - Maximum parallel requests. Values below 1
 *   (or not a number) are treated as 1 so every file is still fetched.
 * @returns {Promise<Array<{path: string, content: string}|{path: string, error: string}>>}
 */
async function fetchFilesBatch(owner, repo, filePaths, branch, token, concurrency = 5) {
  const results = [];
  let next = 0;

  // Without this clamp a concurrency of 0, a negative number or NaN would
  // start no workers at all and silently return an empty result.
  const requested = Math.ceil(Number(concurrency));
  const poolSize = Math.min(filePaths.length, requested >= 1 ? requested : 1);

  async function worker() {
    // `next++` runs synchronously between awaits, so two workers can never
    // claim the same index.
    while (next < filePaths.length) {
      const idx = next++;
      const fp = filePaths[idx];
      try {
        const content = await fetchFileContents(owner, repo, fp, branch, token);
        results[idx] = { path: fp, content };
      } catch (err) {
        results[idx] = { path: fp, error: err.message };
      }
    }
  }

  await Promise.all(Array.from({ length: poolSize }, () => worker()));
  return results;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Resolve a branch name to its HEAD commit SHA.
 *
 * Requests the commits endpoint for `branch` and returns the `sha` field of
 * the response.
 *
 * @param {string} owner
 * @param {string} repo
 * @param {string} branch - Ref interpolated into the commits URL.
 * @param {string|null} token
 * @returns {Promise<string>} commit SHA
 */
async function resolveCommitSha(owner, repo, branch, token) {
  const endpoint = `https://api.github.com/repos/${owner}/${repo}/commits/${branch}`;
  const commit = await githubFetch(endpoint, token);
  return commit.sha;
}
|
|
216
|
+
|
|
217
|
+
module.exports = {
|
|
218
|
+
getAuthToken,
|
|
219
|
+
isGitHubUrl,
|
|
220
|
+
parseGitHubUrl,
|
|
221
|
+
fetchRepoTree,
|
|
222
|
+
fetchFileContents,
|
|
223
|
+
fetchChangedFiles,
|
|
224
|
+
fetchFilesBatch,
|
|
225
|
+
resolveCommitSha,
|
|
226
|
+
};
|