triage-ai 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +633 -0
- package/dist/cli.js.map +1 -0
- package/dist/mcp-server.d.ts +24 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +411 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/memory.d.ts +40 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/memory.js +241 -0
- package/dist/memory.js.map +1 -0
- package/dist/merge.d.ts +32 -0
- package/dist/merge.d.ts.map +1 -0
- package/dist/merge.js +251 -0
- package/dist/merge.js.map +1 -0
- package/dist/models/base.d.ts +72 -0
- package/dist/models/base.d.ts.map +1 -0
- package/dist/models/base.js +342 -0
- package/dist/models/base.js.map +1 -0
- package/dist/models/claude.d.ts +23 -0
- package/dist/models/claude.d.ts.map +1 -0
- package/dist/models/claude.js +30 -0
- package/dist/models/claude.js.map +1 -0
- package/dist/models/codex.d.ts +25 -0
- package/dist/models/codex.d.ts.map +1 -0
- package/dist/models/codex.js +34 -0
- package/dist/models/codex.js.map +1 -0
- package/dist/models/gemini.d.ts +23 -0
- package/dist/models/gemini.d.ts.map +1 -0
- package/dist/models/gemini.js +32 -0
- package/dist/models/gemini.js.map +1 -0
- package/dist/patch.d.ts +40 -0
- package/dist/patch.d.ts.map +1 -0
- package/dist/patch.js +183 -0
- package/dist/patch.js.map +1 -0
- package/dist/progress.d.ts +71 -0
- package/dist/progress.d.ts.map +1 -0
- package/dist/progress.js +268 -0
- package/dist/progress.js.map +1 -0
- package/dist/report.d.ts +19 -0
- package/dist/report.d.ts.map +1 -0
- package/dist/report.js +245 -0
- package/dist/report.js.map +1 -0
- package/dist/scanner.d.ts +64 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +645 -0
- package/dist/scanner.js.map +1 -0
- package/dist/setup.d.ts +52 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +252 -0
- package/dist/setup.js.map +1 -0
- package/dist/types.d.ts +153 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +203 -0
- package/dist/types.js.map +1 -0
- package/examples/claude-code-skill.md +22 -0
- package/examples/mcp-config.json +9 -0
- package/package.json +77 -0
package/dist/scanner.js
ADDED
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repository scanning and context gathering.
|
|
3
|
+
*
|
|
4
|
+
* Handles:
|
|
5
|
+
* - Git diff detection
|
|
6
|
+
* - File discovery based on prompt
|
|
7
|
+
* - Secret redaction
|
|
8
|
+
*
|
|
9
|
+
* TypeScript port of triage_cli/repo_scan.py — faithful port, same logic,
|
|
10
|
+
* same constants, same behavior.
|
|
11
|
+
*/
|
|
12
|
+
import { existsSync, statSync, readFileSync, readdirSync } from 'node:fs';
|
|
13
|
+
import { join, sep, extname, basename, relative, resolve } from 'node:path';
|
|
14
|
+
import { spawnSync } from 'node:child_process';
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Constants
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Secret-redaction rules, applied in order.
 *
 * Each entry is a `[pattern, replacement]` tuple: every match of `pattern`
 * (all are global regexes) is replaced by the placeholder `replacement`.
 */
const SECRET_PATTERNS = [
    // --- API keys and tokens ("name = value" / "name: value" assignments) ---
    [
        /(api[_-]?key|apikey)\s*[:=]\s*["']?[\w-]{20,}["']?/gi,
        '[REDACTED_API_KEY]',
    ],
    [
        /(secret[_-]?key|secretkey)\s*[:=]\s*["']?[\w-]{20,}["']?/gi,
        '[REDACTED_SECRET]',
    ],
    [
        /(auth[_-]?token|authtoken)\s*[:=]\s*["']?[\w-]{20,}["']?/gi,
        '[REDACTED_TOKEN]',
    ],
    [
        /(access[_-]?token)\s*[:=]\s*["']?[\w-]{20,}["']?/gi,
        '[REDACTED_TOKEN]',
    ],
    [
        /bearer\s+[\w-]{20,}/gi,
        '[REDACTED_BEARER]',
    ],

    // --- Private key blocks ([\s\S] lets the match span newlines) ---
    [
        /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
        '[REDACTED_PRIVATE_KEY]',
    ],

    // --- Passwords ---
    [
        /(password|passwd|pwd)\s*[:=]\s*["']?[^\s"']{8,}["']?/gi,
        '[REDACTED_PASSWORD]',
    ],

    // --- Database connection URLs ---
    [
        /(mysql|postgres|mongodb|redis):\/\/[^\s"']+/gi,
        '[REDACTED_DB_URL]',
    ],

    // --- AWS credentials ---
    [
        /AKIA[0-9A-Z]{16}/g,
        '[REDACTED_AWS_KEY]',
    ],
    [
        /(aws[_-]?secret[_-]?access[_-]?key)\s*[:=]\s*["']?[\w/+=]{40}["']?/gi,
        '[REDACTED_AWS_SECRET]',
    ],

    // --- Generic long hex strings assigned to a secret-looking name ---
    [
        /(key|secret|token|password)\s*[:=]\s*["']?[a-f0-9]{32,}["']?/gi,
        '[REDACTED_HEX_SECRET]',
    ],
];
|
|
41
|
+
/** Exact file names (basename match) that are never included in the context. */
const SKIP_FILES = new Set([
    // dotenv files
    '.env',
    '.env.local',
    '.env.production',
    '.env.development',
    // JSON credential/secret stores
    'credentials.json',
    'secrets.json',
    'config.secret.json',
    // tool credential files
    '.npmrc',
    '.pypirc',
    '.netrc',
    '.git-credentials',
    // SSH private keys
    'id_rsa',
    'id_ed25519',
    'id_ecdsa',
    'id_dsa',
]);
|
|
48
|
+
/** Lower-case file extensions (including the leading dot) that are never included. */
const SKIP_EXTENSIONS = new Set([
    // key and certificate material
    '.pem',
    '.key',
    '.p12',
    '.pfx',
    '.jks',
    // database files
    '.sqlite',
    '.db',
    '.sqlite3',
    // images
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.ico',
    '.svg',
    '.webp',
    // audio / video
    '.mp3',
    '.mp4',
    '.wav',
    '.avi',
    '.mov',
    // archives
    '.zip',
    '.tar',
    '.gz',
    '.rar',
    '.7z',
    // native binaries
    '.exe',
    '.dll',
    '.so',
    '.dylib',
    // compiled bytecode
    '.pyc',
    '.pyo',
    '.class',
]);
|
|
58
|
+
/**
 * File names looked up as likely project entrypoints.
 * Order matters: patterns are tried from first to last until the file budget is used up.
 */
const ENTRYPOINT_PATTERNS = [
    // Python
    'main.py',
    'app.py',
    'index.py',
    '__main__.py',
    // JavaScript
    'main.js',
    'index.js',
    'app.js',
    'server.js',
    // TypeScript
    'main.ts',
    'index.ts',
    'app.ts',
    // Go
    'main.go',
    'cmd/main.go',
    // build / packaging manifests
    'Makefile',
    'setup.py',
    'pyproject.toml',
    'package.json',
];
|
|
66
|
+
/** Lower-case words that are discarded when extracting search keywords from a prompt. */
const STOP_WORDS = new Set([
    // articles and prepositions
    'the', 'a', 'an',
    'in', 'on', 'at', 'to', 'for', 'of', 'with',
    // conjunctions and forms of "to be"
    'and', 'or', 'but',
    'is', 'are', 'was', 'were', 'be', 'been',
    // auxiliaries
    'have', 'has', 'had',
    'do', 'does', 'did',
    'will', 'would',
    'could', 'should', 'may', 'might', 'must', 'can',
    // demonstratives and pronouns
    'this', 'that',
    'these', 'those',
    'i', 'you', 'we', 'they', 'it', 'my', 'your',
    // generic request verbs
    'find', 'analyze', 'check', 'review', 'look', 'fix', 'update',
    // generic programming nouns
    'code', 'file', 'function', 'class', 'method', 'issue', 'bug',
    'error', 'problem', 'security', 'performance', 'test',
]);
|
|
77
|
+
/** Directory names that are never descended into and never allowed in an included path. */
const SKIP_DIRS = new Set([
    '.git',
    'node_modules',
    '__pycache__',
    'venv',
    '.venv',
    'vendor',
    'dist',
    'build',
    '.cache',
    '.tox',
]);
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// RepoScanner class
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
export class RepoScanner {
    /** Directory that git commands, directory walks and relative paths are anchored to. */
    root;

    /**
     * @param {string} [root] - Repository root; `process.cwd()` is used when nullish.
     */
    constructor(root) {
        this.root = root ?? process.cwd();
    }

    /**
     * Scan the repository and build the context object handed to the models.
     *
     * @param {boolean} [diffOnly=false] - When true and a diff exists, skip file discovery.
     * @param {number} [maxFiles=30] - Upper bound on the number of discovered files.
     * @param {string} [prompt=''] - User prompt; drives explicit-path and keyword discovery.
     * @returns {object} Context with `is_git_repo`, `has_diff`, `git_diff` (secrets
     *   redacted), `git_status`, `git_log`, `tree`, `files` (list of
     *   `{ path, content, reason, description }`), `prompt` and `root` (as stored
     *   on the scanner).
     */
    scan(diffOnly = false, maxFiles = 30, prompt = '') {
        const context = {
            is_git_repo: this._is_git_repo(),
            has_diff: false,
            git_diff: '',
            git_status: '',
            git_log: '',
            tree: '',
            files: [],
            prompt,
            root: this.root,
        };
        if (context.is_git_repo) {
            context.git_status = this._get_git_status();
            context.git_log = this._get_git_log();
            const diff = this._get_git_diff();
            if (diff) {
                context.has_diff = true;
                context.git_diff = this._redact_secrets(diff);
            }
        }
        // The tree is gathered even in diff-only mode: it is cheap orientation.
        context.tree = this._get_directory_tree();
        if (diffOnly && context.has_diff) {
            return context;
        }
        context.files = this._discover_files(prompt, maxFiles);
        return context;
    }

    /**
     * Run an external command inside `this.root` on a best-effort basis.
     *
     * @param {string} command - Executable name.
     * @param {string[]} args - Argument vector (no shell is involved).
     * @param {number} timeout - Timeout in milliseconds.
     * @returns {string|null} stdout when the command exits with status 0,
     *   otherwise `null` (missing binary, timeout, non-zero exit, spawn error).
     */
    _run_command(command, args, timeout) {
        try {
            const result = spawnSync(command, args, {
                cwd: this.root,
                encoding: 'utf8',
                timeout,
            });
            return result.status === 0 ? (result.stdout ?? '') : null;
        }
        catch {
            // Deliberate best effort: a failing helper command must never abort a scan.
            return null;
        }
    }

    /** @returns {boolean} Whether `this.root` is inside a git work tree. */
    _is_git_repo() {
        return this._run_command('git', ['rev-parse', '--git-dir'], 5000) !== null;
    }

    /** @returns {string} `git status --short` output, or `''` on failure. */
    _get_git_status() {
        return this._run_command('git', ['status', '--short'], 10000) ?? '';
    }

    /**
     * Collect staged and unstaged changes.
     *
     * @returns {string} Labelled diff sections joined by a newline; `''` when
     *   there are no changes or git fails.
     */
    _get_git_diff() {
        const staged = this._run_command('git', ['diff', '--cached'], 30000);
        const unstaged = this._run_command('git', ['diff'], 30000);
        const parts = [];
        if (staged) {
            parts.push('=== STAGED CHANGES ===\n' + staged);
        }
        if (unstaged) {
            parts.push('=== UNSTAGED CHANGES ===\n' + unstaged);
        }
        return parts.join('\n');
    }

    /**
     * @param {number} [limit=10] - Number of commits to list.
     * @returns {string} One-line-per-commit log, or `''` on failure.
     */
    _get_git_log(limit = 10) {
        return this._run_command('git', ['log', `-${limit}`, '--oneline', '--no-decorate'], 10000) ?? '';
    }

    /**
     * Render the directory structure for orientation.
     *
     * Uses the external `tree` command when it is available and succeeds;
     * otherwise falls back to a manual walk that skips dot entries,
     * `SKIP_DIRS` and `.pyc` files.
     *
     * @param {number} [maxDepth=3] - Maximum directory depth.
     * @param {number} [maxEntries=100] - Maximum number of output lines.
     * @returns {string} Newline-joined listing, or `''` if nothing could be read.
     */
    _get_directory_tree(maxDepth = 3, maxEntries = 100) {
        // Derive the ignore list from SKIP_DIRS so both code paths hide the same directories.
        const ignore = [...SKIP_DIRS, '*.pyc'].join('|');
        const treeOutput = this._run_command('tree', [
            '-L', String(maxDepth),
            '-I', ignore,
            '--noreport',
            '--dirsfirst',
        ], 10000);
        if (treeOutput) {
            return treeOutput.trim().split('\n').slice(0, maxEntries).join('\n');
        }
        // Fallback: manual directory listing.
        try {
            const lines = [];
            const walk = (dir, depth) => {
                if (depth >= maxDepth || lines.length >= maxEntries) {
                    return;
                }
                let entries;
                try {
                    entries = readdirSync(dir);
                }
                catch {
                    return;
                }
                const dirs = [];
                const files = [];
                for (const entry of entries) {
                    if (entry.startsWith('.')) {
                        continue;
                    }
                    let isDir;
                    try {
                        isDir = statSync(join(dir, entry)).isDirectory();
                    }
                    catch {
                        // Entry vanished or is unreadable (e.g. dangling symlink).
                        continue;
                    }
                    if (isDir) {
                        if (!SKIP_DIRS.has(entry)) {
                            dirs.push(entry);
                        }
                    }
                    else if (!entry.endsWith('.pyc')) {
                        files.push(entry);
                    }
                }
                const indent = ' '.repeat(depth);
                const folderName = depth === 0 ? '.' : basename(dir);
                lines.push(`${indent}${folderName}/`);
                // At most 20 files per directory keep one huge folder from eating the budget.
                for (const f of files.sort().slice(0, 20)) {
                    lines.push(`${indent} ${f}`);
                }
                for (const d of dirs) {
                    if (lines.length >= maxEntries) {
                        break;
                    }
                    walk(join(dir, d), depth + 1);
                }
            };
            walk(this.root, 0);
            return lines.slice(0, maxEntries).join('\n');
        }
        catch {
            return '';
        }
    }

    /**
     * Extract a short description from the top of a file.
     *
     * Looks, in order, for a Python triple-quoted docstring within the first
     * 30 lines, then for the first run of comment lines (`#`, `//`, or a
     * `/* ... *\/` block including its `*`-prefixed continuation lines)
     * within the first 10 lines.
     *
     * @param {string} content - File content.
     * @returns {string} Description truncated to 200 characters, or `''`.
     */
    _extract_file_description(content) {
        if (!content) {
            return '';
        }
        const lines = content.split('\n').slice(0, 30);
        const stripQuotes = (text) => text.replace(/"""|'''/g, '');

        // Pass 1: Python module docstring.
        let inDocstring = false;
        const docstringLines = [];
        for (const line of lines) {
            const stripped = line.trim();
            if (!inDocstring) {
                if (stripped.startsWith('"""') || stripped.startsWith("'''")) {
                    const tripleDouble = stripped.split('"""').length - 1;
                    const tripleSingle = stripped.split("'''").length - 1;
                    if (tripleDouble >= 2 || tripleSingle >= 2) {
                        // Opened and closed on the same line.
                        return stripQuotes(stripped).trim().slice(0, 200);
                    }
                    inDocstring = true;
                    docstringLines.push(stripQuotes(stripped));
                }
                continue;
            }
            if (stripped.includes('"""') || stripped.includes("'''")) {
                docstringLines.push(stripQuotes(stripped));
                return docstringLines.filter(Boolean).join(' ').slice(0, 200);
            }
            docstringLines.push(stripped);
        }

        // Pass 2: leading comment run. An unterminated docstring falls through to here.
        const commentLines = [];
        let started = false;
        let inBlock = false;
        for (const line of lines.slice(0, 10)) {
            const stripped = line.trim();
            const opensBlock = !inBlock && stripped.startsWith('/*');
            const isComment = inBlock
                || opensBlock
                || stripped.startsWith('#')
                || stripped.startsWith('//');
            if (!isComment) {
                if (started) {
                    break; // first non-comment line after the run ends it
                }
                continue;
            }
            started = true;
            let text = stripped;
            if (inBlock || opensBlock) {
                // Skip the opener itself when looking for the terminator ("/*/" is not closed).
                const closeAt = text.indexOf('*/', opensBlock ? 2 : 0);
                inBlock = closeAt === -1;
                if (closeAt !== -1) {
                    text = text.slice(0, closeAt);
                }
            }
            text = text.replace(/^[#/*]+/, '').trim();
            if (text) {
                commentLines.push(text);
            }
        }
        return commentLines.join(' ').slice(0, 200);
    }

    /**
     * Discover relevant files based on the prompt and the repository structure.
     *
     * Order: explicit paths -> explicit dirs -> git diff files -> keyword
     * search -> entrypoints. Each file is read through `_read_file` (so its
     * content is secret-redacted) and recorded at most once.
     *
     * NOTE(review): absolute paths named in the prompt are read as-is, without
     * the `_should_include_file` filter and even when outside `root`; confirm
     * this is intended for every caller that forwards untrusted prompts.
     *
     * @param {string} prompt - User prompt.
     * @param {number} maxFiles - Maximum number of files to return.
     * @returns {Array<{path: string, content: string, reason: string, description: string}>}
     */
    _discover_files(prompt, maxFiles) {
        const files = [];
        const seen = new Set();
        const isFull = () => files.length >= maxFiles;
        const statOrNull = (target) => {
            try {
                return statSync(target);
            }
            catch {
                return null;
            }
        };
        const addFile = (filePath, reason, displayPath = filePath) => {
            if (seen.has(filePath)) {
                return;
            }
            const content = this._read_file(filePath);
            if (!content) {
                return; // unreadable, missing or empty
            }
            files.push({
                path: displayPath,
                content,
                reason,
                description: this._extract_file_description(content),
            });
            seen.add(filePath);
        };

        // 0a. Absolute file paths explicitly mentioned in the prompt. The (?!\w)
        //     boundary keeps "x.json" from being captured as "x.js".
        const explicitPathRe = /\/[\w/.-]+\.(?:py|js|ts|html|css|json|yaml|yml|md|txt|sh|sql|php)(?!\w)/g;
        for (const [pathStr] of prompt.matchAll(explicitPathRe)) {
            if (isFull()) {
                break;
            }
            if (!statOrNull(pathStr)?.isFile()) {
                continue;
            }
            addFile(pathStr, 'explicitly mentioned in prompt');
        }

        // 0b. Absolute directory paths mentioned in the prompt.
        const explicitDirRe = /\/[\w/.-]+\//g;
        for (const [dirStr] of prompt.matchAll(explicitDirRe)) {
            if (isFull()) {
                break;
            }
            const dirPath = dirStr.replace(/\/$/, '');
            if (!statOrNull(dirPath)?.isDirectory()) {
                continue;
            }
            // Pass the glob unchanged: _globDir treats a bare ".py" as an exact file name.
            for (const glob of ['*.py', '*.js', '*.ts', '*.html', '*.php']) {
                if (isFull()) {
                    break;
                }
                for (const matchPath of this._globDir(dirPath, glob)) {
                    if (isFull()) {
                        break;
                    }
                    if (!this._should_include_file(matchPath)) {
                        continue;
                    }
                    addFile(matchPath, `in explicitly mentioned directory ${dirStr}`);
                }
            }
        }

        // 1. Files changed in the git diff (at most a third of the budget).
        if (!isFull() && this._is_git_repo()) {
            const diffFiles = this._get_diff_file_list();
            for (const filePath of diffFiles.slice(0, Math.floor(maxFiles / 3))) {
                if (!this._should_include_file(filePath)) {
                    continue;
                }
                addFile(filePath, 'changed in git diff');
            }
        }

        // 2. Files whose name or content matches a prompt keyword.
        if (!isFull()) {
            const keywords = this._extract_keywords(prompt);
            const keywordFiles = keywords.length > 0 ? this._find_files_by_keywords(keywords) : [];
            for (const [filePath, reason] of keywordFiles) {
                if (isFull()) {
                    break;
                }
                if (!this._should_include_file(filePath)) {
                    continue;
                }
                addFile(filePath, reason);
            }
        }

        // 3. Common entrypoints, reported relative to the root (at most two per pattern).
        for (const pattern of ENTRYPOINT_PATTERNS) {
            if (isFull()) {
                break;
            }
            for (const matchPath of this._globDir(this.root, pattern, true).slice(0, 2)) {
                if (isFull()) {
                    break;
                }
                if (!this._should_include_file(matchPath)) {
                    continue;
                }
                addFile(matchPath, `entrypoint (${pattern})`, relative(this.root, matchPath));
            }
        }
        // Step 1 does not check the budget per file, so trim here.
        return files.slice(0, maxFiles);
    }

    /**
     * Extract likely file/function/class names from the prompt.
     *
     * @param {string} prompt - User prompt.
     * @returns {string[]} Unique lower-case words of length >= 3 that are not
     *   stop words, in order of first appearance.
     */
    _extract_keywords(prompt) {
        const keywords = new Set();
        for (const word of prompt.toLowerCase().split(/[^a-zA-Z0-9_]+/)) {
            if (word.length >= 3 && !STOP_WORDS.has(word)) {
                keywords.add(word); // Set keeps first-seen order and drops repeats
            }
        }
        return [...keywords];
    }

    /**
     * Find files whose name (via `find`) or content (via `grep`) matches a keyword.
     *
     * @param {string[]} keywords - Keywords as produced by `_extract_keywords`
     *   (word characters only, so they are safe as glob and regex input).
     * @returns {Array<[string, string]>} `[absolutePath, reason]` pairs; may
     *   contain the same path more than once.
     */
    _find_files_by_keywords(keywords) {
        const results = [];
        // Search by filename.
        for (const keyword of keywords) {
            const output = this._run_command('find', [
                '.', '-type', 'f', '-iname', `*${keyword}*`,
                '-not', '-path', '*/.*',
                '-not', '-path', '*/node_modules/*',
                '-not', '-path', '*/__pycache__/*',
                '-not', '-path', '*/venv/*',
            ], 10000);
            if (!output) {
                continue;
            }
            for (const line of output.trim().split('\n')) {
                if (line) {
                    results.push([resolve(this.root, line), `filename matches "${keyword}"`]);
                }
            }
        }
        // Search by content; limited to the first 5 keywords and 10 hits each.
        for (const keyword of keywords.slice(0, 5)) {
            const output = this._run_command('grep', [
                '-r', '-l', '-i',
                '--include=*.py', '--include=*.js', '--include=*.ts',
                '--include=*.go', '--include=*.java', '--include=*.php',
                '--exclude-dir=.*', '--exclude-dir=node_modules',
                '--exclude-dir=__pycache__', '--exclude-dir=venv',
                '-e', keyword,
                // Explicit operand: not every grep defaults to the working directory for -r.
                '.',
            ], 30000);
            if (!output) {
                continue; // grep exits 1 when nothing matches
            }
            for (const line of output.trim().split('\n').slice(0, 10)) {
                if (line) {
                    results.push([resolve(this.root, line), `content matches "${keyword}"`]);
                }
            }
        }
        return results;
    }

    /**
     * List files that differ from HEAD.
     *
     * `--relative` makes git print paths relative to `this.root` (and limit the
     * list to that directory), so resolving against `this.root` is correct even
     * when the scanner is rooted in a subdirectory of the work tree.
     *
     * @returns {string[]} Absolute paths; empty on failure or when nothing changed.
     */
    _get_diff_file_list() {
        const output = this._run_command('git', ['diff', '--name-only', '--relative', 'HEAD'], 10000);
        if (!output) {
            return [];
        }
        return output
            .trim()
            .split('\n')
            .filter(Boolean)
            .map((f) => resolve(this.root, f));
    }

    /**
     * Decide whether a file may be included in the context.
     *
     * Rejects names in `SKIP_FILES`, extensions in `SKIP_EXTENSIONS`, and paths
     * with a component in `SKIP_DIRS`. For files inside `root` only the
     * components below `root` are checked, so a checkout located under e.g.
     * `/srv/build/` is not rejected wholesale.
     *
     * @param {string} filePath - Absolute path, or path relative to `root`.
     * @returns {boolean}
     */
    _should_include_file(filePath) {
        if (SKIP_FILES.has(basename(filePath))) {
            return false;
        }
        if (SKIP_EXTENSIONS.has(extname(filePath).toLowerCase())) {
            return false;
        }
        const absolute = resolve(this.root, filePath);
        const fromRoot = relative(this.root, absolute);
        const outsideRoot = fromRoot === '..' || fromRoot.startsWith(`..${sep}`);
        // Outside the root there is no meaningful anchor, so check every component.
        const scoped = outsideRoot ? absolute : fromRoot;
        return !scoped.split(sep).some((part) => SKIP_DIRS.has(part));
    }

    /**
     * Read a file with a size limit and secret redaction.
     *
     * @param {string} filePath - Absolute path, or path relative to `root`.
     * @param {number} [maxSize=100_000] - Maximum size in bytes.
     * @returns {string|null} Redacted content; a bracketed placeholder string
     *   when the file is too large or reading throws; `null` when the path
     *   does not exist or is not a regular file.
     */
    _read_file(filePath, maxSize = 100_000) {
        try {
            // resolve() returns absolute inputs unchanged, so no manual "/" check is needed.
            const resolved = resolve(this.root, filePath);
            if (!existsSync(resolved)) {
                return null;
            }
            const stat = statSync(resolved);
            if (!stat.isFile()) {
                return null;
            }
            if (stat.size > maxSize) {
                return `[FILE TOO LARGE: ${stat.size} bytes]`;
            }
            return this._redact_secrets(readFileSync(resolved, { encoding: 'utf8' }));
        }
        catch (e) {
            return `[ERROR READING FILE: ${String(e)}]`;
        }
    }

    /**
     * Replace every match of every `SECRET_PATTERNS` rule with its placeholder.
     *
     * @param {string} content - Text to sanitize.
     * @returns {string} Redacted text.
     */
    _redact_secrets(content) {
        let result = content;
        for (const [pattern, replacement] of SECRET_PATTERNS) {
            // Defensive reset: the patterns are shared global regexes.
            pattern.lastIndex = 0;
            result = result.replace(pattern, replacement);
        }
        return result;
    }

    // ---------------------------------------------------------------------------
    // Private helpers
    // ---------------------------------------------------------------------------

    /**
     * Walk a directory and collect files matching a simple pattern.
     *
     * Supported patterns:
     * - `"*.py"`: any file whose name ends with `.py`;
     * - `"package.json"`: exact file name at any depth;
     * - `"cmd/main.go"`: exact trailing path below `dir`, `/`-separated.
     *
     * A pattern without `*` is always treated as exact, so a bare `".py"` only
     * matches a file literally named `.py`. Directories in `SKIP_DIRS` and dot
     * directories are not descended into.
     *
     * @param {string} dir - Directory to search.
     * @param {string} suffix - Pattern as described above.
     * @param {boolean} [recursive=true] - When false, only the top level of `dir`.
     * @returns {string[]} Matching paths (each `dir` joined with the path below it).
     */
    _globDir(dir, suffix, recursive = true) {
        const isExact = !suffix.includes('*');
        const needle = suffix.startsWith('*.') ? suffix.slice(1) : suffix;
        const nativeNeedle = needle.split('/').join(sep);
        const hasDirPart = isExact && needle.includes('/');
        const isMatch = (entry, full) => {
            if (!isExact) {
                return entry.endsWith(needle);
            }
            if (!hasDirPart) {
                return entry === needle;
            }
            // Path patterns are compared against the location below `dir`.
            const below = relative(dir, full);
            return below === nativeNeedle || below.endsWith(`${sep}${nativeNeedle}`);
        };
        const results = [];
        const walk = (current) => {
            let entries;
            try {
                entries = readdirSync(current);
            }
            catch {
                return;
            }
            for (const entry of entries) {
                const full = join(current, entry);
                let isDir;
                try {
                    isDir = statSync(full).isDirectory();
                }
                catch {
                    continue;
                }
                if (isDir) {
                    if (recursive && !SKIP_DIRS.has(entry) && !entry.startsWith('.')) {
                        walk(full);
                    }
                }
                else if (isMatch(entry, full)) {
                    results.push(full);
                }
            }
        };
        walk(dir);
        return results;
    }
}
|
|
645
|
+
//# sourceMappingURL=scanner.js.map
|