@stupidloud/codegraph 0.7.20 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +127 -106
- package/dist/bin/codegraph.d.ts +4 -0
- package/dist/bin/codegraph.d.ts.map +1 -1
- package/dist/bin/codegraph.js +327 -8
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/node-version-check.d.ts +17 -0
- package/dist/bin/node-version-check.d.ts.map +1 -1
- package/dist/bin/node-version-check.js +37 -0
- package/dist/bin/node-version-check.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +1 -11
- package/dist/config.js.map +1 -1
- package/dist/db/index.d.ts +30 -1
- package/dist/db/index.d.ts.map +1 -1
- package/dist/db/index.js +75 -25
- package/dist/db/index.js.map +1 -1
- package/dist/db/queries.d.ts +16 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +80 -27
- package/dist/db/queries.js.map +1 -1
- package/dist/db/sqlite-adapter.d.ts +17 -23
- package/dist/db/sqlite-adapter.d.ts.map +1 -1
- package/dist/db/sqlite-adapter.js +51 -174
- package/dist/db/sqlite-adapter.js.map +1 -1
- package/dist/extraction/grammars.d.ts +7 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +42 -2
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +9 -14
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +131 -124
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/index.d.ts.map +1 -1
- package/dist/extraction/languages/index.js +4 -0
- package/dist/extraction/languages/index.js.map +1 -1
- package/dist/extraction/languages/lua.d.ts +3 -0
- package/dist/extraction/languages/lua.d.ts.map +1 -0
- package/dist/extraction/languages/lua.js +150 -0
- package/dist/extraction/languages/lua.js.map +1 -0
- package/dist/extraction/languages/luau.d.ts +3 -0
- package/dist/extraction/languages/luau.d.ts.map +1 -0
- package/dist/extraction/languages/luau.js +37 -0
- package/dist/extraction/languages/luau.js.map +1 -0
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +38 -0
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/extraction/wasm/tree-sitter-lua.wasm +0 -0
- package/dist/extraction/wasm/tree-sitter-luau.wasm +0 -0
- package/dist/extraction/wasm-runtime-flags.d.ts +38 -0
- package/dist/extraction/wasm-runtime-flags.d.ts.map +1 -0
- package/dist/extraction/wasm-runtime-flags.js +105 -0
- package/dist/extraction/wasm-runtime-flags.js.map +1 -0
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +71 -36
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +11 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +28 -18
- package/dist/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +3 -1
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +66 -2
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +195 -5
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +2 -2
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +4 -2
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/claude.d.ts +26 -6
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +165 -10
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/installer/targets/cursor.d.ts.map +1 -1
- package/dist/installer/targets/cursor.js +57 -3
- package/dist/installer/targets/cursor.js.map +1 -1
- package/dist/installer/targets/hermes.d.ts +18 -0
- package/dist/installer/targets/hermes.d.ts.map +1 -0
- package/dist/installer/targets/hermes.js +305 -0
- package/dist/installer/targets/hermes.js.map +1 -0
- package/dist/installer/targets/registry.d.ts.map +1 -1
- package/dist/installer/targets/registry.js +2 -0
- package/dist/installer/targets/registry.js.map +1 -1
- package/dist/installer/targets/types.d.ts +1 -1
- package/dist/installer/targets/types.d.ts.map +1 -1
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +213 -18
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +15 -0
- package/dist/mcp/server-instructions.js.map +1 -1
- package/dist/mcp/tools.d.ts +25 -1
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +221 -30
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +17 -0
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +63 -0
- package/dist/mcp/transport.js.map +1 -1
- package/dist/resolution/frameworks/drupal.d.ts +51 -0
- package/dist/resolution/frameworks/drupal.d.ts.map +1 -0
- package/dist/resolution/frameworks/drupal.js +335 -0
- package/dist/resolution/frameworks/drupal.js.map +1 -0
- package/dist/resolution/frameworks/index.d.ts +2 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +9 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts +26 -0
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -0
- package/dist/resolution/frameworks/nestjs.js +374 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -0
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +40 -7
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/lru-cache.d.ts +24 -0
- package/dist/resolution/lru-cache.d.ts.map +1 -0
- package/dist/resolution/lru-cache.js +62 -0
- package/dist/resolution/lru-cache.js.map +1 -0
- package/dist/sync/git-hooks.d.ts +45 -0
- package/dist/sync/git-hooks.d.ts.map +1 -0
- package/dist/sync/git-hooks.js +223 -0
- package/dist/sync/git-hooks.js.map +1 -0
- package/dist/sync/index.d.ts +4 -0
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +12 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watch-policy.d.ts +48 -0
- package/dist/sync/watch-policy.d.ts.map +1 -0
- package/dist/sync/watch-policy.js +124 -0
- package/dist/sync/watch-policy.js.map +1 -0
- package/dist/sync/watcher.d.ts +2 -4
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +14 -6
- package/dist/sync/watcher.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +11 -0
- package/dist/types.js.map +1 -1
- package/dist/utils.js +1 -1
- package/package.json +4 -4
- package/scripts/add-lang/bench.sh +60 -0
- package/scripts/add-lang/check-grammar.mjs +75 -0
- package/scripts/add-lang/dump-ast.mjs +103 -0
- package/scripts/add-lang/verify-extraction.mjs +70 -0
- package/scripts/agent-eval/audit.sh +68 -0
- package/scripts/agent-eval/itrun.sh +1 -1
- package/scripts/agent-eval/run-all.sh +67 -0
- package/scripts/build-bundle.sh +118 -0
- package/scripts/npm-shim.js +246 -0
- package/scripts/pack-npm.sh +95 -0
- package/scripts/patch-tree-sitter-dart.js +0 -112
- package/scripts/release.sh +0 -68
package/dist/extraction/index.js
CHANGED
|
@@ -41,9 +41,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
41
41
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
42
42
|
};
|
|
43
43
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
|
-
exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammars = exports.getSupportedLanguages = exports.isGrammarLoaded = exports.isLanguageSupported = exports.detectLanguage = exports.extractFromSource = exports.ExtractionOrchestrator = void 0;
|
|
44
|
+
exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammars = exports.getSupportedLanguages = exports.isGrammarLoaded = exports.isLanguageSupported = exports.isSourceFile = exports.detectLanguage = exports.extractFromSource = exports.ExtractionOrchestrator = void 0;
|
|
45
45
|
exports.hashContent = hashContent;
|
|
46
|
-
exports.shouldIncludeFile = shouldIncludeFile;
|
|
47
46
|
exports.scanDirectory = scanDirectory;
|
|
48
47
|
exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
49
48
|
const fs = __importStar(require("fs"));
|
|
@@ -55,7 +54,7 @@ const tree_sitter_1 = require("./tree-sitter");
|
|
|
55
54
|
const grammars_1 = require("./grammars");
|
|
56
55
|
const errors_1 = require("../errors");
|
|
57
56
|
const utils_1 = require("../utils");
|
|
58
|
-
const
|
|
57
|
+
const ignore_1 = __importDefault(require("ignore"));
|
|
59
58
|
const frameworks_1 = require("../resolution/frameworks");
|
|
60
59
|
/**
|
|
61
60
|
* Number of files to read in parallel during indexing.
|
|
@@ -84,34 +83,64 @@ function hashContent(content) {
|
|
|
84
83
|
return crypto.createHash('sha256').update(content).digest('hex');
|
|
85
84
|
}
|
|
86
85
|
/**
|
|
87
|
-
*
|
|
86
|
+
* Skip files larger than this (bytes). Generated bundles, minified JS, and
|
|
87
|
+
* vendored blobs blow the WASM heap and the worker-recycle budget for no useful
|
|
88
|
+
* symbols. 1 MB covers essentially all hand-written source.
|
|
88
89
|
*/
|
|
89
|
-
|
|
90
|
-
filePath = (0, utils_1.normalizePath)(filePath);
|
|
91
|
-
return picomatch_1.default.isMatch(filePath, pattern, { dot: true });
|
|
92
|
-
}
|
|
90
|
+
const MAX_FILE_SIZE = 1024 * 1024;
|
|
93
91
|
/**
|
|
94
|
-
*
|
|
92
|
+
* Collect git-visible files (tracked + untracked, .gitignore-respected) from the
|
|
93
|
+
* git repository rooted at `repoDir`, adding each to `files` with `prefix`
|
|
94
|
+
* prepended so paths stay relative to the original scan root.
|
|
95
|
+
*
|
|
96
|
+
* Recurses into embedded git repositories — nested repos that are NOT submodules
|
|
97
|
+
* (independent clones living inside the workspace, common in CMake "super-repo"
|
|
98
|
+
* layouts). The parent repo's `git ls-files` cannot see into them: tracked output
|
|
99
|
+
* skips them entirely, and untracked output reports them only as an opaque
|
|
100
|
+
* "subdir/" entry (trailing slash) rather than expanding their files. Each
|
|
101
|
+
* embedded repo is its own git boundary, so we re-run `git ls-files` inside it.
|
|
102
|
+
* (See issue #193.)
|
|
95
103
|
*/
|
|
96
|
-
function
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
104
|
+
function collectGitFiles(repoDir, prefix, files) {
|
|
105
|
+
const gitOpts = { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] };
|
|
106
|
+
// Tracked files. --recurse-submodules pulls in files from active submodules,
|
|
107
|
+
// which the index would otherwise represent only as a commit pointer.
|
|
108
|
+
// Without this, monorepos using submodules index 0 files. (See issue #147.)
|
|
109
|
+
// Note: --recurse-submodules only supports -c/--cached and --stage modes — it
|
|
110
|
+
// can't be combined with -o, so untracked files are gathered separately below.
|
|
111
|
+
const tracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
|
|
112
|
+
for (const line of tracked.split('\n')) {
|
|
113
|
+
const trimmed = line.trim();
|
|
114
|
+
if (trimmed) {
|
|
115
|
+
files.add((0, utils_1.normalizePath)(prefix + trimmed));
|
|
101
116
|
}
|
|
102
117
|
}
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
118
|
+
// Untracked files (submodules manage their own untracked state). Embedded git
|
|
119
|
+
// repos surface here as a single "subdir/" entry that git refuses to descend
|
|
120
|
+
// into — recurse into those as their own repos so their source gets indexed.
|
|
121
|
+
const untracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
|
|
122
|
+
for (const line of untracked.split('\n')) {
|
|
123
|
+
const trimmed = line.trim();
|
|
124
|
+
if (!trimmed)
|
|
125
|
+
continue;
|
|
126
|
+
if (trimmed.endsWith('/')) {
|
|
127
|
+
// git only emits a trailing-slash directory entry for an embedded repo.
|
|
128
|
+
// Guard with a .git check anyway, and skip anything else exactly as git
|
|
129
|
+
// itself skips it (we never descend into a non-repo opaque dir).
|
|
130
|
+
const childDir = path.join(repoDir, trimmed);
|
|
131
|
+
if (fs.existsSync(path.join(childDir, '.git'))) {
|
|
132
|
+
collectGitFiles(childDir, prefix + trimmed, files);
|
|
133
|
+
}
|
|
134
|
+
continue;
|
|
107
135
|
}
|
|
136
|
+
files.add((0, utils_1.normalizePath)(prefix + trimmed));
|
|
108
137
|
}
|
|
109
|
-
return false;
|
|
110
138
|
}
|
|
111
139
|
/**
|
|
112
140
|
* Get all files visible to git (tracked + untracked but not ignored).
|
|
113
|
-
* Respects .gitignore at all levels (root, subdirectories)
|
|
114
|
-
*
|
|
141
|
+
* Respects .gitignore at all levels (root, subdirectories) and descends into
|
|
142
|
+
* embedded (nested, non-submodule) git repos. Returns null on failure
|
|
143
|
+
* (non-git project) so callers can fall back to a filesystem walk.
|
|
115
144
|
*/
|
|
116
145
|
function getGitVisibleFiles(rootDir) {
|
|
117
146
|
try {
|
|
@@ -131,27 +160,7 @@ function getGitVisibleFiles(rootDir) {
|
|
|
131
160
|
}
|
|
132
161
|
}
|
|
133
162
|
const files = new Set();
|
|
134
|
-
|
|
135
|
-
// Tracked files. --recurse-submodules pulls in files from active submodules,
|
|
136
|
-
// which the main repo's index would otherwise represent only as a commit pointer.
|
|
137
|
-
// Without this, monorepos using submodules index 0 files. (See issue #147.)
|
|
138
|
-
// Note: --recurse-submodules only supports -c/--cached and --stage modes — it
|
|
139
|
-
// can't be combined with -o, so untracked files are gathered separately below.
|
|
140
|
-
const tracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
|
|
141
|
-
for (const line of tracked.split('\n')) {
|
|
142
|
-
const trimmed = line.trim();
|
|
143
|
-
if (trimmed) {
|
|
144
|
-
files.add((0, utils_1.normalizePath)(trimmed));
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
// Untracked files in the main repo (submodules manage their own untracked state).
|
|
148
|
-
const untracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
|
|
149
|
-
for (const line of untracked.split('\n')) {
|
|
150
|
-
const trimmed = line.trim();
|
|
151
|
-
if (trimmed) {
|
|
152
|
-
files.add((0, utils_1.normalizePath)(trimmed));
|
|
153
|
-
}
|
|
154
|
-
}
|
|
163
|
+
collectGitFiles(rootDir, '', files);
|
|
155
164
|
return files;
|
|
156
165
|
}
|
|
157
166
|
catch {
|
|
@@ -162,7 +171,7 @@ function getGitVisibleFiles(rootDir) {
|
|
|
162
171
|
* Use `git status` to detect changed files instead of scanning every file.
|
|
163
172
|
* Returns null on failure so callers fall back to full scan.
|
|
164
173
|
*/
|
|
165
|
-
function getGitChangedFiles(rootDir
|
|
174
|
+
function getGitChangedFiles(rootDir) {
|
|
166
175
|
try {
|
|
167
176
|
const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
|
|
168
177
|
const modified = [];
|
|
@@ -173,8 +182,8 @@ function getGitChangedFiles(rootDir, config) {
|
|
|
173
182
|
continue; // Minimum: "XY file"
|
|
174
183
|
const statusCode = line.substring(0, 2);
|
|
175
184
|
const filePath = (0, utils_1.normalizePath)(line.substring(3));
|
|
176
|
-
// Skip files
|
|
177
|
-
if (!
|
|
185
|
+
// Skip non-source files (git status already omits .gitignored paths).
|
|
186
|
+
if (!(0, grammars_1.isSourceFile)(filePath))
|
|
178
187
|
continue;
|
|
179
188
|
if (statusCode === '??') {
|
|
180
189
|
added.push(filePath);
|
|
@@ -194,24 +203,20 @@ function getGitChangedFiles(rootDir, config) {
|
|
|
194
203
|
}
|
|
195
204
|
}
|
|
196
205
|
/**
|
|
197
|
-
*
|
|
198
|
-
*/
|
|
199
|
-
const CODEGRAPH_IGNORE_MARKER = '.codegraphignore';
|
|
200
|
-
/**
|
|
201
|
-
* Recursively scan directory for source files.
|
|
206
|
+
* Recursively scan a directory for source files.
|
|
202
207
|
*
|
|
203
|
-
* In git repos, uses `git ls-files`
|
|
204
|
-
*
|
|
205
|
-
*
|
|
208
|
+
* In git repos, uses `git ls-files` (inherently respects .gitignore at all
|
|
209
|
+
* levels), then keeps files with a supported source extension. For non-git
|
|
210
|
+
* projects, falls back to a filesystem walk that parses .gitignore itself.
|
|
206
211
|
*/
|
|
207
|
-
function scanDirectory(rootDir,
|
|
212
|
+
function scanDirectory(rootDir, onProgress) {
|
|
208
213
|
// Fast path: use git to get all visible files (respects .gitignore everywhere)
|
|
209
214
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
210
215
|
if (gitFiles) {
|
|
211
216
|
const files = [];
|
|
212
217
|
let count = 0;
|
|
213
218
|
for (const filePath of gitFiles) {
|
|
214
|
-
if (
|
|
219
|
+
if ((0, grammars_1.isSourceFile)(filePath)) {
|
|
215
220
|
files.push(filePath);
|
|
216
221
|
count++;
|
|
217
222
|
onProgress?.(count, filePath);
|
|
@@ -220,19 +225,19 @@ function scanDirectory(rootDir, config, onProgress) {
|
|
|
220
225
|
return files;
|
|
221
226
|
}
|
|
222
227
|
// Fallback: walk filesystem for non-git projects
|
|
223
|
-
return scanDirectoryWalk(rootDir,
|
|
228
|
+
return scanDirectoryWalk(rootDir, onProgress);
|
|
224
229
|
}
|
|
225
230
|
/**
|
|
226
231
|
* Async variant of scanDirectory that yields to the event loop periodically,
|
|
227
232
|
* allowing worker threads to receive and render progress messages.
|
|
228
233
|
*/
|
|
229
|
-
async function scanDirectoryAsync(rootDir,
|
|
234
|
+
async function scanDirectoryAsync(rootDir, onProgress) {
|
|
230
235
|
const gitFiles = getGitVisibleFiles(rootDir);
|
|
231
236
|
if (gitFiles) {
|
|
232
237
|
const files = [];
|
|
233
238
|
let count = 0;
|
|
234
239
|
for (const filePath of gitFiles) {
|
|
235
|
-
if (
|
|
240
|
+
if ((0, grammars_1.isSourceFile)(filePath)) {
|
|
236
241
|
files.push(filePath);
|
|
237
242
|
count++;
|
|
238
243
|
onProgress?.(count, filePath);
|
|
@@ -244,16 +249,40 @@ async function scanDirectoryAsync(rootDir, config, onProgress) {
|
|
|
244
249
|
}
|
|
245
250
|
return files;
|
|
246
251
|
}
|
|
247
|
-
return scanDirectoryWalk(rootDir,
|
|
252
|
+
return scanDirectoryWalk(rootDir, onProgress);
|
|
248
253
|
}
|
|
249
254
|
/**
|
|
250
255
|
* Filesystem walk fallback for non-git projects.
|
|
251
256
|
*/
|
|
252
|
-
function scanDirectoryWalk(rootDir,
|
|
257
|
+
function scanDirectoryWalk(rootDir, onProgress) {
|
|
253
258
|
const files = [];
|
|
254
259
|
let count = 0;
|
|
255
260
|
const visitedDirs = new Set();
|
|
256
|
-
|
|
261
|
+
const loadIgnore = (dir) => {
|
|
262
|
+
try {
|
|
263
|
+
const giPath = path.join(dir, '.gitignore');
|
|
264
|
+
if (fs.existsSync(giPath)) {
|
|
265
|
+
return { dir, ig: (0, ignore_1.default)().add(fs.readFileSync(giPath, 'utf-8')) };
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
catch {
|
|
269
|
+
// Unreadable .gitignore — treat as absent.
|
|
270
|
+
}
|
|
271
|
+
return null;
|
|
272
|
+
};
|
|
273
|
+
const isIgnored = (fullPath, isDir, matchers) => {
|
|
274
|
+
for (const { dir, ig } of matchers) {
|
|
275
|
+
let rel = (0, utils_1.normalizePath)(path.relative(dir, fullPath));
|
|
276
|
+
if (!rel || rel.startsWith('..'))
|
|
277
|
+
continue; // not under this matcher's dir
|
|
278
|
+
if (isDir)
|
|
279
|
+
rel += '/'; // dir-only rules (e.g. `build/`) only match with the slash
|
|
280
|
+
if (ig.ignores(rel))
|
|
281
|
+
return true;
|
|
282
|
+
}
|
|
283
|
+
return false;
|
|
284
|
+
};
|
|
285
|
+
function walk(dir, matchers) {
|
|
257
286
|
let realDir;
|
|
258
287
|
try {
|
|
259
288
|
realDir = fs.realpathSync(dir);
|
|
@@ -267,12 +296,9 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
267
296
|
return;
|
|
268
297
|
}
|
|
269
298
|
visitedDirs.add(realDir);
|
|
270
|
-
//
|
|
271
|
-
const
|
|
272
|
-
|
|
273
|
-
(0, errors_1.logDebug)('Skipping directory due to .codegraphignore marker', { dir });
|
|
274
|
-
return;
|
|
275
|
-
}
|
|
299
|
+
// This directory's own .gitignore (if present) applies to everything below it.
|
|
300
|
+
const own = loadIgnore(dir);
|
|
301
|
+
const active = own ? [...matchers, own] : matchers;
|
|
276
302
|
let entries;
|
|
277
303
|
try {
|
|
278
304
|
entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
@@ -282,6 +308,9 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
282
308
|
return;
|
|
283
309
|
}
|
|
284
310
|
for (const entry of entries) {
|
|
311
|
+
// Never descend into git internals or our own data directory.
|
|
312
|
+
if (entry.name === '.git' || entry.name === '.codegraph')
|
|
313
|
+
continue;
|
|
285
314
|
const fullPath = path.join(dir, entry.name);
|
|
286
315
|
const relativePath = (0, utils_1.normalizePath)(path.relative(rootDir, fullPath));
|
|
287
316
|
if (entry.isSymbolicLink()) {
|
|
@@ -289,20 +318,12 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
289
318
|
const realTarget = fs.realpathSync(fullPath);
|
|
290
319
|
const stat = fs.statSync(realTarget);
|
|
291
320
|
if (stat.isDirectory()) {
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
for (const pattern of config.exclude) {
|
|
295
|
-
if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
|
|
296
|
-
excluded = true;
|
|
297
|
-
break;
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
if (!excluded) {
|
|
301
|
-
walk(fullPath);
|
|
321
|
+
if (!isIgnored(fullPath, true, active)) {
|
|
322
|
+
walk(fullPath, active);
|
|
302
323
|
}
|
|
303
324
|
}
|
|
304
325
|
else if (stat.isFile()) {
|
|
305
|
-
if (
|
|
326
|
+
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
|
|
306
327
|
files.push(relativePath);
|
|
307
328
|
count++;
|
|
308
329
|
onProgress?.(count, relativePath);
|
|
@@ -315,20 +336,12 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
315
336
|
continue;
|
|
316
337
|
}
|
|
317
338
|
if (entry.isDirectory()) {
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
for (const pattern of config.exclude) {
|
|
321
|
-
if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
|
|
322
|
-
excluded = true;
|
|
323
|
-
break;
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
if (!excluded) {
|
|
327
|
-
walk(fullPath);
|
|
339
|
+
if (!isIgnored(fullPath, true, active)) {
|
|
340
|
+
walk(fullPath, active);
|
|
328
341
|
}
|
|
329
342
|
}
|
|
330
343
|
else if (entry.isFile()) {
|
|
331
|
-
if (
|
|
344
|
+
if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
|
|
332
345
|
files.push(relativePath);
|
|
333
346
|
count++;
|
|
334
347
|
onProgress?.(count, relativePath);
|
|
@@ -336,7 +349,7 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
336
349
|
}
|
|
337
350
|
}
|
|
338
351
|
}
|
|
339
|
-
walk(rootDir);
|
|
352
|
+
walk(rootDir, []);
|
|
340
353
|
return files;
|
|
341
354
|
}
|
|
342
355
|
/**
|
|
@@ -344,7 +357,6 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
|
|
|
344
357
|
*/
|
|
345
358
|
class ExtractionOrchestrator {
|
|
346
359
|
rootDir;
|
|
347
|
-
config;
|
|
348
360
|
queries;
|
|
349
361
|
/**
|
|
350
362
|
* Names of frameworks detected for this project, populated by indexAll().
|
|
@@ -353,9 +365,8 @@ class ExtractionOrchestrator {
|
|
|
353
365
|
* hasn't run yet so single-file re-index paths can detect on the spot.
|
|
354
366
|
*/
|
|
355
367
|
detectedFrameworkNames = null;
|
|
356
|
-
constructor(rootDir,
|
|
368
|
+
constructor(rootDir, queries) {
|
|
357
369
|
this.rootDir = rootDir;
|
|
358
|
-
this.config = config;
|
|
359
370
|
this.queries = queries;
|
|
360
371
|
}
|
|
361
372
|
/**
|
|
@@ -407,7 +418,7 @@ class ExtractionOrchestrator {
|
|
|
407
418
|
ensureDetectedFrameworks(files) {
|
|
408
419
|
if (this.detectedFrameworkNames !== null)
|
|
409
420
|
return this.detectedFrameworkNames;
|
|
410
|
-
const fileList = files ?? scanDirectory(this.rootDir
|
|
421
|
+
const fileList = files ?? scanDirectory(this.rootDir);
|
|
411
422
|
const context = this.buildDetectionContext(fileList);
|
|
412
423
|
this.detectedFrameworkNames = (0, frameworks_1.detectFrameworks)(context).map((r) => r.name);
|
|
413
424
|
return this.detectedFrameworkNames;
|
|
@@ -433,7 +444,7 @@ class ExtractionOrchestrator {
|
|
|
433
444
|
current: 0,
|
|
434
445
|
total: 0,
|
|
435
446
|
});
|
|
436
|
-
const files = await scanDirectoryAsync(this.rootDir,
|
|
447
|
+
const files = await scanDirectoryAsync(this.rootDir, (current, file) => {
|
|
437
448
|
onProgress?.({
|
|
438
449
|
phase: 'scanning',
|
|
439
450
|
current,
|
|
@@ -668,18 +679,16 @@ class ExtractionOrchestrator {
|
|
|
668
679
|
});
|
|
669
680
|
continue;
|
|
670
681
|
}
|
|
671
|
-
// Honour
|
|
672
|
-
//
|
|
673
|
-
//
|
|
674
|
-
//
|
|
675
|
-
// the
|
|
676
|
-
|
|
677
|
-
// skip the check.
|
|
678
|
-
if (stats.size > this.config.maxFileSize) {
|
|
682
|
+
// Honour MAX_FILE_SIZE. Without this check, vendored generated
|
|
683
|
+
// headers, minified bundles, and other multi-MB files get indexed,
|
|
684
|
+
// wasting WASM heap and the worker recycle budget on inputs with no
|
|
685
|
+
// useful symbols. The single-file extractFile path already enforces
|
|
686
|
+
// this; the bulk path used to silently skip the check.
|
|
687
|
+
if (stats.size > MAX_FILE_SIZE) {
|
|
679
688
|
processed++;
|
|
680
689
|
filesSkipped++;
|
|
681
690
|
errors.push({
|
|
682
|
-
message: `File exceeds max size (${stats.size} > ${
|
|
691
|
+
message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
|
|
683
692
|
filePath,
|
|
684
693
|
severity: 'warning',
|
|
685
694
|
code: 'size_exceeded',
|
|
@@ -948,14 +957,14 @@ class ExtractionOrchestrator {
|
|
|
948
957
|
};
|
|
949
958
|
}
|
|
950
959
|
// Check file size
|
|
951
|
-
if (stats.size >
|
|
960
|
+
if (stats.size > MAX_FILE_SIZE) {
|
|
952
961
|
return {
|
|
953
962
|
nodes: [],
|
|
954
963
|
edges: [],
|
|
955
964
|
unresolvedReferences: [],
|
|
956
965
|
errors: [
|
|
957
966
|
{
|
|
958
|
-
message: `File exceeds max size (${stats.size} > ${
|
|
967
|
+
message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
|
|
959
968
|
filePath: relativePath,
|
|
960
969
|
severity: 'warning',
|
|
961
970
|
code: 'size_exceeded',
|
|
@@ -1062,7 +1071,7 @@ class ExtractionOrchestrator {
|
|
|
1062
1071
|
total: 0,
|
|
1063
1072
|
});
|
|
1064
1073
|
const filesToIndex = [];
|
|
1065
|
-
const gitChanges = getGitChangedFiles(this.rootDir
|
|
1074
|
+
const gitChanges = getGitChangedFiles(this.rootDir);
|
|
1066
1075
|
if (gitChanges) {
|
|
1067
1076
|
// === Git fast path ===
|
|
1068
1077
|
// Only inspect the files git reports as changed instead of scanning everything.
|
|
@@ -1075,8 +1084,12 @@ class ExtractionOrchestrator {
|
|
|
1075
1084
|
filesRemoved++;
|
|
1076
1085
|
}
|
|
1077
1086
|
}
|
|
1078
|
-
// Handle modified files — read + hash only these
|
|
1079
|
-
|
|
1087
|
+
// Handle modified + added files — read + hash only these. Untracked
|
|
1088
|
+
// (`??`) files stay untracked in git even after we index them, so they
|
|
1089
|
+
// can't be trusted as "new": re-hash and compare against the DB exactly
|
|
1090
|
+
// like modified files. Otherwise every sync re-indexes them and status
|
|
1091
|
+
// reports them as pending forever. (See issue #206.)
|
|
1092
|
+
for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
|
|
1080
1093
|
const fullPath = path.join(this.rootDir, filePath);
|
|
1081
1094
|
let content;
|
|
1082
1095
|
try {
|
|
@@ -1099,16 +1112,10 @@ class ExtractionOrchestrator {
|
|
|
1099
1112
|
filesModified++;
|
|
1100
1113
|
}
|
|
1101
1114
|
}
|
|
1102
|
-
// Handle added (untracked) files
|
|
1103
|
-
for (const filePath of gitChanges.added) {
|
|
1104
|
-
filesToIndex.push(filePath);
|
|
1105
|
-
changedFilePaths.push(filePath);
|
|
1106
|
-
filesAdded++;
|
|
1107
|
-
}
|
|
1108
1115
|
}
|
|
1109
1116
|
else {
|
|
1110
1117
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
1111
|
-
const currentFiles = new Set(scanDirectory(this.rootDir
|
|
1118
|
+
const currentFiles = new Set(scanDirectory(this.rootDir));
|
|
1112
1119
|
filesChecked = currentFiles.size;
|
|
1113
1120
|
// Build Map for O(1) lookups instead of .find() per file
|
|
1114
1121
|
const trackedFiles = this.queries.getAllFiles();
|
|
@@ -1185,7 +1192,7 @@ class ExtractionOrchestrator {
|
|
|
1185
1192
|
* Uses git status as a fast path when available, falling back to full scan.
|
|
1186
1193
|
*/
|
|
1187
1194
|
getChangedFiles() {
|
|
1188
|
-
const gitChanges = getGitChangedFiles(this.rootDir
|
|
1195
|
+
const gitChanges = getGitChangedFiles(this.rootDir);
|
|
1189
1196
|
if (gitChanges) {
|
|
1190
1197
|
// === Git fast path ===
|
|
1191
1198
|
const added = [];
|
|
@@ -1198,8 +1205,11 @@ class ExtractionOrchestrator {
|
|
|
1198
1205
|
removed.push(filePath);
|
|
1199
1206
|
}
|
|
1200
1207
|
}
|
|
1201
|
-
// Modified files — read + hash
|
|
1202
|
-
|
|
1208
|
+
// Modified + added files — read + hash, compare with DB. Untracked (`??`)
|
|
1209
|
+
// files stay untracked in git even after indexing, so they must be
|
|
1210
|
+
// hash-compared like modified files instead of always counting as added —
|
|
1211
|
+
// otherwise status reports them as pending forever. (See issue #206.)
|
|
1212
|
+
for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
|
|
1203
1213
|
const fullPath = path.join(this.rootDir, filePath);
|
|
1204
1214
|
let content;
|
|
1205
1215
|
try {
|
|
@@ -1218,14 +1228,10 @@ class ExtractionOrchestrator {
|
|
|
1218
1228
|
modified.push(filePath);
|
|
1219
1229
|
}
|
|
1220
1230
|
}
|
|
1221
|
-
// Added (untracked) files
|
|
1222
|
-
for (const filePath of gitChanges.added) {
|
|
1223
|
-
added.push(filePath);
|
|
1224
|
-
}
|
|
1225
1231
|
return { added, modified, removed };
|
|
1226
1232
|
}
|
|
1227
1233
|
// === Fallback: full scan (non-git project or git failure) ===
|
|
1228
|
-
const currentFiles = new Set(scanDirectory(this.rootDir
|
|
1234
|
+
const currentFiles = new Set(scanDirectory(this.rootDir));
|
|
1229
1235
|
const trackedFiles = this.queries.getAllFiles();
|
|
1230
1236
|
// Build Map for O(1) lookups
|
|
1231
1237
|
const trackedMap = new Map();
|
|
@@ -1270,6 +1276,7 @@ var tree_sitter_2 = require("./tree-sitter");
|
|
|
1270
1276
|
Object.defineProperty(exports, "extractFromSource", { enumerable: true, get: function () { return tree_sitter_2.extractFromSource; } });
|
|
1271
1277
|
var grammars_2 = require("./grammars");
|
|
1272
1278
|
Object.defineProperty(exports, "detectLanguage", { enumerable: true, get: function () { return grammars_2.detectLanguage; } });
|
|
1279
|
+
Object.defineProperty(exports, "isSourceFile", { enumerable: true, get: function () { return grammars_2.isSourceFile; } });
|
|
1273
1280
|
Object.defineProperty(exports, "isLanguageSupported", { enumerable: true, get: function () { return grammars_2.isLanguageSupported; } });
|
|
1274
1281
|
Object.defineProperty(exports, "isGrammarLoaded", { enumerable: true, get: function () { return grammars_2.isGrammarLoaded; } });
|
|
1275
1282
|
Object.defineProperty(exports, "getSupportedLanguages", { enumerable: true, get: function () { return grammars_2.getSupportedLanguages; } });
|