@stupidloud/codegraph 0.7.20 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/README.md +127 -106
  2. package/dist/bin/codegraph.d.ts +4 -0
  3. package/dist/bin/codegraph.d.ts.map +1 -1
  4. package/dist/bin/codegraph.js +327 -8
  5. package/dist/bin/codegraph.js.map +1 -1
  6. package/dist/bin/node-version-check.d.ts +17 -0
  7. package/dist/bin/node-version-check.d.ts.map +1 -1
  8. package/dist/bin/node-version-check.js +37 -0
  9. package/dist/bin/node-version-check.js.map +1 -1
  10. package/dist/config.d.ts.map +1 -1
  11. package/dist/config.js +1 -11
  12. package/dist/config.js.map +1 -1
  13. package/dist/db/index.d.ts +30 -1
  14. package/dist/db/index.d.ts.map +1 -1
  15. package/dist/db/index.js +75 -25
  16. package/dist/db/index.js.map +1 -1
  17. package/dist/db/queries.d.ts +16 -0
  18. package/dist/db/queries.d.ts.map +1 -1
  19. package/dist/db/queries.js +80 -27
  20. package/dist/db/queries.js.map +1 -1
  21. package/dist/db/sqlite-adapter.d.ts +17 -23
  22. package/dist/db/sqlite-adapter.d.ts.map +1 -1
  23. package/dist/db/sqlite-adapter.js +51 -174
  24. package/dist/db/sqlite-adapter.js.map +1 -1
  25. package/dist/extraction/grammars.d.ts +7 -1
  26. package/dist/extraction/grammars.d.ts.map +1 -1
  27. package/dist/extraction/grammars.js +42 -2
  28. package/dist/extraction/grammars.js.map +1 -1
  29. package/dist/extraction/index.d.ts +9 -14
  30. package/dist/extraction/index.d.ts.map +1 -1
  31. package/dist/extraction/index.js +131 -124
  32. package/dist/extraction/index.js.map +1 -1
  33. package/dist/extraction/languages/index.d.ts.map +1 -1
  34. package/dist/extraction/languages/index.js +4 -0
  35. package/dist/extraction/languages/index.js.map +1 -1
  36. package/dist/extraction/languages/lua.d.ts +3 -0
  37. package/dist/extraction/languages/lua.d.ts.map +1 -0
  38. package/dist/extraction/languages/lua.js +150 -0
  39. package/dist/extraction/languages/lua.js.map +1 -0
  40. package/dist/extraction/languages/luau.d.ts +3 -0
  41. package/dist/extraction/languages/luau.d.ts.map +1 -0
  42. package/dist/extraction/languages/luau.js +37 -0
  43. package/dist/extraction/languages/luau.js.map +1 -0
  44. package/dist/extraction/tree-sitter.d.ts.map +1 -1
  45. package/dist/extraction/tree-sitter.js +38 -0
  46. package/dist/extraction/tree-sitter.js.map +1 -1
  47. package/dist/extraction/wasm/tree-sitter-lua.wasm +0 -0
  48. package/dist/extraction/wasm/tree-sitter-luau.wasm +0 -0
  49. package/dist/extraction/wasm-runtime-flags.d.ts +38 -0
  50. package/dist/extraction/wasm-runtime-flags.d.ts.map +1 -0
  51. package/dist/extraction/wasm-runtime-flags.js +105 -0
  52. package/dist/extraction/wasm-runtime-flags.js.map +1 -0
  53. package/dist/graph/traversal.d.ts.map +1 -1
  54. package/dist/graph/traversal.js +71 -36
  55. package/dist/graph/traversal.js.map +1 -1
  56. package/dist/index.d.ts +11 -5
  57. package/dist/index.d.ts.map +1 -1
  58. package/dist/index.js +28 -18
  59. package/dist/index.js.map +1 -1
  60. package/dist/installer/config-writer.d.ts.map +1 -1
  61. package/dist/installer/config-writer.js +3 -1
  62. package/dist/installer/config-writer.js.map +1 -1
  63. package/dist/installer/index.d.ts +66 -2
  64. package/dist/installer/index.d.ts.map +1 -1
  65. package/dist/installer/index.js +195 -5
  66. package/dist/installer/index.js.map +1 -1
  67. package/dist/installer/instructions-template.d.ts +2 -2
  68. package/dist/installer/instructions-template.d.ts.map +1 -1
  69. package/dist/installer/instructions-template.js +4 -2
  70. package/dist/installer/instructions-template.js.map +1 -1
  71. package/dist/installer/targets/claude.d.ts +26 -6
  72. package/dist/installer/targets/claude.d.ts.map +1 -1
  73. package/dist/installer/targets/claude.js +165 -10
  74. package/dist/installer/targets/claude.js.map +1 -1
  75. package/dist/installer/targets/cursor.d.ts.map +1 -1
  76. package/dist/installer/targets/cursor.js +57 -3
  77. package/dist/installer/targets/cursor.js.map +1 -1
  78. package/dist/installer/targets/hermes.d.ts +18 -0
  79. package/dist/installer/targets/hermes.d.ts.map +1 -0
  80. package/dist/installer/targets/hermes.js +305 -0
  81. package/dist/installer/targets/hermes.js.map +1 -0
  82. package/dist/installer/targets/registry.d.ts.map +1 -1
  83. package/dist/installer/targets/registry.js +2 -0
  84. package/dist/installer/targets/registry.js.map +1 -1
  85. package/dist/installer/targets/types.d.ts +1 -1
  86. package/dist/installer/targets/types.d.ts.map +1 -1
  87. package/dist/mcp/index.d.ts +12 -0
  88. package/dist/mcp/index.d.ts.map +1 -1
  89. package/dist/mcp/index.js +213 -18
  90. package/dist/mcp/index.js.map +1 -1
  91. package/dist/mcp/server-instructions.d.ts +1 -1
  92. package/dist/mcp/server-instructions.d.ts.map +1 -1
  93. package/dist/mcp/server-instructions.js +15 -0
  94. package/dist/mcp/server-instructions.js.map +1 -1
  95. package/dist/mcp/tools.d.ts +25 -1
  96. package/dist/mcp/tools.d.ts.map +1 -1
  97. package/dist/mcp/tools.js +221 -30
  98. package/dist/mcp/tools.js.map +1 -1
  99. package/dist/mcp/transport.d.ts +17 -0
  100. package/dist/mcp/transport.d.ts.map +1 -1
  101. package/dist/mcp/transport.js +63 -0
  102. package/dist/mcp/transport.js.map +1 -1
  103. package/dist/resolution/frameworks/drupal.d.ts +51 -0
  104. package/dist/resolution/frameworks/drupal.d.ts.map +1 -0
  105. package/dist/resolution/frameworks/drupal.js +335 -0
  106. package/dist/resolution/frameworks/drupal.js.map +1 -0
  107. package/dist/resolution/frameworks/index.d.ts +2 -0
  108. package/dist/resolution/frameworks/index.d.ts.map +1 -1
  109. package/dist/resolution/frameworks/index.js +9 -1
  110. package/dist/resolution/frameworks/index.js.map +1 -1
  111. package/dist/resolution/frameworks/nestjs.d.ts +26 -0
  112. package/dist/resolution/frameworks/nestjs.d.ts.map +1 -0
  113. package/dist/resolution/frameworks/nestjs.js +374 -0
  114. package/dist/resolution/frameworks/nestjs.js.map +1 -0
  115. package/dist/resolution/index.d.ts.map +1 -1
  116. package/dist/resolution/index.js +40 -7
  117. package/dist/resolution/index.js.map +1 -1
  118. package/dist/resolution/lru-cache.d.ts +24 -0
  119. package/dist/resolution/lru-cache.d.ts.map +1 -0
  120. package/dist/resolution/lru-cache.js +62 -0
  121. package/dist/resolution/lru-cache.js.map +1 -0
  122. package/dist/sync/git-hooks.d.ts +45 -0
  123. package/dist/sync/git-hooks.d.ts.map +1 -0
  124. package/dist/sync/git-hooks.js +223 -0
  125. package/dist/sync/git-hooks.js.map +1 -0
  126. package/dist/sync/index.d.ts +4 -0
  127. package/dist/sync/index.d.ts.map +1 -1
  128. package/dist/sync/index.js +12 -1
  129. package/dist/sync/index.js.map +1 -1
  130. package/dist/sync/watch-policy.d.ts +48 -0
  131. package/dist/sync/watch-policy.d.ts.map +1 -0
  132. package/dist/sync/watch-policy.js +124 -0
  133. package/dist/sync/watch-policy.js.map +1 -0
  134. package/dist/sync/watcher.d.ts +2 -4
  135. package/dist/sync/watcher.d.ts.map +1 -1
  136. package/dist/sync/watcher.js +14 -6
  137. package/dist/sync/watcher.js.map +1 -1
  138. package/dist/types.d.ts +1 -1
  139. package/dist/types.d.ts.map +1 -1
  140. package/dist/types.js +11 -0
  141. package/dist/types.js.map +1 -1
  142. package/dist/utils.js +1 -1
  143. package/package.json +4 -4
  144. package/scripts/add-lang/bench.sh +60 -0
  145. package/scripts/add-lang/check-grammar.mjs +75 -0
  146. package/scripts/add-lang/dump-ast.mjs +103 -0
  147. package/scripts/add-lang/verify-extraction.mjs +70 -0
  148. package/scripts/agent-eval/audit.sh +68 -0
  149. package/scripts/agent-eval/itrun.sh +1 -1
  150. package/scripts/agent-eval/run-all.sh +67 -0
  151. package/scripts/build-bundle.sh +118 -0
  152. package/scripts/npm-shim.js +246 -0
  153. package/scripts/pack-npm.sh +95 -0
  154. package/scripts/patch-tree-sitter-dart.js +0 -112
  155. package/scripts/release.sh +0 -68
@@ -41,9 +41,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
41
41
  return (mod && mod.__esModule) ? mod : { "default": mod };
42
42
  };
43
43
  Object.defineProperty(exports, "__esModule", { value: true });
44
- exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammars = exports.getSupportedLanguages = exports.isGrammarLoaded = exports.isLanguageSupported = exports.detectLanguage = exports.extractFromSource = exports.ExtractionOrchestrator = void 0;
44
+ exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammars = exports.getSupportedLanguages = exports.isGrammarLoaded = exports.isLanguageSupported = exports.isSourceFile = exports.detectLanguage = exports.extractFromSource = exports.ExtractionOrchestrator = void 0;
45
45
  exports.hashContent = hashContent;
46
- exports.shouldIncludeFile = shouldIncludeFile;
47
46
  exports.scanDirectory = scanDirectory;
48
47
  exports.scanDirectoryAsync = scanDirectoryAsync;
49
48
  const fs = __importStar(require("fs"));
@@ -55,7 +54,7 @@ const tree_sitter_1 = require("./tree-sitter");
55
54
  const grammars_1 = require("./grammars");
56
55
  const errors_1 = require("../errors");
57
56
  const utils_1 = require("../utils");
58
- const picomatch_1 = __importDefault(require("picomatch"));
57
+ const ignore_1 = __importDefault(require("ignore"));
59
58
  const frameworks_1 = require("../resolution/frameworks");
60
59
  /**
61
60
  * Number of files to read in parallel during indexing.
@@ -84,34 +83,64 @@ function hashContent(content) {
84
83
  return crypto.createHash('sha256').update(content).digest('hex');
85
84
  }
86
85
  /**
87
- * Check if a path matches any glob pattern (simplified)
86
+ * Skip files larger than this (bytes). Generated bundles, minified JS, and
87
+ * vendored blobs blow the WASM heap and the worker-recycle budget for no useful
88
+ * symbols. 1 MB covers essentially all hand-written source.
88
89
  */
89
- function matchesGlob(filePath, pattern) {
90
- filePath = (0, utils_1.normalizePath)(filePath);
91
- return picomatch_1.default.isMatch(filePath, pattern, { dot: true });
92
- }
90
+ const MAX_FILE_SIZE = 1024 * 1024;
93
91
  /**
94
- * Check if a file should be included based on config
92
+ * Collect git-visible files (tracked + untracked, .gitignore-respected) from the
93
+ * git repository rooted at `repoDir`, adding each to `files` with `prefix`
94
+ * prepended so paths stay relative to the original scan root.
95
+ *
96
+ * Recurses into embedded git repositories — nested repos that are NOT submodules
97
+ * (independent clones living inside the workspace, common in CMake "super-repo"
98
+ * layouts). The parent repo's `git ls-files` cannot see into them: tracked output
99
+ * skips them entirely, and untracked output reports them only as an opaque
100
+ * "subdir/" entry (trailing slash) rather than expanding their files. Each
101
+ * embedded repo is its own git boundary, so we re-run `git ls-files` inside it.
102
+ * (See issue #193.)
95
103
  */
96
- function shouldIncludeFile(filePath, config) {
97
- // Check exclude patterns first
98
- for (const pattern of config.exclude) {
99
- if (matchesGlob(filePath, pattern)) {
100
- return false;
104
+ function collectGitFiles(repoDir, prefix, files) {
105
+ const gitOpts = { cwd: repoDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] };
106
+ // Tracked files. --recurse-submodules pulls in files from active submodules,
107
+ // which the index would otherwise represent only as a commit pointer.
108
+ // Without this, monorepos using submodules index 0 files. (See issue #147.)
109
+ // Note: --recurse-submodules only supports -c/--cached and --stage modes — it
110
+ // can't be combined with -o, so untracked files are gathered separately below.
111
+ const tracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
112
+ for (const line of tracked.split('\n')) {
113
+ const trimmed = line.trim();
114
+ if (trimmed) {
115
+ files.add((0, utils_1.normalizePath)(prefix + trimmed));
101
116
  }
102
117
  }
103
- // Check include patterns
104
- for (const pattern of config.include) {
105
- if (matchesGlob(filePath, pattern)) {
106
- return true;
118
+ // Untracked files (submodules manage their own untracked state). Embedded git
119
+ // repos surface here as a single "subdir/" entry that git refuses to descend
120
+ // into — recurse into those as their own repos so their source gets indexed.
121
+ const untracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
122
+ for (const line of untracked.split('\n')) {
123
+ const trimmed = line.trim();
124
+ if (!trimmed)
125
+ continue;
126
+ if (trimmed.endsWith('/')) {
127
+ // git only emits a trailing-slash directory entry for an embedded repo.
128
+ // Guard with a .git check anyway, and skip anything else exactly as git
129
+ // itself skips it (we never descend into a non-repo opaque dir).
130
+ const childDir = path.join(repoDir, trimmed);
131
+ if (fs.existsSync(path.join(childDir, '.git'))) {
132
+ collectGitFiles(childDir, prefix + trimmed, files);
133
+ }
134
+ continue;
107
135
  }
136
+ files.add((0, utils_1.normalizePath)(prefix + trimmed));
108
137
  }
109
- return false;
110
138
  }
111
139
  /**
112
140
  * Get all files visible to git (tracked + untracked but not ignored).
113
- * Respects .gitignore at all levels (root, subdirectories).
114
- * Returns null on failure (non-git project) so callers can fall back.
141
+ * Respects .gitignore at all levels (root, subdirectories) and descends into
142
+ * embedded (nested, non-submodule) git repos. Returns null on failure
143
+ * (non-git project) so callers can fall back to a filesystem walk.
115
144
  */
116
145
  function getGitVisibleFiles(rootDir) {
117
146
  try {
@@ -131,27 +160,7 @@ function getGitVisibleFiles(rootDir) {
131
160
  }
132
161
  }
133
162
  const files = new Set();
134
- const gitOpts = { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] };
135
- // Tracked files. --recurse-submodules pulls in files from active submodules,
136
- // which the main repo's index would otherwise represent only as a commit pointer.
137
- // Without this, monorepos using submodules index 0 files. (See issue #147.)
138
- // Note: --recurse-submodules only supports -c/--cached and --stage modes — it
139
- // can't be combined with -o, so untracked files are gathered separately below.
140
- const tracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-c', '--recurse-submodules'], gitOpts);
141
- for (const line of tracked.split('\n')) {
142
- const trimmed = line.trim();
143
- if (trimmed) {
144
- files.add((0, utils_1.normalizePath)(trimmed));
145
- }
146
- }
147
- // Untracked files in the main repo (submodules manage their own untracked state).
148
- const untracked = (0, child_process_1.execFileSync)('git', ['ls-files', '-o', '--exclude-standard'], gitOpts);
149
- for (const line of untracked.split('\n')) {
150
- const trimmed = line.trim();
151
- if (trimmed) {
152
- files.add((0, utils_1.normalizePath)(trimmed));
153
- }
154
- }
163
+ collectGitFiles(rootDir, '', files);
155
164
  return files;
156
165
  }
157
166
  catch {
@@ -162,7 +171,7 @@ function getGitVisibleFiles(rootDir) {
162
171
  * Use `git status` to detect changed files instead of scanning every file.
163
172
  * Returns null on failure so callers fall back to full scan.
164
173
  */
165
- function getGitChangedFiles(rootDir, config) {
174
+ function getGitChangedFiles(rootDir) {
166
175
  try {
167
176
  const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '--no-renames'], { cwd: rootDir, encoding: 'utf-8', timeout: 10000, stdio: ['pipe', 'pipe', 'pipe'] });
168
177
  const modified = [];
@@ -173,8 +182,8 @@ function getGitChangedFiles(rootDir, config) {
173
182
  continue; // Minimum: "XY file"
174
183
  const statusCode = line.substring(0, 2);
175
184
  const filePath = (0, utils_1.normalizePath)(line.substring(3));
176
- // Skip files that don't match include/exclude config
177
- if (!shouldIncludeFile(filePath, config))
185
+ // Skip non-source files (git status already omits .gitignored paths).
186
+ if (!(0, grammars_1.isSourceFile)(filePath))
178
187
  continue;
179
188
  if (statusCode === '??') {
180
189
  added.push(filePath);
@@ -194,24 +203,20 @@ function getGitChangedFiles(rootDir, config) {
194
203
  }
195
204
  }
196
205
  /**
197
- * Marker file name that indicates a directory (and all children) should be skipped
198
- */
199
- const CODEGRAPH_IGNORE_MARKER = '.codegraphignore';
200
- /**
201
- * Recursively scan directory for source files.
206
+ * Recursively scan a directory for source files.
202
207
  *
203
- * In git repos, uses `git ls-files` to get the file list (inherently
204
- * respects .gitignore at all levels), then filters by config include patterns.
205
- * Falls back to filesystem walk for non-git projects.
208
+ * In git repos, uses `git ls-files` (inherently respects .gitignore at all
209
+ * levels), then keeps files with a supported source extension. For non-git
210
+ * projects, falls back to a filesystem walk that parses .gitignore itself.
206
211
  */
207
- function scanDirectory(rootDir, config, onProgress) {
212
+ function scanDirectory(rootDir, onProgress) {
208
213
  // Fast path: use git to get all visible files (respects .gitignore everywhere)
209
214
  const gitFiles = getGitVisibleFiles(rootDir);
210
215
  if (gitFiles) {
211
216
  const files = [];
212
217
  let count = 0;
213
218
  for (const filePath of gitFiles) {
214
- if (shouldIncludeFile(filePath, config)) {
219
+ if ((0, grammars_1.isSourceFile)(filePath)) {
215
220
  files.push(filePath);
216
221
  count++;
217
222
  onProgress?.(count, filePath);
@@ -220,19 +225,19 @@ function scanDirectory(rootDir, config, onProgress) {
220
225
  return files;
221
226
  }
222
227
  // Fallback: walk filesystem for non-git projects
223
- return scanDirectoryWalk(rootDir, config, onProgress);
228
+ return scanDirectoryWalk(rootDir, onProgress);
224
229
  }
225
230
  /**
226
231
  * Async variant of scanDirectory that yields to the event loop periodically,
227
232
  * allowing worker threads to receive and render progress messages.
228
233
  */
229
- async function scanDirectoryAsync(rootDir, config, onProgress) {
234
+ async function scanDirectoryAsync(rootDir, onProgress) {
230
235
  const gitFiles = getGitVisibleFiles(rootDir);
231
236
  if (gitFiles) {
232
237
  const files = [];
233
238
  let count = 0;
234
239
  for (const filePath of gitFiles) {
235
- if (shouldIncludeFile(filePath, config)) {
240
+ if ((0, grammars_1.isSourceFile)(filePath)) {
236
241
  files.push(filePath);
237
242
  count++;
238
243
  onProgress?.(count, filePath);
@@ -244,16 +249,40 @@ async function scanDirectoryAsync(rootDir, config, onProgress) {
244
249
  }
245
250
  return files;
246
251
  }
247
- return scanDirectoryWalk(rootDir, config, onProgress);
252
+ return scanDirectoryWalk(rootDir, onProgress);
248
253
  }
249
254
  /**
250
255
  * Filesystem walk fallback for non-git projects.
251
256
  */
252
- function scanDirectoryWalk(rootDir, config, onProgress) {
257
+ function scanDirectoryWalk(rootDir, onProgress) {
253
258
  const files = [];
254
259
  let count = 0;
255
260
  const visitedDirs = new Set();
256
- function walk(dir) {
261
+ const loadIgnore = (dir) => {
262
+ try {
263
+ const giPath = path.join(dir, '.gitignore');
264
+ if (fs.existsSync(giPath)) {
265
+ return { dir, ig: (0, ignore_1.default)().add(fs.readFileSync(giPath, 'utf-8')) };
266
+ }
267
+ }
268
+ catch {
269
+ // Unreadable .gitignore — treat as absent.
270
+ }
271
+ return null;
272
+ };
273
+ const isIgnored = (fullPath, isDir, matchers) => {
274
+ for (const { dir, ig } of matchers) {
275
+ let rel = (0, utils_1.normalizePath)(path.relative(dir, fullPath));
276
+ if (!rel || rel.startsWith('..'))
277
+ continue; // not under this matcher's dir
278
+ if (isDir)
279
+ rel += '/'; // dir-only rules (e.g. `build/`) only match with the slash
280
+ if (ig.ignores(rel))
281
+ return true;
282
+ }
283
+ return false;
284
+ };
285
+ function walk(dir, matchers) {
257
286
  let realDir;
258
287
  try {
259
288
  realDir = fs.realpathSync(dir);
@@ -267,12 +296,9 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
267
296
  return;
268
297
  }
269
298
  visitedDirs.add(realDir);
270
- // Check for .codegraphignore marker file
271
- const ignoreMarker = path.join(dir, CODEGRAPH_IGNORE_MARKER);
272
- if (fs.existsSync(ignoreMarker)) {
273
- (0, errors_1.logDebug)('Skipping directory due to .codegraphignore marker', { dir });
274
- return;
275
- }
299
+ // This directory's own .gitignore (if present) applies to everything below it.
300
+ const own = loadIgnore(dir);
301
+ const active = own ? [...matchers, own] : matchers;
276
302
  let entries;
277
303
  try {
278
304
  entries = fs.readdirSync(dir, { withFileTypes: true });
@@ -282,6 +308,9 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
282
308
  return;
283
309
  }
284
310
  for (const entry of entries) {
311
+ // Never descend into git internals or our own data directory.
312
+ if (entry.name === '.git' || entry.name === '.codegraph')
313
+ continue;
285
314
  const fullPath = path.join(dir, entry.name);
286
315
  const relativePath = (0, utils_1.normalizePath)(path.relative(rootDir, fullPath));
287
316
  if (entry.isSymbolicLink()) {
@@ -289,20 +318,12 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
289
318
  const realTarget = fs.realpathSync(fullPath);
290
319
  const stat = fs.statSync(realTarget);
291
320
  if (stat.isDirectory()) {
292
- const dirPattern = relativePath + '/';
293
- let excluded = false;
294
- for (const pattern of config.exclude) {
295
- if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
296
- excluded = true;
297
- break;
298
- }
299
- }
300
- if (!excluded) {
301
- walk(fullPath);
321
+ if (!isIgnored(fullPath, true, active)) {
322
+ walk(fullPath, active);
302
323
  }
303
324
  }
304
325
  else if (stat.isFile()) {
305
- if (shouldIncludeFile(relativePath, config)) {
326
+ if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
306
327
  files.push(relativePath);
307
328
  count++;
308
329
  onProgress?.(count, relativePath);
@@ -315,20 +336,12 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
315
336
  continue;
316
337
  }
317
338
  if (entry.isDirectory()) {
318
- const dirPattern = relativePath + '/';
319
- let excluded = false;
320
- for (const pattern of config.exclude) {
321
- if (matchesGlob(dirPattern, pattern) || matchesGlob(relativePath, pattern)) {
322
- excluded = true;
323
- break;
324
- }
325
- }
326
- if (!excluded) {
327
- walk(fullPath);
339
+ if (!isIgnored(fullPath, true, active)) {
340
+ walk(fullPath, active);
328
341
  }
329
342
  }
330
343
  else if (entry.isFile()) {
331
- if (shouldIncludeFile(relativePath, config)) {
344
+ if (!isIgnored(fullPath, false, active) && (0, grammars_1.isSourceFile)(relativePath)) {
332
345
  files.push(relativePath);
333
346
  count++;
334
347
  onProgress?.(count, relativePath);
@@ -336,7 +349,7 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
336
349
  }
337
350
  }
338
351
  }
339
- walk(rootDir);
352
+ walk(rootDir, []);
340
353
  return files;
341
354
  }
342
355
  /**
@@ -344,7 +357,6 @@ function scanDirectoryWalk(rootDir, config, onProgress) {
344
357
  */
345
358
  class ExtractionOrchestrator {
346
359
  rootDir;
347
- config;
348
360
  queries;
349
361
  /**
350
362
  * Names of frameworks detected for this project, populated by indexAll().
@@ -353,9 +365,8 @@ class ExtractionOrchestrator {
353
365
  * hasn't run yet so single-file re-index paths can detect on the spot.
354
366
  */
355
367
  detectedFrameworkNames = null;
356
- constructor(rootDir, config, queries) {
368
+ constructor(rootDir, queries) {
357
369
  this.rootDir = rootDir;
358
- this.config = config;
359
370
  this.queries = queries;
360
371
  }
361
372
  /**
@@ -407,7 +418,7 @@ class ExtractionOrchestrator {
407
418
  ensureDetectedFrameworks(files) {
408
419
  if (this.detectedFrameworkNames !== null)
409
420
  return this.detectedFrameworkNames;
410
- const fileList = files ?? scanDirectory(this.rootDir, this.config);
421
+ const fileList = files ?? scanDirectory(this.rootDir);
411
422
  const context = this.buildDetectionContext(fileList);
412
423
  this.detectedFrameworkNames = (0, frameworks_1.detectFrameworks)(context).map((r) => r.name);
413
424
  return this.detectedFrameworkNames;
@@ -433,7 +444,7 @@ class ExtractionOrchestrator {
433
444
  current: 0,
434
445
  total: 0,
435
446
  });
436
- const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
447
+ const files = await scanDirectoryAsync(this.rootDir, (current, file) => {
437
448
  onProgress?.({
438
449
  phase: 'scanning',
439
450
  current,
@@ -668,18 +679,16 @@ class ExtractionOrchestrator {
668
679
  });
669
680
  continue;
670
681
  }
671
- // Honour config.maxFileSize. Without this check, vendored
672
- // generated headers, minified bundles, and other multi-MB
673
- // files get indexed despite the user setting a size cap
674
- // wasting WASM heap and the worker recycle budget on inputs
675
- // the user explicitly opted out of. The single-file extractFile
676
- // path already enforces this; the bulk path used to silently
677
- // skip the check.
678
- if (stats.size > this.config.maxFileSize) {
682
+ // Honour MAX_FILE_SIZE. Without this check, vendored generated
683
+ // headers, minified bundles, and other multi-MB files get indexed,
684
+ // wasting WASM heap and the worker recycle budget on inputs with no
685
+ // useful symbols. The single-file extractFile path already enforces
686
+ // this; the bulk path used to silently skip the check.
687
+ if (stats.size > MAX_FILE_SIZE) {
679
688
  processed++;
680
689
  filesSkipped++;
681
690
  errors.push({
682
- message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
691
+ message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
683
692
  filePath,
684
693
  severity: 'warning',
685
694
  code: 'size_exceeded',
@@ -948,14 +957,14 @@ class ExtractionOrchestrator {
948
957
  };
949
958
  }
950
959
  // Check file size
951
- if (stats.size > this.config.maxFileSize) {
960
+ if (stats.size > MAX_FILE_SIZE) {
952
961
  return {
953
962
  nodes: [],
954
963
  edges: [],
955
964
  unresolvedReferences: [],
956
965
  errors: [
957
966
  {
958
- message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
967
+ message: `File exceeds max size (${stats.size} > ${MAX_FILE_SIZE})`,
959
968
  filePath: relativePath,
960
969
  severity: 'warning',
961
970
  code: 'size_exceeded',
@@ -1062,7 +1071,7 @@ class ExtractionOrchestrator {
1062
1071
  total: 0,
1063
1072
  });
1064
1073
  const filesToIndex = [];
1065
- const gitChanges = getGitChangedFiles(this.rootDir, this.config);
1074
+ const gitChanges = getGitChangedFiles(this.rootDir);
1066
1075
  if (gitChanges) {
1067
1076
  // === Git fast path ===
1068
1077
  // Only inspect the files git reports as changed instead of scanning everything.
@@ -1075,8 +1084,12 @@ class ExtractionOrchestrator {
1075
1084
  filesRemoved++;
1076
1085
  }
1077
1086
  }
1078
- // Handle modified files — read + hash only these files
1079
- for (const filePath of gitChanges.modified) {
1087
+ // Handle modified + added files — read + hash only these. Untracked
1088
+ // (`??`) files stay untracked in git even after we index them, so they
1089
+ // can't be trusted as "new": re-hash and compare against the DB exactly
1090
+ // like modified files. Otherwise every sync re-indexes them and status
1091
+ // reports them as pending forever. (See issue #206.)
1092
+ for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
1080
1093
  const fullPath = path.join(this.rootDir, filePath);
1081
1094
  let content;
1082
1095
  try {
@@ -1099,16 +1112,10 @@ class ExtractionOrchestrator {
1099
1112
  filesModified++;
1100
1113
  }
1101
1114
  }
1102
- // Handle added (untracked) files
1103
- for (const filePath of gitChanges.added) {
1104
- filesToIndex.push(filePath);
1105
- changedFilePaths.push(filePath);
1106
- filesAdded++;
1107
- }
1108
1115
  }
1109
1116
  else {
1110
1117
  // === Fallback: full scan (non-git project or git failure) ===
1111
- const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
1118
+ const currentFiles = new Set(scanDirectory(this.rootDir));
1112
1119
  filesChecked = currentFiles.size;
1113
1120
  // Build Map for O(1) lookups instead of .find() per file
1114
1121
  const trackedFiles = this.queries.getAllFiles();
@@ -1185,7 +1192,7 @@ class ExtractionOrchestrator {
1185
1192
  * Uses git status as a fast path when available, falling back to full scan.
1186
1193
  */
1187
1194
  getChangedFiles() {
1188
- const gitChanges = getGitChangedFiles(this.rootDir, this.config);
1195
+ const gitChanges = getGitChangedFiles(this.rootDir);
1189
1196
  if (gitChanges) {
1190
1197
  // === Git fast path ===
1191
1198
  const added = [];
@@ -1198,8 +1205,11 @@ class ExtractionOrchestrator {
1198
1205
  removed.push(filePath);
1199
1206
  }
1200
1207
  }
1201
- // Modified files — read + hash only these, compare with DB
1202
- for (const filePath of gitChanges.modified) {
1208
+ // Modified + added files — read + hash, compare with DB. Untracked (`??`)
1209
+ // files stay untracked in git even after indexing, so they must be
1210
+ // hash-compared like modified files instead of always counting as added —
1211
+ // otherwise status reports them as pending forever. (See issue #206.)
1212
+ for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
1203
1213
  const fullPath = path.join(this.rootDir, filePath);
1204
1214
  let content;
1205
1215
  try {
@@ -1218,14 +1228,10 @@ class ExtractionOrchestrator {
1218
1228
  modified.push(filePath);
1219
1229
  }
1220
1230
  }
1221
- // Added (untracked) files
1222
- for (const filePath of gitChanges.added) {
1223
- added.push(filePath);
1224
- }
1225
1231
  return { added, modified, removed };
1226
1232
  }
1227
1233
  // === Fallback: full scan (non-git project or git failure) ===
1228
- const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
1234
+ const currentFiles = new Set(scanDirectory(this.rootDir));
1229
1235
  const trackedFiles = this.queries.getAllFiles();
1230
1236
  // Build Map for O(1) lookups
1231
1237
  const trackedMap = new Map();
@@ -1270,6 +1276,7 @@ var tree_sitter_2 = require("./tree-sitter");
1270
1276
  Object.defineProperty(exports, "extractFromSource", { enumerable: true, get: function () { return tree_sitter_2.extractFromSource; } });
1271
1277
  var grammars_2 = require("./grammars");
1272
1278
  Object.defineProperty(exports, "detectLanguage", { enumerable: true, get: function () { return grammars_2.detectLanguage; } });
1279
+ Object.defineProperty(exports, "isSourceFile", { enumerable: true, get: function () { return grammars_2.isSourceFile; } });
1273
1280
  Object.defineProperty(exports, "isLanguageSupported", { enumerable: true, get: function () { return grammars_2.isLanguageSupported; } });
1274
1281
  Object.defineProperty(exports, "isGrammarLoaded", { enumerable: true, get: function () { return grammars_2.isGrammarLoaded; } });
1275
1282
  Object.defineProperty(exports, "getSupportedLanguages", { enumerable: true, get: function () { return grammars_2.getSupportedLanguages; } });