@colbymchenry/codegraph 0.7.10 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -54
- package/npm-shim.js +43 -0
- package/package.json +13 -51
- package/LICENSE +0 -21
- package/dist/bin/codegraph.d.ts +0 -21
- package/dist/bin/codegraph.d.ts.map +0 -1
- package/dist/bin/codegraph.js +0 -1232
- package/dist/bin/codegraph.js.map +0 -1
- package/dist/bin/node-version-check.d.ts +0 -20
- package/dist/bin/node-version-check.d.ts.map +0 -1
- package/dist/bin/node-version-check.js +0 -42
- package/dist/bin/node-version-check.js.map +0 -1
- package/dist/bin/uninstall.d.ts +0 -14
- package/dist/bin/uninstall.d.ts.map +0 -1
- package/dist/bin/uninstall.js +0 -36
- package/dist/bin/uninstall.js.map +0 -1
- package/dist/config.d.ts +0 -51
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -321
- package/dist/config.js.map +0 -1
- package/dist/context/formatter.d.ts +0 -30
- package/dist/context/formatter.d.ts.map +0 -1
- package/dist/context/formatter.js +0 -244
- package/dist/context/formatter.js.map +0 -1
- package/dist/context/index.d.ts +0 -97
- package/dist/context/index.d.ts.map +0 -1
- package/dist/context/index.js +0 -1048
- package/dist/context/index.js.map +0 -1
- package/dist/db/index.d.ts +0 -72
- package/dist/db/index.d.ts.map +0 -1
- package/dist/db/index.js +0 -200
- package/dist/db/index.js.map +0 -1
- package/dist/db/migrations.d.ts +0 -44
- package/dist/db/migrations.d.ts.map +0 -1
- package/dist/db/migrations.js +0 -131
- package/dist/db/migrations.js.map +0 -1
- package/dist/db/queries.d.ts +0 -253
- package/dist/db/queries.d.ts.map +0 -1
- package/dist/db/queries.js +0 -1207
- package/dist/db/queries.js.map +0 -1
- package/dist/db/schema.sql +0 -151
- package/dist/db/sqlite-adapter.d.ts +0 -52
- package/dist/db/sqlite-adapter.d.ts.map +0 -1
- package/dist/db/sqlite-adapter.js +0 -237
- package/dist/db/sqlite-adapter.js.map +0 -1
- package/dist/directory.d.ts +0 -57
- package/dist/directory.d.ts.map +0 -1
- package/dist/directory.js +0 -264
- package/dist/directory.js.map +0 -1
- package/dist/errors.d.ts +0 -136
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -219
- package/dist/errors.js.map +0 -1
- package/dist/extraction/dfm-extractor.d.ts +0 -31
- package/dist/extraction/dfm-extractor.d.ts.map +0 -1
- package/dist/extraction/dfm-extractor.js +0 -151
- package/dist/extraction/dfm-extractor.js.map +0 -1
- package/dist/extraction/grammars.d.ts +0 -78
- package/dist/extraction/grammars.d.ts.map +0 -1
- package/dist/extraction/grammars.js +0 -322
- package/dist/extraction/grammars.js.map +0 -1
- package/dist/extraction/index.d.ts +0 -130
- package/dist/extraction/index.d.ts.map +0 -1
- package/dist/extraction/index.js +0 -1279
- package/dist/extraction/index.js.map +0 -1
- package/dist/extraction/languages/c-cpp.d.ts +0 -4
- package/dist/extraction/languages/c-cpp.d.ts.map +0 -1
- package/dist/extraction/languages/c-cpp.js +0 -126
- package/dist/extraction/languages/c-cpp.js.map +0 -1
- package/dist/extraction/languages/csharp.d.ts +0 -3
- package/dist/extraction/languages/csharp.d.ts.map +0 -1
- package/dist/extraction/languages/csharp.js +0 -72
- package/dist/extraction/languages/csharp.js.map +0 -1
- package/dist/extraction/languages/dart.d.ts +0 -3
- package/dist/extraction/languages/dart.d.ts.map +0 -1
- package/dist/extraction/languages/dart.js +0 -192
- package/dist/extraction/languages/dart.js.map +0 -1
- package/dist/extraction/languages/go.d.ts +0 -3
- package/dist/extraction/languages/go.d.ts.map +0 -1
- package/dist/extraction/languages/go.js +0 -58
- package/dist/extraction/languages/go.js.map +0 -1
- package/dist/extraction/languages/index.d.ts +0 -10
- package/dist/extraction/languages/index.d.ts.map +0 -1
- package/dist/extraction/languages/index.js +0 -45
- package/dist/extraction/languages/index.js.map +0 -1
- package/dist/extraction/languages/java.d.ts +0 -3
- package/dist/extraction/languages/java.d.ts.map +0 -1
- package/dist/extraction/languages/java.js +0 -64
- package/dist/extraction/languages/java.js.map +0 -1
- package/dist/extraction/languages/javascript.d.ts +0 -3
- package/dist/extraction/languages/javascript.d.ts.map +0 -1
- package/dist/extraction/languages/javascript.js +0 -90
- package/dist/extraction/languages/javascript.js.map +0 -1
- package/dist/extraction/languages/kotlin.d.ts +0 -3
- package/dist/extraction/languages/kotlin.d.ts.map +0 -1
- package/dist/extraction/languages/kotlin.js +0 -253
- package/dist/extraction/languages/kotlin.js.map +0 -1
- package/dist/extraction/languages/pascal.d.ts +0 -3
- package/dist/extraction/languages/pascal.d.ts.map +0 -1
- package/dist/extraction/languages/pascal.js +0 -66
- package/dist/extraction/languages/pascal.js.map +0 -1
- package/dist/extraction/languages/php.d.ts +0 -3
- package/dist/extraction/languages/php.d.ts.map +0 -1
- package/dist/extraction/languages/php.js +0 -107
- package/dist/extraction/languages/php.js.map +0 -1
- package/dist/extraction/languages/python.d.ts +0 -3
- package/dist/extraction/languages/python.d.ts.map +0 -1
- package/dist/extraction/languages/python.js +0 -56
- package/dist/extraction/languages/python.js.map +0 -1
- package/dist/extraction/languages/ruby.d.ts +0 -3
- package/dist/extraction/languages/ruby.d.ts.map +0 -1
- package/dist/extraction/languages/ruby.js +0 -114
- package/dist/extraction/languages/ruby.js.map +0 -1
- package/dist/extraction/languages/rust.d.ts +0 -3
- package/dist/extraction/languages/rust.d.ts.map +0 -1
- package/dist/extraction/languages/rust.js +0 -109
- package/dist/extraction/languages/rust.js.map +0 -1
- package/dist/extraction/languages/scala.d.ts +0 -3
- package/dist/extraction/languages/scala.d.ts.map +0 -1
- package/dist/extraction/languages/scala.js +0 -139
- package/dist/extraction/languages/scala.js.map +0 -1
- package/dist/extraction/languages/swift.d.ts +0 -3
- package/dist/extraction/languages/swift.d.ts.map +0 -1
- package/dist/extraction/languages/swift.js +0 -91
- package/dist/extraction/languages/swift.js.map +0 -1
- package/dist/extraction/languages/typescript.d.ts +0 -3
- package/dist/extraction/languages/typescript.d.ts.map +0 -1
- package/dist/extraction/languages/typescript.js +0 -129
- package/dist/extraction/languages/typescript.js.map +0 -1
- package/dist/extraction/liquid-extractor.d.ts +0 -52
- package/dist/extraction/liquid-extractor.d.ts.map +0 -1
- package/dist/extraction/liquid-extractor.js +0 -313
- package/dist/extraction/liquid-extractor.js.map +0 -1
- package/dist/extraction/parse-worker.d.ts +0 -8
- package/dist/extraction/parse-worker.d.ts.map +0 -1
- package/dist/extraction/parse-worker.js +0 -94
- package/dist/extraction/parse-worker.js.map +0 -1
- package/dist/extraction/svelte-extractor.d.ts +0 -56
- package/dist/extraction/svelte-extractor.d.ts.map +0 -1
- package/dist/extraction/svelte-extractor.js +0 -272
- package/dist/extraction/svelte-extractor.js.map +0 -1
- package/dist/extraction/tree-sitter-helpers.d.ts +0 -28
- package/dist/extraction/tree-sitter-helpers.d.ts.map +0 -1
- package/dist/extraction/tree-sitter-helpers.js +0 -103
- package/dist/extraction/tree-sitter-helpers.js.map +0 -1
- package/dist/extraction/tree-sitter-types.d.ts +0 -179
- package/dist/extraction/tree-sitter-types.d.ts.map +0 -1
- package/dist/extraction/tree-sitter-types.js +0 -10
- package/dist/extraction/tree-sitter-types.js.map +0 -1
- package/dist/extraction/tree-sitter.d.ts +0 -233
- package/dist/extraction/tree-sitter.d.ts.map +0 -1
- package/dist/extraction/tree-sitter.js +0 -2393
- package/dist/extraction/tree-sitter.js.map +0 -1
- package/dist/extraction/vue-extractor.d.ts +0 -36
- package/dist/extraction/vue-extractor.d.ts.map +0 -1
- package/dist/extraction/vue-extractor.js +0 -163
- package/dist/extraction/vue-extractor.js.map +0 -1
- package/dist/extraction/wasm/tree-sitter-pascal.wasm +0 -0
- package/dist/extraction/wasm/tree-sitter-scala.wasm +0 -0
- package/dist/graph/index.d.ts +0 -8
- package/dist/graph/index.d.ts.map +0 -1
- package/dist/graph/index.js +0 -13
- package/dist/graph/index.js.map +0 -1
- package/dist/graph/queries.d.ts +0 -106
- package/dist/graph/queries.d.ts.map +0 -1
- package/dist/graph/queries.js +0 -366
- package/dist/graph/queries.js.map +0 -1
- package/dist/graph/traversal.d.ts +0 -127
- package/dist/graph/traversal.d.ts.map +0 -1
- package/dist/graph/traversal.js +0 -493
- package/dist/graph/traversal.js.map +0 -1
- package/dist/index.d.ts +0 -447
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -825
- package/dist/index.js.map +0 -1
- package/dist/installer/claude-md-template.d.ts +0 -14
- package/dist/installer/claude-md-template.d.ts.map +0 -1
- package/dist/installer/claude-md-template.js +0 -21
- package/dist/installer/claude-md-template.js.map +0 -1
- package/dist/installer/config-writer.d.ts +0 -29
- package/dist/installer/config-writer.d.ts.map +0 -1
- package/dist/installer/config-writer.js +0 -109
- package/dist/installer/config-writer.js.map +0 -1
- package/dist/installer/index.d.ts +0 -53
- package/dist/installer/index.d.ts.map +0 -1
- package/dist/installer/index.js +0 -338
- package/dist/installer/index.js.map +0 -1
- package/dist/installer/instructions-template.d.ts +0 -28
- package/dist/installer/instructions-template.d.ts.map +0 -1
- package/dist/installer/instructions-template.js +0 -63
- package/dist/installer/instructions-template.js.map +0 -1
- package/dist/installer/targets/claude.d.ts +0 -27
- package/dist/installer/targets/claude.d.ts.map +0 -1
- package/dist/installer/targets/claude.js +0 -246
- package/dist/installer/targets/claude.js.map +0 -1
- package/dist/installer/targets/codex.d.ts +0 -18
- package/dist/installer/targets/codex.d.ts.map +0 -1
- package/dist/installer/targets/codex.js +0 -185
- package/dist/installer/targets/codex.js.map +0 -1
- package/dist/installer/targets/cursor.d.ts +0 -35
- package/dist/installer/targets/cursor.d.ts.map +0 -1
- package/dist/installer/targets/cursor.js +0 -229
- package/dist/installer/targets/cursor.js.map +0 -1
- package/dist/installer/targets/opencode.d.ts +0 -30
- package/dist/installer/targets/opencode.d.ts.map +0 -1
- package/dist/installer/targets/opencode.js +0 -235
- package/dist/installer/targets/opencode.js.map +0 -1
- package/dist/installer/targets/registry.d.ts +0 -35
- package/dist/installer/targets/registry.d.ts.map +0 -1
- package/dist/installer/targets/registry.js +0 -83
- package/dist/installer/targets/registry.js.map +0 -1
- package/dist/installer/targets/shared.d.ts +0 -77
- package/dist/installer/targets/shared.d.ts.map +0 -1
- package/dist/installer/targets/shared.js +0 -246
- package/dist/installer/targets/shared.js.map +0 -1
- package/dist/installer/targets/toml.d.ts +0 -52
- package/dist/installer/targets/toml.d.ts.map +0 -1
- package/dist/installer/targets/toml.js +0 -147
- package/dist/installer/targets/toml.js.map +0 -1
- package/dist/installer/targets/types.d.ts +0 -116
- package/dist/installer/targets/types.d.ts.map +0 -1
- package/dist/installer/targets/types.js +0 -16
- package/dist/installer/targets/types.js.map +0 -1
- package/dist/mcp/index.d.ts +0 -86
- package/dist/mcp/index.d.ts.map +0 -1
- package/dist/mcp/index.js +0 -355
- package/dist/mcp/index.js.map +0 -1
- package/dist/mcp/server-instructions.d.ts +0 -19
- package/dist/mcp/server-instructions.d.ts.map +0 -1
- package/dist/mcp/server-instructions.js +0 -59
- package/dist/mcp/server-instructions.js.map +0 -1
- package/dist/mcp/tools.d.ts +0 -200
- package/dist/mcp/tools.d.ts.map +0 -1
- package/dist/mcp/tools.js +0 -1319
- package/dist/mcp/tools.js.map +0 -1
- package/dist/mcp/transport.d.ts +0 -89
- package/dist/mcp/transport.d.ts.map +0 -1
- package/dist/mcp/transport.js +0 -170
- package/dist/mcp/transport.js.map +0 -1
- package/dist/resolution/frameworks/cargo-workspace.d.ts +0 -18
- package/dist/resolution/frameworks/cargo-workspace.d.ts.map +0 -1
- package/dist/resolution/frameworks/cargo-workspace.js +0 -225
- package/dist/resolution/frameworks/cargo-workspace.js.map +0 -1
- package/dist/resolution/frameworks/csharp.d.ts +0 -8
- package/dist/resolution/frameworks/csharp.d.ts.map +0 -1
- package/dist/resolution/frameworks/csharp.js +0 -213
- package/dist/resolution/frameworks/csharp.js.map +0 -1
- package/dist/resolution/frameworks/express.d.ts +0 -8
- package/dist/resolution/frameworks/express.d.ts.map +0 -1
- package/dist/resolution/frameworks/express.js +0 -225
- package/dist/resolution/frameworks/express.js.map +0 -1
- package/dist/resolution/frameworks/go.d.ts +0 -8
- package/dist/resolution/frameworks/go.d.ts.map +0 -1
- package/dist/resolution/frameworks/go.js +0 -158
- package/dist/resolution/frameworks/go.js.map +0 -1
- package/dist/resolution/frameworks/index.d.ts +0 -41
- package/dist/resolution/frameworks/index.d.ts.map +0 -1
- package/dist/resolution/frameworks/index.js +0 -129
- package/dist/resolution/frameworks/index.js.map +0 -1
- package/dist/resolution/frameworks/java.d.ts +0 -8
- package/dist/resolution/frameworks/java.d.ts.map +0 -1
- package/dist/resolution/frameworks/java.js +0 -177
- package/dist/resolution/frameworks/java.js.map +0 -1
- package/dist/resolution/frameworks/laravel.d.ts +0 -13
- package/dist/resolution/frameworks/laravel.d.ts.map +0 -1
- package/dist/resolution/frameworks/laravel.js +0 -248
- package/dist/resolution/frameworks/laravel.js.map +0 -1
- package/dist/resolution/frameworks/python.d.ts +0 -10
- package/dist/resolution/frameworks/python.d.ts.map +0 -1
- package/dist/resolution/frameworks/python.js +0 -278
- package/dist/resolution/frameworks/python.js.map +0 -1
- package/dist/resolution/frameworks/react.d.ts +0 -8
- package/dist/resolution/frameworks/react.d.ts.map +0 -1
- package/dist/resolution/frameworks/react.js +0 -272
- package/dist/resolution/frameworks/react.js.map +0 -1
- package/dist/resolution/frameworks/ruby.d.ts +0 -8
- package/dist/resolution/frameworks/ruby.d.ts.map +0 -1
- package/dist/resolution/frameworks/ruby.js +0 -198
- package/dist/resolution/frameworks/ruby.js.map +0 -1
- package/dist/resolution/frameworks/rust.d.ts +0 -8
- package/dist/resolution/frameworks/rust.d.ts.map +0 -1
- package/dist/resolution/frameworks/rust.js +0 -207
- package/dist/resolution/frameworks/rust.js.map +0 -1
- package/dist/resolution/frameworks/svelte.d.ts +0 -9
- package/dist/resolution/frameworks/svelte.d.ts.map +0 -1
- package/dist/resolution/frameworks/svelte.js +0 -249
- package/dist/resolution/frameworks/svelte.js.map +0 -1
- package/dist/resolution/frameworks/swift.d.ts +0 -10
- package/dist/resolution/frameworks/swift.d.ts.map +0 -1
- package/dist/resolution/frameworks/swift.js +0 -376
- package/dist/resolution/frameworks/swift.js.map +0 -1
- package/dist/resolution/frameworks/vue.d.ts +0 -9
- package/dist/resolution/frameworks/vue.d.ts.map +0 -1
- package/dist/resolution/frameworks/vue.js +0 -306
- package/dist/resolution/frameworks/vue.js.map +0 -1
- package/dist/resolution/import-resolver.d.ts +0 -40
- package/dist/resolution/import-resolver.d.ts.map +0 -1
- package/dist/resolution/import-resolver.js +0 -663
- package/dist/resolution/import-resolver.js.map +0 -1
- package/dist/resolution/index.d.ts +0 -106
- package/dist/resolution/index.d.ts.map +0 -1
- package/dist/resolution/index.js +0 -709
- package/dist/resolution/index.js.map +0 -1
- package/dist/resolution/name-matcher.d.ts +0 -32
- package/dist/resolution/name-matcher.d.ts.map +0 -1
- package/dist/resolution/name-matcher.js +0 -384
- package/dist/resolution/name-matcher.js.map +0 -1
- package/dist/resolution/path-aliases.d.ts +0 -68
- package/dist/resolution/path-aliases.d.ts.map +0 -1
- package/dist/resolution/path-aliases.js +0 -238
- package/dist/resolution/path-aliases.js.map +0 -1
- package/dist/resolution/strip-comments.d.ts +0 -27
- package/dist/resolution/strip-comments.d.ts.map +0 -1
- package/dist/resolution/strip-comments.js +0 -441
- package/dist/resolution/strip-comments.js.map +0 -1
- package/dist/resolution/types.d.ts +0 -172
- package/dist/resolution/types.d.ts.map +0 -1
- package/dist/resolution/types.js +0 -8
- package/dist/resolution/types.js.map +0 -1
- package/dist/search/query-parser.d.ts +0 -57
- package/dist/search/query-parser.d.ts.map +0 -1
- package/dist/search/query-parser.js +0 -177
- package/dist/search/query-parser.js.map +0 -1
- package/dist/search/query-utils.d.ts +0 -53
- package/dist/search/query-utils.d.ts.map +0 -1
- package/dist/search/query-utils.js +0 -347
- package/dist/search/query-utils.js.map +0 -1
- package/dist/sync/index.d.ts +0 -13
- package/dist/sync/index.d.ts.map +0 -1
- package/dist/sync/index.js +0 -17
- package/dist/sync/index.js.map +0 -1
- package/dist/sync/watcher.d.ts +0 -81
- package/dist/sync/watcher.d.ts.map +0 -1
- package/dist/sync/watcher.js +0 -184
- package/dist/sync/watcher.js.map +0 -1
- package/dist/types.d.ts +0 -423
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -256
- package/dist/types.js.map +0 -1
- package/dist/ui/glyphs.d.ts +0 -42
- package/dist/ui/glyphs.d.ts.map +0 -1
- package/dist/ui/glyphs.js +0 -78
- package/dist/ui/glyphs.js.map +0 -1
- package/dist/ui/shimmer-progress.d.ts +0 -11
- package/dist/ui/shimmer-progress.d.ts.map +0 -1
- package/dist/ui/shimmer-progress.js +0 -90
- package/dist/ui/shimmer-progress.js.map +0 -1
- package/dist/ui/shimmer-worker.d.ts +0 -2
- package/dist/ui/shimmer-worker.d.ts.map +0 -1
- package/dist/ui/shimmer-worker.js +0 -118
- package/dist/ui/shimmer-worker.js.map +0 -1
- package/dist/ui/types.d.ts +0 -17
- package/dist/ui/types.d.ts.map +0 -1
- package/dist/ui/types.js +0 -3
- package/dist/ui/types.js.map +0 -1
- package/dist/utils.d.ts +0 -205
- package/dist/utils.d.ts.map +0 -1
- package/dist/utils.js +0 -549
- package/dist/utils.js.map +0 -1
- package/scripts/local-install.sh +0 -41
- package/scripts/patch-tree-sitter-dart.js +0 -112
- package/scripts/release.sh +0 -70
package/dist/extraction/index.js
DELETED
|
@@ -1,1279 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
|
|
3
|
-
* Extraction Orchestrator
|
|
4
|
-
*
|
|
5
|
-
* Coordinates file scanning, parsing, and database storage.
|
|
6
|
-
*/
|
|
7
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
-
if (k2 === undefined) k2 = k;
|
|
9
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
-
}
|
|
13
|
-
Object.defineProperty(o, k2, desc);
|
|
14
|
-
}) : (function(o, m, k, k2) {
|
|
15
|
-
if (k2 === undefined) k2 = k;
|
|
16
|
-
o[k2] = m[k];
|
|
17
|
-
}));
|
|
18
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
-
}) : function(o, v) {
|
|
21
|
-
o["default"] = v;
|
|
22
|
-
});
|
|
23
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
-
var ownKeys = function(o) {
|
|
25
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
-
var ar = [];
|
|
27
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
-
return ar;
|
|
29
|
-
};
|
|
30
|
-
return ownKeys(o);
|
|
31
|
-
};
|
|
32
|
-
return function (mod) {
|
|
33
|
-
if (mod && mod.__esModule) return mod;
|
|
34
|
-
var result = {};
|
|
35
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
-
__setModuleDefault(result, mod);
|
|
37
|
-
return result;
|
|
38
|
-
};
|
|
39
|
-
})();
|
|
40
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
41
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
42
|
-
};
|
|
43
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
44
|
-
exports.loadAllGrammars = exports.loadGrammarsForLanguages = exports.initGrammars = exports.getSupportedLanguages = exports.isGrammarLoaded = exports.isLanguageSupported = exports.detectLanguage = exports.extractFromSource = exports.ExtractionOrchestrator = void 0;
|
|
45
|
-
exports.hashContent = hashContent;
|
|
46
|
-
exports.shouldIncludeFile = shouldIncludeFile;
|
|
47
|
-
exports.scanDirectory = scanDirectory;
|
|
48
|
-
exports.scanDirectoryAsync = scanDirectoryAsync;
|
|
49
|
-
const fs = __importStar(require("fs"));
|
|
50
|
-
const fsp = __importStar(require("fs/promises"));
|
|
51
|
-
const path = __importStar(require("path"));
|
|
52
|
-
const crypto = __importStar(require("crypto"));
|
|
53
|
-
const child_process_1 = require("child_process");
|
|
54
|
-
const tree_sitter_1 = require("./tree-sitter");
|
|
55
|
-
const grammars_1 = require("./grammars");
|
|
56
|
-
const errors_1 = require("../errors");
|
|
57
|
-
const utils_1 = require("../utils");
|
|
58
|
-
const picomatch_1 = __importDefault(require("picomatch"));
|
|
59
|
-
const frameworks_1 = require("../resolution/frameworks");
|
|
60
|
-
/**
 * How many files are read concurrently while indexing.
 * Reads are I/O-bound, so batching lets I/O wait overlap with CPU parse work.
 */
const FILE_IO_BATCH_SIZE = 10;
// PARSER_RESET_INTERVAL moved to parse-worker.ts (runs in worker thread)
/**
 * Upper bound, in milliseconds, on how long a single file may take to parse
 * in the worker thread. Guards the indexing run against a hung tree-sitter
 * parse or a WASM out-of-memory condition; the worker is restarted after a
 * timeout.
 */
const PARSE_TIMEOUT_MS = 10000;
/**
 * How many files are parsed before the worker thread is recycled.
 * WASM linear memory can grow but never shrink (a WebAssembly spec
 * limitation), so the only way to reclaim tree-sitter's WASM heap is to
 * terminate the worker (destroying its V8 isolate) and spawn a fresh one.
 * The value trades memory usage against the cost of reloading grammars.
 */
const WORKER_RECYCLE_INTERVAL = 250;
|
|
80
|
-
/**
 * Compute the SHA-256 digest of the given content.
 *
 * @param content - Data fed to the hash (anything `Hash#update` accepts).
 * @returns The digest encoded as a hex string.
 */
function hashContent(content) {
    const hasher = crypto.createHash('sha256');
    hasher.update(content);
    return hasher.digest('hex');
}
|
|
86
|
-
/**
 * Test a single path against a single glob pattern.
 *
 * The path is run through `normalizePath` before matching, and picomatch is
 * invoked with `dot: true`.
 *
 * @param filePath - Path to test.
 * @param pattern - Glob pattern handed to picomatch.
 * @returns Whatever `picomatch.isMatch` reports for the normalized path.
 */
function matchesGlob(filePath, pattern) {
    const { normalizePath } = utils_1;
    const candidate = normalizePath(filePath);
    const matchOptions = { dot: true };
    return picomatch_1.default.isMatch(candidate, pattern, matchOptions);
}
|
|
93
|
-
/**
 * Decide whether a file belongs in the index according to the config globs.
 *
 * Exclusion takes precedence: a path matching any `config.exclude` pattern is
 * rejected even if it also matches an include pattern. Otherwise the path is
 * accepted only when it matches at least one `config.include` pattern.
 *
 * @param filePath - Path to test against the patterns.
 * @param config - Object providing iterable `exclude` and `include` patterns.
 * @returns `true` if the file should be included, `false` otherwise.
 */
function shouldIncludeFile(filePath, config) {
    const matchesAnyOf = (patterns) => {
        for (const glob of patterns) {
            if (matchesGlob(filePath, glob)) {
                return true;
            }
        }
        return false;
    };
    // Excludes are consulted first so they always win over includes.
    if (matchesAnyOf(config.exclude)) {
        return false;
    }
    return matchesAnyOf(config.include);
}
|
|
111
|
-
/**
 * Get all files visible to git (tracked + untracked but not ignored).
 * Respects .gitignore at all levels (root, subdirectories).
 *
 * Paths are read with `git ls-files -z` (NUL-terminated, unquoted) so that
 * file names containing non-ASCII characters, quotes, or leading/trailing
 * whitespace are returned verbatim instead of in git's C-quoted form.
 *
 * @param rootDir - Directory used as the working directory for git.
 * @returns A Set of paths (each passed through `normalizePath`), or `null`
 *          when any git command fails (e.g. non-git project) or when rootDir
 *          is ignored by an enclosing repository, so callers can fall back
 *          to a filesystem walk.
 */
function getGitVisibleFiles(rootDir) {
    const probeOpts = { cwd: rootDir, encoding: 'utf-8', timeout: 5000, stdio: ['pipe', 'pipe', 'pipe'] };
    const listOpts = { cwd: rootDir, encoding: 'utf-8', timeout: 30000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] };
    try {
        // Check if the project directory is gitignored by a parent repo.
        // When rootDir lives inside a parent git repo that ignores it,
        // `git ls-files` returns nothing — fall back to filesystem walk.
        const gitRoot = (0, child_process_1.execFileSync)('git', ['rev-parse', '--show-toplevel'], probeOpts).trim();
        if (path.resolve(gitRoot) !== path.resolve(rootDir)) {
            try {
                // git check-ignore exits 0 if the path IS ignored, 1 if not
                (0, child_process_1.execFileSync)('git', ['check-ignore', '-q', path.resolve(rootDir)], probeOpts);
                // Directory is gitignored by parent repo — fall back to filesystem walk
                return null;
            }
            catch {
                // Not ignored — safe to use git ls-files
            }
        }
        const files = new Set();
        // Runs one `git ls-files -z` variant and records every non-empty entry.
        const collect = (modeArgs) => {
            const output = (0, child_process_1.execFileSync)('git', ['ls-files', '-z', ...modeArgs], listOpts);
            for (const entry of output.split('\0')) {
                if (entry) {
                    files.add((0, utils_1.normalizePath)(entry));
                }
            }
        };
        // Tracked files. --recurse-submodules pulls in files from active submodules,
        // which the main repo's index would otherwise represent only as a commit pointer.
        // Without this, monorepos using submodules index 0 files. (See issue #147.)
        // Note: --recurse-submodules only supports -c/--cached and --stage modes — it
        // can't be combined with -o, so untracked files are gathered separately below.
        collect(['-c', '--recurse-submodules']);
        // Untracked files in the main repo (submodules manage their own untracked state).
        collect(['-o', '--exclude-standard']);
        return files;
    }
    catch {
        return null;
    }
}
|
|
161
|
-
/**
 * Use `git status` to detect changed files instead of scanning every file.
 *
 * Differences from a naive `git status --porcelain` parse:
 *  - `-z` is used so paths arrive NUL-terminated and unquoted (names with
 *    spaces, quotes or non-ASCII characters are otherwise C-quoted by git).
 *  - `--untracked-files=all` lists every untracked file individually; the
 *    default mode collapses a new directory into a single `dir/` entry, which
 *    would hide the new files inside it.
 *  - Porcelain paths are relative to the repository top level, so the
 *    rootDir prefix (from `git rev-parse --show-prefix`) is stripped to make
 *    them relative to rootDir; entries outside rootDir are skipped.
 *
 * @param rootDir - Directory used as the working directory for git.
 * @param config - Include/exclude configuration passed to shouldIncludeFile.
 * @returns `{ modified, added, deleted }` arrays of paths relative to rootDir,
 *          or `null` on any git failure so callers fall back to a full scan.
 */
function getGitChangedFiles(rootDir, config) {
    const gitOpts = { cwd: rootDir, encoding: 'utf-8', timeout: 10000, maxBuffer: 50 * 1024 * 1024, stdio: ['pipe', 'pipe', 'pipe'] };
    try {
        // Empty at the repository top level, otherwise e.g. "packages/app/".
        const prefix = (0, child_process_1.execFileSync)('git', ['rev-parse', '--show-prefix'], gitOpts).replace(/\r?\n$/, '');
        const output = (0, child_process_1.execFileSync)('git', ['status', '--porcelain', '-z', '--no-renames', '--untracked-files=all', '--', '.'], gitOpts);
        const modified = [];
        const added = [];
        const deleted = [];
        for (const entry of output.split('\0')) {
            if (entry.length < 4)
                continue; // Minimum: "XY file"
            const statusCode = entry.substring(0, 2);
            const repoRelative = entry.substring(3);
            // Ignore anything that lives outside rootDir.
            if (!repoRelative.startsWith(prefix))
                continue;
            const filePath = (0, utils_1.normalizePath)(repoRelative.slice(prefix.length));
            // Skip files that don't match include/exclude config
            if (!shouldIncludeFile(filePath, config))
                continue;
            if (statusCode === '??') {
                added.push(filePath);
            }
            else if (statusCode.includes('D')) {
                deleted.push(filePath);
            }
            else {
                // M, MM, AM, A (staged), etc. — treat as modified
                modified.push(filePath);
            }
        }
        return { modified, added, deleted };
    }
    catch {
        return null;
    }
}
|
|
196
|
-
/**
 * File name of the opt-out marker: a directory containing a file with this
 * name is skipped, together with all of its children.
 */
const CODEGRAPH_IGNORE_MARKER = '.codegraphignore';
|
|
200
|
-
/**
 * Collect the source files under `rootDir` that pass the config filters.
 *
 * When git can list the files (which inherently honours .gitignore at every
 * level), that list is filtered with shouldIncludeFile. Otherwise the
 * filesystem is walked directly.
 *
 * @param rootDir - Project root to scan.
 * @param config - Include/exclude configuration.
 * @param onProgress - Optional callback invoked as `(count, filePath)` for
 *                     each accepted file.
 * @returns Array of accepted file paths.
 */
function scanDirectory(rootDir, config, onProgress) {
    const gitFiles = getGitVisibleFiles(rootDir);
    if (!gitFiles) {
        // No usable git listing: walk the filesystem instead.
        return scanDirectoryWalk(rootDir, config, onProgress);
    }
    const accepted = [];
    for (const candidate of gitFiles) {
        if (!shouldIncludeFile(candidate, config)) {
            continue;
        }
        accepted.push(candidate);
        onProgress?.(accepted.length, candidate);
    }
    return accepted;
}
|
|
225
|
-
/**
 * Promise-returning counterpart of scanDirectory.
 *
 * While filtering the git file list it hands control back to the event loop
 * after every 100 accepted files, so worker threads get a chance to receive
 * and render progress messages. Without a git listing it delegates to the
 * filesystem walk.
 *
 * @param rootDir - Project root to scan.
 * @param config - Include/exclude configuration.
 * @param onProgress - Optional callback invoked as `(count, filePath)` for
 *                     each accepted file.
 * @returns Promise resolving to the array of accepted file paths.
 */
async function scanDirectoryAsync(rootDir, config, onProgress) {
    const gitFiles = getGitVisibleFiles(rootDir);
    if (!gitFiles) {
        return scanDirectoryWalk(rootDir, config, onProgress);
    }
    const yieldToEventLoop = () => new Promise((resume) => setImmediate(resume));
    const accepted = [];
    for (const candidate of gitFiles) {
        if (!shouldIncludeFile(candidate, config)) {
            continue;
        }
        accepted.push(candidate);
        onProgress?.(accepted.length, candidate);
        // Yield every 100 files so worker threads can render progress
        if (accepted.length % 100 === 0) {
            await yieldToEventLoop();
        }
    }
    return accepted;
}
|
|
249
|
-
/**
 * Recursive filesystem scan used when no git file listing is available.
 *
 * Directories are de-duplicated by their real path so symlink cycles end,
 * a directory containing the `.codegraphignore` marker is skipped entirely,
 * and directories matching an exclude pattern are not descended into.
 * Symlinks are resolved and then treated as the directory or file they
 * point to; broken ones are logged and skipped.
 *
 * @param rootDir - Directory where the walk starts; returned paths are
 *                  relative to it and passed through `normalizePath`.
 * @param config - Include/exclude configuration.
 * @param onProgress - Optional callback invoked as `(count, filePath)` for
 *                     each accepted file.
 * @returns Array of accepted relative file paths.
 */
function scanDirectoryWalk(rootDir, config, onProgress) {
    const accepted = [];
    const seenRealDirs = new Set();
    // True when the directory (with or without a trailing slash) hits an exclude glob.
    const isExcludedDir = (relDir) => {
        const withSlash = relDir + '/';
        for (const glob of config.exclude) {
            if (matchesGlob(withSlash, glob) || matchesGlob(relDir, glob)) {
                return true;
            }
        }
        return false;
    };
    // Descends into a directory unless an exclude pattern rules it out.
    const descendUnlessExcluded = (absDir, relDir) => {
        if (!isExcludedDir(relDir)) {
            visit(absDir);
        }
    };
    // Records a file that passes the include/exclude filters and reports progress.
    const recordIfIncluded = (relFile) => {
        if (!shouldIncludeFile(relFile, config)) {
            return;
        }
        accepted.push(relFile);
        onProgress?.(accepted.length, relFile);
    };
    function visit(dir) {
        let realDir;
        try {
            realDir = fs.realpathSync(dir);
        }
        catch {
            (0, errors_1.logDebug)('Skipping unresolvable directory', { dir });
            return;
        }
        if (seenRealDirs.has(realDir)) {
            (0, errors_1.logDebug)('Skipping already-visited directory (symlink cycle)', { dir, realDir });
            return;
        }
        seenRealDirs.add(realDir);
        // A marker file opts the whole subtree out of indexing.
        if (fs.existsSync(path.join(dir, CODEGRAPH_IGNORE_MARKER))) {
            (0, errors_1.logDebug)('Skipping directory due to .codegraphignore marker', { dir });
            return;
        }
        let entries;
        try {
            entries = fs.readdirSync(dir, { withFileTypes: true });
        }
        catch (error) {
            (0, errors_1.logDebug)('Skipping unreadable directory', { dir, error: String(error) });
            return;
        }
        for (const entry of entries) {
            const fullPath = path.join(dir, entry.name);
            const relativePath = (0, utils_1.normalizePath)(path.relative(rootDir, fullPath));
            if (entry.isSymbolicLink()) {
                // Everything done for a symlink stays inside this try, so any
                // failure along the way is reported as a broken symlink.
                try {
                    const targetStat = fs.statSync(fs.realpathSync(fullPath));
                    if (targetStat.isDirectory()) {
                        descendUnlessExcluded(fullPath, relativePath);
                    }
                    else if (targetStat.isFile()) {
                        recordIfIncluded(relativePath);
                    }
                }
                catch {
                    (0, errors_1.logDebug)('Skipping broken symlink', { path: fullPath });
                }
                continue;
            }
            if (entry.isDirectory()) {
                descendUnlessExcluded(fullPath, relativePath);
            }
            else if (entry.isFile()) {
                recordIfIncluded(relativePath);
            }
        }
    }
    visit(rootDir);
    return accepted;
}
|
|
342
|
-
/**
|
|
343
|
-
* Extraction orchestrator
|
|
344
|
-
*/
|
|
345
|
-
class ExtractionOrchestrator {
|
|
346
|
-
rootDir;
|
|
347
|
-
config;
|
|
348
|
-
queries;
|
|
349
|
-
/**
|
|
350
|
-
* Names of frameworks detected for this project, populated by indexAll().
|
|
351
|
-
* Passed to extractFromSource so framework-specific extractors (route nodes,
|
|
352
|
-
* middleware, etc.) run after the tree-sitter pass. Cleared if detection
|
|
353
|
-
* hasn't run yet so single-file re-index paths can detect on the spot.
|
|
354
|
-
*/
|
|
355
|
-
detectedFrameworkNames = null;
|
|
356
|
-
constructor(rootDir, config, queries) {
|
|
357
|
-
this.rootDir = rootDir;
|
|
358
|
-
this.config = config;
|
|
359
|
-
this.queries = queries;
|
|
360
|
-
}
|
|
361
|
-
/**
|
|
362
|
-
* Build a filesystem-backed ResolutionContext sufficient for framework
|
|
363
|
-
* detection. Graph-query methods (getNodesByName etc.) return empty because
|
|
364
|
-
* the DB hasn't been populated yet, but detect() only uses readFile,
|
|
365
|
-
* fileExists, and getAllFiles, so that's fine.
|
|
366
|
-
*/
|
|
367
|
-
buildDetectionContext(files) {
|
|
368
|
-
const rootDir = this.rootDir;
|
|
369
|
-
return {
|
|
370
|
-
getNodesInFile: () => [],
|
|
371
|
-
getNodesByName: () => [],
|
|
372
|
-
getNodesByQualifiedName: () => [],
|
|
373
|
-
getNodesByKind: () => [],
|
|
374
|
-
getNodesByLowerName: () => [],
|
|
375
|
-
getImportMappings: () => [],
|
|
376
|
-
getAllFiles: () => files,
|
|
377
|
-
getProjectRoot: () => rootDir,
|
|
378
|
-
fileExists: (relativePath) => {
|
|
379
|
-
const full = (0, utils_1.validatePathWithinRoot)(rootDir, relativePath);
|
|
380
|
-
if (!full)
|
|
381
|
-
return false;
|
|
382
|
-
try {
|
|
383
|
-
return fs.existsSync(full);
|
|
384
|
-
}
|
|
385
|
-
catch {
|
|
386
|
-
return false;
|
|
387
|
-
}
|
|
388
|
-
},
|
|
389
|
-
readFile: (relativePath) => {
|
|
390
|
-
const full = (0, utils_1.validatePathWithinRoot)(rootDir, relativePath);
|
|
391
|
-
if (!full)
|
|
392
|
-
return null;
|
|
393
|
-
try {
|
|
394
|
-
return fs.readFileSync(full, 'utf-8');
|
|
395
|
-
}
|
|
396
|
-
catch {
|
|
397
|
-
return null;
|
|
398
|
-
}
|
|
399
|
-
},
|
|
400
|
-
};
|
|
401
|
-
}
|
|
402
|
-
/**
|
|
403
|
-
* Detect frameworks on demand using the current scanned files (or a fresh
|
|
404
|
-
* scan if none are provided). Cached on the orchestrator so repeat calls
|
|
405
|
-
* inside a single run don't re-scan.
|
|
406
|
-
*/
|
|
407
|
-
ensureDetectedFrameworks(files) {
|
|
408
|
-
if (this.detectedFrameworkNames !== null)
|
|
409
|
-
return this.detectedFrameworkNames;
|
|
410
|
-
const fileList = files ?? scanDirectory(this.rootDir, this.config);
|
|
411
|
-
const context = this.buildDetectionContext(fileList);
|
|
412
|
-
this.detectedFrameworkNames = (0, frameworks_1.detectFrameworks)(context).map((r) => r.name);
|
|
413
|
-
return this.detectedFrameworkNames;
|
|
414
|
-
}
|
|
415
|
-
/**
|
|
416
|
-
* Index all files in the project
|
|
417
|
-
*/
|
|
418
|
-
async indexAll(onProgress, signal, verbose) {
|
|
419
|
-
await (0, grammars_1.initGrammars)();
|
|
420
|
-
const startTime = Date.now();
|
|
421
|
-
const errors = [];
|
|
422
|
-
let filesIndexed = 0;
|
|
423
|
-
let filesSkipped = 0;
|
|
424
|
-
let filesErrored = 0;
|
|
425
|
-
let totalNodes = 0;
|
|
426
|
-
let totalEdges = 0;
|
|
427
|
-
const log = verbose
|
|
428
|
-
? (msg) => { console.log(`[worker] ${msg}`); }
|
|
429
|
-
: (_msg) => { };
|
|
430
|
-
// Phase 1: Scan for files
|
|
431
|
-
onProgress?.({
|
|
432
|
-
phase: 'scanning',
|
|
433
|
-
current: 0,
|
|
434
|
-
total: 0,
|
|
435
|
-
});
|
|
436
|
-
const files = await scanDirectoryAsync(this.rootDir, this.config, (current, file) => {
|
|
437
|
-
onProgress?.({
|
|
438
|
-
phase: 'scanning',
|
|
439
|
-
current,
|
|
440
|
-
total: 0,
|
|
441
|
-
currentFile: file,
|
|
442
|
-
});
|
|
443
|
-
});
|
|
444
|
-
// Detect frameworks once per indexAll run using the scanned file list.
|
|
445
|
-
// Names are passed to each parse call so framework-specific extractors
|
|
446
|
-
// (route nodes, middleware, etc.) run after the tree-sitter pass.
|
|
447
|
-
// Framework detection is reset each run so adding e.g. requirements.txt
|
|
448
|
-
// between runs is picked up without restarting the process.
|
|
449
|
-
this.detectedFrameworkNames = null;
|
|
450
|
-
const frameworkNames = this.ensureDetectedFrameworks(files);
|
|
451
|
-
if (signal?.aborted) {
|
|
452
|
-
return {
|
|
453
|
-
success: false,
|
|
454
|
-
filesIndexed: 0,
|
|
455
|
-
filesSkipped: 0,
|
|
456
|
-
filesErrored: 0,
|
|
457
|
-
nodesCreated: 0,
|
|
458
|
-
edgesCreated: 0,
|
|
459
|
-
errors: [{ message: 'Aborted', severity: 'error' }],
|
|
460
|
-
durationMs: Date.now() - startTime,
|
|
461
|
-
};
|
|
462
|
-
}
|
|
463
|
-
// Phase 2: Parse files in a worker thread (keeps main thread unblocked for UI)
|
|
464
|
-
const total = files.length;
|
|
465
|
-
let processed = 0;
|
|
466
|
-
// Emit parsing phase immediately so the progress bar appears during worker setup.
|
|
467
|
-
// The yield lets the shimmer worker flush the phase transition to stdout before
|
|
468
|
-
// the main thread starts synchronous grammar detection work.
|
|
469
|
-
onProgress?.({
|
|
470
|
-
phase: 'parsing',
|
|
471
|
-
current: 0,
|
|
472
|
-
total,
|
|
473
|
-
});
|
|
474
|
-
await new Promise(resolve => setImmediate(resolve));
|
|
475
|
-
// Detect needed languages and load grammars in the parse worker
|
|
476
|
-
const neededLanguages = [...new Set(files.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
477
|
-
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded when c is needed
|
|
478
|
-
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
479
|
-
neededLanguages.push('cpp');
|
|
480
|
-
}
|
|
481
|
-
// Try to use a worker thread for parsing (keeps main thread unblocked for UI).
|
|
482
|
-
// Falls back to in-process parsing if the compiled worker is unavailable (e.g. tests).
|
|
483
|
-
const parseWorkerPath = path.join(__dirname, 'parse-worker.js');
|
|
484
|
-
const useWorker = fs.existsSync(parseWorkerPath);
|
|
485
|
-
let WorkerClass = null;
|
|
486
|
-
if (useWorker) {
|
|
487
|
-
const { Worker } = await Promise.resolve().then(() => __importStar(require('worker_threads')));
|
|
488
|
-
WorkerClass = Worker;
|
|
489
|
-
}
|
|
490
|
-
else {
|
|
491
|
-
// In-process fallback: load grammars locally
|
|
492
|
-
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
493
|
-
}
|
|
494
|
-
// --- Worker lifecycle management ---
|
|
495
|
-
// The worker can crash (OOM in WASM) or hang on pathological files.
|
|
496
|
-
// We track pending parse promises and handle both cases:
|
|
497
|
-
// - Timeout: terminate + restart the worker, reject the timed-out request
|
|
498
|
-
// - Crash: reject all pending promises, restart for remaining files
|
|
499
|
-
let parseWorker = null;
|
|
500
|
-
let nextId = 0;
|
|
501
|
-
let workerParseCount = 0;
|
|
502
|
-
const pendingParses = new Map();
|
|
503
|
-
function rejectAllPending(reason) {
|
|
504
|
-
for (const [id, pending] of pendingParses) {
|
|
505
|
-
clearTimeout(pending.timer);
|
|
506
|
-
pendingParses.delete(id);
|
|
507
|
-
pending.reject(new Error(reason));
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
function attachWorkerHandlers(w) {
|
|
511
|
-
w.on('message', (msg) => {
|
|
512
|
-
if (msg.type === 'parse-result' && msg.id !== undefined) {
|
|
513
|
-
const pending = pendingParses.get(msg.id);
|
|
514
|
-
if (pending) {
|
|
515
|
-
clearTimeout(pending.timer);
|
|
516
|
-
pendingParses.delete(msg.id);
|
|
517
|
-
pending.resolve(msg.result);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
});
|
|
521
|
-
w.on('error', (err) => {
|
|
522
|
-
(0, errors_1.logWarn)('Parse worker error', { error: err.message });
|
|
523
|
-
rejectAllPending(`Worker error: ${err.message}`);
|
|
524
|
-
});
|
|
525
|
-
w.on('exit', (code) => {
|
|
526
|
-
if (code !== 0 && pendingParses.size > 0) {
|
|
527
|
-
(0, errors_1.logWarn)('Parse worker exited unexpectedly', { code });
|
|
528
|
-
rejectAllPending(`Worker exited with code ${code}`);
|
|
529
|
-
}
|
|
530
|
-
// Clear reference so we know to respawn, reset count so
|
|
531
|
-
// the fresh worker gets a full cycle before recycling.
|
|
532
|
-
if (parseWorker === w) {
|
|
533
|
-
parseWorker = null;
|
|
534
|
-
workerParseCount = 0;
|
|
535
|
-
}
|
|
536
|
-
});
|
|
537
|
-
}
|
|
538
|
-
async function ensureWorker() {
|
|
539
|
-
if (parseWorker)
|
|
540
|
-
return parseWorker;
|
|
541
|
-
log('Spawning new parse worker...');
|
|
542
|
-
parseWorker = new WorkerClass(parseWorkerPath);
|
|
543
|
-
attachWorkerHandlers(parseWorker);
|
|
544
|
-
// Load grammars in the new worker
|
|
545
|
-
await new Promise((resolve, reject) => {
|
|
546
|
-
parseWorker.once('message', (msg) => {
|
|
547
|
-
if (msg.type === 'grammars-loaded')
|
|
548
|
-
resolve();
|
|
549
|
-
else
|
|
550
|
-
reject(new Error(`Unexpected message: ${msg.type}`));
|
|
551
|
-
});
|
|
552
|
-
parseWorker.postMessage({ type: 'load-grammars', languages: neededLanguages });
|
|
553
|
-
});
|
|
554
|
-
return parseWorker;
|
|
555
|
-
}
|
|
556
|
-
if (WorkerClass) {
|
|
557
|
-
await ensureWorker();
|
|
558
|
-
}
|
|
559
|
-
/**
|
|
560
|
-
* Recycle the worker thread to reclaim WASM memory.
|
|
561
|
-
* Terminates the current worker and clears the reference so
|
|
562
|
-
* ensureWorker() will spawn a fresh one on the next call.
|
|
563
|
-
*/
|
|
564
|
-
function recycleWorker() {
|
|
565
|
-
if (!parseWorker)
|
|
566
|
-
return;
|
|
567
|
-
log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
|
|
568
|
-
const w = parseWorker;
|
|
569
|
-
parseWorker = null;
|
|
570
|
-
workerParseCount = 0;
|
|
571
|
-
// Fire-and-forget: worker.terminate() can hang if WASM is stuck
|
|
572
|
-
w.terminate().catch(() => { });
|
|
573
|
-
}
|
|
574
|
-
async function requestParse(filePath, content) {
|
|
575
|
-
if (!WorkerClass) {
|
|
576
|
-
// In-process fallback
|
|
577
|
-
return (0, tree_sitter_1.extractFromSource)(filePath, content, (0, grammars_1.detectLanguage)(filePath, content), frameworkNames);
|
|
578
|
-
}
|
|
579
|
-
// Recycle the worker before the next parse if we've hit the threshold.
|
|
580
|
-
// This destroys the WASM linear memory (which can grow but never shrink)
|
|
581
|
-
// and starts a fresh worker with a clean heap.
|
|
582
|
-
if (workerParseCount >= WORKER_RECYCLE_INTERVAL) {
|
|
583
|
-
await recycleWorker();
|
|
584
|
-
}
|
|
585
|
-
const worker = await ensureWorker();
|
|
586
|
-
const id = nextId++;
|
|
587
|
-
workerParseCount++;
|
|
588
|
-
// Scale timeout for large files: base 10s + 10s per 100KB
|
|
589
|
-
const timeoutMs = PARSE_TIMEOUT_MS + Math.floor(content.length / 100_000) * 10_000;
|
|
590
|
-
return new Promise((resolve, reject) => {
|
|
591
|
-
const timer = setTimeout(() => {
|
|
592
|
-
pendingParses.delete(id);
|
|
593
|
-
log(`TIMEOUT: ${filePath} exceeded ${timeoutMs}ms — killing worker`);
|
|
594
|
-
// Reject FIRST — worker.terminate() can hang if WASM is stuck
|
|
595
|
-
parseWorker = null;
|
|
596
|
-
workerParseCount = 0;
|
|
597
|
-
reject(new Error(`Parse timed out after ${timeoutMs}ms`));
|
|
598
|
-
// Fire-and-forget: kill the stuck worker in the background
|
|
599
|
-
worker.terminate().catch(() => { });
|
|
600
|
-
}, timeoutMs);
|
|
601
|
-
pendingParses.set(id, { resolve, reject, timer });
|
|
602
|
-
worker.postMessage({ type: 'parse', id, filePath, content, frameworkNames });
|
|
603
|
-
});
|
|
604
|
-
}
|
|
605
|
-
for (let i = 0; i < files.length; i += FILE_IO_BATCH_SIZE) {
|
|
606
|
-
if (signal?.aborted) {
|
|
607
|
-
if (parseWorker)
|
|
608
|
-
parseWorker.terminate().catch(() => { });
|
|
609
|
-
return {
|
|
610
|
-
success: false,
|
|
611
|
-
filesIndexed,
|
|
612
|
-
filesSkipped,
|
|
613
|
-
filesErrored,
|
|
614
|
-
nodesCreated: totalNodes,
|
|
615
|
-
edgesCreated: totalEdges,
|
|
616
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
617
|
-
durationMs: Date.now() - startTime,
|
|
618
|
-
};
|
|
619
|
-
}
|
|
620
|
-
const batch = files.slice(i, i + FILE_IO_BATCH_SIZE);
|
|
621
|
-
// Read files in parallel (with path validation before any I/O)
|
|
622
|
-
const fileContents = await Promise.all(batch.map(async (fp) => {
|
|
623
|
-
try {
|
|
624
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, fp);
|
|
625
|
-
if (!fullPath) {
|
|
626
|
-
(0, errors_1.logWarn)('Path traversal blocked in batch reader', { filePath: fp });
|
|
627
|
-
return { filePath: fp, content: null, stats: null, error: new Error('Path traversal blocked') };
|
|
628
|
-
}
|
|
629
|
-
const content = await fsp.readFile(fullPath, 'utf-8');
|
|
630
|
-
const stats = await fsp.stat(fullPath);
|
|
631
|
-
return { filePath: fp, content, stats, error: null };
|
|
632
|
-
}
|
|
633
|
-
catch (err) {
|
|
634
|
-
return { filePath: fp, content: null, stats: null, error: err };
|
|
635
|
-
}
|
|
636
|
-
}));
|
|
637
|
-
// Send to worker for parsing, store results on main thread
|
|
638
|
-
for (const { filePath, content, stats, error } of fileContents) {
|
|
639
|
-
if (signal?.aborted) {
|
|
640
|
-
if (parseWorker)
|
|
641
|
-
parseWorker.terminate().catch(() => { });
|
|
642
|
-
return {
|
|
643
|
-
success: false,
|
|
644
|
-
filesIndexed,
|
|
645
|
-
filesSkipped,
|
|
646
|
-
filesErrored,
|
|
647
|
-
nodesCreated: totalNodes,
|
|
648
|
-
edgesCreated: totalEdges,
|
|
649
|
-
errors: [{ message: 'Aborted', severity: 'error' }, ...errors],
|
|
650
|
-
durationMs: Date.now() - startTime,
|
|
651
|
-
};
|
|
652
|
-
}
|
|
653
|
-
// Report progress before parsing (show current file being worked on)
|
|
654
|
-
onProgress?.({
|
|
655
|
-
phase: 'parsing',
|
|
656
|
-
current: processed,
|
|
657
|
-
total,
|
|
658
|
-
currentFile: filePath,
|
|
659
|
-
});
|
|
660
|
-
if (error || content === null || stats === null) {
|
|
661
|
-
processed++;
|
|
662
|
-
filesErrored++;
|
|
663
|
-
errors.push({
|
|
664
|
-
message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
|
|
665
|
-
filePath,
|
|
666
|
-
severity: 'error',
|
|
667
|
-
code: 'read_error',
|
|
668
|
-
});
|
|
669
|
-
continue;
|
|
670
|
-
}
|
|
671
|
-
// Honour config.maxFileSize. Without this check, vendored
|
|
672
|
-
// generated headers, minified bundles, and other multi-MB
|
|
673
|
-
// files get indexed despite the user setting a size cap —
|
|
674
|
-
// wasting WASM heap and the worker recycle budget on inputs
|
|
675
|
-
// the user explicitly opted out of. The single-file extractFile
|
|
676
|
-
// path already enforces this; the bulk path used to silently
|
|
677
|
-
// skip the check.
|
|
678
|
-
if (stats.size > this.config.maxFileSize) {
|
|
679
|
-
processed++;
|
|
680
|
-
filesSkipped++;
|
|
681
|
-
errors.push({
|
|
682
|
-
message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
|
|
683
|
-
filePath,
|
|
684
|
-
severity: 'warning',
|
|
685
|
-
code: 'size_exceeded',
|
|
686
|
-
});
|
|
687
|
-
onProgress?.({ phase: 'parsing', current: processed, total });
|
|
688
|
-
continue;
|
|
689
|
-
}
|
|
690
|
-
// Parse in worker thread (main thread stays unblocked).
|
|
691
|
-
// Wrapped in try/catch to handle worker timeouts and crashes gracefully.
|
|
692
|
-
let result;
|
|
693
|
-
try {
|
|
694
|
-
result = await requestParse(filePath, content);
|
|
695
|
-
}
|
|
696
|
-
catch (parseErr) {
|
|
697
|
-
processed++;
|
|
698
|
-
filesErrored++;
|
|
699
|
-
errors.push({
|
|
700
|
-
message: parseErr instanceof Error ? parseErr.message : String(parseErr),
|
|
701
|
-
filePath,
|
|
702
|
-
severity: 'error',
|
|
703
|
-
code: 'parse_error',
|
|
704
|
-
});
|
|
705
|
-
continue;
|
|
706
|
-
}
|
|
707
|
-
processed++;
|
|
708
|
-
// Store in database on main thread (SQLite is not thread-safe)
|
|
709
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
710
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
711
|
-
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
712
|
-
}
|
|
713
|
-
if (result.errors.length > 0) {
|
|
714
|
-
for (const err of result.errors) {
|
|
715
|
-
if (!err.filePath)
|
|
716
|
-
err.filePath = filePath;
|
|
717
|
-
}
|
|
718
|
-
errors.push(...result.errors);
|
|
719
|
-
}
|
|
720
|
-
if (result.nodes.length > 0) {
|
|
721
|
-
filesIndexed++;
|
|
722
|
-
totalNodes += result.nodes.length;
|
|
723
|
-
totalEdges += result.edges.length;
|
|
724
|
-
}
|
|
725
|
-
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
726
|
-
filesErrored++;
|
|
727
|
-
}
|
|
728
|
-
else {
|
|
729
|
-
filesSkipped++;
|
|
730
|
-
}
|
|
731
|
-
}
|
|
732
|
-
}
|
|
733
|
-
// Report 100% so the progress bar doesn't hang at 99%
|
|
734
|
-
onProgress?.({
|
|
735
|
-
phase: 'parsing',
|
|
736
|
-
current: total,
|
|
737
|
-
total,
|
|
738
|
-
});
|
|
739
|
-
// Yield so the shimmer worker's buffered stdout writes can flush.
|
|
740
|
-
// Worker thread stdout is proxied through the main thread's event loop,
|
|
741
|
-
// so synchronous work here blocks the animation from rendering.
|
|
742
|
-
await new Promise(resolve => setImmediate(resolve));
|
|
743
|
-
// Retry pass: files that failed due to WASM memory corruption may succeed
|
|
744
|
-
// on a fresh worker with a clean heap. Recycle before each attempt so
|
|
745
|
-
// every file gets the absolute cleanest WASM state possible.
|
|
746
|
-
const retryableErrors = errors.filter((e) => e.code === 'parse_error' && e.filePath &&
|
|
747
|
-
(e.message.includes('Worker exited') || e.message.includes('memory access out of bounds')));
|
|
748
|
-
if (retryableErrors.length > 0 && WorkerClass) {
|
|
749
|
-
log(`Retrying ${retryableErrors.length} files that failed due to WASM memory errors...`);
|
|
750
|
-
const stillFailing = [];
|
|
751
|
-
for (const errEntry of retryableErrors) {
|
|
752
|
-
const filePath = errEntry.filePath;
|
|
753
|
-
if (signal?.aborted)
|
|
754
|
-
break;
|
|
755
|
-
// Fresh worker for every retry — maximum WASM headroom
|
|
756
|
-
recycleWorker();
|
|
757
|
-
let content;
|
|
758
|
-
try {
|
|
759
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
760
|
-
if (!fullPath)
|
|
761
|
-
continue;
|
|
762
|
-
content = await fsp.readFile(fullPath, 'utf-8');
|
|
763
|
-
}
|
|
764
|
-
catch {
|
|
765
|
-
continue;
|
|
766
|
-
}
|
|
767
|
-
let result;
|
|
768
|
-
try {
|
|
769
|
-
result = await requestParse(filePath, content);
|
|
770
|
-
}
|
|
771
|
-
catch {
|
|
772
|
-
stillFailing.push(errEntry);
|
|
773
|
-
continue;
|
|
774
|
-
}
|
|
775
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
776
|
-
const language = (0, grammars_1.detectLanguage)(filePath, content);
|
|
777
|
-
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
778
|
-
this.storeExtractionResult(filePath, content, language, stats, result);
|
|
779
|
-
const idx = errors.indexOf(errEntry);
|
|
780
|
-
if (idx >= 0)
|
|
781
|
-
errors.splice(idx, 1);
|
|
782
|
-
filesErrored--;
|
|
783
|
-
filesIndexed++;
|
|
784
|
-
totalNodes += result.nodes.length;
|
|
785
|
-
totalEdges += result.edges.length;
|
|
786
|
-
log(`Retry OK: ${filePath} (${result.nodes.length} nodes)`);
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
// Last resort: for files that still crash on a clean worker, strip
|
|
790
|
-
// comment-only lines to reduce WASM memory pressure. Many compiler
|
|
791
|
-
// test files are 90%+ comments (CHECK directives) that don't contribute
|
|
792
|
-
// code nodes but consume parser memory.
|
|
793
|
-
if (stillFailing.length > 0) {
|
|
794
|
-
log(`${stillFailing.length} files still failing — retrying with comments stripped...`);
|
|
795
|
-
for (const errEntry of stillFailing) {
|
|
796
|
-
const filePath = errEntry.filePath;
|
|
797
|
-
if (signal?.aborted)
|
|
798
|
-
break;
|
|
799
|
-
recycleWorker();
|
|
800
|
-
let fullContent;
|
|
801
|
-
try {
|
|
802
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, filePath);
|
|
803
|
-
if (!fullPath)
|
|
804
|
-
continue;
|
|
805
|
-
fullContent = await fsp.readFile(fullPath, 'utf-8');
|
|
806
|
-
}
|
|
807
|
-
catch {
|
|
808
|
-
continue;
|
|
809
|
-
}
|
|
810
|
-
// Strip lines that are entirely comments (preserving line numbers
|
|
811
|
-
// by replacing with empty lines so node positions stay correct)
|
|
812
|
-
const stripped = fullContent
|
|
813
|
-
.split('\n')
|
|
814
|
-
.map(line => /^\s*\/\//.test(line) ? '' : line)
|
|
815
|
-
.join('\n');
|
|
816
|
-
let result;
|
|
817
|
-
try {
|
|
818
|
-
result = await requestParse(filePath, stripped);
|
|
819
|
-
}
|
|
820
|
-
catch {
|
|
821
|
-
continue;
|
|
822
|
-
}
|
|
823
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
824
|
-
const language = (0, grammars_1.detectLanguage)(filePath, fullContent);
|
|
825
|
-
const stats = await fsp.stat(path.join(this.rootDir, filePath));
|
|
826
|
-
this.storeExtractionResult(filePath, fullContent, language, stats, result);
|
|
827
|
-
const idx = errors.indexOf(errEntry);
|
|
828
|
-
if (idx >= 0)
|
|
829
|
-
errors.splice(idx, 1);
|
|
830
|
-
filesErrored--;
|
|
831
|
-
filesIndexed++;
|
|
832
|
-
totalNodes += result.nodes.length;
|
|
833
|
-
totalEdges += result.edges.length;
|
|
834
|
-
log(`Retry (stripped) OK: ${filePath} (${result.nodes.length} nodes)`);
|
|
835
|
-
}
|
|
836
|
-
}
|
|
837
|
-
}
|
|
838
|
-
}
|
|
839
|
-
// Shut down parse worker and clear any pending timers
|
|
840
|
-
rejectAllPending('Indexing complete');
|
|
841
|
-
if (parseWorker) {
|
|
842
|
-
parseWorker.terminate().catch(() => { });
|
|
843
|
-
}
|
|
844
|
-
return {
|
|
845
|
-
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
846
|
-
filesIndexed,
|
|
847
|
-
filesSkipped,
|
|
848
|
-
filesErrored,
|
|
849
|
-
nodesCreated: totalNodes,
|
|
850
|
-
edgesCreated: totalEdges,
|
|
851
|
-
errors,
|
|
852
|
-
durationMs: Date.now() - startTime,
|
|
853
|
-
};
|
|
854
|
-
}
|
|
855
|
-
/**
|
|
856
|
-
* Index specific files
|
|
857
|
-
*/
|
|
858
|
-
async indexFiles(filePaths) {
|
|
859
|
-
const startTime = Date.now();
|
|
860
|
-
const errors = [];
|
|
861
|
-
let filesIndexed = 0;
|
|
862
|
-
let filesSkipped = 0;
|
|
863
|
-
let filesErrored = 0;
|
|
864
|
-
let totalNodes = 0;
|
|
865
|
-
let totalEdges = 0;
|
|
866
|
-
for (const filePath of filePaths) {
|
|
867
|
-
const result = await this.indexFile(filePath);
|
|
868
|
-
if (result.errors.length > 0) {
|
|
869
|
-
errors.push(...result.errors);
|
|
870
|
-
}
|
|
871
|
-
if (result.nodes.length > 0) {
|
|
872
|
-
filesIndexed++;
|
|
873
|
-
totalNodes += result.nodes.length;
|
|
874
|
-
totalEdges += result.edges.length;
|
|
875
|
-
}
|
|
876
|
-
else if (result.errors.some((e) => e.severity === 'error')) {
|
|
877
|
-
filesErrored++;
|
|
878
|
-
}
|
|
879
|
-
else {
|
|
880
|
-
filesSkipped++;
|
|
881
|
-
}
|
|
882
|
-
}
|
|
883
|
-
return {
|
|
884
|
-
success: filesIndexed > 0 || errors.filter((e) => e.severity === 'error').length === 0,
|
|
885
|
-
filesIndexed,
|
|
886
|
-
filesSkipped,
|
|
887
|
-
filesErrored,
|
|
888
|
-
nodesCreated: totalNodes,
|
|
889
|
-
edgesCreated: totalEdges,
|
|
890
|
-
errors,
|
|
891
|
-
durationMs: Date.now() - startTime,
|
|
892
|
-
};
|
|
893
|
-
}
|
|
894
|
-
/**
|
|
895
|
-
* Index a single file
|
|
896
|
-
*/
|
|
897
|
-
async indexFile(relativePath) {
|
|
898
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath);
|
|
899
|
-
if (!fullPath) {
|
|
900
|
-
return {
|
|
901
|
-
nodes: [],
|
|
902
|
-
edges: [],
|
|
903
|
-
unresolvedReferences: [],
|
|
904
|
-
errors: [{ message: `Path traversal blocked: ${relativePath}`, filePath: relativePath, severity: 'error', code: 'path_traversal' }],
|
|
905
|
-
durationMs: 0,
|
|
906
|
-
};
|
|
907
|
-
}
|
|
908
|
-
// Read file content and stats
|
|
909
|
-
let content;
|
|
910
|
-
let stats;
|
|
911
|
-
try {
|
|
912
|
-
stats = await fsp.stat(fullPath);
|
|
913
|
-
content = await fsp.readFile(fullPath, 'utf-8');
|
|
914
|
-
}
|
|
915
|
-
catch (error) {
|
|
916
|
-
return {
|
|
917
|
-
nodes: [],
|
|
918
|
-
edges: [],
|
|
919
|
-
unresolvedReferences: [],
|
|
920
|
-
errors: [
|
|
921
|
-
{
|
|
922
|
-
message: `Failed to read file: ${error instanceof Error ? error.message : String(error)}`,
|
|
923
|
-
filePath: relativePath,
|
|
924
|
-
severity: 'error',
|
|
925
|
-
code: 'read_error',
|
|
926
|
-
},
|
|
927
|
-
],
|
|
928
|
-
durationMs: 0,
|
|
929
|
-
};
|
|
930
|
-
}
|
|
931
|
-
return this.indexFileWithContent(relativePath, content, stats);
|
|
932
|
-
}
|
|
933
|
-
/**
|
|
934
|
-
* Index a single file with pre-read content and stats.
|
|
935
|
-
* Used by the parallel batch reader to avoid redundant file I/O.
|
|
936
|
-
*/
|
|
937
|
-
async indexFileWithContent(relativePath, content, stats) {
|
|
938
|
-
// Prevent path traversal
|
|
939
|
-
const fullPath = (0, utils_1.validatePathWithinRoot)(this.rootDir, relativePath);
|
|
940
|
-
if (!fullPath) {
|
|
941
|
-
(0, errors_1.logWarn)('Path traversal blocked in indexFileWithContent', { relativePath });
|
|
942
|
-
return {
|
|
943
|
-
nodes: [],
|
|
944
|
-
edges: [],
|
|
945
|
-
unresolvedReferences: [],
|
|
946
|
-
errors: [{ message: 'Path traversal blocked', filePath: relativePath, severity: 'error', code: 'path_traversal' }],
|
|
947
|
-
durationMs: 0,
|
|
948
|
-
};
|
|
949
|
-
}
|
|
950
|
-
// Check file size
|
|
951
|
-
if (stats.size > this.config.maxFileSize) {
|
|
952
|
-
return {
|
|
953
|
-
nodes: [],
|
|
954
|
-
edges: [],
|
|
955
|
-
unresolvedReferences: [],
|
|
956
|
-
errors: [
|
|
957
|
-
{
|
|
958
|
-
message: `File exceeds max size (${stats.size} > ${this.config.maxFileSize})`,
|
|
959
|
-
filePath: relativePath,
|
|
960
|
-
severity: 'warning',
|
|
961
|
-
code: 'size_exceeded',
|
|
962
|
-
},
|
|
963
|
-
],
|
|
964
|
-
durationMs: 0,
|
|
965
|
-
};
|
|
966
|
-
}
|
|
967
|
-
// Detect language
|
|
968
|
-
const language = (0, grammars_1.detectLanguage)(relativePath, content);
|
|
969
|
-
if (!(0, grammars_1.isLanguageSupported)(language)) {
|
|
970
|
-
return {
|
|
971
|
-
nodes: [],
|
|
972
|
-
edges: [],
|
|
973
|
-
unresolvedReferences: [],
|
|
974
|
-
errors: [],
|
|
975
|
-
durationMs: 0,
|
|
976
|
-
};
|
|
977
|
-
}
|
|
978
|
-
// Extract from source. Use cached framework names if indexAll has run,
|
|
979
|
-
// otherwise detect on the spot so single-file re-index paths still emit
|
|
980
|
-
// route nodes / middleware / etc.
|
|
981
|
-
const frameworkNames = this.ensureDetectedFrameworks();
|
|
982
|
-
const result = (0, tree_sitter_1.extractFromSource)(relativePath, content, language, frameworkNames);
|
|
983
|
-
// Store in database
|
|
984
|
-
if (result.nodes.length > 0 || result.errors.length === 0) {
|
|
985
|
-
this.storeExtractionResult(relativePath, content, language, stats, result);
|
|
986
|
-
}
|
|
987
|
-
return result;
|
|
988
|
-
}
|
|
989
|
-
/**
|
|
990
|
-
* Store extraction result in database
|
|
991
|
-
*/
|
|
992
|
-
storeExtractionResult(filePath, content, language, stats, result) {
|
|
993
|
-
const contentHash = hashContent(content);
|
|
994
|
-
// Check if file already exists and hasn't changed
|
|
995
|
-
const existingFile = this.queries.getFileByPath(filePath);
|
|
996
|
-
if (existingFile && existingFile.contentHash === contentHash) {
|
|
997
|
-
return; // No changes
|
|
998
|
-
}
|
|
999
|
-
// Delete existing data for this file
|
|
1000
|
-
if (existingFile) {
|
|
1001
|
-
this.queries.deleteFile(filePath);
|
|
1002
|
-
}
|
|
1003
|
-
// Filter out nodes with missing required fields before insertion.
|
|
1004
|
-
// This prevents FK violations when edges reference nodes that would
|
|
1005
|
-
// be silently skipped by insertNode() (see issue #42).
|
|
1006
|
-
const validNodes = result.nodes.filter((n) => n.id && n.kind && n.name && n.filePath && n.language);
|
|
1007
|
-
// Insert nodes
|
|
1008
|
-
if (validNodes.length > 0) {
|
|
1009
|
-
this.queries.insertNodes(validNodes);
|
|
1010
|
-
}
|
|
1011
|
-
// Filter edges to only reference nodes that were actually inserted
|
|
1012
|
-
if (result.edges.length > 0) {
|
|
1013
|
-
const insertedIds = new Set(validNodes.map((n) => n.id));
|
|
1014
|
-
const validEdges = result.edges.filter((e) => insertedIds.has(e.source) && insertedIds.has(e.target));
|
|
1015
|
-
if (validEdges.length > 0) {
|
|
1016
|
-
this.queries.insertEdges(validEdges);
|
|
1017
|
-
}
|
|
1018
|
-
}
|
|
1019
|
-
// Insert unresolved references in batch with denormalized filePath/language
|
|
1020
|
-
if (result.unresolvedReferences.length > 0) {
|
|
1021
|
-
const insertedIds = new Set(validNodes.map((n) => n.id));
|
|
1022
|
-
const refsWithContext = result.unresolvedReferences
|
|
1023
|
-
.filter((ref) => insertedIds.has(ref.fromNodeId))
|
|
1024
|
-
.map((ref) => ({
|
|
1025
|
-
...ref,
|
|
1026
|
-
filePath: ref.filePath ?? filePath,
|
|
1027
|
-
language: ref.language ?? language,
|
|
1028
|
-
}));
|
|
1029
|
-
if (refsWithContext.length > 0) {
|
|
1030
|
-
this.queries.insertUnresolvedRefsBatch(refsWithContext);
|
|
1031
|
-
}
|
|
1032
|
-
}
|
|
1033
|
-
// Insert file record
|
|
1034
|
-
const fileRecord = {
|
|
1035
|
-
path: filePath,
|
|
1036
|
-
contentHash,
|
|
1037
|
-
language,
|
|
1038
|
-
size: stats.size,
|
|
1039
|
-
modifiedAt: stats.mtimeMs,
|
|
1040
|
-
indexedAt: Date.now(),
|
|
1041
|
-
nodeCount: result.nodes.length,
|
|
1042
|
-
errors: result.errors.length > 0 ? result.errors : undefined,
|
|
1043
|
-
};
|
|
1044
|
-
this.queries.upsertFile(fileRecord);
|
|
1045
|
-
}
|
|
1046
|
-
/**
|
|
1047
|
-
* Sync with current file state.
|
|
1048
|
-
* Uses git status as a fast path when available, falling back to full scan.
|
|
1049
|
-
*/
|
|
1050
|
-
async sync(onProgress) {
|
|
1051
|
-
await (0, grammars_1.initGrammars)(); // Initialize WASM runtime (grammars loaded lazily below)
|
|
1052
|
-
const startTime = Date.now();
|
|
1053
|
-
let filesChecked = 0;
|
|
1054
|
-
let filesAdded = 0;
|
|
1055
|
-
let filesModified = 0;
|
|
1056
|
-
let filesRemoved = 0;
|
|
1057
|
-
let nodesUpdated = 0;
|
|
1058
|
-
const changedFilePaths = [];
|
|
1059
|
-
onProgress?.({
|
|
1060
|
-
phase: 'scanning',
|
|
1061
|
-
current: 0,
|
|
1062
|
-
total: 0,
|
|
1063
|
-
});
|
|
1064
|
-
const filesToIndex = [];
|
|
1065
|
-
const gitChanges = getGitChangedFiles(this.rootDir, this.config);
|
|
1066
|
-
if (gitChanges) {
|
|
1067
|
-
// === Git fast path ===
|
|
1068
|
-
// Only inspect the files git reports as changed instead of scanning everything.
|
|
1069
|
-
filesChecked = gitChanges.modified.length + gitChanges.added.length + gitChanges.deleted.length;
|
|
1070
|
-
// Handle deleted files
|
|
1071
|
-
for (const filePath of gitChanges.deleted) {
|
|
1072
|
-
const tracked = this.queries.getFileByPath(filePath);
|
|
1073
|
-
if (tracked) {
|
|
1074
|
-
this.queries.deleteFile(filePath);
|
|
1075
|
-
filesRemoved++;
|
|
1076
|
-
}
|
|
1077
|
-
}
|
|
1078
|
-
// Handle modified files — read + hash only these files
|
|
1079
|
-
for (const filePath of gitChanges.modified) {
|
|
1080
|
-
const fullPath = path.join(this.rootDir, filePath);
|
|
1081
|
-
let content;
|
|
1082
|
-
try {
|
|
1083
|
-
content = fs.readFileSync(fullPath, 'utf-8');
|
|
1084
|
-
}
|
|
1085
|
-
catch (error) {
|
|
1086
|
-
(0, errors_1.logDebug)('Skipping unreadable file during sync', { filePath, error: String(error) });
|
|
1087
|
-
continue;
|
|
1088
|
-
}
|
|
1089
|
-
const contentHash = hashContent(content);
|
|
1090
|
-
const tracked = this.queries.getFileByPath(filePath);
|
|
1091
|
-
if (!tracked) {
|
|
1092
|
-
filesToIndex.push(filePath);
|
|
1093
|
-
changedFilePaths.push(filePath);
|
|
1094
|
-
filesAdded++;
|
|
1095
|
-
}
|
|
1096
|
-
else if (tracked.contentHash !== contentHash) {
|
|
1097
|
-
filesToIndex.push(filePath);
|
|
1098
|
-
changedFilePaths.push(filePath);
|
|
1099
|
-
filesModified++;
|
|
1100
|
-
}
|
|
1101
|
-
}
|
|
1102
|
-
// Handle added (untracked) files
|
|
1103
|
-
for (const filePath of gitChanges.added) {
|
|
1104
|
-
filesToIndex.push(filePath);
|
|
1105
|
-
changedFilePaths.push(filePath);
|
|
1106
|
-
filesAdded++;
|
|
1107
|
-
}
|
|
1108
|
-
}
|
|
1109
|
-
else {
|
|
1110
|
-
// === Fallback: full scan (non-git project or git failure) ===
|
|
1111
|
-
const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
|
|
1112
|
-
filesChecked = currentFiles.size;
|
|
1113
|
-
// Build Map for O(1) lookups instead of .find() per file
|
|
1114
|
-
const trackedFiles = this.queries.getAllFiles();
|
|
1115
|
-
const trackedMap = new Map();
|
|
1116
|
-
for (const f of trackedFiles) {
|
|
1117
|
-
trackedMap.set(f.path, f);
|
|
1118
|
-
}
|
|
1119
|
-
// Find files to remove (in DB but not on disk)
|
|
1120
|
-
for (const tracked of trackedFiles) {
|
|
1121
|
-
if (!currentFiles.has(tracked.path)) {
|
|
1122
|
-
this.queries.deleteFile(tracked.path);
|
|
1123
|
-
filesRemoved++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
// Find files to add or update
|
|
1127
|
-
for (const filePath of currentFiles) {
|
|
1128
|
-
const fullPath = path.join(this.rootDir, filePath);
|
|
1129
|
-
let content;
|
|
1130
|
-
try {
|
|
1131
|
-
content = fs.readFileSync(fullPath, 'utf-8');
|
|
1132
|
-
}
|
|
1133
|
-
catch (error) {
|
|
1134
|
-
(0, errors_1.logDebug)('Skipping unreadable file during sync', { filePath, error: String(error) });
|
|
1135
|
-
continue;
|
|
1136
|
-
}
|
|
1137
|
-
const contentHash = hashContent(content);
|
|
1138
|
-
const tracked = trackedMap.get(filePath);
|
|
1139
|
-
if (!tracked) {
|
|
1140
|
-
filesToIndex.push(filePath);
|
|
1141
|
-
changedFilePaths.push(filePath);
|
|
1142
|
-
filesAdded++;
|
|
1143
|
-
}
|
|
1144
|
-
else if (tracked.contentHash !== contentHash) {
|
|
1145
|
-
filesToIndex.push(filePath);
|
|
1146
|
-
changedFilePaths.push(filePath);
|
|
1147
|
-
filesModified++;
|
|
1148
|
-
}
|
|
1149
|
-
}
|
|
1150
|
-
}
|
|
1151
|
-
// Load only grammars needed for changed files
|
|
1152
|
-
if (filesToIndex.length > 0) {
|
|
1153
|
-
const neededLanguages = [...new Set(filesToIndex.map((f) => (0, grammars_1.detectLanguage)(f)))];
|
|
1154
|
-
// .h files default to 'c' but may be C++ — ensure cpp grammar is loaded
|
|
1155
|
-
if (neededLanguages.includes('c') && !neededLanguages.includes('cpp')) {
|
|
1156
|
-
neededLanguages.push('cpp');
|
|
1157
|
-
}
|
|
1158
|
-
await (0, grammars_1.loadGrammarsForLanguages)(neededLanguages);
|
|
1159
|
-
}
|
|
1160
|
-
// Index changed files
|
|
1161
|
-
const total = filesToIndex.length;
|
|
1162
|
-
for (let i = 0; i < filesToIndex.length; i++) {
|
|
1163
|
-
const filePath = filesToIndex[i];
|
|
1164
|
-
onProgress?.({
|
|
1165
|
-
phase: 'parsing',
|
|
1166
|
-
current: i + 1,
|
|
1167
|
-
total,
|
|
1168
|
-
currentFile: filePath,
|
|
1169
|
-
});
|
|
1170
|
-
const result = await this.indexFile(filePath);
|
|
1171
|
-
nodesUpdated += result.nodes.length;
|
|
1172
|
-
}
|
|
1173
|
-
return {
|
|
1174
|
-
filesChecked,
|
|
1175
|
-
filesAdded,
|
|
1176
|
-
filesModified,
|
|
1177
|
-
filesRemoved,
|
|
1178
|
-
nodesUpdated,
|
|
1179
|
-
durationMs: Date.now() - startTime,
|
|
1180
|
-
changedFilePaths: changedFilePaths.length > 0 ? changedFilePaths : undefined,
|
|
1181
|
-
};
|
|
1182
|
-
}
|
|
1183
|
-
/**
|
|
1184
|
-
* Get files that have changed since last index.
|
|
1185
|
-
* Uses git status as a fast path when available, falling back to full scan.
|
|
1186
|
-
*/
|
|
1187
|
-
getChangedFiles() {
|
|
1188
|
-
const gitChanges = getGitChangedFiles(this.rootDir, this.config);
|
|
1189
|
-
if (gitChanges) {
|
|
1190
|
-
// === Git fast path ===
|
|
1191
|
-
const added = [];
|
|
1192
|
-
const modified = [];
|
|
1193
|
-
const removed = [];
|
|
1194
|
-
// Deleted files — only report if tracked in DB
|
|
1195
|
-
for (const filePath of gitChanges.deleted) {
|
|
1196
|
-
const tracked = this.queries.getFileByPath(filePath);
|
|
1197
|
-
if (tracked) {
|
|
1198
|
-
removed.push(filePath);
|
|
1199
|
-
}
|
|
1200
|
-
}
|
|
1201
|
-
// Modified files — read + hash only these, compare with DB
|
|
1202
|
-
for (const filePath of gitChanges.modified) {
|
|
1203
|
-
const fullPath = path.join(this.rootDir, filePath);
|
|
1204
|
-
let content;
|
|
1205
|
-
try {
|
|
1206
|
-
content = fs.readFileSync(fullPath, 'utf-8');
|
|
1207
|
-
}
|
|
1208
|
-
catch (error) {
|
|
1209
|
-
(0, errors_1.logDebug)('Skipping unreadable file while detecting changes', { filePath, error: String(error) });
|
|
1210
|
-
continue;
|
|
1211
|
-
}
|
|
1212
|
-
const contentHash = hashContent(content);
|
|
1213
|
-
const tracked = this.queries.getFileByPath(filePath);
|
|
1214
|
-
if (!tracked) {
|
|
1215
|
-
added.push(filePath);
|
|
1216
|
-
}
|
|
1217
|
-
else if (tracked.contentHash !== contentHash) {
|
|
1218
|
-
modified.push(filePath);
|
|
1219
|
-
}
|
|
1220
|
-
}
|
|
1221
|
-
// Added (untracked) files
|
|
1222
|
-
for (const filePath of gitChanges.added) {
|
|
1223
|
-
added.push(filePath);
|
|
1224
|
-
}
|
|
1225
|
-
return { added, modified, removed };
|
|
1226
|
-
}
|
|
1227
|
-
// === Fallback: full scan (non-git project or git failure) ===
|
|
1228
|
-
const currentFiles = new Set(scanDirectory(this.rootDir, this.config));
|
|
1229
|
-
const trackedFiles = this.queries.getAllFiles();
|
|
1230
|
-
// Build Map for O(1) lookups
|
|
1231
|
-
const trackedMap = new Map();
|
|
1232
|
-
for (const f of trackedFiles) {
|
|
1233
|
-
trackedMap.set(f.path, f);
|
|
1234
|
-
}
|
|
1235
|
-
const added = [];
|
|
1236
|
-
const modified = [];
|
|
1237
|
-
const removed = [];
|
|
1238
|
-
// Find removed files
|
|
1239
|
-
for (const tracked of trackedFiles) {
|
|
1240
|
-
if (!currentFiles.has(tracked.path)) {
|
|
1241
|
-
removed.push(tracked.path);
|
|
1242
|
-
}
|
|
1243
|
-
}
|
|
1244
|
-
// Find added and modified files
|
|
1245
|
-
for (const filePath of currentFiles) {
|
|
1246
|
-
const fullPath = path.join(this.rootDir, filePath);
|
|
1247
|
-
let content;
|
|
1248
|
-
try {
|
|
1249
|
-
content = fs.readFileSync(fullPath, 'utf-8');
|
|
1250
|
-
}
|
|
1251
|
-
catch (error) {
|
|
1252
|
-
(0, errors_1.logDebug)('Skipping unreadable file while detecting changes', { filePath, error: String(error) });
|
|
1253
|
-
continue;
|
|
1254
|
-
}
|
|
1255
|
-
const contentHash = hashContent(content);
|
|
1256
|
-
const tracked = trackedMap.get(filePath);
|
|
1257
|
-
if (!tracked) {
|
|
1258
|
-
added.push(filePath);
|
|
1259
|
-
}
|
|
1260
|
-
else if (tracked.contentHash !== contentHash) {
|
|
1261
|
-
modified.push(filePath);
|
|
1262
|
-
}
|
|
1263
|
-
}
|
|
1264
|
-
return { added, modified, removed };
|
|
1265
|
-
}
|
|
1266
|
-
}
|
|
1267
|
-
// Public export of the orchestrator class defined above.
exports.ExtractionOrchestrator = ExtractionOrchestrator;
|
|
1268
|
-
// Re-export selected helpers from the sibling modules "./tree-sitter" and "./grammars".
// Each one is defined as an enumerable getter, so reading the property returns the
// source module's current value at access time rather than a copy taken at load time.
// NOTE(review): keep this literal defineProperty form; tools that statically detect
// CommonJS export names may depend on it — confirm before restructuring.
|
|
1269
|
-
var tree_sitter_2 = require("./tree-sitter");
|
|
1270
|
-
Object.defineProperty(exports, "extractFromSource", { enumerable: true, get: function () { return tree_sitter_2.extractFromSource; } });
|
|
1271
|
-
var grammars_2 = require("./grammars");
|
|
1272
|
-
Object.defineProperty(exports, "detectLanguage", { enumerable: true, get: function () { return grammars_2.detectLanguage; } });
|
|
1273
|
-
Object.defineProperty(exports, "isLanguageSupported", { enumerable: true, get: function () { return grammars_2.isLanguageSupported; } });
|
|
1274
|
-
Object.defineProperty(exports, "isGrammarLoaded", { enumerable: true, get: function () { return grammars_2.isGrammarLoaded; } });
|
|
1275
|
-
Object.defineProperty(exports, "getSupportedLanguages", { enumerable: true, get: function () { return grammars_2.getSupportedLanguages; } });
|
|
1276
|
-
Object.defineProperty(exports, "initGrammars", { enumerable: true, get: function () { return grammars_2.initGrammars; } });
|
|
1277
|
-
Object.defineProperty(exports, "loadGrammarsForLanguages", { enumerable: true, get: function () { return grammars_2.loadGrammarsForLanguages; } });
|
|
1278
|
-
Object.defineProperty(exports, "loadAllGrammars", { enumerable: true, get: function () { return grammars_2.loadAllGrammars; } });
|
|
1279
|
-
//# sourceMappingURL=index.js.map
|