gitnexus 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/dist/cli/analyze.js +28 -3
  2. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  3. package/dist/core/group/extractors/fs-utils.js +24 -0
  4. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  5. package/dist/core/group/extractors/grpc-extractor.js +313 -191
  6. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  7. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  8. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  10. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  12. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  14. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  16. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  18. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  20. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  21. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  22. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  23. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  24. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  25. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  26. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  27. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  28. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  29. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  30. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  31. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  32. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  33. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  34. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  35. package/dist/core/group/extractors/http-route-extractor.js +201 -238
  36. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  37. package/dist/core/group/extractors/manifest-extractor.js +235 -0
  38. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  39. package/dist/core/group/extractors/topic-extractor.js +55 -192
  40. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  41. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  42. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  43. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  44. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  45. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  46. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  47. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  48. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  49. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  50. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  51. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  52. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  54. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  55. package/dist/core/ingestion/binding-accumulator.js +29 -25
  56. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  57. package/dist/core/ingestion/import-processor.js +1 -1
  58. package/dist/core/ingestion/language-config.js +1 -1
  59. package/dist/core/ingestion/language-provider.d.ts +8 -0
  60. package/dist/core/ingestion/languages/ruby.js +15 -0
  61. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  62. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  63. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  64. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  65. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  66. package/dist/core/ingestion/method-types.d.ts +4 -0
  67. package/dist/core/ingestion/model/resolve.js +103 -48
  68. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  69. package/dist/core/ingestion/model/semantic-model.js +1 -1
  70. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  71. package/dist/core/ingestion/model/symbol-table.js +7 -7
  72. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  73. package/dist/core/ingestion/mro-processor.js +1 -1
  74. package/dist/core/ingestion/parsing-processor.js +54 -42
  75. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  76. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  77. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  78. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  79. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  80. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  81. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  82. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  83. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  84. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  85. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  86. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  87. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  88. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  89. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  90. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  91. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  92. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  93. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  94. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  95. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  96. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  97. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  98. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  99. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  100. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  101. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  102. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  103. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  104. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  105. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  106. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  107. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  108. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  109. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  110. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  111. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
  112. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
  113. package/dist/core/ingestion/pipeline.d.ts +16 -10
  114. package/dist/core/ingestion/pipeline.js +66 -1534
  115. package/dist/core/ingestion/process-processor.js +1 -1
  116. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  117. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  118. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  119. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  120. package/dist/core/ingestion/utils/env.d.ts +10 -0
  121. package/dist/core/ingestion/utils/env.js +10 -0
  122. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  123. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  124. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  125. package/dist/core/lbug/lbug-adapter.js +66 -24
  126. package/package.json +3 -3
  127. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  128. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  129. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  130. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  131. package/vendor/tree-sitter-proto/package.json +18 -0
  132. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  133. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  134. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  135. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  136. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -1,1549 +1,81 @@
1
- import { createKnowledgeGraph } from '../graph/graph.js';
2
- import { BindingAccumulator, enrichExportedTypeMap, } from './binding-accumulator.js';
3
- import { processStructure } from './structure-processor.js';
4
- import { processMarkdown } from './markdown-processor.js';
5
- import { processCobol, isCobolFile, isJclFile } from './cobol-processor.js';
6
- import { processParsing } from './parsing-processor.js';
7
- import { processImports, processImportsFromExtracted, buildImportResolutionContext, } from './import-processor.js';
8
- import { EMPTY_INDEX } from './import-resolvers/utils.js';
9
- import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, processNextjsFetchRoutes, extractFetchCallsFromFiles, seedCrossFileReceiverTypes, buildImportedReturnTypes, buildImportedRawReturnTypes, buildExportedTypeMapFromGraph, } from './call-processor.js';
10
- import { buildHeritageMap } from './model/heritage-map.js';
11
- import { nextjsFileToRouteURL, normalizeFetchURL } from './route-extractors/nextjs.js';
12
- import { expoFileToRouteURL } from './route-extractors/expo.js';
13
- import { phpFileToRouteURL } from './route-extractors/php.js';
14
- import { extractResponseShapes, extractPHPResponseShapes, } from './route-extractors/response-shapes.js';
15
- import { extractMiddlewareChain, extractNextjsMiddlewareConfig, compileMatcher, compiledMatcherMatchesRoute, } from './route-extractors/middleware.js';
16
- import { generateId } from '../../lib/utils.js';
17
- import { processHeritage, processHeritageFromExtracted, extractExtractedHeritageFromFiles, getHeritageStrategyForLanguage, } from './heritage-processor.js';
18
- import { computeMRO } from './mro-processor.js';
19
- import { processCommunities } from './community-processor.js';
20
- import { processProcesses } from './process-processor.js';
21
- import { createResolutionContext } from './model/resolution-context.js';
22
- import { createASTCache } from './ast-cache.js';
23
- import { getLanguageFromFilename } from '../../_shared/index.js';
24
- import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
25
- import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
26
- import { providers, getProviderForFile } from './languages/index.js';
27
- import { createWorkerPool } from './workers/worker-pool.js';
28
- import fs from 'node:fs';
29
- import path from 'node:path';
30
- import { fileURLToPath, pathToFileURL } from 'node:url';
31
- const isDev = process.env.NODE_ENV === 'development';
32
- const EXPO_NAV_PATTERNS = [
33
- /router\.(push|replace|navigate)\(\s*['"`]([^'"`]+)['"`]/g,
34
- /<Link\s+[^>]*href=\s*['"`]([^'"`]+)['"`]/g,
35
- ];
36
- /** Kahn's algorithm: returns files grouped by topological level.
37
- * Files in the same level have no mutual dependencies — safe to process in parallel.
38
- * Files in cycles are returned as a final group (no cross-cycle propagation). */
39
- export function topologicalLevelSort(importMap) {
40
- // Build in-degree map and reverse dependency map
41
- const inDegree = new Map();
42
- const reverseDeps = new Map();
43
- for (const [file, deps] of importMap) {
44
- if (!inDegree.has(file))
45
- inDegree.set(file, 0);
46
- for (const dep of deps) {
47
- if (!inDegree.has(dep))
48
- inDegree.set(dep, 0);
49
- // file imports dep, so dep must be processed before file
50
- // In Kahn's terms: dep → file (dep is a prerequisite of file)
51
- inDegree.set(file, (inDegree.get(file) ?? 0) + 1);
52
- let rev = reverseDeps.get(dep);
53
- if (!rev) {
54
- rev = [];
55
- reverseDeps.set(dep, rev);
56
- }
57
- rev.push(file);
58
- }
59
- }
60
- // BFS from zero-in-degree nodes, grouping by level
61
- const levels = [];
62
- let currentLevel = [...inDegree.entries()].filter(([, d]) => d === 0).map(([f]) => f);
63
- while (currentLevel.length > 0) {
64
- levels.push(currentLevel);
65
- const nextLevel = [];
66
- for (const file of currentLevel) {
67
- for (const dependent of reverseDeps.get(file) ?? []) {
68
- const newDeg = (inDegree.get(dependent) ?? 1) - 1;
69
- inDegree.set(dependent, newDeg);
70
- if (newDeg === 0)
71
- nextLevel.push(dependent);
72
- }
73
- }
74
- currentLevel = nextLevel;
75
- }
76
- // Files still with positive in-degree are in cycles — add as final group
77
- const cycleFiles = [...inDegree.entries()].filter(([, d]) => d > 0).map(([f]) => f);
78
- if (cycleFiles.length > 0) {
79
- levels.push(cycleFiles);
80
- }
81
- return { levels, cycleCount: cycleFiles.length };
82
- }
83
- /** Max bytes of source content to load per parse chunk. Each chunk's source +
84
- * parsed ASTs + extracted records + worker serialization overhead all live in
85
- * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
86
- * peak working memory per chunk after parse expansion. */
87
- const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
88
- /** Max AST trees to keep in LRU cache */
89
- const AST_CACHE_CAP = 50;
90
- /** Minimum percentage of files that must benefit from cross-file seeding to justify the re-resolution pass. */
91
- const CROSS_FILE_SKIP_THRESHOLD = 0.03;
92
- /** Hard cap on files re-processed during cross-file propagation. */
93
- const MAX_CROSS_FILE_REPROCESS = 2000;
94
- /** Node labels that represent top-level importable symbols.
95
- * Excludes Method, Property, Constructor (accessed via receiver, not directly imported),
96
- * and structural labels (File, Folder, Package, Module, Project, etc.). */
97
- const IMPORTABLE_SYMBOL_LABELS = new Set([
98
- 'Function',
99
- 'Class',
100
- 'Interface',
101
- 'Struct',
102
- 'Enum',
103
- 'Trait',
104
- 'TypeAlias',
105
- 'Const',
106
- 'Static',
107
- 'Record',
108
- 'Union',
109
- 'Typedef',
110
- 'Macro',
111
- ]);
112
- /** Max synthetic bindings per importing file — prevents memory bloat for
113
- * C/C++ files that include many large headers. */
114
- const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
115
- /** Pre-computed language sets derived from providers at module load. */
116
- const WILDCARD_LANGUAGES = new Set(Object.values(providers)
117
- .filter((p) => p.importSemantics === 'wildcard')
118
- .map((p) => p.id));
119
- const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
120
- .filter((p) => p.importSemantics !== 'named')
121
- .map((p) => p.id));
122
- /** Check if a language uses wildcard (whole-module) import semantics.
123
- * Derived from LanguageProvider.importSemantics — no hardcoded set needed. */
124
- function isWildcardImportLanguage(lang) {
125
- return WILDCARD_LANGUAGES.has(lang);
126
- }
127
- /** Check if a language needs synthesis before call resolution.
128
- * True for wildcard-import languages AND namespace-import languages (Python). */
129
- function needsSynthesis(lang) {
130
- return SYNTHESIS_LANGUAGES.has(lang);
131
- }
132
- /** Synthesize namedImportMap entries for languages with whole-module imports.
133
- * These languages (Go, Ruby, C/C++, Swift, Python) import all exported symbols from a
134
- * file, not specific named symbols. After parsing, we know which symbols each file
135
- * exports (via graph isExported), so we can expand ImportMap edges into per-symbol
136
- * bindings that Phase 14 can use for cross-file type propagation. */
137
- function synthesizeWildcardImportBindings(graph, ctx) {
138
- // Pre-compute exported symbols per file from graph (single pass)
139
- const exportedSymbolsByFile = new Map();
140
- graph.forEachNode((node) => {
141
- if (!node.properties?.isExported)
142
- return;
143
- if (!IMPORTABLE_SYMBOL_LABELS.has(node.label))
144
- return;
145
- const fp = node.properties.filePath;
146
- const name = node.properties.name;
147
- if (!fp || !name)
148
- return;
149
- let symbols = exportedSymbolsByFile.get(fp);
150
- if (!symbols) {
151
- symbols = [];
152
- exportedSymbolsByFile.set(fp, symbols);
153
- }
154
- symbols.push({ name, filePath: fp });
155
- });
156
- if (exportedSymbolsByFile.size === 0)
157
- return 0;
158
- // Build a merged import map: ctx.importMap has file-based imports (Ruby, C/C++),
159
- // but Go/C# package imports use graph IMPORTS edges + PackageMap instead.
160
- // Collect graph-level IMPORTS edges for wildcard languages missing from ctx.importMap.
161
- const FILE_PREFIX = 'File:';
162
- const graphImports = new Map();
163
- graph.forEachRelationship((rel) => {
164
- if (rel.type !== 'IMPORTS')
165
- return;
166
- if (!rel.sourceId.startsWith(FILE_PREFIX) || !rel.targetId.startsWith(FILE_PREFIX))
167
- return;
168
- const srcFile = rel.sourceId.slice(FILE_PREFIX.length);
169
- const tgtFile = rel.targetId.slice(FILE_PREFIX.length);
170
- const lang = getLanguageFromFilename(srcFile);
171
- if (!lang || !isWildcardImportLanguage(lang))
172
- return;
173
- // Only add if not already in ctx.importMap (avoid duplicates)
174
- if (ctx.importMap.get(srcFile)?.has(tgtFile))
175
- return;
176
- let set = graphImports.get(srcFile);
177
- if (!set) {
178
- set = new Set();
179
- graphImports.set(srcFile, set);
180
- }
181
- set.add(tgtFile);
182
- });
183
- let totalSynthesized = 0;
184
- // Helper: synthesize bindings for a file given its imported files
185
- const synthesizeForFile = (filePath, importedFiles) => {
186
- let fileBindings = ctx.namedImportMap.get(filePath);
187
- let fileCount = fileBindings?.size ?? 0;
188
- for (const importedFile of importedFiles) {
189
- const exportedSymbols = exportedSymbolsByFile.get(importedFile);
190
- if (!exportedSymbols)
191
- continue;
192
- for (const sym of exportedSymbols) {
193
- if (fileCount >= MAX_SYNTHETIC_BINDINGS_PER_FILE)
194
- return;
195
- if (fileBindings?.has(sym.name))
196
- continue;
197
- if (!fileBindings) {
198
- fileBindings = new Map();
199
- ctx.namedImportMap.set(filePath, fileBindings);
200
- }
201
- fileBindings.set(sym.name, {
202
- sourcePath: importedFile,
203
- exportedName: sym.name,
204
- });
205
- fileCount++;
206
- totalSynthesized++;
207
- }
208
- }
209
- };
210
- // Process files from ctx.importMap (Ruby, C/C++, Swift file-based imports)
211
- for (const [filePath, importedFiles] of ctx.importMap) {
212
- const lang = getLanguageFromFilename(filePath);
213
- if (!lang || !isWildcardImportLanguage(lang))
214
- continue;
215
- synthesizeForFile(filePath, importedFiles);
216
- }
217
- // Process files from graph IMPORTS edges (Go and other wildcard-import languages)
218
- for (const [filePath, importedFiles] of graphImports) {
219
- synthesizeForFile(filePath, importedFiles);
220
- }
221
- // Build module alias map for Python namespace imports.
222
- // `import models` in app.py → ctx.moduleAliasMap['app.py']['models'] = 'models.py'
223
- // Enables `models.User()` to resolve to models.py:User without ambiguous symbol expansion.
224
- const buildPythonModuleAliasForFile = (callerFile, importedFiles) => {
225
- let aliasMap = ctx.moduleAliasMap.get(callerFile);
226
- for (const importedFile of importedFiles) {
227
- // Derive the module alias from the imported filename stem (e.g. "models.py" → "models")
228
- const lastSlash = importedFile.lastIndexOf('/');
229
- const base = lastSlash >= 0 ? importedFile.slice(lastSlash + 1) : importedFile;
230
- const dot = base.lastIndexOf('.');
231
- const stem = dot >= 0 ? base.slice(0, dot) : base;
232
- if (!stem)
233
- continue;
234
- if (!aliasMap) {
235
- aliasMap = new Map();
236
- ctx.moduleAliasMap.set(callerFile, aliasMap);
237
- }
238
- aliasMap.set(stem, importedFile);
239
- }
240
- };
241
- for (const [filePath, importedFiles] of ctx.importMap) {
242
- const provider = getProviderForFile(filePath);
243
- if (!provider || provider.importSemantics !== 'namespace')
244
- continue;
245
- buildPythonModuleAliasForFile(filePath, importedFiles);
246
- }
247
- return totalSynthesized;
248
- }
249
- /** Phase 14: Cross-file binding propagation.
250
- * Seeds downstream files with resolved type bindings from upstream exports.
251
- * Files are processed in topological import order so upstream bindings are
252
- * available when downstream files are re-resolved. */
253
- async function runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
254
- // For the worker path, buildTypeEnv runs inside workers without SymbolTable,
255
- // so exported bindings must be collected from graph + SymbolTable in main thread.
256
- if (exportedTypeMap.size === 0 && graph.nodeCount > 0) {
257
- const graphExports = buildExportedTypeMapFromGraph(graph, ctx.model.symbols);
258
- for (const [fp, exports] of graphExports)
259
- exportedTypeMap.set(fp, exports);
260
- }
261
- if (exportedTypeMap.size === 0 || ctx.namedImportMap.size === 0)
262
- return;
263
- const allPathSet = new Set(allPaths);
264
- const { levels, cycleCount } = topologicalLevelSort(ctx.importMap);
265
- // Cycle diagnostic: only log when actual cycles detected (cycleCount from Kahn's BFS)
266
- if (isDev && cycleCount > 0) {
267
- console.log(`🔄 ${cycleCount} files in import cycles (skipped for cross-file propagation)`);
268
- }
269
- // Quick count of files with cross-file binding gaps (early exit once threshold exceeded)
270
- let filesWithGaps = 0;
271
- const gapThreshold = Math.max(1, Math.ceil(totalFiles * CROSS_FILE_SKIP_THRESHOLD));
272
- outer: for (const level of levels) {
273
- for (const filePath of level) {
274
- const imports = ctx.namedImportMap.get(filePath);
275
- if (!imports)
276
- continue;
277
- for (const [, binding] of imports) {
278
- const upstream = exportedTypeMap.get(binding.sourcePath);
279
- if (upstream?.has(binding.exportedName)) {
280
- filesWithGaps++;
281
- break;
282
- }
283
- const def = ctx.model.symbols.lookupExactFull(binding.sourcePath, binding.exportedName);
284
- if (def?.returnType) {
285
- filesWithGaps++;
286
- break;
287
- }
288
- }
289
- if (filesWithGaps >= gapThreshold)
290
- break outer;
291
- }
292
- }
293
- const gapRatio = totalFiles > 0 ? filesWithGaps / totalFiles : 0;
294
- if (gapRatio < CROSS_FILE_SKIP_THRESHOLD && filesWithGaps < gapThreshold) {
295
- if (isDev) {
296
- console.log(`⏭️ Cross-file re-resolution skipped (${filesWithGaps}/${totalFiles} files, ${(gapRatio * 100).toFixed(1)}% < ${CROSS_FILE_SKIP_THRESHOLD * 100}% threshold)`);
297
- }
298
- return;
299
- }
300
- onProgress({
301
- phase: 'parsing',
302
- percent: 82,
303
- message: `Cross-file type propagation (${filesWithGaps}+ files)...`,
304
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
305
- });
306
- let crossFileResolved = 0;
307
- const crossFileStart = Date.now();
308
- const astCache = createASTCache(AST_CACHE_CAP);
309
- for (const level of levels) {
310
- const levelCandidates = [];
311
- for (const filePath of level) {
312
- if (crossFileResolved + levelCandidates.length >= MAX_CROSS_FILE_REPROCESS)
313
- break;
314
- const imports = ctx.namedImportMap.get(filePath);
315
- if (!imports)
316
- continue;
317
- const seeded = new Map();
318
- for (const [localName, binding] of imports) {
319
- const upstream = exportedTypeMap.get(binding.sourcePath);
320
- if (upstream) {
321
- const type = upstream.get(binding.exportedName);
322
- if (type)
323
- seeded.set(localName, type);
324
- }
325
- }
326
- const importedReturns = buildImportedReturnTypes(filePath, ctx.namedImportMap, ctx.model.symbols);
327
- const importedRawReturns = buildImportedRawReturnTypes(filePath, ctx.namedImportMap, ctx.model.symbols);
328
- if (seeded.size === 0 && importedReturns.size === 0)
329
- continue;
330
- if (!allPathSet.has(filePath))
331
- continue;
332
- const lang = getLanguageFromFilename(filePath);
333
- if (!lang || !isLanguageAvailable(lang))
334
- continue;
335
- levelCandidates.push({ filePath, seeded, importedReturns, importedRawReturns });
336
- }
337
- if (levelCandidates.length === 0)
338
- continue;
339
- const levelPaths = levelCandidates.map((c) => c.filePath);
340
- const contentMap = await readFileContents(repoPath, levelPaths);
341
- for (const { filePath, seeded, importedReturns, importedRawReturns } of levelCandidates) {
342
- const content = contentMap.get(filePath);
343
- if (!content)
344
- continue;
345
- const reFile = [{ path: filePath, content }];
346
- const bindings = new Map();
347
- if (seeded.size > 0)
348
- bindings.set(filePath, seeded);
349
- const importedReturnTypesMap = new Map();
350
- if (importedReturns.size > 0) {
351
- importedReturnTypesMap.set(filePath, importedReturns);
352
- }
353
- const importedRawReturnTypesMap = new Map();
354
- if (importedRawReturns.size > 0) {
355
- importedRawReturnTypesMap.set(filePath, importedRawReturns);
356
- }
357
- await processCalls(graph, reFile, astCache, ctx, undefined, exportedTypeMap, bindings.size > 0 ? bindings : undefined, importedReturnTypesMap.size > 0 ? importedReturnTypesMap : undefined, importedRawReturnTypesMap.size > 0 ? importedRawReturnTypesMap : undefined);
358
- crossFileResolved++;
359
- }
360
- if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) {
361
- if (isDev)
362
- console.log(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`);
363
- break;
364
- }
365
- }
366
- astCache.clear();
367
- if (isDev) {
368
- const elapsed = Date.now() - crossFileStart;
369
- const totalElapsed = Date.now() - pipelineStart;
370
- const reResolutionPct = totalElapsed > 0 ? ((elapsed / totalElapsed) * 100).toFixed(1) : '0';
371
- console.log(`🔗 Cross-file re-resolution: ${crossFileResolved} candidates re-processed` +
372
- ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`);
373
- }
374
- }
375
- /**
376
- * Phase 1+2: Scan repository paths, build file/folder structure, process markdown.
377
- *
378
- * @reads repoPath (filesystem)
379
- * @writes graph (File, Folder nodes + CONTAINS edges; Markdown sections + cross-links)
380
- */
381
- async function runScanAndStructure(repoPath, graph, onProgress) {
382
- // ── Phase 1: Scan paths only (no content read) ─────────────────────
383
- onProgress({
384
- phase: 'extracting',
385
- percent: 0,
386
- message: 'Scanning repository...',
387
- });
388
- const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
389
- const scanProgress = Math.round((current / total) * 15);
390
- onProgress({
391
- phase: 'extracting',
392
- percent: scanProgress,
393
- message: 'Scanning repository...',
394
- detail: filePath,
395
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
396
- });
397
- });
398
- const totalFiles = scannedFiles.length;
399
- onProgress({
400
- phase: 'extracting',
401
- percent: 15,
402
- message: 'Repository scanned successfully',
403
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
404
- });
405
- // ── Phase 2: Structure (paths only — no content needed) ────────────
406
- onProgress({
407
- phase: 'structure',
408
- percent: 15,
409
- message: 'Analyzing project structure...',
410
- stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
411
- });
412
- const allPaths = scannedFiles.map((f) => f.path);
413
- processStructure(graph, allPaths);
414
- onProgress({
415
- phase: 'structure',
416
- percent: 20,
417
- message: 'Project structure analyzed',
418
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
419
- });
420
- // ── Custom (non-tree-sitter) processors ─────────────────────────────
421
- // Each custom processor follows the pattern in markdown-processor.ts:
422
- // 1. Export a process function: (graph, files, allPathSet) => result
423
- // 2. Export a file detection function: (path) => boolean
424
- // 3. Filter files by extension, write nodes/edges directly to graph
425
- // To add a new language: create a new processor file, import it here,
426
- // and add a filter-read-call-log block following the pattern below.
427
- // ── Phase 2.5: Markdown processing (headings + cross-links) ────────
428
- const mdScanned = scannedFiles.filter((f) => f.path.endsWith('.md') || f.path.endsWith('.mdx'));
429
- if (mdScanned.length > 0) {
430
- const mdContents = await readFileContents(repoPath, mdScanned.map((f) => f.path));
431
- const mdFiles = mdScanned
432
- .filter((f) => mdContents.has(f.path))
433
- .map((f) => ({ path: f.path, content: mdContents.get(f.path) }));
434
- const allPathSet = new Set(allPaths);
435
- const mdResult = processMarkdown(graph, mdFiles, allPathSet);
436
- if (isDev) {
437
- console.log(` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`);
438
- }
439
- }
440
- // ── Phase 2.6: COBOL processing (regex extraction, no tree-sitter) ──
441
- const cobolScanned = scannedFiles.filter((f) => isCobolFile(f.path) || isJclFile(f.path));
442
- if (cobolScanned.length > 0) {
443
- const cobolContents = await readFileContents(repoPath, cobolScanned.map((f) => f.path));
444
- const cobolFiles = cobolScanned
445
- .filter((f) => cobolContents.has(f.path))
446
- .map((f) => ({ path: f.path, content: cobolContents.get(f.path) }));
447
- const allPathSet = new Set(allPaths);
448
- const cobolResult = processCobol(graph, cobolFiles, allPathSet);
449
- if (isDev) {
450
- console.log(` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`);
451
- if (cobolResult.execSqlBlocks > 0 ||
452
- cobolResult.execCicsBlocks > 0 ||
453
- cobolResult.entryPoints > 0) {
454
- console.log(` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`);
455
- }
456
- if (cobolResult.jclJobs > 0) {
457
- console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`);
458
- }
459
- }
460
- }
461
- return { scannedFiles, allPaths, totalFiles };
462
- }
463
1
  /**
464
- * Phase 3+4: Chunked parse + resolve loop.
465
- *
466
- * Reads source in byte-budget chunks (~20MB each). For each chunk:
467
- * 1. Parse via worker pool (or sequential fallback)
468
- * 2. Resolve imports from extracted data
469
- * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
470
- * 4. Resolve heritage + routes per chunk; defer worker CALLS until all chunks
471
- * have contributed heritage so interface-dispatch implementor map is complete
472
- * 5. Collect TypeEnv bindings for cross-file propagation
2
+ * Pipeline orchestrator — dependency-ordered ingestion pipeline.
473
3
  *
474
- * State accumulated across chunks: symbolTable, importMap, namedImportMap,
475
- * moduleAliasMap (all via ResolutionContext), exportedTypeMap, workerTypeEnvBindings.
4
+ * The pipeline is composed of named phases with explicit dependencies.
5
+ * Each phase is defined in its own file under `pipeline-phases/`.
6
+ * The runner in `pipeline-phases/runner.ts` executes phases in
7
+ * topological order, passing typed outputs from upstream phases as
8
+ * inputs to downstream phases.
476
9
  *
477
- * @reads graph (structure nodes from Phase 1+2)
478
- * @reads allPaths (from scan phase)
479
- * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
480
- * @writes ctx.symbolTable, ctx.importMap, ctx.namedImportMap, ctx.moduleAliasMap
10
+ * To add a new phase:
11
+ * 1. Create a new file in `pipeline-phases/` following the pattern
12
+ * 2. Export it from `pipeline-phases/index.ts`
13
+ * 3. Add it to the `ALL_PHASES` array below
481
14
  *
482
- * Follow-up from PR review: MethodExtractor (FieldExtractor parity) and optional
483
- * METHOD_IMPLEMENTS graph edges to make dispatch queryable without an in-memory map.
15
+ * See ARCHITECTURE.md for the full phase dependency diagram.
484
16
  */
485
- async function runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress, options) {
486
- const symbolTable = ctx.model.symbols;
487
- const parseableScanned = scannedFiles.filter((f) => {
488
- const lang = getLanguageFromFilename(f.path);
489
- return lang && isLanguageAvailable(lang);
490
- });
491
- // Warn about files skipped due to unavailable parsers
492
- const skippedByLang = new Map();
493
- for (const f of scannedFiles) {
494
- const lang = getLanguageFromFilename(f.path);
495
- if (lang && !isLanguageAvailable(lang)) {
496
- skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
497
- }
498
- }
499
- for (const [lang, count] of skippedByLang) {
500
- console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
501
- }
502
- const totalParseable = parseableScanned.length;
503
- if (totalParseable === 0) {
504
- onProgress({
505
- phase: 'parsing',
506
- percent: 82,
507
- message: 'No parseable files found — skipping parsing phase',
508
- stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
509
- });
510
- }
511
- // Build byte-budget chunks
512
- const chunks = [];
513
- let currentChunk = [];
514
- let currentBytes = 0;
515
- for (const file of parseableScanned) {
516
- if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
517
- chunks.push(currentChunk);
518
- currentChunk = [];
519
- currentBytes = 0;
520
- }
521
- currentChunk.push(file.path);
522
- currentBytes += file.size;
523
- }
524
- if (currentChunk.length > 0)
525
- chunks.push(currentChunk);
526
- const numChunks = chunks.length;
527
- if (isDev) {
528
- const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
529
- console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
530
- }
531
- onProgress({
532
- phase: 'parsing',
533
- percent: 20,
534
- message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
535
- stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
536
- });
537
- // Don't spawn workers for tiny repos — overhead exceeds benefit
538
- const MIN_FILES_FOR_WORKERS = 15;
539
- const MIN_BYTES_FOR_WORKERS = 512 * 1024;
540
- const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
541
- // Create worker pool once, reuse across chunks
542
- let workerPool;
543
- if (!options?.skipWorkers &&
544
- (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS)) {
545
- try {
546
- let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
547
- // When running under vitest, import.meta.url points to src/ where no .js exists.
548
- // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
549
- const thisDir = fileURLToPath(new URL('.', import.meta.url));
550
- if (!fs.existsSync(fileURLToPath(workerUrl))) {
551
- const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
552
- if (fs.existsSync(distWorker)) {
553
- workerUrl = pathToFileURL(distWorker);
554
- }
555
- }
556
- workerPool = createWorkerPool(workerUrl);
557
- }
558
- catch (err) {
559
- if (isDev)
560
- console.warn('Worker pool creation failed, using sequential fallback:', err.message);
561
- }
562
- }
563
- let filesParsedSoFar = 0;
564
- // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
565
- const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
566
- let astCache = createASTCache(maxChunkFiles);
567
- // Build import resolution context once — suffix index, file lists, resolve cache.
568
- // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
569
- const importCtx = buildImportResolutionContext(allPaths);
570
- const allPathObjects = allPaths.map((p) => ({ path: p }));
571
- // Worker path: parse + imports + heritage per chunk; buffer extracted calls and
572
- // run processCallsFromExtracted once after all chunks so interface-dispatch uses a
573
- // complete implementor map (heritage from every chunk). Costs peak RAM for buffered
574
- // call rows vs streaming resolution per chunk.
575
- const sequentialChunkPaths = [];
576
- // Pre-compute which chunks need synthesis — O(1) lookup per chunk.
577
- const chunkNeedsSynthesis = chunks.map((paths) => paths.some((p) => {
578
- const lang = getLanguageFromFilename(p);
579
- return lang != null && needsSynthesis(lang);
580
- }));
581
- // Phase 14: Collect exported type bindings for cross-file propagation
582
- const exportedTypeMap = new Map();
583
- // Accumulate file-scope TypeEnv bindings from workers (closes worker/sequential quality gap)
584
- const bindingAccumulator = new BindingAccumulator();
585
- // Accumulate fetch() calls from workers for Next.js route matching
586
- const allFetchCalls = [];
587
- // Accumulate framework-extracted routes (Laravel, etc.) for Route node creation
588
- const allExtractedRoutes = [];
589
- // Accumulate decorator-based routes (@Get, @Post, @app.route, etc.)
590
- const allDecoratorRoutes = [];
591
- // Accumulate MCP/RPC tool definitions (@mcp.tool(), @app.tool(), etc.)
592
- const allToolDefs = [];
593
- const allORMQueries = [];
594
- const deferredWorkerCalls = [];
595
- const deferredWorkerHeritage = [];
596
- const deferredConstructorBindings = [];
597
- const deferredAssignments = [];
598
- try {
599
- for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
600
- const chunkPaths = chunks[chunkIdx];
601
- // Read content for this chunk only
602
- const chunkContents = await readFileContents(repoPath, chunkPaths);
603
- const chunkFiles = chunkPaths
604
- .filter((p) => chunkContents.has(p))
605
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
606
- // Parse this chunk (workers or sequential fallback)
607
- const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
608
- const globalCurrent = filesParsedSoFar + current;
609
- const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
610
- onProgress({
611
- phase: 'parsing',
612
- percent: Math.round(parsingProgress),
613
- message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
614
- detail: filePath,
615
- stats: {
616
- filesProcessed: globalCurrent,
617
- totalFiles: totalParseable,
618
- nodesCreated: graph.nodeCount,
619
- },
620
- });
621
- }, workerPool);
622
- const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
623
- if (chunkWorkerData) {
624
- // Imports
625
- await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
626
- onProgress({
627
- phase: 'parsing',
628
- percent: Math.round(chunkBasePercent),
629
- message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
630
- detail: `${current}/${total} files`,
631
- stats: {
632
- filesProcessed: filesParsedSoFar,
633
- totalFiles: totalParseable,
634
- nodesCreated: graph.nodeCount,
635
- },
636
- });
637
- }, repoPath, importCtx);
638
- // ── Wildcard-import synthesis (Ruby / C/C++ / Swift / Go) + Python module aliases ─
639
- // Synthesize namedImportMap entries for wildcard-import languages and build
640
- // moduleAliasMap for Python namespace imports. Must run after imports are resolved
641
- // (importMap is populated) but BEFORE call resolution.
642
- if (chunkNeedsSynthesis[chunkIdx])
643
- synthesizeWildcardImportBindings(graph, ctx);
644
- // Phase 14 E1: Seed cross-file receiver types from ExportedTypeMap
645
- // before call resolution — eliminates re-parse for single-hop imported receivers.
646
- // NOTE: In the worker path, exportedTypeMap is empty during chunk processing
647
- // (populated later in runCrossFileBindingPropagation). This block is latent —
648
- // it activates only if incremental export collection is added per-chunk.
649
- if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) {
650
- const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
651
- if (isDev && enrichedCount > 0) {
652
- console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
653
- }
654
- }
655
- for (const _item of chunkWorkerData.calls)
656
- deferredWorkerCalls.push(_item);
657
- for (const _item of chunkWorkerData.heritage)
658
- deferredWorkerHeritage.push(_item);
659
- for (const _item of chunkWorkerData.constructorBindings)
660
- deferredConstructorBindings.push(_item);
661
- if (chunkWorkerData.assignments?.length) {
662
- for (const _item of chunkWorkerData.assignments)
663
- deferredAssignments.push(_item);
664
- }
665
- // Heritage + Routes — calls deferred until all chunks have contributed heritage
666
- // (complete implementor map for interface dispatch).
667
- await Promise.all([
668
- processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
669
- onProgress({
670
- phase: 'parsing',
671
- percent: Math.round(chunkBasePercent),
672
- message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
673
- detail: `${current}/${total} records`,
674
- stats: {
675
- filesProcessed: filesParsedSoFar,
676
- totalFiles: totalParseable,
677
- nodesCreated: graph.nodeCount,
678
- },
679
- });
680
- }),
681
- processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
682
- onProgress({
683
- phase: 'parsing',
684
- percent: Math.round(chunkBasePercent),
685
- message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
686
- detail: `${current}/${total} routes`,
687
- stats: {
688
- filesProcessed: filesParsedSoFar,
689
- totalFiles: totalParseable,
690
- nodesCreated: graph.nodeCount,
691
- },
692
- });
693
- }),
694
- ]);
695
- // Collect file-scope bindings into BindingAccumulator. The worker
696
- // IPC payload carries only file-scope entries (`scope = ''`
697
- // hardcoded here). See the FileScopeBindings JSDoc in
698
- // parse-worker.ts for the rationale and Phase 9 reversion path.
699
- //
700
- // Defensive validation at the IPC boundary: silently skip entries
701
- // with non-string varName/typeName. If a future worker regression
702
- // (or a Phase 9 reversion mistake that emits 3-tuples into the
703
- // 2-tuple consumer) produces malformed data, logging is better
704
- // than silently writing `undefined` into the enrichment map.
705
- if (chunkWorkerData.fileScopeBindings?.length) {
706
- for (const { filePath, bindings } of chunkWorkerData.fileScopeBindings) {
707
- if (typeof filePath !== 'string' || filePath.length === 0)
708
- continue;
709
- if (!Array.isArray(bindings))
710
- continue;
711
- const entries = [];
712
- for (const tuple of bindings) {
713
- if (!Array.isArray(tuple) || tuple.length !== 2)
714
- continue;
715
- const [varName, typeName] = tuple;
716
- if (typeof varName !== 'string' || typeof typeName !== 'string')
717
- continue;
718
- entries.push({ scope: '', varName, typeName });
719
- }
720
- if (entries.length > 0) {
721
- bindingAccumulator.appendFile(filePath, entries);
722
- }
723
- }
724
- }
725
- // Collect fetch() calls for Next.js route matching
726
- if (chunkWorkerData.fetchCalls?.length) {
727
- for (const _item of chunkWorkerData.fetchCalls)
728
- allFetchCalls.push(_item);
729
- }
730
- if (chunkWorkerData.routes?.length) {
731
- for (const _item of chunkWorkerData.routes)
732
- allExtractedRoutes.push(_item);
733
- }
734
- if (chunkWorkerData.decoratorRoutes?.length) {
735
- for (const _item of chunkWorkerData.decoratorRoutes)
736
- allDecoratorRoutes.push(_item);
737
- }
738
- if (chunkWorkerData.toolDefs?.length) {
739
- for (const _item of chunkWorkerData.toolDefs)
740
- allToolDefs.push(_item);
741
- }
742
- if (chunkWorkerData.ormQueries?.length) {
743
- for (const _item of chunkWorkerData.ormQueries)
744
- allORMQueries.push(_item);
745
- }
746
- }
747
- else {
748
- await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
749
- sequentialChunkPaths.push(chunkPaths);
750
- }
751
- filesParsedSoFar += chunkFiles.length;
752
- // Clear AST cache between chunks to free memory
753
- astCache.clear();
754
- // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
755
- }
756
- // Build unified HeritageMap (parent lookup + implementor index) after all chunks.
757
- const fullWorkerHeritageMap = deferredWorkerHeritage.length > 0
758
- ? buildHeritageMap(deferredWorkerHeritage, ctx, getHeritageStrategyForLanguage)
759
- : undefined;
760
- if (deferredWorkerCalls.length > 0) {
761
- await processCallsFromExtracted(graph, deferredWorkerCalls, ctx, (current, total) => {
762
- onProgress({
763
- phase: 'parsing',
764
- percent: 82,
765
- message: 'Resolving calls (all chunks)...',
766
- detail: `${current}/${total} files`,
767
- stats: {
768
- filesProcessed: filesParsedSoFar,
769
- totalFiles: totalParseable,
770
- nodesCreated: graph.nodeCount,
771
- },
772
- });
773
- }, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined, fullWorkerHeritageMap,
774
- // Phase 9: pass the accumulator so processCallsFromExtracted can fall back
775
- // to file-scope TypeEnv bindings when the SymbolTable lacks a return type
776
- // for a cross-file callee (e.g. var x = getUser() → x: User).
777
- //
778
- // Lifecycle ordering: the accumulator is populated but NOT yet finalized
779
- // at this seam. finalize() is called later (after the sequential-path
780
- // processCalls which also appends via typeEnv.flush()). Moving finalize()
781
- // before this call would break sequential-path repos. Pre-finalize reads
782
- // are safe because finalize() is a write-lock-only operation with no side
783
- // effects on stored data. All worker-path appendFile calls complete in the
784
- // chunk loop above, so every worker-contributed binding is available via
785
- // fileScopeGet().
786
- bindingAccumulator);
787
- }
788
- if (deferredAssignments.length > 0) {
789
- processAssignmentsFromExtracted(graph, deferredAssignments, ctx, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined, bindingAccumulator);
790
- }
791
- }
792
- finally {
793
- await workerPool?.terminate();
794
- }
795
- // Sequential fallback chunks: re-read source for call/heritage resolution
796
- // Synthesize wildcard import bindings once after ALL imports are processed,
797
- // before any call resolution — same rationale as the worker-path inline synthesis.
798
- if (sequentialChunkPaths.length > 0)
799
- synthesizeWildcardImportBindings(graph, ctx);
800
- // Pass 1: Extract heritage from all sequential chunks.
801
- // Heritage must be fully accumulated BEFORE call resolution so the HeritageMap
802
- // has the complete ancestor chain and implementor index (same constraint as
803
- // the worker path).
804
- //
805
- // File contents are read once here and cached for Pass 2 to avoid a 2× I/O
806
- // cost on the sequential path (ASTs are intentionally NOT cached — rebuilding
807
- // them in Pass 2 keeps peak memory bounded to one chunk at a time).
808
- const allSequentialHeritage = [];
809
- const cachedSequentialChunkFiles = [];
810
- for (const chunkPaths of sequentialChunkPaths) {
811
- const chunkContents = await readFileContents(repoPath, chunkPaths);
812
- const chunkFiles = chunkPaths
813
- .filter((p) => chunkContents.has(p))
814
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
815
- cachedSequentialChunkFiles.push(chunkFiles);
816
- astCache = createASTCache(chunkFiles.length);
817
- const sequentialHeritage = await extractExtractedHeritageFromFiles(chunkFiles, astCache);
818
- // Manual loop (not spread) — `push(...arr)` blows the stack on very large
819
- // arrays, see #650. Pay the explicit iteration cost for safety.
820
- for (const h of sequentialHeritage)
821
- allSequentialHeritage.push(h);
822
- astCache.clear();
823
- }
824
- // Build unified HeritageMap from all sequential heritage (parent lookup + implementor index).
825
- const sequentialHeritageMap = allSequentialHeritage.length > 0
826
- ? buildHeritageMap(allSequentialHeritage, ctx, getHeritageStrategyForLanguage)
827
- : undefined;
828
- // Pass 2: Process calls, heritage edges, fetch calls, and ORM queries per chunk.
829
- // Reuse the file contents cached in Pass 1 instead of re-reading from disk.
830
- for (let chunkIdx = 0; chunkIdx < sequentialChunkPaths.length; chunkIdx++) {
831
- const chunkFiles = cachedSequentialChunkFiles[chunkIdx];
832
- astCache = createASTCache(chunkFiles.length);
833
- const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap, undefined, undefined, undefined, sequentialHeritageMap, bindingAccumulator);
834
- await processHeritage(graph, chunkFiles, astCache, ctx);
835
- if (rubyHeritage.length > 0) {
836
- await processHeritageFromExtracted(graph, rubyHeritage, ctx);
837
- }
838
- // Extract fetch() calls for Next.js route matching (sequential path)
839
- const chunkFetchCalls = await extractFetchCallsFromFiles(chunkFiles, astCache);
840
- if (chunkFetchCalls.length > 0) {
841
- for (const _item of chunkFetchCalls)
842
- allFetchCalls.push(_item);
843
- }
844
- // Extract ORM queries (sequential path)
845
- for (const f of chunkFiles) {
846
- extractORMQueriesInline(f.path, f.content, allORMQueries);
847
- }
848
- astCache.clear();
849
- // Release cached chunk content as soon as Pass 2 finishes with it so the
850
- // Pass-1 content map drains incrementally rather than being held for the
851
- // full duration of Pass 2.
852
- cachedSequentialChunkFiles[chunkIdx] = [];
853
- }
854
- // Log resolution cache stats
855
- if (isDev) {
856
- const rcStats = ctx.getStats();
857
- const total = rcStats.cacheHits + rcStats.cacheMisses;
858
- const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
859
- console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
860
- }
861
- // ── Finalize the accumulator before the read phase begins. All worker-path
862
- // appends (line ~934) and sequential-path flushes (via `processCalls` →
863
- // `typeEnv.flush()` earlier in this function) have completed by here,
864
- // so the finalize-write-lock is correct at this seam. Making the
865
- // lifecycle contract explicit — `append → finalize → consume → dispose`.
866
- // Previously `finalize()` was called much later in `runPipelineFromRepo`
867
- // after the enrichment loop had already read the mutable accumulator.
868
- bindingAccumulator.finalize();
869
- // ── Worker path quality enrichment: merge file-scope bindings into ExportedTypeMap ──
870
- // Counterpart to `collectExportedBindings()` in call-processor.ts which
871
- // handles the sequential path (main thread, full SymbolTable access).
872
- // This call handles the worker path via the accumulator. Both sites
873
- // populate the same `exportedTypeMap` with subtly different export-check
874
- // semantics — sequential uses SymbolTable + graph lookup, `enrichExportedTypeMap`
875
- // uses a three-candidate-ID graph lookup. They must stay in sync until
876
- // Phase 9 unifies them. If you edit one, check the other.
877
- //
878
- // The enrichment loop itself lives in `binding-accumulator.ts` so tests
879
- // can exercise the real production code instead of reimplementing it.
880
- const enriched = enrichExportedTypeMap(bindingAccumulator, graph, exportedTypeMap);
881
- if (isDev && enriched > 0) {
882
- console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
883
- }
884
- // ── Final synthesis pass for whole-module-import languages ──
885
- // Per-chunk synthesis (above) already ran incrementally. This final pass ensures
886
- // any remaining files whose imports were not covered inline are also synthesized,
887
- // and that Phase 14 type propagation has complete namedImportMap data.
888
- const synthesized = synthesizeWildcardImportBindings(graph, ctx);
889
- if (isDev && synthesized > 0) {
890
- console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
891
- }
892
- // Free import resolution context — suffix index + resolve cache no longer needed
893
- // (allPathObjects and importCtx hold ~94MB+ for large repos)
894
- allPathObjects.length = 0;
895
- importCtx.resolveCache.clear();
896
- importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos)
897
- importCtx.normalizedFileList = [];
898
- return {
899
- exportedTypeMap,
900
- allFetchCalls,
901
- allExtractedRoutes,
902
- allDecoratorRoutes,
903
- allToolDefs,
904
- allORMQueries,
905
- bindingAccumulator,
906
- };
907
- }
17
+ import { createKnowledgeGraph } from '../graph/graph.js';
18
+ import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, mroPhase, communitiesPhase, processesPhase, } from './pipeline-phases/index.js';
19
+ // ── Phase registry ─────────────────────────────────────────────────────────
908
20
  /**
909
- * Post-parse graph analysis: MRO, community detection, process extraction.
21
+ * All pipeline phases with their dependency relationships.
22
+ *
23
+ * Phase dependency graph:
910
24
  *
911
- * @reads graph (all nodes and relationships from parse + resolve phases)
912
- * @writes graph (Community nodes, Process nodes, MEMBER_OF edges, STEP_IN_PROCESS edges, METHOD_OVERRIDES edges)
25
+ * scan → structure → [markdown, cobol] → parse → [routes, tools, orm]
26
+ * → crossFile → mro → communities → processes
27
+ *
28
+ * To add a new phase: create a file in pipeline-phases/, export the phase
29
+ * object, and add it to the appropriate position in this array.
913
30
  */
914
- async function runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs) {
915
- // ── Phase 4.5: Method Resolution Order ──────────────────────────────
916
- onProgress({
917
- phase: 'parsing',
918
- percent: 81,
919
- message: 'Computing method resolution order...',
920
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
921
- });
922
- const mroResult = computeMRO(graph);
923
- if (isDev && mroResult.entries.length > 0) {
924
- console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities, ${mroResult.overrideEdges} METHOD_OVERRIDES, ${mroResult.methodImplementsEdges} METHOD_IMPLEMENTS`);
925
- }
926
- // ── Phase 5: Communities ───────────────────────────────────────────
927
- onProgress({
928
- phase: 'communities',
929
- percent: 82,
930
- message: 'Detecting code communities...',
931
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
932
- });
933
- const communityResult = await processCommunities(graph, (message, progress) => {
934
- const communityProgress = 82 + progress * 0.1;
935
- onProgress({
936
- phase: 'communities',
937
- percent: Math.round(communityProgress),
938
- message,
939
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
940
- });
941
- });
942
- if (isDev) {
943
- console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
944
- }
945
- communityResult.communities.forEach((comm) => {
946
- graph.addNode({
947
- id: comm.id,
948
- label: 'Community',
949
- properties: {
950
- name: comm.label,
951
- filePath: '',
952
- heuristicLabel: comm.heuristicLabel,
953
- cohesion: comm.cohesion,
954
- symbolCount: comm.symbolCount,
955
- },
956
- });
957
- });
958
- communityResult.memberships.forEach((membership) => {
959
- graph.addRelationship({
960
- id: `${membership.nodeId}_member_of_${membership.communityId}`,
961
- type: 'MEMBER_OF',
962
- sourceId: membership.nodeId,
963
- targetId: membership.communityId,
964
- confidence: 1.0,
965
- reason: 'leiden-algorithm',
966
- });
967
- });
968
- // ── Phase 6: Processes ─────────────────────────────────────────────
969
- onProgress({
970
- phase: 'processes',
971
- percent: 94,
972
- message: 'Detecting execution flows...',
973
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
974
- });
975
- let symbolCount = 0;
976
- graph.forEachNode((n) => {
977
- if (n.label !== 'File')
978
- symbolCount++;
979
- });
980
- const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
981
- const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
982
- const processProgress = 94 + progress * 0.05;
983
- onProgress({
984
- phase: 'processes',
985
- percent: Math.round(processProgress),
986
- message,
987
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
988
- });
989
- }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
990
- if (isDev) {
991
- console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
992
- }
993
- processResult.processes.forEach((proc) => {
994
- graph.addNode({
995
- id: proc.id,
996
- label: 'Process',
997
- properties: {
998
- name: proc.label,
999
- filePath: '',
1000
- heuristicLabel: proc.heuristicLabel,
1001
- processType: proc.processType,
1002
- stepCount: proc.stepCount,
1003
- communities: proc.communities,
1004
- entryPointId: proc.entryPointId,
1005
- terminalId: proc.terminalId,
1006
- },
1007
- });
1008
- });
1009
- processResult.steps.forEach((step) => {
1010
- graph.addRelationship({
1011
- id: `${step.nodeId}_step_${step.step}_${step.processId}`,
1012
- type: 'STEP_IN_PROCESS',
1013
- sourceId: step.nodeId,
1014
- targetId: step.processId,
1015
- confidence: 1.0,
1016
- reason: 'trace-detection',
1017
- step: step.step,
1018
- });
1019
- });
1020
- // Link Route and Tool nodes to Processes via reverse index (file → node id)
1021
- if ((routeRegistry?.size ?? 0) > 0 || (toolDefs?.length ?? 0) > 0) {
1022
- // Reverse indexes: file → all route URLs / tool names (handles multi-route files)
1023
- const routesByFile = new Map();
1024
- if (routeRegistry) {
1025
- for (const [url, entry] of routeRegistry) {
1026
- let list = routesByFile.get(entry.filePath);
1027
- if (!list) {
1028
- list = [];
1029
- routesByFile.set(entry.filePath, list);
1030
- }
1031
- list.push(url);
1032
- }
1033
- }
1034
- const toolsByFile = new Map();
1035
- if (toolDefs) {
1036
- for (const td of toolDefs) {
1037
- let list = toolsByFile.get(td.filePath);
1038
- if (!list) {
1039
- list = [];
1040
- toolsByFile.set(td.filePath, list);
1041
- }
1042
- list.push(td.name);
1043
- }
1044
- }
1045
- let linked = 0;
1046
- for (const proc of processResult.processes) {
1047
- if (!proc.entryPointId)
1048
- continue;
1049
- const entryNode = graph.getNode(proc.entryPointId);
1050
- if (!entryNode)
1051
- continue;
1052
- const entryFile = entryNode.properties.filePath;
1053
- if (!entryFile)
1054
- continue;
1055
- const routeURLs = routesByFile.get(entryFile);
1056
- if (routeURLs) {
1057
- for (const routeURL of routeURLs) {
1058
- const routeNodeId = generateId('Route', routeURL);
1059
- graph.addRelationship({
1060
- id: generateId('ENTRY_POINT_OF', `${routeNodeId}->${proc.id}`),
1061
- sourceId: routeNodeId,
1062
- targetId: proc.id,
1063
- type: 'ENTRY_POINT_OF',
1064
- confidence: 0.85,
1065
- reason: 'route-handler-entry-point',
1066
- });
1067
- linked++;
1068
- }
1069
- }
1070
- const toolNames = toolsByFile.get(entryFile);
1071
- if (toolNames) {
1072
- for (const toolName of toolNames) {
1073
- const toolNodeId = generateId('Tool', toolName);
1074
- graph.addRelationship({
1075
- id: generateId('ENTRY_POINT_OF', `${toolNodeId}->${proc.id}`),
1076
- sourceId: toolNodeId,
1077
- targetId: proc.id,
1078
- type: 'ENTRY_POINT_OF',
1079
- confidence: 0.85,
1080
- reason: 'tool-handler-entry-point',
1081
- });
1082
- linked++;
1083
- }
1084
- }
1085
- }
1086
- if (isDev && linked > 0) {
1087
- console.log(`🔗 Linked ${linked} Route/Tool nodes to execution flows`);
1088
- }
1089
- }
1090
- return { communityResult, processResult };
31
+ function buildPhaseList(options) {
32
+ const phases = [
33
+ scanPhase,
34
+ structurePhase,
35
+ markdownPhase,
36
+ cobolPhase,
37
+ parsePhase,
38
+ routesPhase,
39
+ toolsPhase,
40
+ ormPhase,
41
+ crossFilePhase,
42
+ ];
43
+ if (!options?.skipGraphPhases) {
44
+ phases.push(mroPhase, communitiesPhase, processesPhase);
45
+ }
46
+ return phases;
1091
47
  }
1092
48
  // ── Pipeline orchestrator ─────────────────────────────────────────────────
1093
49
  export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
1094
50
  const graph = createKnowledgeGraph();
1095
- const ctx = createResolutionContext();
1096
51
  const pipelineStart = Date.now();
1097
- // Hoisted reference for error-path cleanup. The accumulator is normally
1098
- // disposed at the happy-path seam after the dev telemetry log, but if any
1099
- // step between the runChunkedParseAndResolve return and that seam throws
1100
- // (ORM processing, tool node creation, Phase 14, graph analysis), the
1101
- // catch handler disposes it here so the heap footprint does not leak
1102
- // through the rethrow. See binding-accumulator.ts dispose() JSDoc for the
1103
- // lifecycle contract.
1104
- let bindingAccumulatorForCleanup;
1105
- try {
1106
- // Phase 1+2: Scan paths, build structure, process markdown
1107
- const { scannedFiles, allPaths, totalFiles } = await runScanAndStructure(repoPath, graph, onProgress);
1108
- // Phase 3+4: Chunked parse + resolve (imports, calls, heritage, routes)
1109
- const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries, bindingAccumulator, } = await runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress, options);
1110
- // Track the accumulator for error-path cleanup — the happy-path dispose
1111
- // is still at the post-telemetry seam below, this reference is only
1112
- // consulted by the catch handler if any step between here and there
1113
- // throws.
1114
- bindingAccumulatorForCleanup = bindingAccumulator;
1115
- const routeRegistry = new Map();
1116
- // Detect Expo Router app/ roots vs Next.js app/ roots (monorepo-safe).
1117
- const expoAppRoots = new Set();
1118
- const nextjsAppRoots = new Set();
1119
- const expoAppPaths = new Set();
1120
- for (const p of allPaths) {
1121
- const norm = p.replace(/\\/g, '/');
1122
- const appIdx = norm.lastIndexOf('app/');
1123
- if (appIdx < 0)
1124
- continue;
1125
- const root = norm.slice(0, appIdx + 4);
1126
- if (/\/_layout\.(tsx?|jsx?)$/.test(norm))
1127
- expoAppRoots.add(root);
1128
- if (/\/page\.(tsx?|jsx?)$/.test(norm))
1129
- nextjsAppRoots.add(root);
1130
- }
1131
- for (const root of nextjsAppRoots)
1132
- expoAppRoots.delete(root);
1133
- if (expoAppRoots.size > 0) {
1134
- for (const p of allPaths) {
1135
- const norm = p.replace(/\\/g, '/');
1136
- const appIdx = norm.lastIndexOf('app/');
1137
- if (appIdx >= 0 && expoAppRoots.has(norm.slice(0, appIdx + 4)))
1138
- expoAppPaths.add(p);
1139
- }
1140
- }
1141
- for (const p of allPaths) {
1142
- if (expoAppPaths.has(p)) {
1143
- const expoURL = expoFileToRouteURL(p);
1144
- if (expoURL && !routeRegistry.has(expoURL)) {
1145
- routeRegistry.set(expoURL, { filePath: p, source: 'expo-filesystem-route' });
1146
- continue;
1147
- }
1148
- }
1149
- const nextjsURL = nextjsFileToRouteURL(p);
1150
- if (nextjsURL && !routeRegistry.has(nextjsURL)) {
1151
- routeRegistry.set(nextjsURL, { filePath: p, source: 'nextjs-filesystem-route' });
1152
- continue;
1153
- }
1154
- if (p.endsWith('.php')) {
1155
- const phpURL = phpFileToRouteURL(p);
1156
- if (phpURL && !routeRegistry.has(phpURL)) {
1157
- routeRegistry.set(phpURL, { filePath: p, source: 'php-file-route' });
1158
- }
1159
- }
1160
- }
1161
- const ensureSlash = (path) => (path.startsWith('/') ? path : '/' + path);
1162
- let duplicateRoutes = 0;
1163
- const addRoute = (url, entry) => {
1164
- if (routeRegistry.has(url)) {
1165
- duplicateRoutes++;
1166
- return;
1167
- }
1168
- routeRegistry.set(url, entry);
1169
- };
1170
- for (const route of allExtractedRoutes) {
1171
- if (!route.routePath)
1172
- continue;
1173
- addRoute(ensureSlash(route.routePath), {
1174
- filePath: route.filePath,
1175
- source: 'framework-route',
1176
- });
1177
- }
1178
- for (const dr of allDecoratorRoutes) {
1179
- addRoute(ensureSlash(dr.routePath), {
1180
- filePath: dr.filePath,
1181
- source: `decorator-${dr.decoratorName}`,
1182
- });
1183
- }
1184
- let handlerContents;
1185
- if (routeRegistry.size > 0) {
1186
- const handlerPaths = [...routeRegistry.values()].map((e) => e.filePath);
1187
- handlerContents = await readFileContents(repoPath, handlerPaths);
1188
- for (const [routeURL, entry] of routeRegistry) {
1189
- const { filePath: handlerPath, source: routeSource } = entry;
1190
- const content = handlerContents.get(handlerPath);
1191
- const { responseKeys, errorKeys } = content
1192
- ? handlerPath.endsWith('.php')
1193
- ? extractPHPResponseShapes(content)
1194
- : extractResponseShapes(content)
1195
- : { responseKeys: undefined, errorKeys: undefined };
1196
- const mwResult = content ? extractMiddlewareChain(content) : undefined;
1197
- const middleware = mwResult?.chain;
1198
- const routeNodeId = generateId('Route', routeURL);
1199
- graph.addNode({
1200
- id: routeNodeId,
1201
- label: 'Route',
1202
- properties: {
1203
- name: routeURL,
1204
- filePath: handlerPath,
1205
- ...(responseKeys ? { responseKeys } : {}),
1206
- ...(errorKeys ? { errorKeys } : {}),
1207
- ...(middleware && middleware.length > 0 ? { middleware } : {}),
1208
- },
1209
- });
1210
- const handlerFileId = generateId('File', handlerPath);
1211
- graph.addRelationship({
1212
- id: generateId('HANDLES_ROUTE', `${handlerFileId}->${routeNodeId}`),
1213
- sourceId: handlerFileId,
1214
- targetId: routeNodeId,
1215
- type: 'HANDLES_ROUTE',
1216
- confidence: 1.0,
1217
- reason: routeSource,
1218
- });
1219
- }
1220
- if (isDev) {
1221
- console.log(`🗺️ Route registry: ${routeRegistry.size} routes${duplicateRoutes > 0 ? ` (${duplicateRoutes} duplicate URLs skipped)` : ''}`);
1222
- }
1223
- }
1224
- // ── Phase 3.5b: Link Next.js project-level middleware.ts to routes ──
1225
- if (routeRegistry.size > 0) {
1226
- const middlewareCandidates = allPaths.filter((p) => p === 'middleware.ts' ||
1227
- p === 'middleware.js' ||
1228
- p === 'middleware.tsx' ||
1229
- p === 'middleware.jsx' ||
1230
- p === 'src/middleware.ts' ||
1231
- p === 'src/middleware.js' ||
1232
- p === 'src/middleware.tsx' ||
1233
- p === 'src/middleware.jsx');
1234
- if (middlewareCandidates.length > 0) {
1235
- const mwContents = await readFileContents(repoPath, middlewareCandidates);
1236
- for (const [mwPath, mwContent] of mwContents) {
1237
- const config = extractNextjsMiddlewareConfig(mwContent);
1238
- if (!config)
1239
- continue;
1240
- const mwLabel = config.wrappedFunctions.length > 0 ? config.wrappedFunctions : [config.exportedName];
1241
- // Pre-compile matchers once per middleware file
1242
- const compiled = config.matchers
1243
- .map(compileMatcher)
1244
- .filter((m) => m !== null);
1245
- let linkedCount = 0;
1246
- for (const [routeURL] of routeRegistry) {
1247
- const matches = compiled.length === 0 ||
1248
- compiled.some((cm) => compiledMatcherMatchesRoute(cm, routeURL));
1249
- if (!matches)
1250
- continue;
1251
- const routeNodeId = generateId('Route', routeURL);
1252
- const existing = graph.getNode(routeNodeId);
1253
- if (!existing)
1254
- continue;
1255
- const currentMw = existing.properties.middleware ?? [];
1256
- // Prepend project-level middleware (runs before handler-level wrappers)
1257
- existing.properties.middleware = [
1258
- ...mwLabel,
1259
- ...currentMw.filter((m) => !mwLabel.includes(m)),
1260
- ];
1261
- linkedCount++;
1262
- }
1263
- if (isDev && linkedCount > 0) {
1264
- console.log(`🛡️ Linked ${mwPath} middleware [${mwLabel.join(', ')}] to ${linkedCount} routes`);
1265
- }
1266
- }
1267
- }
1268
- }
1269
- // Scan HTML/PHP/template files for <form action="/path"> and AJAX url patterns
1270
- // Scan HTML/template files for <form action="/path"> and AJAX url patterns
1271
- // Skip .php — already parsed by tree-sitter with http_client/fetch queries
1272
- const htmlCandidates = allPaths.filter((p) => p.endsWith('.html') ||
1273
- p.endsWith('.htm') ||
1274
- p.endsWith('.ejs') ||
1275
- p.endsWith('.hbs') ||
1276
- p.endsWith('.blade.php'));
1277
- if (htmlCandidates.length > 0 && routeRegistry.size > 0) {
1278
- const htmlContents = await readFileContents(repoPath, htmlCandidates);
1279
- const htmlPatterns = [/action=["']([^"']+)["']/g, /url:\s*["']([^"']+)["']/g];
1280
- for (const [filePath, content] of htmlContents) {
1281
- for (const pattern of htmlPatterns) {
1282
- pattern.lastIndex = 0;
1283
- let match;
1284
- while ((match = pattern.exec(content)) !== null) {
1285
- const normalized = normalizeFetchURL(match[1]);
1286
- if (normalized) {
1287
- allFetchCalls.push({ filePath, fetchURL: normalized, lineNumber: 0 });
1288
- }
1289
- }
1290
- }
1291
- }
1292
- }
1293
- // ── Phase 3.5c: Extract Expo Router navigation patterns ──
1294
- if (expoAppPaths.size > 0 && routeRegistry.size > 0) {
1295
- const unreadExpoPaths = [...expoAppPaths].filter((p) => !handlerContents?.has(p));
1296
- const extraContents = unreadExpoPaths.length > 0
1297
- ? await readFileContents(repoPath, unreadExpoPaths)
1298
- : new Map();
1299
- const allExpoContents = new Map([...(handlerContents ?? new Map()), ...extraContents]);
1300
- for (const [filePath, content] of allExpoContents) {
1301
- if (!expoAppPaths.has(filePath))
1302
- continue;
1303
- for (const pattern of EXPO_NAV_PATTERNS) {
1304
- pattern.lastIndex = 0;
1305
- let match;
1306
- while ((match = pattern.exec(content)) !== null) {
1307
- const url = match[2] ?? match[1];
1308
- if (url && url.startsWith('/')) {
1309
- allFetchCalls.push({ filePath, fetchURL: url, lineNumber: 0 });
1310
- }
1311
- }
1312
- }
1313
- }
1314
- }
1315
- if (routeRegistry.size > 0 && allFetchCalls.length > 0) {
1316
- const routeURLToFile = new Map();
1317
- for (const [url, entry] of routeRegistry)
1318
- routeURLToFile.set(url, entry.filePath);
1319
- // Read consumer file contents so we can extract property access patterns
1320
- const consumerPaths = [...new Set(allFetchCalls.map((c) => c.filePath))];
1321
- const consumerContents = await readFileContents(repoPath, consumerPaths);
1322
- processNextjsFetchRoutes(graph, allFetchCalls, routeURLToFile, consumerContents);
1323
- if (isDev) {
1324
- console.log(`🔗 Processed ${allFetchCalls.length} fetch() calls against ${routeRegistry.size} routes`);
1325
- }
1326
- }
1327
- // ── Phase 3.6: Tool Detection (MCP/RPC) ──────────────────────────
1328
- const toolDefs = [];
1329
- const seenToolNames = new Set();
1330
- for (const td of allToolDefs) {
1331
- if (seenToolNames.has(td.toolName))
1332
- continue;
1333
- seenToolNames.add(td.toolName);
1334
- toolDefs.push({ name: td.toolName, filePath: td.filePath, description: td.description });
1335
- }
1336
- // TS tool definition arrays — require inputSchema nearby to distinguish from config objects
1337
- const toolCandidatePaths = allPaths.filter((p) => (p.endsWith('.ts') || p.endsWith('.js')) &&
1338
- p.toLowerCase().includes('tool') &&
1339
- !p.includes('node_modules') &&
1340
- !p.includes('test') &&
1341
- !p.includes('__'));
1342
- if (toolCandidatePaths.length > 0) {
1343
- const toolContents = await readFileContents(repoPath, toolCandidatePaths);
1344
- for (const [filePath, content] of toolContents) {
1345
- // Only scan files that contain 'inputSchema' — this is the MCP tool signature
1346
- if (!content.includes('inputSchema'))
1347
- continue;
1348
- const toolPattern = /name:\s*['"](\w+)['"]\s*,\s*\n?\s*description:\s*[`'"]([\s\S]*?)[`'"]/g;
1349
- let match;
1350
- while ((match = toolPattern.exec(content)) !== null) {
1351
- const name = match[1];
1352
- if (seenToolNames.has(name))
1353
- continue;
1354
- seenToolNames.add(name);
1355
- toolDefs.push({
1356
- name,
1357
- filePath,
1358
- description: match[2].slice(0, 200).replace(/\n/g, ' ').trim(),
1359
- });
1360
- }
1361
- }
1362
- }
1363
- // Create Tool nodes and HANDLES_TOOL edges
1364
- if (toolDefs.length > 0) {
1365
- for (const td of toolDefs) {
1366
- const toolNodeId = generateId('Tool', td.name);
1367
- graph.addNode({
1368
- id: toolNodeId,
1369
- label: 'Tool',
1370
- properties: { name: td.name, filePath: td.filePath, description: td.description },
1371
- });
1372
- const handlerFileId = generateId('File', td.filePath);
1373
- graph.addRelationship({
1374
- id: generateId('HANDLES_TOOL', `${handlerFileId}->${toolNodeId}`),
1375
- sourceId: handlerFileId,
1376
- targetId: toolNodeId,
1377
- type: 'HANDLES_TOOL',
1378
- confidence: 1.0,
1379
- reason: 'tool-definition',
1380
- });
1381
- }
1382
- if (isDev) {
1383
- console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`);
1384
- }
1385
- }
1386
- // ── Phase 3.7: ORM Dataflow Detection (Prisma + Supabase) ──────────
1387
- if (allORMQueries.length > 0) {
1388
- processORMQueries(graph, allORMQueries, isDev);
1389
- }
1390
- // `bindingAccumulator.finalize()` was moved inside `runChunkedParseAndResolve`
1391
- // to immediately precede the enrichment loop — see the comment there for
1392
- // the ordering rationale. By the time execution
1393
- // reaches this point, the accumulator has already been finalized, consumed
1394
- // by the enrichment loop, and is ready for dispose() below after the dev
1395
- // telemetry log captures peak state.
1396
- if (isDev) {
1397
- if (bindingAccumulator.totalBindings > 0) {
1398
- const memKB = Math.round(bindingAccumulator.estimateMemoryBytes() / 1024);
1399
- console.log(`📦 BindingAccumulator: ${bindingAccumulator.totalBindings} bindings across ${bindingAccumulator.fileCount} files (~${memKB} KB)`);
1400
- }
1401
- else if (totalFiles > 0) {
1402
- // Zero-binding signal: if the pipeline parsed files but the
1403
- // accumulator is empty, something upstream dropped all bindings.
1404
- // Flag it so operators can spot a regression (e.g. a worker path
1405
- // that accidentally emits empty fileScopeBindings arrays for every
1406
- // file, or a TypeEnv build failure). Dev-mode only.
1407
- console.log(`📦 BindingAccumulator: EMPTY — 0 bindings across 0 files despite ${totalFiles} parsed files. If the codebase has typed bindings, this indicates an upstream regression.`);
1408
- }
1409
- }
1410
- // Release the accumulator's heap footprint now. Both consumers of the
1411
- // accumulator have completed:
1412
- // 1. ExportedTypeMap enrichment loop (enrichExportedTypeMap, above).
1413
- // 2. Phase 9: processCallsFromExtracted in runChunkedParseAndResolve,
1414
- // which uses the accumulator as a BindingAccumulator fallback for
1415
- // cross-file return types when the SymbolTable has no returnType.
1416
- // Phase 14 (runCrossFileBindingPropagation) and runGraphAnalysisPhases
1417
- // do not read the accumulator — keeping it alive through those long-
1418
- // running phases pins heap for no reason.
1419
- bindingAccumulator.dispose();
1420
- // Happy-path dispose completed — clear the cleanup ref so the catch
1421
- // handler doesn't attempt a second (harmless but noisy) dispose if a
1422
- // later phase throws.
1423
- bindingAccumulatorForCleanup = undefined;
1424
- // ── Phase 14: Cross-file binding propagation (topological level sort) ──
1425
- await runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
1426
- // Post-parse graph analysis (MRO, communities, processes)
1427
- let communityResult;
1428
- let processResult;
1429
- if (!options?.skipGraphPhases) {
1430
- const graphResults = await runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs);
1431
- communityResult = graphResults.communityResult;
1432
- processResult = graphResults.processResult;
1433
- }
1434
- onProgress({
1435
- phase: 'complete',
1436
- percent: 100,
1437
- message: communityResult && processResult
1438
- ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
1439
- : 'Graph complete! (graph phases skipped)',
1440
- stats: {
1441
- filesProcessed: totalFiles,
1442
- totalFiles,
1443
- nodesCreated: graph.nodeCount,
1444
- },
1445
- });
1446
- return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
1447
- }
1448
- catch (error) {
1449
- // Error-path cleanup: dispose the accumulator if a step after the
1450
- // destructure from runChunkedParseAndResolve but before the happy-path
1451
- // dispose threw. The reference is cleared on the happy path, so this
1452
- // is a no-op when the pipeline completed successfully and then threw
1453
- // from an unrelated post-dispose step (e.g., future cleanup code).
1454
- bindingAccumulatorForCleanup?.dispose();
1455
- ctx.clear();
1456
- throw error;
52
+ const phases = buildPhaseList(options);
53
+ const results = await runPipeline(phases, {
54
+ repoPath,
55
+ graph,
56
+ onProgress,
57
+ options,
58
+ pipelineStart,
59
+ });
60
+ // Extract final results for the PipelineResult contract
61
+ const { totalFiles } = getPhaseOutput(results, 'parse');
62
+ let communityResult;
63
+ let processResult;
64
+ if (!options?.skipGraphPhases) {
65
+ communityResult = getPhaseOutput(results, 'communities').communityResult;
66
+ processResult = getPhaseOutput(results, 'processes').processResult;
1457
67
  }
68
+ onProgress({
69
+ phase: 'complete',
70
+ percent: 100,
71
+ message: communityResult && processResult
72
+ ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
73
+ : 'Graph complete! (graph phases skipped)',
74
+ stats: {
75
+ filesProcessed: totalFiles,
76
+ totalFiles,
77
+ nodesCreated: graph.nodeCount,
78
+ },
79
+ });
80
+ return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
1458
81
  };
1459
- // Inline ORM regex extraction (avoids importing parse-worker which has worker-only code)
1460
- const PRISMA_QUERY_RE = /\bprisma\.(\w+)\.(findMany|findFirst|findUnique|findUniqueOrThrow|findFirstOrThrow|create|createMany|update|updateMany|delete|deleteMany|upsert|count|aggregate|groupBy)\s*\(/g;
1461
- const SUPABASE_QUERY_RE = /\bsupabase\.from\s*\(\s*['"](\w+)['"]\s*\)\s*\.(select|insert|update|delete|upsert)\s*\(/g;
1462
- function extractORMQueriesInline(filePath, content, out) {
1463
- const hasPrisma = content.includes('prisma.');
1464
- const hasSupabase = content.includes('supabase.from');
1465
- if (!hasPrisma && !hasSupabase)
1466
- return;
1467
- if (hasPrisma) {
1468
- PRISMA_QUERY_RE.lastIndex = 0;
1469
- let m;
1470
- while ((m = PRISMA_QUERY_RE.exec(content)) !== null) {
1471
- const model = m[1];
1472
- if (model.startsWith('$'))
1473
- continue;
1474
- out.push({
1475
- filePath,
1476
- orm: 'prisma',
1477
- model,
1478
- method: m[2],
1479
- lineNumber: content.substring(0, m.index).split('\n').length - 1,
1480
- });
1481
- }
1482
- }
1483
- if (hasSupabase) {
1484
- SUPABASE_QUERY_RE.lastIndex = 0;
1485
- let m;
1486
- while ((m = SUPABASE_QUERY_RE.exec(content)) !== null) {
1487
- out.push({
1488
- filePath,
1489
- orm: 'supabase',
1490
- model: m[1],
1491
- method: m[2],
1492
- lineNumber: content.substring(0, m.index).split('\n').length - 1,
1493
- });
1494
- }
1495
- }
1496
- }
1497
- // ============================================================================
1498
- // ORM Query Processing — creates QUERIES edges from callers to model nodes
1499
- // ============================================================================
1500
- function processORMQueries(graph, queries, isDev) {
1501
- const modelNodes = new Map();
1502
- const seenEdges = new Set();
1503
- let edgesCreated = 0;
1504
- for (const q of queries) {
1505
- const modelKey = `${q.orm}:${q.model}`;
1506
- let modelNodeId = modelNodes.get(modelKey);
1507
- if (!modelNodeId) {
1508
- const candidateIds = [
1509
- generateId('Class', `${q.model}`),
1510
- generateId('Interface', `${q.model}`),
1511
- generateId('CodeElement', `${q.model}`),
1512
- ];
1513
- const existing = candidateIds.find((id) => graph.getNode(id));
1514
- if (existing) {
1515
- modelNodeId = existing;
1516
- }
1517
- else {
1518
- modelNodeId = generateId('CodeElement', `${q.orm}:${q.model}`);
1519
- graph.addNode({
1520
- id: modelNodeId,
1521
- label: 'CodeElement',
1522
- properties: {
1523
- name: q.model,
1524
- filePath: '',
1525
- description: `${q.orm} model/table: ${q.model}`,
1526
- },
1527
- });
1528
- }
1529
- modelNodes.set(modelKey, modelNodeId);
1530
- }
1531
- const fileId = generateId('File', q.filePath);
1532
- const edgeKey = `${fileId}->${modelNodeId}:${q.method}`;
1533
- if (seenEdges.has(edgeKey))
1534
- continue;
1535
- seenEdges.add(edgeKey);
1536
- graph.addRelationship({
1537
- id: generateId('QUERIES', edgeKey),
1538
- sourceId: fileId,
1539
- targetId: modelNodeId,
1540
- type: 'QUERIES',
1541
- confidence: 0.9,
1542
- reason: `${q.orm}-${q.method}`,
1543
- });
1544
- edgesCreated++;
1545
- }
1546
- if (isDev) {
1547
- console.log(`ORM dataflow: ${edgesCreated} QUERIES edges, ${modelNodes.size} models (${queries.length} total calls)`);
1548
- }
1549
- }