gitnexus 1.4.7 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. package/README.md +29 -1
  2. package/dist/cli/ai-context.d.ts +1 -1
  3. package/dist/cli/ai-context.js +1 -1
  4. package/dist/cli/analyze.d.ts +2 -0
  5. package/dist/cli/analyze.js +54 -21
  6. package/dist/cli/index-repo.d.ts +15 -0
  7. package/dist/cli/index-repo.js +115 -0
  8. package/dist/cli/index.js +13 -3
  9. package/dist/cli/setup.js +90 -10
  10. package/dist/cli/wiki.d.ts +4 -0
  11. package/dist/cli/wiki.js +174 -53
  12. package/dist/config/supported-languages.d.ts +33 -1
  13. package/dist/config/supported-languages.js +32 -0
  14. package/dist/core/embeddings/embedder.d.ts +6 -1
  15. package/dist/core/embeddings/embedder.js +65 -5
  16. package/dist/core/embeddings/embedding-pipeline.js +11 -9
  17. package/dist/core/embeddings/http-client.d.ts +31 -0
  18. package/dist/core/embeddings/http-client.js +179 -0
  19. package/dist/core/embeddings/index.d.ts +1 -0
  20. package/dist/core/embeddings/index.js +1 -0
  21. package/dist/core/embeddings/types.d.ts +1 -1
  22. package/dist/core/graph/graph.js +9 -1
  23. package/dist/core/graph/types.d.ts +11 -2
  24. package/dist/core/ingestion/call-processor.d.ts +66 -2
  25. package/dist/core/ingestion/call-processor.js +650 -30
  26. package/dist/core/ingestion/call-routing.d.ts +9 -18
  27. package/dist/core/ingestion/call-routing.js +0 -19
  28. package/dist/core/ingestion/cobol/cobol-copy-expander.d.ts +57 -0
  29. package/dist/core/ingestion/cobol/cobol-copy-expander.js +385 -0
  30. package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +210 -0
  31. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1509 -0
  32. package/dist/core/ingestion/cobol/jcl-parser.d.ts +68 -0
  33. package/dist/core/ingestion/cobol/jcl-parser.js +217 -0
  34. package/dist/core/ingestion/cobol/jcl-processor.d.ts +33 -0
  35. package/dist/core/ingestion/cobol/jcl-processor.js +229 -0
  36. package/dist/core/ingestion/cobol-processor.d.ts +54 -0
  37. package/dist/core/ingestion/cobol-processor.js +1186 -0
  38. package/dist/core/ingestion/entry-point-scoring.d.ts +17 -0
  39. package/dist/core/ingestion/entry-point-scoring.js +52 -28
  40. package/dist/core/ingestion/export-detection.d.ts +47 -8
  41. package/dist/core/ingestion/export-detection.js +29 -50
  42. package/dist/core/ingestion/field-extractor.d.ts +29 -0
  43. package/dist/core/ingestion/field-extractor.js +25 -0
  44. package/dist/core/ingestion/field-extractors/configs/c-cpp.d.ts +3 -0
  45. package/dist/core/ingestion/field-extractors/configs/c-cpp.js +108 -0
  46. package/dist/core/ingestion/field-extractors/configs/csharp.d.ts +8 -0
  47. package/dist/core/ingestion/field-extractors/configs/csharp.js +73 -0
  48. package/dist/core/ingestion/field-extractors/configs/dart.d.ts +8 -0
  49. package/dist/core/ingestion/field-extractors/configs/dart.js +76 -0
  50. package/dist/core/ingestion/field-extractors/configs/go.d.ts +11 -0
  51. package/dist/core/ingestion/field-extractors/configs/go.js +64 -0
  52. package/dist/core/ingestion/field-extractors/configs/helpers.d.ts +44 -0
  53. package/dist/core/ingestion/field-extractors/configs/helpers.js +134 -0
  54. package/dist/core/ingestion/field-extractors/configs/jvm.d.ts +3 -0
  55. package/dist/core/ingestion/field-extractors/configs/jvm.js +118 -0
  56. package/dist/core/ingestion/field-extractors/configs/php.d.ts +8 -0
  57. package/dist/core/ingestion/field-extractors/configs/php.js +67 -0
  58. package/dist/core/ingestion/field-extractors/configs/python.d.ts +12 -0
  59. package/dist/core/ingestion/field-extractors/configs/python.js +91 -0
  60. package/dist/core/ingestion/field-extractors/configs/ruby.d.ts +16 -0
  61. package/dist/core/ingestion/field-extractors/configs/ruby.js +75 -0
  62. package/dist/core/ingestion/field-extractors/configs/rust.d.ts +9 -0
  63. package/dist/core/ingestion/field-extractors/configs/rust.js +55 -0
  64. package/dist/core/ingestion/field-extractors/configs/swift.d.ts +8 -0
  65. package/dist/core/ingestion/field-extractors/configs/swift.js +63 -0
  66. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.d.ts +3 -0
  67. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.js +60 -0
  68. package/dist/core/ingestion/field-extractors/generic.d.ts +46 -0
  69. package/dist/core/ingestion/field-extractors/generic.js +111 -0
  70. package/dist/core/ingestion/field-extractors/typescript.d.ts +77 -0
  71. package/dist/core/ingestion/field-extractors/typescript.js +291 -0
  72. package/dist/core/ingestion/field-types.d.ts +59 -0
  73. package/dist/core/ingestion/field-types.js +2 -0
  74. package/dist/core/ingestion/framework-detection.d.ts +97 -2
  75. package/dist/core/ingestion/framework-detection.js +114 -14
  76. package/dist/core/ingestion/heritage-processor.js +62 -66
  77. package/dist/core/ingestion/import-processor.d.ts +9 -10
  78. package/dist/core/ingestion/import-processor.js +150 -196
  79. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.d.ts +6 -9
  80. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.js +20 -2
  81. package/dist/core/ingestion/import-resolvers/dart.d.ts +7 -0
  82. package/dist/core/ingestion/import-resolvers/dart.js +44 -0
  83. package/dist/core/ingestion/{resolvers → import-resolvers}/go.d.ts +4 -5
  84. package/dist/core/ingestion/{resolvers → import-resolvers}/go.js +17 -0
  85. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.d.ts +10 -1
  86. package/dist/core/ingestion/import-resolvers/jvm.js +159 -0
  87. package/dist/core/ingestion/import-resolvers/php.d.ts +25 -0
  88. package/dist/core/ingestion/import-resolvers/php.js +80 -0
  89. package/dist/core/ingestion/{resolvers → import-resolvers}/python.d.ts +9 -3
  90. package/dist/core/ingestion/{resolvers → import-resolvers}/python.js +35 -3
  91. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.d.ts +5 -2
  92. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.js +7 -2
  93. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.d.ts +5 -2
  94. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.js +41 -2
  95. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.d.ts +15 -7
  96. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.js +22 -3
  97. package/dist/core/ingestion/import-resolvers/swift.d.ts +7 -0
  98. package/dist/core/ingestion/import-resolvers/swift.js +23 -0
  99. package/dist/core/ingestion/import-resolvers/types.d.ts +44 -0
  100. package/dist/core/ingestion/import-resolvers/types.js +6 -0
  101. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.d.ts +2 -0
  102. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.js +7 -0
  103. package/dist/core/ingestion/language-config.d.ts +6 -0
  104. package/dist/core/ingestion/language-config.js +13 -0
  105. package/dist/core/ingestion/language-provider.d.ts +121 -0
  106. package/dist/core/ingestion/language-provider.js +24 -0
  107. package/dist/core/ingestion/languages/c-cpp.d.ts +12 -0
  108. package/dist/core/ingestion/languages/c-cpp.js +71 -0
  109. package/dist/core/ingestion/languages/cobol.d.ts +1 -0
  110. package/dist/core/ingestion/languages/cobol.js +26 -0
  111. package/dist/core/ingestion/languages/csharp.d.ts +8 -0
  112. package/dist/core/ingestion/languages/csharp.js +49 -0
  113. package/dist/core/ingestion/languages/dart.d.ts +12 -0
  114. package/dist/core/ingestion/languages/dart.js +58 -0
  115. package/dist/core/ingestion/languages/go.d.ts +11 -0
  116. package/dist/core/ingestion/languages/go.js +28 -0
  117. package/dist/core/ingestion/languages/index.d.ts +38 -0
  118. package/dist/core/ingestion/languages/index.js +63 -0
  119. package/dist/core/ingestion/languages/java.d.ts +9 -0
  120. package/dist/core/ingestion/languages/java.js +29 -0
  121. package/dist/core/ingestion/languages/kotlin.d.ts +9 -0
  122. package/dist/core/ingestion/languages/kotlin.js +53 -0
  123. package/dist/core/ingestion/languages/php.d.ts +8 -0
  124. package/dist/core/ingestion/languages/php.js +145 -0
  125. package/dist/core/ingestion/languages/python.d.ts +12 -0
  126. package/dist/core/ingestion/languages/python.js +39 -0
  127. package/dist/core/ingestion/languages/ruby.d.ts +9 -0
  128. package/dist/core/ingestion/languages/ruby.js +44 -0
  129. package/dist/core/ingestion/languages/rust.d.ts +12 -0
  130. package/dist/core/ingestion/languages/rust.js +44 -0
  131. package/dist/core/ingestion/languages/swift.d.ts +12 -0
  132. package/dist/core/ingestion/languages/swift.js +133 -0
  133. package/dist/core/ingestion/languages/typescript.d.ts +10 -0
  134. package/dist/core/ingestion/languages/typescript.js +60 -0
  135. package/dist/core/ingestion/markdown-processor.d.ts +17 -0
  136. package/dist/core/ingestion/markdown-processor.js +124 -0
  137. package/dist/core/ingestion/mro-processor.js +22 -18
  138. package/dist/core/ingestion/named-binding-processor.d.ts +18 -0
  139. package/dist/core/ingestion/named-binding-processor.js +42 -0
  140. package/dist/core/ingestion/named-bindings/csharp.d.ts +3 -0
  141. package/dist/core/ingestion/named-bindings/csharp.js +37 -0
  142. package/dist/core/ingestion/named-bindings/java.d.ts +3 -0
  143. package/dist/core/ingestion/named-bindings/java.js +29 -0
  144. package/dist/core/ingestion/named-bindings/kotlin.d.ts +3 -0
  145. package/dist/core/ingestion/named-bindings/kotlin.js +36 -0
  146. package/dist/core/ingestion/named-bindings/php.d.ts +3 -0
  147. package/dist/core/ingestion/named-bindings/php.js +61 -0
  148. package/dist/core/ingestion/named-bindings/python.d.ts +3 -0
  149. package/dist/core/ingestion/named-bindings/python.js +49 -0
  150. package/dist/core/ingestion/named-bindings/rust.d.ts +3 -0
  151. package/dist/core/ingestion/named-bindings/rust.js +64 -0
  152. package/dist/core/ingestion/named-bindings/types.d.ts +16 -0
  153. package/dist/core/ingestion/named-bindings/types.js +6 -0
  154. package/dist/core/ingestion/named-bindings/typescript.d.ts +3 -0
  155. package/dist/core/ingestion/named-bindings/typescript.js +58 -0
  156. package/dist/core/ingestion/parsing-processor.d.ts +6 -2
  157. package/dist/core/ingestion/parsing-processor.js +125 -85
  158. package/dist/core/ingestion/pipeline.d.ts +10 -0
  159. package/dist/core/ingestion/pipeline.js +1235 -317
  160. package/dist/core/ingestion/resolution-context.d.ts +5 -0
  161. package/dist/core/ingestion/resolution-context.js +8 -5
  162. package/dist/core/ingestion/route-extractors/expo.d.ts +1 -0
  163. package/dist/core/ingestion/route-extractors/expo.js +36 -0
  164. package/dist/core/ingestion/route-extractors/middleware.d.ts +47 -0
  165. package/dist/core/ingestion/route-extractors/middleware.js +143 -0
  166. package/dist/core/ingestion/route-extractors/nextjs.d.ts +3 -0
  167. package/dist/core/ingestion/route-extractors/nextjs.js +76 -0
  168. package/dist/core/ingestion/route-extractors/php.d.ts +7 -0
  169. package/dist/core/ingestion/route-extractors/php.js +21 -0
  170. package/dist/core/ingestion/route-extractors/response-shapes.d.ts +20 -0
  171. package/dist/core/ingestion/route-extractors/response-shapes.js +290 -0
  172. package/dist/core/ingestion/symbol-table.d.ts +16 -0
  173. package/dist/core/ingestion/symbol-table.js +20 -6
  174. package/dist/core/ingestion/tree-sitter-queries.d.ts +10 -9
  175. package/dist/core/ingestion/tree-sitter-queries.js +274 -11
  176. package/dist/core/ingestion/type-env.d.ts +42 -18
  177. package/dist/core/ingestion/type-env.js +481 -106
  178. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +5 -0
  179. package/dist/core/ingestion/type-extractors/c-cpp.js +119 -0
  180. package/dist/core/ingestion/type-extractors/csharp.js +149 -16
  181. package/dist/core/ingestion/type-extractors/dart.d.ts +15 -0
  182. package/dist/core/ingestion/type-extractors/dart.js +371 -0
  183. package/dist/core/ingestion/type-extractors/jvm.js +169 -66
  184. package/dist/core/ingestion/type-extractors/rust.js +35 -1
  185. package/dist/core/ingestion/type-extractors/shared.d.ts +1 -15
  186. package/dist/core/ingestion/type-extractors/shared.js +14 -112
  187. package/dist/core/ingestion/type-extractors/swift.js +338 -7
  188. package/dist/core/ingestion/type-extractors/types.d.ts +40 -8
  189. package/dist/core/ingestion/type-extractors/typescript.js +141 -9
  190. package/dist/core/ingestion/utils/ast-helpers.d.ts +83 -0
  191. package/dist/core/ingestion/utils/ast-helpers.js +817 -0
  192. package/dist/core/ingestion/utils/call-analysis.d.ts +73 -0
  193. package/dist/core/ingestion/utils/call-analysis.js +527 -0
  194. package/dist/core/ingestion/utils/event-loop.d.ts +5 -0
  195. package/dist/core/ingestion/utils/event-loop.js +5 -0
  196. package/dist/core/ingestion/utils/language-detection.d.ts +9 -0
  197. package/dist/core/ingestion/utils/language-detection.js +70 -0
  198. package/dist/core/ingestion/utils/verbose.d.ts +1 -0
  199. package/dist/core/ingestion/utils/verbose.js +7 -0
  200. package/dist/core/ingestion/workers/parse-worker.d.ts +55 -5
  201. package/dist/core/ingestion/workers/parse-worker.js +415 -225
  202. package/dist/core/lbug/csv-generator.js +51 -1
  203. package/dist/core/lbug/lbug-adapter.d.ts +10 -0
  204. package/dist/core/lbug/lbug-adapter.js +75 -4
  205. package/dist/core/lbug/schema.d.ts +8 -4
  206. package/dist/core/lbug/schema.js +65 -4
  207. package/dist/core/tree-sitter/parser-loader.js +7 -1
  208. package/dist/core/wiki/cursor-client.d.ts +31 -0
  209. package/dist/core/wiki/cursor-client.js +127 -0
  210. package/dist/core/wiki/generator.d.ts +28 -9
  211. package/dist/core/wiki/generator.js +115 -18
  212. package/dist/core/wiki/graph-queries.d.ts +4 -0
  213. package/dist/core/wiki/graph-queries.js +7 -1
  214. package/dist/core/wiki/llm-client.d.ts +2 -0
  215. package/dist/core/wiki/llm-client.js +8 -4
  216. package/dist/core/wiki/prompts.d.ts +3 -3
  217. package/dist/core/wiki/prompts.js +6 -0
  218. package/dist/mcp/core/embedder.js +11 -3
  219. package/dist/mcp/core/lbug-adapter.d.ts +5 -0
  220. package/dist/mcp/core/lbug-adapter.js +23 -2
  221. package/dist/mcp/local/local-backend.d.ts +38 -5
  222. package/dist/mcp/local/local-backend.js +804 -63
  223. package/dist/mcp/resources.js +2 -0
  224. package/dist/mcp/tools.js +73 -4
  225. package/dist/server/api.d.ts +19 -1
  226. package/dist/server/api.js +66 -6
  227. package/dist/storage/git.d.ts +12 -0
  228. package/dist/storage/git.js +21 -0
  229. package/dist/storage/repo-manager.d.ts +3 -0
  230. package/package.json +25 -16
  231. package/dist/core/ingestion/named-binding-extraction.d.ts +0 -61
  232. package/dist/core/ingestion/named-binding-extraction.js +0 -363
  233. package/dist/core/ingestion/resolvers/index.d.ts +0 -18
  234. package/dist/core/ingestion/resolvers/index.js +0 -13
  235. package/dist/core/ingestion/resolvers/jvm.js +0 -87
  236. package/dist/core/ingestion/resolvers/php.d.ts +0 -15
  237. package/dist/core/ingestion/resolvers/php.js +0 -35
  238. package/dist/core/ingestion/type-extractors/index.d.ts +0 -22
  239. package/dist/core/ingestion/type-extractors/index.js +0 -31
  240. package/dist/core/ingestion/utils.d.ts +0 -138
  241. package/dist/core/ingestion/utils.js +0 -1290
  242. package/scripts/patch-tree-sitter-swift.cjs +0 -74
@@ -1,8 +1,17 @@
1
1
  import { createKnowledgeGraph } from '../graph/graph.js';
2
2
  import { processStructure } from './structure-processor.js';
3
+ import { processMarkdown } from './markdown-processor.js';
4
+ import { processCobol, isCobolFile, isJclFile } from './cobol-processor.js';
3
5
  import { processParsing } from './parsing-processor.js';
4
6
  import { processImports, processImportsFromExtracted, buildImportResolutionContext } from './import-processor.js';
5
- import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted } from './call-processor.js';
7
+ import { EMPTY_INDEX } from './import-resolvers/utils.js';
8
+ import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, processNextjsFetchRoutes, extractFetchCallsFromFiles, seedCrossFileReceiverTypes, buildImportedReturnTypes, buildImportedRawReturnTypes, buildExportedTypeMapFromGraph } from './call-processor.js';
9
+ import { nextjsFileToRouteURL, normalizeFetchURL } from './route-extractors/nextjs.js';
10
+ import { expoFileToRouteURL } from './route-extractors/expo.js';
11
+ import { phpFileToRouteURL } from './route-extractors/php.js';
12
+ import { extractResponseShapes, extractPHPResponseShapes } from './route-extractors/response-shapes.js';
13
+ import { extractMiddlewareChain, extractNextjsMiddlewareConfig, compileMatcher, compiledMatcherMatchesRoute } from './route-extractors/middleware.js';
14
+ import { generateId } from '../../lib/utils.js';
6
15
  import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
7
16
  import { computeMRO } from './mro-processor.js';
8
17
  import { processCommunities } from './community-processor.js';
@@ -10,13 +19,69 @@ import { processProcesses } from './process-processor.js';
10
19
  import { createResolutionContext } from './resolution-context.js';
11
20
  import { createASTCache } from './ast-cache.js';
12
21
  import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
13
- import { getLanguageFromFilename } from './utils.js';
22
+ import { getLanguageFromFilename } from './utils/language-detection.js';
14
23
  import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
24
+ import { providers, getProviderForFile } from './languages/index.js';
15
25
  import { createWorkerPool } from './workers/worker-pool.js';
16
26
  import fs from 'node:fs';
17
27
  import path from 'node:path';
18
28
  import { fileURLToPath, pathToFileURL } from 'node:url';
19
29
  const isDev = process.env.NODE_ENV === 'development';
30
// Regex patterns for Expo Router navigation targets: `router.push('/path')`-style
// calls and `<Link href="/path">` JSX attributes. NOTE(review): both carry the /g
// flag, so `.exec`/`.test` advance `lastIndex` statefully — callers must iterate
// each pattern to exhaustion (or reset lastIndex) per input string.
const EXPO_NAV_PATTERNS = [
    /router\.(push|replace|navigate)\(\s*['"`]([^'"`]+)['"`]/g,
    /<Link\s+[^>]*href=\s*['"`]([^'"`]+)['"`]/g,
];
34
/** Kahn's algorithm: returns files grouped by topological level.
 * Files in the same level have no mutual dependencies — safe to process in parallel.
 * Files in cycles are returned as a final group (no cross-cycle propagation).
 *
 * @param {Map<string, Iterable<string>>} importMap file → files it imports
 * @returns {{ levels: string[][], cycleCount: number }} ordered levels plus the
 *   number of files left in genuine cycles
 */
export function topologicalLevelSort(importMap) {
    // Build in-degree map and reverse dependency map.
    const inDegree = new Map();
    const reverseDeps = new Map();
    for (const [file, deps] of importMap) {
        if (!inDegree.has(file))
            inDegree.set(file, 0);
        for (const dep of deps) {
            // Fix: ignore self-imports. A file listed as its own dependency
            // would otherwise form a trivial one-node "cycle" and be exiled to
            // the final cycle group, needlessly blocking its propagation.
            if (dep === file)
                continue;
            if (!inDegree.has(dep))
                inDegree.set(dep, 0);
            // file imports dep, so dep must be processed before file.
            // In Kahn's terms: dep → file (dep is a prerequisite of file).
            inDegree.set(file, (inDegree.get(file) ?? 0) + 1);
            let rev = reverseDeps.get(dep);
            if (!rev) {
                rev = [];
                reverseDeps.set(dep, rev);
            }
            rev.push(file);
        }
    }
    // BFS from zero-in-degree nodes, grouping by level.
    const levels = [];
    let currentLevel = [...inDegree.entries()]
        .filter(([, d]) => d === 0)
        .map(([f]) => f);
    while (currentLevel.length > 0) {
        levels.push(currentLevel);
        const nextLevel = [];
        for (const file of currentLevel) {
            for (const dependent of reverseDeps.get(file) ?? []) {
                const newDeg = (inDegree.get(dependent) ?? 1) - 1;
                inDegree.set(dependent, newDeg);
                if (newDeg === 0)
                    nextLevel.push(dependent);
            }
        }
        currentLevel = nextLevel;
    }
    // Files still with positive in-degree are in cycles — add as final group
    // so they are still processed, just without cross-cycle ordering.
    const cycleFiles = [...inDegree.entries()]
        .filter(([, d]) => d > 0)
        .map(([f]) => f);
    if (cycleFiles.length > 0) {
        levels.push(cycleFiles);
    }
    return { levels, cycleCount: cycleFiles.length };
}
20
85
  /** Max bytes of source content to load per parse chunk. Each chunk's source +
21
86
  * parsed ASTs + extracted records + worker serialization overhead all live in
22
87
  * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
@@ -24,360 +89,1135 @@ const isDev = process.env.NODE_ENV === 'development';
24
89
  const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
25
90
  /** Max AST trees to keep in LRU cache */
26
91
  const AST_CACHE_CAP = 50;
27
- export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
28
- const graph = createKnowledgeGraph();
29
- const ctx = createResolutionContext();
30
- const symbolTable = ctx.symbols;
31
- let astCache = createASTCache(AST_CACHE_CAP);
32
- const cleanup = () => {
33
- astCache.clear();
34
- ctx.clear();
92
/** Minimum percentage of files that must benefit from cross-file seeding to justify the re-resolution pass. */
const CROSS_FILE_SKIP_THRESHOLD = 0.03;
/** Hard cap on files re-processed during cross-file propagation. */
const MAX_CROSS_FILE_REPROCESS = 2000;
/** Node labels that represent top-level importable symbols.
 * Excludes Method, Property, Constructor (accessed via receiver, not directly imported),
 * and structural labels (File, Folder, Package, Module, Project, etc.). */
const IMPORTABLE_SYMBOL_LABELS = new Set([
    'Function',
    'Class',
    'Interface',
    'Struct',
    'Enum',
    'Trait',
    'TypeAlias',
    'Const',
    'Static',
    'Record',
    'Union',
    'Typedef',
    'Macro',
]);
/** Max synthetic bindings per importing file — prevents memory bloat for
 * C/C++ files that include many large headers. */
const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
/** Pre-computed language sets derived from providers at module load:
 * one pass over the provider registry populates both. */
const WILDCARD_LANGUAGES = new Set();
const SYNTHESIS_LANGUAGES = new Set();
for (const provider of Object.values(providers)) {
    if (provider.importSemantics === 'wildcard') {
        WILDCARD_LANGUAGES.add(provider.id);
    }
    if (provider.importSemantics !== 'named') {
        SYNTHESIS_LANGUAGES.add(provider.id);
    }
}
/** Check if a language uses wildcard (whole-module) import semantics.
 * Derived from LanguageProvider.importSemantics — no hardcoded set needed. */
function isWildcardImportLanguage(lang) {
    return WILDCARD_LANGUAGES.has(lang);
}
/** Check if a language needs synthesis before call resolution.
 * True for wildcard-import languages AND namespace-import languages (Python). */
function needsSynthesis(lang) {
    return SYNTHESIS_LANGUAGES.has(lang);
}
119
/** Synthesize namedImportMap entries for languages with whole-module imports.
 * These languages (Go, Ruby, C/C++, Swift, Python) import all exported symbols from a
 * file, not specific named symbols. After parsing, we know which symbols each file
 * exports (via graph isExported), so we can expand ImportMap edges into per-symbol
 * bindings that Phase 14 can use for cross-file type propagation.
 * @returns {number} count of synthetic bindings created */
function synthesizeWildcardImportBindings(graph, ctx) {
    // Step 1: one pass over the graph to group exported importable symbols by
    // their defining file.
    const exportsByFile = new Map();
    graph.forEachNode((node) => {
        const props = node.properties;
        if (!props?.isExported || !IMPORTABLE_SYMBOL_LABELS.has(node.label))
            return;
        const filePath = props.filePath;
        const name = props.name;
        if (!filePath || !name)
            return;
        let bucket = exportsByFile.get(filePath);
        if (!bucket) {
            bucket = [];
            exportsByFile.set(filePath, bucket);
        }
        bucket.push({ name, filePath });
    });
    if (exportsByFile.size === 0)
        return 0;
    // Step 2: merge in imports that only exist as graph-level IMPORTS edges.
    // ctx.importMap covers file-based imports (Ruby, C/C++), while Go/C# package
    // imports are recorded as File→File IMPORTS edges instead; skip any pair
    // already present in ctx.importMap to avoid double-counting.
    const FILE_PREFIX = 'File:';
    const edgeImports = new Map();
    graph.forEachRelationship((rel) => {
        if (rel.type !== 'IMPORTS')
            return;
        if (!(rel.sourceId.startsWith(FILE_PREFIX) && rel.targetId.startsWith(FILE_PREFIX)))
            return;
        const fromFile = rel.sourceId.slice(FILE_PREFIX.length);
        const toFile = rel.targetId.slice(FILE_PREFIX.length);
        const lang = getLanguageFromFilename(fromFile);
        if (!lang || !isWildcardImportLanguage(lang))
            return;
        if (ctx.importMap.get(fromFile)?.has(toFile))
            return;
        let targets = edgeImports.get(fromFile);
        if (!targets) {
            targets = new Set();
            edgeImports.set(fromFile, targets);
        }
        targets.add(toFile);
    });
    let created = 0;
    // Expand one importer's imported files into synthetic named bindings,
    // stopping at the per-file cap and never clobbering an existing binding.
    const expandBindings = (importerPath, importedFiles) => {
        let bindings = ctx.namedImportMap.get(importerPath);
        let count = bindings?.size ?? 0;
        for (const importedFile of importedFiles) {
            const symbols = exportsByFile.get(importedFile);
            if (!symbols)
                continue;
            for (const { name } of symbols) {
                if (count >= MAX_SYNTHETIC_BINDINGS_PER_FILE)
                    return;
                if (bindings?.has(name))
                    continue;
                if (!bindings) {
                    bindings = new Map();
                    ctx.namedImportMap.set(importerPath, bindings);
                }
                bindings.set(name, {
                    sourcePath: importedFile,
                    exportedName: name,
                });
                count += 1;
                created += 1;
            }
        }
    };
    // Step 3a: file-based imports from ctx.importMap (Ruby, C/C++, Swift).
    for (const [importerPath, importedFiles] of ctx.importMap) {
        const lang = getLanguageFromFilename(importerPath);
        if (lang && isWildcardImportLanguage(lang)) {
            expandBindings(importerPath, importedFiles);
        }
    }
    // Step 3b: edge-derived imports (Go and other wildcard-import languages).
    for (const [importerPath, importedFiles] of edgeImports) {
        expandBindings(importerPath, importedFiles);
    }
    // Step 4: module alias map for Python namespace imports.
    // `import models` in app.py → ctx.moduleAliasMap['app.py']['models'] = 'models.py'
    // Enables `models.User()` to resolve to models.py:User without ambiguous symbol expansion.
    const recordAliases = (callerFile, importedFiles) => {
        let aliases = ctx.moduleAliasMap.get(callerFile);
        for (const importedFile of importedFiles) {
            // Alias is the imported filename stem (e.g. "models.py" → "models").
            const slash = importedFile.lastIndexOf('/');
            const basename = slash >= 0 ? importedFile.slice(slash + 1) : importedFile;
            const dot = basename.lastIndexOf('.');
            const stem = dot >= 0 ? basename.slice(0, dot) : basename;
            if (!stem)
                continue;
            if (!aliases) {
                aliases = new Map();
                ctx.moduleAliasMap.set(callerFile, aliases);
            }
            aliases.set(stem, importedFile);
        }
    };
    for (const [callerFile, importedFiles] of ctx.importMap) {
        const provider = getProviderForFile(callerFile);
        if (provider?.importSemantics === 'namespace') {
            recordAliases(callerFile, importedFiles);
        }
    }
    return created;
}
236
/** Phase 14: Cross-file binding propagation.
 * Seeds downstream files with resolved type bindings from upstream exports.
 * Files are processed in topological import order so upstream bindings are
 * available when downstream files are re-resolved.
 *
 * @param {object} graph - knowledge graph (read for exports/node count, written by processCalls)
 * @param {object} ctx - resolution context (namedImportMap, importMap, symbols, ...)
 * @param {Map} exportedTypeMap - filePath → (exportedName → type); filled from graph if empty
 * @param {string[]} allPaths - every file path known to this pipeline run
 * @param {number} totalFiles - total file count, used for thresholds and progress stats
 * @param {string} repoPath - repo root, used to re-read candidate file contents
 * @param {number} pipelineStart - Date.now() at pipeline start (dev-only timing log)
 * @param {(progress: object) => void} onProgress - progress callback
 */
async function runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
    // For the worker path, buildTypeEnv runs inside workers without SymbolTable,
    // so exported bindings must be collected from graph + SymbolTable in main thread.
    if (exportedTypeMap.size === 0 && graph.nodeCount > 0) {
        const graphExports = buildExportedTypeMapFromGraph(graph, ctx.symbols);
        for (const [fp, exports] of graphExports)
            exportedTypeMap.set(fp, exports);
    }
    if (exportedTypeMap.size === 0 || ctx.namedImportMap.size === 0)
        return;
    const allPathSet = new Set(allPaths);
    const { levels, cycleCount } = topologicalLevelSort(ctx.importMap);
    // Cycle diagnostic: only log when actual cycles detected (cycleCount from Kahn's BFS)
    if (isDev && cycleCount > 0) {
        console.log(`🔄 ${cycleCount} files in import cycles (skipped for cross-file propagation)`);
    }
    // Quick count of files with cross-file binding gaps (early exit once threshold exceeded)
    let filesWithGaps = 0;
    const gapThreshold = Math.max(1, Math.ceil(totalFiles * CROSS_FILE_SKIP_THRESHOLD));
    outer: for (const level of levels) {
        for (const filePath of level) {
            const imports = ctx.namedImportMap.get(filePath);
            if (!imports)
                continue;
            for (const [, binding] of imports) {
                const upstream = exportedTypeMap.get(binding.sourcePath);
                if (upstream?.has(binding.exportedName)) {
                    filesWithGaps++;
                    break;
                }
                const def = ctx.symbols.lookupExactFull(binding.sourcePath, binding.exportedName);
                if (def?.returnType) {
                    filesWithGaps++;
                    break;
                }
            }
            if (filesWithGaps >= gapThreshold)
                break outer;
        }
    }
    const gapRatio = totalFiles > 0 ? filesWithGaps / totalFiles : 0;
    if (gapRatio < CROSS_FILE_SKIP_THRESHOLD && filesWithGaps < gapThreshold) {
        if (isDev) {
            console.log(`⏭️ Cross-file re-resolution skipped (${filesWithGaps}/${totalFiles} files, ${(gapRatio * 100).toFixed(1)}% < ${CROSS_FILE_SKIP_THRESHOLD * 100}% threshold)`);
        }
        return;
    }
    onProgress({
        phase: 'parsing',
        percent: 82,
        message: `Cross-file type propagation (${filesWithGaps}+ files)...`,
        stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
    });
    let crossFileResolved = 0;
    const crossFileStart = Date.now();
    // Fix: the AST cache is now cleared in `finally`, so a throw inside
    // readFileContents or processCalls can no longer leak cached trees.
    const astCache = createASTCache(AST_CACHE_CAP);
    try {
        for (const level of levels) {
            // Gather this level's candidates: files with at least one seedable
            // binding or imported return type, still present on disk, in an
            // available language.
            const levelCandidates = [];
            for (const filePath of level) {
                if (crossFileResolved + levelCandidates.length >= MAX_CROSS_FILE_REPROCESS)
                    break;
                const imports = ctx.namedImportMap.get(filePath);
                if (!imports)
                    continue;
                const seeded = new Map();
                for (const [localName, binding] of imports) {
                    const upstream = exportedTypeMap.get(binding.sourcePath);
                    if (upstream) {
                        const type = upstream.get(binding.exportedName);
                        if (type)
                            seeded.set(localName, type);
                    }
                }
                const importedReturns = buildImportedReturnTypes(filePath, ctx.namedImportMap, ctx.symbols);
                const importedRawReturns = buildImportedRawReturnTypes(filePath, ctx.namedImportMap, ctx.symbols);
                if (seeded.size === 0 && importedReturns.size === 0)
                    continue;
                if (!allPathSet.has(filePath))
                    continue;
                const lang = getLanguageFromFilename(filePath);
                if (!lang || !isLanguageAvailable(lang))
                    continue;
                levelCandidates.push({ filePath, seeded, importedReturns, importedRawReturns });
            }
            if (levelCandidates.length === 0)
                continue;
            const levelPaths = levelCandidates.map(c => c.filePath);
            const contentMap = await readFileContents(repoPath, levelPaths);
            for (const { filePath, seeded, importedReturns, importedRawReturns } of levelCandidates) {
                const content = contentMap.get(filePath);
                if (!content)
                    continue;
                const reFile = [{ path: filePath, content }];
                const bindings = new Map();
                if (seeded.size > 0)
                    bindings.set(filePath, seeded);
                const importedReturnTypesMap = new Map();
                if (importedReturns.size > 0) {
                    importedReturnTypesMap.set(filePath, importedReturns);
                }
                const importedRawReturnTypesMap = new Map();
                if (importedRawReturns.size > 0) {
                    importedRawReturnTypesMap.set(filePath, importedRawReturns);
                }
                await processCalls(graph, reFile, astCache, ctx, undefined, exportedTypeMap, bindings.size > 0 ? bindings : undefined, importedReturnTypesMap.size > 0 ? importedReturnTypesMap : undefined, importedRawReturnTypesMap.size > 0 ? importedRawReturnTypesMap : undefined);
                crossFileResolved++;
            }
            if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) {
                if (isDev)
                    console.log(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`);
                break;
            }
        }
    } finally {
        astCache.clear();
    }
    if (isDev) {
        const elapsed = Date.now() - crossFileStart;
        const totalElapsed = Date.now() - pipelineStart;
        const reResolutionPct = totalElapsed > 0 ? ((elapsed / totalElapsed) * 100).toFixed(1) : '0';
        console.log(`🔗 Cross-file re-resolution: ${crossFileResolved} candidates re-processed` +
            ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`);
    }
}
362
+ /**
363
+ * Phase 1+2: Scan repository paths, build file/folder structure, process markdown.
364
+ *
365
+ * @reads repoPath (filesystem)
366
+ * @writes graph (File, Folder nodes + CONTAINS edges; Markdown sections + cross-links)
367
+ */
368
+ async function runScanAndStructure(repoPath, graph, onProgress) {
369
+ // ── Phase 1: Scan paths only (no content read) ─────────────────────
370
+ onProgress({
371
+ phase: 'extracting',
372
+ percent: 0,
373
+ message: 'Scanning repository...',
374
+ });
375
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
376
+ const scanProgress = Math.round((current / total) * 15);
38
377
  onProgress({
39
378
  phase: 'extracting',
40
- percent: 0,
379
+ percent: scanProgress,
41
380
  message: 'Scanning repository...',
381
+ detail: filePath,
382
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
42
383
  });
43
- const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
44
- const scanProgress = Math.round((current / total) * 15);
45
- onProgress({
46
- phase: 'extracting',
47
- percent: scanProgress,
48
- message: 'Scanning repository...',
49
- detail: filePath,
50
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
51
- });
52
- });
53
- const totalFiles = scannedFiles.length;
54
- onProgress({
55
- phase: 'extracting',
56
- percent: 15,
57
- message: 'Repository scanned successfully',
58
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
59
- });
60
- // ── Phase 2: Structure (paths only — no content needed) ────────────
61
- onProgress({
62
- phase: 'structure',
63
- percent: 15,
64
- message: 'Analyzing project structure...',
65
- stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
66
- });
67
- const allPaths = scannedFiles.map(f => f.path);
68
- processStructure(graph, allPaths);
69
- onProgress({
70
- phase: 'structure',
71
- percent: 20,
72
- message: 'Project structure analyzed',
73
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
74
- });
75
- // ── Phase 3+4: Chunked read + parse ────────────────────────────────
76
- // Group parseable files into byte-budget chunks so only ~20MB of source
77
- // is in memory at a time. Each chunk is: read → parse → extract → free.
78
- const parseableScanned = scannedFiles.filter(f => {
79
- const lang = getLanguageFromFilename(f.path);
80
- return lang && isLanguageAvailable(lang);
81
- });
82
- // Warn about files skipped due to unavailable parsers
83
- const skippedByLang = new Map();
84
- for (const f of scannedFiles) {
85
- const lang = getLanguageFromFilename(f.path);
86
- if (lang && !isLanguageAvailable(lang)) {
87
- skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
88
- }
89
- }
90
- for (const [lang, count] of skippedByLang) {
91
- console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
92
- }
93
- const totalParseable = parseableScanned.length;
94
- if (totalParseable === 0) {
95
- onProgress({
96
- phase: 'parsing',
97
- percent: 82,
98
- message: 'No parseable files found — skipping parsing phase',
99
- stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
100
- });
101
- }
102
- // Build byte-budget chunks
103
- const chunks = [];
104
- let currentChunk = [];
105
- let currentBytes = 0;
106
- for (const file of parseableScanned) {
107
- if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
108
- chunks.push(currentChunk);
109
- currentChunk = [];
110
- currentBytes = 0;
111
- }
112
- currentChunk.push(file.path);
113
- currentBytes += file.size;
114
- }
115
- if (currentChunk.length > 0)
116
- chunks.push(currentChunk);
117
- const numChunks = chunks.length;
384
+ });
385
+ const totalFiles = scannedFiles.length;
386
+ onProgress({
387
+ phase: 'extracting',
388
+ percent: 15,
389
+ message: 'Repository scanned successfully',
390
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
391
+ });
392
+ // ── Phase 2: Structure (paths only — no content needed) ────────────
393
+ onProgress({
394
+ phase: 'structure',
395
+ percent: 15,
396
+ message: 'Analyzing project structure...',
397
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
398
+ });
399
+ const allPaths = scannedFiles.map(f => f.path);
400
+ processStructure(graph, allPaths);
401
+ onProgress({
402
+ phase: 'structure',
403
+ percent: 20,
404
+ message: 'Project structure analyzed',
405
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
406
+ });
407
+ // ── Custom (non-tree-sitter) processors ─────────────────────────────
408
+ // Each custom processor follows the pattern in markdown-processor.ts:
409
+ // 1. Export a process function: (graph, files, allPathSet) => result
410
+ // 2. Export a file detection function: (path) => boolean
411
+ // 3. Filter files by extension, write nodes/edges directly to graph
412
+ // To add a new language: create a new processor file, import it here,
413
+ // and add a filter-read-call-log block following the pattern below.
414
+ // ── Phase 2.5: Markdown processing (headings + cross-links) ────────
415
+ const mdScanned = scannedFiles.filter(f => f.path.endsWith('.md') || f.path.endsWith('.mdx'));
416
+ if (mdScanned.length > 0) {
417
+ const mdContents = await readFileContents(repoPath, mdScanned.map(f => f.path));
418
+ const mdFiles = mdScanned
419
+ .filter(f => mdContents.has(f.path))
420
+ .map(f => ({ path: f.path, content: mdContents.get(f.path) }));
421
+ const allPathSet = new Set(allPaths);
422
+ const mdResult = processMarkdown(graph, mdFiles, allPathSet);
423
+ if (isDev) {
424
+ console.log(` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`);
425
+ }
426
+ }
427
+ // ── Phase 2.6: COBOL processing (regex extraction, no tree-sitter) ──
428
+ const cobolScanned = scannedFiles.filter(f => isCobolFile(f.path) || isJclFile(f.path));
429
+ if (cobolScanned.length > 0) {
430
+ const cobolContents = await readFileContents(repoPath, cobolScanned.map(f => f.path));
431
+ const cobolFiles = cobolScanned
432
+ .filter(f => cobolContents.has(f.path))
433
+ .map(f => ({ path: f.path, content: cobolContents.get(f.path) }));
434
+ const allPathSet = new Set(allPaths);
435
+ const cobolResult = processCobol(graph, cobolFiles, allPathSet);
118
436
  if (isDev) {
119
- const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
120
- console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
437
+ console.log(` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`);
438
+ if (cobolResult.execSqlBlocks > 0 || cobolResult.execCicsBlocks > 0 || cobolResult.entryPoints > 0) {
439
+ console.log(` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`);
440
+ }
441
+ if (cobolResult.jclJobs > 0) {
442
+ console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`);
443
+ }
121
444
  }
445
+ }
446
+ return { scannedFiles, allPaths, totalFiles };
447
+ }
448
+ /**
449
+ * Phase 3+4: Chunked parse + resolve loop.
450
+ *
451
+ * Reads source in byte-budget chunks (~20MB each). For each chunk:
452
+ * 1. Parse via worker pool (or sequential fallback)
453
+ * 2. Resolve imports from extracted data
454
+ * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
455
+ * 4. Resolve calls, heritage, routes concurrently (Promise.all)
456
+ * 5. Collect TypeEnv bindings for cross-file propagation
457
+ *
458
+ * State accumulated across chunks: symbolTable, importMap, namedImportMap,
459
+ * moduleAliasMap (all via ResolutionContext), exportedTypeMap, workerTypeEnvBindings.
460
+ *
461
+ * @reads graph (structure nodes from Phase 1+2)
462
+ * @reads allPaths (from scan phase)
463
+ * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
464
+ * @writes ctx.symbolTable, ctx.importMap, ctx.namedImportMap, ctx.moduleAliasMap
465
+ */
466
+ async function runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
467
+ const symbolTable = ctx.symbols;
468
+ const parseableScanned = scannedFiles.filter(f => {
469
+ const lang = getLanguageFromFilename(f.path);
470
+ return lang && isLanguageAvailable(lang);
471
+ });
472
+ // Warn about files skipped due to unavailable parsers
473
+ const skippedByLang = new Map();
474
+ for (const f of scannedFiles) {
475
+ const lang = getLanguageFromFilename(f.path);
476
+ if (lang && !isLanguageAvailable(lang)) {
477
+ skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
478
+ }
479
+ }
480
+ for (const [lang, count] of skippedByLang) {
481
+ console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
482
+ }
483
+ const totalParseable = parseableScanned.length;
484
+ if (totalParseable === 0) {
122
485
  onProgress({
123
486
  phase: 'parsing',
124
- percent: 20,
125
- message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
126
- stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
487
+ percent: 82,
488
+ message: 'No parseable files found skipping parsing phase',
489
+ stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
127
490
  });
128
- // Don't spawn workers for tiny repos — overhead exceeds benefit
129
- const MIN_FILES_FOR_WORKERS = 15;
130
- const MIN_BYTES_FOR_WORKERS = 512 * 1024;
131
- const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
132
- // Create worker pool once, reuse across chunks
133
- let workerPool;
134
- if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
135
- try {
136
- let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
137
- // When running under vitest, import.meta.url points to src/ where no .js exists.
138
- // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
139
- const thisDir = fileURLToPath(new URL('.', import.meta.url));
140
- if (!fs.existsSync(fileURLToPath(workerUrl))) {
141
- const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
142
- if (fs.existsSync(distWorker)) {
143
- workerUrl = pathToFileURL(distWorker);
144
- }
145
- }
146
- workerPool = createWorkerPool(workerUrl);
147
- }
148
- catch (err) {
149
- if (isDev)
150
- console.warn('Worker pool creation failed, using sequential fallback:', err.message);
151
- }
152
- }
153
- let filesParsedSoFar = 0;
154
- // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
155
- const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
156
- astCache = createASTCache(maxChunkFiles);
157
- // Build import resolution context once — suffix index, file lists, resolve cache.
158
- // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
159
- const importCtx = buildImportResolutionContext(allPaths);
160
- const allPathObjects = allPaths.map(p => ({ path: p }));
161
- // Single-pass: parse + resolve imports/calls/heritage per chunk.
162
- // Calls/heritage use the symbol table built so far (symbols from earlier chunks
163
- // are already registered). This trades ~5% cross-chunk resolution accuracy for
164
- // 200-400MB less memory — critical for Linux-kernel-scale repos.
165
- const sequentialChunkPaths = [];
491
+ }
492
+ // Build byte-budget chunks
493
+ const chunks = [];
494
+ let currentChunk = [];
495
+ let currentBytes = 0;
496
+ for (const file of parseableScanned) {
497
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
498
+ chunks.push(currentChunk);
499
+ currentChunk = [];
500
+ currentBytes = 0;
501
+ }
502
+ currentChunk.push(file.path);
503
+ currentBytes += file.size;
504
+ }
505
+ if (currentChunk.length > 0)
506
+ chunks.push(currentChunk);
507
+ const numChunks = chunks.length;
508
+ if (isDev) {
509
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
510
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
511
+ }
512
+ onProgress({
513
+ phase: 'parsing',
514
+ percent: 20,
515
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
516
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
517
+ });
518
+ // Don't spawn workers for tiny repos overhead exceeds benefit
519
+ const MIN_FILES_FOR_WORKERS = 15;
520
+ const MIN_BYTES_FOR_WORKERS = 512 * 1024;
521
+ const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
522
+ // Create worker pool once, reuse across chunks
523
+ let workerPool;
524
+ if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
166
525
  try {
167
- for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
168
- const chunkPaths = chunks[chunkIdx];
169
- // Read content for this chunk only
170
- const chunkContents = await readFileContents(repoPath, chunkPaths);
171
- const chunkFiles = chunkPaths
172
- .filter(p => chunkContents.has(p))
173
- .map(p => ({ path: p, content: chunkContents.get(p) }));
174
- // Parse this chunk (workers or sequential fallback)
175
- const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
176
- const globalCurrent = filesParsedSoFar + current;
177
- const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
526
+ let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
527
+ // When running under vitest, import.meta.url points to src/ where no .js exists.
528
+ // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
529
+ const thisDir = fileURLToPath(new URL('.', import.meta.url));
530
+ if (!fs.existsSync(fileURLToPath(workerUrl))) {
531
+ const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
532
+ if (fs.existsSync(distWorker)) {
533
+ workerUrl = pathToFileURL(distWorker);
534
+ }
535
+ }
536
+ workerPool = createWorkerPool(workerUrl);
537
+ }
538
+ catch (err) {
539
+ if (isDev)
540
+ console.warn('Worker pool creation failed, using sequential fallback:', err.message);
541
+ }
542
+ }
543
+ let filesParsedSoFar = 0;
544
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
545
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
546
+ let astCache = createASTCache(maxChunkFiles);
547
+ // Build import resolution context once — suffix index, file lists, resolve cache.
548
+ // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
549
+ const importCtx = buildImportResolutionContext(allPaths);
550
+ const allPathObjects = allPaths.map(p => ({ path: p }));
551
+ // Single-pass: parse + resolve imports/calls/heritage per chunk.
552
+ // Calls/heritage use the symbol table built so far (symbols from earlier chunks
553
+ // are already registered). This trades ~5% cross-chunk resolution accuracy for
554
+ // 200-400MB less memory — critical for Linux-kernel-scale repos.
555
+ const sequentialChunkPaths = [];
556
+ // Pre-compute which chunks need synthesis — O(1) lookup per chunk.
557
+ const chunkNeedsSynthesis = chunks.map(paths => paths.some(p => {
558
+ const lang = getLanguageFromFilename(p);
559
+ return lang != null && needsSynthesis(lang);
560
+ }));
561
+ // Phase 14: Collect exported type bindings for cross-file propagation
562
+ const exportedTypeMap = new Map();
563
+ // Accumulate file-scope TypeEnv bindings from workers (closes worker/sequential quality gap)
564
+ const workerTypeEnvBindings = [];
565
+ // Accumulate fetch() calls from workers for Next.js route matching
566
+ const allFetchCalls = [];
567
+ // Accumulate framework-extracted routes (Laravel, etc.) for Route node creation
568
+ const allExtractedRoutes = [];
569
+ // Accumulate decorator-based routes (@Get, @Post, @app.route, etc.)
570
+ const allDecoratorRoutes = [];
571
+ // Accumulate MCP/RPC tool definitions (@mcp.tool(), @app.tool(), etc.)
572
+ const allToolDefs = [];
573
+ const allORMQueries = [];
574
+ try {
575
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
576
+ const chunkPaths = chunks[chunkIdx];
577
+ // Read content for this chunk only
578
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
579
+ const chunkFiles = chunkPaths
580
+ .filter(p => chunkContents.has(p))
581
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
582
+ // Parse this chunk (workers or sequential fallback)
583
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
584
+ const globalCurrent = filesParsedSoFar + current;
585
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
586
+ onProgress({
587
+ phase: 'parsing',
588
+ percent: Math.round(parsingProgress),
589
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
590
+ detail: filePath,
591
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
592
+ });
593
+ }, workerPool);
594
+ const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
595
+ if (chunkWorkerData) {
596
+ // Imports
597
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
178
598
  onProgress({
179
599
  phase: 'parsing',
180
- percent: Math.round(parsingProgress),
181
- message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
182
- detail: filePath,
183
- stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
600
+ percent: Math.round(chunkBasePercent),
601
+ message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
602
+ detail: `${current}/${total} files`,
603
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
184
604
  });
185
- }, workerPool);
186
- const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
187
- if (chunkWorkerData) {
188
- // Imports
189
- await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
605
+ }, repoPath, importCtx);
606
+ // ── Wildcard-import synthesis (Ruby / C/C++ / Swift / Go) + Python module aliases ─
607
+ // Synthesize namedImportMap entries for wildcard-import languages and build
608
+ // moduleAliasMap for Python namespace imports. Must run after imports are resolved
609
+ // (importMap is populated) but BEFORE call resolution.
610
+ if (chunkNeedsSynthesis[chunkIdx])
611
+ synthesizeWildcardImportBindings(graph, ctx);
612
+ // Phase 14 E1: Seed cross-file receiver types from ExportedTypeMap
613
+ // before call resolution — eliminates re-parse for single-hop imported receivers.
614
+ // NOTE: In the worker path, exportedTypeMap is empty during chunk processing
615
+ // (populated later in runCrossFileBindingPropagation). This block is latent —
616
+ // it activates only if incremental export collection is added per-chunk.
617
+ if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) {
618
+ const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
619
+ if (isDev && enrichedCount > 0) {
620
+ console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
621
+ }
622
+ }
623
+ // Calls + Heritage + Routes — resolve in parallel (no shared mutable state between them)
624
+ // This is safe because each writes disjoint relationship types into idempotent id-keyed Maps,
625
+ // and the single-threaded event loop prevents races between synchronous addRelationship calls.
626
+ await Promise.all([
627
+ processCallsFromExtracted(graph, chunkWorkerData.calls, ctx, (current, total) => {
190
628
  onProgress({
191
629
  phase: 'parsing',
192
630
  percent: Math.round(chunkBasePercent),
193
- message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
631
+ message: `Resolving calls (chunk ${chunkIdx + 1}/${numChunks})...`,
194
632
  detail: `${current}/${total} files`,
195
633
  stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
196
634
  });
197
- }, repoPath, importCtx);
198
- // Calls + Heritage + Routes — resolve in parallel (no shared mutable state between them)
199
- // This is safe because each writes disjoint relationship types into idempotent id-keyed Maps,
200
- // and the single-threaded event loop prevents races between synchronous addRelationship calls.
201
- await Promise.all([
202
- processCallsFromExtracted(graph, chunkWorkerData.calls, ctx, (current, total) => {
203
- onProgress({
204
- phase: 'parsing',
205
- percent: Math.round(chunkBasePercent),
206
- message: `Resolving calls (chunk ${chunkIdx + 1}/${numChunks})...`,
207
- detail: `${current}/${total} files`,
208
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
209
- });
210
- }, chunkWorkerData.constructorBindings),
211
- processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
212
- onProgress({
213
- phase: 'parsing',
214
- percent: Math.round(chunkBasePercent),
215
- message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
216
- detail: `${current}/${total} records`,
217
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
218
- });
219
- }),
220
- processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
221
- onProgress({
222
- phase: 'parsing',
223
- percent: Math.round(chunkBasePercent),
224
- message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
225
- detail: `${current}/${total} routes`,
226
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
227
- });
228
- }),
229
- ]);
230
- // Process field write assignments (synchronous, runs after calls resolve)
231
- if (chunkWorkerData.assignments?.length) {
232
- processAssignmentsFromExtracted(graph, chunkWorkerData.assignments, ctx, chunkWorkerData.constructorBindings);
233
- }
635
+ }, chunkWorkerData.constructorBindings),
636
+ processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
637
+ onProgress({
638
+ phase: 'parsing',
639
+ percent: Math.round(chunkBasePercent),
640
+ message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
641
+ detail: `${current}/${total} records`,
642
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
643
+ });
644
+ }),
645
+ processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
646
+ onProgress({
647
+ phase: 'parsing',
648
+ percent: Math.round(chunkBasePercent),
649
+ message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
650
+ detail: `${current}/${total} routes`,
651
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
652
+ });
653
+ }),
654
+ ]);
655
+ // Process field write assignments (synchronous, runs after calls resolve)
656
+ if (chunkWorkerData.assignments?.length) {
657
+ processAssignmentsFromExtracted(graph, chunkWorkerData.assignments, ctx, chunkWorkerData.constructorBindings);
234
658
  }
235
- else {
236
- await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
237
- sequentialChunkPaths.push(chunkPaths);
659
+ // Collect TypeEnv file-scope bindings for exported type enrichment
660
+ if (chunkWorkerData.typeEnvBindings?.length) {
661
+ workerTypeEnvBindings.push(...chunkWorkerData.typeEnvBindings);
238
662
  }
239
- filesParsedSoFar += chunkFiles.length;
240
- // Clear AST cache between chunks to free memory
241
- astCache.clear();
242
- // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
663
+ // Collect fetch() calls for Next.js route matching
664
+ if (chunkWorkerData.fetchCalls?.length) {
665
+ allFetchCalls.push(...chunkWorkerData.fetchCalls);
666
+ }
667
+ if (chunkWorkerData.routes?.length) {
668
+ allExtractedRoutes.push(...chunkWorkerData.routes);
669
+ }
670
+ if (chunkWorkerData.decoratorRoutes?.length) {
671
+ allDecoratorRoutes.push(...chunkWorkerData.decoratorRoutes);
672
+ }
673
+ if (chunkWorkerData.toolDefs?.length) {
674
+ allToolDefs.push(...chunkWorkerData.toolDefs);
675
+ }
676
+ if (chunkWorkerData.ormQueries?.length) {
677
+ allORMQueries.push(...chunkWorkerData.ormQueries);
678
+ }
679
+ }
680
+ else {
681
+ await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
682
+ sequentialChunkPaths.push(chunkPaths);
243
683
  }
684
+ filesParsedSoFar += chunkFiles.length;
685
+ // Clear AST cache between chunks to free memory
686
+ astCache.clear();
687
+ // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
244
688
  }
245
- finally {
246
- await workerPool?.terminate();
689
+ }
690
+ finally {
691
+ await workerPool?.terminate();
692
+ }
693
+ // Sequential fallback chunks: re-read source for call/heritage resolution
694
+ // Synthesize wildcard import bindings once after ALL imports are processed,
695
+ // before any call resolution — same rationale as the worker-path inline synthesis.
696
+ if (sequentialChunkPaths.length > 0)
697
+ synthesizeWildcardImportBindings(graph, ctx);
698
+ for (const chunkPaths of sequentialChunkPaths) {
699
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
700
+ const chunkFiles = chunkPaths
701
+ .filter(p => chunkContents.has(p))
702
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
703
+ astCache = createASTCache(chunkFiles.length);
704
+ const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap);
705
+ await processHeritage(graph, chunkFiles, astCache, ctx);
706
+ if (rubyHeritage.length > 0) {
707
+ await processHeritageFromExtracted(graph, rubyHeritage, ctx);
247
708
  }
248
- // Sequential fallback chunks: re-read source for call/heritage resolution
249
- for (const chunkPaths of sequentialChunkPaths) {
250
- const chunkContents = await readFileContents(repoPath, chunkPaths);
251
- const chunkFiles = chunkPaths
252
- .filter(p => chunkContents.has(p))
253
- .map(p => ({ path: p, content: chunkContents.get(p) }));
254
- astCache = createASTCache(chunkFiles.length);
255
- const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx);
256
- await processHeritage(graph, chunkFiles, astCache, ctx);
257
- if (rubyHeritage.length > 0) {
258
- await processHeritageFromExtracted(graph, rubyHeritage, ctx);
709
+ // Extract fetch() calls for Next.js route matching (sequential path)
710
+ const chunkFetchCalls = await extractFetchCallsFromFiles(chunkFiles, astCache);
711
+ if (chunkFetchCalls.length > 0) {
712
+ allFetchCalls.push(...chunkFetchCalls);
713
+ }
714
+ // Extract ORM queries (sequential path)
715
+ for (const f of chunkFiles) {
716
+ extractORMQueriesInline(f.path, f.content, allORMQueries);
717
+ }
718
+ astCache.clear();
719
+ }
720
+ // Log resolution cache stats
721
+ if (isDev) {
722
+ const rcStats = ctx.getStats();
723
+ const total = rcStats.cacheHits + rcStats.cacheMisses;
724
+ const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
725
+ console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
726
+ }
727
+ // ── Worker path quality enrichment: merge TypeEnv file-scope bindings into ExportedTypeMap ──
728
+ // Workers return file-scope bindings from their TypeEnv fixpoint (includes inferred types
729
+ // like `const config = getConfig()` → Config). Filter by graph isExported to match
730
+ // the sequential path's collectExportedBindings behavior.
731
+ if (workerTypeEnvBindings.length > 0) {
732
+ let enriched = 0;
733
+ for (const { filePath, bindings } of workerTypeEnvBindings) {
734
+ for (const [name, type] of bindings) {
735
+ // Verify the symbol is exported via graph node
736
+ const nodeId = `Function:${filePath}:${name}`;
737
+ const varNodeId = `Variable:${filePath}:${name}`;
738
+ const constNodeId = `Const:${filePath}:${name}`;
739
+ const node = graph.getNode(nodeId) ?? graph.getNode(varNodeId) ?? graph.getNode(constNodeId);
740
+ if (!node?.properties?.isExported)
741
+ continue;
742
+ let fileExports = exportedTypeMap.get(filePath);
743
+ if (!fileExports) {
744
+ fileExports = new Map();
745
+ exportedTypeMap.set(filePath, fileExports);
746
+ }
747
+ // Don't overwrite existing entries (Tier 0 from SymbolTable is authoritative)
748
+ if (!fileExports.has(name)) {
749
+ fileExports.set(name, type);
750
+ enriched++;
751
+ }
259
752
  }
260
- astCache.clear();
261
753
  }
262
- // Log resolution cache stats
263
- if (isDev) {
264
- const rcStats = ctx.getStats();
265
- const total = rcStats.cacheHits + rcStats.cacheMisses;
266
- const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
267
- console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
268
- }
269
- // Free import resolution context suffix index + resolve cache no longer needed
270
- // (allPathObjects and importCtx hold ~94MB+ for large repos)
271
- allPathObjects.length = 0;
272
- importCtx.resolveCache.clear();
273
- importCtx.suffixIndex = null;
274
- importCtx.normalizedFileList = null;
275
- let communityResult;
276
- let processResult;
277
- if (!options?.skipGraphPhases) {
278
- // ── Phase 4.5: Method Resolution Order ──────────────────────────────
279
- onProgress({
280
- phase: 'parsing',
281
- percent: 81,
282
- message: 'Computing method resolution order...',
283
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
284
- });
285
- const mroResult = computeMRO(graph);
286
- if (isDev && mroResult.entries.length > 0) {
287
- console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
288
- }
289
- // ── Phase 5: Communities ───────────────────────────────────────────
290
- onProgress({
291
- phase: 'communities',
292
- percent: 82,
293
- message: 'Detecting code communities...',
294
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
295
- });
296
- communityResult = await processCommunities(graph, (message, progress) => {
297
- const communityProgress = 82 + (progress * 0.10);
298
- onProgress({
299
- phase: 'communities',
300
- percent: Math.round(communityProgress),
301
- message,
302
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
303
- });
304
- });
305
- if (isDev) {
306
- console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
754
+ if (isDev && enriched > 0) {
755
+ console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
756
+ }
757
+ }
758
+ // ── Final synthesis pass for whole-module-import languages ──
759
+ // Per-chunk synthesis (above) already ran incrementally. This final pass ensures
760
+ // any remaining files whose imports were not covered inline are also synthesized,
761
+ // and that Phase 14 type propagation has complete namedImportMap data.
762
+ const synthesized = synthesizeWildcardImportBindings(graph, ctx);
763
+ if (isDev && synthesized > 0) {
764
+ console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
765
+ }
766
+ // Free import resolution context — suffix index + resolve cache no longer needed
767
+ // (allPathObjects and importCtx hold ~94MB+ for large repos)
768
+ allPathObjects.length = 0;
769
+ importCtx.resolveCache.clear();
770
+ importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos)
771
+ importCtx.normalizedFileList = [];
772
+ return { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries };
773
+ }
774
+ /**
775
+ * Post-parse graph analysis: MRO, community detection, process extraction.
776
+ *
777
+ * @reads graph (all nodes and relationships from parse + resolve phases)
778
+ * @writes graph (Community nodes, Process nodes, MEMBER_OF edges, STEP_IN_PROCESS edges, OVERRIDES edges)
779
+ */
780
+ async function runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs) {
781
+ // ── Phase 4.5: Method Resolution Order ──────────────────────────────
782
+ onProgress({
783
+ phase: 'parsing',
784
+ percent: 81,
785
+ message: 'Computing method resolution order...',
786
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
787
+ });
788
+ const mroResult = computeMRO(graph);
789
+ if (isDev && mroResult.entries.length > 0) {
790
+ console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
791
+ }
792
+ // ── Phase 5: Communities ───────────────────────────────────────────
793
+ onProgress({
794
+ phase: 'communities',
795
+ percent: 82,
796
+ message: 'Detecting code communities...',
797
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
798
+ });
799
+ const communityResult = await processCommunities(graph, (message, progress) => {
800
+ const communityProgress = 82 + (progress * 0.10);
801
+ onProgress({
802
+ phase: 'communities',
803
+ percent: Math.round(communityProgress),
804
+ message,
805
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
806
+ });
807
+ });
808
+ if (isDev) {
809
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
810
+ }
811
+ communityResult.communities.forEach(comm => {
812
+ graph.addNode({
813
+ id: comm.id,
814
+ label: 'Community',
815
+ properties: {
816
+ name: comm.label,
817
+ filePath: '',
818
+ heuristicLabel: comm.heuristicLabel,
819
+ cohesion: comm.cohesion,
820
+ symbolCount: comm.symbolCount,
821
+ }
822
+ });
823
+ });
824
+ communityResult.memberships.forEach(membership => {
825
+ graph.addRelationship({
826
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
827
+ type: 'MEMBER_OF',
828
+ sourceId: membership.nodeId,
829
+ targetId: membership.communityId,
830
+ confidence: 1.0,
831
+ reason: 'leiden-algorithm',
832
+ });
833
+ });
834
+ // ── Phase 6: Processes ─────────────────────────────────────────────
835
+ onProgress({
836
+ phase: 'processes',
837
+ percent: 94,
838
+ message: 'Detecting execution flows...',
839
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
840
+ });
841
+ let symbolCount = 0;
842
+ graph.forEachNode(n => { if (n.label !== 'File')
843
+ symbolCount++; });
844
+ const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
845
+ const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
846
+ const processProgress = 94 + (progress * 0.05);
847
+ onProgress({
848
+ phase: 'processes',
849
+ percent: Math.round(processProgress),
850
+ message,
851
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
852
+ });
853
+ }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
854
+ if (isDev) {
855
+ console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
856
+ }
857
+ processResult.processes.forEach(proc => {
858
+ graph.addNode({
859
+ id: proc.id,
860
+ label: 'Process',
861
+ properties: {
862
+ name: proc.label,
863
+ filePath: '',
864
+ heuristicLabel: proc.heuristicLabel,
865
+ processType: proc.processType,
866
+ stepCount: proc.stepCount,
867
+ communities: proc.communities,
868
+ entryPointId: proc.entryPointId,
869
+ terminalId: proc.terminalId,
870
+ }
871
+ });
872
+ });
873
+ processResult.steps.forEach(step => {
874
+ graph.addRelationship({
875
+ id: `${step.nodeId}_step_${step.step}_${step.processId}`,
876
+ type: 'STEP_IN_PROCESS',
877
+ sourceId: step.nodeId,
878
+ targetId: step.processId,
879
+ confidence: 1.0,
880
+ reason: 'trace-detection',
881
+ step: step.step,
882
+ });
883
+ });
884
+ // Link Route and Tool nodes to Processes via reverse index (file → node id)
885
+ if ((routeRegistry?.size ?? 0) > 0 || (toolDefs?.length ?? 0) > 0) {
886
+ // Reverse indexes: file → all route URLs / tool names (handles multi-route files)
887
+ const routesByFile = new Map();
888
+ if (routeRegistry) {
889
+ for (const [url, entry] of routeRegistry) {
890
+ let list = routesByFile.get(entry.filePath);
891
+ if (!list) {
892
+ list = [];
893
+ routesByFile.set(entry.filePath, list);
894
+ }
895
+ list.push(url);
896
+ }
897
+ }
898
+ const toolsByFile = new Map();
899
+ if (toolDefs) {
900
+ for (const td of toolDefs) {
901
+ let list = toolsByFile.get(td.filePath);
902
+ if (!list) {
903
+ list = [];
904
+ toolsByFile.set(td.filePath, list);
905
+ }
906
+ list.push(td.name);
907
+ }
908
+ }
909
+ let linked = 0;
910
+ for (const proc of processResult.processes) {
911
+ if (!proc.entryPointId)
912
+ continue;
913
+ const entryNode = graph.getNode(proc.entryPointId);
914
+ if (!entryNode)
915
+ continue;
916
+ const entryFile = entryNode.properties.filePath;
917
+ if (!entryFile)
918
+ continue;
919
+ const routeURLs = routesByFile.get(entryFile);
920
+ if (routeURLs) {
921
+ for (const routeURL of routeURLs) {
922
+ const routeNodeId = generateId('Route', routeURL);
923
+ graph.addRelationship({
924
+ id: generateId('ENTRY_POINT_OF', `${routeNodeId}->${proc.id}`),
925
+ sourceId: routeNodeId,
926
+ targetId: proc.id,
927
+ type: 'ENTRY_POINT_OF',
928
+ confidence: 0.85,
929
+ reason: 'route-handler-entry-point',
930
+ });
931
+ linked++;
932
+ }
933
+ }
934
+ const toolNames = toolsByFile.get(entryFile);
935
+ if (toolNames) {
936
+ for (const toolName of toolNames) {
937
+ const toolNodeId = generateId('Tool', toolName);
938
+ graph.addRelationship({
939
+ id: generateId('ENTRY_POINT_OF', `${toolNodeId}->${proc.id}`),
940
+ sourceId: toolNodeId,
941
+ targetId: proc.id,
942
+ type: 'ENTRY_POINT_OF',
943
+ confidence: 0.85,
944
+ reason: 'tool-handler-entry-point',
945
+ });
946
+ linked++;
947
+ }
948
+ }
949
+ }
950
+ if (isDev && linked > 0) {
951
+ console.log(`🔗 Linked ${linked} Route/Tool nodes to execution flows`);
952
+ }
953
+ }
954
+ return { communityResult, processResult };
955
+ }
956
+ // ── Pipeline orchestrator ─────────────────────────────────────────────────
957
+ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
958
+ const graph = createKnowledgeGraph();
959
+ const ctx = createResolutionContext();
960
+ const pipelineStart = Date.now();
961
+ try {
962
+ // Phase 1+2: Scan paths, build structure, process markdown
963
+ const { scannedFiles, allPaths, totalFiles } = await runScanAndStructure(repoPath, graph, onProgress);
964
+ // Phase 3+4: Chunked parse + resolve (imports, calls, heritage, routes)
965
+ const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries } = await runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
966
+ const routeRegistry = new Map();
967
+ // Detect Expo Router app/ roots vs Next.js app/ roots (monorepo-safe).
968
+ const expoAppRoots = new Set();
969
+ const nextjsAppRoots = new Set();
970
+ const expoAppPaths = new Set();
971
+ for (const p of allPaths) {
972
+ const norm = p.replace(/\\/g, '/');
973
+ const appIdx = norm.lastIndexOf('app/');
974
+ if (appIdx < 0)
975
+ continue;
976
+ const root = norm.slice(0, appIdx + 4);
977
+ if (/\/_layout\.(tsx?|jsx?)$/.test(norm))
978
+ expoAppRoots.add(root);
979
+ if (/\/page\.(tsx?|jsx?)$/.test(norm))
980
+ nextjsAppRoots.add(root);
981
+ }
982
+ for (const root of nextjsAppRoots)
983
+ expoAppRoots.delete(root);
984
+ if (expoAppRoots.size > 0) {
985
+ for (const p of allPaths) {
986
+ const norm = p.replace(/\\/g, '/');
987
+ const appIdx = norm.lastIndexOf('app/');
988
+ if (appIdx >= 0 && expoAppRoots.has(norm.slice(0, appIdx + 4)))
989
+ expoAppPaths.add(p);
990
+ }
991
+ }
992
+ for (const p of allPaths) {
993
+ if (expoAppPaths.has(p)) {
994
+ const expoURL = expoFileToRouteURL(p);
995
+ if (expoURL && !routeRegistry.has(expoURL)) {
996
+ routeRegistry.set(expoURL, { filePath: p, source: 'expo-filesystem-route' });
997
+ continue;
998
+ }
999
+ }
1000
+ const nextjsURL = nextjsFileToRouteURL(p);
1001
+ if (nextjsURL && !routeRegistry.has(nextjsURL)) {
1002
+ routeRegistry.set(nextjsURL, { filePath: p, source: 'nextjs-filesystem-route' });
1003
+ continue;
1004
+ }
1005
+ if (p.endsWith('.php')) {
1006
+ const phpURL = phpFileToRouteURL(p);
1007
+ if (phpURL && !routeRegistry.has(phpURL)) {
1008
+ routeRegistry.set(phpURL, { filePath: p, source: 'php-file-route' });
1009
+ }
307
1010
  }
308
- communityResult.communities.forEach(comm => {
1011
+ }
1012
+ const ensureSlash = (path) => path.startsWith('/') ? path : '/' + path;
1013
+ let duplicateRoutes = 0;
1014
+ const addRoute = (url, entry) => {
1015
+ if (routeRegistry.has(url)) {
1016
+ duplicateRoutes++;
1017
+ return;
1018
+ }
1019
+ routeRegistry.set(url, entry);
1020
+ };
1021
+ for (const route of allExtractedRoutes) {
1022
+ if (!route.routePath)
1023
+ continue;
1024
+ addRoute(ensureSlash(route.routePath), { filePath: route.filePath, source: 'framework-route' });
1025
+ }
1026
+ for (const dr of allDecoratorRoutes) {
1027
+ addRoute(ensureSlash(dr.routePath), { filePath: dr.filePath, source: `decorator-${dr.decoratorName}` });
1028
+ }
1029
+ let handlerContents;
1030
+ if (routeRegistry.size > 0) {
1031
+ const handlerPaths = [...routeRegistry.values()].map(e => e.filePath);
1032
+ handlerContents = await readFileContents(repoPath, handlerPaths);
1033
+ for (const [routeURL, entry] of routeRegistry) {
1034
+ const { filePath: handlerPath, source: routeSource } = entry;
1035
+ const content = handlerContents.get(handlerPath);
1036
+ const { responseKeys, errorKeys } = content
1037
+ ? (handlerPath.endsWith(".php") ? extractPHPResponseShapes(content) : extractResponseShapes(content))
1038
+ : { responseKeys: undefined, errorKeys: undefined };
1039
+ const mwResult = content ? extractMiddlewareChain(content) : undefined;
1040
+ const middleware = mwResult?.chain;
1041
+ const routeNodeId = generateId('Route', routeURL);
309
1042
  graph.addNode({
310
- id: comm.id,
311
- label: 'Community',
1043
+ id: routeNodeId,
1044
+ label: 'Route',
312
1045
  properties: {
313
- name: comm.label,
314
- filePath: '',
315
- heuristicLabel: comm.heuristicLabel,
316
- cohesion: comm.cohesion,
317
- symbolCount: comm.symbolCount,
318
- }
1046
+ name: routeURL,
1047
+ filePath: handlerPath,
1048
+ ...(responseKeys ? { responseKeys } : {}),
1049
+ ...(errorKeys ? { errorKeys } : {}),
1050
+ ...(middleware && middleware.length > 0 ? { middleware } : {}),
1051
+ },
319
1052
  });
320
- });
321
- communityResult.memberships.forEach(membership => {
1053
+ const handlerFileId = generateId('File', handlerPath);
322
1054
  graph.addRelationship({
323
- id: `${membership.nodeId}_member_of_${membership.communityId}`,
324
- type: 'MEMBER_OF',
325
- sourceId: membership.nodeId,
326
- targetId: membership.communityId,
1055
+ id: generateId('HANDLES_ROUTE', `${handlerFileId}->${routeNodeId}`),
1056
+ sourceId: handlerFileId,
1057
+ targetId: routeNodeId,
1058
+ type: 'HANDLES_ROUTE',
327
1059
  confidence: 1.0,
328
- reason: 'leiden-algorithm',
329
- });
330
- });
331
- // ── Phase 6: Processes ─────────────────────────────────────────────
332
- onProgress({
333
- phase: 'processes',
334
- percent: 94,
335
- message: 'Detecting execution flows...',
336
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
337
- });
338
- let symbolCount = 0;
339
- graph.forEachNode(n => { if (n.label !== 'File')
340
- symbolCount++; });
341
- const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
342
- processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
343
- const processProgress = 94 + (progress * 0.05);
344
- onProgress({
345
- phase: 'processes',
346
- percent: Math.round(processProgress),
347
- message,
348
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
1060
+ reason: routeSource,
349
1061
  });
350
- }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
1062
+ }
351
1063
  if (isDev) {
352
- console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
1064
+ console.log(`🗺️ Route registry: ${routeRegistry.size} routes${duplicateRoutes > 0 ? ` (${duplicateRoutes} duplicate URLs skipped)` : ''}`);
353
1065
  }
354
- processResult.processes.forEach(proc => {
355
- graph.addNode({
356
- id: proc.id,
357
- label: 'Process',
358
- properties: {
359
- name: proc.label,
360
- filePath: '',
361
- heuristicLabel: proc.heuristicLabel,
362
- processType: proc.processType,
363
- stepCount: proc.stepCount,
364
- communities: proc.communities,
365
- entryPointId: proc.entryPointId,
366
- terminalId: proc.terminalId,
1066
+ }
1067
+ // ── Phase 3.5b: Link Next.js project-level middleware.ts to routes ──
1068
+ if (routeRegistry.size > 0) {
1069
+ const middlewareCandidates = allPaths.filter(p => p === 'middleware.ts' || p === 'middleware.js' || p === 'middleware.tsx' || p === 'middleware.jsx' ||
1070
+ p === 'src/middleware.ts' || p === 'src/middleware.js' || p === 'src/middleware.tsx' || p === 'src/middleware.jsx');
1071
+ if (middlewareCandidates.length > 0) {
1072
+ const mwContents = await readFileContents(repoPath, middlewareCandidates);
1073
+ for (const [mwPath, mwContent] of mwContents) {
1074
+ const config = extractNextjsMiddlewareConfig(mwContent);
1075
+ if (!config)
1076
+ continue;
1077
+ const mwLabel = config.wrappedFunctions.length > 0
1078
+ ? config.wrappedFunctions
1079
+ : [config.exportedName];
1080
+ // Pre-compile matchers once per middleware file
1081
+ const compiled = config.matchers.map(compileMatcher).filter((m) => m !== null);
1082
+ let linkedCount = 0;
1083
+ for (const [routeURL] of routeRegistry) {
1084
+ const matches = compiled.length === 0 ||
1085
+ compiled.some(cm => compiledMatcherMatchesRoute(cm, routeURL));
1086
+ if (!matches)
1087
+ continue;
1088
+ const routeNodeId = generateId('Route', routeURL);
1089
+ const existing = graph.getNode(routeNodeId);
1090
+ if (!existing)
1091
+ continue;
1092
+ const currentMw = existing.properties.middleware ?? [];
1093
+ // Prepend project-level middleware (runs before handler-level wrappers)
1094
+ existing.properties.middleware = [...mwLabel, ...currentMw.filter(m => !mwLabel.includes(m))];
1095
+ linkedCount++;
1096
+ }
1097
+ if (isDev && linkedCount > 0) {
1098
+ console.log(`🛡️ Linked ${mwPath} middleware [${mwLabel.join(', ')}] to ${linkedCount} routes`);
367
1099
  }
1100
+ }
1101
+ }
1102
+ }
1103
+ // Scan HTML/PHP/template files for <form action="/path"> and AJAX url patterns
1104
+ // Scan HTML/template files for <form action="/path"> and AJAX url patterns
1105
+ // Skip .php — already parsed by tree-sitter with http_client/fetch queries
1106
+ const htmlCandidates = allPaths.filter(p => p.endsWith('.html') || p.endsWith('.htm') ||
1107
+ p.endsWith('.ejs') || p.endsWith('.hbs') || p.endsWith('.blade.php'));
1108
+ if (htmlCandidates.length > 0 && routeRegistry.size > 0) {
1109
+ const htmlContents = await readFileContents(repoPath, htmlCandidates);
1110
+ const htmlPatterns = [/action=["']([^"']+)["']/g, /url:\s*["']([^"']+)["']/g];
1111
+ for (const [filePath, content] of htmlContents) {
1112
+ for (const pattern of htmlPatterns) {
1113
+ pattern.lastIndex = 0;
1114
+ let match;
1115
+ while ((match = pattern.exec(content)) !== null) {
1116
+ const normalized = normalizeFetchURL(match[1]);
1117
+ if (normalized) {
1118
+ allFetchCalls.push({ filePath, fetchURL: normalized, lineNumber: 0 });
1119
+ }
1120
+ }
1121
+ }
1122
+ }
1123
+ }
1124
+ // ── Phase 3.5c: Extract Expo Router navigation patterns ──
1125
+ if (expoAppPaths.size > 0 && routeRegistry.size > 0) {
1126
+ const unreadExpoPaths = [...expoAppPaths].filter(p => !handlerContents?.has(p));
1127
+ const extraContents = unreadExpoPaths.length > 0 ? await readFileContents(repoPath, unreadExpoPaths) : new Map();
1128
+ const allExpoContents = new Map([...(handlerContents ?? new Map()), ...extraContents]);
1129
+ for (const [filePath, content] of allExpoContents) {
1130
+ if (!expoAppPaths.has(filePath))
1131
+ continue;
1132
+ for (const pattern of EXPO_NAV_PATTERNS) {
1133
+ pattern.lastIndex = 0;
1134
+ let match;
1135
+ while ((match = pattern.exec(content)) !== null) {
1136
+ const url = match[2] ?? match[1];
1137
+ if (url && url.startsWith('/')) {
1138
+ allFetchCalls.push({ filePath, fetchURL: url, lineNumber: 0 });
1139
+ }
1140
+ }
1141
+ }
1142
+ }
1143
+ }
1144
+ if (routeRegistry.size > 0 && allFetchCalls.length > 0) {
1145
+ const routeURLToFile = new Map();
1146
+ for (const [url, entry] of routeRegistry)
1147
+ routeURLToFile.set(url, entry.filePath);
1148
+ // Read consumer file contents so we can extract property access patterns
1149
+ const consumerPaths = [...new Set(allFetchCalls.map(c => c.filePath))];
1150
+ const consumerContents = await readFileContents(repoPath, consumerPaths);
1151
+ processNextjsFetchRoutes(graph, allFetchCalls, routeURLToFile, consumerContents);
1152
+ if (isDev) {
1153
+ console.log(`🔗 Processed ${allFetchCalls.length} fetch() calls against ${routeRegistry.size} routes`);
1154
+ }
1155
+ }
1156
+ // ── Phase 3.6: Tool Detection (MCP/RPC) ──────────────────────────
1157
+ const toolDefs = [];
1158
+ const seenToolNames = new Set();
1159
+ for (const td of allToolDefs) {
1160
+ if (seenToolNames.has(td.toolName))
1161
+ continue;
1162
+ seenToolNames.add(td.toolName);
1163
+ toolDefs.push({ name: td.toolName, filePath: td.filePath, description: td.description });
1164
+ }
1165
+ // TS tool definition arrays — require inputSchema nearby to distinguish from config objects
1166
+ const toolCandidatePaths = allPaths.filter(p => (p.endsWith('.ts') || p.endsWith('.js')) && p.toLowerCase().includes('tool')
1167
+ && !p.includes('node_modules') && !p.includes('test') && !p.includes('__'));
1168
+ if (toolCandidatePaths.length > 0) {
1169
+ const toolContents = await readFileContents(repoPath, toolCandidatePaths);
1170
+ for (const [filePath, content] of toolContents) {
1171
+ // Only scan files that contain 'inputSchema' — this is the MCP tool signature
1172
+ if (!content.includes('inputSchema'))
1173
+ continue;
1174
+ const toolPattern = /name:\s*['"](\w+)['"]\s*,\s*\n?\s*description:\s*[`'"]([\s\S]*?)[`'"]/g;
1175
+ let match;
1176
+ while ((match = toolPattern.exec(content)) !== null) {
1177
+ const name = match[1];
1178
+ if (seenToolNames.has(name))
1179
+ continue;
1180
+ seenToolNames.add(name);
1181
+ toolDefs.push({ name, filePath, description: match[2].slice(0, 200).replace(/\n/g, ' ').trim() });
1182
+ }
1183
+ }
1184
+ }
1185
+ // Create Tool nodes and HANDLES_TOOL edges
1186
+ if (toolDefs.length > 0) {
1187
+ for (const td of toolDefs) {
1188
+ const toolNodeId = generateId('Tool', td.name);
1189
+ graph.addNode({
1190
+ id: toolNodeId,
1191
+ label: 'Tool',
1192
+ properties: { name: td.name, filePath: td.filePath, description: td.description },
368
1193
  });
369
- });
370
- processResult.steps.forEach(step => {
1194
+ const handlerFileId = generateId('File', td.filePath);
371
1195
  graph.addRelationship({
372
- id: `${step.nodeId}_step_${step.step}_${step.processId}`,
373
- type: 'STEP_IN_PROCESS',
374
- sourceId: step.nodeId,
375
- targetId: step.processId,
1196
+ id: generateId('HANDLES_TOOL', `${handlerFileId}->${toolNodeId}`),
1197
+ sourceId: handlerFileId,
1198
+ targetId: toolNodeId,
1199
+ type: 'HANDLES_TOOL',
376
1200
  confidence: 1.0,
377
- reason: 'trace-detection',
378
- step: step.step,
1201
+ reason: 'tool-definition',
379
1202
  });
380
- });
1203
+ }
1204
+ if (isDev) {
1205
+ console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`);
1206
+ }
1207
+ }
1208
+ // ── Phase 3.7: ORM Dataflow Detection (Prisma + Supabase) ──────────
1209
+ if (allORMQueries.length > 0) {
1210
+ processORMQueries(graph, allORMQueries, isDev);
1211
+ }
1212
+ // ── Phase 14: Cross-file binding propagation (topological level sort) ──
1213
+ await runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
1214
+ // Post-parse graph analysis (MRO, communities, processes)
1215
+ let communityResult;
1216
+ let processResult;
1217
+ if (!options?.skipGraphPhases) {
1218
+ const graphResults = await runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs);
1219
+ communityResult = graphResults.communityResult;
1220
+ processResult = graphResults.processResult;
381
1221
  }
382
1222
  onProgress({
383
1223
  phase: 'complete',
@@ -391,11 +1231,89 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
391
1231
  nodesCreated: graph.nodeCount
392
1232
  },
393
1233
  });
394
- astCache.clear();
395
1234
  return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
396
1235
  }
397
1236
  catch (error) {
398
- cleanup();
1237
+ ctx.clear();
399
1238
  throw error;
400
1239
  }
401
1240
  };
1241
+ // Inline ORM regex extraction (avoids importing parse-worker which has worker-only code)
1242
+ const PRISMA_QUERY_RE = /\bprisma\.(\w+)\.(findMany|findFirst|findUnique|findUniqueOrThrow|findFirstOrThrow|create|createMany|update|updateMany|delete|deleteMany|upsert|count|aggregate|groupBy)\s*\(/g;
1243
+ const SUPABASE_QUERY_RE = /\bsupabase\.from\s*\(\s*['"](\w+)['"]\s*\)\s*\.(select|insert|update|delete|upsert)\s*\(/g;
1244
+ function extractORMQueriesInline(filePath, content, out) {
1245
+ const hasPrisma = content.includes('prisma.');
1246
+ const hasSupabase = content.includes('supabase.from');
1247
+ if (!hasPrisma && !hasSupabase)
1248
+ return;
1249
+ if (hasPrisma) {
1250
+ PRISMA_QUERY_RE.lastIndex = 0;
1251
+ let m;
1252
+ while ((m = PRISMA_QUERY_RE.exec(content)) !== null) {
1253
+ const model = m[1];
1254
+ if (model.startsWith('$'))
1255
+ continue;
1256
+ out.push({ filePath, orm: 'prisma', model, method: m[2], lineNumber: content.substring(0, m.index).split('\n').length - 1 });
1257
+ }
1258
+ }
1259
+ if (hasSupabase) {
1260
+ SUPABASE_QUERY_RE.lastIndex = 0;
1261
+ let m;
1262
+ while ((m = SUPABASE_QUERY_RE.exec(content)) !== null) {
1263
+ out.push({ filePath, orm: 'supabase', model: m[1], method: m[2], lineNumber: content.substring(0, m.index).split('\n').length - 1 });
1264
+ }
1265
+ }
1266
+ }
1267
+ // ============================================================================
1268
+ // ORM Query Processing — creates QUERIES edges from callers to model nodes
1269
+ // ============================================================================
1270
+ function processORMQueries(graph, queries, isDev) {
1271
+ const modelNodes = new Map();
1272
+ const seenEdges = new Set();
1273
+ let edgesCreated = 0;
1274
+ for (const q of queries) {
1275
+ const modelKey = `${q.orm}:${q.model}`;
1276
+ let modelNodeId = modelNodes.get(modelKey);
1277
+ if (!modelNodeId) {
1278
+ const candidateIds = [
1279
+ generateId('Class', `${q.model}`),
1280
+ generateId('Interface', `${q.model}`),
1281
+ generateId('CodeElement', `${q.model}`),
1282
+ ];
1283
+ const existing = candidateIds.find(id => graph.getNode(id));
1284
+ if (existing) {
1285
+ modelNodeId = existing;
1286
+ }
1287
+ else {
1288
+ modelNodeId = generateId('CodeElement', `${q.orm}:${q.model}`);
1289
+ graph.addNode({
1290
+ id: modelNodeId,
1291
+ label: 'CodeElement',
1292
+ properties: {
1293
+ name: q.model,
1294
+ filePath: '',
1295
+ description: `${q.orm} model/table: ${q.model}`,
1296
+ },
1297
+ });
1298
+ }
1299
+ modelNodes.set(modelKey, modelNodeId);
1300
+ }
1301
+ const fileId = generateId('File', q.filePath);
1302
+ const edgeKey = `${fileId}->${modelNodeId}:${q.method}`;
1303
+ if (seenEdges.has(edgeKey))
1304
+ continue;
1305
+ seenEdges.add(edgeKey);
1306
+ graph.addRelationship({
1307
+ id: generateId('QUERIES', edgeKey),
1308
+ sourceId: fileId,
1309
+ targetId: modelNodeId,
1310
+ type: 'QUERIES',
1311
+ confidence: 0.9,
1312
+ reason: `${q.orm}-${q.method}`,
1313
+ });
1314
+ edgesCreated++;
1315
+ }
1316
+ if (isDev) {
1317
+ console.log(`ORM dataflow: ${edgesCreated} QUERIES edges, ${modelNodes.size} models (${queries.length} total calls)`);
1318
+ }
1319
+ }