gitnexus 1.4.8 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. package/README.md +7 -0
  2. package/dist/cli/index-repo.d.ts +15 -0
  3. package/dist/cli/index-repo.js +115 -0
  4. package/dist/cli/index.js +11 -2
  5. package/dist/cli/setup.js +12 -9
  6. package/dist/cli/wiki.d.ts +4 -0
  7. package/dist/cli/wiki.js +174 -53
  8. package/dist/config/supported-languages.d.ts +7 -5
  9. package/dist/config/supported-languages.js +6 -4
  10. package/dist/core/graph/graph.js +9 -1
  11. package/dist/core/graph/types.d.ts +10 -2
  12. package/dist/core/ingestion/call-processor.d.ts +18 -1
  13. package/dist/core/ingestion/call-processor.js +297 -38
  14. package/dist/core/ingestion/call-routing.d.ts +3 -18
  15. package/dist/core/ingestion/call-routing.js +0 -19
  16. package/dist/core/ingestion/cobol/cobol-copy-expander.d.ts +57 -0
  17. package/dist/core/ingestion/cobol/cobol-copy-expander.js +385 -0
  18. package/dist/core/ingestion/cobol/cobol-preprocessor.d.ts +210 -0
  19. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1509 -0
  20. package/dist/core/ingestion/cobol/jcl-parser.d.ts +68 -0
  21. package/dist/core/ingestion/cobol/jcl-parser.js +217 -0
  22. package/dist/core/ingestion/cobol/jcl-processor.d.ts +33 -0
  23. package/dist/core/ingestion/cobol/jcl-processor.js +229 -0
  24. package/dist/core/ingestion/cobol-processor.d.ts +54 -0
  25. package/dist/core/ingestion/cobol-processor.js +1186 -0
  26. package/dist/core/ingestion/entry-point-scoring.d.ts +17 -0
  27. package/dist/core/ingestion/entry-point-scoring.js +18 -4
  28. package/dist/core/ingestion/export-detection.d.ts +47 -8
  29. package/dist/core/ingestion/export-detection.js +29 -50
  30. package/dist/core/ingestion/field-extractor.d.ts +29 -0
  31. package/dist/core/ingestion/field-extractor.js +25 -0
  32. package/dist/core/ingestion/field-extractors/configs/c-cpp.d.ts +3 -0
  33. package/dist/core/ingestion/field-extractors/configs/c-cpp.js +108 -0
  34. package/dist/core/ingestion/field-extractors/configs/csharp.d.ts +8 -0
  35. package/dist/core/ingestion/field-extractors/configs/csharp.js +73 -0
  36. package/dist/core/ingestion/field-extractors/configs/dart.d.ts +8 -0
  37. package/dist/core/ingestion/field-extractors/configs/dart.js +76 -0
  38. package/dist/core/ingestion/field-extractors/configs/go.d.ts +11 -0
  39. package/dist/core/ingestion/field-extractors/configs/go.js +64 -0
  40. package/dist/core/ingestion/field-extractors/configs/helpers.d.ts +44 -0
  41. package/dist/core/ingestion/field-extractors/configs/helpers.js +134 -0
  42. package/dist/core/ingestion/field-extractors/configs/jvm.d.ts +3 -0
  43. package/dist/core/ingestion/field-extractors/configs/jvm.js +118 -0
  44. package/dist/core/ingestion/field-extractors/configs/php.d.ts +8 -0
  45. package/dist/core/ingestion/field-extractors/configs/php.js +67 -0
  46. package/dist/core/ingestion/field-extractors/configs/python.d.ts +12 -0
  47. package/dist/core/ingestion/field-extractors/configs/python.js +91 -0
  48. package/dist/core/ingestion/field-extractors/configs/ruby.d.ts +16 -0
  49. package/dist/core/ingestion/field-extractors/configs/ruby.js +75 -0
  50. package/dist/core/ingestion/field-extractors/configs/rust.d.ts +9 -0
  51. package/dist/core/ingestion/field-extractors/configs/rust.js +55 -0
  52. package/dist/core/ingestion/field-extractors/configs/swift.d.ts +8 -0
  53. package/dist/core/ingestion/field-extractors/configs/swift.js +63 -0
  54. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.d.ts +3 -0
  55. package/dist/core/ingestion/field-extractors/configs/typescript-javascript.js +60 -0
  56. package/dist/core/ingestion/field-extractors/generic.d.ts +46 -0
  57. package/dist/core/ingestion/field-extractors/generic.js +111 -0
  58. package/dist/core/ingestion/field-extractors/typescript.d.ts +77 -0
  59. package/dist/core/ingestion/field-extractors/typescript.js +291 -0
  60. package/dist/core/ingestion/field-types.d.ts +59 -0
  61. package/dist/core/ingestion/field-types.js +2 -0
  62. package/dist/core/ingestion/framework-detection.d.ts +87 -0
  63. package/dist/core/ingestion/framework-detection.js +65 -2
  64. package/dist/core/ingestion/heritage-processor.js +15 -17
  65. package/dist/core/ingestion/import-processor.d.ts +9 -10
  66. package/dist/core/ingestion/import-processor.js +59 -14
  67. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.d.ts +6 -9
  68. package/dist/core/ingestion/{resolvers → import-resolvers}/csharp.js +20 -2
  69. package/dist/core/ingestion/import-resolvers/dart.d.ts +7 -0
  70. package/dist/core/ingestion/import-resolvers/dart.js +44 -0
  71. package/dist/core/ingestion/{resolvers → import-resolvers}/go.d.ts +4 -5
  72. package/dist/core/ingestion/{resolvers → import-resolvers}/go.js +17 -0
  73. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.d.ts +9 -1
  74. package/dist/core/ingestion/{resolvers → import-resolvers}/jvm.js +56 -0
  75. package/dist/core/ingestion/{resolvers → import-resolvers}/php.d.ts +6 -10
  76. package/dist/core/ingestion/{resolvers → import-resolvers}/php.js +7 -2
  77. package/dist/core/ingestion/{resolvers → import-resolvers}/python.d.ts +9 -3
  78. package/dist/core/ingestion/{resolvers → import-resolvers}/python.js +35 -3
  79. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.d.ts +5 -2
  80. package/dist/core/ingestion/{resolvers → import-resolvers}/ruby.js +7 -2
  81. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.d.ts +5 -2
  82. package/dist/core/ingestion/{resolvers → import-resolvers}/rust.js +41 -2
  83. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.d.ts +15 -7
  84. package/dist/core/ingestion/{resolvers → import-resolvers}/standard.js +22 -3
  85. package/dist/core/ingestion/import-resolvers/swift.d.ts +7 -0
  86. package/dist/core/ingestion/import-resolvers/swift.js +23 -0
  87. package/dist/core/ingestion/import-resolvers/types.d.ts +44 -0
  88. package/dist/core/ingestion/import-resolvers/types.js +6 -0
  89. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.d.ts +0 -3
  90. package/dist/core/ingestion/{resolvers → import-resolvers}/utils.js +0 -9
  91. package/dist/core/ingestion/language-config.d.ts +4 -1
  92. package/dist/core/ingestion/language-provider.d.ts +121 -0
  93. package/dist/core/ingestion/language-provider.js +24 -0
  94. package/dist/core/ingestion/languages/c-cpp.d.ts +12 -0
  95. package/dist/core/ingestion/languages/c-cpp.js +71 -0
  96. package/dist/core/ingestion/languages/cobol.d.ts +1 -0
  97. package/dist/core/ingestion/languages/cobol.js +26 -0
  98. package/dist/core/ingestion/languages/csharp.d.ts +8 -0
  99. package/dist/core/ingestion/languages/csharp.js +49 -0
  100. package/dist/core/ingestion/languages/dart.d.ts +12 -0
  101. package/dist/core/ingestion/languages/dart.js +58 -0
  102. package/dist/core/ingestion/languages/go.d.ts +11 -0
  103. package/dist/core/ingestion/languages/go.js +28 -0
  104. package/dist/core/ingestion/languages/index.d.ts +38 -0
  105. package/dist/core/ingestion/languages/index.js +63 -0
  106. package/dist/core/ingestion/languages/java.d.ts +9 -0
  107. package/dist/core/ingestion/languages/java.js +29 -0
  108. package/dist/core/ingestion/languages/kotlin.d.ts +9 -0
  109. package/dist/core/ingestion/languages/kotlin.js +53 -0
  110. package/dist/core/ingestion/languages/php.d.ts +8 -0
  111. package/dist/core/ingestion/languages/php.js +145 -0
  112. package/dist/core/ingestion/languages/python.d.ts +12 -0
  113. package/dist/core/ingestion/languages/python.js +39 -0
  114. package/dist/core/ingestion/languages/ruby.d.ts +9 -0
  115. package/dist/core/ingestion/languages/ruby.js +44 -0
  116. package/dist/core/ingestion/languages/rust.d.ts +12 -0
  117. package/dist/core/ingestion/languages/rust.js +44 -0
  118. package/dist/core/ingestion/languages/swift.d.ts +12 -0
  119. package/dist/core/ingestion/languages/swift.js +133 -0
  120. package/dist/core/ingestion/languages/typescript.d.ts +10 -0
  121. package/dist/core/ingestion/languages/typescript.js +60 -0
  122. package/dist/core/ingestion/mro-processor.js +14 -15
  123. package/dist/core/ingestion/{named-binding-extraction.d.ts → named-binding-processor.d.ts} +0 -9
  124. package/dist/core/ingestion/named-binding-processor.js +42 -0
  125. package/dist/core/ingestion/named-bindings/csharp.d.ts +3 -0
  126. package/dist/core/ingestion/named-bindings/csharp.js +37 -0
  127. package/dist/core/ingestion/named-bindings/java.d.ts +3 -0
  128. package/dist/core/ingestion/named-bindings/java.js +29 -0
  129. package/dist/core/ingestion/named-bindings/kotlin.d.ts +3 -0
  130. package/dist/core/ingestion/named-bindings/kotlin.js +36 -0
  131. package/dist/core/ingestion/named-bindings/php.d.ts +3 -0
  132. package/dist/core/ingestion/named-bindings/php.js +61 -0
  133. package/dist/core/ingestion/named-bindings/python.d.ts +3 -0
  134. package/dist/core/ingestion/named-bindings/python.js +49 -0
  135. package/dist/core/ingestion/named-bindings/rust.d.ts +3 -0
  136. package/dist/core/ingestion/named-bindings/rust.js +64 -0
  137. package/dist/core/ingestion/named-bindings/types.d.ts +16 -0
  138. package/dist/core/ingestion/named-bindings/types.js +6 -0
  139. package/dist/core/ingestion/named-bindings/typescript.d.ts +3 -0
  140. package/dist/core/ingestion/named-bindings/typescript.js +58 -0
  141. package/dist/core/ingestion/parsing-processor.d.ts +5 -1
  142. package/dist/core/ingestion/parsing-processor.js +115 -16
  143. package/dist/core/ingestion/pipeline.js +925 -424
  144. package/dist/core/ingestion/resolution-context.js +1 -1
  145. package/dist/core/ingestion/route-extractors/expo.d.ts +1 -0
  146. package/dist/core/ingestion/route-extractors/expo.js +36 -0
  147. package/dist/core/ingestion/route-extractors/middleware.d.ts +47 -0
  148. package/dist/core/ingestion/route-extractors/middleware.js +143 -0
  149. package/dist/core/ingestion/route-extractors/nextjs.d.ts +3 -0
  150. package/dist/core/ingestion/route-extractors/nextjs.js +76 -0
  151. package/dist/core/ingestion/route-extractors/php.d.ts +7 -0
  152. package/dist/core/ingestion/route-extractors/php.js +21 -0
  153. package/dist/core/ingestion/route-extractors/response-shapes.d.ts +20 -0
  154. package/dist/core/ingestion/route-extractors/response-shapes.js +290 -0
  155. package/dist/core/ingestion/tree-sitter-queries.d.ts +8 -7
  156. package/dist/core/ingestion/tree-sitter-queries.js +231 -9
  157. package/dist/core/ingestion/type-env.d.ts +14 -17
  158. package/dist/core/ingestion/type-env.js +66 -14
  159. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +1 -1
  160. package/dist/core/ingestion/type-extractors/csharp.js +1 -1
  161. package/dist/core/ingestion/type-extractors/dart.d.ts +15 -0
  162. package/dist/core/ingestion/type-extractors/dart.js +371 -0
  163. package/dist/core/ingestion/type-extractors/jvm.js +1 -1
  164. package/dist/core/ingestion/type-extractors/shared.d.ts +1 -13
  165. package/dist/core/ingestion/type-extractors/shared.js +9 -102
  166. package/dist/core/ingestion/type-extractors/swift.js +334 -4
  167. package/dist/core/ingestion/type-extractors/types.d.ts +3 -1
  168. package/dist/core/ingestion/{ast-helpers.d.ts → utils/ast-helpers.d.ts} +16 -13
  169. package/dist/core/ingestion/{ast-helpers.js → utils/ast-helpers.js} +111 -32
  170. package/dist/core/ingestion/{call-analysis.js → utils/call-analysis.js} +37 -0
  171. package/dist/core/ingestion/utils/event-loop.d.ts +5 -0
  172. package/dist/core/ingestion/utils/event-loop.js +5 -0
  173. package/dist/core/ingestion/utils/language-detection.d.ts +9 -0
  174. package/dist/core/ingestion/utils/language-detection.js +70 -0
  175. package/dist/core/ingestion/utils/verbose.d.ts +1 -0
  176. package/dist/core/ingestion/utils/verbose.js +7 -0
  177. package/dist/core/ingestion/workers/parse-worker.d.ts +43 -2
  178. package/dist/core/ingestion/workers/parse-worker.js +361 -150
  179. package/dist/core/lbug/csv-generator.js +34 -1
  180. package/dist/core/lbug/lbug-adapter.js +6 -0
  181. package/dist/core/lbug/schema.d.ts +5 -3
  182. package/dist/core/lbug/schema.js +39 -2
  183. package/dist/core/tree-sitter/parser-loader.js +7 -1
  184. package/dist/core/wiki/cursor-client.d.ts +31 -0
  185. package/dist/core/wiki/cursor-client.js +127 -0
  186. package/dist/core/wiki/generator.d.ts +28 -9
  187. package/dist/core/wiki/generator.js +115 -18
  188. package/dist/core/wiki/graph-queries.d.ts +4 -0
  189. package/dist/core/wiki/graph-queries.js +7 -1
  190. package/dist/core/wiki/llm-client.d.ts +2 -0
  191. package/dist/core/wiki/llm-client.js +8 -4
  192. package/dist/core/wiki/prompts.d.ts +3 -3
  193. package/dist/core/wiki/prompts.js +6 -0
  194. package/dist/mcp/core/lbug-adapter.d.ts +5 -0
  195. package/dist/mcp/core/lbug-adapter.js +11 -1
  196. package/dist/mcp/local/local-backend.d.ts +16 -5
  197. package/dist/mcp/local/local-backend.js +711 -74
  198. package/dist/mcp/tools.js +71 -2
  199. package/dist/storage/repo-manager.d.ts +3 -0
  200. package/package.json +17 -16
  201. package/dist/core/ingestion/import-resolution.d.ts +0 -101
  202. package/dist/core/ingestion/import-resolution.js +0 -251
  203. package/dist/core/ingestion/named-binding-extraction.js +0 -373
  204. package/dist/core/ingestion/resolvers/index.d.ts +0 -18
  205. package/dist/core/ingestion/resolvers/index.js +0 -13
  206. package/dist/core/ingestion/type-extractors/index.d.ts +0 -22
  207. package/dist/core/ingestion/type-extractors/index.js +0 -31
  208. package/dist/core/ingestion/utils.d.ts +0 -20
  209. package/dist/core/ingestion/utils.js +0 -242
  210. package/scripts/patch-tree-sitter-swift.cjs +0 -74
  211. /package/dist/core/ingestion/{call-analysis.d.ts → utils/call-analysis.d.ts} +0 -0
@@ -1,10 +1,17 @@
1
1
  import { createKnowledgeGraph } from '../graph/graph.js';
2
2
  import { processStructure } from './structure-processor.js';
3
3
  import { processMarkdown } from './markdown-processor.js';
4
+ import { processCobol, isCobolFile, isJclFile } from './cobol-processor.js';
4
5
  import { processParsing } from './parsing-processor.js';
5
6
  import { processImports, processImportsFromExtracted, buildImportResolutionContext } from './import-processor.js';
6
- import { EMPTY_INDEX } from './resolvers/index.js';
7
- import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, seedCrossFileReceiverTypes, buildImportedReturnTypes, buildImportedRawReturnTypes, buildExportedTypeMapFromGraph } from './call-processor.js';
7
+ import { EMPTY_INDEX } from './import-resolvers/utils.js';
8
+ import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, processNextjsFetchRoutes, extractFetchCallsFromFiles, seedCrossFileReceiverTypes, buildImportedReturnTypes, buildImportedRawReturnTypes, buildExportedTypeMapFromGraph } from './call-processor.js';
9
+ import { nextjsFileToRouteURL, normalizeFetchURL } from './route-extractors/nextjs.js';
10
+ import { expoFileToRouteURL } from './route-extractors/expo.js';
11
+ import { phpFileToRouteURL } from './route-extractors/php.js';
12
+ import { extractResponseShapes, extractPHPResponseShapes } from './route-extractors/response-shapes.js';
13
+ import { extractMiddlewareChain, extractNextjsMiddlewareConfig, compileMatcher, compiledMatcherMatchesRoute } from './route-extractors/middleware.js';
14
+ import { generateId } from '../../lib/utils.js';
8
15
  import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
9
16
  import { computeMRO } from './mro-processor.js';
10
17
  import { processCommunities } from './community-processor.js';
@@ -12,14 +19,18 @@ import { processProcesses } from './process-processor.js';
12
19
  import { createResolutionContext } from './resolution-context.js';
13
20
  import { createASTCache } from './ast-cache.js';
14
21
  import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
15
- import { getLanguageFromFilename } from './utils.js';
22
+ import { getLanguageFromFilename } from './utils/language-detection.js';
16
23
  import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
17
- import { SupportedLanguages } from '../../config/supported-languages.js';
24
+ import { providers, getProviderForFile } from './languages/index.js';
18
25
  import { createWorkerPool } from './workers/worker-pool.js';
19
26
  import fs from 'node:fs';
20
27
  import path from 'node:path';
21
28
  import { fileURLToPath, pathToFileURL } from 'node:url';
22
29
  const isDev = process.env.NODE_ENV === 'development';
30
+ const EXPO_NAV_PATTERNS = [
31
+ /router\.(push|replace|navigate)\(\s*['"`]([^'"`]+)['"`]/g,
32
+ /<Link\s+[^>]*href=\s*['"`]([^'"`]+)['"`]/g,
33
+ ];
23
34
  /** Kahn's algorithm: returns files grouped by topological level.
24
35
  * Files in the same level have no mutual dependencies — safe to process in parallel.
25
36
  * Files in cycles are returned as a final group (no cross-cycle propagation). */
@@ -92,24 +103,19 @@ const IMPORTABLE_SYMBOL_LABELS = new Set([
92
103
  /** Max synthetic bindings per importing file — prevents memory bloat for
93
104
  * C/C++ files that include many large headers. */
94
105
  const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
95
- /** Languages with whole-module import semantics (no per-symbol named imports).
96
- * For these languages, namedImportMap entries are synthesized from graph-exported
97
- * symbols after parsing, enabling Phase 14 cross-file binding propagation.
98
- *
99
- * Note: Python is intentionally excluded here. `import models` is a namespace import
100
- * (not wildcard symbol expansion) — expanding all exported symbols produces ambiguous
101
- * bindings when multiple modules export the same name (e.g. models.User vs auth.User).
102
- * Python module aliases are built in synthesizeWildcardImportBindings via moduleAliasMap. */
103
- const WILDCARD_IMPORT_LANGUAGES = new Set([
104
- SupportedLanguages.Go,
105
- SupportedLanguages.Ruby,
106
- SupportedLanguages.C,
107
- SupportedLanguages.CPlusPlus,
108
- SupportedLanguages.Swift,
109
- ]);
110
- /** Languages that require synthesizeWildcardImportBindings to run before call resolution.
111
- * Superset of WILDCARD_IMPORT_LANGUAGES — includes Python for moduleAliasMap building. */
112
- const SYNTHESIS_LANGUAGES = new Set([...WILDCARD_IMPORT_LANGUAGES, SupportedLanguages.Python]);
106
+ /** Pre-computed language sets derived from providers at module load. */
107
+ const WILDCARD_LANGUAGES = new Set(Object.values(providers).filter(p => p.importSemantics === 'wildcard').map(p => p.id));
108
+ const SYNTHESIS_LANGUAGES = new Set(Object.values(providers).filter(p => p.importSemantics !== 'named').map(p => p.id));
109
+ /** Check if a language uses wildcard (whole-module) import semantics.
110
+ * Derived from LanguageProvider.importSemantics — no hardcoded set needed. */
111
+ function isWildcardImportLanguage(lang) {
112
+ return WILDCARD_LANGUAGES.has(lang);
113
+ }
114
+ /** Check if a language needs synthesis before call resolution.
115
+ * True for wildcard-import languages AND namespace-import languages (Python). */
116
+ function needsSynthesis(lang) {
117
+ return SYNTHESIS_LANGUAGES.has(lang);
118
+ }
113
119
  /** Synthesize namedImportMap entries for languages with whole-module imports.
114
120
  * These languages (Go, Ruby, C/C++, Swift, Python) import all exported symbols from a
115
121
  * file, not specific named symbols. After parsing, we know which symbols each file
@@ -149,7 +155,7 @@ function synthesizeWildcardImportBindings(graph, ctx) {
149
155
  const srcFile = rel.sourceId.slice(FILE_PREFIX.length);
150
156
  const tgtFile = rel.targetId.slice(FILE_PREFIX.length);
151
157
  const lang = getLanguageFromFilename(srcFile);
152
- if (!lang || !WILDCARD_IMPORT_LANGUAGES.has(lang))
158
+ if (!lang || !isWildcardImportLanguage(lang))
153
159
  return;
154
160
  // Only add if not already in ctx.importMap (avoid duplicates)
155
161
  if (ctx.importMap.get(srcFile)?.has(tgtFile))
@@ -191,7 +197,7 @@ function synthesizeWildcardImportBindings(graph, ctx) {
191
197
  // Process files from ctx.importMap (Ruby, C/C++, Swift file-based imports)
192
198
  for (const [filePath, importedFiles] of ctx.importMap) {
193
199
  const lang = getLanguageFromFilename(filePath);
194
- if (!lang || !WILDCARD_IMPORT_LANGUAGES.has(lang))
200
+ if (!lang || !isWildcardImportLanguage(lang))
195
201
  continue;
196
202
  synthesizeForFile(filePath, importedFiles);
197
203
  }
@@ -220,7 +226,8 @@ function synthesizeWildcardImportBindings(graph, ctx) {
220
226
  }
221
227
  };
222
228
  for (const [filePath, importedFiles] of ctx.importMap) {
223
- if (getLanguageFromFilename(filePath) !== SupportedLanguages.Python)
229
+ const provider = getProviderForFile(filePath);
230
+ if (!provider || provider.importSemantics !== 'namespace')
224
231
  continue;
225
232
  buildPythonModuleAliasForFile(filePath, importedFiles);
226
233
  }
@@ -352,449 +359,865 @@ async function runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPa
352
359
  ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`);
353
360
  }
354
361
  }
355
- export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
356
- const graph = createKnowledgeGraph();
357
- const ctx = createResolutionContext();
358
- const symbolTable = ctx.symbols;
359
- let astCache = createASTCache(AST_CACHE_CAP);
360
- const pipelineStart = Date.now();
361
- const cleanup = () => {
362
- astCache.clear();
363
- ctx.clear();
364
- };
365
- try {
366
- // ── Phase 1: Scan paths only (no content read) ─────────────────────
362
+ /**
363
+ * Phase 1+2: Scan repository paths, build file/folder structure, process markdown.
364
+ *
365
+ * @reads repoPath (filesystem)
366
+ * @writes graph (File, Folder nodes + CONTAINS edges; Markdown sections + cross-links)
367
+ */
368
+ async function runScanAndStructure(repoPath, graph, onProgress) {
369
+ // ── Phase 1: Scan paths only (no content read) ─────────────────────
370
+ onProgress({
371
+ phase: 'extracting',
372
+ percent: 0,
373
+ message: 'Scanning repository...',
374
+ });
375
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
376
+ const scanProgress = Math.round((current / total) * 15);
367
377
  onProgress({
368
378
  phase: 'extracting',
369
- percent: 0,
379
+ percent: scanProgress,
370
380
  message: 'Scanning repository...',
381
+ detail: filePath,
382
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
371
383
  });
372
- const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
373
- const scanProgress = Math.round((current / total) * 15);
374
- onProgress({
375
- phase: 'extracting',
376
- percent: scanProgress,
377
- message: 'Scanning repository...',
378
- detail: filePath,
379
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
380
- });
381
- });
382
- const totalFiles = scannedFiles.length;
383
- onProgress({
384
- phase: 'extracting',
385
- percent: 15,
386
- message: 'Repository scanned successfully',
387
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
388
- });
389
- // ── Phase 2: Structure (paths only — no content needed) ────────────
390
- onProgress({
391
- phase: 'structure',
392
- percent: 15,
393
- message: 'Analyzing project structure...',
394
- stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
395
- });
396
- const allPaths = scannedFiles.map(f => f.path);
397
- processStructure(graph, allPaths);
398
- onProgress({
399
- phase: 'structure',
400
- percent: 20,
401
- message: 'Project structure analyzed',
402
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
403
- });
404
- // ── Phase 2.5: Markdown processing (headings + cross-links) ────────
405
- const mdScanned = scannedFiles.filter(f => f.path.endsWith('.md') || f.path.endsWith('.mdx'));
406
- if (mdScanned.length > 0) {
407
- const mdContents = await readFileContents(repoPath, mdScanned.map(f => f.path));
408
- const mdFiles = mdScanned
409
- .filter(f => mdContents.has(f.path))
410
- .map(f => ({ path: f.path, content: mdContents.get(f.path) }));
411
- const allPathSet = new Set(allPaths);
412
- const mdResult = processMarkdown(graph, mdFiles, allPathSet);
413
- if (isDev) {
414
- console.log(` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`);
415
- }
384
+ });
385
+ const totalFiles = scannedFiles.length;
386
+ onProgress({
387
+ phase: 'extracting',
388
+ percent: 15,
389
+ message: 'Repository scanned successfully',
390
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
391
+ });
392
+ // ── Phase 2: Structure (paths only — no content needed) ────────────
393
+ onProgress({
394
+ phase: 'structure',
395
+ percent: 15,
396
+ message: 'Analyzing project structure...',
397
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
398
+ });
399
+ const allPaths = scannedFiles.map(f => f.path);
400
+ processStructure(graph, allPaths);
401
+ onProgress({
402
+ phase: 'structure',
403
+ percent: 20,
404
+ message: 'Project structure analyzed',
405
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
406
+ });
407
+ // ── Custom (non-tree-sitter) processors ─────────────────────────────
408
+ // Each custom processor follows the pattern in markdown-processor.ts:
409
+ // 1. Export a process function: (graph, files, allPathSet) => result
410
+ // 2. Export a file detection function: (path) => boolean
411
+ // 3. Filter files by extension, write nodes/edges directly to graph
412
+ // To add a new language: create a new processor file, import it here,
413
+ // and add a filter-read-call-log block following the pattern below.
414
+ // ── Phase 2.5: Markdown processing (headings + cross-links) ────────
415
+ const mdScanned = scannedFiles.filter(f => f.path.endsWith('.md') || f.path.endsWith('.mdx'));
416
+ if (mdScanned.length > 0) {
417
+ const mdContents = await readFileContents(repoPath, mdScanned.map(f => f.path));
418
+ const mdFiles = mdScanned
419
+ .filter(f => mdContents.has(f.path))
420
+ .map(f => ({ path: f.path, content: mdContents.get(f.path) }));
421
+ const allPathSet = new Set(allPaths);
422
+ const mdResult = processMarkdown(graph, mdFiles, allPathSet);
423
+ if (isDev) {
424
+ console.log(` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`);
416
425
  }
417
- // ── Phase 3+4: Chunked read + parse ────────────────────────────────
418
- // Group parseable files into byte-budget chunks so only ~20MB of source
419
- // is in memory at a time. Each chunk is: read → parse → extract → free.
420
- const parseableScanned = scannedFiles.filter(f => {
421
- const lang = getLanguageFromFilename(f.path);
422
- return lang && isLanguageAvailable(lang);
423
- });
424
- // Warn about files skipped due to unavailable parsers
425
- const skippedByLang = new Map();
426
- for (const f of scannedFiles) {
427
- const lang = getLanguageFromFilename(f.path);
428
- if (lang && !isLanguageAvailable(lang)) {
429
- skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
430
- }
431
- }
432
- for (const [lang, count] of skippedByLang) {
433
- console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
434
- }
435
- const totalParseable = parseableScanned.length;
436
- if (totalParseable === 0) {
437
- onProgress({
438
- phase: 'parsing',
439
- percent: 82,
440
- message: 'No parseable files found — skipping parsing phase',
441
- stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
442
- });
443
- }
444
- // Build byte-budget chunks
445
- const chunks = [];
446
- let currentChunk = [];
447
- let currentBytes = 0;
448
- for (const file of parseableScanned) {
449
- if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
450
- chunks.push(currentChunk);
451
- currentChunk = [];
452
- currentBytes = 0;
453
- }
454
- currentChunk.push(file.path);
455
- currentBytes += file.size;
456
- }
457
- if (currentChunk.length > 0)
458
- chunks.push(currentChunk);
459
- const numChunks = chunks.length;
426
+ }
427
+ // ── Phase 2.6: COBOL processing (regex extraction, no tree-sitter) ──
428
+ const cobolScanned = scannedFiles.filter(f => isCobolFile(f.path) || isJclFile(f.path));
429
+ if (cobolScanned.length > 0) {
430
+ const cobolContents = await readFileContents(repoPath, cobolScanned.map(f => f.path));
431
+ const cobolFiles = cobolScanned
432
+ .filter(f => cobolContents.has(f.path))
433
+ .map(f => ({ path: f.path, content: cobolContents.get(f.path) }));
434
+ const allPathSet = new Set(allPaths);
435
+ const cobolResult = processCobol(graph, cobolFiles, allPathSet);
460
436
  if (isDev) {
461
- const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
462
- console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
437
+ console.log(` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`);
438
+ if (cobolResult.execSqlBlocks > 0 || cobolResult.execCicsBlocks > 0 || cobolResult.entryPoints > 0) {
439
+ console.log(` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`);
440
+ }
441
+ if (cobolResult.jclJobs > 0) {
442
+ console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`);
443
+ }
463
444
  }
445
+ }
446
+ return { scannedFiles, allPaths, totalFiles };
447
+ }
448
+ /**
449
+ * Phase 3+4: Chunked parse + resolve loop.
450
+ *
451
+ * Reads source in byte-budget chunks (~20MB each). For each chunk:
452
+ * 1. Parse via worker pool (or sequential fallback)
453
+ * 2. Resolve imports from extracted data
454
+ * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
455
+ * 4. Resolve calls, heritage, routes concurrently (Promise.all)
456
+ * 5. Collect TypeEnv bindings for cross-file propagation
457
+ *
458
+ * State accumulated across chunks: symbolTable, importMap, namedImportMap,
459
+ * moduleAliasMap (all via ResolutionContext), exportedTypeMap, workerTypeEnvBindings.
460
+ *
461
+ * @reads graph (structure nodes from Phase 1+2)
462
+ * @reads allPaths (from scan phase)
463
+ * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
464
+ * @writes ctx.symbolTable, ctx.importMap, ctx.namedImportMap, ctx.moduleAliasMap
465
+ */
466
+ async function runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
467
+ const symbolTable = ctx.symbols;
468
+ const parseableScanned = scannedFiles.filter(f => {
469
+ const lang = getLanguageFromFilename(f.path);
470
+ return lang && isLanguageAvailable(lang);
471
+ });
472
+ // Warn about files skipped due to unavailable parsers
473
+ const skippedByLang = new Map();
474
+ for (const f of scannedFiles) {
475
+ const lang = getLanguageFromFilename(f.path);
476
+ if (lang && !isLanguageAvailable(lang)) {
477
+ skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
478
+ }
479
+ }
480
+ for (const [lang, count] of skippedByLang) {
481
+ console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
482
+ }
483
+ const totalParseable = parseableScanned.length;
484
+ if (totalParseable === 0) {
464
485
  onProgress({
465
486
  phase: 'parsing',
466
- percent: 20,
467
- message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
468
- stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
487
+ percent: 82,
488
+ message: 'No parseable files found skipping parsing phase',
489
+ stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
469
490
  });
470
- // Don't spawn workers for tiny repos — overhead exceeds benefit
471
- const MIN_FILES_FOR_WORKERS = 15;
472
- const MIN_BYTES_FOR_WORKERS = 512 * 1024;
473
- const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
474
- // Create worker pool once, reuse across chunks
475
- let workerPool;
476
- if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
477
- try {
478
- let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
479
- // When running under vitest, import.meta.url points to src/ where no .js exists.
480
- // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
481
- const thisDir = fileURLToPath(new URL('.', import.meta.url));
482
- if (!fs.existsSync(fileURLToPath(workerUrl))) {
483
- const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
484
- if (fs.existsSync(distWorker)) {
485
- workerUrl = pathToFileURL(distWorker);
486
- }
487
- }
488
- workerPool = createWorkerPool(workerUrl);
489
- }
490
- catch (err) {
491
- if (isDev)
492
- console.warn('Worker pool creation failed, using sequential fallback:', err.message);
493
- }
494
- }
495
- let filesParsedSoFar = 0;
496
- // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
497
- const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
498
- astCache = createASTCache(maxChunkFiles);
499
- // Build import resolution context once — suffix index, file lists, resolve cache.
500
- // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
501
- const importCtx = buildImportResolutionContext(allPaths);
502
- const allPathObjects = allPaths.map(p => ({ path: p }));
503
- // Single-pass: parse + resolve imports/calls/heritage per chunk.
504
- // Calls/heritage use the symbol table built so far (symbols from earlier chunks
505
- // are already registered). This trades ~5% cross-chunk resolution accuracy for
506
- // 200-400MB less memory — critical for Linux-kernel-scale repos.
507
- const sequentialChunkPaths = [];
508
- // Pre-compute which chunks need synthesis — O(1) lookup per chunk.
509
- const chunkNeedsSynthesis = chunks.map(paths => paths.some(p => {
510
- const lang = getLanguageFromFilename(p);
511
- return lang != null && SYNTHESIS_LANGUAGES.has(lang);
512
- }));
513
- // Phase 14: Collect exported type bindings for cross-file propagation
514
- const exportedTypeMap = new Map();
515
- // Accumulate file-scope TypeEnv bindings from workers (closes worker/sequential quality gap)
516
- const workerTypeEnvBindings = [];
491
+ }
492
+ // Build byte-budget chunks
493
+ const chunks = [];
494
+ let currentChunk = [];
495
+ let currentBytes = 0;
496
+ for (const file of parseableScanned) {
497
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
498
+ chunks.push(currentChunk);
499
+ currentChunk = [];
500
+ currentBytes = 0;
501
+ }
502
+ currentChunk.push(file.path);
503
+ currentBytes += file.size;
504
+ }
505
+ if (currentChunk.length > 0)
506
+ chunks.push(currentChunk);
507
+ const numChunks = chunks.length;
508
+ if (isDev) {
509
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
510
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
511
+ }
512
+ onProgress({
513
+ phase: 'parsing',
514
+ percent: 20,
515
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
516
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
517
+ });
518
+ // Don't spawn workers for tiny repos overhead exceeds benefit
519
+ const MIN_FILES_FOR_WORKERS = 15;
520
+ const MIN_BYTES_FOR_WORKERS = 512 * 1024;
521
+ const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
522
+ // Create worker pool once, reuse across chunks
523
+ let workerPool;
524
+ if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
517
525
  try {
518
- for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
519
- const chunkPaths = chunks[chunkIdx];
520
- // Read content for this chunk only
521
- const chunkContents = await readFileContents(repoPath, chunkPaths);
522
- const chunkFiles = chunkPaths
523
- .filter(p => chunkContents.has(p))
524
- .map(p => ({ path: p, content: chunkContents.get(p) }));
525
- // Parse this chunk (workers or sequential fallback)
526
- const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
527
- const globalCurrent = filesParsedSoFar + current;
528
- const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
526
+ let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
527
+ // When running under vitest, import.meta.url points to src/ where no .js exists.
528
+ // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
529
+ const thisDir = fileURLToPath(new URL('.', import.meta.url));
530
+ if (!fs.existsSync(fileURLToPath(workerUrl))) {
531
+ const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
532
+ if (fs.existsSync(distWorker)) {
533
+ workerUrl = pathToFileURL(distWorker);
534
+ }
535
+ }
536
+ workerPool = createWorkerPool(workerUrl);
537
+ }
538
+ catch (err) {
539
+ if (isDev)
540
+ console.warn('Worker pool creation failed, using sequential fallback:', err.message);
541
+ }
542
+ }
543
+ let filesParsedSoFar = 0;
544
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
545
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
546
+ let astCache = createASTCache(maxChunkFiles);
547
+ // Build import resolution context once — suffix index, file lists, resolve cache.
548
+ // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
549
+ const importCtx = buildImportResolutionContext(allPaths);
550
+ const allPathObjects = allPaths.map(p => ({ path: p }));
551
+ // Single-pass: parse + resolve imports/calls/heritage per chunk.
552
+ // Calls/heritage use the symbol table built so far (symbols from earlier chunks
553
+ // are already registered). This trades ~5% cross-chunk resolution accuracy for
554
+ // 200-400MB less memory — critical for Linux-kernel-scale repos.
555
+ const sequentialChunkPaths = [];
556
+ // Pre-compute which chunks need synthesis — O(1) lookup per chunk.
557
+ const chunkNeedsSynthesis = chunks.map(paths => paths.some(p => {
558
+ const lang = getLanguageFromFilename(p);
559
+ return lang != null && needsSynthesis(lang);
560
+ }));
561
+ // Phase 14: Collect exported type bindings for cross-file propagation
562
+ const exportedTypeMap = new Map();
563
+ // Accumulate file-scope TypeEnv bindings from workers (closes worker/sequential quality gap)
564
+ const workerTypeEnvBindings = [];
565
+ // Accumulate fetch() calls from workers for Next.js route matching
566
+ const allFetchCalls = [];
567
+ // Accumulate framework-extracted routes (Laravel, etc.) for Route node creation
568
+ const allExtractedRoutes = [];
569
+ // Accumulate decorator-based routes (@Get, @Post, @app.route, etc.)
570
+ const allDecoratorRoutes = [];
571
+ // Accumulate MCP/RPC tool definitions (@mcp.tool(), @app.tool(), etc.)
572
+ const allToolDefs = [];
573
+ const allORMQueries = [];
574
+ try {
575
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
576
+ const chunkPaths = chunks[chunkIdx];
577
+ // Read content for this chunk only
578
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
579
+ const chunkFiles = chunkPaths
580
+ .filter(p => chunkContents.has(p))
581
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
582
+ // Parse this chunk (workers or sequential fallback)
583
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
584
+ const globalCurrent = filesParsedSoFar + current;
585
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
586
+ onProgress({
587
+ phase: 'parsing',
588
+ percent: Math.round(parsingProgress),
589
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
590
+ detail: filePath,
591
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
592
+ });
593
+ }, workerPool);
594
+ const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
595
+ if (chunkWorkerData) {
596
+ // Imports
597
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
529
598
  onProgress({
530
599
  phase: 'parsing',
531
- percent: Math.round(parsingProgress),
532
- message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
533
- detail: filePath,
534
- stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
600
+ percent: Math.round(chunkBasePercent),
601
+ message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
602
+ detail: `${current}/${total} files`,
603
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
535
604
  });
536
- }, workerPool);
537
- const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
538
- if (chunkWorkerData) {
539
- // Imports
540
- await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
605
+ }, repoPath, importCtx);
606
+ // ── Wildcard-import synthesis (Ruby / C/C++ / Swift / Go) + Python module aliases ─
607
+ // Synthesize namedImportMap entries for wildcard-import languages and build
608
+ // moduleAliasMap for Python namespace imports. Must run after imports are resolved
609
+ // (importMap is populated) but BEFORE call resolution.
610
+ if (chunkNeedsSynthesis[chunkIdx])
611
+ synthesizeWildcardImportBindings(graph, ctx);
612
+ // Phase 14 E1: Seed cross-file receiver types from ExportedTypeMap
613
+ // before call resolution — eliminates re-parse for single-hop imported receivers.
614
+ // NOTE: In the worker path, exportedTypeMap is empty during chunk processing
615
+ // (populated later in runCrossFileBindingPropagation). This block is latent —
616
+ // it activates only if incremental export collection is added per-chunk.
617
+ if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) {
618
+ const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
619
+ if (isDev && enrichedCount > 0) {
620
+ console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
621
+ }
622
+ }
623
+ // Calls + Heritage + Routes — resolve in parallel (no shared mutable state between them)
624
+ // This is safe because each writes disjoint relationship types into idempotent id-keyed Maps,
625
+ // and the single-threaded event loop prevents races between synchronous addRelationship calls.
626
+ await Promise.all([
627
+ processCallsFromExtracted(graph, chunkWorkerData.calls, ctx, (current, total) => {
541
628
  onProgress({
542
629
  phase: 'parsing',
543
630
  percent: Math.round(chunkBasePercent),
544
- message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
631
+ message: `Resolving calls (chunk ${chunkIdx + 1}/${numChunks})...`,
545
632
  detail: `${current}/${total} files`,
546
633
  stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
547
634
  });
548
- }, repoPath, importCtx);
549
- // ── Wildcard-import synthesis (Ruby / C/C++ / Swift / Go) + Python module aliases ─
550
- // Synthesize namedImportMap entries for wildcard-import languages and build
551
- // moduleAliasMap for Python namespace imports. Must run after imports are resolved
552
- // (importMap is populated) but BEFORE call resolution.
553
- if (chunkNeedsSynthesis[chunkIdx])
554
- synthesizeWildcardImportBindings(graph, ctx);
555
- // Phase 14 E1: Seed cross-file receiver types from ExportedTypeMap
556
- // before call resolution — eliminates re-parse for single-hop imported receivers.
557
- // NOTE: In the worker path, exportedTypeMap is empty during chunk processing
558
- // (populated later in runCrossFileBindingPropagation). This block is latent
559
- // it activates only if incremental export collection is added per-chunk.
560
- if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) {
561
- const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
562
- if (isDev && enrichedCount > 0) {
563
- console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
564
- }
565
- }
566
- // Calls + Heritage + Routes — resolve in parallel (no shared mutable state between them)
567
- // This is safe because each writes disjoint relationship types into idempotent id-keyed Maps,
568
- // and the single-threaded event loop prevents races between synchronous addRelationship calls.
569
- await Promise.all([
570
- processCallsFromExtracted(graph, chunkWorkerData.calls, ctx, (current, total) => {
571
- onProgress({
572
- phase: 'parsing',
573
- percent: Math.round(chunkBasePercent),
574
- message: `Resolving calls (chunk ${chunkIdx + 1}/${numChunks})...`,
575
- detail: `${current}/${total} files`,
576
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
577
- });
578
- }, chunkWorkerData.constructorBindings),
579
- processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
580
- onProgress({
581
- phase: 'parsing',
582
- percent: Math.round(chunkBasePercent),
583
- message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
584
- detail: `${current}/${total} records`,
585
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
586
- });
587
- }),
588
- processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
589
- onProgress({
590
- phase: 'parsing',
591
- percent: Math.round(chunkBasePercent),
592
- message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
593
- detail: `${current}/${total} routes`,
594
- stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
595
- });
596
- }),
597
- ]);
598
- // Process field write assignments (synchronous, runs after calls resolve)
599
- if (chunkWorkerData.assignments?.length) {
600
- processAssignmentsFromExtracted(graph, chunkWorkerData.assignments, ctx, chunkWorkerData.constructorBindings);
601
- }
602
- // Collect TypeEnv file-scope bindings for exported type enrichment
603
- if (chunkWorkerData.typeEnvBindings?.length) {
604
- workerTypeEnvBindings.push(...chunkWorkerData.typeEnvBindings);
605
- }
635
+ }, chunkWorkerData.constructorBindings),
636
+ processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
637
+ onProgress({
638
+ phase: 'parsing',
639
+ percent: Math.round(chunkBasePercent),
640
+ message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
641
+ detail: `${current}/${total} records`,
642
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
643
+ });
644
+ }),
645
+ processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
646
+ onProgress({
647
+ phase: 'parsing',
648
+ percent: Math.round(chunkBasePercent),
649
+ message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
650
+ detail: `${current}/${total} routes`,
651
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
652
+ });
653
+ }),
654
+ ]);
655
+ // Process field write assignments (synchronous, runs after calls resolve)
656
+ if (chunkWorkerData.assignments?.length) {
657
+ processAssignmentsFromExtracted(graph, chunkWorkerData.assignments, ctx, chunkWorkerData.constructorBindings);
658
+ }
659
+ // Collect TypeEnv file-scope bindings for exported type enrichment
660
+ if (chunkWorkerData.typeEnvBindings?.length) {
661
+ workerTypeEnvBindings.push(...chunkWorkerData.typeEnvBindings);
662
+ }
663
+ // Collect fetch() calls for Next.js route matching
664
+ if (chunkWorkerData.fetchCalls?.length) {
665
+ allFetchCalls.push(...chunkWorkerData.fetchCalls);
666
+ }
667
+ if (chunkWorkerData.routes?.length) {
668
+ allExtractedRoutes.push(...chunkWorkerData.routes);
606
669
  }
607
- else {
608
- await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
609
- sequentialChunkPaths.push(chunkPaths);
670
+ if (chunkWorkerData.decoratorRoutes?.length) {
671
+ allDecoratorRoutes.push(...chunkWorkerData.decoratorRoutes);
672
+ }
673
+ if (chunkWorkerData.toolDefs?.length) {
674
+ allToolDefs.push(...chunkWorkerData.toolDefs);
675
+ }
676
+ if (chunkWorkerData.ormQueries?.length) {
677
+ allORMQueries.push(...chunkWorkerData.ormQueries);
610
678
  }
611
- filesParsedSoFar += chunkFiles.length;
612
- // Clear AST cache between chunks to free memory
613
- astCache.clear();
614
- // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
615
679
  }
680
+ else {
681
+ await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
682
+ sequentialChunkPaths.push(chunkPaths);
683
+ }
684
+ filesParsedSoFar += chunkFiles.length;
685
+ // Clear AST cache between chunks to free memory
686
+ astCache.clear();
687
+ // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
616
688
  }
617
- finally {
618
- await workerPool?.terminate();
689
+ }
690
+ finally {
691
+ await workerPool?.terminate();
692
+ }
693
+ // Sequential fallback chunks: re-read source for call/heritage resolution
694
+ // Synthesize wildcard import bindings once after ALL imports are processed,
695
+ // before any call resolution — same rationale as the worker-path inline synthesis.
696
+ if (sequentialChunkPaths.length > 0)
697
+ synthesizeWildcardImportBindings(graph, ctx);
698
+ for (const chunkPaths of sequentialChunkPaths) {
699
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
700
+ const chunkFiles = chunkPaths
701
+ .filter(p => chunkContents.has(p))
702
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
703
+ astCache = createASTCache(chunkFiles.length);
704
+ const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap);
705
+ await processHeritage(graph, chunkFiles, astCache, ctx);
706
+ if (rubyHeritage.length > 0) {
707
+ await processHeritageFromExtracted(graph, rubyHeritage, ctx);
619
708
  }
620
- // Sequential fallback chunks: re-read source for call/heritage resolution
621
- // Synthesize wildcard import bindings once after ALL imports are processed,
622
- // before any call resolution — same rationale as the worker-path inline synthesis.
623
- if (sequentialChunkPaths.length > 0)
624
- synthesizeWildcardImportBindings(graph, ctx);
625
- for (const chunkPaths of sequentialChunkPaths) {
626
- const chunkContents = await readFileContents(repoPath, chunkPaths);
627
- const chunkFiles = chunkPaths
628
- .filter(p => chunkContents.has(p))
629
- .map(p => ({ path: p, content: chunkContents.get(p) }));
630
- astCache = createASTCache(chunkFiles.length);
631
- const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap);
632
- await processHeritage(graph, chunkFiles, astCache, ctx);
633
- if (rubyHeritage.length > 0) {
634
- await processHeritageFromExtracted(graph, rubyHeritage, ctx);
709
+ // Extract fetch() calls for Next.js route matching (sequential path)
710
+ const chunkFetchCalls = await extractFetchCallsFromFiles(chunkFiles, astCache);
711
+ if (chunkFetchCalls.length > 0) {
712
+ allFetchCalls.push(...chunkFetchCalls);
713
+ }
714
+ // Extract ORM queries (sequential path)
715
+ for (const f of chunkFiles) {
716
+ extractORMQueriesInline(f.path, f.content, allORMQueries);
717
+ }
718
+ astCache.clear();
719
+ }
720
+ // Log resolution cache stats
721
+ if (isDev) {
722
+ const rcStats = ctx.getStats();
723
+ const total = rcStats.cacheHits + rcStats.cacheMisses;
724
+ const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
725
+ console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
726
+ }
727
+ // ── Worker path quality enrichment: merge TypeEnv file-scope bindings into ExportedTypeMap ──
728
+ // Workers return file-scope bindings from their TypeEnv fixpoint (includes inferred types
729
+ // like `const config = getConfig()` → Config). Filter by graph isExported to match
730
+ // the sequential path's collectExportedBindings behavior.
731
+ if (workerTypeEnvBindings.length > 0) {
732
+ let enriched = 0;
733
+ for (const { filePath, bindings } of workerTypeEnvBindings) {
734
+ for (const [name, type] of bindings) {
735
+ // Verify the symbol is exported via graph node
736
+ const nodeId = `Function:${filePath}:${name}`;
737
+ const varNodeId = `Variable:${filePath}:${name}`;
738
+ const constNodeId = `Const:${filePath}:${name}`;
739
+ const node = graph.getNode(nodeId) ?? graph.getNode(varNodeId) ?? graph.getNode(constNodeId);
740
+ if (!node?.properties?.isExported)
741
+ continue;
742
+ let fileExports = exportedTypeMap.get(filePath);
743
+ if (!fileExports) {
744
+ fileExports = new Map();
745
+ exportedTypeMap.set(filePath, fileExports);
746
+ }
747
+ // Don't overwrite existing entries (Tier 0 from SymbolTable is authoritative)
748
+ if (!fileExports.has(name)) {
749
+ fileExports.set(name, type);
750
+ enriched++;
751
+ }
635
752
  }
636
- astCache.clear();
637
753
  }
638
- // Log resolution cache stats
639
- if (isDev) {
640
- const rcStats = ctx.getStats();
641
- const total = rcStats.cacheHits + rcStats.cacheMisses;
642
- const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
643
- console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
644
- }
645
- // ── Worker path quality enrichment: merge TypeEnv file-scope bindings into ExportedTypeMap ──
646
- // Workers return file-scope bindings from their TypeEnv fixpoint (includes inferred types
647
- // like `const config = getConfig()` → Config). Filter by graph isExported to match
648
- // the sequential path's collectExportedBindings behavior.
649
- if (workerTypeEnvBindings.length > 0) {
650
- let enriched = 0;
651
- for (const { filePath, bindings } of workerTypeEnvBindings) {
652
- for (const [name, type] of bindings) {
653
- // Verify the symbol is exported via graph node
654
- const nodeId = `Function:${filePath}:${name}`;
655
- const varNodeId = `Variable:${filePath}:${name}`;
656
- const constNodeId = `Const:${filePath}:${name}`;
657
- const node = graph.getNode(nodeId) ?? graph.getNode(varNodeId) ?? graph.getNode(constNodeId);
658
- if (!node?.properties?.isExported)
659
- continue;
660
- let fileExports = exportedTypeMap.get(filePath);
661
- if (!fileExports) {
662
- fileExports = new Map();
663
- exportedTypeMap.set(filePath, fileExports);
664
- }
665
- // Don't overwrite existing entries (Tier 0 from SymbolTable is authoritative)
666
- if (!fileExports.has(name)) {
667
- fileExports.set(name, type);
668
- enriched++;
669
- }
754
+ if (isDev && enriched > 0) {
755
+ console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
756
+ }
757
+ }
758
+ // ── Final synthesis pass for whole-module-import languages ──
759
+ // Per-chunk synthesis (above) already ran incrementally. This final pass ensures
760
+ // any remaining files whose imports were not covered inline are also synthesized,
761
+ // and that Phase 14 type propagation has complete namedImportMap data.
762
+ const synthesized = synthesizeWildcardImportBindings(graph, ctx);
763
+ if (isDev && synthesized > 0) {
764
+ console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
765
+ }
766
+ // Free import resolution context — suffix index + resolve cache no longer needed
767
+ // (allPathObjects and importCtx hold ~94MB+ for large repos)
768
+ allPathObjects.length = 0;
769
+ importCtx.resolveCache.clear();
770
+ importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos)
771
+ importCtx.normalizedFileList = [];
772
+ return { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries };
773
+ }
774
+ /**
775
+ * Post-parse graph analysis: MRO, community detection, process extraction.
776
+ *
777
+ * @reads graph (all nodes and relationships from parse + resolve phases)
778
+ * @writes graph (Community nodes, Process nodes, MEMBER_OF edges, STEP_IN_PROCESS edges, OVERRIDES edges)
779
+ */
780
+ async function runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs) {
781
+ // ── Phase 4.5: Method Resolution Order ──────────────────────────────
782
+ onProgress({
783
+ phase: 'parsing',
784
+ percent: 81,
785
+ message: 'Computing method resolution order...',
786
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
787
+ });
788
+ const mroResult = computeMRO(graph);
789
+ if (isDev && mroResult.entries.length > 0) {
790
+ console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
791
+ }
792
+ // ── Phase 5: Communities ───────────────────────────────────────────
793
+ onProgress({
794
+ phase: 'communities',
795
+ percent: 82,
796
+ message: 'Detecting code communities...',
797
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
798
+ });
799
+ const communityResult = await processCommunities(graph, (message, progress) => {
800
+ const communityProgress = 82 + (progress * 0.10);
801
+ onProgress({
802
+ phase: 'communities',
803
+ percent: Math.round(communityProgress),
804
+ message,
805
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
806
+ });
807
+ });
808
+ if (isDev) {
809
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
810
+ }
811
+ communityResult.communities.forEach(comm => {
812
+ graph.addNode({
813
+ id: comm.id,
814
+ label: 'Community',
815
+ properties: {
816
+ name: comm.label,
817
+ filePath: '',
818
+ heuristicLabel: comm.heuristicLabel,
819
+ cohesion: comm.cohesion,
820
+ symbolCount: comm.symbolCount,
821
+ }
822
+ });
823
+ });
824
+ communityResult.memberships.forEach(membership => {
825
+ graph.addRelationship({
826
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
827
+ type: 'MEMBER_OF',
828
+ sourceId: membership.nodeId,
829
+ targetId: membership.communityId,
830
+ confidence: 1.0,
831
+ reason: 'leiden-algorithm',
832
+ });
833
+ });
834
+ // ── Phase 6: Processes ─────────────────────────────────────────────
835
+ onProgress({
836
+ phase: 'processes',
837
+ percent: 94,
838
+ message: 'Detecting execution flows...',
839
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
840
+ });
841
+ let symbolCount = 0;
842
+ graph.forEachNode(n => { if (n.label !== 'File')
843
+ symbolCount++; });
844
+ const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
845
+ const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
846
+ const processProgress = 94 + (progress * 0.05);
847
+ onProgress({
848
+ phase: 'processes',
849
+ percent: Math.round(processProgress),
850
+ message,
851
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
852
+ });
853
+ }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
854
+ if (isDev) {
855
+ console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
856
+ }
857
+ processResult.processes.forEach(proc => {
858
+ graph.addNode({
859
+ id: proc.id,
860
+ label: 'Process',
861
+ properties: {
862
+ name: proc.label,
863
+ filePath: '',
864
+ heuristicLabel: proc.heuristicLabel,
865
+ processType: proc.processType,
866
+ stepCount: proc.stepCount,
867
+ communities: proc.communities,
868
+ entryPointId: proc.entryPointId,
869
+ terminalId: proc.terminalId,
870
+ }
871
+ });
872
+ });
873
+ processResult.steps.forEach(step => {
874
+ graph.addRelationship({
875
+ id: `${step.nodeId}_step_${step.step}_${step.processId}`,
876
+ type: 'STEP_IN_PROCESS',
877
+ sourceId: step.nodeId,
878
+ targetId: step.processId,
879
+ confidence: 1.0,
880
+ reason: 'trace-detection',
881
+ step: step.step,
882
+ });
883
+ });
884
+ // Link Route and Tool nodes to Processes via reverse index (file → node id)
885
+ if ((routeRegistry?.size ?? 0) > 0 || (toolDefs?.length ?? 0) > 0) {
886
+ // Reverse indexes: file → all route URLs / tool names (handles multi-route files)
887
+ const routesByFile = new Map();
888
+ if (routeRegistry) {
889
+ for (const [url, entry] of routeRegistry) {
890
+ let list = routesByFile.get(entry.filePath);
891
+ if (!list) {
892
+ list = [];
893
+ routesByFile.set(entry.filePath, list);
670
894
  }
895
+ list.push(url);
671
896
  }
672
- if (isDev && enriched > 0) {
673
- console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
897
+ }
898
+ const toolsByFile = new Map();
899
+ if (toolDefs) {
900
+ for (const td of toolDefs) {
901
+ let list = toolsByFile.get(td.filePath);
902
+ if (!list) {
903
+ list = [];
904
+ toolsByFile.set(td.filePath, list);
905
+ }
906
+ list.push(td.name);
674
907
  }
675
908
  }
676
- // ── Phase 14 pre-pass: Final synthesis pass for whole-module-import languages ──
677
- // Per-chunk synthesis (above) already ran incrementally. This final pass ensures
678
- // any remaining files whose imports were not covered inline are also synthesized,
679
- // and that Phase 14 type propagation has complete namedImportMap data.
680
- const synthesized = synthesizeWildcardImportBindings(graph, ctx);
681
- if (isDev && synthesized > 0) {
682
- console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
909
+ let linked = 0;
910
+ for (const proc of processResult.processes) {
911
+ if (!proc.entryPointId)
912
+ continue;
913
+ const entryNode = graph.getNode(proc.entryPointId);
914
+ if (!entryNode)
915
+ continue;
916
+ const entryFile = entryNode.properties.filePath;
917
+ if (!entryFile)
918
+ continue;
919
+ const routeURLs = routesByFile.get(entryFile);
920
+ if (routeURLs) {
921
+ for (const routeURL of routeURLs) {
922
+ const routeNodeId = generateId('Route', routeURL);
923
+ graph.addRelationship({
924
+ id: generateId('ENTRY_POINT_OF', `${routeNodeId}->${proc.id}`),
925
+ sourceId: routeNodeId,
926
+ targetId: proc.id,
927
+ type: 'ENTRY_POINT_OF',
928
+ confidence: 0.85,
929
+ reason: 'route-handler-entry-point',
930
+ });
931
+ linked++;
932
+ }
933
+ }
934
+ const toolNames = toolsByFile.get(entryFile);
935
+ if (toolNames) {
936
+ for (const toolName of toolNames) {
937
+ const toolNodeId = generateId('Tool', toolName);
938
+ graph.addRelationship({
939
+ id: generateId('ENTRY_POINT_OF', `${toolNodeId}->${proc.id}`),
940
+ sourceId: toolNodeId,
941
+ targetId: proc.id,
942
+ type: 'ENTRY_POINT_OF',
943
+ confidence: 0.85,
944
+ reason: 'tool-handler-entry-point',
945
+ });
946
+ linked++;
947
+ }
948
+ }
683
949
  }
684
- // ── Phase 14: Cross-file binding propagation ──────────────────────
685
- await runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
686
- // Free import resolution context — suffix index + resolve cache no longer needed
687
- // (allPathObjects and importCtx hold ~94MB+ for large repos)
688
- allPathObjects.length = 0;
689
- importCtx.resolveCache.clear();
690
- importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos)
691
- importCtx.normalizedFileList = [];
692
- let communityResult;
693
- let processResult;
694
- if (!options?.skipGraphPhases) {
695
- // ── Phase 4.5: Method Resolution Order ──────────────────────────────
696
- onProgress({
697
- phase: 'parsing',
698
- percent: 81,
699
- message: 'Computing method resolution order...',
700
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
701
- });
702
- const mroResult = computeMRO(graph);
703
- if (isDev && mroResult.entries.length > 0) {
704
- console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
705
- }
706
- // ── Phase 5: Communities ───────────────────────────────────────────
707
- onProgress({
708
- phase: 'communities',
709
- percent: 82,
710
- message: 'Detecting code communities...',
711
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
712
- });
713
- communityResult = await processCommunities(graph, (message, progress) => {
714
- const communityProgress = 82 + (progress * 0.10);
715
- onProgress({
716
- phase: 'communities',
717
- percent: Math.round(communityProgress),
718
- message,
719
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
720
- });
721
- });
722
- if (isDev) {
723
- console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
950
+ if (isDev && linked > 0) {
951
+ console.log(`🔗 Linked ${linked} Route/Tool nodes to execution flows`);
952
+ }
953
+ }
954
+ return { communityResult, processResult };
955
+ }
956
+ // ── Pipeline orchestrator ─────────────────────────────────────────────────
957
+ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
958
+ const graph = createKnowledgeGraph();
959
+ const ctx = createResolutionContext();
960
+ const pipelineStart = Date.now();
961
+ try {
962
+ // Phase 1+2: Scan paths, build structure, process markdown
963
+ const { scannedFiles, allPaths, totalFiles } = await runScanAndStructure(repoPath, graph, onProgress);
964
+ // Phase 3+4: Chunked parse + resolve (imports, calls, heritage, routes)
965
+ const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries } = await runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
966
+ const routeRegistry = new Map();
967
+ // Detect Expo Router app/ roots vs Next.js app/ roots (monorepo-safe).
968
+ const expoAppRoots = new Set();
969
+ const nextjsAppRoots = new Set();
970
+ const expoAppPaths = new Set();
971
+ for (const p of allPaths) {
972
+ const norm = p.replace(/\\/g, '/');
973
+ const appIdx = norm.lastIndexOf('app/');
974
+ if (appIdx < 0)
975
+ continue;
976
+ const root = norm.slice(0, appIdx + 4);
977
+ if (/\/_layout\.(tsx?|jsx?)$/.test(norm))
978
+ expoAppRoots.add(root);
979
+ if (/\/page\.(tsx?|jsx?)$/.test(norm))
980
+ nextjsAppRoots.add(root);
981
+ }
982
+ for (const root of nextjsAppRoots)
983
+ expoAppRoots.delete(root);
984
+ if (expoAppRoots.size > 0) {
985
+ for (const p of allPaths) {
986
+ const norm = p.replace(/\\/g, '/');
987
+ const appIdx = norm.lastIndexOf('app/');
988
+ if (appIdx >= 0 && expoAppRoots.has(norm.slice(0, appIdx + 4)))
989
+ expoAppPaths.add(p);
990
+ }
991
+ }
992
+ for (const p of allPaths) {
993
+ if (expoAppPaths.has(p)) {
994
+ const expoURL = expoFileToRouteURL(p);
995
+ if (expoURL && !routeRegistry.has(expoURL)) {
996
+ routeRegistry.set(expoURL, { filePath: p, source: 'expo-filesystem-route' });
997
+ continue;
998
+ }
999
+ }
1000
+ const nextjsURL = nextjsFileToRouteURL(p);
1001
+ if (nextjsURL && !routeRegistry.has(nextjsURL)) {
1002
+ routeRegistry.set(nextjsURL, { filePath: p, source: 'nextjs-filesystem-route' });
1003
+ continue;
724
1004
  }
725
- communityResult.communities.forEach(comm => {
1005
+ if (p.endsWith('.php')) {
1006
+ const phpURL = phpFileToRouteURL(p);
1007
+ if (phpURL && !routeRegistry.has(phpURL)) {
1008
+ routeRegistry.set(phpURL, { filePath: p, source: 'php-file-route' });
1009
+ }
1010
+ }
1011
+ }
1012
+ const ensureSlash = (path) => path.startsWith('/') ? path : '/' + path;
1013
+ let duplicateRoutes = 0;
1014
+ const addRoute = (url, entry) => {
1015
+ if (routeRegistry.has(url)) {
1016
+ duplicateRoutes++;
1017
+ return;
1018
+ }
1019
+ routeRegistry.set(url, entry);
1020
+ };
1021
+ for (const route of allExtractedRoutes) {
1022
+ if (!route.routePath)
1023
+ continue;
1024
+ addRoute(ensureSlash(route.routePath), { filePath: route.filePath, source: 'framework-route' });
1025
+ }
1026
+ for (const dr of allDecoratorRoutes) {
1027
+ addRoute(ensureSlash(dr.routePath), { filePath: dr.filePath, source: `decorator-${dr.decoratorName}` });
1028
+ }
1029
+ let handlerContents;
1030
+ if (routeRegistry.size > 0) {
1031
+ const handlerPaths = [...routeRegistry.values()].map(e => e.filePath);
1032
+ handlerContents = await readFileContents(repoPath, handlerPaths);
1033
+ for (const [routeURL, entry] of routeRegistry) {
1034
+ const { filePath: handlerPath, source: routeSource } = entry;
1035
+ const content = handlerContents.get(handlerPath);
1036
+ const { responseKeys, errorKeys } = content
1037
+ ? (handlerPath.endsWith(".php") ? extractPHPResponseShapes(content) : extractResponseShapes(content))
1038
+ : { responseKeys: undefined, errorKeys: undefined };
1039
+ const mwResult = content ? extractMiddlewareChain(content) : undefined;
1040
+ const middleware = mwResult?.chain;
1041
+ const routeNodeId = generateId('Route', routeURL);
726
1042
  graph.addNode({
727
- id: comm.id,
728
- label: 'Community',
1043
+ id: routeNodeId,
1044
+ label: 'Route',
729
1045
  properties: {
730
- name: comm.label,
731
- filePath: '',
732
- heuristicLabel: comm.heuristicLabel,
733
- cohesion: comm.cohesion,
734
- symbolCount: comm.symbolCount,
735
- }
1046
+ name: routeURL,
1047
+ filePath: handlerPath,
1048
+ ...(responseKeys ? { responseKeys } : {}),
1049
+ ...(errorKeys ? { errorKeys } : {}),
1050
+ ...(middleware && middleware.length > 0 ? { middleware } : {}),
1051
+ },
736
1052
  });
737
- });
738
- communityResult.memberships.forEach(membership => {
1053
+ const handlerFileId = generateId('File', handlerPath);
739
1054
  graph.addRelationship({
740
- id: `${membership.nodeId}_member_of_${membership.communityId}`,
741
- type: 'MEMBER_OF',
742
- sourceId: membership.nodeId,
743
- targetId: membership.communityId,
1055
+ id: generateId('HANDLES_ROUTE', `${handlerFileId}->${routeNodeId}`),
1056
+ sourceId: handlerFileId,
1057
+ targetId: routeNodeId,
1058
+ type: 'HANDLES_ROUTE',
744
1059
  confidence: 1.0,
745
- reason: 'leiden-algorithm',
746
- });
747
- });
748
- // ── Phase 6: Processes ─────────────────────────────────────────────
749
- onProgress({
750
- phase: 'processes',
751
- percent: 94,
752
- message: 'Detecting execution flows...',
753
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
754
- });
755
- let symbolCount = 0;
756
- graph.forEachNode(n => { if (n.label !== 'File')
757
- symbolCount++; });
758
- const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
759
- processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
760
- const processProgress = 94 + (progress * 0.05);
761
- onProgress({
762
- phase: 'processes',
763
- percent: Math.round(processProgress),
764
- message,
765
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
1060
+ reason: routeSource,
766
1061
  });
767
- }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
1062
+ }
768
1063
  if (isDev) {
769
- console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
1064
+ console.log(`🗺️ Route registry: ${routeRegistry.size} routes${duplicateRoutes > 0 ? ` (${duplicateRoutes} duplicate URLs skipped)` : ''}`);
770
1065
  }
771
- processResult.processes.forEach(proc => {
772
- graph.addNode({
773
- id: proc.id,
774
- label: 'Process',
775
- properties: {
776
- name: proc.label,
777
- filePath: '',
778
- heuristicLabel: proc.heuristicLabel,
779
- processType: proc.processType,
780
- stepCount: proc.stepCount,
781
- communities: proc.communities,
782
- entryPointId: proc.entryPointId,
783
- terminalId: proc.terminalId,
1066
+ }
1067
+ // ── Phase 3.5b: Link Next.js project-level middleware.ts to routes ──
1068
+ if (routeRegistry.size > 0) {
1069
+ const middlewareCandidates = allPaths.filter(p => p === 'middleware.ts' || p === 'middleware.js' || p === 'middleware.tsx' || p === 'middleware.jsx' ||
1070
+ p === 'src/middleware.ts' || p === 'src/middleware.js' || p === 'src/middleware.tsx' || p === 'src/middleware.jsx');
1071
+ if (middlewareCandidates.length > 0) {
1072
+ const mwContents = await readFileContents(repoPath, middlewareCandidates);
1073
+ for (const [mwPath, mwContent] of mwContents) {
1074
+ const config = extractNextjsMiddlewareConfig(mwContent);
1075
+ if (!config)
1076
+ continue;
1077
+ const mwLabel = config.wrappedFunctions.length > 0
1078
+ ? config.wrappedFunctions
1079
+ : [config.exportedName];
1080
+ // Pre-compile matchers once per middleware file
1081
+ const compiled = config.matchers.map(compileMatcher).filter((m) => m !== null);
1082
+ let linkedCount = 0;
1083
+ for (const [routeURL] of routeRegistry) {
1084
+ const matches = compiled.length === 0 ||
1085
+ compiled.some(cm => compiledMatcherMatchesRoute(cm, routeURL));
1086
+ if (!matches)
1087
+ continue;
1088
+ const routeNodeId = generateId('Route', routeURL);
1089
+ const existing = graph.getNode(routeNodeId);
1090
+ if (!existing)
1091
+ continue;
1092
+ const currentMw = existing.properties.middleware ?? [];
1093
+ // Prepend project-level middleware (runs before handler-level wrappers)
1094
+ existing.properties.middleware = [...mwLabel, ...currentMw.filter(m => !mwLabel.includes(m))];
1095
+ linkedCount++;
784
1096
  }
1097
+ if (isDev && linkedCount > 0) {
1098
+ console.log(`🛡️ Linked ${mwPath} middleware [${mwLabel.join(', ')}] to ${linkedCount} routes`);
1099
+ }
1100
+ }
1101
+ }
1102
+ }
1103
+ // Scan HTML/template files for <form action="/path"> and AJAX url patterns
1104
+ // Skip .php — already parsed by tree-sitter with http_client/fetch queries
1106
+ const htmlCandidates = allPaths.filter(p => p.endsWith('.html') || p.endsWith('.htm') ||
1107
+ p.endsWith('.ejs') || p.endsWith('.hbs') || p.endsWith('.blade.php'));
1108
+ if (htmlCandidates.length > 0 && routeRegistry.size > 0) {
1109
+ const htmlContents = await readFileContents(repoPath, htmlCandidates);
1110
+ const htmlPatterns = [/action=["']([^"']+)["']/g, /url:\s*["']([^"']+)["']/g];
1111
+ for (const [filePath, content] of htmlContents) {
1112
+ for (const pattern of htmlPatterns) {
1113
+ pattern.lastIndex = 0;
1114
+ let match;
1115
+ while ((match = pattern.exec(content)) !== null) {
1116
+ const normalized = normalizeFetchURL(match[1]);
1117
+ if (normalized) {
1118
+ allFetchCalls.push({ filePath, fetchURL: normalized, lineNumber: 0 });
1119
+ }
1120
+ }
1121
+ }
1122
+ }
1123
+ }
1124
+ // ── Phase 3.5c: Extract Expo Router navigation patterns ──
1125
+ if (expoAppPaths.size > 0 && routeRegistry.size > 0) {
1126
+ const unreadExpoPaths = [...expoAppPaths].filter(p => !handlerContents?.has(p));
1127
+ const extraContents = unreadExpoPaths.length > 0 ? await readFileContents(repoPath, unreadExpoPaths) : new Map();
1128
+ const allExpoContents = new Map([...(handlerContents ?? new Map()), ...extraContents]);
1129
+ for (const [filePath, content] of allExpoContents) {
1130
+ if (!expoAppPaths.has(filePath))
1131
+ continue;
1132
+ for (const pattern of EXPO_NAV_PATTERNS) {
1133
+ pattern.lastIndex = 0;
1134
+ let match;
1135
+ while ((match = pattern.exec(content)) !== null) {
1136
+ const url = match[2] ?? match[1];
1137
+ if (url && url.startsWith('/')) {
1138
+ allFetchCalls.push({ filePath, fetchURL: url, lineNumber: 0 });
1139
+ }
1140
+ }
1141
+ }
1142
+ }
1143
+ }
1144
+ if (routeRegistry.size > 0 && allFetchCalls.length > 0) {
1145
+ const routeURLToFile = new Map();
1146
+ for (const [url, entry] of routeRegistry)
1147
+ routeURLToFile.set(url, entry.filePath);
1148
+ // Read consumer file contents so we can extract property access patterns
1149
+ const consumerPaths = [...new Set(allFetchCalls.map(c => c.filePath))];
1150
+ const consumerContents = await readFileContents(repoPath, consumerPaths);
1151
+ processNextjsFetchRoutes(graph, allFetchCalls, routeURLToFile, consumerContents);
1152
+ if (isDev) {
1153
+ console.log(`🔗 Processed ${allFetchCalls.length} fetch() calls against ${routeRegistry.size} routes`);
1154
+ }
1155
+ }
1156
+ // ── Phase 3.6: Tool Detection (MCP/RPC) ──────────────────────────
1157
+ const toolDefs = [];
1158
+ const seenToolNames = new Set();
1159
+ for (const td of allToolDefs) {
1160
+ if (seenToolNames.has(td.toolName))
1161
+ continue;
1162
+ seenToolNames.add(td.toolName);
1163
+ toolDefs.push({ name: td.toolName, filePath: td.filePath, description: td.description });
1164
+ }
1165
+ // TS tool definition arrays — require inputSchema nearby to distinguish from config objects
1166
+ const toolCandidatePaths = allPaths.filter(p => (p.endsWith('.ts') || p.endsWith('.js')) && p.toLowerCase().includes('tool')
1167
+ && !p.includes('node_modules') && !p.includes('test') && !p.includes('__'));
1168
+ if (toolCandidatePaths.length > 0) {
1169
+ const toolContents = await readFileContents(repoPath, toolCandidatePaths);
1170
+ for (const [filePath, content] of toolContents) {
1171
+ // Only scan files that contain 'inputSchema' — this is the MCP tool signature
1172
+ if (!content.includes('inputSchema'))
1173
+ continue;
1174
+ const toolPattern = /name:\s*['"](\w+)['"]\s*,\s*\n?\s*description:\s*[`'"]([\s\S]*?)[`'"]/g;
1175
+ let match;
1176
+ while ((match = toolPattern.exec(content)) !== null) {
1177
+ const name = match[1];
1178
+ if (seenToolNames.has(name))
1179
+ continue;
1180
+ seenToolNames.add(name);
1181
+ toolDefs.push({ name, filePath, description: match[2].slice(0, 200).replace(/\n/g, ' ').trim() });
1182
+ }
1183
+ }
1184
+ }
1185
+ // Create Tool nodes and HANDLES_TOOL edges
1186
+ if (toolDefs.length > 0) {
1187
+ for (const td of toolDefs) {
1188
+ const toolNodeId = generateId('Tool', td.name);
1189
+ graph.addNode({
1190
+ id: toolNodeId,
1191
+ label: 'Tool',
1192
+ properties: { name: td.name, filePath: td.filePath, description: td.description },
785
1193
  });
786
- });
787
- processResult.steps.forEach(step => {
1194
+ const handlerFileId = generateId('File', td.filePath);
788
1195
  graph.addRelationship({
789
- id: `${step.nodeId}_step_${step.step}_${step.processId}`,
790
- type: 'STEP_IN_PROCESS',
791
- sourceId: step.nodeId,
792
- targetId: step.processId,
1196
+ id: generateId('HANDLES_TOOL', `${handlerFileId}->${toolNodeId}`),
1197
+ sourceId: handlerFileId,
1198
+ targetId: toolNodeId,
1199
+ type: 'HANDLES_TOOL',
793
1200
  confidence: 1.0,
794
- reason: 'trace-detection',
795
- step: step.step,
1201
+ reason: 'tool-definition',
796
1202
  });
797
- });
1203
+ }
1204
+ if (isDev) {
1205
+ console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`);
1206
+ }
1207
+ }
1208
+ // ── Phase 3.7: ORM Dataflow Detection (Prisma + Supabase) ──────────
1209
+ if (allORMQueries.length > 0) {
1210
+ processORMQueries(graph, allORMQueries, isDev);
1211
+ }
1212
+ // ── Phase 14: Cross-file binding propagation (topological level sort) ──
1213
+ await runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
1214
+ // Post-parse graph analysis (MRO, communities, processes)
1215
+ let communityResult;
1216
+ let processResult;
1217
+ if (!options?.skipGraphPhases) {
1218
+ const graphResults = await runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs);
1219
+ communityResult = graphResults.communityResult;
1220
+ processResult = graphResults.processResult;
798
1221
  }
799
1222
  onProgress({
800
1223
  phase: 'complete',
@@ -808,11 +1231,89 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
808
1231
  nodesCreated: graph.nodeCount
809
1232
  },
810
1233
  });
811
- astCache.clear();
812
1234
  return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
813
1235
  }
814
1236
  catch (error) {
815
- cleanup();
1237
+ ctx.clear();
816
1238
  throw error;
817
1239
  }
818
1240
  };
1241
// Inline ORM regex extraction (avoids importing parse-worker which has worker-only code)
const PRISMA_QUERY_RE = /\bprisma\.(\w+)\.(findMany|findFirst|findUnique|findUniqueOrThrow|findFirstOrThrow|create|createMany|update|updateMany|delete|deleteMany|upsert|count|aggregate|groupBy)\s*\(/g;
const SUPABASE_QUERY_RE = /\bsupabase\.from\s*\(\s*['"](\w+)['"]\s*\)\s*\.(select|insert|update|delete|upsert)\s*\(/g;
/**
 * Scans file content for Prisma (`prisma.<model>.<method>(`) and Supabase
 * (`supabase.from('<table>').<method>(`) query call sites and appends one
 * record per match to `out`.
 *
 * @param {string} filePath - Repo-relative path recorded on each result.
 * @param {string} content - Full source text of the file.
 * @param {Array<{filePath: string, orm: string, model: string, method: string, lineNumber: number}>} out
 *   Accumulator, mutated in place. `lineNumber` is 0-based.
 * @returns {void}
 */
function extractORMQueriesInline(filePath, content, out) {
    // Cheap substring pre-filter so the regexes only run on candidate files.
    const hasPrisma = content.includes('prisma.');
    const hasSupabase = content.includes('supabase.from');
    if (!hasPrisma && !hasSupabase)
        return;
    // Incremental 0-based line counter. A /g regex yields matches in ascending
    // index order, so each lookup only scans forward from the previous match —
    // O(n) total instead of O(n²) from re-splitting the whole prefix per match.
    let cursor = 0;
    let line = 0;
    const lineNumberAt = (index) => {
        for (; cursor < index; cursor++) {
            if (content.charCodeAt(cursor) === 10 /* '\n' */)
                line++;
        }
        return line;
    };
    if (hasPrisma) {
        PRISMA_QUERY_RE.lastIndex = 0; // module-level /g regex is stateful — always reset
        let m;
        while ((m = PRISMA_QUERY_RE.exec(content)) !== null) {
            const model = m[1];
            // Skip client built-ins like $transaction/$queryRaw (defensive —
            // `\w+` cannot actually match a leading '$').
            if (model.startsWith('$'))
                continue;
            out.push({ filePath, orm: 'prisma', model, method: m[2], lineNumber: lineNumberAt(m.index) });
        }
    }
    if (hasSupabase) {
        // The supabase pass restarts from the top of the file, so the forward
        // line counter must be rewound.
        cursor = 0;
        line = 0;
        SUPABASE_QUERY_RE.lastIndex = 0;
        let m;
        while ((m = SUPABASE_QUERY_RE.exec(content)) !== null) {
            out.push({ filePath, orm: 'supabase', model: m[1], method: m[2], lineNumber: lineNumberAt(m.index) });
        }
    }
}
1267
// ============================================================================
// ORM Query Processing — creates QUERIES edges from callers to model nodes
// ============================================================================
/**
 * Materializes ORM query call sites into the graph: one model node per
 * distinct `orm:model` pair (reusing an existing Class/Interface/CodeElement
 * node named after the model when one exists), plus deduplicated QUERIES
 * edges from each querying file to its model node.
 *
 * @param {object} graph - Knowledge graph (getNode/addNode/addRelationship).
 * @param {Array<{orm: string, model: string, method: string, filePath: string}>} queries
 * @param {boolean} isDev - When true, logs a summary line.
 * @returns {void}
 */
function processORMQueries(graph, queries, isDev) {
    const resolvedModels = new Map(); // "orm:model" → graph node id
    const emittedEdges = new Set();   // "fileId->modelId:method" dedupe keys
    let edgeTotal = 0;
    // Find or create the graph node representing an ORM model/table.
    const resolveModelNode = (q) => {
        // Prefer an existing symbol node whose name matches the model.
        for (const label of ['Class', 'Interface', 'CodeElement']) {
            const candidate = generateId(label, `${q.model}`);
            if (graph.getNode(candidate))
                return candidate;
        }
        // No declared symbol found — synthesize a placeholder CodeElement.
        const placeholderId = generateId('CodeElement', `${q.orm}:${q.model}`);
        graph.addNode({
            id: placeholderId,
            label: 'CodeElement',
            properties: {
                name: q.model,
                filePath: '',
                description: `${q.orm} model/table: ${q.model}`,
            },
        });
        return placeholderId;
    };
    for (const q of queries) {
        const modelKey = `${q.orm}:${q.model}`;
        let modelNodeId = resolvedModels.get(modelKey);
        if (modelNodeId === undefined) {
            modelNodeId = resolveModelNode(q);
            resolvedModels.set(modelKey, modelNodeId);
        }
        const fileId = generateId('File', q.filePath);
        const edgeKey = `${fileId}->${modelNodeId}:${q.method}`;
        if (emittedEdges.has(edgeKey))
            continue;
        emittedEdges.add(edgeKey);
        graph.addRelationship({
            id: generateId('QUERIES', edgeKey),
            sourceId: fileId,
            targetId: modelNodeId,
            type: 'QUERIES',
            confidence: 0.9,
            reason: `${q.orm}-${q.method}`,
        });
        edgeTotal++;
    }
    if (isDev) {
        console.log(`ORM dataflow: ${edgeTotal} QUERIES edges, ${resolvedModels.size} models (${queries.length} total calls)`);
    }
}