gitnexus 1.5.3 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304)
  1. package/README.md +10 -0
  2. package/dist/_shared/graph/types.d.ts +1 -1
  3. package/dist/_shared/graph/types.d.ts.map +1 -1
  4. package/dist/_shared/index.d.ts +1 -0
  5. package/dist/_shared/index.d.ts.map +1 -1
  6. package/dist/_shared/language-detection.d.ts.map +1 -1
  7. package/dist/_shared/language-detection.js +2 -0
  8. package/dist/_shared/language-detection.js.map +1 -1
  9. package/dist/_shared/languages.d.ts +1 -0
  10. package/dist/_shared/languages.d.ts.map +1 -1
  11. package/dist/_shared/languages.js +1 -0
  12. package/dist/_shared/languages.js.map +1 -1
  13. package/dist/_shared/lbug/schema-constants.d.ts +1 -1
  14. package/dist/_shared/lbug/schema-constants.d.ts.map +1 -1
  15. package/dist/_shared/lbug/schema-constants.js +3 -1
  16. package/dist/_shared/lbug/schema-constants.js.map +1 -1
  17. package/dist/_shared/mro-strategy.d.ts +19 -0
  18. package/dist/_shared/mro-strategy.d.ts.map +1 -0
  19. package/dist/_shared/mro-strategy.js +2 -0
  20. package/dist/_shared/mro-strategy.js.map +1 -0
  21. package/dist/cli/ai-context.d.ts +1 -0
  22. package/dist/cli/ai-context.js +28 -4
  23. package/dist/cli/analyze.d.ts +2 -0
  24. package/dist/cli/analyze.js +30 -4
  25. package/dist/cli/group.d.ts +2 -0
  26. package/dist/cli/group.js +233 -0
  27. package/dist/cli/index.js +3 -0
  28. package/dist/cli/serve.js +4 -1
  29. package/dist/cli/setup.js +34 -3
  30. package/dist/config/ignore-service.js +8 -3
  31. package/dist/core/augmentation/engine.js +1 -1
  32. package/dist/core/git-staleness.d.ts +13 -0
  33. package/dist/core/git-staleness.js +29 -0
  34. package/dist/core/group/bridge-db.d.ts +82 -0
  35. package/dist/core/group/bridge-db.js +460 -0
  36. package/dist/core/group/bridge-schema.d.ts +27 -0
  37. package/dist/core/group/bridge-schema.js +55 -0
  38. package/dist/core/group/config-parser.d.ts +3 -0
  39. package/dist/core/group/config-parser.js +83 -0
  40. package/dist/core/group/contract-extractor.d.ts +7 -0
  41. package/dist/core/group/contract-extractor.js +1 -0
  42. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  43. package/dist/core/group/extractors/fs-utils.js +24 -0
  44. package/dist/core/group/extractors/grpc-extractor.d.ts +25 -0
  45. package/dist/core/group/extractors/grpc-extractor.js +386 -0
  46. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  47. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  48. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  49. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  50. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  51. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  52. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  53. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  54. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  55. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  56. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  57. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  58. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  59. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  60. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  61. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  62. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  63. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  64. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  65. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  66. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  67. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  68. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  69. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  70. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  71. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  72. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  73. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  74. package/dist/core/group/extractors/http-route-extractor.d.ts +21 -0
  75. package/dist/core/group/extractors/http-route-extractor.js +391 -0
  76. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  77. package/dist/core/group/extractors/manifest-extractor.js +235 -0
  78. package/dist/core/group/extractors/topic-extractor.d.ts +8 -0
  79. package/dist/core/group/extractors/topic-extractor.js +97 -0
  80. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  81. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  82. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  83. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  84. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  85. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  86. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  87. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  88. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  89. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  90. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  91. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  92. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  93. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  94. package/dist/core/group/matching.d.ts +13 -0
  95. package/dist/core/group/matching.js +198 -0
  96. package/dist/core/group/normalization.d.ts +3 -0
  97. package/dist/core/group/normalization.js +115 -0
  98. package/dist/core/group/service-boundary-detector.d.ts +8 -0
  99. package/dist/core/group/service-boundary-detector.js +155 -0
  100. package/dist/core/group/service.d.ts +46 -0
  101. package/dist/core/group/service.js +160 -0
  102. package/dist/core/group/storage.d.ts +9 -0
  103. package/dist/core/group/storage.js +91 -0
  104. package/dist/core/group/sync.d.ts +21 -0
  105. package/dist/core/group/sync.js +148 -0
  106. package/dist/core/group/types.d.ts +130 -0
  107. package/dist/core/group/types.js +1 -0
  108. package/dist/core/ingestion/binding-accumulator.d.ts +212 -0
  109. package/dist/core/ingestion/binding-accumulator.js +336 -0
  110. package/dist/core/ingestion/call-processor.d.ts +155 -24
  111. package/dist/core/ingestion/call-processor.js +1129 -247
  112. package/dist/core/ingestion/class-extractors/generic.d.ts +2 -0
  113. package/dist/core/ingestion/class-extractors/generic.js +135 -0
  114. package/dist/core/ingestion/class-types.d.ts +34 -0
  115. package/dist/core/ingestion/class-types.js +1 -0
  116. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  117. package/dist/core/ingestion/entry-point-scoring.d.ts +1 -0
  118. package/dist/core/ingestion/entry-point-scoring.js +1 -0
  119. package/dist/core/ingestion/field-types.d.ts +2 -2
  120. package/dist/core/ingestion/filesystem-walker.js +8 -0
  121. package/dist/core/ingestion/framework-detection.d.ts +1 -0
  122. package/dist/core/ingestion/framework-detection.js +1 -0
  123. package/dist/core/ingestion/heritage-processor.d.ts +8 -15
  124. package/dist/core/ingestion/heritage-processor.js +15 -28
  125. package/dist/core/ingestion/import-processor.d.ts +1 -11
  126. package/dist/core/ingestion/import-processor.js +1 -13
  127. package/dist/core/ingestion/import-resolvers/utils.js +1 -0
  128. package/dist/core/ingestion/import-resolvers/vue.d.ts +8 -0
  129. package/dist/core/ingestion/import-resolvers/vue.js +9 -0
  130. package/dist/core/ingestion/language-config.js +1 -1
  131. package/dist/core/ingestion/language-provider.d.ts +14 -3
  132. package/dist/core/ingestion/languages/c-cpp.js +168 -1
  133. package/dist/core/ingestion/languages/csharp.js +20 -0
  134. package/dist/core/ingestion/languages/dart.js +26 -4
  135. package/dist/core/ingestion/languages/go.js +22 -0
  136. package/dist/core/ingestion/languages/index.d.ts +1 -0
  137. package/dist/core/ingestion/languages/index.js +2 -0
  138. package/dist/core/ingestion/languages/java.js +17 -0
  139. package/dist/core/ingestion/languages/kotlin.js +24 -1
  140. package/dist/core/ingestion/languages/php.js +23 -11
  141. package/dist/core/ingestion/languages/python.js +9 -0
  142. package/dist/core/ingestion/languages/ruby.js +43 -0
  143. package/dist/core/ingestion/languages/rust.js +38 -0
  144. package/dist/core/ingestion/languages/swift.js +31 -0
  145. package/dist/core/ingestion/languages/typescript.d.ts +1 -0
  146. package/dist/core/ingestion/languages/typescript.js +52 -3
  147. package/dist/core/ingestion/languages/vue.d.ts +13 -0
  148. package/dist/core/ingestion/languages/vue.js +81 -0
  149. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  150. package/dist/core/ingestion/method-extractors/configs/c-cpp.d.ts +3 -0
  151. package/dist/core/ingestion/method-extractors/configs/c-cpp.js +387 -0
  152. package/dist/core/ingestion/method-extractors/configs/csharp.js +5 -1
  153. package/dist/core/ingestion/method-extractors/configs/dart.d.ts +2 -0
  154. package/dist/core/ingestion/method-extractors/configs/dart.js +376 -0
  155. package/dist/core/ingestion/method-extractors/configs/go.d.ts +2 -0
  156. package/dist/core/ingestion/method-extractors/configs/go.js +176 -0
  157. package/dist/core/ingestion/method-extractors/configs/jvm.js +14 -4
  158. package/dist/core/ingestion/method-extractors/configs/php.d.ts +2 -0
  159. package/dist/core/ingestion/method-extractors/configs/php.js +304 -0
  160. package/dist/core/ingestion/method-extractors/configs/python.d.ts +2 -0
  161. package/dist/core/ingestion/method-extractors/configs/python.js +309 -0
  162. package/dist/core/ingestion/method-extractors/configs/ruby.d.ts +2 -0
  163. package/dist/core/ingestion/method-extractors/configs/ruby.js +286 -0
  164. package/dist/core/ingestion/method-extractors/configs/rust.d.ts +2 -0
  165. package/dist/core/ingestion/method-extractors/configs/rust.js +195 -0
  166. package/dist/core/ingestion/method-extractors/configs/swift.d.ts +2 -0
  167. package/dist/core/ingestion/method-extractors/configs/swift.js +277 -0
  168. package/dist/core/ingestion/method-extractors/configs/typescript-javascript.js +85 -8
  169. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  170. package/dist/core/ingestion/method-extractors/generic.js +84 -17
  171. package/dist/core/ingestion/method-types.d.ts +29 -0
  172. package/dist/core/ingestion/model/field-registry.d.ts +18 -0
  173. package/dist/core/ingestion/model/field-registry.js +22 -0
  174. package/dist/core/ingestion/model/heritage-map.d.ts +70 -0
  175. package/dist/core/ingestion/model/heritage-map.js +159 -0
  176. package/dist/core/ingestion/model/index.d.ts +20 -0
  177. package/dist/core/ingestion/model/index.js +41 -0
  178. package/dist/core/ingestion/model/method-registry.d.ts +62 -0
  179. package/dist/core/ingestion/model/method-registry.js +130 -0
  180. package/dist/core/ingestion/model/registration-table.d.ts +139 -0
  181. package/dist/core/ingestion/model/registration-table.js +224 -0
  182. package/dist/core/ingestion/model/resolution-context.d.ts +93 -0
  183. package/dist/core/ingestion/model/resolution-context.js +337 -0
  184. package/dist/core/ingestion/model/resolve.d.ts +56 -0
  185. package/dist/core/ingestion/model/resolve.js +297 -0
  186. package/dist/core/ingestion/model/semantic-model.d.ts +86 -0
  187. package/dist/core/ingestion/model/semantic-model.js +120 -0
  188. package/dist/core/ingestion/model/symbol-table.d.ts +222 -0
  189. package/dist/core/ingestion/model/symbol-table.js +206 -0
  190. package/dist/core/ingestion/model/type-registry.d.ts +39 -0
  191. package/dist/core/ingestion/model/type-registry.js +62 -0
  192. package/dist/core/ingestion/mro-processor.d.ts +5 -4
  193. package/dist/core/ingestion/mro-processor.js +311 -107
  194. package/dist/core/ingestion/parsing-processor.d.ts +5 -4
  195. package/dist/core/ingestion/parsing-processor.js +224 -87
  196. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  197. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  198. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  199. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  200. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  201. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  202. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  203. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  204. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  205. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  206. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  207. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  208. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  209. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  210. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  211. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  212. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  213. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  214. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  215. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  216. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  217. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  218. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  219. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  220. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  221. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  222. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  223. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  224. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  225. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  226. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  227. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  228. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  229. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  230. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  231. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  232. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
  233. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
  234. package/dist/core/ingestion/pipeline.d.ts +18 -10
  235. package/dist/core/ingestion/pipeline.js +66 -1410
  236. package/dist/core/ingestion/process-processor.js +1 -1
  237. package/dist/core/ingestion/tree-sitter-queries.d.ts +5 -5
  238. package/dist/core/ingestion/tree-sitter-queries.js +90 -0
  239. package/dist/core/ingestion/type-env.d.ts +15 -2
  240. package/dist/core/ingestion/type-env.js +163 -102
  241. package/dist/core/ingestion/type-extractors/csharp.js +17 -0
  242. package/dist/core/ingestion/type-extractors/jvm.js +11 -0
  243. package/dist/core/ingestion/type-extractors/php.js +0 -55
  244. package/dist/core/ingestion/type-extractors/ruby.js +0 -32
  245. package/dist/core/ingestion/type-extractors/swift.js +13 -0
  246. package/dist/core/ingestion/type-extractors/types.d.ts +8 -8
  247. package/dist/core/ingestion/type-extractors/typescript.js +66 -69
  248. package/dist/core/ingestion/utils/ast-helpers.d.ts +32 -44
  249. package/dist/core/ingestion/utils/ast-helpers.js +157 -573
  250. package/dist/core/ingestion/utils/env.d.ts +10 -0
  251. package/dist/core/ingestion/utils/env.js +10 -0
  252. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  253. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  254. package/dist/core/ingestion/utils/method-props.d.ts +32 -0
  255. package/dist/core/ingestion/utils/method-props.js +147 -0
  256. package/dist/core/ingestion/vue-sfc-extractor.d.ts +44 -0
  257. package/dist/core/ingestion/vue-sfc-extractor.js +94 -0
  258. package/dist/core/ingestion/workers/parse-worker.d.ts +31 -19
  259. package/dist/core/ingestion/workers/parse-worker.js +469 -200
  260. package/dist/core/lbug/lbug-adapter.d.ts +6 -0
  261. package/dist/core/lbug/lbug-adapter.js +134 -27
  262. package/dist/core/lbug/pool-adapter.d.ts +76 -0
  263. package/dist/core/lbug/pool-adapter.js +522 -0
  264. package/dist/core/run-analyze.d.ts +2 -0
  265. package/dist/core/run-analyze.js +1 -1
  266. package/dist/core/search/bm25-index.js +1 -1
  267. package/dist/core/tree-sitter/parser-loader.js +1 -0
  268. package/dist/core/wiki/graph-queries.js +1 -1
  269. package/dist/mcp/core/embedder.js +6 -5
  270. package/dist/mcp/core/lbug-adapter.d.ts +3 -63
  271. package/dist/mcp/core/lbug-adapter.js +3 -484
  272. package/dist/mcp/local/local-backend.d.ts +31 -2
  273. package/dist/mcp/local/local-backend.js +255 -46
  274. package/dist/mcp/resources.js +5 -4
  275. package/dist/mcp/staleness.d.ts +3 -13
  276. package/dist/mcp/staleness.js +2 -31
  277. package/dist/mcp/tools.js +80 -4
  278. package/dist/server/analyze-job.d.ts +2 -0
  279. package/dist/server/analyze-job.js +4 -0
  280. package/dist/server/api.d.ts +20 -1
  281. package/dist/server/api.js +306 -71
  282. package/dist/server/git-clone.d.ts +2 -1
  283. package/dist/server/git-clone.js +98 -5
  284. package/dist/storage/git.d.ts +13 -0
  285. package/dist/storage/git.js +25 -0
  286. package/dist/storage/repo-manager.js +1 -1
  287. package/package.json +9 -3
  288. package/scripts/patch-tree-sitter-swift.cjs +78 -0
  289. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  290. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  291. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  292. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  293. package/vendor/tree-sitter-proto/package.json +18 -0
  294. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  295. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  296. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  297. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  298. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
  299. package/dist/core/ingestion/named-binding-processor.d.ts +0 -18
  300. package/dist/core/ingestion/named-binding-processor.js +0 -42
  301. package/dist/core/ingestion/resolution-context.d.ts +0 -58
  302. package/dist/core/ingestion/resolution-context.js +0 -135
  303. package/dist/core/ingestion/symbol-table.d.ts +0 -79
  304. package/dist/core/ingestion/symbol-table.js +0 -115
package/dist/core/ingestion/pipeline.js +66 -1410
@@ -1,1425 +1,81 @@
1
- import { createKnowledgeGraph } from '../graph/graph.js';
2
- import { processStructure } from './structure-processor.js';
3
- import { processMarkdown } from './markdown-processor.js';
4
- import { processCobol, isCobolFile, isJclFile } from './cobol-processor.js';
5
- import { processParsing } from './parsing-processor.js';
6
- import { processImports, processImportsFromExtracted, buildImportResolutionContext, } from './import-processor.js';
7
- import { EMPTY_INDEX } from './import-resolvers/utils.js';
8
- import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, processNextjsFetchRoutes, extractFetchCallsFromFiles, seedCrossFileReceiverTypes, buildImportedReturnTypes, buildImportedRawReturnTypes, buildExportedTypeMapFromGraph, buildImplementorMap, mergeImplementorMaps, } from './call-processor.js';
9
- import { nextjsFileToRouteURL, normalizeFetchURL } from './route-extractors/nextjs.js';
10
- import { expoFileToRouteURL } from './route-extractors/expo.js';
11
- import { phpFileToRouteURL } from './route-extractors/php.js';
12
- import { extractResponseShapes, extractPHPResponseShapes, } from './route-extractors/response-shapes.js';
13
- import { extractMiddlewareChain, extractNextjsMiddlewareConfig, compileMatcher, compiledMatcherMatchesRoute, } from './route-extractors/middleware.js';
14
- import { generateId } from '../../lib/utils.js';
15
- import { processHeritage, processHeritageFromExtracted, extractExtractedHeritageFromFiles, } from './heritage-processor.js';
16
- import { computeMRO } from './mro-processor.js';
17
- import { processCommunities } from './community-processor.js';
18
- import { processProcesses } from './process-processor.js';
19
- import { createResolutionContext } from './resolution-context.js';
20
- import { createASTCache } from './ast-cache.js';
21
- import { getLanguageFromFilename } from '../../_shared/index.js';
22
- import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
23
- import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
24
- import { providers, getProviderForFile } from './languages/index.js';
25
- import { createWorkerPool } from './workers/worker-pool.js';
26
- import fs from 'node:fs';
27
- import path from 'node:path';
28
- import { fileURLToPath, pathToFileURL } from 'node:url';
29
- const isDev = process.env.NODE_ENV === 'development';
30
- const EXPO_NAV_PATTERNS = [
31
- /router\.(push|replace|navigate)\(\s*['"`]([^'"`]+)['"`]/g,
32
- /<Link\s+[^>]*href=\s*['"`]([^'"`]+)['"`]/g,
33
- ];
34
- /** Kahn's algorithm: returns files grouped by topological level.
35
- * Files in the same level have no mutual dependencies — safe to process in parallel.
36
- * Files in cycles are returned as a final group (no cross-cycle propagation). */
37
- export function topologicalLevelSort(importMap) {
38
- // Build in-degree map and reverse dependency map
39
- const inDegree = new Map();
40
- const reverseDeps = new Map();
41
- for (const [file, deps] of importMap) {
42
- if (!inDegree.has(file))
43
- inDegree.set(file, 0);
44
- for (const dep of deps) {
45
- if (!inDegree.has(dep))
46
- inDegree.set(dep, 0);
47
- // file imports dep, so dep must be processed before file
48
- // In Kahn's terms: dep → file (dep is a prerequisite of file)
49
- inDegree.set(file, (inDegree.get(file) ?? 0) + 1);
50
- let rev = reverseDeps.get(dep);
51
- if (!rev) {
52
- rev = [];
53
- reverseDeps.set(dep, rev);
54
- }
55
- rev.push(file);
56
- }
57
- }
58
- // BFS from zero-in-degree nodes, grouping by level
59
- const levels = [];
60
- let currentLevel = [...inDegree.entries()].filter(([, d]) => d === 0).map(([f]) => f);
61
- while (currentLevel.length > 0) {
62
- levels.push(currentLevel);
63
- const nextLevel = [];
64
- for (const file of currentLevel) {
65
- for (const dependent of reverseDeps.get(file) ?? []) {
66
- const newDeg = (inDegree.get(dependent) ?? 1) - 1;
67
- inDegree.set(dependent, newDeg);
68
- if (newDeg === 0)
69
- nextLevel.push(dependent);
70
- }
71
- }
72
- currentLevel = nextLevel;
73
- }
74
- // Files still with positive in-degree are in cycles — add as final group
75
- const cycleFiles = [...inDegree.entries()].filter(([, d]) => d > 0).map(([f]) => f);
76
- if (cycleFiles.length > 0) {
77
- levels.push(cycleFiles);
78
- }
79
- return { levels, cycleCount: cycleFiles.length };
80
- }
81
- /** Max bytes of source content to load per parse chunk. Each chunk's source +
82
- * parsed ASTs + extracted records + worker serialization overhead all live in
83
- * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
84
- * peak working memory per chunk after parse expansion. */
85
- const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
86
- /** Max AST trees to keep in LRU cache */
87
- const AST_CACHE_CAP = 50;
88
- /** Minimum percentage of files that must benefit from cross-file seeding to justify the re-resolution pass. */
89
- const CROSS_FILE_SKIP_THRESHOLD = 0.03;
90
- /** Hard cap on files re-processed during cross-file propagation. */
91
- const MAX_CROSS_FILE_REPROCESS = 2000;
92
- /** Node labels that represent top-level importable symbols.
93
- * Excludes Method, Property, Constructor (accessed via receiver, not directly imported),
94
- * and structural labels (File, Folder, Package, Module, Project, etc.). */
95
- const IMPORTABLE_SYMBOL_LABELS = new Set([
96
- 'Function',
97
- 'Class',
98
- 'Interface',
99
- 'Struct',
100
- 'Enum',
101
- 'Trait',
102
- 'TypeAlias',
103
- 'Const',
104
- 'Static',
105
- 'Record',
106
- 'Union',
107
- 'Typedef',
108
- 'Macro',
109
- ]);
110
- /** Max synthetic bindings per importing file — prevents memory bloat for
111
- * C/C++ files that include many large headers. */
112
- const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
113
- /** Pre-computed language sets derived from providers at module load. */
114
- const WILDCARD_LANGUAGES = new Set(Object.values(providers)
115
- .filter((p) => p.importSemantics === 'wildcard')
116
- .map((p) => p.id));
117
- const SYNTHESIS_LANGUAGES = new Set(Object.values(providers)
118
- .filter((p) => p.importSemantics !== 'named')
119
- .map((p) => p.id));
120
- /** Check if a language uses wildcard (whole-module) import semantics.
121
- * Derived from LanguageProvider.importSemantics — no hardcoded set needed. */
122
- function isWildcardImportLanguage(lang) {
123
- return WILDCARD_LANGUAGES.has(lang);
124
- }
125
- /** Check if a language needs synthesis before call resolution.
126
- * True for wildcard-import languages AND namespace-import languages (Python). */
127
- function needsSynthesis(lang) {
128
- return SYNTHESIS_LANGUAGES.has(lang);
129
- }
130
- /** Synthesize namedImportMap entries for languages with whole-module imports.
131
- * These languages (Go, Ruby, C/C++, Swift, Python) import all exported symbols from a
132
- * file, not specific named symbols. After parsing, we know which symbols each file
133
- * exports (via graph isExported), so we can expand ImportMap edges into per-symbol
134
- * bindings that Phase 14 can use for cross-file type propagation. */
135
- function synthesizeWildcardImportBindings(graph, ctx) {
136
- // Pre-compute exported symbols per file from graph (single pass)
137
- const exportedSymbolsByFile = new Map();
138
- graph.forEachNode((node) => {
139
- if (!node.properties?.isExported)
140
- return;
141
- if (!IMPORTABLE_SYMBOL_LABELS.has(node.label))
142
- return;
143
- const fp = node.properties.filePath;
144
- const name = node.properties.name;
145
- if (!fp || !name)
146
- return;
147
- let symbols = exportedSymbolsByFile.get(fp);
148
- if (!symbols) {
149
- symbols = [];
150
- exportedSymbolsByFile.set(fp, symbols);
151
- }
152
- symbols.push({ name, filePath: fp });
153
- });
154
- if (exportedSymbolsByFile.size === 0)
155
- return 0;
156
- // Build a merged import map: ctx.importMap has file-based imports (Ruby, C/C++),
157
- // but Go/C# package imports use graph IMPORTS edges + PackageMap instead.
158
- // Collect graph-level IMPORTS edges for wildcard languages missing from ctx.importMap.
159
- const FILE_PREFIX = 'File:';
160
- const graphImports = new Map();
161
- graph.forEachRelationship((rel) => {
162
- if (rel.type !== 'IMPORTS')
163
- return;
164
- if (!rel.sourceId.startsWith(FILE_PREFIX) || !rel.targetId.startsWith(FILE_PREFIX))
165
- return;
166
- const srcFile = rel.sourceId.slice(FILE_PREFIX.length);
167
- const tgtFile = rel.targetId.slice(FILE_PREFIX.length);
168
- const lang = getLanguageFromFilename(srcFile);
169
- if (!lang || !isWildcardImportLanguage(lang))
170
- return;
171
- // Only add if not already in ctx.importMap (avoid duplicates)
172
- if (ctx.importMap.get(srcFile)?.has(tgtFile))
173
- return;
174
- let set = graphImports.get(srcFile);
175
- if (!set) {
176
- set = new Set();
177
- graphImports.set(srcFile, set);
178
- }
179
- set.add(tgtFile);
180
- });
181
- let totalSynthesized = 0;
182
- // Helper: synthesize bindings for a file given its imported files
183
- const synthesizeForFile = (filePath, importedFiles) => {
184
- let fileBindings = ctx.namedImportMap.get(filePath);
185
- let fileCount = fileBindings?.size ?? 0;
186
- for (const importedFile of importedFiles) {
187
- const exportedSymbols = exportedSymbolsByFile.get(importedFile);
188
- if (!exportedSymbols)
189
- continue;
190
- for (const sym of exportedSymbols) {
191
- if (fileCount >= MAX_SYNTHETIC_BINDINGS_PER_FILE)
192
- return;
193
- if (fileBindings?.has(sym.name))
194
- continue;
195
- if (!fileBindings) {
196
- fileBindings = new Map();
197
- ctx.namedImportMap.set(filePath, fileBindings);
198
- }
199
- fileBindings.set(sym.name, {
200
- sourcePath: importedFile,
201
- exportedName: sym.name,
202
- });
203
- fileCount++;
204
- totalSynthesized++;
205
- }
206
- }
207
- };
208
- // Process files from ctx.importMap (Ruby, C/C++, Swift file-based imports)
209
- for (const [filePath, importedFiles] of ctx.importMap) {
210
- const lang = getLanguageFromFilename(filePath);
211
- if (!lang || !isWildcardImportLanguage(lang))
212
- continue;
213
- synthesizeForFile(filePath, importedFiles);
214
- }
215
- // Process files from graph IMPORTS edges (Go and other wildcard-import languages)
216
- for (const [filePath, importedFiles] of graphImports) {
217
- synthesizeForFile(filePath, importedFiles);
218
- }
219
- // Build module alias map for Python namespace imports.
220
- // `import models` in app.py → ctx.moduleAliasMap['app.py']['models'] = 'models.py'
221
- // Enables `models.User()` to resolve to models.py:User without ambiguous symbol expansion.
222
- const buildPythonModuleAliasForFile = (callerFile, importedFiles) => {
223
- let aliasMap = ctx.moduleAliasMap.get(callerFile);
224
- for (const importedFile of importedFiles) {
225
- // Derive the module alias from the imported filename stem (e.g. "models.py" → "models")
226
- const lastSlash = importedFile.lastIndexOf('/');
227
- const base = lastSlash >= 0 ? importedFile.slice(lastSlash + 1) : importedFile;
228
- const dot = base.lastIndexOf('.');
229
- const stem = dot >= 0 ? base.slice(0, dot) : base;
230
- if (!stem)
231
- continue;
232
- if (!aliasMap) {
233
- aliasMap = new Map();
234
- ctx.moduleAliasMap.set(callerFile, aliasMap);
235
- }
236
- aliasMap.set(stem, importedFile);
237
- }
238
- };
239
- for (const [filePath, importedFiles] of ctx.importMap) {
240
- const provider = getProviderForFile(filePath);
241
- if (!provider || provider.importSemantics !== 'namespace')
242
- continue;
243
- buildPythonModuleAliasForFile(filePath, importedFiles);
244
- }
245
- return totalSynthesized;
246
- }
247
- /** Phase 14: Cross-file binding propagation.
248
- * Seeds downstream files with resolved type bindings from upstream exports.
249
- * Files are processed in topological import order so upstream bindings are
250
- * available when downstream files are re-resolved. */
251
- async function runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
252
- // For the worker path, buildTypeEnv runs inside workers without SymbolTable,
253
- // so exported bindings must be collected from graph + SymbolTable in main thread.
254
- if (exportedTypeMap.size === 0 && graph.nodeCount > 0) {
255
- const graphExports = buildExportedTypeMapFromGraph(graph, ctx.symbols);
256
- for (const [fp, exports] of graphExports)
257
- exportedTypeMap.set(fp, exports);
258
- }
259
- if (exportedTypeMap.size === 0 || ctx.namedImportMap.size === 0)
260
- return;
261
- const allPathSet = new Set(allPaths);
262
- const { levels, cycleCount } = topologicalLevelSort(ctx.importMap);
263
- // Cycle diagnostic: only log when actual cycles detected (cycleCount from Kahn's BFS)
264
- if (isDev && cycleCount > 0) {
265
- console.log(`🔄 ${cycleCount} files in import cycles (skipped for cross-file propagation)`);
266
- }
267
- // Quick count of files with cross-file binding gaps (early exit once threshold exceeded)
268
- let filesWithGaps = 0;
269
- const gapThreshold = Math.max(1, Math.ceil(totalFiles * CROSS_FILE_SKIP_THRESHOLD));
270
- outer: for (const level of levels) {
271
- for (const filePath of level) {
272
- const imports = ctx.namedImportMap.get(filePath);
273
- if (!imports)
274
- continue;
275
- for (const [, binding] of imports) {
276
- const upstream = exportedTypeMap.get(binding.sourcePath);
277
- if (upstream?.has(binding.exportedName)) {
278
- filesWithGaps++;
279
- break;
280
- }
281
- const def = ctx.symbols.lookupExactFull(binding.sourcePath, binding.exportedName);
282
- if (def?.returnType) {
283
- filesWithGaps++;
284
- break;
285
- }
286
- }
287
- if (filesWithGaps >= gapThreshold)
288
- break outer;
289
- }
290
- }
291
- const gapRatio = totalFiles > 0 ? filesWithGaps / totalFiles : 0;
292
- if (gapRatio < CROSS_FILE_SKIP_THRESHOLD && filesWithGaps < gapThreshold) {
293
- if (isDev) {
294
- console.log(`⏭️ Cross-file re-resolution skipped (${filesWithGaps}/${totalFiles} files, ${(gapRatio * 100).toFixed(1)}% < ${CROSS_FILE_SKIP_THRESHOLD * 100}% threshold)`);
295
- }
296
- return;
297
- }
298
- onProgress({
299
- phase: 'parsing',
300
- percent: 82,
301
- message: `Cross-file type propagation (${filesWithGaps}+ files)...`,
302
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
303
- });
304
- let crossFileResolved = 0;
305
- const crossFileStart = Date.now();
306
- const astCache = createASTCache(AST_CACHE_CAP);
307
- for (const level of levels) {
308
- const levelCandidates = [];
309
- for (const filePath of level) {
310
- if (crossFileResolved + levelCandidates.length >= MAX_CROSS_FILE_REPROCESS)
311
- break;
312
- const imports = ctx.namedImportMap.get(filePath);
313
- if (!imports)
314
- continue;
315
- const seeded = new Map();
316
- for (const [localName, binding] of imports) {
317
- const upstream = exportedTypeMap.get(binding.sourcePath);
318
- if (upstream) {
319
- const type = upstream.get(binding.exportedName);
320
- if (type)
321
- seeded.set(localName, type);
322
- }
323
- }
324
- const importedReturns = buildImportedReturnTypes(filePath, ctx.namedImportMap, ctx.symbols);
325
- const importedRawReturns = buildImportedRawReturnTypes(filePath, ctx.namedImportMap, ctx.symbols);
326
- if (seeded.size === 0 && importedReturns.size === 0)
327
- continue;
328
- if (!allPathSet.has(filePath))
329
- continue;
330
- const lang = getLanguageFromFilename(filePath);
331
- if (!lang || !isLanguageAvailable(lang))
332
- continue;
333
- levelCandidates.push({ filePath, seeded, importedReturns, importedRawReturns });
334
- }
335
- if (levelCandidates.length === 0)
336
- continue;
337
- const levelPaths = levelCandidates.map((c) => c.filePath);
338
- const contentMap = await readFileContents(repoPath, levelPaths);
339
- for (const { filePath, seeded, importedReturns, importedRawReturns } of levelCandidates) {
340
- const content = contentMap.get(filePath);
341
- if (!content)
342
- continue;
343
- const reFile = [{ path: filePath, content }];
344
- const bindings = new Map();
345
- if (seeded.size > 0)
346
- bindings.set(filePath, seeded);
347
- const importedReturnTypesMap = new Map();
348
- if (importedReturns.size > 0) {
349
- importedReturnTypesMap.set(filePath, importedReturns);
350
- }
351
- const importedRawReturnTypesMap = new Map();
352
- if (importedRawReturns.size > 0) {
353
- importedRawReturnTypesMap.set(filePath, importedRawReturns);
354
- }
355
- await processCalls(graph, reFile, astCache, ctx, undefined, exportedTypeMap, bindings.size > 0 ? bindings : undefined, importedReturnTypesMap.size > 0 ? importedReturnTypesMap : undefined, importedRawReturnTypesMap.size > 0 ? importedRawReturnTypesMap : undefined);
356
- crossFileResolved++;
357
- }
358
- if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) {
359
- if (isDev)
360
- console.log(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`);
361
- break;
362
- }
363
- }
364
- astCache.clear();
365
- if (isDev) {
366
- const elapsed = Date.now() - crossFileStart;
367
- const totalElapsed = Date.now() - pipelineStart;
368
- const reResolutionPct = totalElapsed > 0 ? ((elapsed / totalElapsed) * 100).toFixed(1) : '0';
369
- console.log(`🔗 Cross-file re-resolution: ${crossFileResolved} candidates re-processed` +
370
- ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`);
371
- }
372
- }
373
- /**
374
- * Phase 1+2: Scan repository paths, build file/folder structure, process markdown.
375
- *
376
- * @reads repoPath (filesystem)
377
- * @writes graph (File, Folder nodes + CONTAINS edges; Markdown sections + cross-links)
378
- */
379
- async function runScanAndStructure(repoPath, graph, onProgress) {
380
- // ── Phase 1: Scan paths only (no content read) ─────────────────────
381
- onProgress({
382
- phase: 'extracting',
383
- percent: 0,
384
- message: 'Scanning repository...',
385
- });
386
- const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
387
- const scanProgress = Math.round((current / total) * 15);
388
- onProgress({
389
- phase: 'extracting',
390
- percent: scanProgress,
391
- message: 'Scanning repository...',
392
- detail: filePath,
393
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
394
- });
395
- });
396
- const totalFiles = scannedFiles.length;
397
- onProgress({
398
- phase: 'extracting',
399
- percent: 15,
400
- message: 'Repository scanned successfully',
401
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
402
- });
403
- // ── Phase 2: Structure (paths only — no content needed) ────────────
404
- onProgress({
405
- phase: 'structure',
406
- percent: 15,
407
- message: 'Analyzing project structure...',
408
- stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
409
- });
410
- const allPaths = scannedFiles.map((f) => f.path);
411
- processStructure(graph, allPaths);
412
- onProgress({
413
- phase: 'structure',
414
- percent: 20,
415
- message: 'Project structure analyzed',
416
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
417
- });
418
- // ── Custom (non-tree-sitter) processors ─────────────────────────────
419
- // Each custom processor follows the pattern in markdown-processor.ts:
420
- // 1. Export a process function: (graph, files, allPathSet) => result
421
- // 2. Export a file detection function: (path) => boolean
422
- // 3. Filter files by extension, write nodes/edges directly to graph
423
- // To add a new language: create a new processor file, import it here,
424
- // and add a filter-read-call-log block following the pattern below.
425
- // ── Phase 2.5: Markdown processing (headings + cross-links) ────────
426
- const mdScanned = scannedFiles.filter((f) => f.path.endsWith('.md') || f.path.endsWith('.mdx'));
427
- if (mdScanned.length > 0) {
428
- const mdContents = await readFileContents(repoPath, mdScanned.map((f) => f.path));
429
- const mdFiles = mdScanned
430
- .filter((f) => mdContents.has(f.path))
431
- .map((f) => ({ path: f.path, content: mdContents.get(f.path) }));
432
- const allPathSet = new Set(allPaths);
433
- const mdResult = processMarkdown(graph, mdFiles, allPathSet);
434
- if (isDev) {
435
- console.log(` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`);
436
- }
437
- }
438
- // ── Phase 2.6: COBOL processing (regex extraction, no tree-sitter) ──
439
- const cobolScanned = scannedFiles.filter((f) => isCobolFile(f.path) || isJclFile(f.path));
440
- if (cobolScanned.length > 0) {
441
- const cobolContents = await readFileContents(repoPath, cobolScanned.map((f) => f.path));
442
- const cobolFiles = cobolScanned
443
- .filter((f) => cobolContents.has(f.path))
444
- .map((f) => ({ path: f.path, content: cobolContents.get(f.path) }));
445
- const allPathSet = new Set(allPaths);
446
- const cobolResult = processCobol(graph, cobolFiles, allPathSet);
447
- if (isDev) {
448
- console.log(` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`);
449
- if (cobolResult.execSqlBlocks > 0 ||
450
- cobolResult.execCicsBlocks > 0 ||
451
- cobolResult.entryPoints > 0) {
452
- console.log(` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`);
453
- }
454
- if (cobolResult.jclJobs > 0) {
455
- console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`);
456
- }
457
- }
458
- }
459
- return { scannedFiles, allPaths, totalFiles };
460
- }
461
1
  /**
462
- * Phase 3+4: Chunked parse + resolve loop.
463
- *
464
- * Reads source in byte-budget chunks (~20MB each). For each chunk:
465
- * 1. Parse via worker pool (or sequential fallback)
466
- * 2. Resolve imports from extracted data
467
- * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
468
- * 4. Resolve heritage + routes per chunk; defer worker CALLS until all chunks
469
- * have contributed heritage so interface-dispatch implementor map is complete
470
- * 5. Collect TypeEnv bindings for cross-file propagation
2
+ * Pipeline orchestrator — dependency-ordered ingestion pipeline.
471
3
  *
472
- * State accumulated across chunks: symbolTable, importMap, namedImportMap,
473
- * moduleAliasMap (all via ResolutionContext), exportedTypeMap, workerTypeEnvBindings.
4
+ * The pipeline is composed of named phases with explicit dependencies.
5
+ * Each phase is defined in its own file under `pipeline-phases/`.
6
+ * The runner in `pipeline-phases/runner.ts` executes phases in
7
+ * topological order, passing typed outputs from upstream phases as
8
+ * inputs to downstream phases.
474
9
  *
475
- * @reads graph (structure nodes from Phase 1+2)
476
- * @reads allPaths (from scan phase)
477
- * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
478
- * @writes ctx.symbolTable, ctx.importMap, ctx.namedImportMap, ctx.moduleAliasMap
10
+ * To add a new phase:
11
+ * 1. Create a new file in `pipeline-phases/` following the pattern
12
+ * 2. Export it from `pipeline-phases/index.ts`
13
+ * 3. Add it to the `ALL_PHASES` array below
479
14
  *
480
- * Follow-up from PR review: MethodExtractor (FieldExtractor parity) and optional
481
- * METHOD_IMPLEMENTS graph edges to make dispatch queryable without an in-memory map.
15
+ * See ARCHITECTURE.md for the full phase dependency diagram.
482
16
  */
483
- async function runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress) {
484
- const symbolTable = ctx.symbols;
485
- const parseableScanned = scannedFiles.filter((f) => {
486
- const lang = getLanguageFromFilename(f.path);
487
- return lang && isLanguageAvailable(lang);
488
- });
489
- // Warn about files skipped due to unavailable parsers
490
- const skippedByLang = new Map();
491
- for (const f of scannedFiles) {
492
- const lang = getLanguageFromFilename(f.path);
493
- if (lang && !isLanguageAvailable(lang)) {
494
- skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
495
- }
496
- }
497
- for (const [lang, count] of skippedByLang) {
498
- console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
499
- }
500
- const totalParseable = parseableScanned.length;
501
- if (totalParseable === 0) {
502
- onProgress({
503
- phase: 'parsing',
504
- percent: 82,
505
- message: 'No parseable files found — skipping parsing phase',
506
- stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
507
- });
508
- }
509
- // Build byte-budget chunks
510
- const chunks = [];
511
- let currentChunk = [];
512
- let currentBytes = 0;
513
- for (const file of parseableScanned) {
514
- if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
515
- chunks.push(currentChunk);
516
- currentChunk = [];
517
- currentBytes = 0;
518
- }
519
- currentChunk.push(file.path);
520
- currentBytes += file.size;
521
- }
522
- if (currentChunk.length > 0)
523
- chunks.push(currentChunk);
524
- const numChunks = chunks.length;
525
- if (isDev) {
526
- const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
527
- console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
528
- }
529
- onProgress({
530
- phase: 'parsing',
531
- percent: 20,
532
- message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
533
- stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
534
- });
535
- // Don't spawn workers for tiny repos — overhead exceeds benefit
536
- const MIN_FILES_FOR_WORKERS = 15;
537
- const MIN_BYTES_FOR_WORKERS = 512 * 1024;
538
- const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
539
- // Create worker pool once, reuse across chunks
540
- let workerPool;
541
- if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
542
- try {
543
- let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
544
- // When running under vitest, import.meta.url points to src/ where no .js exists.
545
- // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
546
- const thisDir = fileURLToPath(new URL('.', import.meta.url));
547
- if (!fs.existsSync(fileURLToPath(workerUrl))) {
548
- const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
549
- if (fs.existsSync(distWorker)) {
550
- workerUrl = pathToFileURL(distWorker);
551
- }
552
- }
553
- workerPool = createWorkerPool(workerUrl);
554
- }
555
- catch (err) {
556
- if (isDev)
557
- console.warn('Worker pool creation failed, using sequential fallback:', err.message);
558
- }
559
- }
560
- let filesParsedSoFar = 0;
561
- // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
562
- const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
563
- let astCache = createASTCache(maxChunkFiles);
564
- // Build import resolution context once — suffix index, file lists, resolve cache.
565
- // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
566
- const importCtx = buildImportResolutionContext(allPaths);
567
- const allPathObjects = allPaths.map((p) => ({ path: p }));
568
- // Worker path: parse + imports + heritage per chunk; buffer extracted calls and
569
- // run processCallsFromExtracted once after all chunks so interface-dispatch uses a
570
- // complete implementor map (heritage from every chunk). Costs peak RAM for buffered
571
- // call rows vs streaming resolution per chunk.
572
- const sequentialChunkPaths = [];
573
- // Pre-compute which chunks need synthesis — O(1) lookup per chunk.
574
- const chunkNeedsSynthesis = chunks.map((paths) => paths.some((p) => {
575
- const lang = getLanguageFromFilename(p);
576
- return lang != null && needsSynthesis(lang);
577
- }));
578
- // Phase 14: Collect exported type bindings for cross-file propagation
579
- const exportedTypeMap = new Map();
580
- // Accumulate file-scope TypeEnv bindings from workers (closes worker/sequential quality gap)
581
- const workerTypeEnvBindings = [];
582
- // Accumulate fetch() calls from workers for Next.js route matching
583
- const allFetchCalls = [];
584
- // Accumulate framework-extracted routes (Laravel, etc.) for Route node creation
585
- const allExtractedRoutes = [];
586
- // Accumulate decorator-based routes (@Get, @Post, @app.route, etc.)
587
- const allDecoratorRoutes = [];
588
- // Accumulate MCP/RPC tool definitions (@mcp.tool(), @app.tool(), etc.)
589
- const allToolDefs = [];
590
- const allORMQueries = [];
591
- const deferredWorkerCalls = [];
592
- const deferredWorkerHeritage = [];
593
- const deferredConstructorBindings = [];
594
- const deferredAssignments = [];
595
- try {
596
- for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
597
- const chunkPaths = chunks[chunkIdx];
598
- // Read content for this chunk only
599
- const chunkContents = await readFileContents(repoPath, chunkPaths);
600
- const chunkFiles = chunkPaths
601
- .filter((p) => chunkContents.has(p))
602
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
603
- // Parse this chunk (workers or sequential fallback)
604
- const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
605
- const globalCurrent = filesParsedSoFar + current;
606
- const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
607
- onProgress({
608
- phase: 'parsing',
609
- percent: Math.round(parsingProgress),
610
- message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
611
- detail: filePath,
612
- stats: {
613
- filesProcessed: globalCurrent,
614
- totalFiles: totalParseable,
615
- nodesCreated: graph.nodeCount,
616
- },
617
- });
618
- }, workerPool);
619
- const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
620
- if (chunkWorkerData) {
621
- // Imports
622
- await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
623
- onProgress({
624
- phase: 'parsing',
625
- percent: Math.round(chunkBasePercent),
626
- message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
627
- detail: `${current}/${total} files`,
628
- stats: {
629
- filesProcessed: filesParsedSoFar,
630
- totalFiles: totalParseable,
631
- nodesCreated: graph.nodeCount,
632
- },
633
- });
634
- }, repoPath, importCtx);
635
- // ── Wildcard-import synthesis (Ruby / C/C++ / Swift / Go) + Python module aliases ─
636
- // Synthesize namedImportMap entries for wildcard-import languages and build
637
- // moduleAliasMap for Python namespace imports. Must run after imports are resolved
638
- // (importMap is populated) but BEFORE call resolution.
639
- if (chunkNeedsSynthesis[chunkIdx])
640
- synthesizeWildcardImportBindings(graph, ctx);
641
- // Phase 14 E1: Seed cross-file receiver types from ExportedTypeMap
642
- // before call resolution — eliminates re-parse for single-hop imported receivers.
643
- // NOTE: In the worker path, exportedTypeMap is empty during chunk processing
644
- // (populated later in runCrossFileBindingPropagation). This block is latent —
645
- // it activates only if incremental export collection is added per-chunk.
646
- if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) {
647
- const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
648
- if (isDev && enrichedCount > 0) {
649
- console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
650
- }
651
- }
652
- deferredWorkerCalls.push(...chunkWorkerData.calls);
653
- deferredWorkerHeritage.push(...chunkWorkerData.heritage);
654
- deferredConstructorBindings.push(...chunkWorkerData.constructorBindings);
655
- if (chunkWorkerData.assignments?.length) {
656
- deferredAssignments.push(...chunkWorkerData.assignments);
657
- }
658
- // Heritage + Routes — calls deferred until all chunks have contributed heritage
659
- // (complete implementor map for interface dispatch).
660
- await Promise.all([
661
- processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
662
- onProgress({
663
- phase: 'parsing',
664
- percent: Math.round(chunkBasePercent),
665
- message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
666
- detail: `${current}/${total} records`,
667
- stats: {
668
- filesProcessed: filesParsedSoFar,
669
- totalFiles: totalParseable,
670
- nodesCreated: graph.nodeCount,
671
- },
672
- });
673
- }),
674
- processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
675
- onProgress({
676
- phase: 'parsing',
677
- percent: Math.round(chunkBasePercent),
678
- message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
679
- detail: `${current}/${total} routes`,
680
- stats: {
681
- filesProcessed: filesParsedSoFar,
682
- totalFiles: totalParseable,
683
- nodesCreated: graph.nodeCount,
684
- },
685
- });
686
- }),
687
- ]);
688
- // Collect TypeEnv file-scope bindings for exported type enrichment
689
- if (chunkWorkerData.typeEnvBindings?.length) {
690
- workerTypeEnvBindings.push(...chunkWorkerData.typeEnvBindings);
691
- }
692
- // Collect fetch() calls for Next.js route matching
693
- if (chunkWorkerData.fetchCalls?.length) {
694
- allFetchCalls.push(...chunkWorkerData.fetchCalls);
695
- }
696
- if (chunkWorkerData.routes?.length) {
697
- allExtractedRoutes.push(...chunkWorkerData.routes);
698
- }
699
- if (chunkWorkerData.decoratorRoutes?.length) {
700
- allDecoratorRoutes.push(...chunkWorkerData.decoratorRoutes);
701
- }
702
- if (chunkWorkerData.toolDefs?.length) {
703
- allToolDefs.push(...chunkWorkerData.toolDefs);
704
- }
705
- if (chunkWorkerData.ormQueries?.length) {
706
- allORMQueries.push(...chunkWorkerData.ormQueries);
707
- }
708
- }
709
- else {
710
- await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
711
- sequentialChunkPaths.push(chunkPaths);
712
- }
713
- filesParsedSoFar += chunkFiles.length;
714
- // Clear AST cache between chunks to free memory
715
- astCache.clear();
716
- // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
717
- }
718
- // Complete implementor map from all worker heritage, then resolve CALLS once (interface dispatch).
719
- const fullWorkerImplementorMap = deferredWorkerHeritage.length > 0
720
- ? buildImplementorMap(deferredWorkerHeritage, ctx)
721
- : new Map();
722
- if (deferredWorkerCalls.length > 0) {
723
- await processCallsFromExtracted(graph, deferredWorkerCalls, ctx, (current, total) => {
724
- onProgress({
725
- phase: 'parsing',
726
- percent: 82,
727
- message: 'Resolving calls (all chunks)...',
728
- detail: `${current}/${total} files`,
729
- stats: {
730
- filesProcessed: filesParsedSoFar,
731
- totalFiles: totalParseable,
732
- nodesCreated: graph.nodeCount,
733
- },
734
- });
735
- }, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined, fullWorkerImplementorMap);
736
- }
737
- if (deferredAssignments.length > 0) {
738
- processAssignmentsFromExtracted(graph, deferredAssignments, ctx, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined);
739
- }
740
- }
741
- finally {
742
- await workerPool?.terminate();
743
- }
744
- // Sequential fallback chunks: re-read source for call/heritage resolution
745
- // Synthesize wildcard import bindings once after ALL imports are processed,
746
- // before any call resolution — same rationale as the worker-path inline synthesis.
747
- if (sequentialChunkPaths.length > 0)
748
- synthesizeWildcardImportBindings(graph, ctx);
749
- // Merge implementor-map deltas per chunk (O(heritage per chunk)), not O(|edges|) graph scans
750
- // per chunk — mirrors worker-path deferred heritage without re-iterating all relationships.
751
- const sequentialImplementorMap = new Map();
752
- for (const chunkPaths of sequentialChunkPaths) {
753
- const chunkContents = await readFileContents(repoPath, chunkPaths);
754
- const chunkFiles = chunkPaths
755
- .filter((p) => chunkContents.has(p))
756
- .map((p) => ({ path: p, content: chunkContents.get(p) }));
757
- astCache = createASTCache(chunkFiles.length);
758
- const sequentialHeritage = await extractExtractedHeritageFromFiles(chunkFiles, astCache);
759
- mergeImplementorMaps(sequentialImplementorMap, buildImplementorMap(sequentialHeritage, ctx));
760
- const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap, undefined, undefined, undefined, sequentialImplementorMap);
761
- await processHeritage(graph, chunkFiles, astCache, ctx);
762
- if (rubyHeritage.length > 0) {
763
- await processHeritageFromExtracted(graph, rubyHeritage, ctx);
764
- }
765
- // Extract fetch() calls for Next.js route matching (sequential path)
766
- const chunkFetchCalls = await extractFetchCallsFromFiles(chunkFiles, astCache);
767
- if (chunkFetchCalls.length > 0) {
768
- allFetchCalls.push(...chunkFetchCalls);
769
- }
770
- // Extract ORM queries (sequential path)
771
- for (const f of chunkFiles) {
772
- extractORMQueriesInline(f.path, f.content, allORMQueries);
773
- }
774
- astCache.clear();
775
- }
776
- // Log resolution cache stats
777
- if (isDev) {
778
- const rcStats = ctx.getStats();
779
- const total = rcStats.cacheHits + rcStats.cacheMisses;
780
- const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
781
- console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
782
- }
783
- // ── Worker path quality enrichment: merge TypeEnv file-scope bindings into ExportedTypeMap ──
784
- // Workers return file-scope bindings from their TypeEnv fixpoint (includes inferred types
785
- // like `const config = getConfig()` → Config). Filter by graph isExported to match
786
- // the sequential path's collectExportedBindings behavior.
787
- if (workerTypeEnvBindings.length > 0) {
788
- let enriched = 0;
789
- for (const { filePath, bindings } of workerTypeEnvBindings) {
790
- for (const [name, type] of bindings) {
791
- // Verify the symbol is exported via graph node
792
- const nodeId = `Function:${filePath}:${name}`;
793
- const varNodeId = `Variable:${filePath}:${name}`;
794
- const constNodeId = `Const:${filePath}:${name}`;
795
- const node = graph.getNode(nodeId) ?? graph.getNode(varNodeId) ?? graph.getNode(constNodeId);
796
- if (!node?.properties?.isExported)
797
- continue;
798
- let fileExports = exportedTypeMap.get(filePath);
799
- if (!fileExports) {
800
- fileExports = new Map();
801
- exportedTypeMap.set(filePath, fileExports);
802
- }
803
- // Don't overwrite existing entries (Tier 0 from SymbolTable is authoritative)
804
- if (!fileExports.has(name)) {
805
- fileExports.set(name, type);
806
- enriched++;
807
- }
808
- }
809
- }
810
- if (isDev && enriched > 0) {
811
- console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
812
- }
813
- }
814
- // ── Final synthesis pass for whole-module-import languages ──
815
- // Per-chunk synthesis (above) already ran incrementally. This final pass ensures
816
- // any remaining files whose imports were not covered inline are also synthesized,
817
- // and that Phase 14 type propagation has complete namedImportMap data.
818
- const synthesized = synthesizeWildcardImportBindings(graph, ctx);
819
- if (isDev && synthesized > 0) {
820
- console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
821
- }
822
- // Free import resolution context — suffix index + resolve cache no longer needed
823
- // (allPathObjects and importCtx hold ~94MB+ for large repos)
824
- allPathObjects.length = 0;
825
- importCtx.resolveCache.clear();
826
- importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos)
827
- importCtx.normalizedFileList = [];
828
- return {
829
- exportedTypeMap,
830
- allFetchCalls,
831
- allExtractedRoutes,
832
- allDecoratorRoutes,
833
- allToolDefs,
834
- allORMQueries,
835
- };
836
- }
17
+ import { createKnowledgeGraph } from '../graph/graph.js';
18
+ import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, mroPhase, communitiesPhase, processesPhase, } from './pipeline-phases/index.js';
19
+ // ── Phase registry ─────────────────────────────────────────────────────────
837
20
  /**
838
- * Post-parse graph analysis: MRO, community detection, process extraction.
21
+ * All pipeline phases with their dependency relationships.
22
+ *
23
+ * Phase dependency graph:
839
24
  *
840
- * @reads graph (all nodes and relationships from parse + resolve phases)
841
- * @writes graph (Community nodes, Process nodes, MEMBER_OF edges, STEP_IN_PROCESS edges, OVERRIDES edges)
25
+ * scan → structure → [markdown, cobol] → parse → [routes, tools, orm]
26
+ * → crossFile → mro → communities → processes
27
+ *
28
+ * To add a new phase: create a file in pipeline-phases/, export the phase
29
+ * object, and add it to the appropriate position in this array.
842
30
  */
843
- async function runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs) {
844
- // ── Phase 4.5: Method Resolution Order ──────────────────────────────
845
- onProgress({
846
- phase: 'parsing',
847
- percent: 81,
848
- message: 'Computing method resolution order...',
849
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
850
- });
851
- const mroResult = computeMRO(graph);
852
- if (isDev && mroResult.entries.length > 0) {
853
- console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
854
- }
855
- // ── Phase 5: Communities ───────────────────────────────────────────
856
- onProgress({
857
- phase: 'communities',
858
- percent: 82,
859
- message: 'Detecting code communities...',
860
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
861
- });
862
- const communityResult = await processCommunities(graph, (message, progress) => {
863
- const communityProgress = 82 + progress * 0.1;
864
- onProgress({
865
- phase: 'communities',
866
- percent: Math.round(communityProgress),
867
- message,
868
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
869
- });
870
- });
871
- if (isDev) {
872
- console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
873
- }
874
- communityResult.communities.forEach((comm) => {
875
- graph.addNode({
876
- id: comm.id,
877
- label: 'Community',
878
- properties: {
879
- name: comm.label,
880
- filePath: '',
881
- heuristicLabel: comm.heuristicLabel,
882
- cohesion: comm.cohesion,
883
- symbolCount: comm.symbolCount,
884
- },
885
- });
886
- });
887
- communityResult.memberships.forEach((membership) => {
888
- graph.addRelationship({
889
- id: `${membership.nodeId}_member_of_${membership.communityId}`,
890
- type: 'MEMBER_OF',
891
- sourceId: membership.nodeId,
892
- targetId: membership.communityId,
893
- confidence: 1.0,
894
- reason: 'leiden-algorithm',
895
- });
896
- });
897
- // ── Phase 6: Processes ─────────────────────────────────────────────
898
- onProgress({
899
- phase: 'processes',
900
- percent: 94,
901
- message: 'Detecting execution flows...',
902
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
903
- });
904
- let symbolCount = 0;
905
- graph.forEachNode((n) => {
906
- if (n.label !== 'File')
907
- symbolCount++;
908
- });
909
- const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
910
- const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
911
- const processProgress = 94 + progress * 0.05;
912
- onProgress({
913
- phase: 'processes',
914
- percent: Math.round(processProgress),
915
- message,
916
- stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
917
- });
918
- }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
919
- if (isDev) {
920
- console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
921
- }
922
- processResult.processes.forEach((proc) => {
923
- graph.addNode({
924
- id: proc.id,
925
- label: 'Process',
926
- properties: {
927
- name: proc.label,
928
- filePath: '',
929
- heuristicLabel: proc.heuristicLabel,
930
- processType: proc.processType,
931
- stepCount: proc.stepCount,
932
- communities: proc.communities,
933
- entryPointId: proc.entryPointId,
934
- terminalId: proc.terminalId,
935
- },
936
- });
937
- });
938
- processResult.steps.forEach((step) => {
939
- graph.addRelationship({
940
- id: `${step.nodeId}_step_${step.step}_${step.processId}`,
941
- type: 'STEP_IN_PROCESS',
942
- sourceId: step.nodeId,
943
- targetId: step.processId,
944
- confidence: 1.0,
945
- reason: 'trace-detection',
946
- step: step.step,
947
- });
948
- });
949
- // Link Route and Tool nodes to Processes via reverse index (file → node id)
950
- if ((routeRegistry?.size ?? 0) > 0 || (toolDefs?.length ?? 0) > 0) {
951
- // Reverse indexes: file → all route URLs / tool names (handles multi-route files)
952
- const routesByFile = new Map();
953
- if (routeRegistry) {
954
- for (const [url, entry] of routeRegistry) {
955
- let list = routesByFile.get(entry.filePath);
956
- if (!list) {
957
- list = [];
958
- routesByFile.set(entry.filePath, list);
959
- }
960
- list.push(url);
961
- }
962
- }
963
- const toolsByFile = new Map();
964
- if (toolDefs) {
965
- for (const td of toolDefs) {
966
- let list = toolsByFile.get(td.filePath);
967
- if (!list) {
968
- list = [];
969
- toolsByFile.set(td.filePath, list);
970
- }
971
- list.push(td.name);
972
- }
973
- }
974
- let linked = 0;
975
- for (const proc of processResult.processes) {
976
- if (!proc.entryPointId)
977
- continue;
978
- const entryNode = graph.getNode(proc.entryPointId);
979
- if (!entryNode)
980
- continue;
981
- const entryFile = entryNode.properties.filePath;
982
- if (!entryFile)
983
- continue;
984
- const routeURLs = routesByFile.get(entryFile);
985
- if (routeURLs) {
986
- for (const routeURL of routeURLs) {
987
- const routeNodeId = generateId('Route', routeURL);
988
- graph.addRelationship({
989
- id: generateId('ENTRY_POINT_OF', `${routeNodeId}->${proc.id}`),
990
- sourceId: routeNodeId,
991
- targetId: proc.id,
992
- type: 'ENTRY_POINT_OF',
993
- confidence: 0.85,
994
- reason: 'route-handler-entry-point',
995
- });
996
- linked++;
997
- }
998
- }
999
- const toolNames = toolsByFile.get(entryFile);
1000
- if (toolNames) {
1001
- for (const toolName of toolNames) {
1002
- const toolNodeId = generateId('Tool', toolName);
1003
- graph.addRelationship({
1004
- id: generateId('ENTRY_POINT_OF', `${toolNodeId}->${proc.id}`),
1005
- sourceId: toolNodeId,
1006
- targetId: proc.id,
1007
- type: 'ENTRY_POINT_OF',
1008
- confidence: 0.85,
1009
- reason: 'tool-handler-entry-point',
1010
- });
1011
- linked++;
1012
- }
1013
- }
1014
- }
1015
- if (isDev && linked > 0) {
1016
- console.log(`🔗 Linked ${linked} Route/Tool nodes to execution flows`);
1017
- }
1018
- }
1019
- return { communityResult, processResult };
31
+ function buildPhaseList(options) {
32
+ const phases = [
33
+ scanPhase,
34
+ structurePhase,
35
+ markdownPhase,
36
+ cobolPhase,
37
+ parsePhase,
38
+ routesPhase,
39
+ toolsPhase,
40
+ ormPhase,
41
+ crossFilePhase,
42
+ ];
43
+ if (!options?.skipGraphPhases) {
44
+ phases.push(mroPhase, communitiesPhase, processesPhase);
45
+ }
46
+ return phases;
1020
47
  }
1021
48
  // ── Pipeline orchestrator ─────────────────────────────────────────────────
1022
49
  export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
1023
50
  const graph = createKnowledgeGraph();
1024
- const ctx = createResolutionContext();
1025
51
  const pipelineStart = Date.now();
1026
- try {
1027
- // Phase 1+2: Scan paths, build structure, process markdown
1028
- const { scannedFiles, allPaths, totalFiles } = await runScanAndStructure(repoPath, graph, onProgress);
1029
- // Phase 3+4: Chunked parse + resolve (imports, calls, heritage, routes)
1030
- const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries, } = await runChunkedParseAndResolve(graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
1031
- const routeRegistry = new Map();
1032
- // Detect Expo Router app/ roots vs Next.js app/ roots (monorepo-safe).
1033
- const expoAppRoots = new Set();
1034
- const nextjsAppRoots = new Set();
1035
- const expoAppPaths = new Set();
1036
- for (const p of allPaths) {
1037
- const norm = p.replace(/\\/g, '/');
1038
- const appIdx = norm.lastIndexOf('app/');
1039
- if (appIdx < 0)
1040
- continue;
1041
- const root = norm.slice(0, appIdx + 4);
1042
- if (/\/_layout\.(tsx?|jsx?)$/.test(norm))
1043
- expoAppRoots.add(root);
1044
- if (/\/page\.(tsx?|jsx?)$/.test(norm))
1045
- nextjsAppRoots.add(root);
1046
- }
1047
- for (const root of nextjsAppRoots)
1048
- expoAppRoots.delete(root);
1049
- if (expoAppRoots.size > 0) {
1050
- for (const p of allPaths) {
1051
- const norm = p.replace(/\\/g, '/');
1052
- const appIdx = norm.lastIndexOf('app/');
1053
- if (appIdx >= 0 && expoAppRoots.has(norm.slice(0, appIdx + 4)))
1054
- expoAppPaths.add(p);
1055
- }
1056
- }
1057
- for (const p of allPaths) {
1058
- if (expoAppPaths.has(p)) {
1059
- const expoURL = expoFileToRouteURL(p);
1060
- if (expoURL && !routeRegistry.has(expoURL)) {
1061
- routeRegistry.set(expoURL, { filePath: p, source: 'expo-filesystem-route' });
1062
- continue;
1063
- }
1064
- }
1065
- const nextjsURL = nextjsFileToRouteURL(p);
1066
- if (nextjsURL && !routeRegistry.has(nextjsURL)) {
1067
- routeRegistry.set(nextjsURL, { filePath: p, source: 'nextjs-filesystem-route' });
1068
- continue;
1069
- }
1070
- if (p.endsWith('.php')) {
1071
- const phpURL = phpFileToRouteURL(p);
1072
- if (phpURL && !routeRegistry.has(phpURL)) {
1073
- routeRegistry.set(phpURL, { filePath: p, source: 'php-file-route' });
1074
- }
1075
- }
1076
- }
1077
- const ensureSlash = (path) => (path.startsWith('/') ? path : '/' + path);
1078
- let duplicateRoutes = 0;
1079
- const addRoute = (url, entry) => {
1080
- if (routeRegistry.has(url)) {
1081
- duplicateRoutes++;
1082
- return;
1083
- }
1084
- routeRegistry.set(url, entry);
1085
- };
1086
- for (const route of allExtractedRoutes) {
1087
- if (!route.routePath)
1088
- continue;
1089
- addRoute(ensureSlash(route.routePath), {
1090
- filePath: route.filePath,
1091
- source: 'framework-route',
1092
- });
1093
- }
1094
- for (const dr of allDecoratorRoutes) {
1095
- addRoute(ensureSlash(dr.routePath), {
1096
- filePath: dr.filePath,
1097
- source: `decorator-${dr.decoratorName}`,
1098
- });
1099
- }
1100
- let handlerContents;
1101
- if (routeRegistry.size > 0) {
1102
- const handlerPaths = [...routeRegistry.values()].map((e) => e.filePath);
1103
- handlerContents = await readFileContents(repoPath, handlerPaths);
1104
- for (const [routeURL, entry] of routeRegistry) {
1105
- const { filePath: handlerPath, source: routeSource } = entry;
1106
- const content = handlerContents.get(handlerPath);
1107
- const { responseKeys, errorKeys } = content
1108
- ? handlerPath.endsWith('.php')
1109
- ? extractPHPResponseShapes(content)
1110
- : extractResponseShapes(content)
1111
- : { responseKeys: undefined, errorKeys: undefined };
1112
- const mwResult = content ? extractMiddlewareChain(content) : undefined;
1113
- const middleware = mwResult?.chain;
1114
- const routeNodeId = generateId('Route', routeURL);
1115
- graph.addNode({
1116
- id: routeNodeId,
1117
- label: 'Route',
1118
- properties: {
1119
- name: routeURL,
1120
- filePath: handlerPath,
1121
- ...(responseKeys ? { responseKeys } : {}),
1122
- ...(errorKeys ? { errorKeys } : {}),
1123
- ...(middleware && middleware.length > 0 ? { middleware } : {}),
1124
- },
1125
- });
1126
- const handlerFileId = generateId('File', handlerPath);
1127
- graph.addRelationship({
1128
- id: generateId('HANDLES_ROUTE', `${handlerFileId}->${routeNodeId}`),
1129
- sourceId: handlerFileId,
1130
- targetId: routeNodeId,
1131
- type: 'HANDLES_ROUTE',
1132
- confidence: 1.0,
1133
- reason: routeSource,
1134
- });
1135
- }
1136
- if (isDev) {
1137
- console.log(`🗺️ Route registry: ${routeRegistry.size} routes${duplicateRoutes > 0 ? ` (${duplicateRoutes} duplicate URLs skipped)` : ''}`);
1138
- }
1139
- }
1140
- // ── Phase 3.5b: Link Next.js project-level middleware.ts to routes ──
1141
- if (routeRegistry.size > 0) {
1142
- const middlewareCandidates = allPaths.filter((p) => p === 'middleware.ts' ||
1143
- p === 'middleware.js' ||
1144
- p === 'middleware.tsx' ||
1145
- p === 'middleware.jsx' ||
1146
- p === 'src/middleware.ts' ||
1147
- p === 'src/middleware.js' ||
1148
- p === 'src/middleware.tsx' ||
1149
- p === 'src/middleware.jsx');
1150
- if (middlewareCandidates.length > 0) {
1151
- const mwContents = await readFileContents(repoPath, middlewareCandidates);
1152
- for (const [mwPath, mwContent] of mwContents) {
1153
- const config = extractNextjsMiddlewareConfig(mwContent);
1154
- if (!config)
1155
- continue;
1156
- const mwLabel = config.wrappedFunctions.length > 0 ? config.wrappedFunctions : [config.exportedName];
1157
- // Pre-compile matchers once per middleware file
1158
- const compiled = config.matchers
1159
- .map(compileMatcher)
1160
- .filter((m) => m !== null);
1161
- let linkedCount = 0;
1162
- for (const [routeURL] of routeRegistry) {
1163
- const matches = compiled.length === 0 ||
1164
- compiled.some((cm) => compiledMatcherMatchesRoute(cm, routeURL));
1165
- if (!matches)
1166
- continue;
1167
- const routeNodeId = generateId('Route', routeURL);
1168
- const existing = graph.getNode(routeNodeId);
1169
- if (!existing)
1170
- continue;
1171
- const currentMw = existing.properties.middleware ?? [];
1172
- // Prepend project-level middleware (runs before handler-level wrappers)
1173
- existing.properties.middleware = [
1174
- ...mwLabel,
1175
- ...currentMw.filter((m) => !mwLabel.includes(m)),
1176
- ];
1177
- linkedCount++;
1178
- }
1179
- if (isDev && linkedCount > 0) {
1180
- console.log(`🛡️ Linked ${mwPath} middleware [${mwLabel.join(', ')}] to ${linkedCount} routes`);
1181
- }
1182
- }
1183
- }
1184
- }
1185
- // Scan HTML/PHP/template files for <form action="/path"> and AJAX url patterns
1186
- // Scan HTML/template files for <form action="/path"> and AJAX url patterns
1187
- // Skip .php — already parsed by tree-sitter with http_client/fetch queries
1188
- const htmlCandidates = allPaths.filter((p) => p.endsWith('.html') ||
1189
- p.endsWith('.htm') ||
1190
- p.endsWith('.ejs') ||
1191
- p.endsWith('.hbs') ||
1192
- p.endsWith('.blade.php'));
1193
- if (htmlCandidates.length > 0 && routeRegistry.size > 0) {
1194
- const htmlContents = await readFileContents(repoPath, htmlCandidates);
1195
- const htmlPatterns = [/action=["']([^"']+)["']/g, /url:\s*["']([^"']+)["']/g];
1196
- for (const [filePath, content] of htmlContents) {
1197
- for (const pattern of htmlPatterns) {
1198
- pattern.lastIndex = 0;
1199
- let match;
1200
- while ((match = pattern.exec(content)) !== null) {
1201
- const normalized = normalizeFetchURL(match[1]);
1202
- if (normalized) {
1203
- allFetchCalls.push({ filePath, fetchURL: normalized, lineNumber: 0 });
1204
- }
1205
- }
1206
- }
1207
- }
1208
- }
1209
- // ── Phase 3.5c: Extract Expo Router navigation patterns ──
1210
- if (expoAppPaths.size > 0 && routeRegistry.size > 0) {
1211
- const unreadExpoPaths = [...expoAppPaths].filter((p) => !handlerContents?.has(p));
1212
- const extraContents = unreadExpoPaths.length > 0
1213
- ? await readFileContents(repoPath, unreadExpoPaths)
1214
- : new Map();
1215
- const allExpoContents = new Map([...(handlerContents ?? new Map()), ...extraContents]);
1216
- for (const [filePath, content] of allExpoContents) {
1217
- if (!expoAppPaths.has(filePath))
1218
- continue;
1219
- for (const pattern of EXPO_NAV_PATTERNS) {
1220
- pattern.lastIndex = 0;
1221
- let match;
1222
- while ((match = pattern.exec(content)) !== null) {
1223
- const url = match[2] ?? match[1];
1224
- if (url && url.startsWith('/')) {
1225
- allFetchCalls.push({ filePath, fetchURL: url, lineNumber: 0 });
1226
- }
1227
- }
1228
- }
1229
- }
1230
- }
1231
- if (routeRegistry.size > 0 && allFetchCalls.length > 0) {
1232
- const routeURLToFile = new Map();
1233
- for (const [url, entry] of routeRegistry)
1234
- routeURLToFile.set(url, entry.filePath);
1235
- // Read consumer file contents so we can extract property access patterns
1236
- const consumerPaths = [...new Set(allFetchCalls.map((c) => c.filePath))];
1237
- const consumerContents = await readFileContents(repoPath, consumerPaths);
1238
- processNextjsFetchRoutes(graph, allFetchCalls, routeURLToFile, consumerContents);
1239
- if (isDev) {
1240
- console.log(`🔗 Processed ${allFetchCalls.length} fetch() calls against ${routeRegistry.size} routes`);
1241
- }
1242
- }
1243
- // ── Phase 3.6: Tool Detection (MCP/RPC) ──────────────────────────
1244
- const toolDefs = [];
1245
- const seenToolNames = new Set();
1246
- for (const td of allToolDefs) {
1247
- if (seenToolNames.has(td.toolName))
1248
- continue;
1249
- seenToolNames.add(td.toolName);
1250
- toolDefs.push({ name: td.toolName, filePath: td.filePath, description: td.description });
1251
- }
1252
- // TS tool definition arrays — require inputSchema nearby to distinguish from config objects
1253
- const toolCandidatePaths = allPaths.filter((p) => (p.endsWith('.ts') || p.endsWith('.js')) &&
1254
- p.toLowerCase().includes('tool') &&
1255
- !p.includes('node_modules') &&
1256
- !p.includes('test') &&
1257
- !p.includes('__'));
1258
- if (toolCandidatePaths.length > 0) {
1259
- const toolContents = await readFileContents(repoPath, toolCandidatePaths);
1260
- for (const [filePath, content] of toolContents) {
1261
- // Only scan files that contain 'inputSchema' — this is the MCP tool signature
1262
- if (!content.includes('inputSchema'))
1263
- continue;
1264
- const toolPattern = /name:\s*['"](\w+)['"]\s*,\s*\n?\s*description:\s*[`'"]([\s\S]*?)[`'"]/g;
1265
- let match;
1266
- while ((match = toolPattern.exec(content)) !== null) {
1267
- const name = match[1];
1268
- if (seenToolNames.has(name))
1269
- continue;
1270
- seenToolNames.add(name);
1271
- toolDefs.push({
1272
- name,
1273
- filePath,
1274
- description: match[2].slice(0, 200).replace(/\n/g, ' ').trim(),
1275
- });
1276
- }
1277
- }
1278
- }
1279
- // Create Tool nodes and HANDLES_TOOL edges
1280
- if (toolDefs.length > 0) {
1281
- for (const td of toolDefs) {
1282
- const toolNodeId = generateId('Tool', td.name);
1283
- graph.addNode({
1284
- id: toolNodeId,
1285
- label: 'Tool',
1286
- properties: { name: td.name, filePath: td.filePath, description: td.description },
1287
- });
1288
- const handlerFileId = generateId('File', td.filePath);
1289
- graph.addRelationship({
1290
- id: generateId('HANDLES_TOOL', `${handlerFileId}->${toolNodeId}`),
1291
- sourceId: handlerFileId,
1292
- targetId: toolNodeId,
1293
- type: 'HANDLES_TOOL',
1294
- confidence: 1.0,
1295
- reason: 'tool-definition',
1296
- });
1297
- }
1298
- if (isDev) {
1299
- console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`);
1300
- }
1301
- }
1302
- // ── Phase 3.7: ORM Dataflow Detection (Prisma + Supabase) ──────────
1303
- if (allORMQueries.length > 0) {
1304
- processORMQueries(graph, allORMQueries, isDev);
1305
- }
1306
- // ── Phase 14: Cross-file binding propagation (topological level sort) ──
1307
- await runCrossFileBindingPropagation(graph, ctx, exportedTypeMap, allPaths, totalFiles, repoPath, pipelineStart, onProgress);
1308
- // Post-parse graph analysis (MRO, communities, processes)
1309
- let communityResult;
1310
- let processResult;
1311
- if (!options?.skipGraphPhases) {
1312
- const graphResults = await runGraphAnalysisPhases(graph, totalFiles, onProgress, routeRegistry, toolDefs);
1313
- communityResult = graphResults.communityResult;
1314
- processResult = graphResults.processResult;
1315
- }
1316
- onProgress({
1317
- phase: 'complete',
1318
- percent: 100,
1319
- message: communityResult && processResult
1320
- ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
1321
- : 'Graph complete! (graph phases skipped)',
1322
- stats: {
1323
- filesProcessed: totalFiles,
1324
- totalFiles,
1325
- nodesCreated: graph.nodeCount,
1326
- },
1327
- });
1328
- return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
1329
- }
1330
- catch (error) {
1331
- ctx.clear();
1332
- throw error;
52
+ const phases = buildPhaseList(options);
53
+ const results = await runPipeline(phases, {
54
+ repoPath,
55
+ graph,
56
+ onProgress,
57
+ options,
58
+ pipelineStart,
59
+ });
60
+ // Extract final results for the PipelineResult contract
61
+ const { totalFiles } = getPhaseOutput(results, 'parse');
62
+ let communityResult;
63
+ let processResult;
64
+ if (!options?.skipGraphPhases) {
65
+ communityResult = getPhaseOutput(results, 'communities').communityResult;
66
+ processResult = getPhaseOutput(results, 'processes').processResult;
1333
67
  }
68
+ onProgress({
69
+ phase: 'complete',
70
+ percent: 100,
71
+ message: communityResult && processResult
72
+ ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
73
+ : 'Graph complete! (graph phases skipped)',
74
+ stats: {
75
+ filesProcessed: totalFiles,
76
+ totalFiles,
77
+ nodesCreated: graph.nodeCount,
78
+ },
79
+ });
80
+ return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
1334
81
  };
1335
- // Inline ORM regex extraction (avoids importing parse-worker which has worker-only code)
1336
- const PRISMA_QUERY_RE = /\bprisma\.(\w+)\.(findMany|findFirst|findUnique|findUniqueOrThrow|findFirstOrThrow|create|createMany|update|updateMany|delete|deleteMany|upsert|count|aggregate|groupBy)\s*\(/g;
1337
- const SUPABASE_QUERY_RE = /\bsupabase\.from\s*\(\s*['"](\w+)['"]\s*\)\s*\.(select|insert|update|delete|upsert)\s*\(/g;
1338
- function extractORMQueriesInline(filePath, content, out) {
1339
- const hasPrisma = content.includes('prisma.');
1340
- const hasSupabase = content.includes('supabase.from');
1341
- if (!hasPrisma && !hasSupabase)
1342
- return;
1343
- if (hasPrisma) {
1344
- PRISMA_QUERY_RE.lastIndex = 0;
1345
- let m;
1346
- while ((m = PRISMA_QUERY_RE.exec(content)) !== null) {
1347
- const model = m[1];
1348
- if (model.startsWith('$'))
1349
- continue;
1350
- out.push({
1351
- filePath,
1352
- orm: 'prisma',
1353
- model,
1354
- method: m[2],
1355
- lineNumber: content.substring(0, m.index).split('\n').length - 1,
1356
- });
1357
- }
1358
- }
1359
- if (hasSupabase) {
1360
- SUPABASE_QUERY_RE.lastIndex = 0;
1361
- let m;
1362
- while ((m = SUPABASE_QUERY_RE.exec(content)) !== null) {
1363
- out.push({
1364
- filePath,
1365
- orm: 'supabase',
1366
- model: m[1],
1367
- method: m[2],
1368
- lineNumber: content.substring(0, m.index).split('\n').length - 1,
1369
- });
1370
- }
1371
- }
1372
- }
1373
- // ============================================================================
1374
- // ORM Query Processing — creates QUERIES edges from callers to model nodes
1375
- // ============================================================================
1376
- function processORMQueries(graph, queries, isDev) {
1377
- const modelNodes = new Map();
1378
- const seenEdges = new Set();
1379
- let edgesCreated = 0;
1380
- for (const q of queries) {
1381
- const modelKey = `${q.orm}:${q.model}`;
1382
- let modelNodeId = modelNodes.get(modelKey);
1383
- if (!modelNodeId) {
1384
- const candidateIds = [
1385
- generateId('Class', `${q.model}`),
1386
- generateId('Interface', `${q.model}`),
1387
- generateId('CodeElement', `${q.model}`),
1388
- ];
1389
- const existing = candidateIds.find((id) => graph.getNode(id));
1390
- if (existing) {
1391
- modelNodeId = existing;
1392
- }
1393
- else {
1394
- modelNodeId = generateId('CodeElement', `${q.orm}:${q.model}`);
1395
- graph.addNode({
1396
- id: modelNodeId,
1397
- label: 'CodeElement',
1398
- properties: {
1399
- name: q.model,
1400
- filePath: '',
1401
- description: `${q.orm} model/table: ${q.model}`,
1402
- },
1403
- });
1404
- }
1405
- modelNodes.set(modelKey, modelNodeId);
1406
- }
1407
- const fileId = generateId('File', q.filePath);
1408
- const edgeKey = `${fileId}->${modelNodeId}:${q.method}`;
1409
- if (seenEdges.has(edgeKey))
1410
- continue;
1411
- seenEdges.add(edgeKey);
1412
- graph.addRelationship({
1413
- id: generateId('QUERIES', edgeKey),
1414
- sourceId: fileId,
1415
- targetId: modelNodeId,
1416
- type: 'QUERIES',
1417
- confidence: 0.9,
1418
- reason: `${q.orm}-${q.method}`,
1419
- });
1420
- edgesCreated++;
1421
- }
1422
- if (isDev) {
1423
- console.log(`ORM dataflow: ${edgesCreated} QUERIES edges, ${modelNodes.size} models (${queries.length} total calls)`);
1424
- }
1425
- }