gitnexus 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/dist/cli/analyze.js +28 -3
  2. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  3. package/dist/core/group/extractors/fs-utils.js +24 -0
  4. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  5. package/dist/core/group/extractors/grpc-extractor.js +313 -191
  6. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  7. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  8. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  10. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  12. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  14. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  16. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  18. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  20. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  21. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  22. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  23. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  24. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  25. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  26. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  27. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  28. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  29. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  30. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  31. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  32. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  33. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  34. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  35. package/dist/core/group/extractors/http-route-extractor.js +201 -238
  36. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  37. package/dist/core/group/extractors/manifest-extractor.js +235 -0
  38. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  39. package/dist/core/group/extractors/topic-extractor.js +55 -192
  40. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  41. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  42. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  43. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  44. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  45. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  46. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  47. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  48. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  49. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  50. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  51. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  52. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  54. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  55. package/dist/core/ingestion/binding-accumulator.js +29 -25
  56. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  57. package/dist/core/ingestion/import-processor.js +1 -1
  58. package/dist/core/ingestion/language-config.js +1 -1
  59. package/dist/core/ingestion/language-provider.d.ts +8 -0
  60. package/dist/core/ingestion/languages/ruby.js +15 -0
  61. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  62. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  63. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  64. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  65. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  66. package/dist/core/ingestion/method-types.d.ts +4 -0
  67. package/dist/core/ingestion/model/resolve.js +103 -48
  68. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  69. package/dist/core/ingestion/model/semantic-model.js +1 -1
  70. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  71. package/dist/core/ingestion/model/symbol-table.js +7 -7
  72. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  73. package/dist/core/ingestion/mro-processor.js +1 -1
  74. package/dist/core/ingestion/parsing-processor.js +54 -42
  75. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  76. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  77. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  78. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  79. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  80. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  81. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  82. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  83. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  84. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  85. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  86. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  87. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  88. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  89. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  90. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  91. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  92. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  93. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  94. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  95. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  96. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  97. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  98. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  99. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  100. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  101. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  102. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  103. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  104. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  105. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  106. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  107. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  108. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  109. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  110. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  111. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
  112. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
  113. package/dist/core/ingestion/pipeline.d.ts +16 -10
  114. package/dist/core/ingestion/pipeline.js +66 -1534
  115. package/dist/core/ingestion/process-processor.js +1 -1
  116. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  117. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  118. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  119. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  120. package/dist/core/ingestion/utils/env.d.ts +10 -0
  121. package/dist/core/ingestion/utils/env.js +10 -0
  122. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  123. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  124. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  125. package/dist/core/lbug/lbug-adapter.js +66 -24
  126. package/package.json +3 -3
  127. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  128. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  129. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  130. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  131. package/vendor/tree-sitter-proto/package.json +18 -0
  132. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  133. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  134. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  135. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  136. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -74,30 +74,30 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
74
74
  qualifiedName: sym.qualifiedName,
75
75
  });
76
76
  }
77
- for (const _item of result.imports)
78
- allImports.push(_item);
79
- for (const _item of result.calls)
80
- allCalls.push(_item);
81
- for (const _item of result.assignments)
82
- allAssignments.push(_item);
83
- for (const _item of result.heritage)
84
- allHeritage.push(_item);
85
- for (const _item of result.routes)
86
- allRoutes.push(_item);
87
- for (const _item of result.fetchCalls)
88
- allFetchCalls.push(_item);
89
- for (const _item of result.decoratorRoutes)
90
- allDecoratorRoutes.push(_item);
91
- for (const _item of result.toolDefs)
92
- allToolDefs.push(_item);
77
+ for (const item of result.imports)
78
+ allImports.push(item);
79
+ for (const item of result.calls)
80
+ allCalls.push(item);
81
+ for (const item of result.assignments)
82
+ allAssignments.push(item);
83
+ for (const item of result.heritage)
84
+ allHeritage.push(item);
85
+ for (const item of result.routes)
86
+ allRoutes.push(item);
87
+ for (const item of result.fetchCalls)
88
+ allFetchCalls.push(item);
89
+ for (const item of result.decoratorRoutes)
90
+ allDecoratorRoutes.push(item);
91
+ for (const item of result.toolDefs)
92
+ allToolDefs.push(item);
93
93
  if (result.ormQueries)
94
- for (const _item of result.ormQueries)
95
- allORMQueries.push(_item);
96
- for (const _item of result.constructorBindings)
97
- allConstructorBindings.push(_item);
94
+ for (const item of result.ormQueries)
95
+ allORMQueries.push(item);
96
+ for (const item of result.constructorBindings)
97
+ allConstructorBindings.push(item);
98
98
  if (result.fileScopeBindings)
99
- for (const _item of result.fileScopeBindings)
100
- fileScopeBindingsByFile.push(_item);
99
+ for (const item of result.fileScopeBindings)
100
+ fileScopeBindingsByFile.push(item);
101
101
  }
102
102
  // Merge and log skipped languages from workers
103
103
  const skippedLanguages = new Map();
@@ -135,11 +135,11 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
135
135
  // Keyed by tree-sitter node reference — cleared at the start of each file.
136
136
  const classInfoCache = new Map();
137
137
  const exportCache = new Map();
138
- const cachedFindEnclosingClassInfo = (node, filePath) => {
138
+ const cachedFindEnclosingClassInfo = (node, filePath, resolveEnclosingOwner) => {
139
139
  const cached = classInfoCache.get(node);
140
140
  if (cached !== undefined)
141
141
  return cached;
142
- const result = findEnclosingClassInfo(node, filePath);
142
+ const result = findEnclosingClassInfo(node, filePath, resolveEnclosingOwner);
143
143
  classInfoCache.set(node, result);
144
144
  return result;
145
145
  };
@@ -158,24 +158,34 @@ const seqFieldInfoCache = new Map();
158
158
  const seqMethodExtractCache = new Map();
159
159
  // Derived method map + collision groups cache — avoids rebuilding per method.
160
160
  const seqMethodMapCache = new Map();
161
- function seqFindEnclosingClassNode(node) {
161
+ /** Provider-aware enclosing container lookup.
162
+ * Walks up from `node` until a CLASS_CONTAINER_TYPES node is found.
163
+ * When `resolveEnclosingOwner` is provided, delegates language-specific
164
+ * container remapping (e.g., Ruby singleton_class → enclosing class).
165
+ * Without the hook, returns the first matching container directly (raw lookup). */
166
+ function seqFindEnclosingOwnerNode(node, resolveEnclosingOwner) {
162
167
  let current = node.parent;
163
168
  while (current) {
164
169
  if (CLASS_CONTAINER_TYPES.has(current.type)) {
165
- // Return singleton_class directly so the method extractor sees it as
166
- // the owner node and correctly marks methods as static. Name resolution
167
- // for qualified names is handled separately by findEnclosingClassInfo.
170
+ if (resolveEnclosingOwner) {
171
+ const resolved = resolveEnclosingOwner(current);
172
+ if (resolved === null) {
173
+ // Provider says skip this container — keep walking up.
174
+ current = current.parent;
175
+ continue;
176
+ }
177
+ return resolved;
178
+ }
168
179
  return current;
169
180
  }
170
181
  current = current.parent;
171
182
  }
172
183
  return null;
173
184
  }
174
- /** Minimal no-op SymbolTable stub for FieldExtractorContext (sequential
175
- * path has a real SymbolTable, but it's incomplete at this stage use
176
- * the stub for safety). Implements the full {@link SymbolTableReader}
177
- * surface so future extractor additions don't silently fall off an
178
- * `as unknown as` cast. */
185
+ /** Minimal no-op SymbolTable stub for sequential extractor contexts. The real
186
+ * SymbolTable is not fully populated yet at this stage, so use the stub for safety.
187
+ * Implements the full {@link SymbolTableReader} surface so future extractor additions
188
+ * don't silently fall off an `as unknown as` cast. */
179
189
  const NOOP_SYMBOL_TABLE_SEQ = {
180
190
  lookupExact: () => undefined,
181
191
  lookupExactFull: () => undefined,
@@ -318,7 +328,7 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
318
328
  nodeLabel === 'Property' ||
319
329
  nodeLabel === 'Function';
320
330
  const enclosingClassInfo = needsOwner
321
- ? cachedFindEnclosingClassInfo(nameNode || definitionNodeForRange, file.path)
331
+ ? cachedFindEnclosingClassInfo(nameNode || definitionNodeForRange, file.path, provider.resolveEnclosingOwner)
322
332
  : null;
323
333
  const enclosingClassId = enclosingClassInfo?.classId ?? null;
324
334
  // Qualify method/property IDs with enclosing class name to avoid collisions
@@ -339,19 +349,21 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
339
349
  if (isMethodLike && definitionNode) {
340
350
  let enriched = false;
341
351
  if (provider.methodExtractor) {
342
- // Try class-based extraction (method inside a class/struct/trait body)
343
- const classNode = seqFindEnclosingClassNode(definitionNode);
344
- if (classNode) {
352
+ // Try class-based extraction (method inside a class/struct/trait body).
353
+ // Raw lookup (no resolveEnclosingOwner) so the method extractor sees
354
+ // the actual container node (e.g. singleton_class) for static detection.
355
+ const methodOwnerNode = seqFindEnclosingOwnerNode(definitionNode);
356
+ if (methodOwnerNode) {
345
357
  // Cache extract() results per class node to avoid re-traversing the
346
358
  // same class body for every method it contains (O(N) -> O(1) per hit).
347
- let result = seqMethodExtractCache.get(classNode.id);
359
+ let result = seqMethodExtractCache.get(methodOwnerNode.id);
348
360
  if (result === undefined) {
349
361
  result =
350
- provider.methodExtractor.extract(classNode, {
362
+ provider.methodExtractor.extract(methodOwnerNode, {
351
363
  filePath: file.path,
352
364
  language,
353
365
  }) ?? null;
354
- seqMethodExtractCache.set(classNode.id, result);
366
+ seqMethodExtractCache.set(methodOwnerNode.id, result);
355
367
  }
356
368
  if (result?.methods?.length) {
357
369
  const defLine = definitionNode.startPosition.row + 1;
@@ -362,7 +374,7 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
362
374
  methodProps = buildMethodProps(info);
363
375
  seqDefMethodInfo = info;
364
376
  seqDefMethods = result.methods;
365
- seqClassNodeId = classNode.id;
377
+ seqClassNodeId = methodOwnerNode.id;
366
378
  }
367
379
  }
368
380
  }
@@ -443,7 +455,7 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, onF
443
455
  if (nodeLabel === 'Property' && definitionNode) {
444
456
  // FieldExtractor is the single source of truth when available
445
457
  if (provider.fieldExtractor && typeEnv) {
446
- const classNode = seqFindEnclosingClassNode(definitionNode);
458
+ const classNode = seqFindEnclosingOwnerNode(definitionNode, provider.resolveEnclosingOwner);
447
459
  if (classNode) {
448
460
  const fieldMap = seqGetFieldInfo(classNode, provider, {
449
461
  typeEnv,
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Phase: cobol
3
+ *
4
+ * Processes COBOL and JCL files via regex extraction (no tree-sitter).
5
+ *
6
+ * @deps structure
7
+ * @reads scannedFiles, allPathSet (from structure phase)
8
+ * @writes graph (COBOL program/paragraph/section nodes, JCL job/step nodes)
9
+ */
10
+ import type { PipelinePhase } from './types.js';
11
+ export interface CobolOutput {
12
+ programs: number;
13
+ paragraphs: number;
14
+ sections: number;
15
+ }
16
+ export declare const cobolPhase: PipelinePhase<CobolOutput>;
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Phase: cobol
3
+ *
4
+ * Processes COBOL and JCL files via regex extraction (no tree-sitter).
5
+ *
6
+ * @deps structure
7
+ * @reads scannedFiles, allPathSet (from structure phase)
8
+ * @writes graph (COBOL program/paragraph/section nodes, JCL job/step nodes)
9
+ */
10
+ import { getPhaseOutput } from './types.js';
11
+ import { processCobol, isCobolFile, isJclFile } from '../cobol-processor.js';
12
+ import { readFileContents } from '../filesystem-walker.js';
13
+ import { isDev } from '../utils/env.js';
14
+ export const cobolPhase = {
15
+ name: 'cobol',
16
+ deps: ['structure'],
17
+ async execute(ctx, deps) {
18
+ const { scannedFiles, allPathSet } = getPhaseOutput(deps, 'structure');
19
+ const cobolScanned = scannedFiles.filter((f) => isCobolFile(f.path) || isJclFile(f.path));
20
+ if (cobolScanned.length === 0) {
21
+ return { programs: 0, paragraphs: 0, sections: 0 };
22
+ }
23
+ const cobolContents = await readFileContents(ctx.repoPath, cobolScanned.map((f) => f.path));
24
+ const cobolFiles = cobolScanned
25
+ .filter((f) => cobolContents.has(f.path))
26
+ .map((f) => ({ path: f.path, content: cobolContents.get(f.path) }));
27
+ const cobolResult = processCobol(ctx.graph, cobolFiles, allPathSet);
28
+ if (isDev) {
29
+ console.log(` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`);
30
+ if (cobolResult.execSqlBlocks > 0 ||
31
+ cobolResult.execCicsBlocks > 0 ||
32
+ cobolResult.entryPoints > 0) {
33
+ console.log(` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`);
34
+ }
35
+ if (cobolResult.jclJobs > 0) {
36
+ console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`);
37
+ }
38
+ }
39
+ return {
40
+ programs: cobolResult.programs,
41
+ paragraphs: cobolResult.paragraphs,
42
+ sections: cobolResult.sections,
43
+ };
44
+ },
45
+ };
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Phase: communities
3
+ *
4
+ * Detects code communities via Leiden algorithm and creates
5
+ * Community nodes + MEMBER_OF edges.
6
+ *
7
+ * @deps mro, structure
8
+ * @reads graph (all nodes and relationships), totalFiles (from structure phase)
9
+ * @writes graph (Community nodes, MEMBER_OF edges)
10
+ */
11
+ import type { PipelinePhase } from './types.js';
12
+ import { type CommunityDetectionResult } from '../community-processor.js';
13
+ export interface CommunitiesOutput {
14
+ communityResult: CommunityDetectionResult;
15
+ }
16
+ export declare const communitiesPhase: PipelinePhase<CommunitiesOutput>;
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Phase: communities
3
+ *
4
+ * Detects code communities via Leiden algorithm and creates
5
+ * Community nodes + MEMBER_OF edges.
6
+ *
7
+ * @deps mro, structure
8
+ * @reads graph (all nodes and relationships), totalFiles (from structure phase)
9
+ * @writes graph (Community nodes, MEMBER_OF edges)
10
+ */
11
+ import { getPhaseOutput } from './types.js';
12
+ import { processCommunities } from '../community-processor.js';
13
+ import { isDev } from '../utils/env.js';
14
+ export const communitiesPhase = {
15
+ name: 'communities',
16
+ deps: ['mro', 'structure'],
17
+ async execute(ctx, deps) {
18
+ const { totalFiles } = getPhaseOutput(deps, 'structure');
19
+ ctx.onProgress({
20
+ phase: 'communities',
21
+ percent: 84,
22
+ message: 'Detecting code communities...',
23
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
24
+ });
25
+ const communityResult = await processCommunities(ctx.graph, (message, progress) => {
26
+ const communityProgress = 84 + progress * 0.09;
27
+ ctx.onProgress({
28
+ phase: 'communities',
29
+ percent: Math.round(communityProgress),
30
+ message,
31
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
32
+ });
33
+ });
34
+ if (isDev) {
35
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
36
+ }
37
+ communityResult.communities.forEach((comm) => {
38
+ ctx.graph.addNode({
39
+ id: comm.id,
40
+ label: 'Community',
41
+ properties: {
42
+ name: comm.label,
43
+ filePath: '',
44
+ heuristicLabel: comm.heuristicLabel,
45
+ cohesion: comm.cohesion,
46
+ symbolCount: comm.symbolCount,
47
+ },
48
+ });
49
+ });
50
+ communityResult.memberships.forEach((membership) => {
51
+ ctx.graph.addRelationship({
52
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
53
+ type: 'MEMBER_OF',
54
+ sourceId: membership.nodeId,
55
+ targetId: membership.communityId,
56
+ confidence: 1.0,
57
+ reason: 'leiden-algorithm',
58
+ });
59
+ });
60
+ return { communityResult };
61
+ },
62
+ };
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Cross-file binding propagation — extracted from pipeline.ts.
3
+ *
4
+ * Seeds downstream files with resolved type bindings from upstream exports.
5
+ * Files are processed in topological import order so upstream bindings
6
+ * are available when downstream files are re-resolved.
7
+ *
8
+ * @module
9
+ */
10
+ import type { createResolutionContext } from '../model/resolution-context.js';
11
+ import { type PipelineProgress } from '../../../_shared/index.js';
12
+ import type { KnowledgeGraph } from '../../graph/types.js';
13
+ /**
14
+ * Cross-file binding propagation.
15
+ * Returns the number of files re-processed.
16
+ */
17
+ export declare function runCrossFileBindingPropagation(graph: KnowledgeGraph, ctx: ReturnType<typeof createResolutionContext>, parseExportedTypeMap: ReadonlyMap<string, ReadonlyMap<string, string>>, allPathSet: ReadonlySet<string>, totalFiles: number, repoPath: string, pipelineStart: number, onProgress: (progress: PipelineProgress) => void): Promise<number>;
@@ -0,0 +1,156 @@
1
+ /**
2
+ * Cross-file binding propagation — extracted from pipeline.ts.
3
+ *
4
+ * Seeds downstream files with resolved type bindings from upstream exports.
5
+ * Files are processed in topological import order so upstream bindings
6
+ * are available when downstream files are re-resolved.
7
+ *
8
+ * @module
9
+ */
10
+ import { processCalls, buildImportedReturnTypes, buildImportedRawReturnTypes, } from '../call-processor.js';
11
+ import { createASTCache } from '../ast-cache.js';
12
+ import { getLanguageFromFilename } from '../../../_shared/index.js';
13
+ import { readFileContents } from '../filesystem-walker.js';
14
+ import { isLanguageAvailable } from '../../tree-sitter/parser-loader.js';
15
+ import { topologicalLevelSort } from '../utils/graph-sort.js';
16
+ import { isDev } from '../utils/env.js';
17
+ /** Max AST trees to keep in LRU cache for cross-file binding propagation. */
18
+ const AST_CACHE_CAP = 50;
19
+ /** Minimum fraction of files (0.03 = 3%) that must benefit from cross-file seeding; below this the re-resolution pass is skipped. */
20
+ const CROSS_FILE_SKIP_THRESHOLD = 0.03;
21
+ /** Hard cap on files re-processed during cross-file propagation. */
22
+ const MAX_CROSS_FILE_REPROCESS = 2000;
23
+ /**
24
+ * Cross-file binding propagation.
25
+ * Returns the number of files re-processed.
26
+ */
27
+ export async function runCrossFileBindingPropagation(graph, ctx, parseExportedTypeMap, allPathSet, totalFiles, repoPath, pipelineStart, onProgress) {
28
+ if (parseExportedTypeMap.size === 0 || ctx.namedImportMap.size === 0)
29
+ return 0;
30
+ // Build a local mutable working copy. Per-file re-resolution below mutates
31
+ // this map (each `processCalls` writes that file's exports back into it so
32
+ // later iterations in the same level/loop can resolve transitive bindings).
33
+ // Owning a local copy here keeps `ParseOutput.exportedTypeMap` truly
34
+ // read-only at the phase boundary — no cast, no shared-mutable handoff.
35
+ const exportedTypeMap = new Map();
36
+ for (const [fp, exports] of parseExportedTypeMap) {
37
+ exportedTypeMap.set(fp, new Map(exports));
38
+ }
39
+ const { levels, cycleCount } = topologicalLevelSort(ctx.importMap);
40
+ if (isDev && cycleCount > 0) {
41
+ console.log(`🔄 ${cycleCount} files in import cycles (processed last in undefined order)`);
42
+ }
43
+ let filesWithGaps = 0;
44
+ const gapThreshold = Math.max(1, Math.ceil(totalFiles * CROSS_FILE_SKIP_THRESHOLD));
45
+ outer: for (const level of levels) {
46
+ for (const filePath of level) {
47
+ const imports = ctx.namedImportMap.get(filePath);
48
+ if (!imports)
49
+ continue;
50
+ for (const [, binding] of imports) {
51
+ const upstream = exportedTypeMap.get(binding.sourcePath);
52
+ if (upstream?.has(binding.exportedName)) {
53
+ filesWithGaps++;
54
+ break;
55
+ }
56
+ const def = ctx.model.symbols.lookupExactFull(binding.sourcePath, binding.exportedName);
57
+ if (def?.returnType) {
58
+ filesWithGaps++;
59
+ break;
60
+ }
61
+ }
62
+ if (filesWithGaps >= gapThreshold)
63
+ break outer;
64
+ }
65
+ }
66
+ const gapRatio = totalFiles > 0 ? filesWithGaps / totalFiles : 0;
67
+ if (gapRatio < CROSS_FILE_SKIP_THRESHOLD && filesWithGaps < gapThreshold) {
68
+ if (isDev) {
69
+ console.log(`⏭️ Cross-file re-resolution skipped (${filesWithGaps}/${totalFiles} files, ${(gapRatio * 100).toFixed(1)}% < ${CROSS_FILE_SKIP_THRESHOLD * 100}% threshold)`);
70
+ }
71
+ return 0;
72
+ }
73
+ // Intentionally reports `phase: 'parsing'` rather than a separate
74
+ // 'crossFile' phase: cross-file re-resolution is logically a continuation of
75
+ // the parsing/resolution work and is bucketed under "parsing" in any
76
+ // telemetry that groups events by phase name. Kept consistent with the
77
+ // upstream `parse` phase's progress events so the UI shows one continuous
78
+ // progress segment instead of a phase flicker. If a future change splits
79
+ // this out into its own phase, also rename `parse-impl.ts` per-chunk
80
+ // progress events accordingly.
81
+ onProgress({
82
+ phase: 'parsing',
83
+ percent: 82,
84
+ message: `Cross-file type propagation (${filesWithGaps}+ files)...`,
85
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
86
+ });
87
+ let crossFileResolved = 0;
88
+ const crossFileStart = Date.now();
89
+ const astCache = createASTCache(AST_CACHE_CAP);
90
+ for (const level of levels) {
91
+ const levelCandidates = [];
92
+ for (const filePath of level) {
93
+ if (crossFileResolved + levelCandidates.length >= MAX_CROSS_FILE_REPROCESS)
94
+ break;
95
+ const imports = ctx.namedImportMap.get(filePath);
96
+ if (!imports)
97
+ continue;
98
+ const seeded = new Map();
99
+ for (const [localName, binding] of imports) {
100
+ const upstream = exportedTypeMap.get(binding.sourcePath);
101
+ if (upstream) {
102
+ const type = upstream.get(binding.exportedName);
103
+ if (type)
104
+ seeded.set(localName, type);
105
+ }
106
+ }
107
+ const importedReturns = buildImportedReturnTypes(filePath, ctx.namedImportMap, ctx.model.symbols);
108
+ const importedRawReturns = buildImportedRawReturnTypes(filePath, ctx.namedImportMap, ctx.model.symbols);
109
+ if (seeded.size === 0 && importedReturns.size === 0)
110
+ continue;
111
+ if (!allPathSet.has(filePath))
112
+ continue;
113
+ const lang = getLanguageFromFilename(filePath);
114
+ if (!lang || !isLanguageAvailable(lang))
115
+ continue;
116
+ levelCandidates.push({ filePath, seeded, importedReturns, importedRawReturns });
117
+ }
118
+ if (levelCandidates.length === 0)
119
+ continue;
120
+ const levelPaths = levelCandidates.map((c) => c.filePath);
121
+ const contentMap = await readFileContents(repoPath, levelPaths);
122
+ for (const { filePath, seeded, importedReturns, importedRawReturns } of levelCandidates) {
123
+ const content = contentMap.get(filePath);
124
+ if (!content)
125
+ continue;
126
+ const reFile = [{ path: filePath, content }];
127
+ const bindings = new Map();
128
+ if (seeded.size > 0)
129
+ bindings.set(filePath, seeded);
130
+ const importedReturnTypesMap = new Map();
131
+ if (importedReturns.size > 0) {
132
+ importedReturnTypesMap.set(filePath, importedReturns);
133
+ }
134
+ const importedRawReturnTypesMap = new Map();
135
+ if (importedRawReturns.size > 0) {
136
+ importedRawReturnTypesMap.set(filePath, importedRawReturns);
137
+ }
138
+ await processCalls(graph, reFile, astCache, ctx, undefined, exportedTypeMap, bindings.size > 0 ? bindings : undefined, importedReturnTypesMap.size > 0 ? importedReturnTypesMap : undefined, importedRawReturnTypesMap.size > 0 ? importedRawReturnTypesMap : undefined);
139
+ crossFileResolved++;
140
+ }
141
+ if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) {
142
+ if (isDev)
143
+ console.log(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`);
144
+ break;
145
+ }
146
+ }
147
+ astCache.clear();
148
+ if (isDev) {
149
+ const elapsed = Date.now() - crossFileStart;
150
+ const totalElapsed = Date.now() - pipelineStart;
151
+ const reResolutionPct = totalElapsed > 0 ? ((elapsed / totalElapsed) * 100).toFixed(1) : '0';
152
+ console.log(`🔗 Cross-file re-resolution: ${crossFileResolved} candidates re-processed` +
153
+ ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`);
154
+ }
155
+ return crossFileResolved;
156
+ }
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Phase: crossFile
3
+ *
4
+ * Cross-file binding propagation: seeds downstream files with resolved
5
+ * type bindings from upstream exports. Files are processed in topological
6
+ * import order so upstream bindings are available when downstream files
7
+ * are re-resolved.
8
+ *
9
+ * @deps parse, routes, tools, orm (waits for all post-parse phases)
10
+ * @reads exportedTypeMap, allPathSet, totalFiles, bindingAccumulator, resolutionContext (from the parse phase output)
11
+ * @writes graph (refined CALLS edges via re-resolution)
12
+ *
13
+ * **Accumulator ownership / residual risk.** This phase is the sole
14
+ * disposer of the `BindingAccumulator` produced by `parse`. The dispose
15
+ * call lives inside a `finally` block in `execute()` so that a throw
16
+ * inside `runCrossFileBindingPropagation` (or anywhere else in the body)
17
+ * still releases the accumulator's heap. The dependency declaration
18
+ * (`deps: ['parse', 'routes', 'tools', 'orm']`) plus the runner's
19
+ * topological scheduling guarantee that every other consumer of the
20
+ * accumulator has finished before this phase starts, so disposing here
21
+ * is correct.
22
+ *
23
+ * The residual risk is intentional and accepted: if a future phase is
24
+ * inserted between `parse` and `crossFile` that reads the accumulator
25
+ * and throws, `crossFile.execute()` never runs and the accumulator
26
+ * leaks. Any author inserting a new phase between `parse` and
27
+ * `crossFile` MUST either route the new phase's output through
28
+ * `crossFile` (so disposal still happens here) or take ownership of
29
+ * the accumulator's lifetime explicitly (its own try/finally that
30
+ * disposes on the failure path). Do not silently rely on the GC.
31
+ */
32
+ import type { PipelinePhase } from './types.js';
33
+ export interface CrossFileOutput { // Result shape produced by the crossFile phase.
34
+ /** Number of files re-processed during cross-file propagation (the count reported by the phase's execute()). */
35
+ filesReprocessed: number;
36
+ }
37
+ export declare const crossFilePhase: PipelinePhase<CrossFileOutput>; // phase object whose output type is CrossFileOutput
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Phase: crossFile
3
+ *
4
+ * Cross-file binding propagation: seeds downstream files with resolved
5
+ * type bindings from upstream exports. Files are processed in topological
6
+ * import order so upstream bindings are available when downstream files
7
+ * are re-resolved.
8
+ *
9
+ * @deps parse, routes, tools, orm (waits for all post-parse phases)
10
+ * @reads exportedTypeMap, allPathSet, totalFiles, bindingAccumulator, resolutionContext (from the parse phase output)
11
+ * @writes graph (refined CALLS edges via re-resolution)
12
+ *
13
+ * **Accumulator ownership / residual risk.** This phase is the sole
14
+ * disposer of the `BindingAccumulator` produced by `parse`. The dispose
15
+ * call lives inside a `finally` block in `execute()` so that a throw
16
+ * inside `runCrossFileBindingPropagation` (or anywhere else in the body)
17
+ * still releases the accumulator's heap. The dependency declaration
18
+ * (`deps: ['parse', 'routes', 'tools', 'orm']`) plus the runner's
19
+ * topological scheduling guarantee that every other consumer of the
20
+ * accumulator has finished before this phase starts, so disposing here
21
+ * is correct.
22
+ *
23
+ * The residual risk is intentional and accepted: if a future phase is
24
+ * inserted between `parse` and `crossFile` that reads the accumulator
25
+ * and throws, `crossFile.execute()` never runs and the accumulator
26
+ * leaks. Any author inserting a new phase between `parse` and
27
+ * `crossFile` MUST either route the new phase's output through
28
+ * `crossFile` (so disposal still happens here) or take ownership of
29
+ * the accumulator's lifetime explicitly (its own try/finally that
30
+ * disposes on the failure path). Do not silently rely on the GC.
31
+ */
32
+ import { getPhaseOutput } from './types.js';
33
+ import { runCrossFileBindingPropagation } from './cross-file-impl.js';
34
+ import { isDev } from '../utils/env.js';
35
+ export const crossFilePhase = { // Phase descriptor for 'crossFile': a name, its declared deps, and an async execute().
36
+ name: 'crossFile',
37
+ deps: ['parse', 'routes', 'tools', 'orm'], // declared upstream phases; execute() below only reads the 'parse' output
38
+ async execute(ctx, deps) { // resolves to { filesReprocessed }; disposes bindingAccumulator on both the return and the throw path
39
+ const { exportedTypeMap, allPathSet, totalFiles, bindingAccumulator, resolutionContext } = getPhaseOutput(deps, 'parse'); // read before the try: if this lookup throws, the finally below does not run
40
+ try {
41
+ // Telemetry must run BEFORE dispose: totalBindings, fileCount, and
42
+ // estimateMemoryBytes() all return 0 once dispose() clears the
43
+ // internal maps.
44
+ if (isDev) { // dev-only console output; nothing is logged otherwise
45
+ if (bindingAccumulator.totalBindings > 0) {
46
+ const memKB = Math.round(bindingAccumulator.estimateMemoryBytes() / 1024); // estimated bytes converted to whole KB
47
+ console.log(`📦 BindingAccumulator: ${bindingAccumulator.totalBindings} bindings across ${bindingAccumulator.fileCount} files (~${memKB} KB)`);
48
+ }
49
+ else if (totalFiles > 0) { // zero bindings although files were parsed: flag it as a possible upstream regression
50
+ console.log(`📦 BindingAccumulator: EMPTY — 0 bindings across 0 files despite ${totalFiles} parsed files. If the codebase has typed bindings, this indicates an upstream regression.`);
51
+ }
52
+ }
53
+ const filesReprocessed = await runCrossFileBindingPropagation(ctx.graph, resolutionContext, exportedTypeMap, allPathSet, totalFiles, ctx.repoPath, ctx.pipelineStart, ctx.onProgress); // the re-resolution work is delegated; its resolved value becomes the phase output
54
+ return { filesReprocessed };
55
+ }
56
+ finally {
57
+ // Single dispose call site for the accumulator — runs on both the
58
+ // happy path and the throw path so the heap is always released
59
+ // before the runner moves on (or surfaces the error).
60
+ bindingAccumulator.dispose();
61
+ }
62
+ },
63
+ };
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Pipeline Phases — barrel export.
3
+ *
4
+ * Exports all phases, the runner, types, and shared utilities
5
+ * for the ingestion pipeline.
6
+ */
7
+ export { scanPhase, type ScanOutput } from './scan.js';
8
+ export { structurePhase, type StructureOutput } from './structure.js';
9
+ export { markdownPhase, type MarkdownOutput } from './markdown.js';
10
+ export { cobolPhase, type CobolOutput } from './cobol.js';
11
+ export { parsePhase, type ParseOutput } from './parse.js';
12
+ export { routesPhase, type RoutesOutput, type RouteEntry } from './routes.js';
13
+ export { toolsPhase, type ToolsOutput, type ToolDef } from './tools.js';
14
+ export { ormPhase, type ORMOutput } from './orm.js';
15
+ export { crossFilePhase, type CrossFileOutput } from './cross-file.js';
16
+ export { mroPhase, type MROOutput } from './mro.js';
17
+ export { communitiesPhase, type CommunitiesOutput } from './communities.js';
18
+ export { processesPhase, type ProcessesOutput } from './processes.js';
19
+ export { runPipeline } from './runner.js';
20
+ export type { PipelinePhase, PipelineContext, PhaseResult } from './types.js';
21
+ export { getPhaseOutput } from './types.js';
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Pipeline Phases — barrel export.
3
+ *
4
+ * Exports all phases, the runner, types, and shared utilities
5
+ * for the ingestion pipeline.
6
+ */
7
+ // ── Phase exports (in dependency order) ────────────────────────────────────
8
+ export { scanPhase } from './scan.js';
9
+ export { structurePhase } from './structure.js';
10
+ export { markdownPhase } from './markdown.js';
11
+ export { cobolPhase } from './cobol.js';
12
+ export { parsePhase } from './parse.js';
13
+ export { routesPhase } from './routes.js';
14
+ export { toolsPhase } from './tools.js';
15
+ export { ormPhase } from './orm.js';
16
+ export { crossFilePhase } from './cross-file.js';
17
+ export { mroPhase } from './mro.js';
18
+ export { communitiesPhase } from './communities.js';
19
+ export { processesPhase } from './processes.js';
20
+ // ── Infrastructure ─────────────────────────────────────────────────────────
21
+ export { runPipeline } from './runner.js';
22
+ export { getPhaseOutput } from './types.js';
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Phase: markdown
3
+ *
4
+ * Processes Markdown/MDX files to extract headings and cross-links.
5
+ *
6
+ * @deps structure
7
+ * @reads scannedFiles, allPaths (from structure phase)
8
+ * @writes graph (Markdown section nodes + cross-link edges)
9
+ */
10
+ import type { PipelinePhase } from './types.js';
11
+ export interface MarkdownOutput { // Result shape produced by the markdown phase.
12
+ /** Number of markdown sections extracted. */
13
+ sections: number;
14
+ /** Number of cross-links created. */
15
+ links: number;
16
+ }
17
+ export declare const markdownPhase: PipelinePhase<MarkdownOutput>; // phase object whose output type is MarkdownOutput