@zuvia-software-solutions/code-mapper 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/README.md +215 -0
  2. package/dist/cli/ai-context.d.ts +19 -0
  3. package/dist/cli/ai-context.js +168 -0
  4. package/dist/cli/analyze.d.ts +7 -0
  5. package/dist/cli/analyze.js +325 -0
  6. package/dist/cli/augment.d.ts +7 -0
  7. package/dist/cli/augment.js +27 -0
  8. package/dist/cli/clean.d.ts +5 -0
  9. package/dist/cli/clean.js +56 -0
  10. package/dist/cli/eval-server.d.ts +25 -0
  11. package/dist/cli/eval-server.js +365 -0
  12. package/dist/cli/index.d.ts +6 -0
  13. package/dist/cli/index.js +102 -0
  14. package/dist/cli/lazy-action.d.ts +6 -0
  15. package/dist/cli/lazy-action.js +19 -0
  16. package/dist/cli/list.d.ts +2 -0
  17. package/dist/cli/list.js +27 -0
  18. package/dist/cli/mcp.d.ts +8 -0
  19. package/dist/cli/mcp.js +35 -0
  20. package/dist/cli/refresh.d.ts +12 -0
  21. package/dist/cli/refresh.js +165 -0
  22. package/dist/cli/serve.d.ts +5 -0
  23. package/dist/cli/serve.js +8 -0
  24. package/dist/cli/setup.d.ts +6 -0
  25. package/dist/cli/setup.js +218 -0
  26. package/dist/cli/status.d.ts +2 -0
  27. package/dist/cli/status.js +33 -0
  28. package/dist/cli/tool.d.ts +28 -0
  29. package/dist/cli/tool.js +87 -0
  30. package/dist/config/ignore-service.d.ts +32 -0
  31. package/dist/config/ignore-service.js +282 -0
  32. package/dist/config/supported-languages.d.ts +23 -0
  33. package/dist/config/supported-languages.js +52 -0
  34. package/dist/core/augmentation/engine.d.ts +22 -0
  35. package/dist/core/augmentation/engine.js +232 -0
  36. package/dist/core/embeddings/embedder.d.ts +35 -0
  37. package/dist/core/embeddings/embedder.js +171 -0
  38. package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
  39. package/dist/core/embeddings/embedding-pipeline.js +402 -0
  40. package/dist/core/embeddings/index.d.ts +5 -0
  41. package/dist/core/embeddings/index.js +6 -0
  42. package/dist/core/embeddings/text-generator.d.ts +20 -0
  43. package/dist/core/embeddings/text-generator.js +159 -0
  44. package/dist/core/embeddings/types.d.ts +60 -0
  45. package/dist/core/embeddings/types.js +23 -0
  46. package/dist/core/graph/graph.d.ts +4 -0
  47. package/dist/core/graph/graph.js +65 -0
  48. package/dist/core/graph/types.d.ts +69 -0
  49. package/dist/core/graph/types.js +3 -0
  50. package/dist/core/incremental/child-process.d.ts +8 -0
  51. package/dist/core/incremental/child-process.js +649 -0
  52. package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
  53. package/dist/core/incremental/refresh-coordinator.js +147 -0
  54. package/dist/core/incremental/types.d.ts +78 -0
  55. package/dist/core/incremental/types.js +153 -0
  56. package/dist/core/incremental/watcher.d.ts +63 -0
  57. package/dist/core/incremental/watcher.js +338 -0
  58. package/dist/core/ingestion/ast-cache.d.ts +12 -0
  59. package/dist/core/ingestion/ast-cache.js +34 -0
  60. package/dist/core/ingestion/call-processor.d.ts +34 -0
  61. package/dist/core/ingestion/call-processor.js +937 -0
  62. package/dist/core/ingestion/call-routing.d.ts +40 -0
  63. package/dist/core/ingestion/call-routing.js +97 -0
  64. package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
  65. package/dist/core/ingestion/cluster-enricher.js +151 -0
  66. package/dist/core/ingestion/community-processor.d.ts +26 -0
  67. package/dist/core/ingestion/community-processor.js +272 -0
  68. package/dist/core/ingestion/constants.d.ts +5 -0
  69. package/dist/core/ingestion/constants.js +8 -0
  70. package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
  71. package/dist/core/ingestion/entry-point-scoring.js +317 -0
  72. package/dist/core/ingestion/export-detection.d.ts +11 -0
  73. package/dist/core/ingestion/export-detection.js +203 -0
  74. package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
  75. package/dist/core/ingestion/filesystem-walker.js +64 -0
  76. package/dist/core/ingestion/framework-detection.d.ts +42 -0
  77. package/dist/core/ingestion/framework-detection.js +405 -0
  78. package/dist/core/ingestion/heritage-processor.d.ts +15 -0
  79. package/dist/core/ingestion/heritage-processor.js +237 -0
  80. package/dist/core/ingestion/import-processor.d.ts +31 -0
  81. package/dist/core/ingestion/import-processor.js +416 -0
  82. package/dist/core/ingestion/language-config.d.ts +32 -0
  83. package/dist/core/ingestion/language-config.js +161 -0
  84. package/dist/core/ingestion/mro-processor.d.ts +32 -0
  85. package/dist/core/ingestion/mro-processor.js +343 -0
  86. package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
  87. package/dist/core/ingestion/named-binding-extraction.js +343 -0
  88. package/dist/core/ingestion/parsing-processor.d.ts +20 -0
  89. package/dist/core/ingestion/parsing-processor.js +282 -0
  90. package/dist/core/ingestion/pipeline.d.ts +3 -0
  91. package/dist/core/ingestion/pipeline.js +416 -0
  92. package/dist/core/ingestion/process-processor.d.ts +42 -0
  93. package/dist/core/ingestion/process-processor.js +357 -0
  94. package/dist/core/ingestion/resolution-context.d.ts +40 -0
  95. package/dist/core/ingestion/resolution-context.js +171 -0
  96. package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
  97. package/dist/core/ingestion/resolvers/csharp.js +101 -0
  98. package/dist/core/ingestion/resolvers/go.d.ts +8 -0
  99. package/dist/core/ingestion/resolvers/go.js +33 -0
  100. package/dist/core/ingestion/resolvers/index.d.ts +14 -0
  101. package/dist/core/ingestion/resolvers/index.js +10 -0
  102. package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
  103. package/dist/core/ingestion/resolvers/jvm.js +74 -0
  104. package/dist/core/ingestion/resolvers/php.d.ts +7 -0
  105. package/dist/core/ingestion/resolvers/php.js +30 -0
  106. package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
  107. package/dist/core/ingestion/resolvers/ruby.js +13 -0
  108. package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
  109. package/dist/core/ingestion/resolvers/rust.js +62 -0
  110. package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
  111. package/dist/core/ingestion/resolvers/standard.js +144 -0
  112. package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
  113. package/dist/core/ingestion/resolvers/utils.js +113 -0
  114. package/dist/core/ingestion/structure-processor.d.ts +4 -0
  115. package/dist/core/ingestion/structure-processor.js +39 -0
  116. package/dist/core/ingestion/symbol-table.d.ts +34 -0
  117. package/dist/core/ingestion/symbol-table.js +48 -0
  118. package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
  119. package/dist/core/ingestion/tree-sitter-queries.js +691 -0
  120. package/dist/core/ingestion/type-env.d.ts +52 -0
  121. package/dist/core/ingestion/type-env.js +349 -0
  122. package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
  123. package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
  124. package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
  125. package/dist/core/ingestion/type-extractors/csharp.js +224 -0
  126. package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
  127. package/dist/core/ingestion/type-extractors/go.js +261 -0
  128. package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
  129. package/dist/core/ingestion/type-extractors/index.js +30 -0
  130. package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
  131. package/dist/core/ingestion/type-extractors/jvm.js +386 -0
  132. package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
  133. package/dist/core/ingestion/type-extractors/php.js +280 -0
  134. package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
  135. package/dist/core/ingestion/type-extractors/python.js +175 -0
  136. package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
  137. package/dist/core/ingestion/type-extractors/ruby.js +218 -0
  138. package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
  139. package/dist/core/ingestion/type-extractors/rust.js +290 -0
  140. package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
  141. package/dist/core/ingestion/type-extractors/shared.js +322 -0
  142. package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
  143. package/dist/core/ingestion/type-extractors/swift.js +140 -0
  144. package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
  145. package/dist/core/ingestion/type-extractors/types.js +4 -0
  146. package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
  147. package/dist/core/ingestion/type-extractors/typescript.js +227 -0
  148. package/dist/core/ingestion/utils.d.ts +73 -0
  149. package/dist/core/ingestion/utils.js +992 -0
  150. package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
  151. package/dist/core/ingestion/workers/parse-worker.js +1055 -0
  152. package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
  153. package/dist/core/ingestion/workers/worker-pool.js +123 -0
  154. package/dist/core/lbug/csv-generator.d.ts +28 -0
  155. package/dist/core/lbug/csv-generator.js +355 -0
  156. package/dist/core/lbug/lbug-adapter.d.ts +96 -0
  157. package/dist/core/lbug/lbug-adapter.js +753 -0
  158. package/dist/core/lbug/schema.d.ts +46 -0
  159. package/dist/core/lbug/schema.js +402 -0
  160. package/dist/core/search/bm25-index.d.ts +20 -0
  161. package/dist/core/search/bm25-index.js +123 -0
  162. package/dist/core/search/hybrid-search.d.ts +32 -0
  163. package/dist/core/search/hybrid-search.js +131 -0
  164. package/dist/core/search/query-cache.d.ts +18 -0
  165. package/dist/core/search/query-cache.js +47 -0
  166. package/dist/core/search/query-expansion.d.ts +19 -0
  167. package/dist/core/search/query-expansion.js +75 -0
  168. package/dist/core/search/reranker.d.ts +29 -0
  169. package/dist/core/search/reranker.js +122 -0
  170. package/dist/core/search/types.d.ts +154 -0
  171. package/dist/core/search/types.js +51 -0
  172. package/dist/core/semantic/tsgo-service.d.ts +67 -0
  173. package/dist/core/semantic/tsgo-service.js +355 -0
  174. package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
  175. package/dist/core/tree-sitter/parser-loader.js +71 -0
  176. package/dist/lib/memory-guard.d.ts +35 -0
  177. package/dist/lib/memory-guard.js +70 -0
  178. package/dist/lib/utils.d.ts +3 -0
  179. package/dist/lib/utils.js +6 -0
  180. package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
  181. package/dist/mcp/compatible-stdio-transport.js +209 -0
  182. package/dist/mcp/core/embedder.d.ts +24 -0
  183. package/dist/mcp/core/embedder.js +168 -0
  184. package/dist/mcp/core/lbug-adapter.d.ts +29 -0
  185. package/dist/mcp/core/lbug-adapter.js +330 -0
  186. package/dist/mcp/local/local-backend.d.ts +188 -0
  187. package/dist/mcp/local/local-backend.js +2759 -0
  188. package/dist/mcp/resources.d.ts +22 -0
  189. package/dist/mcp/resources.js +379 -0
  190. package/dist/mcp/server.d.ts +10 -0
  191. package/dist/mcp/server.js +217 -0
  192. package/dist/mcp/staleness.d.ts +10 -0
  193. package/dist/mcp/staleness.js +25 -0
  194. package/dist/mcp/tools.d.ts +21 -0
  195. package/dist/mcp/tools.js +202 -0
  196. package/dist/server/api.d.ts +5 -0
  197. package/dist/server/api.js +340 -0
  198. package/dist/server/mcp-http.d.ts +7 -0
  199. package/dist/server/mcp-http.js +95 -0
  200. package/dist/storage/git.d.ts +6 -0
  201. package/dist/storage/git.js +35 -0
  202. package/dist/storage/repo-manager.d.ts +87 -0
  203. package/dist/storage/repo-manager.js +249 -0
  204. package/dist/types/pipeline.d.ts +35 -0
  205. package/dist/types/pipeline.js +20 -0
  206. package/hooks/claude/code-mapper-hook.cjs +238 -0
  207. package/hooks/claude/pre-tool-use.sh +79 -0
  208. package/hooks/claude/session-start.sh +42 -0
  209. package/models/mlx-embedder.py +185 -0
  210. package/package.json +100 -0
  211. package/scripts/patch-tree-sitter-swift.cjs +74 -0
  212. package/vendor/leiden/index.cjs +355 -0
  213. package/vendor/leiden/utils.cjs +392 -0
@@ -0,0 +1,416 @@
1
+ // code-mapper/src/core/ingestion/pipeline.ts
2
+ /** @file pipeline.ts @description Main ingestion pipeline that orchestrates scanning, parsing, resolution, community detection, and process detection across chunked file batches */
3
+ import { createKnowledgeGraph } from '../graph/graph.js';
4
+ import { processStructure } from './structure-processor.js';
5
+ import { processParsing } from './parsing-processor.js';
6
+ import { processImports, processImportsFromExtracted, buildImportResolutionContext } from './import-processor.js';
7
+ import { processCalls, processCallsFromExtracted, processRoutesFromExtracted, createDependsOnEdges, createProvidesEdges } from './call-processor.js';
8
+ import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
9
+ import { computeMRO } from './mro-processor.js';
10
+ import { processCommunities } from './community-processor.js';
11
+ import { processProcesses } from './process-processor.js';
12
+ import { createResolutionContext } from './resolution-context.js';
13
+ import { createASTCache } from './ast-cache.js';
14
+ import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
15
+ import { getLanguageFromFilename } from './utils.js';
16
+ import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
17
+ import { createWorkerPool } from './workers/worker-pool.js';
18
+ import fs from 'node:fs';
19
+ import path from 'node:path';
20
+ import { fileURLToPath, pathToFileURL } from 'node:url';
21
+ import { memoryGuard } from '../../lib/memory-guard.js';
22
+ const isDev = process.env.NODE_ENV === 'development'; // true only when NODE_ENV is exactly 'development'; gates the console diagnostics in this module
23
+ // Default per-chunk byte budget (50 MiB) — used when memory is plentiful.
24
+ // Under memory pressure, memoryGuard.adaptiveBatchSize() shrinks this automatically.
25
+ const DEFAULT_CHUNK_BYTE_BUDGET = 50 * 1024 * 1024;
26
+ // Estimated peak working memory multiplier per byte of source
27
+ // (ASTs, symbol table entries, extracted data per chunk); passed to adaptiveBatchSize()
28
+ const WORKING_MEMORY_MULTIPLIER = 20;
29
+ // Max AST trees to keep in the LRU cache (capacity of the initial cache only)
30
+ const AST_CACHE_CAP = 50;
31
+ export const runPipelineFromRepo = async (repoPath, onProgress) => {
32
+ const graph = createKnowledgeGraph();
33
+ const ctx = createResolutionContext();
34
+ const symbolTable = ctx.symbols;
35
+ let astCache = createASTCache(AST_CACHE_CAP);
36
+ const cleanup = () => {
37
+ astCache.clear();
38
+ ctx.clear();
39
+ };
40
+ try {
41
+ // Phase 1: Scan paths only (no content read)
42
+ onProgress({
43
+ phase: 'extracting',
44
+ percent: 0,
45
+ message: 'Scanning repository...',
46
+ });
47
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
48
+ const scanProgress = Math.round((current / total) * 15);
49
+ onProgress({
50
+ phase: 'extracting',
51
+ percent: scanProgress,
52
+ message: 'Scanning repository...',
53
+ detail: filePath,
54
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
55
+ });
56
+ });
57
+ const totalFiles = scannedFiles.length;
58
+ onProgress({
59
+ phase: 'extracting',
60
+ percent: 15,
61
+ message: 'Repository scanned successfully',
62
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
63
+ });
64
+ // Phase 2: Structure (paths only, no content needed)
65
+ onProgress({
66
+ phase: 'structure',
67
+ percent: 15,
68
+ message: 'Analyzing project structure...',
69
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
70
+ });
71
+ const allPaths = scannedFiles.map(f => f.path);
72
+ processStructure(graph, allPaths);
73
+ onProgress({
74
+ phase: 'structure',
75
+ percent: 20,
76
+ message: 'Project structure analyzed',
77
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
78
+ });
79
+ // Phase 3+4: Chunked read + parse
80
+ // Group parseable files into byte-budget chunks (~20MB at a time)
81
+ // Each chunk is: read -> parse -> extract -> free
82
+ const parseableScanned = scannedFiles.filter(f => {
83
+ const lang = getLanguageFromFilename(f.path);
84
+ return lang && isLanguageAvailable(lang);
85
+ });
86
+ // Log files skipped due to unavailable parsers
87
+ const skippedByLang = new Map();
88
+ for (const f of scannedFiles) {
89
+ const lang = getLanguageFromFilename(f.path);
90
+ if (lang && !isLanguageAvailable(lang)) {
91
+ skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
92
+ }
93
+ }
94
+ for (const [lang, count] of skippedByLang) {
95
+ console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
96
+ }
97
+ const totalParseable = parseableScanned.length;
98
+ if (totalParseable === 0) {
99
+ onProgress({
100
+ phase: 'parsing',
101
+ percent: 82,
102
+ message: 'No parseable files found — skipping parsing phase',
103
+ stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
104
+ });
105
+ }
106
+ // Build chunks based on adaptive byte budget — shrinks under memory pressure
107
+ const chunkBudget = memoryGuard.adaptiveBatchSize(WORKING_MEMORY_MULTIPLIER, DEFAULT_CHUNK_BYTE_BUDGET);
108
+ const chunks = [];
109
+ let currentChunk = [];
110
+ let currentBytes = 0;
111
+ for (const file of parseableScanned) {
112
+ if (currentChunk.length > 0 && currentBytes + file.size > chunkBudget) {
113
+ chunks.push(currentChunk);
114
+ currentChunk = [];
115
+ currentBytes = 0;
116
+ }
117
+ currentChunk.push(file.path);
118
+ currentBytes += file.size;
119
+ }
120
+ if (currentChunk.length > 0)
121
+ chunks.push(currentChunk);
122
+ const numChunks = chunks.length;
123
+ if (isDev) {
124
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
125
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${Math.round(chunkBudget / (1024 * 1024))}MB budget (${memoryGuard.summary()})`);
126
+ }
127
+ onProgress({
128
+ phase: 'parsing',
129
+ percent: 20,
130
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
131
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
132
+ });
133
+ // Create worker pool once, reused across chunks
134
+ let workerPool;
135
+ try {
136
+ let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
137
+ // Under vitest, import.meta.url points to src/ where no .js exists;
138
+ // fall back to compiled dist/ worker for real worker threads
139
+ const thisDir = fileURLToPath(new URL('.', import.meta.url));
140
+ if (!fs.existsSync(fileURLToPath(workerUrl))) {
141
+ const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
142
+ if (fs.existsSync(distWorker)) {
143
+ workerUrl = pathToFileURL(distWorker);
144
+ }
145
+ }
146
+ workerPool = createWorkerPool(workerUrl);
147
+ }
148
+ catch (err) {
149
+ if (isDev)
150
+ console.warn('Worker pool creation failed, using sequential fallback:', err.message);
151
+ }
152
+ let filesParsedSoFar = 0;
153
+ // AST cache sized for one chunk (used by sequential fallback for import/call/heritage)
154
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
155
+ astCache = createASTCache(maxChunkFiles);
156
+ // Build import resolution context once; reused across all chunks to avoid
157
+ // rebuilding O(files * path_depth) structures
158
+ const importCtx = buildImportResolutionContext(allPaths);
159
+ const allPathObjects = allPaths.map(p => ({ path: p }));
160
+ // Two-phase: parse all chunks first (populating symbol table), then resolve calls
161
+ // Phase A: parse + resolve imports/heritage per chunk, COLLECT calls for deferred resolution
162
+ // Phase B: resolve ALL collected calls after all symbols are registered (fixes cross-chunk calls)
163
+ const sequentialChunkPaths = [];
164
+ // Deferred call data — collected across chunks, resolved after all parsing completes
165
+ const allExtractedCalls = [];
166
+ const allConstructorBindings = [];
167
+ try {
168
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
169
+ const chunkPaths = chunks[chunkIdx];
170
+ // Read content for this chunk
171
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
172
+ const chunkFiles = chunkPaths
173
+ .filter(p => chunkContents.has(p))
174
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
175
+ // Parse chunk (workers or sequential fallback)
176
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
177
+ const globalCurrent = filesParsedSoFar + current;
178
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
179
+ onProgress({
180
+ phase: 'parsing',
181
+ percent: Math.round(parsingProgress),
182
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
183
+ detail: filePath,
184
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
185
+ });
186
+ }, workerPool);
187
+ const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
188
+ if (chunkWorkerData) {
189
+ // Resolve imports per-chunk (file-level, doesn't need full symbol table)
190
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
191
+ onProgress({
192
+ phase: 'parsing',
193
+ percent: Math.round(chunkBasePercent),
194
+ message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
195
+ detail: `${current}/${total} files`,
196
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
197
+ });
198
+ }, repoPath, importCtx);
199
+ // COLLECT calls for deferred resolution (don't resolve yet — callee may be in later chunk)
200
+ allExtractedCalls.push(...chunkWorkerData.calls);
201
+ if (chunkWorkerData.constructorBindings) {
202
+ allConstructorBindings.push(...chunkWorkerData.constructorBindings);
203
+ }
204
+ // Heritage + Routes can resolve per-chunk (class-level, usually same-file)
205
+ await Promise.all([
206
+ processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
207
+ onProgress({
208
+ phase: 'parsing',
209
+ percent: Math.round(chunkBasePercent),
210
+ message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
211
+ detail: `${current}/${total} records`,
212
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
213
+ });
214
+ }),
215
+ processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
216
+ onProgress({
217
+ phase: 'parsing',
218
+ percent: Math.round(chunkBasePercent),
219
+ message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
220
+ detail: `${current}/${total} routes`,
221
+ stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
222
+ });
223
+ }),
224
+ ]);
225
+ }
226
+ else {
227
+ await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
228
+ sequentialChunkPaths.push(chunkPaths);
229
+ }
230
+ filesParsedSoFar += chunkFiles.length;
231
+ // Clear AST cache between chunks to free memory; chunk locals go out of scope for GC
232
+ astCache.clear();
233
+ // Attempt GC between chunks if under memory pressure
234
+ if (memoryGuard.isUnderPressure()) {
235
+ memoryGuard.tryGC();
236
+ if (isDev) {
237
+ console.log(`⚠️ Memory pressure after chunk ${chunkIdx + 1}: ${memoryGuard.summary()}`);
238
+ }
239
+ }
240
+ }
241
+ }
242
+ finally {
243
+ await workerPool?.terminate();
244
+ }
245
+ // Sequential fallback: re-read source for call/heritage resolution
246
+ for (const chunkPaths of sequentialChunkPaths) {
247
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
248
+ const chunkFiles = chunkPaths
249
+ .filter(p => chunkContents.has(p))
250
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
251
+ astCache = createASTCache(chunkFiles.length);
252
+ const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx);
253
+ await processHeritage(graph, chunkFiles, astCache, ctx);
254
+ if (rubyHeritage.length > 0) {
255
+ await processHeritageFromExtracted(graph, rubyHeritage, ctx);
256
+ }
257
+ astCache.clear();
258
+ }
259
+ // Phase B: Resolve ALL deferred calls now that every symbol is registered
260
+ if (allExtractedCalls.length > 0) {
261
+ onProgress({
262
+ phase: 'parsing',
263
+ percent: 82,
264
+ message: `Resolving ${allExtractedCalls.length} calls across all files...`,
265
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
266
+ });
267
+ await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total) => {
268
+ onProgress({
269
+ phase: 'parsing',
270
+ percent: 82,
271
+ message: `Resolving calls: ${current}/${total} files...`,
272
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
273
+ });
274
+ }, allConstructorBindings.length > 0 ? allConstructorBindings : undefined);
275
+ }
276
+ // Log resolution cache stats in dev mode
277
+ if (isDev) {
278
+ const rcStats = ctx.getStats();
279
+ const total = rcStats.cacheHits + rcStats.cacheMisses;
280
+ const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
281
+ console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
282
+ }
283
+ // Free import resolution context (~94MB+ for large repos)
284
+ allPathObjects.length = 0;
285
+ importCtx.resolveCache.clear();
286
+ importCtx.suffixIndex = null;
287
+ importCtx.normalizedFileList = null;
288
+ // Phase 4.5a: DI dependency edges (DEPENDS_ON from constructor params, PROVIDES from factories)
289
+ const [diEdgeCount, providesEdgeCount] = await Promise.all([
290
+ createDependsOnEdges(graph, ctx),
291
+ createProvidesEdges(graph, ctx),
292
+ ]);
293
+ if (isDev && (diEdgeCount > 0 || providesEdgeCount > 0)) {
294
+ console.log(`💉 DI: ${diEdgeCount} DEPENDS_ON edges, ${providesEdgeCount} PROVIDES edges`);
295
+ }
296
+ // Phase 4.5b: Method Resolution Order
297
+ onProgress({
298
+ phase: 'parsing',
299
+ percent: 81,
300
+ message: 'Computing method resolution order...',
301
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
302
+ });
303
+ const mroResult = computeMRO(graph);
304
+ if (isDev && mroResult.entries.length > 0) {
305
+ console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
306
+ }
307
+ // Phase 5: Communities
308
+ onProgress({
309
+ phase: 'communities',
310
+ percent: 82,
311
+ message: 'Detecting code communities...',
312
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
313
+ });
314
+ const communityResult = await processCommunities(graph, (message, progress) => {
315
+ const communityProgress = 82 + (progress * 0.10);
316
+ onProgress({
317
+ phase: 'communities',
318
+ percent: Math.round(communityProgress),
319
+ message,
320
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
321
+ });
322
+ });
323
+ if (isDev) {
324
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
325
+ }
326
+ communityResult.communities.forEach(comm => {
327
+ graph.addNode({
328
+ id: comm.id,
329
+ label: 'Community',
330
+ properties: {
331
+ name: comm.label,
332
+ filePath: '',
333
+ heuristicLabel: comm.heuristicLabel,
334
+ cohesion: comm.cohesion,
335
+ symbolCount: comm.symbolCount,
336
+ }
337
+ });
338
+ });
339
+ communityResult.memberships.forEach(membership => {
340
+ graph.addRelationship({
341
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
342
+ type: 'MEMBER_OF',
343
+ sourceId: membership.nodeId,
344
+ targetId: membership.communityId,
345
+ confidence: 1.0,
346
+ reason: 'leiden-algorithm',
347
+ });
348
+ });
349
+ // Phase 6: Processes
350
+ onProgress({
351
+ phase: 'processes',
352
+ percent: 94,
353
+ message: 'Detecting execution flows...',
354
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
355
+ });
356
+ let symbolCount = 0;
357
+ graph.forEachNode(n => { if (n.label !== 'File')
358
+ symbolCount++; });
359
+ const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
360
+ const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
361
+ const processProgress = 94 + (progress * 0.05);
362
+ onProgress({
363
+ phase: 'processes',
364
+ percent: Math.round(processProgress),
365
+ message,
366
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
367
+ });
368
+ }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
369
+ if (isDev) {
370
+ console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
371
+ }
372
+ processResult.processes.forEach(proc => {
373
+ graph.addNode({
374
+ id: proc.id,
375
+ label: 'Process',
376
+ properties: {
377
+ name: proc.label,
378
+ filePath: '',
379
+ heuristicLabel: proc.heuristicLabel,
380
+ processType: proc.processType,
381
+ stepCount: proc.stepCount,
382
+ communities: proc.communities,
383
+ entryPointId: proc.entryPointId,
384
+ terminalId: proc.terminalId,
385
+ }
386
+ });
387
+ });
388
+ processResult.steps.forEach(step => {
389
+ graph.addRelationship({
390
+ id: `${step.nodeId}_step_${step.step}_${step.processId}`,
391
+ type: 'STEP_IN_PROCESS',
392
+ sourceId: step.nodeId,
393
+ targetId: step.processId,
394
+ confidence: 1.0,
395
+ reason: 'trace-detection',
396
+ step: step.step,
397
+ });
398
+ });
399
+ onProgress({
400
+ phase: 'complete',
401
+ percent: 100,
402
+ message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
403
+ stats: {
404
+ filesProcessed: totalFiles,
405
+ totalFiles,
406
+ nodesCreated: graph.nodeCount
407
+ },
408
+ });
409
+ astCache.clear();
410
+ return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
411
+ }
412
+ catch (error) {
413
+ cleanup();
414
+ throw error;
415
+ }
416
+ };
@@ -0,0 +1,42 @@
1
+ /** @file process-processor.ts @description Detects execution flows (Processes) by finding entry points, tracing forward via confidence-gated beam search, deduplicating similar paths, and labeling with heuristic names */
2
+ import { KnowledgeGraph } from '../graph/types.js';
3
+ import { CommunityMembership } from './community-processor.js';
4
+ /** Tuning options for process detection; `processProcesses` accepts a `Partial` of this shape. */ export interface ProcessDetectionConfig {
5
+ /** NOTE(review): presumably the maximum number of hops followed in a single trace — confirm against the implementation */ maxTraceDepth: number;
6
+ /** NOTE(review): presumably an upper bound on the number of processes returned — confirm against the implementation */ maxProcesses: number;
7
+ /** NOTE(review): presumably the minimum number of steps a trace needs to count as a process — confirm against the implementation */ minSteps: number;
8
+ /** Minimum per-edge confidence to follow during beam search */
9
+ minEdgeConfidence: number;
10
+ /** Minimum cumulative path confidence — paths below this are pruned */
11
+ minPathConfidence: number;
12
+ /** Maximum active paths in the beam (global memory bound) */
13
+ maxBeamSize: number;
14
+ }
15
+ /** A detected process (execution flow) as returned in `ProcessDetectionResult.processes`. */ export interface ProcessNode {
16
+ id: string;
17
+ label: string;
18
+ heuristicLabel: string;
19
+ /** Whether the flow is classified as intra-community or cross-community */ processType: 'intra_community' | 'cross_community';
20
+ stepCount: number;
21
+ /** NOTE(review): presumably the ids of the communities the flow touches — confirm */ communities: string[];
22
+ entryPointId: string;
23
+ terminalId: string;
24
+ /** NOTE(review): presumably the ordered node ids from entry point to terminal — confirm */ trace: string[];
25
+ }
26
+ /** Associates a graph node (`nodeId`) with a process (`processId`) under a numeric `step`. */ export interface ProcessStep {
27
+ nodeId: string;
28
+ processId: string;
29
+ /** NOTE(review): presumably the node's position within the process trace; indexing base not visible here — confirm */ step: number;
30
+ }
31
+ /** Value resolved by `processProcesses`: the detected processes, their steps, and summary statistics. */ export interface ProcessDetectionResult {
32
+ processes: ProcessNode[];
33
+ steps: ProcessStep[];
34
+ /** Summary counters for the detection run */ stats: {
35
+ totalProcesses: number;
36
+ crossCommunityCount: number;
37
+ avgStepCount: number;
38
+ entryPointsFound: number;
39
+ };
40
+ }
41
+ /** Detect processes (execution flows) in the knowledge graph; runs after community detection. Takes the graph and the community memberships, an optional `onProgress(message, progress)` callback, and an optional partial `ProcessDetectionConfig` (NOTE(review): presumably merged over built-in defaults — confirm). Resolves to a `ProcessDetectionResult`. */
42
+ export declare const processProcesses: (knowledgeGraph: KnowledgeGraph, memberships: CommunityMembership[], onProgress?: (message: string, progress: number) => void, config?: Partial<ProcessDetectionConfig>) => Promise<ProcessDetectionResult>;