gitnexus 1.6.0 → 1.6.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/README.md +73 -0
  2. package/dist/cli/analyze.js +50 -3
  3. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  4. package/dist/core/group/extractors/fs-utils.js +24 -0
  5. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  6. package/dist/core/group/extractors/grpc-extractor.js +328 -191
  7. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  8. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  10. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  12. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  14. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  16. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  18. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  20. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  21. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  22. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  23. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  24. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  25. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  26. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  27. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  28. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  29. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  30. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  31. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  32. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  33. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  34. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  35. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  36. package/dist/core/group/extractors/http-route-extractor.js +231 -238
  37. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  38. package/dist/core/group/extractors/manifest-extractor.js +277 -0
  39. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  40. package/dist/core/group/extractors/topic-extractor.js +55 -192
  41. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  42. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  43. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  44. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  45. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  46. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  47. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  48. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  49. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  50. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  51. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  52. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  54. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  55. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  56. package/dist/core/ingestion/binding-accumulator.js +29 -25
  57. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  58. package/dist/core/ingestion/import-processor.js +1 -1
  59. package/dist/core/ingestion/language-config.js +1 -1
  60. package/dist/core/ingestion/language-provider.d.ts +32 -5
  61. package/dist/core/ingestion/languages/c-cpp.js +2 -2
  62. package/dist/core/ingestion/languages/dart.d.ts +1 -1
  63. package/dist/core/ingestion/languages/dart.js +2 -2
  64. package/dist/core/ingestion/languages/go.d.ts +1 -1
  65. package/dist/core/ingestion/languages/go.js +2 -2
  66. package/dist/core/ingestion/languages/ruby.js +16 -1
  67. package/dist/core/ingestion/languages/swift.d.ts +1 -1
  68. package/dist/core/ingestion/languages/swift.js +2 -2
  69. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  70. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  71. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  72. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  73. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  74. package/dist/core/ingestion/method-types.d.ts +4 -0
  75. package/dist/core/ingestion/model/resolve.js +103 -48
  76. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  77. package/dist/core/ingestion/model/semantic-model.js +1 -1
  78. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  79. package/dist/core/ingestion/model/symbol-table.js +7 -7
  80. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  81. package/dist/core/ingestion/mro-processor.js +1 -1
  82. package/dist/core/ingestion/parsing-processor.js +54 -42
  83. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  84. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  85. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  86. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  87. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  88. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  89. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  90. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  91. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  92. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  93. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  94. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  95. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  96. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  97. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  98. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  99. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  100. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  101. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  102. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  103. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  104. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  105. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  106. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  107. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  108. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  109. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  110. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  111. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  112. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  113. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  114. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  115. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  116. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  117. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  118. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  119. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +70 -0
  120. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +312 -0
  121. package/dist/core/ingestion/pipeline.d.ts +16 -10
  122. package/dist/core/ingestion/pipeline.js +66 -1534
  123. package/dist/core/ingestion/process-processor.js +1 -1
  124. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  125. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  126. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  127. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  128. package/dist/core/ingestion/utils/env.d.ts +10 -0
  129. package/dist/core/ingestion/utils/env.js +10 -0
  130. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  131. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  132. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  133. package/dist/core/lbug/lbug-adapter.d.ts +28 -0
  134. package/dist/core/lbug/lbug-adapter.js +162 -57
  135. package/package.json +3 -3
  136. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  137. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  138. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  139. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  140. package/vendor/tree-sitter-proto/package.json +18 -0
  141. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  142. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  143. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  144. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  145. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Phase: orm
3
+ *
4
+ * Processes ORM queries (Prisma + Supabase) and creates QUERIES edges.
5
+ *
6
+ * @deps parse
7
+ * @reads allORMQueries (from parse)
8
+ * @writes graph (CodeElement nodes, QUERIES edges)
9
+ */
10
+ import { getPhaseOutput } from './types.js';
11
+ import { generateId } from '../../../lib/utils.js';
12
+ import { isDev } from '../utils/env.js';
13
/**
+ * Pipeline phase descriptor for the `orm` phase.
+ *
+ * Declares a dependency on the `parse` phase, reads `allORMQueries` from that
+ * phase's output and passes them, together with `ctx.graph`, to
+ * `processORMQueries`. Resolves to `{ edgesCreated, modelCount }`; both are 0
+ * when the parse phase produced no ORM queries.
+ */
+ export const ormPhase = {
14
+ name: 'orm',
15
+ deps: ['parse'],
16
+ async execute(ctx, deps) {
17
+ const { allORMQueries } = getPhaseOutput(deps, 'parse');
18
+ if (allORMQueries.length === 0) { // nothing extracted: skip all graph work
19
+ return { edgesCreated: 0, modelCount: 0 };
20
+ }
21
+ return processORMQueries(ctx.graph, allORMQueries);
22
+ },
23
+ };
24
/**
+ * Adds QUERIES edges from source files to ORM model nodes.
+ *
+ * For each query the model node is resolved once per `orm:model` key: the
+ * first existing graph node among the Class / Interface / CodeElement ids
+ * generated from the bare model name is reused; otherwise a new CodeElement
+ * node (id generated from `orm:model`, empty `filePath`) is added.
+ * One edge is added per unique `file -> model : method` combination.
+ *
+ * @param graph - object exposing `getNode`, `addNode` and `addRelationship`
+ * @param queries - iterable of records read via `orm`, `model`, `method`, `filePath`
+ * @returns `{ edgesCreated, modelCount }`; `modelCount` is the number of
+ *          distinct `orm:model` keys seen, not the number of distinct nodes
+ */
+ function processORMQueries(graph, queries) {
25
+ const modelNodes = new Map(); // `orm:model` key -> resolved node id
26
+ const seenEdges = new Set(); // edge keys already emitted, for de-duplication
27
+ let edgesCreated = 0;
28
+ for (const q of queries) {
29
+ const modelKey = `${q.orm}:${q.model}`;
30
+ let modelNodeId = modelNodes.get(modelKey);
31
+ if (!modelNodeId) { // first time this orm/model pair is seen
32
+ const candidateIds = [
33
+ generateId('Class', `${q.model}`),
34
+ generateId('Interface', `${q.model}`),
35
+ generateId('CodeElement', `${q.model}`),
36
+ ];
37
+ const existing = candidateIds.find((id) => graph.getNode(id)); // first candidate already present in the graph wins
38
+ if (existing) {
39
+ modelNodeId = existing;
40
+ }
41
+ else { // no matching node: create a placeholder CodeElement for the model/table
42
+ modelNodeId = generateId('CodeElement', `${q.orm}:${q.model}`);
43
+ graph.addNode({
44
+ id: modelNodeId,
45
+ label: 'CodeElement',
46
+ properties: {
47
+ name: q.model,
48
+ filePath: '',
49
+ description: `${q.orm} model/table: ${q.model}`,
50
+ },
51
+ });
52
+ }
53
+ modelNodes.set(modelKey, modelNodeId);
54
+ }
55
+ const fileId = generateId('File', q.filePath); // edge source is the querying file
56
+ const edgeKey = `${fileId}->${modelNodeId}:${q.method}`;
57
+ if (seenEdges.has(edgeKey)) // same file/model/method already linked
58
+ continue;
59
+ seenEdges.add(edgeKey);
60
+ graph.addRelationship({
61
+ id: generateId('QUERIES', edgeKey),
62
+ sourceId: fileId,
63
+ targetId: modelNodeId,
64
+ type: 'QUERIES',
65
+ confidence: 0.9,
66
+ reason: `${q.orm}-${q.method}`,
67
+ });
68
+ edgesCreated++;
69
+ }
70
+ if (isDev) { // summary log in dev mode only
71
+ console.log(`ORM dataflow: ${edgesCreated} QUERIES edges, ${modelNodes.size} models (${queries.length} total calls)`);
72
+ }
73
+ return { edgesCreated, modelCount: modelNodes.size };
74
+ }
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Parse implementation — chunked parse + resolve loop.
3
+ *
4
+ * This is the core parsing engine of the ingestion pipeline. It reads
5
+ * source files in byte-budget chunks (~20MB each), parses via worker
6
+ * pool (or sequential fallback), resolves imports/heritage/routes per
7
+ * chunk (worker-extracted calls are deferred until every chunk has been parsed), and synthesizes wildcard import bindings.
8
+ *
9
+ * Consumed by the parse phase (`parse.ts`) — the phase file handles
10
+ * dependency wiring while the heavy implementation lives here.
11
+ *
12
+ * @module
13
+ */
14
+ import { BindingAccumulator } from '../binding-accumulator.js';
15
+ import { type ExportedTypeMap } from '../call-processor.js';
16
+ import { createResolutionContext } from '../model/resolution-context.js';
17
+ import { type PipelineProgress } from '../../../_shared/index.js';
18
+ import type { ExtractedDecoratorRoute, ExtractedFetchCall, ExtractedORMQuery, ExtractedRoute, ExtractedToolDef } from '../workers/parse-worker.js';
19
+ import type { KnowledgeGraph } from '../../graph/types.js';
20
+ import type { PipelineOptions } from '../pipeline.js';
21
/** + One scanned file as consumed by the chunked parser. + */
+ type ScannedFile = {
22
+ path: string; // file path; NOTE(review): presumably repo-relative — confirm against the scan phase
23
+ size: number; // treated as a byte count when files are packed into parse chunks
24
+ };
25
+ type ProgressFn = (progress: PipelineProgress) => void; // progress callback; its return value is ignored
26
+ /**
27
+ * Chunked parse + resolve loop.
28
+ *
29
+ * Reads source in byte-budget chunks (~20MB each). For each chunk:
30
+ * 1. Parse via worker pool (or sequential fallback)
31
+ * 2. Resolve imports from extracted data
32
+ * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
33
+ * 4. Resolve heritage + routes per chunk; defer worker CALLS until all chunks
34
+ * have contributed heritage so interface-dispatch implementor map is complete
35
+ * 5. Collect TypeEnv bindings for cross-file propagation
+ *
+ * @param graph - knowledge graph that parsing and resolution write into
+ * @param scannedFiles - scanned files with their sizes; only files with an available parser are parsed
+ * @param allPaths - all paths made available to import resolution
+ * @param totalFiles - total scanned path count (used for dev-mode logging)
+ * @param repoPath - repository root used when reading file contents
+ * @param pipelineStart - NOTE(review): not referenced by the implementation shown in this diff
+ * @param onProgress - receives `parsing` progress events
+ * @param options - optional pipeline options (`skipWorkers` disables the worker pool)
+ * @returns the exported-type map, the collected fetch calls / routes /
+ *          decorator routes / tool defs / ORM queries, the binding accumulator
+ *          and the resolution context
36
+ */
37
+ export declare function runChunkedParseAndResolve(graph: KnowledgeGraph, scannedFiles: ScannedFile[], allPaths: string[], totalFiles: number, repoPath: string, pipelineStart: number, onProgress: ProgressFn, options?: PipelineOptions): Promise<{
38
+ exportedTypeMap: ExportedTypeMap;
39
+ allFetchCalls: ExtractedFetchCall[];
40
+ allExtractedRoutes: ExtractedRoute[];
41
+ allDecoratorRoutes: ExtractedDecoratorRoute[];
42
+ allToolDefs: ExtractedToolDef[];
43
+ allORMQueries: ExtractedORMQuery[];
44
+ bindingAccumulator: BindingAccumulator;
45
+ resolutionContext: ReturnType<typeof createResolutionContext>;
46
+ }>;
47
+ export {};
@@ -0,0 +1,437 @@
1
+ /**
2
+ * Parse implementation — chunked parse + resolve loop.
3
+ *
4
+ * This is the core parsing engine of the ingestion pipeline. It reads
5
+ * source files in byte-budget chunks (~20MB each), parses via worker
6
+ * pool (or sequential fallback), resolves imports/heritage/routes per
7
+ * chunk (worker-extracted calls are deferred until every chunk has been parsed), and synthesizes wildcard import bindings.
8
+ *
9
+ * Consumed by the parse phase (`parse.ts`) — the phase file handles
10
+ * dependency wiring while the heavy implementation lives here.
11
+ *
12
+ * @module
13
+ */
14
+ import { BindingAccumulator, enrichExportedTypeMap, } from '../binding-accumulator.js';
15
+ import { processParsing } from '../parsing-processor.js';
16
+ import { processImports, processImportsFromExtracted, buildImportResolutionContext, } from '../import-processor.js';
17
+ import { EMPTY_INDEX } from '../import-resolvers/utils.js';
18
+ import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted, seedCrossFileReceiverTypes, buildExportedTypeMapFromGraph, } from '../call-processor.js';
19
+ import { buildHeritageMap } from '../model/heritage-map.js';
20
+ import { processHeritage, processHeritageFromExtracted, extractExtractedHeritageFromFiles, getHeritageStrategyForLanguage, } from '../heritage-processor.js';
21
+ import { createResolutionContext } from '../model/resolution-context.js';
22
+ import { createASTCache } from '../ast-cache.js';
23
+ import { getLanguageFromFilename } from '../../../_shared/index.js';
24
+ import { readFileContents } from '../filesystem-walker.js';
25
+ import { isLanguageAvailable } from '../../tree-sitter/parser-loader.js';
26
+ import { createWorkerPool } from '../workers/worker-pool.js';
27
+ import { extractFetchCallsFromFiles } from '../call-processor.js';
28
+ import fs from 'node:fs';
29
+ import path from 'node:path';
30
+ import { fileURLToPath, pathToFileURL } from 'node:url';
31
+ import { isDev } from '../utils/env.js';
32
+ import { synthesizeWildcardImportBindings, needsSynthesis } from './wildcard-synthesis.js';
33
+ import { extractORMQueriesInline } from './orm-extraction.js';
34
+ // ── Constants ──────────────────────────────────────────────────────────────
35
+ /** Max bytes of source content to load per parse chunk. Soft cap: a single file larger than this still gets a chunk of its own. */
36
+ const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
37
+ /**
38
+ * Chunked parse + resolve loop.
39
+ *
40
+ * Reads source in byte-budget chunks (~20MB each). For each chunk:
41
+ * 1. Parse via worker pool (or sequential fallback)
42
+ * 2. Resolve imports from extracted data
43
+ * 3. Synthesize wildcard import bindings (Go/Ruby/C++/Swift/Python)
44
+ * 4. Resolve heritage + routes per chunk; defer worker CALLS until all chunks
45
+ * have contributed heritage so interface-dispatch implementor map is complete
46
+ * 5. Collect TypeEnv bindings for cross-file propagation
+ *
+ * Chunks for which `processParsing` returns no extracted data are only
+ * import-resolved in the main loop; their calls, heritage, fetch calls and
+ * ORM queries are handled by the sequential fallback after the loop.
+ *
+ * @param graph - knowledge graph written to by parsing and every resolution step; `graph.nodeCount` is reported in progress stats
+ * @param scannedFiles - `{ path, size }` entries; only files whose language (derived from the filename) has an available parser are parsed
+ * @param allPaths - every path handed to import resolution (`buildImportResolutionContext`, `processImports`)
+ * @param totalFiles - used only in the dev-mode scan log line
+ * @param repoPath - root passed to `readFileContents` and the import processors
+ * @param pipelineStart - NOTE(review): not referenced anywhere in this function body
+ * @param onProgress - receives `phase: 'parsing'` events (percent between 20 and 82)
+ * @param options - `options.skipWorkers` disables the worker pool
+ * @returns the exported-type map, the accumulated fetch calls / routes /
+ *          decorator routes / tool defs / ORM queries, the binding accumulator
+ *          (`finalize()` is attempted here, `dispose()` is not called) and the
+ *          resolution context as `resolutionContext`
47
+ */
48
+ export async function runChunkedParseAndResolve(graph, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress, options) {
49
+ const ctx = createResolutionContext();
50
+ const symbolTable = ctx.model.symbols;
51
+ const parseableScanned = scannedFiles.filter((f) => {
52
+ const lang = getLanguageFromFilename(f.path);
53
+ return lang && isLanguageAvailable(lang);
54
+ });
55
+ // Warn about files skipped due to unavailable parsers (one console.warn per language, with a file count)
56
+ const skippedByLang = new Map();
57
+ for (const f of scannedFiles) {
58
+ const lang = getLanguageFromFilename(f.path);
59
+ if (lang && !isLanguageAvailable(lang)) {
60
+ skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
61
+ }
62
+ }
63
+ for (const [lang, count] of skippedByLang) {
64
+ console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
65
+ }
66
+ const totalParseable = parseableScanned.length;
67
+ if (totalParseable === 0) { // NOTE(review): no early return — execution continues and the next progress event reports 20% after this 82%
68
+ onProgress({
69
+ phase: 'parsing',
70
+ percent: 82,
71
+ message: 'No parseable files found — skipping parsing phase',
72
+ stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
73
+ });
74
+ }
75
+ // Build byte-budget chunks: pack files in scan order and start a new chunk when adding the next file would exceed CHUNK_BYTE_BUDGET; a file larger than the budget still gets a chunk to itself
76
+ const chunks = [];
77
+ let currentChunk = [];
78
+ let currentBytes = 0;
79
+ for (const file of parseableScanned) {
80
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
81
+ chunks.push(currentChunk);
82
+ currentChunk = [];
83
+ currentBytes = 0;
84
+ }
85
+ currentChunk.push(file.path);
86
+ currentBytes += file.size;
87
+ }
88
+ if (currentChunk.length > 0)
89
+ chunks.push(currentChunk);
90
+ const numChunks = chunks.length;
91
+ if (isDev) {
92
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
93
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
94
+ }
95
+ onProgress({
96
+ phase: 'parsing',
97
+ percent: 20,
98
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
99
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
100
+ });
101
+ // Don't spawn workers for tiny repos — overhead exceeds benefit
102
+ const MIN_FILES_FOR_WORKERS = 15;
103
+ const MIN_BYTES_FOR_WORKERS = 512 * 1024;
104
+ const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);
105
+ // Create worker pool once, reuse across chunks
106
+ let workerPool;
107
+ if (!options?.skipWorkers &&
108
+ (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS)) {
109
+ try {
110
+ let workerUrl = new URL('../workers/parse-worker.js', import.meta.url);
111
+ // When running under vitest, import.meta.url points to src/ where no .js exists.
112
+ // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
113
+ const thisDir = fileURLToPath(new URL('.', import.meta.url));
114
+ if (!fs.existsSync(fileURLToPath(workerUrl))) {
115
+ const distWorker = path.resolve(thisDir, '..', '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
116
+ if (fs.existsSync(distWorker)) {
117
+ workerUrl = pathToFileURL(distWorker);
118
+ }
119
+ }
120
+ workerPool = createWorkerPool(workerUrl);
121
+ }
122
+ catch (err) {
123
+ console.warn('Worker pool creation failed, using sequential fallback:', err.message); // NOTE(review): assumes the thrown value is an Error
124
+ }
125
+ }
126
+ let filesParsedSoFar = 0;
127
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
128
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
129
+ let astCache = createASTCache(maxChunkFiles);
130
+ // Build import resolution context once — suffix index, file lists, resolve cache.
131
+ const importCtx = buildImportResolutionContext(allPaths);
132
+ const allPathObjects = allPaths.map((p) => ({ path: p }));
133
+ const sequentialChunkPaths = []; // chunks for which processParsing returned no extracted data; handled after the main loop
134
+ const chunkNeedsSynthesis = chunks.map((paths) => paths.some((p) => { // per-chunk flag: does any file's language need wildcard-import synthesis?
135
+ const lang = getLanguageFromFilename(p);
136
+ return lang != null && needsSynthesis(lang);
137
+ }));
138
+ const exportedTypeMap = new Map();
139
+ const bindingAccumulator = new BindingAccumulator();
140
+ // Tracks whether per-chunk or fallback wildcard-binding synthesis already
141
+ // ran, so the unconditional final call below can be skipped when redundant.
142
+ // synthesizeWildcardImportBindings is graph-global; once any chunk runs it
143
+ // after parsing wildcard files, later non-wildcard chunks add no work for
144
+ // it, and later wildcard chunks re-run it themselves.
145
+ let hasSynthesized = false;
146
+ const allFetchCalls = [];
147
+ const allExtractedRoutes = [];
148
+ const allDecoratorRoutes = [];
149
+ const allToolDefs = [];
150
+ const allORMQueries = [];
151
+ const deferredWorkerCalls = []; // this and the three arrays below accumulate worker-extracted records across all chunks and are consumed after the chunk loop
152
+ const deferredWorkerHeritage = [];
153
+ const deferredConstructorBindings = [];
154
+ const deferredAssignments = [];
155
+ try {
156
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
157
+ const chunkPaths = chunks[chunkIdx];
158
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
159
+ const chunkFiles = chunkPaths
160
+ .filter((p) => chunkContents.has(p))
161
+ .map((p) => ({ path: p, content: chunkContents.get(p) }));
162
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => { // progress callback: maps the per-chunk file index onto the global 20–82% band
163
+ const globalCurrent = filesParsedSoFar + current;
164
+ const parsingProgress = 20 + (globalCurrent / totalParseable) * 62;
165
+ onProgress({
166
+ phase: 'parsing',
167
+ percent: Math.round(parsingProgress),
168
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
169
+ detail: filePath,
170
+ stats: {
171
+ filesProcessed: globalCurrent,
172
+ totalFiles: totalParseable,
173
+ nodesCreated: graph.nodeCount,
174
+ },
175
+ });
176
+ }, workerPool);
177
+ const chunkBasePercent = 20 + (filesParsedSoFar / totalParseable) * 62;
178
+ if (chunkWorkerData) { // truthy result = extracted data is available for this chunk; otherwise the sequential branch below runs
179
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
180
+ onProgress({
181
+ phase: 'parsing',
182
+ percent: Math.round(chunkBasePercent),
183
+ message: `Resolving imports (chunk ${chunkIdx + 1}/${numChunks})...`,
184
+ detail: `${current}/${total} files`,
185
+ stats: {
186
+ filesProcessed: filesParsedSoFar,
187
+ totalFiles: totalParseable,
188
+ nodesCreated: graph.nodeCount,
189
+ },
190
+ });
191
+ }, repoPath, importCtx);
192
+ if (chunkNeedsSynthesis[chunkIdx]) {
193
+ synthesizeWildcardImportBindings(graph, ctx);
194
+ hasSynthesized = true;
195
+ }
196
+ if (exportedTypeMap.size > 0 && ctx.namedImportMap.size > 0) { // NOTE(review): nothing visible in this function adds to exportedTypeMap before the chunk loop finishes, so this branch looks unreachable here — confirm
197
+ const { enrichedCount } = seedCrossFileReceiverTypes(chunkWorkerData.calls, ctx.namedImportMap, exportedTypeMap);
198
+ if (isDev && enrichedCount > 0) {
199
+ console.log(`🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`);
200
+ }
201
+ }
202
+ for (const item of chunkWorkerData.calls) // defer: calls are resolved after all chunks so the heritage map is complete
203
+ deferredWorkerCalls.push(item);
204
+ for (const item of chunkWorkerData.heritage)
205
+ deferredWorkerHeritage.push(item);
206
+ for (const item of chunkWorkerData.constructorBindings)
207
+ deferredConstructorBindings.push(item);
208
+ if (chunkWorkerData.assignments?.length) {
209
+ for (const item of chunkWorkerData.assignments)
210
+ deferredAssignments.push(item);
211
+ }
212
+ await Promise.all([ // heritage and routes for this chunk are awaited together
213
+ processHeritageFromExtracted(graph, chunkWorkerData.heritage, ctx, (current, total) => {
214
+ onProgress({
215
+ phase: 'parsing',
216
+ percent: Math.round(chunkBasePercent),
217
+ message: `Resolving heritage (chunk ${chunkIdx + 1}/${numChunks})...`,
218
+ detail: `${current}/${total} records`,
219
+ stats: {
220
+ filesProcessed: filesParsedSoFar,
221
+ totalFiles: totalParseable,
222
+ nodesCreated: graph.nodeCount,
223
+ },
224
+ });
225
+ }),
226
+ processRoutesFromExtracted(graph, chunkWorkerData.routes ?? [], ctx, (current, total) => {
227
+ onProgress({
228
+ phase: 'parsing',
229
+ percent: Math.round(chunkBasePercent),
230
+ message: `Resolving routes (chunk ${chunkIdx + 1}/${numChunks})...`,
231
+ detail: `${current}/${total} routes`,
232
+ stats: {
233
+ filesProcessed: filesParsedSoFar,
234
+ totalFiles: totalParseable,
235
+ nodesCreated: graph.nodeCount,
236
+ },
237
+ });
238
+ }),
239
+ ]);
240
+ if (chunkWorkerData.fileScopeBindings?.length) { // validate [varName, typeName] tuples before feeding the accumulator; malformed entries are skipped
241
+ for (const { filePath, bindings } of chunkWorkerData.fileScopeBindings) {
242
+ if (typeof filePath !== 'string' || filePath.length === 0)
243
+ continue;
244
+ if (!Array.isArray(bindings))
245
+ continue;
246
+ const entries = [];
247
+ for (const tuple of bindings) {
248
+ if (!Array.isArray(tuple) || tuple.length !== 2)
249
+ continue;
250
+ const [varName, typeName] = tuple;
251
+ if (typeof varName !== 'string' || typeof typeName !== 'string')
252
+ continue;
253
+ entries.push({ scope: '', varName, typeName }); // file-scope bindings are recorded with an empty scope
254
+ }
255
+ if (entries.length > 0) {
256
+ bindingAccumulator.appendFile(filePath, entries);
257
+ }
258
+ }
259
+ }
260
+ if (chunkWorkerData.fetchCalls?.length) {
261
+ for (const item of chunkWorkerData.fetchCalls)
262
+ allFetchCalls.push(item);
263
+ }
264
+ if (chunkWorkerData.routes?.length) {
265
+ for (const item of chunkWorkerData.routes)
266
+ allExtractedRoutes.push(item);
267
+ }
268
+ if (chunkWorkerData.decoratorRoutes?.length) {
269
+ for (const item of chunkWorkerData.decoratorRoutes)
270
+ allDecoratorRoutes.push(item);
271
+ }
272
+ if (chunkWorkerData.toolDefs?.length) {
273
+ for (const item of chunkWorkerData.toolDefs)
274
+ allToolDefs.push(item);
275
+ }
276
+ if (chunkWorkerData.ormQueries?.length) {
277
+ for (const item of chunkWorkerData.ormQueries)
278
+ allORMQueries.push(item);
279
+ }
280
+ }
281
+ else { // sequential path: resolve imports now; calls/heritage are handled by the fallback loop after the main loop
282
+ await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
283
+ sequentialChunkPaths.push(chunkPaths);
284
+ }
285
+ filesParsedSoFar += chunkFiles.length;
286
+ astCache.clear();
287
+ }
288
+ const fullWorkerHeritageMap = deferredWorkerHeritage.length > 0 // built from every chunk's heritage records
289
+ ? buildHeritageMap(deferredWorkerHeritage, ctx, getHeritageStrategyForLanguage)
290
+ : undefined;
291
+ if (deferredWorkerCalls.length > 0) {
292
+ await processCallsFromExtracted(graph, deferredWorkerCalls, ctx, (current, total) => {
293
+ onProgress({
294
+ phase: 'parsing',
295
+ percent: 82,
296
+ message: 'Resolving calls (all chunks)...',
297
+ detail: `${current}/${total} files`,
298
+ stats: {
299
+ filesProcessed: filesParsedSoFar,
300
+ totalFiles: totalParseable,
301
+ nodesCreated: graph.nodeCount,
302
+ },
303
+ });
304
+ }, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined, fullWorkerHeritageMap, bindingAccumulator);
305
+ }
306
+ if (deferredAssignments.length > 0) {
307
+ processAssignmentsFromExtracted(graph, deferredAssignments, ctx, deferredConstructorBindings.length > 0 ? deferredConstructorBindings : undefined, bindingAccumulator);
308
+ }
309
+ }
310
+ finally {
311
+ await workerPool?.terminate(); // always terminate the pool, even if a chunk threw
312
+ }
313
+ // Sequential fallback chunks.
314
+ //
315
+ // U6: wrap the fallback loop and the finalize/enrich steps in a try/finally
316
+ // so cleanup still runs on a mid-fallback throw. The `finally` guarantees:
317
+ // 1. `astCache.clear()` releases any tree-sitter trees held by the most
318
+ // recently allocated per-chunk cache, mirroring the per-chunk
319
+ // `astCache.clear()` calls on the happy path.
320
+ // 2. `bindingAccumulator.finalize()` runs before `crossFile` disposes the
321
+ // accumulator downstream — callers that inspect partial TypeEnv state
322
+ // (or consume it via `enrichExportedTypeMap` on a partial recovery)
323
+ // still see a finalized accumulator.
324
+ // 3. `enrichExportedTypeMap` runs so any bindings already accumulated
325
+ // are propagated into `exportedTypeMap` even if the fallback aborted.
326
+ //
327
+ // Disposal of the accumulator remains with `crossFile` (owned by U2). We do
328
+ // NOT call `bindingAccumulator.dispose()` here.
329
+ try {
330
+ if (sequentialChunkPaths.length > 0) {
331
+ synthesizeWildcardImportBindings(graph, ctx);
332
+ hasSynthesized = true;
333
+ }
334
+ const allSequentialHeritage = [];
335
+ const cachedSequentialChunkFiles = []; // NOTE(review): keeps every sequential chunk's file contents in memory until the second pass clears its slot
336
+ for (const chunkPaths of sequentialChunkPaths) { // pass 1: re-read each sequential chunk and collect its heritage records
337
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
338
+ const chunkFiles = chunkPaths
339
+ .filter((p) => chunkContents.has(p))
340
+ .map((p) => ({ path: p, content: chunkContents.get(p) }));
341
+ cachedSequentialChunkFiles.push(chunkFiles);
342
+ astCache = createASTCache(chunkFiles.length);
343
+ const sequentialHeritage = await extractExtractedHeritageFromFiles(chunkFiles, astCache);
344
+ for (const h of sequentialHeritage)
345
+ allSequentialHeritage.push(h);
346
+ astCache.clear();
347
+ }
348
+ const sequentialHeritageMap = allSequentialHeritage.length > 0
349
+ ? buildHeritageMap(allSequentialHeritage, ctx, getHeritageStrategyForLanguage)
350
+ : undefined;
351
+ for (let chunkIdx = 0; chunkIdx < sequentialChunkPaths.length; chunkIdx++) { // pass 2: resolve calls/heritage and collect fetch calls + ORM queries using the complete heritage map
352
+ const chunkFiles = cachedSequentialChunkFiles[chunkIdx];
353
+ astCache = createASTCache(chunkFiles.length);
354
+ const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx, undefined, exportedTypeMap, undefined, undefined, undefined, sequentialHeritageMap, bindingAccumulator);
355
+ await processHeritage(graph, chunkFiles, astCache, ctx);
356
+ if (rubyHeritage.length > 0) {
357
+ await processHeritageFromExtracted(graph, rubyHeritage, ctx);
358
+ }
359
+ const chunkFetchCalls = await extractFetchCallsFromFiles(chunkFiles, astCache);
360
+ if (chunkFetchCalls.length > 0) {
361
+ for (const item of chunkFetchCalls)
362
+ allFetchCalls.push(item);
363
+ }
364
+ for (const f of chunkFiles) {
365
+ extractORMQueriesInline(f.path, f.content, allORMQueries);
366
+ }
367
+ astCache.clear();
368
+ cachedSequentialChunkFiles[chunkIdx] = []; // drop this chunk's cached contents once processed
369
+ }
370
+ // Log resolution cache stats
371
+ if (isDev) {
372
+ const rcStats = ctx.getStats();
373
+ const total = rcStats.cacheHits + rcStats.cacheMisses;
374
+ const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
375
+ console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
376
+ }
377
+ }
378
+ finally {
379
+ // Clearing an already-empty cache is a no-op, so this is idempotent-safe
380
+ // on the happy path where every per-chunk block already cleared astCache.
381
+ astCache.clear();
382
+ // Run finalize + enrichment inside try/catch so a cleanup failure never
383
+ // masks the original fallback error. finalize must precede crossFile's
384
+ // dispose (U2) and enrichExportedTypeMap depends on finalized bindings.
385
+ try {
386
+ bindingAccumulator.finalize();
387
+ const enriched = enrichExportedTypeMap(bindingAccumulator, graph, exportedTypeMap);
388
+ if (isDev && enriched > 0) {
389
+ console.log(`🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`);
390
+ }
391
+ }
392
+ catch (enrichErr) {
393
+ if (isDev) { // outside dev mode a finalize/enrich failure is swallowed without any log
394
+ console.warn('Post-fallback finalize/enrich failed during cleanup:', enrichErr.message);
395
+ }
396
+ }
397
+ }
398
+ if (!hasSynthesized) { // no chunk (and no fallback) triggered synthesis: run it once now
399
+ const synthesized = synthesizeWildcardImportBindings(graph, ctx);
400
+ if (isDev && synthesized > 0) {
401
+ console.log(`🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`);
402
+ }
403
+ }
404
+ // Worker-path enrichment: if exportedTypeMap is empty (e.g. the worker pool
405
+ // built TypeEnv inside workers without access to SymbolTable), reconstruct
406
+ // the map from graph nodes + SymbolTable here in the main thread before
407
+ // handing the (now read-only) map to downstream phases. Doing it here means
408
+ // crossFile receives a fully-populated map and never needs to mutate it for
409
+ // initial-graph enrichment.
410
+ if (exportedTypeMap.size === 0 && graph.nodeCount > 0) {
411
+ const graphExports = buildExportedTypeMapFromGraph(graph, ctx.model.symbols);
412
+ for (const [fp, exports] of graphExports)
413
+ exportedTypeMap.set(fp, exports);
414
+ }
415
+ allPathObjects.length = 0; // empty the per-path wrapper array; it is not used past this point
416
+ // Safe to reset importCtx caches here: `importCtx` (ImportResolutionContext)
417
+ // is a scratch workspace used only during import path resolution. The
418
+ // `resolutionContext` (`ctx`) returned below is a distinct object — it owns
419
+ // the fully-populated, post-parse `importMap` / `namedImportMap` /
420
+ // `packageMap` / `moduleAliasMap` / `model`, and never references
421
+ // `importCtx`. Cross-file re-resolution in cross-file-impl.ts consumes only
422
+ // `ctx` (via `processCalls`), so clearing the suffix index / resolveCache /
423
+ // normalizedFileList here cannot lose import matches downstream.
424
+ importCtx.resolveCache.clear();
425
+ importCtx.index = EMPTY_INDEX;
426
+ importCtx.normalizedFileList = [];
427
+ return {
428
+ exportedTypeMap,
429
+ allFetchCalls,
430
+ allExtractedRoutes,
431
+ allDecoratorRoutes,
432
+ allToolDefs,
433
+ allORMQueries,
434
+ bindingAccumulator,
435
+ resolutionContext: ctx,
436
+ };
437
+ }
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Phase: parse
3
+ *
4
+ * Chunked parse + resolve loop: reads source in byte-budget chunks,
5
+ * parses via worker pool (or sequential fallback), resolves imports,
6
+ * heritage, and calls, synthesizes wildcard bindings.
7
+ *
8
+ * This phase encapsulates the entire `runChunkedParseAndResolve` function
9
+ * from the original pipeline. The chunk loop is a memory optimization
10
+ * internal to this phase, not a phase boundary.
11
+ *
12
+ * @deps structure, markdown, cobol
13
+ * @reads scannedFiles, allPaths, allPathSet, totalFiles (from structure)
14
+ * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
15
+ * @output exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes,
16
+ * allToolDefs, allORMQueries, bindingAccumulator, resolutionContext (plus pass-through allPaths, allPathSet, totalFiles)
17
+ */
18
+ import type { PipelinePhase } from './types.js';
19
+ import type { BindingAccumulator } from '../binding-accumulator.js';
20
+ import type { ExtractedFetchCall, ExtractedRoute, ExtractedDecoratorRoute, ExtractedToolDef, ExtractedORMQuery } from '../workers/parse-worker.js';
21
+ import type { createResolutionContext } from '../model/resolution-context.js';
22
+ export interface ParseOutput { // Output contract of the parse phase: parse results plus values passed through from the structure phase.
23
+ /**
24
+ * Read-only snapshot of exported type bindings keyed by file path.
25
+ *
26
+ * Fully populated by `parse` (sequential path via `enrichExportedTypeMap`
27
+ * and worker path via `buildExportedTypeMapFromGraph` in the main thread,
28
+ * which runs only when the map is still empty and the graph has nodes).
29
+ * Downstream phases — including `crossFile` — receive it as a true
30
+ * `ReadonlyMap`; `crossFile` builds its own mutable working copy locally
31
+ * for per-file re-resolution writes, so this snapshot is never mutated
32
+ * after parse returns.
33
+ */
34
+ readonly exportedTypeMap: ReadonlyMap<string, ReadonlyMap<string, string>>;
35
+ readonly allFetchCalls: readonly ExtractedFetchCall[]; // element type comes from ../workers/parse-worker.js
36
+ readonly allExtractedRoutes: readonly ExtractedRoute[]; // element type comes from ../workers/parse-worker.js
37
+ readonly allDecoratorRoutes: readonly ExtractedDecoratorRoute[]; // element type comes from ../workers/parse-worker.js
38
+ readonly allToolDefs: readonly ExtractedToolDef[]; // element type comes from ../workers/parse-worker.js
39
+ readonly allORMQueries: readonly ExtractedORMQuery[]; // element type comes from ../workers/parse-worker.js
40
+ bindingAccumulator: BindingAccumulator; // NOTE(review): not declared readonly, unlike the collection fields above — confirm this is intentional
41
+ /** Resolution context from the parse phase — carries importMap, namedImportMap, etc. */
42
+ resolutionContext: ReturnType<typeof createResolutionContext>; // typed from the factory's return type; not declared readonly
43
+ /** Pass-through: all file paths for downstream phases. */
44
+ readonly allPaths: readonly string[];
45
+ /** Pass-through: shared `allPathSet` from structure (built once, not per-phase). */
46
+ readonly allPathSet: ReadonlySet<string>;
47
+ /** Pass-through: total file count for progress reporting. */
48
+ totalFiles: number; // NOTE(review): the only pass-through field without readonly — confirm
49
+ }
49
+ export declare const parsePhase: PipelinePhase<ParseOutput>; // Type declaration for the parse phase object; its output is typed as ParseOutput.
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Phase: parse
3
+ *
4
+ * Chunked parse + resolve loop: reads source in byte-budget chunks,
5
+ * parses via worker pool (or sequential fallback), resolves imports,
6
+ * heritage, and calls, synthesizes wildcard bindings.
7
+ *
8
+ * This phase encapsulates the entire `runChunkedParseAndResolve` function
9
+ * from the original pipeline. The chunk loop is a memory optimization
10
+ * internal to this phase, not a phase boundary.
11
+ *
12
+ * @deps structure, markdown, cobol
13
+ * @reads scannedFiles, allPaths, allPathSet, totalFiles (from structure)
14
+ * @writes graph (Symbol nodes, IMPORTS/CALLS/EXTENDS/IMPLEMENTS/ACCESSES edges)
15
+ * @output exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes,
16
+ * allToolDefs, allORMQueries, bindingAccumulator, resolutionContext (plus pass-through allPaths, allPathSet, totalFiles)
17
+ */
18
+ import { getPhaseOutput } from './types.js';
19
+ import { runChunkedParseAndResolve } from './parse-impl.js';
20
+ export const parsePhase = { // Phase descriptor: delegates to runChunkedParseAndResolve and augments its result with pass-through values.
21
+ name: 'parse',
22
+ deps: ['structure', 'markdown', 'cobol'], // NOTE(review): execute only reads the 'structure' output; 'markdown' and 'cobol' look like ordering-only dependencies — confirm
23
+ async execute(ctx, deps) { // resolves to the implementation's result plus allPaths, allPathSet and totalFiles
24
+ const { scannedFiles, allPaths, allPathSet, totalFiles } = getPhaseOutput(deps, 'structure'); // fetch the structure phase's output from the dependency results
25
+ const result = await runChunkedParseAndResolve(ctx.graph, scannedFiles, allPaths, totalFiles, ctx.repoPath, ctx.pipelineStart, ctx.onProgress, ctx.options); // allPathSet is not forwarded to the implementation
26
+ return {
27
+ ...result, // spread first, so the explicit keys below win if the implementation ever returns the same names
28
+ allPaths, // pass-through from structure
29
+ allPathSet, // pass-through from structure; never given to runChunkedParseAndResolve
30
+ totalFiles, // pass-through from structure
31
+ };
32
+ },
33
+ };
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Phase: processes
3
+ *
4
+ * Detects execution flows (processes) and creates Process nodes +
5
+ * STEP_IN_PROCESS edges. Also links Route/Tool nodes to processes.
6
+ *
7
+ * @deps communities, routes, tools
8
+ * @reads graph (all nodes and relationships), communityResult, routeRegistry, toolDefs
9
+ * @writes graph (Process nodes, STEP_IN_PROCESS edges, ENTRY_POINT_OF edges)
10
+ */
11
+ import type { PipelinePhase } from './types.js';
12
+ import { type ProcessDetectionResult } from '../process-processor.js';
13
+ export interface ProcessesOutput { // Output contract of the processes phase.
14
+ processResult: ProcessDetectionResult; // result type is declared in ../process-processor.js
15
+ }
16
+ export declare const processesPhase: PipelinePhase<ProcessesOutput>; // Type declaration for the processes phase object; its output is typed as ProcessesOutput.