gitnexus 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +28 -3
- package/dist/core/group/extractors/fs-utils.d.ts +10 -0
- package/dist/core/group/extractors/fs-utils.js +24 -0
- package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
- package/dist/core/group/extractors/grpc-extractor.js +313 -191
- package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
- package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
- package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
- package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
- package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
- package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
- package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
- package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
- package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
- package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/go.js +215 -0
- package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
- package/dist/core/group/extractors/http-patterns/index.js +44 -0
- package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/java.js +253 -0
- package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/http-patterns/node.js +354 -0
- package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/php.js +70 -0
- package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/python.js +133 -0
- package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
- package/dist/core/group/extractors/http-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
- package/dist/core/group/extractors/http-route-extractor.js +201 -238
- package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
- package/dist/core/group/extractors/manifest-extractor.js +235 -0
- package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
- package/dist/core/group/extractors/topic-extractor.js +55 -192
- package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/go.js +120 -0
- package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
- package/dist/core/group/extractors/topic-patterns/index.js +38 -0
- package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/java.js +80 -0
- package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/topic-patterns/node.js +155 -0
- package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/python.js +116 -0
- package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
- package/dist/core/group/extractors/topic-patterns/types.js +10 -0
- package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
- package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
- package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
- package/dist/core/ingestion/binding-accumulator.js +29 -25
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/language-config.js +1 -1
- package/dist/core/ingestion/language-provider.d.ts +8 -0
- package/dist/core/ingestion/languages/ruby.js +15 -0
- package/dist/core/ingestion/markdown-processor.d.ts +1 -1
- package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
- package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
- package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
- package/dist/core/ingestion/method-extractors/generic.js +48 -4
- package/dist/core/ingestion/method-types.d.ts +4 -0
- package/dist/core/ingestion/model/resolve.js +103 -48
- package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
- package/dist/core/ingestion/model/semantic-model.js +1 -1
- package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
- package/dist/core/ingestion/model/symbol-table.js +7 -7
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +54 -42
- package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
- package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
- package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/index.js +22 -0
- package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
- package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
- package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
- package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
- package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
- package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
- package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
- package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
- package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
- package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
- package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
- package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
- package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
- package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
- package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
- package/dist/core/ingestion/pipeline-phases/types.js +37 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
- package/dist/core/ingestion/pipeline.d.ts +16 -10
- package/dist/core/ingestion/pipeline.js +66 -1534
- package/dist/core/ingestion/process-processor.js +1 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +69 -0
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
- package/dist/core/ingestion/utils/ast-helpers.js +48 -21
- package/dist/core/ingestion/utils/env.d.ts +10 -0
- package/dist/core/ingestion/utils/env.js +10 -0
- package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
- package/dist/core/ingestion/utils/graph-sort.js +100 -0
- package/dist/core/ingestion/workers/parse-worker.js +12 -8
- package/dist/core/lbug/lbug-adapter.js +66 -24
- package/package.json +3 -3
- package/vendor/tree-sitter-proto/binding.gyp +30 -0
- package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
- package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
- package/vendor/tree-sitter-proto/package.json +18 -0
- package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
- package/vendor/tree-sitter-proto/src/parser.c +10149 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: scan
|
|
3
|
+
*
|
|
4
|
+
* Walks the repository filesystem and collects file paths + sizes.
|
|
5
|
+
* Does NOT read file contents — that happens in downstream phases.
|
|
6
|
+
*
|
|
7
|
+
* @deps (none — this is the pipeline root)
|
|
8
|
+
* @reads repoPath (filesystem)
|
|
9
|
+
* @writes graph (nothing yet — just returns scanned paths)
|
|
10
|
+
* @output ScannedFile[], allPaths[], totalFiles
|
|
11
|
+
*/
|
|
12
|
+
import { walkRepositoryPaths } from '../filesystem-walker.js';
|
|
13
|
+
/**
 * Pipeline root phase: enumerates the repository's files.
 *
 * Reports progress under the 'extracting' phase label, with the walk itself
 * scaled onto the 0–15 percent range, and returns the scanned entries, their
 * paths, and the file count for downstream phases.
 */
export const scanPhase = {
    name: 'scan',
    deps: [],
    /**
     * @param ctx Pipeline context; uses `repoPath`, `onProgress` and `graph.nodeCount`.
     * @returns `{ scannedFiles, allPaths, totalFiles }`
     */
    async execute(ctx) {
        const phase = 'extracting';
        const scanningMessage = 'Scanning repository...';
        ctx.onProgress({ phase, percent: 0, message: scanningMessage });
        // Invoked by the walker with the running count, the total and the current path.
        const reportWalkProgress = (current, total, filePath) => {
            ctx.onProgress({
                phase,
                percent: Math.round((current / total) * 15),
                message: scanningMessage,
                detail: filePath,
                stats: {
                    filesProcessed: current,
                    totalFiles: total,
                    nodesCreated: ctx.graph.nodeCount,
                },
            });
        };
        const scannedFiles = await walkRepositoryPaths(ctx.repoPath, reportWalkProgress);
        const allPaths = scannedFiles.map(({ path }) => path);
        const totalFiles = scannedFiles.length;
        ctx.onProgress({
            phase,
            percent: 15,
            message: 'Repository scanned successfully',
            stats: {
                filesProcessed: totalFiles,
                totalFiles,
                nodesCreated: ctx.graph.nodeCount,
            },
        });
        return { scannedFiles, allPaths, totalFiles };
    },
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: structure
|
|
3
|
+
*
|
|
4
|
+
* Builds File and Folder nodes in the graph from scanned paths.
|
|
5
|
+
*
|
|
6
|
+
* @deps scan
|
|
7
|
+
* @reads allPaths (from scan phase)
|
|
8
|
+
* @writes graph (File, Folder nodes + CONTAINS edges)
|
|
9
|
+
*/
|
|
10
|
+
import type { PipelinePhase } from './types.js';
|
|
11
|
+
/**
 * Output of the structure phase.
 *
 * The phase writes File/Folder nodes straight into the graph; what it returns
 * is the scan data passed through for downstream phases, plus a shared set
 * view of the scanned paths.
 */
export interface StructureOutput {
    /** Scanned file entries, passed through unchanged from the scan phase. */
    scannedFiles: Array<{
        path: string;
        size: number;
    }>;
    /** Paths of all scanned files, passed through from the scan phase. */
    allPaths: string[];
    /**
     * Set built from `allPaths` once, in this phase, and shared by all
     * downstream consumers (cobol, markdown, cross-file propagation) so they
     * do not each allocate their own `new Set(allPaths)` on repositories
     * with thousands of files.
     */
    allPathSet: ReadonlySet<string>;
    /** File count, passed through from the scan phase. */
    totalFiles: number;
}
/** The structure phase; depends on `scan` and resolves to a {@link StructureOutput}. */
export declare const structurePhase: PipelinePhase<StructureOutput>;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: structure
|
|
3
|
+
*
|
|
4
|
+
* Builds File and Folder nodes in the graph from scanned paths.
|
|
5
|
+
*
|
|
6
|
+
* @deps scan
|
|
7
|
+
* @reads allPaths (from scan phase)
|
|
8
|
+
* @writes graph (File, Folder nodes + CONTAINS edges)
|
|
9
|
+
*/
|
|
10
|
+
import { getPhaseOutput } from './types.js';
|
|
11
|
+
import { processStructure } from '../structure-processor.js';
|
|
12
|
+
/**
 * Structure phase: hands the scanned paths to `processStructure` so it can
 * populate the graph, then forwards the scan data together with a set of the
 * paths. Progress is reported at 15% before and 20% after the graph update.
 */
export const structurePhase = {
    name: 'structure',
    deps: ['scan'],
    /**
     * @param ctx  Pipeline context; uses `graph` and `onProgress`.
     * @param deps Resolved dependency results; the `scan` output is read from it.
     * @returns `{ scannedFiles, allPaths, allPathSet, totalFiles }`
     */
    async execute(ctx, deps) {
        const scanOutput = getPhaseOutput(deps, 'scan');
        const { scannedFiles, allPaths, totalFiles } = scanOutput;
        // Both progress events share everything except percent, message and processed count.
        const report = (percent, message, filesProcessed) => {
            ctx.onProgress({
                phase: 'structure',
                percent,
                message,
                stats: { filesProcessed, totalFiles, nodesCreated: ctx.graph.nodeCount },
            });
        };
        report(15, 'Analyzing project structure...', 0);
        processStructure(ctx.graph, allPaths);
        report(20, 'Project structure analyzed', totalFiles);
        // The set is materialized once here so that cobol, markdown and
        // cross-file propagation can share it rather than rebuilding it.
        return {
            scannedFiles,
            allPaths,
            allPathSet: new Set(allPaths),
            totalFiles,
        };
    },
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: tools
|
|
3
|
+
*
|
|
4
|
+
* Detects MCP/RPC tool definitions and creates Tool graph nodes.
|
|
5
|
+
*
|
|
6
|
+
* @deps parse
|
|
7
|
+
* @reads allToolDefs (from parse), allPaths
|
|
8
|
+
* @writes graph (Tool nodes, HANDLES_TOOL edges)
|
|
9
|
+
* @output toolDefs array
|
|
10
|
+
*/
|
|
11
|
+
import type { PipelinePhase } from './types.js';
|
|
12
|
+
/** A detected tool definition, as emitted by the tools phase. */
export interface ToolDef {
    /** Tool name; the tools phase keeps only the first definition seen per name. */
    name: string;
    /** Path of the file in which the definition was found. */
    filePath: string;
    /** Description text associated with the tool. */
    description: string;
}
|
|
17
|
+
/** Output of the tools phase. */
export interface ToolsOutput {
    /** Every tool definition the phase detected, one entry per tool name. */
    toolDefs: Array<ToolDef>;
}
/** The tools phase; depends on `parse` and resolves to a {@link ToolsOutput}. */
export declare const toolsPhase: PipelinePhase<ToolsOutput>;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: tools
|
|
3
|
+
*
|
|
4
|
+
* Detects MCP/RPC tool definitions and creates Tool graph nodes.
|
|
5
|
+
*
|
|
6
|
+
* @deps parse
|
|
7
|
+
* @reads allToolDefs (from parse), allPaths
|
|
8
|
+
* @writes graph (Tool nodes, HANDLES_TOOL edges)
|
|
9
|
+
* @output toolDefs array
|
|
10
|
+
*/
|
|
11
|
+
import { getPhaseOutput } from './types.js';
|
|
12
|
+
import { generateId } from '../../../lib/utils.js';
|
|
13
|
+
import { readFileContents } from '../filesystem-walker.js';
|
|
14
|
+
import { isDev } from '../utils/env.js';
|
|
15
|
+
/**
 * Tools phase: gathers tool definitions and records them in the graph.
 *
 * Definitions come from two sources, de-duplicated by tool name (first one wins):
 *   1. `allToolDefs` produced by the parse phase;
 *   2. a textual scan of `.ts`/`.js` files whose path mentions "tool" and whose
 *      content mentions `inputSchema`, looking for `name: '...', description: '...'` pairs.
 *
 * For each definition a `Tool` node and a `HANDLES_TOOL` edge (file → tool) are added.
 */
export const toolsPhase = {
    name: 'tools',
    deps: ['parse'],
    /**
     * @param ctx  Pipeline context; uses `repoPath` and `graph`.
     * @param deps Resolved dependency results; the `parse` output is read from it.
     * @returns `{ toolDefs }` — the de-duplicated list of detected tools.
     */
    async execute(ctx, deps) {
        const { allToolDefs, allPaths } = getPhaseOutput(deps, 'parse');
        const toolDefs = [];
        const seenToolNames = new Set();
        for (const td of allToolDefs) {
            if (seenToolNames.has(td.toolName))
                continue;
            seenToolNames.add(td.toolName);
            toolDefs.push({ name: td.toolName, filePath: td.filePath, description: td.description });
        }
        // TS tool definition arrays — require inputSchema nearby
        const toolCandidatePaths = allPaths.filter((p) => (p.endsWith('.ts') || p.endsWith('.js')) &&
            p.toLowerCase().includes('tool') &&
            !p.includes('node_modules') &&
            !p.includes('test') &&
            !p.includes('__'));
        if (toolCandidatePaths.length > 0) {
            // Group 1: tool name. Group 2: the quote character that opens the
            // description. Group 3: the description text. The closing quote is a
            // backreference to group 2, so a different quote character inside the
            // text (e.g. the apostrophe in "Don't ...") no longer ends the match early.
            const toolPattern = /name:\s*['"](\w+)['"]\s*,\s*\n?\s*description:\s*([`'"])([\s\S]*?)\2/g;
            const toolContents = await readFileContents(ctx.repoPath, toolCandidatePaths);
            for (const [filePath, content] of toolContents) {
                if (!content.includes('inputSchema'))
                    continue;
                // matchAll works on a private copy of the regex, so sharing one
                // global-flag pattern across files carries no lastIndex state.
                for (const match of content.matchAll(toolPattern)) {
                    const name = match[1];
                    if (seenToolNames.has(name))
                        continue;
                    seenToolNames.add(name);
                    toolDefs.push({
                        name,
                        filePath,
                        // Cap at 200 characters and flatten newlines to keep the property compact.
                        description: match[3].slice(0, 200).replace(/\n/g, ' ').trim(),
                    });
                }
            }
        }
        // Create Tool nodes and HANDLES_TOOL edges
        if (toolDefs.length > 0) {
            for (const td of toolDefs) {
                const toolNodeId = generateId('Tool', td.name);
                ctx.graph.addNode({
                    id: toolNodeId,
                    label: 'Tool',
                    properties: { name: td.name, filePath: td.filePath, description: td.description },
                });
                const handlerFileId = generateId('File', td.filePath);
                ctx.graph.addRelationship({
                    id: generateId('HANDLES_TOOL', `${handlerFileId}->${toolNodeId}`),
                    sourceId: handlerFileId,
                    targetId: toolNodeId,
                    type: 'HANDLES_TOOL',
                    confidence: 1.0,
                    reason: 'tool-definition',
                });
            }
            if (isDev) {
                console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`);
            }
        }
        return { toolDefs };
    },
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Phase — Type definitions.
|
|
3
|
+
*
|
|
4
|
+
* Each phase is a named node in the dependency graph with typed inputs and outputs.
|
|
5
|
+
* The runner resolves dependencies via topological sort and passes
|
|
6
|
+
* typed results from upstream phases as inputs to downstream phases.
|
|
7
|
+
*
|
|
8
|
+
* Design goals:
|
|
9
|
+
* - Explicit data flow between phases via typed outputs
|
|
10
|
+
* - The knowledge graph is a shared mutable accumulator — phases add nodes/edges
|
|
11
|
+
* and may read prior phases' contributions. This is intentional: the graph is
|
|
12
|
+
* the pipeline's primary output, not an inter-phase communication channel.
|
|
13
|
+
* - Compile-time exhaustiveness (adding a phase = type error until wired)
|
|
14
|
+
* - Each phase is independently testable with mocked inputs
|
|
15
|
+
*/
|
|
16
|
+
import type { KnowledgeGraph } from '../../graph/types.js';
|
|
17
|
+
import type { PipelineProgress } from '../../../_shared/index.js';
|
|
18
|
+
import type { PipelineOptions } from '../pipeline.js';
|
|
19
|
+
/**
 * Context object handed to every phase.
 *
 * Every field is a `readonly` reference; the graph it points at is itself
 * mutable and is where phases record their nodes and edges.
 */
export interface PipelineContext {
    /** Repository root, as an absolute path. */
    readonly repoPath: string;
    /** The knowledge graph all phases add to — a single shared, mutable accumulator. */
    readonly graph: KnowledgeGraph;
    /** Callback through which phases report progress (for UI updates). */
    readonly onProgress: (progress: PipelineProgress) => void;
    /** Optional pipeline options (skipGraphPhases, skipWorkers, etc.). */
    readonly options?: PipelineOptions;
    /** Timestamp taken when the pipeline started; used for elapsed-time logging. */
    readonly pipelineStart: number;
}
|
|
32
|
+
/**
 * A phase's output bundled with its name and how long it took.
 *
 * @typeParam T - The phase's output type
 */
export interface PhaseResult<T> {
    /** Name of the phase that produced this result (equals the phase's `name`). */
    readonly phaseName: string;
    /** What the phase returned. */
    readonly output: T;
    /** Wall-clock time the phase took, in milliseconds. */
    readonly durationMs: number;
}
|
|
41
|
+
/**
 * One node of the ingestion pipeline's phase graph.
 *
 * @typeParam TOutput - The type this phase's `execute()` resolves to
 */
export interface PipelinePhase<TOutput = unknown> {
    /** Unique phase name, used for logging and for looking up results. */
    readonly name: string;
    /**
     * Names of the phases that must run first.
     * The runner guarantees they have all completed before `execute()` is called.
     */
    readonly deps: ReadonlyArray<string>;
    /**
     * Run the phase.
     *
     * @param ctx  Shared pipeline context (graph, repoPath, progress, options)
     * @param deps Map from dependency name to that phase's PhaseResult
     * @returns A promise for this phase's output
     */
    execute(ctx: PipelineContext, deps: ReadonlyMap<string, PhaseResult<unknown>>): Promise<TOutput>;
}
|
|
64
|
+
/**
 * Look up an upstream phase's output in the resolved dependency map.
 *
 * Type safety note: the runner keeps the outputs of all phases in a single
 * `Map<string, PhaseResult<unknown>>`, so the result is returned through an
 * `as T` cast. The caller is responsible for choosing the `T` that matches
 * the named phase; a wrong choice is not caught at compile time and shows up
 * as a runtime type error instead. This is a deliberate trade-off for a
 * static phase graph without a dynamic type registry.
 *
 * @param deps      Resolved dependency map supplied by the runner
 * @param phaseName Name of the upstream phase whose output is wanted
 * @returns The named phase's output, typed as `T`
 * @throws If `phaseName` is not present in `deps`
 */
export declare function getPhaseOutput<T>(deps: ReadonlyMap<string, PhaseResult<unknown>>, phaseName: string): T;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Phase — Type definitions.
|
|
3
|
+
*
|
|
4
|
+
* Each phase is a named node in the dependency graph with typed inputs and outputs.
|
|
5
|
+
* The runner resolves dependencies via topological sort and passes
|
|
6
|
+
* typed results from upstream phases as inputs to downstream phases.
|
|
7
|
+
*
|
|
8
|
+
* Design goals:
|
|
9
|
+
* - Explicit data flow between phases via typed outputs
|
|
10
|
+
* - The knowledge graph is a shared mutable accumulator — phases add nodes/edges
|
|
11
|
+
* and may read prior phases' contributions. This is intentional: the graph is
|
|
12
|
+
* the pipeline's primary output, not an inter-phase communication channel.
|
|
13
|
+
* - Compile-time exhaustiveness (adding a phase = type error until wired)
|
|
14
|
+
* - Each phase is independently testable with mocked inputs
|
|
15
|
+
*/
|
|
16
|
+
/**
|
|
17
|
+
* Helper to extract the typed output of a dependency phase.
|
|
18
|
+
*
|
|
19
|
+
* Type safety note: This uses an `as T` cast because the runner stores
|
|
20
|
+
* heterogeneous phase outputs in a single `Map<string, PhaseResult<unknown>>`.
|
|
21
|
+
* The cast is safe as long as callers use the correct output type for the
|
|
22
|
+
* named phase. Mismatches will surface as runtime type errors, not compile-time
|
|
23
|
+
* errors — this is an intentional trade-off for a static phase graph without
|
|
24
|
+
* a dynamic type registry.
|
|
25
|
+
*
|
|
26
|
+
* @param deps The resolved dependency map from the runner
|
|
27
|
+
* @param phaseName The name of the upstream phase whose output you need
|
|
28
|
+
* @returns The typed output of the phase
|
|
29
|
+
* @throws If the phase is not found in the dependency map
|
|
30
|
+
*/
|
|
31
|
+
export function getPhaseOutput(deps, phaseName) {
|
|
32
|
+
const result = deps.get(phaseName);
|
|
33
|
+
if (!result) {
|
|
34
|
+
throw new Error(`Phase '${phaseName}' not found in resolved dependencies`);
|
|
35
|
+
}
|
|
36
|
+
return result.output;
|
|
37
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wildcard import binding synthesis.
|
|
3
|
+
*
|
|
4
|
+
* Languages with whole-module import semantics (Go, Ruby, C/C++, Swift)
|
|
5
|
+
* import all exported symbols from a file, not specific named symbols.
|
|
6
|
+
* After parsing, we know which symbols each file exports (via graph
|
|
7
|
+
* `isExported`), so we can expand IMPORTS edges into per-symbol bindings
|
|
8
|
+
* that the cross-file propagation phase can use for type resolution.
|
|
9
|
+
*
|
|
10
|
+
* Also builds Python module-alias maps for namespace-import languages
|
|
11
|
+
* (`import models` → `models.User()` resolves to `models.py:User`).
|
|
12
|
+
*
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
15
|
+
import type { KnowledgeGraph } from '../../graph/types.js';
|
|
16
|
+
import type { createResolutionContext } from '../model/resolution-context.js';
|
|
17
|
+
import { SupportedLanguages } from '../../../_shared/index.js';
|
|
18
|
+
/** Whether `lang` imports whole modules (wildcard semantics) rather than named symbols. */
export declare function isWildcardImportLanguage(lang: SupportedLanguages): boolean;
/**
 * Whether `lang` needs binding synthesis before call resolution.
 * This holds for wildcard-import languages and for namespace-import languages (Python).
 */
export declare function needsSynthesis(lang: SupportedLanguages): boolean;
/**
 * Add synthetic namedImportMap entries for files written in whole-module-import languages.
 *
 * For every file that imports another file with wildcard semantics:
 *   1. collect the imported file's exported symbols from the graph nodes;
 *   2. add a binding `{ name → { sourcePath, exportedName } }` for each one.
 * Afterwards, module-alias maps are built for namespace-import languages (Python).
 *
 * @param graph Knowledge graph holding the parsed symbol nodes
 * @param ctx   Resolution context providing importMap and namedImportMap
 * @returns How many synthetic bindings were created
 */
export declare function synthesizeWildcardImportBindings(graph: KnowledgeGraph, ctx: ReturnType<typeof createResolutionContext>): number;
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wildcard import binding synthesis.
|
|
3
|
+
*
|
|
4
|
+
* Languages with whole-module import semantics (Go, Ruby, C/C++, Swift)
|
|
5
|
+
* import all exported symbols from a file, not specific named symbols.
|
|
6
|
+
* After parsing, we know which symbols each file exports (via graph
|
|
7
|
+
* `isExported`), so we can expand IMPORTS edges into per-symbol bindings
|
|
8
|
+
* that the cross-file propagation phase can use for type resolution.
|
|
9
|
+
*
|
|
10
|
+
* Also builds Python module-alias maps for namespace-import languages
|
|
11
|
+
* (`import models` → `models.User()` resolves to `models.py:User`).
|
|
12
|
+
*
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
15
|
+
import { getLanguageFromFilename } from '../../../_shared/index.js';
|
|
16
|
+
import { providers, getProviderForFile } from '../languages/index.js';
|
|
17
|
+
// ── Constants ──────────────────────────────────────────────────────────────
|
|
18
|
+
/** Graph node labels that count as top-level, importable symbols. */
const IMPORTABLE_SYMBOL_LABELS = new Set([
    'Function', 'Class', 'Interface', 'Struct',
    'Enum', 'Trait', 'TypeAlias', 'Const',
    'Static', 'Record', 'Union', 'Typedef',
    'Macro',
]);
/**
 * Upper bound on bindings per importing file. Keeps memory in check for
 * C/C++ files that include many large headers.
 */
const MAX_SYNTHETIC_BINDINGS_PER_FILE = 1000;
|
|
37
|
+
/** Ids of languages whose provider declares 'wildcard' import semantics. */
const WILDCARD_LANGUAGES = new Set();
/** Ids of languages whose provider declares anything other than 'named' import semantics. */
const SYNTHESIS_LANGUAGES = new Set();
// Both sets are derived from the registered providers once, at module load.
for (const provider of Object.values(providers)) {
    if (provider.importSemantics === 'wildcard') {
        WILDCARD_LANGUAGES.add(provider.id);
    }
    if (provider.importSemantics !== 'named') {
        SYNTHESIS_LANGUAGES.add(provider.id);
    }
}
/** Whether `lang` imports whole modules (wildcard semantics). */
export function isWildcardImportLanguage(lang) {
    return WILDCARD_LANGUAGES.has(lang);
}
/**
 * Whether `lang` needs binding synthesis before call resolution.
 * This holds for wildcard-import languages and for namespace-import languages (Python).
 */
export function needsSynthesis(lang) {
    return SYNTHESIS_LANGUAGES.has(lang);
}
|
|
54
|
+
// ── Main synthesis function ────────────────────────────────────────────────
|
|
55
|
+
/**
 * Add synthetic namedImportMap entries for files written in whole-module-import languages.
 *
 * Steps:
 *   1. Index exported, importable symbol names by file from the graph nodes.
 *      If there are none, return 0 immediately.
 *   2. Collect file-to-file IMPORTS edges of wildcard languages that
 *      `ctx.importMap` does not already contain.
 *   3. For each importing file (first from `ctx.importMap`, then from the graph
 *      edges) bind every exported name of each imported file, skipping names
 *      already bound and stopping at MAX_SYNTHETIC_BINDINGS_PER_FILE per file.
 *   4. Build module-alias maps for files of namespace-import languages.
 *
 * @param graph Knowledge graph holding the parsed symbol nodes
 * @param ctx   Resolution context providing importMap, namedImportMap and moduleAliasMap
 * @returns How many synthetic bindings were created
 */
export function synthesizeWildcardImportBindings(graph, ctx) {
    // Step 1: exported symbol names per file (single pass over the nodes).
    const exportedNamesByFile = new Map();
    graph.forEachNode((node) => {
        const props = node.properties;
        if (!props?.isExported || !IMPORTABLE_SYMBOL_LABELS.has(node.label))
            return;
        const { filePath, name } = props;
        if (!filePath || !name)
            return;
        const names = exportedNamesByFile.get(filePath);
        if (names) {
            names.push(name);
        }
        else {
            exportedNamesByFile.set(filePath, [name]);
        }
    });
    if (exportedNamesByFile.size === 0)
        return 0;
    // Step 2: graph-level IMPORTS edges for wildcard languages that ctx.importMap lacks.
    const fileIdPrefix = 'File:';
    const graphOnlyImports = new Map();
    graph.forEachRelationship((rel) => {
        const isFileToFileImport = rel.type === 'IMPORTS' &&
            rel.sourceId.startsWith(fileIdPrefix) &&
            rel.targetId.startsWith(fileIdPrefix);
        if (!isFileToFileImport)
            return;
        const importer = rel.sourceId.slice(fileIdPrefix.length);
        const imported = rel.targetId.slice(fileIdPrefix.length);
        const language = getLanguageFromFilename(importer);
        if (!language || !isWildcardImportLanguage(language))
            return;
        if (ctx.importMap.get(importer)?.has(imported))
            return;
        const targets = graphOnlyImports.get(importer);
        if (targets) {
            targets.add(imported);
        }
        else {
            graphOnlyImports.set(importer, new Set([imported]));
        }
    });
    // Step 3 helper: binds the exports of `importedFiles` into `importerPath`
    // and returns the number of bindings it added.
    function bindExportsFor(importerPath, importedFiles) {
        let bindings = ctx.namedImportMap.get(importerPath);
        // Bindings that already exist count towards the per-file cap.
        let bindingCount = bindings?.size ?? 0;
        let added = 0;
        for (const importedFile of importedFiles) {
            const names = exportedNamesByFile.get(importedFile);
            if (!names)
                continue;
            for (const exportedName of names) {
                if (bindingCount >= MAX_SYNTHETIC_BINDINGS_PER_FILE)
                    return added;
                // An existing binding for this name is kept; it is never overwritten.
                if (bindings?.has(exportedName))
                    continue;
                if (!bindings) {
                    bindings = new Map();
                    ctx.namedImportMap.set(importerPath, bindings);
                }
                bindings.set(exportedName, { sourcePath: importedFile, exportedName });
                bindingCount += 1;
                added += 1;
            }
        }
        return added;
    }
    let totalSynthesized = 0;
    // Synthesize from ctx.importMap (Ruby, C/C++, Swift file-based imports)
    for (const [filePath, importedFiles] of ctx.importMap) {
        const language = getLanguageFromFilename(filePath);
        if (language && isWildcardImportLanguage(language)) {
            totalSynthesized += bindExportsFor(filePath, importedFiles);
        }
    }
    // Synthesize from graph IMPORTS edges (Go and other wildcard-import languages)
    for (const [filePath, importedFiles] of graphOnlyImports) {
        totalSynthesized += bindExportsFor(filePath, importedFiles);
    }
    // Step 4: module-alias maps for namespace-import languages.
    // `import models` in app.py → moduleAliasMap['app.py']['models'] = 'models.py',
    // which lets `models.User()` resolve without ambiguous symbol expansion.
    for (const [filePath, importedFiles] of ctx.importMap) {
        const provider = getProviderForFile(filePath);
        if (provider && provider.importSemantics === 'namespace') {
            buildPythonModuleAliasForFile(ctx, filePath, importedFiles);
        }
    }
    return totalSynthesized;
}
|
|
158
|
+
/** Build module alias entries for namespace-import files (e.g. Python). */
|
|
159
|
+
function buildPythonModuleAliasForFile(ctx, callerFile, importedFiles) {
|
|
160
|
+
let aliasMap = ctx.moduleAliasMap.get(callerFile);
|
|
161
|
+
for (const importedFile of importedFiles) {
|
|
162
|
+
const lastSlash = importedFile.lastIndexOf('/');
|
|
163
|
+
const base = lastSlash >= 0 ? importedFile.slice(lastSlash + 1) : importedFile;
|
|
164
|
+
const dot = base.lastIndexOf('.');
|
|
165
|
+
const stem = dot >= 0 ? base.slice(0, dot) : base;
|
|
166
|
+
if (!stem)
|
|
167
|
+
continue;
|
|
168
|
+
if (!aliasMap) {
|
|
169
|
+
aliasMap = new Map();
|
|
170
|
+
ctx.moduleAliasMap.set(callerFile, aliasMap);
|
|
171
|
+
}
|
|
172
|
+
aliasMap.set(stem, importedFile);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
@@ -1,14 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline orchestrator — dependency-ordered ingestion pipeline.
|
|
3
|
+
*
|
|
4
|
+
* The pipeline is composed of named phases with explicit dependencies.
|
|
5
|
+
* Each phase is defined in its own file under `pipeline-phases/`.
|
|
6
|
+
* The runner in `pipeline-phases/runner.ts` executes phases in
|
|
7
|
+
* topological order, passing typed outputs from upstream phases as
|
|
8
|
+
* inputs to downstream phases.
|
|
9
|
+
*
|
|
10
|
+
* To add a new phase:
|
|
11
|
+
* 1. Create a new file in `pipeline-phases/` following the pattern
|
|
12
|
+
* 2. Export it from `pipeline-phases/index.ts`
|
|
13
|
+
* 3. Add it to the `ALL_PHASES` array below
|
|
14
|
+
*
|
|
15
|
+
* See ARCHITECTURE.md for the full phase dependency diagram.
|
|
16
|
+
*/
|
|
1
17
|
import { type PipelineProgress } from '../../_shared/index.js';
|
|
2
18
|
import { PipelineResult } from '../../types/pipeline.js';
|
|
3
|
-
/** A group of files with no mutual dependencies, safe to process in parallel. */
|
|
4
|
-
type IndependentFileGroup = readonly string[];
|
|
5
|
-
/** Kahn's algorithm: returns files grouped by topological level.
|
|
6
|
-
* Files in the same level have no mutual dependencies — safe to process in parallel.
|
|
7
|
-
* Files in cycles are returned as a final group (no cross-cycle propagation). */
|
|
8
|
-
export declare function topologicalLevelSort(importMap: ReadonlyMap<string, ReadonlySet<string>>): {
|
|
9
|
-
levels: readonly IndependentFileGroup[];
|
|
10
|
-
cycleCount: number;
|
|
11
|
-
};
|
|
12
19
|
export interface PipelineOptions {
|
|
13
20
|
/** Skip MRO, community detection, and process extraction for faster test runs. */
|
|
14
21
|
skipGraphPhases?: boolean;
|
|
@@ -16,4 +23,3 @@ export interface PipelineOptions {
|
|
|
16
23
|
skipWorkers?: boolean;
|
|
17
24
|
}
|
|
18
25
|
/**
 * Pipeline entry point declared by this orchestrator module.
 *
 * @param repoPath - Repository location as a string (presumably a filesystem path — confirm against the implementation).
 * @param onProgress - Callback that receives `PipelineProgress` values; when and how often it fires is not visible from this declaration.
 * @param options - Optional `PipelineOptions` (e.g. `skipGraphPhases`, `skipWorkers`).
 * @returns A promise that resolves to a `PipelineResult`.
 */
export declare const runPipelineFromRepo: (repoPath: string, onProgress: (progress: PipelineProgress) => void, options?: PipelineOptions) => Promise<PipelineResult>;
|
|
19
|
-
export {};
|