@kodus/kodus-graph 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kodus/kodus-graph",
3
- "version": "0.1.0",
3
+ "version": "0.2.1",
4
4
  "description": "Code graph builder for Kodus code review — parses source code into structural graphs with nodes, edges, and analysis",
5
5
  "type": "module",
6
6
  "bin": {
@@ -50,13 +50,13 @@
50
50
  "@ast-grep/lang-python": "^0.0.6",
51
51
  "@ast-grep/lang-ruby": "^0.0.7",
52
52
  "@ast-grep/lang-rust": "^0.0.7",
53
- "@ast-grep/napi": "^0.42.0",
54
- "commander": "^12.0.0",
53
+ "@ast-grep/napi": "^0.42.1",
54
+ "commander": "^14.0.3",
55
55
  "zod": "^4.3.6"
56
56
  },
57
57
  "devDependencies": {
58
58
  "@biomejs/biome": "^2.4.10",
59
59
  "@types/bun": "latest",
60
- "typescript": "^5.5.0"
60
+ "typescript": "^6.0.2"
61
61
  }
62
- }
62
+ }
@@ -0,0 +1,130 @@
1
+ import { performance } from 'perf_hooks';
2
+ import { type IndexedGraph, indexGraph } from '../graph/loader';
3
+ import type {
4
+ AffectedFlow,
5
+ ContextAnalysisMetadata,
6
+ GraphData,
7
+ GraphEdge,
8
+ GraphNode,
9
+ ParseMetadata,
10
+ } from '../graph/types';
11
+ import { computeBlastRadius } from './blast-radius';
12
+ import { computeStructuralDiff, type DiffResult } from './diff';
13
+ import { enrichChangedFunctions } from './enrich';
14
+ import { detectFlows } from './flows';
15
+ import { extractInheritance } from './inheritance';
16
+ import { computeRiskScore } from './risk-score';
17
+ import { findTestGaps } from './test-gaps';
18
+
19
/**
 * Full result of the V2 context analysis: the merged graph plus every
 * derived analysis, ready to be serialized as JSON or rendered as a prompt.
 */
export interface ContextV2Output {
    graph: {
        nodes: GraphNode[];
        edges: GraphEdge[];
        metadata: ParseMetadata;
    };
    analysis: {
        // Return types are derived from the analysis helpers so this shape
        // stays in sync with them automatically.
        changed_functions: ReturnType<typeof enrichChangedFunctions>;
        structural_diff: DiffResult;
        blast_radius: ReturnType<typeof computeBlastRadius>;
        affected_flows: AffectedFlow[];
        inheritance: ReturnType<typeof extractInheritance>;
        test_gaps: ReturnType<typeof findTestGaps>;
        risk: ReturnType<typeof computeRiskScore>;
        metadata: ContextAnalysisMetadata;
    };
}
36
+
37
/** Inputs for buildContextV2. */
interface BuildContextV2Options {
    /** Graph of the changed files, merged with the main graph when one was provided. */
    mergedGraph: GraphData;
    /** Previous main graph used for the structural diff; null when not available. */
    oldGraph: GraphData | null;
    /** Paths of the files under review. */
    changedFiles: string[];
    /** Minimum CALLS-edge confidence for caller enrichment. */
    minConfidence: number;
    /** Maximum BFS depth for the blast-radius computation. */
    maxDepth: number;
}
44
+
45
+ export function buildContextV2(opts: BuildContextV2Options): ContextV2Output {
46
+ const t0 = performance.now();
47
+ const { mergedGraph, oldGraph, changedFiles, minConfidence, maxDepth } = opts;
48
+
49
+ // Phase 1: Index
50
+ const indexed = indexGraph(mergedGraph);
51
+ const oldIndexed: IndexedGraph = oldGraph ? indexGraph(oldGraph) : indexGraph({ nodes: [], edges: [] });
52
+
53
+ // Phase 2: Independent analyses
54
+ const changedSet = new Set(changedFiles);
55
+ const newNodesInChanged = mergedGraph.nodes.filter((n) => changedSet.has(n.file_path));
56
+ const newEdgesInChanged = mergedGraph.edges.filter((e) => changedSet.has(e.file_path));
57
+
58
+ const structuralDiff = computeStructuralDiff(oldIndexed, newNodesInChanged, newEdgesInChanged, changedFiles);
59
+ const blastRadius = computeBlastRadius(mergedGraph, changedFiles, maxDepth);
60
+ const allFlows = detectFlows(indexed, { maxDepth: 10, type: 'all' });
61
+ const testGaps = findTestGaps(mergedGraph, changedFiles);
62
+ const risk = computeRiskScore(mergedGraph, changedFiles, blastRadius);
63
+ const inheritance = extractInheritance(indexed, changedFiles);
64
+
65
+ // Phase 3: Filter affected flows
66
+ const changedFuncSet = new Set(
67
+ mergedGraph.nodes.filter((n) => changedSet.has(n.file_path) && !n.is_test).map((n) => n.qualified_name),
68
+ );
69
+
70
+ const affectedFlows: AffectedFlow[] = [];
71
+ for (const flow of allFlows.flows) {
72
+ const touches = flow.path.filter((qn) => changedFuncSet.has(qn));
73
+ if (touches.length > 0) {
74
+ affectedFlows.push({
75
+ entry_point: flow.entry_point,
76
+ type: flow.type,
77
+ touches_changed: touches,
78
+ depth: flow.depth,
79
+ path: flow.path,
80
+ });
81
+ }
82
+ }
83
+
84
+ // Phase 3: Enrichment
85
+ const enriched = enrichChangedFunctions(indexed, changedFiles, structuralDiff, allFlows.flows, minConfidence);
86
+
87
+ // Phase 4: Assembly
88
+ const totalCallers = enriched.reduce((s, f) => s + f.callers.length, 0);
89
+ const totalCallees = enriched.reduce((s, f) => s + f.callees.length, 0);
90
+
91
+ const metadata: ContextAnalysisMetadata = {
92
+ changed_functions_count: enriched.length,
93
+ total_callers: totalCallers,
94
+ total_callees: totalCallees,
95
+ untested_count: testGaps.length,
96
+ affected_flows_count: affectedFlows.length,
97
+ duration_ms: Math.round(performance.now() - t0),
98
+ min_confidence: minConfidence,
99
+ };
100
+
101
+ const graphMetadata: ParseMetadata = indexed.metadata.repo_dir
102
+ ? indexed.metadata
103
+ : {
104
+ repo_dir: '',
105
+ files_parsed: changedFiles.length,
106
+ total_nodes: mergedGraph.nodes.length,
107
+ total_edges: mergedGraph.edges.length,
108
+ duration_ms: 0,
109
+ parse_errors: 0,
110
+ extract_errors: 0,
111
+ };
112
+
113
+ return {
114
+ graph: {
115
+ nodes: mergedGraph.nodes,
116
+ edges: mergedGraph.edges,
117
+ metadata: graphMetadata,
118
+ },
119
+ analysis: {
120
+ changed_functions: enriched,
121
+ structural_diff: structuralDiff,
122
+ blast_radius: blastRadius,
123
+ affected_flows: affectedFlows,
124
+ inheritance,
125
+ test_gaps: testGaps,
126
+ risk,
127
+ metadata,
128
+ },
129
+ };
130
+ }
@@ -0,0 +1,109 @@
1
+ import type { IndexedGraph } from '../graph/loader';
2
+ import type { CalleeRef, CallerRef, EnrichedFunction } from '../graph/types';
3
+ import type { DiffResult } from './diff';
4
+ import type { Flow } from './flows';
5
+
6
+ export function enrichChangedFunctions(
7
+ graph: IndexedGraph,
8
+ changedFiles: string[],
9
+ diff: DiffResult,
10
+ allFlows: Flow[],
11
+ minConfidence: number,
12
+ ): EnrichedFunction[] {
13
+ const changedSet = new Set(changedFiles);
14
+
15
+ // Pre-index diff results
16
+ const addedSet = new Set(diff.nodes.added.map((n) => n.qualified_name));
17
+ const modifiedMap = new Map(diff.nodes.modified.map((m) => [m.qualified_name, m.changes]));
18
+
19
+ // Pre-index TESTED_BY
20
+ const testedFiles = new Set(graph.edges.filter((e) => e.kind === 'TESTED_BY').map((e) => e.source_qualified));
21
+
22
+ // Pre-index flows by function
23
+ const flowsByFunction = new Map<string, string[]>();
24
+ for (const flow of allFlows) {
25
+ for (const qn of flow.path) {
26
+ const list = flowsByFunction.get(qn);
27
+ if (list) {
28
+ if (!list.includes(flow.entry_point)) list.push(flow.entry_point);
29
+ } else {
30
+ flowsByFunction.set(qn, [flow.entry_point]);
31
+ }
32
+ }
33
+ }
34
+
35
+ // Filter functions in changed files
36
+ const changedFunctions = graph.nodes.filter(
37
+ (n) =>
38
+ changedSet.has(n.file_path) &&
39
+ !n.is_test &&
40
+ n.kind !== 'Constructor' &&
41
+ n.kind !== 'Class' &&
42
+ n.kind !== 'Interface' &&
43
+ n.kind !== 'Enum',
44
+ );
45
+
46
+ return changedFunctions
47
+ .sort((a, b) => a.file_path.localeCompare(b.file_path) || a.line_start - b.line_start)
48
+ .map((node) => {
49
+ // Callers
50
+ const callers: CallerRef[] = [];
51
+ for (const edge of graph.reverseAdjacency.get(node.qualified_name) || []) {
52
+ if (edge.kind !== 'CALLS') continue;
53
+ if ((edge.confidence ?? 0) < minConfidence) continue;
54
+ const sourceNode = graph.byQualified.get(edge.source_qualified);
55
+ callers.push({
56
+ qualified_name: edge.source_qualified,
57
+ name: sourceNode?.name || edge.source_qualified.split('::').pop() || 'unknown',
58
+ file_path: sourceNode?.file_path || edge.file_path,
59
+ line: edge.line,
60
+ confidence: edge.confidence || 0,
61
+ });
62
+ }
63
+
64
+ // Callees
65
+ const callees: CalleeRef[] = [];
66
+ const seenCallees = new Set<string>();
67
+ for (const edge of graph.adjacency.get(node.qualified_name) || []) {
68
+ if (edge.kind !== 'CALLS') continue;
69
+ if (seenCallees.has(edge.target_qualified)) continue;
70
+ seenCallees.add(edge.target_qualified);
71
+ const targetNode = graph.byQualified.get(edge.target_qualified);
72
+ const name = targetNode?.name || edge.target_qualified.split('::').pop() || 'unknown';
73
+ const params = targetNode?.params && targetNode.params !== '()' ? targetNode.params : '';
74
+ const ret = targetNode?.return_type ? ` -> ${targetNode.return_type}` : '';
75
+ callees.push({
76
+ qualified_name: edge.target_qualified,
77
+ name,
78
+ file_path: targetNode?.file_path || '',
79
+ signature: `${name}${params}${ret}`,
80
+ });
81
+ }
82
+
83
+ // Signature
84
+ const shortName = node.name.includes('.') ? node.name.split('.').pop()! : node.name;
85
+ const params = node.params && node.params !== '()' ? node.params : '';
86
+ const ret = node.return_type ? ` -> ${node.return_type}` : '';
87
+ const signature = `${shortName}${params}${ret}`;
88
+
89
+ // Diff
90
+ const isNew = addedSet.has(node.qualified_name);
91
+ const diffChanges = isNew ? [] : modifiedMap.get(node.qualified_name) || [];
92
+
93
+ return {
94
+ qualified_name: node.qualified_name,
95
+ name: node.name,
96
+ kind: node.kind,
97
+ signature,
98
+ file_path: node.file_path,
99
+ line_start: node.line_start,
100
+ line_end: node.line_end,
101
+ callers,
102
+ callees,
103
+ has_test_coverage: testedFiles.has(node.file_path),
104
+ diff_changes: diffChanges,
105
+ is_new: isNew,
106
+ in_flows: flowsByFunction.get(node.qualified_name) || [],
107
+ };
108
+ });
109
+ }
@@ -0,0 +1,34 @@
1
+ import type { IndexedGraph } from '../graph/loader';
2
+ import type { InheritanceEntry } from '../graph/types';
3
+
4
+ export function extractInheritance(graph: IndexedGraph, changedFiles: string[]): InheritanceEntry[] {
5
+ const changedSet = new Set(changedFiles);
6
+ const entries: InheritanceEntry[] = [];
7
+
8
+ const changedClasses = graph.nodes.filter((n) => changedSet.has(n.file_path) && n.kind === 'Class');
9
+
10
+ for (const cls of changedClasses) {
11
+ let extendsClass: string | undefined;
12
+ const implementsList: string[] = [];
13
+ const children: string[] = [];
14
+
15
+ for (const edge of graph.adjacency.get(cls.qualified_name) || []) {
16
+ if (edge.kind === 'INHERITS') extendsClass = edge.target_qualified;
17
+ if (edge.kind === 'IMPLEMENTS') implementsList.push(edge.target_qualified);
18
+ }
19
+
20
+ for (const edge of graph.reverseAdjacency.get(cls.qualified_name) || []) {
21
+ if (edge.kind === 'INHERITS') children.push(edge.source_qualified);
22
+ }
23
+
24
+ entries.push({
25
+ qualified_name: cls.qualified_name,
26
+ file_path: cls.file_path,
27
+ extends: extendsClass,
28
+ implements: implementsList,
29
+ children,
30
+ });
31
+ }
32
+
33
+ return entries;
34
+ }
@@ -0,0 +1,135 @@
1
+ import type { ContextV2Output } from './context-builder';
2
+
3
/**
 * Render a ContextV2Output as a compact text prompt for review.
 *
 * Only the `analysis` half of the output is used; the raw graph is omitted.
 * Sections (each skipped when empty): a header with risk/blast-radius
 * figures, changed functions, inheritance, blast radius by depth, test gaps,
 * and a structural-diff summary. Qualified names are shortened to their last
 * `::` segment throughout.
 */
export function formatPrompt(output: ContextV2Output): string {
    const { analysis } = output;
    const lines: string[] = [];

    // Header: one-line summary of risk and impact.
    const risk = analysis.risk;
    const br = analysis.blast_radius;
    const meta = analysis.metadata;
    lines.push('# Code Review Context');
    lines.push('');
    lines.push(
        `Risk: ${risk.level} (${risk.score}) | ${br.total_functions} functions impacted across ${br.total_files} files | ${meta.untested_count} untested`,
    );
    lines.push('');

    // Changed functions
    if (analysis.changed_functions.length > 0) {
        lines.push('## Changed Functions');
        lines.push('');

        for (const fn of analysis.changed_functions) {
            lines.push(`### ${fn.signature} [${fn.file_path}:${fn.line_start}-${fn.line_end}]`);

            // Status: new / modified (with change descriptions) / unchanged.
            if (fn.is_new) {
                lines.push('Status: new');
            } else if (fn.diff_changes.length > 0) {
                lines.push(`Status: modified (${fn.diff_changes.join(', ')})`);
            } else {
                lines.push('Status: unchanged');
            }

            // Callers — confidence is only surfaced when below 0.85.
            if (fn.callers.length > 0) {
                lines.push('Callers:');
                for (const c of fn.callers) {
                    const conf = c.confidence < 0.85 ? ` confidence=${c.confidence.toFixed(2)}` : '';
                    lines.push(` - ${c.name} [${c.file_path}:${c.line}]${conf}`);
                }
            } else {
                lines.push('Callers: none');
            }

            // Callees
            if (fn.callees.length > 0) {
                lines.push('Callees:');
                for (const c of fn.callees) {
                    lines.push(` - ${c.signature} [${c.file_path}]`);
                }
            } else {
                lines.push('Callees: none');
            }

            // Test coverage
            lines.push(`Test coverage: ${fn.has_test_coverage ? 'yes' : 'no'}`);

            // Affected flows: show the full path when the flow was kept as
            // affected; otherwise fall back to the bare entry-point name.
            if (fn.in_flows.length > 0) {
                lines.push('Affected flows:');
                for (const ep of fn.in_flows) {
                    const flow = analysis.affected_flows.find((f) => f.entry_point === ep);
                    if (flow) {
                        const prefix = flow.type === 'http' ? 'HTTP' : 'TEST';
                        lines.push(` - ${prefix}: ${flow.path.map((q) => q.split('::').pop()).join(' → ')}`);
                    } else {
                        lines.push(` - ${ep.split('::').pop()}`);
                    }
                }
            } else {
                lines.push('Affected flows: none');
            }

            lines.push('');
        }
    }

    // Inheritance
    if (analysis.inheritance.length > 0) {
        lines.push('## Inheritance');
        lines.push('');
        for (const entry of analysis.inheritance) {
            const name = entry.qualified_name.split('::').pop();
            const parts: string[] = [];
            if (entry.extends) parts.push(`extends ${entry.extends.split('::').pop()}`);
            if (entry.implements.length > 0)
                parts.push(`implements ${entry.implements.map((i) => i.split('::').pop()).join(', ')}`);
            lines.push(`- ${name} ${parts.join(', ')}`);
            if (entry.children.length > 0) {
                lines.push(` Children: ${entry.children.map((c) => c.split('::').pop()).join(', ')}`);
            }
        }
        lines.push('');
    }

    // Blast radius by depth.
    // NOTE(review): Object.keys(...).sort() orders depth keys lexicographically;
    // with 10+ depth levels "10" sorts before "2" — confirm maxDepth stays small.
    const byDepth = analysis.blast_radius.by_depth;
    const depthKeys = Object.keys(byDepth).sort();
    if (depthKeys.length > 0) {
        lines.push('## Blast Radius');
        lines.push('');
        for (const depth of depthKeys) {
            const names = byDepth[depth].map((q) => q.split('::').pop());
            lines.push(`Depth ${depth}: ${names.join(', ')} (${names.length} functions)`);
        }
        lines.push('');
    }

    // Test gaps
    if (analysis.test_gaps.length > 0) {
        lines.push('## Test Gaps');
        lines.push('');
        for (const gap of analysis.test_gaps) {
            const name = gap.function.split('::').pop();
            lines.push(`- ${name} [${gap.file_path}:${gap.line_start}]`);
        }
        lines.push('');
    }

    // Structural diff summary
    const diff = analysis.structural_diff;
    if (diff.summary.added > 0 || diff.summary.removed > 0 || diff.summary.modified > 0) {
        lines.push('## Structural Changes');
        lines.push('');
        const parts: string[] = [];
        if (diff.summary.added > 0) parts.push(`${diff.summary.added} added`);
        if (diff.summary.removed > 0) parts.push(`${diff.summary.removed} removed`);
        if (diff.summary.modified > 0) parts.push(`${diff.summary.modified} modified`);
        lines.push(parts.join(', '));
        lines.push('');
    }

    return lines.join('\n');
}
package/src/cli.ts CHANGED
@@ -13,7 +13,7 @@ import { executeUpdate } from './commands/update';
13
13
 
14
14
  const program = new Command();
15
15
 
16
- program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.1.0');
16
+ program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.2.1');
17
17
 
18
18
  program
19
19
  .command('parse')
@@ -21,6 +21,8 @@ program
21
21
  .option('--all', 'Parse all files in repo')
22
22
  .option('--files <paths...>', 'Parse specific files')
23
23
  .option('--repo-dir <path>', 'Repository root directory', '.')
24
+ .option('--include <glob...>', 'Include only files matching glob (repeatable)')
25
+ .option('--exclude <glob...>', 'Exclude files matching glob (repeatable)')
24
26
  .requiredOption('--out <path>', 'Output JSON file path')
25
27
  .action(async (opts) => {
26
28
  const repoDir = resolve(opts.repoDir);
@@ -33,6 +35,8 @@ program
33
35
  files: opts.files,
34
36
  all: opts.all ?? false,
35
37
  out: opts.out,
38
+ include: opts.include,
39
+ exclude: opts.exclude,
36
40
  });
37
41
  });
38
42
 
@@ -64,17 +68,27 @@ program
64
68
  .option('--repo-dir <path>', 'Repository root directory', '.')
65
69
  .option('--graph <path>', 'Path to main graph JSON')
66
70
  .requiredOption('--out <path>', 'Output JSON file path')
71
+ .option('--min-confidence <n>', 'Minimum CALLS edge confidence', '0.5')
72
+ .option('--max-depth <n>', 'Blast radius BFS depth', '3')
73
+ .option('--format <type>', 'Output format: json or prompt', 'json')
67
74
  .action(async (opts) => {
68
75
  const repoDir = resolve(opts.repoDir);
69
76
  if (!existsSync(repoDir)) {
70
77
  process.stderr.write(`Error: --repo-dir does not exist: ${repoDir}\n`);
71
78
  process.exit(1);
72
79
  }
80
+ if (opts.format !== 'json' && opts.format !== 'prompt') {
81
+ process.stderr.write('Error: --format must be "json" or "prompt"\n');
82
+ process.exit(1);
83
+ }
73
84
  await executeContext({
74
85
  repoDir: opts.repoDir,
75
86
  files: opts.files,
76
87
  graph: opts.graph,
77
88
  out: opts.out,
89
+ minConfidence: Number.parseFloat(opts.minConfidence),
90
+ maxDepth: Number.parseInt(opts.maxDepth, 10),
91
+ format: opts.format,
78
92
  });
79
93
  });
80
94
 
@@ -1,8 +1,9 @@
1
1
  import { readFileSync, rmSync, writeFileSync } from 'fs';
2
2
  import { resolve } from 'path';
3
- import { buildReviewContext } from '../analysis/review-context';
3
+ import { buildContextV2 } from '../analysis/context-builder';
4
+ import { formatPrompt } from '../analysis/prompt-formatter';
4
5
  import { mergeGraphs } from '../graph/merger';
5
- import type { ContextOutput, GraphData, MainGraphInput } from '../graph/types';
6
+ import type { GraphData, MainGraphInput } from '../graph/types';
6
7
  import { log } from '../shared/logger';
7
8
  import { GraphInputSchema } from '../shared/schemas';
8
9
  import { createSecureTempFile } from '../shared/temp';
@@ -13,6 +14,9 @@ interface ContextOptions {
13
14
  files: string[];
14
15
  graph?: string;
15
16
  out: string;
17
+ minConfidence: number;
18
+ maxDepth: number;
19
+ format: 'json' | 'prompt';
16
20
  }
17
21
 
18
22
  export async function executeContext(opts: ContextOptions): Promise<void> {
@@ -31,6 +35,8 @@ export async function executeContext(opts: ContextOptions): Promise<void> {
31
35
 
32
36
  // Load and merge with main graph if provided
33
37
  let mergedGraph: GraphData;
38
+ let oldGraph: GraphData | null = null;
39
+
34
40
  if (opts.graph) {
35
41
  let raw: unknown;
36
42
  try {
@@ -44,6 +50,7 @@ export async function executeContext(opts: ContextOptions): Promise<void> {
44
50
  process.stderr.write(`Error: Invalid graph JSON: ${validated.error.message}\n`);
45
51
  process.exit(1);
46
52
  }
53
+ oldGraph = { nodes: validated.data.nodes, edges: validated.data.edges };
47
54
  const mainGraph: MainGraphInput = {
48
55
  repo_id: '',
49
56
  sha: '',
@@ -55,10 +62,20 @@ export async function executeContext(opts: ContextOptions): Promise<void> {
55
62
  mergedGraph = { nodes: parseResult.nodes, edges: parseResult.edges };
56
63
  }
57
64
 
58
- // Build review context
59
- const contextOutput: ContextOutput = buildReviewContext(mergedGraph, opts.files);
65
+ // Build V2 context
66
+ const output = buildContextV2({
67
+ mergedGraph,
68
+ oldGraph,
69
+ changedFiles: opts.files,
70
+ minConfidence: opts.minConfidence,
71
+ maxDepth: opts.maxDepth,
72
+ });
60
73
 
61
- writeFileSync(opts.out, JSON.stringify(contextOutput, null, 2));
74
+ if (opts.format === 'prompt') {
75
+ writeFileSync(opts.out, formatPrompt(output));
76
+ } else {
77
+ writeFileSync(opts.out, JSON.stringify(output, null, 2));
78
+ }
62
79
  } finally {
63
80
  try {
64
81
  rmSync(tmp.dir, { recursive: true, force: true });
@@ -1,8 +1,8 @@
1
- import { writeFileSync } from 'fs';
2
- import { relative, resolve } from 'path';
1
+ import { resolve, relative } from 'path';
3
2
  import { performance } from 'perf_hooks';
4
3
  import { buildGraphData } from '../graph/builder';
5
- import type { ImportEdge, ParseOutput } from '../graph/types';
4
+ import { writeGraphJSON } from '../graph/json-writer';
5
+ import type { ImportEdge } from '../graph/types';
6
6
  import { parseBatch } from '../parser/batch';
7
7
  import { discoverFiles } from '../parser/discovery';
8
8
  import { resolveAllCalls } from '../resolver/call-resolver';
@@ -12,11 +12,13 @@ import { createSymbolTable } from '../resolver/symbol-table';
12
12
  import { computeFileHash } from '../shared/file-hash';
13
13
  import { log } from '../shared/logger';
14
14
 
15
- interface ParseOptions {
15
+ export interface ParseOptions {
16
16
  repoDir: string;
17
17
  files?: string[];
18
18
  all: boolean;
19
19
  out: string;
20
+ include?: string[];
21
+ exclude?: string[];
20
22
  }
21
23
 
22
24
  export async function executeParse(opts: ParseOptions): Promise<void> {
@@ -24,27 +26,25 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
24
26
  const repoDir = resolve(opts.repoDir);
25
27
 
26
28
  // Phase 1: Discover files
27
- const files = discoverFiles(repoDir, opts.all ? undefined : opts.files);
29
+ const files = discoverFiles(repoDir, opts.all ? undefined : opts.files, opts.include, opts.exclude);
28
30
  process.stderr.write(`[1/5] Discovered ${files.length} files\n`);
29
31
 
30
32
  // Phase 2: Parse + extract
31
- const rawGraph = await parseBatch(files, repoDir);
33
+ let rawGraph = await parseBatch(files, repoDir);
32
34
  process.stderr.write(
33
35
  `[2/5] Parsed ${rawGraph.functions.length} functions, ${rawGraph.classes.length} classes, ${rawGraph.rawCalls.length} call sites\n`,
34
36
  );
35
37
 
36
38
  // Phase 3: Resolve imports
37
39
  const tsconfigAliases = loadTsconfigAliases(repoDir);
38
- const symbolTable = createSymbolTable();
39
- const importMap = createImportMap();
40
- const importEdges: ImportEdge[] = [];
40
+ let symbolTable = createSymbolTable();
41
+ let importMap = createImportMap();
42
+ let importEdges: ImportEdge[] = [];
41
43
 
42
- // Populate symbol table
43
44
  for (const f of rawGraph.functions) symbolTable.add(f.file, f.name, f.qualified);
44
45
  for (const c of rawGraph.classes) symbolTable.add(c.file, c.name, c.qualified);
45
46
  for (const i of rawGraph.interfaces) symbolTable.add(i.file, i.name, i.qualified);
46
47
 
47
- // Resolve each import
48
48
  for (const imp of rawGraph.imports) {
49
49
  const langKey = imp.lang === 'python' ? 'python' : imp.lang === 'ruby' ? 'ruby' : 'typescript';
50
50
  const resolved = resolveImport(resolve(repoDir, imp.file), imp.module, langKey, repoDir, tsconfigAliases);
@@ -64,7 +64,7 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
64
64
  );
65
65
 
66
66
  // Phase 4: Resolve calls
67
- const { callEdges, stats } = resolveAllCalls(rawGraph.rawCalls, rawGraph.diMaps, symbolTable, importMap);
67
+ let { callEdges, stats } = resolveAllCalls(rawGraph.rawCalls, rawGraph.diMaps, symbolTable, importMap);
68
68
  process.stderr.write(
69
69
  `[4/5] Resolved ${callEdges.length} calls (DI:${stats.di} same:${stats.same} import:${stats.import} unique:${stats.unique} ambiguous:${stats.ambiguous} noise:${stats.noise})\n`,
70
70
  );
@@ -79,22 +79,27 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
79
79
  }
80
80
  }
81
81
 
82
+ const parseErrors = rawGraph.parseErrors;
83
+ const extractErrors = rawGraph.extractErrors;
82
84
  const graphData = buildGraphData(rawGraph, callEdges, importEdges, repoDir, fileHashes);
83
85
  process.stderr.write(`[5/5] Built graph: ${graphData.nodes.length} nodes, ${graphData.edges.length} edges\n`);
84
86
 
85
- const output: ParseOutput = {
86
- metadata: {
87
- repo_dir: repoDir,
88
- files_parsed: files.length,
89
- total_nodes: graphData.nodes.length,
90
- total_edges: graphData.edges.length,
91
- duration_ms: Math.round(performance.now() - t0),
92
- parse_errors: rawGraph.parseErrors,
93
- extract_errors: rawGraph.extractErrors,
94
- },
95
- nodes: graphData.nodes,
96
- edges: graphData.edges,
87
+ // Release intermediaries no longer needed after buildGraphData
88
+ rawGraph = null as any;
89
+ symbolTable = null as any;
90
+ importMap = null as any;
91
+ callEdges = null as any;
92
+ importEdges = null as any;
93
+
94
+ const metadata = {
95
+ repo_dir: repoDir,
96
+ files_parsed: files.length,
97
+ total_nodes: graphData.nodes.length,
98
+ total_edges: graphData.edges.length,
99
+ duration_ms: Math.round(performance.now() - t0),
100
+ parse_errors: parseErrors,
101
+ extract_errors: extractErrors,
97
102
  };
98
103
 
99
- writeFileSync(opts.out, JSON.stringify(output, null, 2));
104
+ writeGraphJSON(opts.out, metadata, graphData.nodes, graphData.edges);
100
105
  }
@@ -0,0 +1,43 @@
1
+ import { openSync, writeSync, closeSync } from 'fs';
2
+ import type { GraphEdge, GraphNode, ParseMetadata } from './types';
3
+
4
+ /**
5
+ * Write graph output as JSON to disk using incremental serialization.
6
+ *
7
+ * Instead of JSON.stringify on the full output (which creates a ~100-300 MB
8
+ * string for large repos), this writes each node/edge individually.
9
+ * Peak memory: only one JSON.stringify(singleNode) string at a time (~1 KB).
10
+ */
11
+ export function writeGraphJSON(
12
+ out: string,
13
+ metadata: ParseMetadata,
14
+ nodes: GraphNode[],
15
+ edges: GraphEdge[],
16
+ ): void {
17
+ const fd = openSync(out, 'w');
18
+
19
+ try {
20
+ writeSync(fd, '{"metadata":');
21
+ writeSync(fd, JSON.stringify(metadata));
22
+
23
+ // Nodes
24
+ writeSync(fd, ',"nodes":[');
25
+ for (let i = 0; i < nodes.length; i++) {
26
+ if (i > 0) writeSync(fd, ',');
27
+ writeSync(fd, '\n');
28
+ writeSync(fd, JSON.stringify(nodes[i]));
29
+ }
30
+ writeSync(fd, '\n]');
31
+
32
+ // Edges
33
+ writeSync(fd, ',"edges":[');
34
+ for (let i = 0; i < edges.length; i++) {
35
+ if (i > 0) writeSync(fd, ',');
36
+ writeSync(fd, '\n');
37
+ writeSync(fd, JSON.stringify(edges[i]));
38
+ }
39
+ writeSync(fd, '\n]}');
40
+ } finally {
41
+ closeSync(fd);
42
+ }
43
+ }
@@ -1,7 +1,7 @@
1
1
  // src/graph/loader.ts
2
2
  import { readFileSync } from 'fs';
3
3
  import { z } from 'zod';
4
- import type { GraphEdge, GraphNode, ParseMetadata } from './types';
4
+ import type { GraphData, GraphEdge, GraphNode, ParseMetadata } from './types';
5
5
 
6
6
  const ParseOutputSchema = z.object({
7
7
  metadata: z.object({
@@ -55,19 +55,17 @@ export interface IndexedGraph {
55
55
  metadata: ParseMetadata;
56
56
  }
57
57
 
58
- export function loadGraph(path: string): IndexedGraph {
59
- let raw: unknown;
60
- try {
61
- raw = JSON.parse(readFileSync(path, 'utf-8'));
62
- } catch (err) {
63
- throw new Error(`Failed to read graph file: ${path} — ${String(err)}`);
64
- }
65
-
66
- const parsed = ParseOutputSchema.parse(raw);
67
-
68
- const nodes = parsed.nodes as GraphNode[];
69
- const edges = parsed.edges as GraphEdge[];
70
- const metadata = parsed.metadata as ParseMetadata;
58
+ export function indexGraph(data: GraphData, metadata?: ParseMetadata): IndexedGraph {
59
+ const { nodes, edges } = data;
60
+ const meta: ParseMetadata = metadata ?? {
61
+ repo_dir: '',
62
+ files_parsed: 0,
63
+ total_nodes: nodes.length,
64
+ total_edges: edges.length,
65
+ duration_ms: 0,
66
+ parse_errors: 0,
67
+ extract_errors: 0,
68
+ };
71
69
 
72
70
  const byQualified = new Map<string, GraphNode>();
73
71
  const byFile = new Map<string, GraphNode[]>();
@@ -96,5 +94,20 @@ export function loadGraph(path: string): IndexedGraph {
96
94
  else edgesByKind.set(edge.kind, [edge]);
97
95
  }
98
96
 
99
- return { nodes, edges, byQualified, byFile, adjacency, reverseAdjacency, edgesByKind, metadata };
97
+ return { nodes, edges, byQualified, byFile, adjacency, reverseAdjacency, edgesByKind, metadata: meta };
98
+ }
99
+
100
+ export function loadGraph(path: string): IndexedGraph {
101
+ let raw: unknown;
102
+ try {
103
+ raw = JSON.parse(readFileSync(path, 'utf-8'));
104
+ } catch (err) {
105
+ throw new Error(`Failed to read graph file: ${path} — ${String(err)}`);
106
+ }
107
+
108
+ const parsed = ParseOutputSchema.parse(raw);
109
+ return indexGraph(
110
+ { nodes: parsed.nodes as GraphNode[], edges: parsed.edges as GraphEdge[] },
111
+ parsed.metadata as ParseMetadata,
112
+ );
100
113
  }
@@ -116,6 +116,64 @@ export interface MainGraphInput {
116
116
  edges: GraphEdge[];
117
117
  }
118
118
 
119
// ── Context V2 types ──

/** A function that calls a changed function (incoming CALLS edge). */
export interface CallerRef {
    qualified_name: string;
    name: string;
    file_path: string;
    // Line of the call site, taken from the CALLS edge.
    line: number;
    // Resolver confidence for the CALLS edge.
    confidence: number;
}

/** A function invoked by a changed function (outgoing CALLS edge). */
export interface CalleeRef {
    qualified_name: string;
    name: string;
    file_path: string;
    // Display signature, e.g. "name(params) -> returnType".
    signature: string;
}

/** A function in a changed file, enriched with graph context for review. */
export interface EnrichedFunction {
    qualified_name: string;
    name: string;
    kind: NodeKind;
    signature: string;
    file_path: string;
    line_start: number;
    line_end: number;
    callers: CallerRef[];
    callees: CalleeRef[];
    // Whether test coverage was detected for this function's file.
    has_test_coverage: boolean;
    // Structural-diff change descriptions; empty for new functions.
    diff_changes: string[];
    is_new: boolean;
    // Entry points of detected flows that pass through this function.
    in_flows: string[];
}

/** A detected flow that passes through at least one changed function. */
export interface AffectedFlow {
    entry_point: string;
    type: 'test' | 'http';
    // Qualified names of changed functions on the flow's path.
    touches_changed: string[];
    depth: number;
    path: string[];
}

/** Inheritance relations for a class declared in a changed file. */
export interface InheritanceEntry {
    qualified_name: string;
    file_path: string;
    // Qualified name of the parent class, when one exists.
    extends?: string;
    implements: string[];
    // Qualified names of known subclasses.
    children: string[];
}

/** Summary counters describing one context-analysis run. */
export interface ContextAnalysisMetadata {
    changed_functions_count: number;
    total_callers: number;
    total_callees: number;
    untested_count: number;
    affected_flows_count: number;
    duration_ms: number;
    // Minimum CALLS-edge confidence used when collecting callers.
    min_confidence: number;
}
176
+
119
177
  // ── Internal types used during parsing pipeline ──
120
178
  export interface RawFunction {
121
179
  name: string;
@@ -2,7 +2,8 @@ import type { SgRoot } from '@ast-grep/napi';
2
2
  import { parseAsync } from '@ast-grep/napi';
3
3
  import { readFileSync } from 'fs';
4
4
  import { extname, relative } from 'path';
5
- import type { ParseBatchResult, RawGraph } from '../graph/types';
5
+ import type { ParseBatchResult, RawCallSite, RawGraph } from '../graph/types';
6
+ import { NOISE } from '../shared/filters';
6
7
  import { log } from '../shared/logger';
7
8
  import { extractCallsFromFile, extractFromFile } from './extractor';
8
9
  import { getLanguage } from './languages';
@@ -60,7 +61,14 @@ export async function parseBatch(files: string[], repoRoot: string): Promise<Par
60
61
  }
61
62
 
62
63
  try {
63
- extractCallsFromFile(root, fp, lang, graph.rawCalls);
64
+ // Extract calls into a temporary buffer, then filter noise before pushing
65
+ const rawCalls: RawCallSite[] = [];
66
+ extractCallsFromFile(root, fp, lang, rawCalls);
67
+ for (const call of rawCalls) {
68
+ if (!NOISE.has(call.callName)) {
69
+ graph.rawCalls.push(call);
70
+ }
71
+ }
64
72
  } catch (err) {
65
73
  log.error('Call extraction crashed', { file: fp, error: String(err) });
66
74
  extractErrors++;
@@ -1,5 +1,5 @@
1
1
  import { readdirSync } from 'fs';
2
- import { extname, join, resolve } from 'path';
2
+ import { extname, join, relative, resolve } from 'path';
3
3
  import { isSkippableFile, SKIP_DIRS } from '../shared/filters';
4
4
  import { log } from '../shared/logger';
5
5
  import { ensureWithinRoot } from '../shared/safe-path';
@@ -8,8 +8,15 @@ import { getLanguage } from './languages';
8
8
  /**
9
9
  * Walk the filesystem and find all supported source files.
10
10
  * If `filterFiles` is provided, only return those specific files (resolved to absolute paths).
11
+ * If `include` patterns are provided, keep only files matching at least one pattern.
12
+ * If `exclude` patterns are provided, remove files matching any pattern.
11
13
  */
12
- export function discoverFiles(repoDir: string, filterFiles?: string[]): string[] {
14
+ export function discoverFiles(
15
+ repoDir: string,
16
+ filterFiles?: string[],
17
+ include?: string[],
18
+ exclude?: string[],
19
+ ): string[] {
13
20
  const absRepoDir = resolve(repoDir);
14
21
 
15
22
  if (filterFiles) {
@@ -26,8 +33,34 @@ export function discoverFiles(repoDir: string, filterFiles?: string[]): string[]
26
33
  });
27
34
  }
28
35
 
29
- const files: string[] = [];
36
+ let files: string[] = [];
30
37
  walkFiles(absRepoDir, files);
38
+
39
+ // Apply include/exclude filters using Bun.Glob
40
+ const hasInclude = include && include.length > 0;
41
+ const hasExclude = exclude && exclude.length > 0;
42
+
43
+ if (hasInclude || hasExclude) {
44
+ const includeGlobs = hasInclude ? include.map((p) => new Bun.Glob(p)) : null;
45
+ const excludeGlobs = hasExclude ? exclude.map((p) => new Bun.Glob(p)) : null;
46
+
47
+ files = files.filter((absPath) => {
48
+ const rel = relative(absRepoDir, absPath);
49
+
50
+ // If include patterns exist, file must match at least one
51
+ if (includeGlobs && !includeGlobs.some((g) => g.match(rel))) {
52
+ return false;
53
+ }
54
+
55
+ // If exclude patterns exist, file must not match any
56
+ if (excludeGlobs && excludeGlobs.some((g) => g.match(rel))) {
57
+ return false;
58
+ }
59
+
60
+ return true;
61
+ });
62
+ }
63
+
31
64
  return files;
32
65
  }
33
66
 
@@ -1,141 +0,0 @@
1
- import type { ContextOutput, GraphData } from '../graph/types';
2
- import { computeBlastRadius } from './blast-radius';
3
- import { computeRiskScore } from './risk-score';
4
- import { findTestGaps } from './test-gaps';
5
-
6
- export function buildReviewContext(graph: GraphData, changedFiles: string[]): ContextOutput {
7
- const changedSet = new Set(changedFiles);
8
- const lines: string[] = [];
9
-
10
- // Build caller/callee index from CALLS edges
11
- const callersOf = new Map<string, Array<{ name: string; file: string; line: number; confidence: number }>>();
12
- const calleesOf = new Map<
13
- string,
14
- Array<{ name: string; target: string; file: string; line: number; confidence: number }>
15
- >();
16
-
17
- // Index nodes by qualified name
18
- const nodeIndex = new Map(graph.nodes.map((n) => [n.qualified_name, n]));
19
-
20
- for (const edge of graph.edges) {
21
- if (edge.kind !== 'CALLS' || (edge.confidence ?? 0) < 0.5) continue;
22
-
23
- // callers: who calls edge.target
24
- if (!callersOf.has(edge.target_qualified)) callersOf.set(edge.target_qualified, []);
25
- const sourceNode = nodeIndex.get(edge.source_qualified);
26
- callersOf.get(edge.target_qualified)!.push({
27
- name: sourceNode?.name || edge.source_qualified.split('::').pop() || 'unknown',
28
- file: sourceNode?.file_path || edge.file_path,
29
- line: edge.line,
30
- confidence: edge.confidence || 0,
31
- });
32
-
33
- // callees: what does source call
34
- if (!calleesOf.has(edge.source_qualified)) calleesOf.set(edge.source_qualified, []);
35
- const targetNode = nodeIndex.get(edge.target_qualified);
36
- calleesOf.get(edge.source_qualified)!.push({
37
- name: targetNode?.name || edge.target_qualified.split('::').pop() || 'unknown',
38
- target: edge.target_qualified,
39
- file: targetNode?.file_path || '',
40
- line: edge.line,
41
- confidence: edge.confidence || 0,
42
- });
43
- }
44
-
45
- // TESTED_BY index
46
- const testedFiles = new Set(graph.edges.filter((e) => e.kind === 'TESTED_BY').map((e) => e.source_qualified));
47
-
48
- lines.push('Changed functions (AST analysis):\n');
49
-
50
- // Functions in changed files
51
- const changedFunctions = graph.nodes
52
- .filter(
53
- (n) =>
54
- changedSet.has(n.file_path) && !n.is_test && n.kind !== 'Class' && n.kind !== 'Interface' && n.kind !== 'Enum',
55
- )
56
- .sort((a, b) => a.file_path.localeCompare(b.file_path) || a.line_start - b.line_start);
57
-
58
- let callerCount = 0;
59
- let calleeCount = 0;
60
-
61
- for (const func of changedFunctions) {
62
- if (func.kind === 'Constructor') continue;
63
-
64
- const shortName = func.name.includes('.') ? func.name.split('.').pop()! : func.name;
65
- const sig = func.params && func.params !== '()' ? `${shortName}${func.params}` : shortName;
66
- const ret = func.return_type ? ` -> ${func.return_type}` : '';
67
- lines.push(`${sig}${ret} (${func.file_path}:${func.line_start})`);
68
-
69
- // Callers
70
- const callers = callersOf.get(func.qualified_name) || [];
71
- callerCount += callers.length;
72
- for (const caller of callers.slice(0, 5)) {
73
- const conf = caller.confidence >= 0.85 ? '' : ` [${Math.round(caller.confidence * 100)}%]`;
74
- lines.push(` ← called by ${caller.name} (${caller.file}:${caller.line})${conf}`);
75
- }
76
- if (callers.length > 5) lines.push(` ← ... and ${callers.length - 5} more callers`);
77
-
78
- // Callees
79
- const callees = calleesOf.get(func.qualified_name) || [];
80
- calleeCount += callees.length;
81
- const seenCallees = new Set<string>();
82
- for (const callee of callees.slice(0, 5)) {
83
- if (seenCallees.has(callee.target)) continue;
84
- seenCallees.add(callee.target);
85
- const calleeNode = nodeIndex.get(callee.target);
86
- if (calleeNode) {
87
- const calleeSig =
88
- calleeNode.params && calleeNode.params !== '()' ? `${callee.name}${calleeNode.params}` : callee.name;
89
- const calleeRet = calleeNode.return_type ? ` -> ${calleeNode.return_type}` : '';
90
- lines.push(` → calls ${calleeSig}${calleeRet} (${calleeNode.file_path}:${calleeNode.line_start})`);
91
- } else {
92
- lines.push(` → calls ${callee.name} (${callee.file || 'external'})`);
93
- }
94
- }
95
-
96
- // Test coverage
97
- if (testedFiles.has(func.file_path)) {
98
- lines.push(` ✅ has test coverage`);
99
- } else {
100
- lines.push(` ⚠ NO TEST COVERAGE`);
101
- }
102
-
103
- lines.push('');
104
- }
105
-
106
- // Blast radius
107
- const blastRadius = computeBlastRadius(graph, changedFiles);
108
- if (blastRadius.total_files > changedFiles.length) {
109
- lines.push(
110
- `Blast radius: ${changedFunctions.filter((f) => f.kind !== 'Constructor').length} changed functions impact ${blastRadius.total_files - changedFiles.length} other files`,
111
- );
112
- }
113
-
114
- // Risk score
115
- const riskScore = computeRiskScore(graph, changedFiles, blastRadius);
116
- lines.push(`\nRisk: ${riskScore.level} (${riskScore.score})`);
117
-
118
- // Test gaps
119
- const testGaps = findTestGaps(graph, changedFiles);
120
- const untestedCount = testGaps.length;
121
- if (untestedCount > 0) {
122
- lines.push(`\n⚠ ${untestedCount} changed function(s) without test coverage:`);
123
- for (const gap of testGaps.slice(0, 10)) {
124
- const shortName = gap.function.split('::').pop() || gap.function;
125
- lines.push(` ${shortName} (${gap.file_path}:${gap.line_start})`);
126
- }
127
- }
128
-
129
- return {
130
- text: lines.join('\n'),
131
- metadata: {
132
- changed_functions: changedFunctions.filter((f) => f.kind !== 'Constructor').length,
133
- caller_count: callerCount,
134
- callee_count: calleeCount,
135
- untested_count: untestedCount,
136
- blast_radius: { functions: blastRadius.total_functions, files: blastRadius.total_files },
137
- risk_level: riskScore.level,
138
- risk_score: riskScore.score,
139
- },
140
- };
141
- }