@kodus/kodus-graph 0.2.0 → 0.2.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kodus/kodus-graph",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "Code graph builder for Kodus code review — parses source code into structural graphs with nodes, edges, and analysis",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.ts CHANGED
@@ -13,7 +13,7 @@ import { executeUpdate } from './commands/update';
13
13
 
14
14
  const program = new Command();
15
15
 
16
- program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.2.0');
16
+ program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.2.1');
17
17
 
18
18
  program
19
19
  .command('parse')
@@ -21,6 +21,8 @@ program
21
21
  .option('--all', 'Parse all files in repo')
22
22
  .option('--files <paths...>', 'Parse specific files')
23
23
  .option('--repo-dir <path>', 'Repository root directory', '.')
24
+ .option('--include <glob...>', 'Include only files matching glob (repeatable)')
25
+ .option('--exclude <glob...>', 'Exclude files matching glob (repeatable)')
24
26
  .requiredOption('--out <path>', 'Output JSON file path')
25
27
  .action(async (opts) => {
26
28
  const repoDir = resolve(opts.repoDir);
@@ -33,6 +35,8 @@ program
33
35
  files: opts.files,
34
36
  all: opts.all ?? false,
35
37
  out: opts.out,
38
+ include: opts.include,
39
+ exclude: opts.exclude,
36
40
  });
37
41
  });
38
42
 
package/src/commands/parse.ts CHANGED
@@ -1,8 +1,8 @@
1
- import { writeFileSync } from 'fs';
2
- import { relative, resolve } from 'path';
1
+ import { resolve, relative } from 'path';
3
2
  import { performance } from 'perf_hooks';
4
3
  import { buildGraphData } from '../graph/builder';
5
- import type { ImportEdge, ParseOutput } from '../graph/types';
4
+ import { writeGraphJSON } from '../graph/json-writer';
5
+ import type { ImportEdge } from '../graph/types';
6
6
  import { parseBatch } from '../parser/batch';
7
7
  import { discoverFiles } from '../parser/discovery';
8
8
  import { resolveAllCalls } from '../resolver/call-resolver';
@@ -12,11 +12,13 @@ import { createSymbolTable } from '../resolver/symbol-table';
12
12
  import { computeFileHash } from '../shared/file-hash';
13
13
  import { log } from '../shared/logger';
14
14
 
15
- interface ParseOptions {
15
+ export interface ParseOptions {
16
16
  repoDir: string;
17
17
  files?: string[];
18
18
  all: boolean;
19
19
  out: string;
20
+ include?: string[];
21
+ exclude?: string[];
20
22
  }
21
23
 
22
24
  export async function executeParse(opts: ParseOptions): Promise<void> {
@@ -24,27 +26,25 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
24
26
  const repoDir = resolve(opts.repoDir);
25
27
 
26
28
  // Phase 1: Discover files
27
- const files = discoverFiles(repoDir, opts.all ? undefined : opts.files);
29
+ const files = discoverFiles(repoDir, opts.all ? undefined : opts.files, opts.include, opts.exclude);
28
30
  process.stderr.write(`[1/5] Discovered ${files.length} files\n`);
29
31
 
30
32
  // Phase 2: Parse + extract
31
- const rawGraph = await parseBatch(files, repoDir);
33
+ let rawGraph = await parseBatch(files, repoDir);
32
34
  process.stderr.write(
33
35
  `[2/5] Parsed ${rawGraph.functions.length} functions, ${rawGraph.classes.length} classes, ${rawGraph.rawCalls.length} call sites\n`,
34
36
  );
35
37
 
36
38
  // Phase 3: Resolve imports
37
39
  const tsconfigAliases = loadTsconfigAliases(repoDir);
38
- const symbolTable = createSymbolTable();
39
- const importMap = createImportMap();
40
- const importEdges: ImportEdge[] = [];
40
+ let symbolTable = createSymbolTable();
41
+ let importMap = createImportMap();
42
+ let importEdges: ImportEdge[] = [];
41
43
 
42
- // Populate symbol table
43
44
  for (const f of rawGraph.functions) symbolTable.add(f.file, f.name, f.qualified);
44
45
  for (const c of rawGraph.classes) symbolTable.add(c.file, c.name, c.qualified);
45
46
  for (const i of rawGraph.interfaces) symbolTable.add(i.file, i.name, i.qualified);
46
47
 
47
- // Resolve each import
48
48
  for (const imp of rawGraph.imports) {
49
49
  const langKey = imp.lang === 'python' ? 'python' : imp.lang === 'ruby' ? 'ruby' : 'typescript';
50
50
  const resolved = resolveImport(resolve(repoDir, imp.file), imp.module, langKey, repoDir, tsconfigAliases);
@@ -64,7 +64,7 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
64
64
  );
65
65
 
66
66
  // Phase 4: Resolve calls
67
- const { callEdges, stats } = resolveAllCalls(rawGraph.rawCalls, rawGraph.diMaps, symbolTable, importMap);
67
+ let { callEdges, stats } = resolveAllCalls(rawGraph.rawCalls, rawGraph.diMaps, symbolTable, importMap);
68
68
  process.stderr.write(
69
69
  `[4/5] Resolved ${callEdges.length} calls (DI:${stats.di} same:${stats.same} import:${stats.import} unique:${stats.unique} ambiguous:${stats.ambiguous} noise:${stats.noise})\n`,
70
70
  );
@@ -79,22 +79,27 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
79
79
  }
80
80
  }
81
81
 
82
+ const parseErrors = rawGraph.parseErrors;
83
+ const extractErrors = rawGraph.extractErrors;
82
84
  const graphData = buildGraphData(rawGraph, callEdges, importEdges, repoDir, fileHashes);
83
85
  process.stderr.write(`[5/5] Built graph: ${graphData.nodes.length} nodes, ${graphData.edges.length} edges\n`);
84
86
 
85
- const output: ParseOutput = {
86
- metadata: {
87
- repo_dir: repoDir,
88
- files_parsed: files.length,
89
- total_nodes: graphData.nodes.length,
90
- total_edges: graphData.edges.length,
91
- duration_ms: Math.round(performance.now() - t0),
92
- parse_errors: rawGraph.parseErrors,
93
- extract_errors: rawGraph.extractErrors,
94
- },
95
- nodes: graphData.nodes,
96
- edges: graphData.edges,
87
+ // Release intermediaries no longer needed after buildGraphData
88
+ rawGraph = null as any;
89
+ symbolTable = null as any;
90
+ importMap = null as any;
91
+ callEdges = null as any;
92
+ importEdges = null as any;
93
+
94
+ const metadata = {
95
+ repo_dir: repoDir,
96
+ files_parsed: files.length,
97
+ total_nodes: graphData.nodes.length,
98
+ total_edges: graphData.edges.length,
99
+ duration_ms: Math.round(performance.now() - t0),
100
+ parse_errors: parseErrors,
101
+ extract_errors: extractErrors,
97
102
  };
98
103
 
99
- writeFileSync(opts.out, JSON.stringify(output, null, 2));
104
+ writeGraphJSON(opts.out, metadata, graphData.nodes, graphData.edges);
100
105
  }
package/src/graph/json-writer.ts ADDED
@@ -0,0 +1,43 @@
1
+ import { openSync, writeSync, closeSync } from 'fs';
2
+ import type { GraphEdge, GraphNode, ParseMetadata } from './types';
3
+
4
+ /**
5
+ * Write graph output as JSON to disk using incremental serialization.
6
+ *
7
+ * Instead of JSON.stringify on the full output (which creates a ~100-300 MB
8
+ * string for large repos), this writes each node/edge individually.
9
+ * Peak memory: only one JSON.stringify(singleNode) string at a time (~1 KB).
10
+ */
11
+ export function writeGraphJSON(
12
+ out: string,
13
+ metadata: ParseMetadata,
14
+ nodes: GraphNode[],
15
+ edges: GraphEdge[],
16
+ ): void {
17
+ const fd = openSync(out, 'w');
18
+
19
+ try {
20
+ writeSync(fd, '{"metadata":');
21
+ writeSync(fd, JSON.stringify(metadata));
22
+
23
+ // Nodes
24
+ writeSync(fd, ',"nodes":[');
25
+ for (let i = 0; i < nodes.length; i++) {
26
+ if (i > 0) writeSync(fd, ',');
27
+ writeSync(fd, '\n');
28
+ writeSync(fd, JSON.stringify(nodes[i]));
29
+ }
30
+ writeSync(fd, '\n]');
31
+
32
+ // Edges
33
+ writeSync(fd, ',"edges":[');
34
+ for (let i = 0; i < edges.length; i++) {
35
+ if (i > 0) writeSync(fd, ',');
36
+ writeSync(fd, '\n');
37
+ writeSync(fd, JSON.stringify(edges[i]));
38
+ }
39
+ writeSync(fd, '\n]}');
40
+ } finally {
41
+ closeSync(fd);
42
+ }
43
+ }
package/src/parser/batch.ts CHANGED
@@ -2,7 +2,8 @@ import type { SgRoot } from '@ast-grep/napi';
2
2
  import { parseAsync } from '@ast-grep/napi';
3
3
  import { readFileSync } from 'fs';
4
4
  import { extname, relative } from 'path';
5
- import type { ParseBatchResult, RawGraph } from '../graph/types';
5
+ import type { ParseBatchResult, RawCallSite, RawGraph } from '../graph/types';
6
+ import { NOISE } from '../shared/filters';
6
7
  import { log } from '../shared/logger';
7
8
  import { extractCallsFromFile, extractFromFile } from './extractor';
8
9
  import { getLanguage } from './languages';
@@ -60,7 +61,14 @@ export async function parseBatch(files: string[], repoRoot: string): Promise<Par
60
61
  }
61
62
 
62
63
  try {
63
- extractCallsFromFile(root, fp, lang, graph.rawCalls);
64
+ // Extract calls into a temporary buffer, then filter noise before pushing
65
+ const rawCalls: RawCallSite[] = [];
66
+ extractCallsFromFile(root, fp, lang, rawCalls);
67
+ for (const call of rawCalls) {
68
+ if (!NOISE.has(call.callName)) {
69
+ graph.rawCalls.push(call);
70
+ }
71
+ }
64
72
  } catch (err) {
65
73
  log.error('Call extraction crashed', { file: fp, error: String(err) });
66
74
  extractErrors++;
package/src/parser/discovery.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { readdirSync } from 'fs';
2
- import { extname, join, resolve } from 'path';
2
+ import { extname, join, relative, resolve } from 'path';
3
3
  import { isSkippableFile, SKIP_DIRS } from '../shared/filters';
4
4
  import { log } from '../shared/logger';
5
5
  import { ensureWithinRoot } from '../shared/safe-path';
@@ -8,8 +8,15 @@ import { getLanguage } from './languages';
8
8
  /**
9
9
  * Walk the filesystem and find all supported source files.
10
10
  * If `filterFiles` is provided, only return those specific files (resolved to absolute paths).
11
+ * If `include` patterns are provided, keep only files matching at least one pattern.
12
+ * If `exclude` patterns are provided, remove files matching any pattern.
11
13
  */
12
- export function discoverFiles(repoDir: string, filterFiles?: string[]): string[] {
14
+ export function discoverFiles(
15
+ repoDir: string,
16
+ filterFiles?: string[],
17
+ include?: string[],
18
+ exclude?: string[],
19
+ ): string[] {
13
20
  const absRepoDir = resolve(repoDir);
14
21
 
15
22
  if (filterFiles) {
@@ -26,8 +33,34 @@ export function discoverFiles(repoDir: string, filterFiles?: string[]): string[]
26
33
  });
27
34
  }
28
35
 
29
- const files: string[] = [];
36
+ let files: string[] = [];
30
37
  walkFiles(absRepoDir, files);
38
+
39
+ // Apply include/exclude filters using Bun.Glob
40
+ const hasInclude = include && include.length > 0;
41
+ const hasExclude = exclude && exclude.length > 0;
42
+
43
+ if (hasInclude || hasExclude) {
44
+ const includeGlobs = hasInclude ? include.map((p) => new Bun.Glob(p)) : null;
45
+ const excludeGlobs = hasExclude ? exclude.map((p) => new Bun.Glob(p)) : null;
46
+
47
+ files = files.filter((absPath) => {
48
+ const rel = relative(absRepoDir, absPath);
49
+
50
+ // If include patterns exist, file must match at least one
51
+ if (includeGlobs && !includeGlobs.some((g) => g.match(rel))) {
52
+ return false;
53
+ }
54
+
55
+ // If exclude patterns exist, file must not match any
56
+ if (excludeGlobs && excludeGlobs.some((g) => g.match(rel))) {
57
+ return false;
58
+ }
59
+
60
+ return true;
61
+ });
62
+ }
63
+
31
64
  return files;
32
65
  }
33
66