@kodus/kodus-graph 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.ts +5 -1
- package/src/commands/parse.ts +30 -25
- package/src/graph/json-writer.ts +43 -0
- package/src/parser/batch.ts +10 -2
- package/src/parser/discovery.ts +36 -3
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -13,7 +13,7 @@ import { executeUpdate } from './commands/update';
|
|
|
13
13
|
|
|
14
14
|
const program = new Command();
|
|
15
15
|
|
|
16
|
-
program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.2.0');
|
|
16
|
+
program.name('kodus-graph').description('Code graph builder for Kodus code review').version('0.2.1');
|
|
17
17
|
|
|
18
18
|
program
|
|
19
19
|
.command('parse')
|
|
@@ -21,6 +21,8 @@ program
|
|
|
21
21
|
.option('--all', 'Parse all files in repo')
|
|
22
22
|
.option('--files <paths...>', 'Parse specific files')
|
|
23
23
|
.option('--repo-dir <path>', 'Repository root directory', '.')
|
|
24
|
+
.option('--include <glob...>', 'Include only files matching glob (repeatable)')
|
|
25
|
+
.option('--exclude <glob...>', 'Exclude files matching glob (repeatable)')
|
|
24
26
|
.requiredOption('--out <path>', 'Output JSON file path')
|
|
25
27
|
.action(async (opts) => {
|
|
26
28
|
const repoDir = resolve(opts.repoDir);
|
|
@@ -33,6 +35,8 @@ program
|
|
|
33
35
|
files: opts.files,
|
|
34
36
|
all: opts.all ?? false,
|
|
35
37
|
out: opts.out,
|
|
38
|
+
include: opts.include,
|
|
39
|
+
exclude: opts.exclude,
|
|
36
40
|
});
|
|
37
41
|
});
|
|
38
42
|
|
package/src/commands/parse.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { relative, resolve } from 'path';
|
|
1
|
+
import { resolve, relative } from 'path';
|
|
3
2
|
import { performance } from 'perf_hooks';
|
|
4
3
|
import { buildGraphData } from '../graph/builder';
|
|
5
|
-
import
|
|
4
|
+
import { writeGraphJSON } from '../graph/json-writer';
|
|
5
|
+
import type { ImportEdge } from '../graph/types';
|
|
6
6
|
import { parseBatch } from '../parser/batch';
|
|
7
7
|
import { discoverFiles } from '../parser/discovery';
|
|
8
8
|
import { resolveAllCalls } from '../resolver/call-resolver';
|
|
@@ -12,11 +12,13 @@ import { createSymbolTable } from '../resolver/symbol-table';
|
|
|
12
12
|
import { computeFileHash } from '../shared/file-hash';
|
|
13
13
|
import { log } from '../shared/logger';
|
|
14
14
|
|
|
15
|
-
interface ParseOptions {
|
|
15
|
+
export interface ParseOptions {
|
|
16
16
|
repoDir: string;
|
|
17
17
|
files?: string[];
|
|
18
18
|
all: boolean;
|
|
19
19
|
out: string;
|
|
20
|
+
include?: string[];
|
|
21
|
+
exclude?: string[];
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
export async function executeParse(opts: ParseOptions): Promise<void> {
|
|
@@ -24,27 +26,25 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
|
|
|
24
26
|
const repoDir = resolve(opts.repoDir);
|
|
25
27
|
|
|
26
28
|
// Phase 1: Discover files
|
|
27
|
-
const files = discoverFiles(repoDir, opts.all ? undefined : opts.files);
|
|
29
|
+
const files = discoverFiles(repoDir, opts.all ? undefined : opts.files, opts.include, opts.exclude);
|
|
28
30
|
process.stderr.write(`[1/5] Discovered ${files.length} files\n`);
|
|
29
31
|
|
|
30
32
|
// Phase 2: Parse + extract
|
|
31
|
-
|
|
33
|
+
let rawGraph = await parseBatch(files, repoDir);
|
|
32
34
|
process.stderr.write(
|
|
33
35
|
`[2/5] Parsed ${rawGraph.functions.length} functions, ${rawGraph.classes.length} classes, ${rawGraph.rawCalls.length} call sites\n`,
|
|
34
36
|
);
|
|
35
37
|
|
|
36
38
|
// Phase 3: Resolve imports
|
|
37
39
|
const tsconfigAliases = loadTsconfigAliases(repoDir);
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
40
|
+
let symbolTable = createSymbolTable();
|
|
41
|
+
let importMap = createImportMap();
|
|
42
|
+
let importEdges: ImportEdge[] = [];
|
|
41
43
|
|
|
42
|
-
// Populate symbol table
|
|
43
44
|
for (const f of rawGraph.functions) symbolTable.add(f.file, f.name, f.qualified);
|
|
44
45
|
for (const c of rawGraph.classes) symbolTable.add(c.file, c.name, c.qualified);
|
|
45
46
|
for (const i of rawGraph.interfaces) symbolTable.add(i.file, i.name, i.qualified);
|
|
46
47
|
|
|
47
|
-
// Resolve each import
|
|
48
48
|
for (const imp of rawGraph.imports) {
|
|
49
49
|
const langKey = imp.lang === 'python' ? 'python' : imp.lang === 'ruby' ? 'ruby' : 'typescript';
|
|
50
50
|
const resolved = resolveImport(resolve(repoDir, imp.file), imp.module, langKey, repoDir, tsconfigAliases);
|
|
@@ -64,7 +64,7 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
|
|
|
64
64
|
);
|
|
65
65
|
|
|
66
66
|
// Phase 4: Resolve calls
|
|
67
|
-
|
|
67
|
+
let { callEdges, stats } = resolveAllCalls(rawGraph.rawCalls, rawGraph.diMaps, symbolTable, importMap);
|
|
68
68
|
process.stderr.write(
|
|
69
69
|
`[4/5] Resolved ${callEdges.length} calls (DI:${stats.di} same:${stats.same} import:${stats.import} unique:${stats.unique} ambiguous:${stats.ambiguous} noise:${stats.noise})\n`,
|
|
70
70
|
);
|
|
@@ -79,22 +79,27 @@ export async function executeParse(opts: ParseOptions): Promise<void> {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
const parseErrors = rawGraph.parseErrors;
|
|
83
|
+
const extractErrors = rawGraph.extractErrors;
|
|
82
84
|
const graphData = buildGraphData(rawGraph, callEdges, importEdges, repoDir, fileHashes);
|
|
83
85
|
process.stderr.write(`[5/5] Built graph: ${graphData.nodes.length} nodes, ${graphData.edges.length} edges\n`);
|
|
84
86
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
87
|
+
// Release intermediaries — no longer needed after buildGraphData
|
|
88
|
+
rawGraph = null as any;
|
|
89
|
+
symbolTable = null as any;
|
|
90
|
+
importMap = null as any;
|
|
91
|
+
callEdges = null as any;
|
|
92
|
+
importEdges = null as any;
|
|
93
|
+
|
|
94
|
+
const metadata = {
|
|
95
|
+
repo_dir: repoDir,
|
|
96
|
+
files_parsed: files.length,
|
|
97
|
+
total_nodes: graphData.nodes.length,
|
|
98
|
+
total_edges: graphData.edges.length,
|
|
99
|
+
duration_ms: Math.round(performance.now() - t0),
|
|
100
|
+
parse_errors: parseErrors,
|
|
101
|
+
extract_errors: extractErrors,
|
|
97
102
|
};
|
|
98
103
|
|
|
99
|
-
|
|
104
|
+
writeGraphJSON(opts.out, metadata, graphData.nodes, graphData.edges);
|
|
100
105
|
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { openSync, writeSync, closeSync } from 'fs';
|
|
2
|
+
import type { GraphEdge, GraphNode, ParseMetadata } from './types';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Write graph output as JSON to disk using incremental serialization.
|
|
6
|
+
*
|
|
7
|
+
* Instead of JSON.stringify on the full output (which creates a ~100-300 MB
|
|
8
|
+
* string for large repos), this writes each node/edge individually.
|
|
9
|
+
* Peak memory: only one JSON.stringify(singleNode) string at a time (~1 KB).
|
|
10
|
+
*/
|
|
11
|
+
export function writeGraphJSON(
|
|
12
|
+
out: string,
|
|
13
|
+
metadata: ParseMetadata,
|
|
14
|
+
nodes: GraphNode[],
|
|
15
|
+
edges: GraphEdge[],
|
|
16
|
+
): void {
|
|
17
|
+
const fd = openSync(out, 'w');
|
|
18
|
+
|
|
19
|
+
try {
|
|
20
|
+
writeSync(fd, '{"metadata":');
|
|
21
|
+
writeSync(fd, JSON.stringify(metadata));
|
|
22
|
+
|
|
23
|
+
// Nodes
|
|
24
|
+
writeSync(fd, ',"nodes":[');
|
|
25
|
+
for (let i = 0; i < nodes.length; i++) {
|
|
26
|
+
if (i > 0) writeSync(fd, ',');
|
|
27
|
+
writeSync(fd, '\n');
|
|
28
|
+
writeSync(fd, JSON.stringify(nodes[i]));
|
|
29
|
+
}
|
|
30
|
+
writeSync(fd, '\n]');
|
|
31
|
+
|
|
32
|
+
// Edges
|
|
33
|
+
writeSync(fd, ',"edges":[');
|
|
34
|
+
for (let i = 0; i < edges.length; i++) {
|
|
35
|
+
if (i > 0) writeSync(fd, ',');
|
|
36
|
+
writeSync(fd, '\n');
|
|
37
|
+
writeSync(fd, JSON.stringify(edges[i]));
|
|
38
|
+
}
|
|
39
|
+
writeSync(fd, '\n]}');
|
|
40
|
+
} finally {
|
|
41
|
+
closeSync(fd);
|
|
42
|
+
}
|
|
43
|
+
}
|
package/src/parser/batch.ts
CHANGED
|
@@ -2,7 +2,8 @@ import type { SgRoot } from '@ast-grep/napi';
|
|
|
2
2
|
import { parseAsync } from '@ast-grep/napi';
|
|
3
3
|
import { readFileSync } from 'fs';
|
|
4
4
|
import { extname, relative } from 'path';
|
|
5
|
-
import type { ParseBatchResult, RawGraph } from '../graph/types';
|
|
5
|
+
import type { ParseBatchResult, RawCallSite, RawGraph } from '../graph/types';
|
|
6
|
+
import { NOISE } from '../shared/filters';
|
|
6
7
|
import { log } from '../shared/logger';
|
|
7
8
|
import { extractCallsFromFile, extractFromFile } from './extractor';
|
|
8
9
|
import { getLanguage } from './languages';
|
|
@@ -60,7 +61,14 @@ export async function parseBatch(files: string[], repoRoot: string): Promise<Par
|
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
try {
|
|
63
|
-
|
|
64
|
+
// Extract calls into a temporary buffer, then filter noise before pushing
|
|
65
|
+
const rawCalls: RawCallSite[] = [];
|
|
66
|
+
extractCallsFromFile(root, fp, lang, rawCalls);
|
|
67
|
+
for (const call of rawCalls) {
|
|
68
|
+
if (!NOISE.has(call.callName)) {
|
|
69
|
+
graph.rawCalls.push(call);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
64
72
|
} catch (err) {
|
|
65
73
|
log.error('Call extraction crashed', { file: fp, error: String(err) });
|
|
66
74
|
extractErrors++;
|
package/src/parser/discovery.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { readdirSync } from 'fs';
|
|
2
|
-
import { extname, join, resolve } from 'path';
|
|
2
|
+
import { extname, join, relative, resolve } from 'path';
|
|
3
3
|
import { isSkippableFile, SKIP_DIRS } from '../shared/filters';
|
|
4
4
|
import { log } from '../shared/logger';
|
|
5
5
|
import { ensureWithinRoot } from '../shared/safe-path';
|
|
@@ -8,8 +8,15 @@ import { getLanguage } from './languages';
|
|
|
8
8
|
/**
|
|
9
9
|
* Walk the filesystem and find all supported source files.
|
|
10
10
|
* If `filterFiles` is provided, only return those specific files (resolved to absolute paths).
|
|
11
|
+
* If `include` patterns are provided, keep only files matching at least one pattern.
|
|
12
|
+
* If `exclude` patterns are provided, remove files matching any pattern.
|
|
11
13
|
*/
|
|
12
|
-
export function discoverFiles(
|
|
14
|
+
export function discoverFiles(
|
|
15
|
+
repoDir: string,
|
|
16
|
+
filterFiles?: string[],
|
|
17
|
+
include?: string[],
|
|
18
|
+
exclude?: string[],
|
|
19
|
+
): string[] {
|
|
13
20
|
const absRepoDir = resolve(repoDir);
|
|
14
21
|
|
|
15
22
|
if (filterFiles) {
|
|
@@ -26,8 +33,34 @@ export function discoverFiles(repoDir: string, filterFiles?: string[]): string[]
|
|
|
26
33
|
});
|
|
27
34
|
}
|
|
28
35
|
|
|
29
|
-
|
|
36
|
+
let files: string[] = [];
|
|
30
37
|
walkFiles(absRepoDir, files);
|
|
38
|
+
|
|
39
|
+
// Apply include/exclude filters using Bun.Glob
|
|
40
|
+
const hasInclude = include && include.length > 0;
|
|
41
|
+
const hasExclude = exclude && exclude.length > 0;
|
|
42
|
+
|
|
43
|
+
if (hasInclude || hasExclude) {
|
|
44
|
+
const includeGlobs = hasInclude ? include.map((p) => new Bun.Glob(p)) : null;
|
|
45
|
+
const excludeGlobs = hasExclude ? exclude.map((p) => new Bun.Glob(p)) : null;
|
|
46
|
+
|
|
47
|
+
files = files.filter((absPath) => {
|
|
48
|
+
const rel = relative(absRepoDir, absPath);
|
|
49
|
+
|
|
50
|
+
// If include patterns exist, file must match at least one
|
|
51
|
+
if (includeGlobs && !includeGlobs.some((g) => g.match(rel))) {
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// If exclude patterns exist, file must not match any
|
|
56
|
+
if (excludeGlobs && excludeGlobs.some((g) => g.match(rel))) {
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return true;
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
|
|
31
64
|
return files;
|
|
32
65
|
}
|
|
33
66
|
|