@codeflow-map/core 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Language-agnostic call-graph engine powered by [Tree-sitter](https://tree-sitter.github.io/tree-sitter/). Feed it source files, get back a structured graph of every function, every call relationship, and every execution flow — deterministic, fully local, no LLM, no cloud.
4
4
 
5
- Used by the [CallSight VS Code extension](https://github.com/devricky-codes/callsight-vscode-extension) to render interactive call-flow diagrams for any codebase.
5
+ Used by the [CallSight VS Code extension](https://github.com/devricky-codes/callsight-vscode) to render interactive call-flow diagrams for any codebase.
6
6
 
7
7
  ---
8
8
 
@@ -58,13 +58,22 @@ const ANALYZERS = {
58
58
  go: go_1.goAnalyzer,
59
59
  rust: null
60
60
  };
61
+ // Reuse a single Parser instance across all parseFile / parseFileContent calls
62
+ // to avoid exhausting OS file-handle limits (EMFILE) when scanning large codebases.
63
+ let sharedParser = null;
64
+ function getSharedParser() {
65
+ if (!sharedParser) {
66
+ sharedParser = new web_tree_sitter_1.default();
67
+ }
68
+ return sharedParser;
69
+ }
61
70
  async function parseFile(filePath, absolutePath, wasmDirectory, languageId) {
62
71
  const analyzer = ANALYZERS[languageId];
63
72
  if (!analyzer)
64
73
  return { functions: [], calls: [] };
65
74
  const content = await fs.readFile(absolutePath, 'utf8');
66
75
  const treeSitterLang = await (0, treeSitter_1.loadLanguage)(languageId, wasmDirectory);
67
- const parser = new web_tree_sitter_1.default();
76
+ const parser = getSharedParser();
68
77
  parser.setLanguage(treeSitterLang);
69
78
  const tree = parser.parse(content);
70
79
  let functionQuery = null;
@@ -99,8 +108,7 @@ async function parseFile(filePath, absolutePath, wasmDirectory, languageId) {
99
108
  callQuery.delete();
100
109
  if (tree)
101
110
  tree.delete();
102
- if (parser)
103
- parser.delete();
111
+ // Parser is shared — do NOT delete it here
104
112
  }
105
113
  }
106
114
  async function parseFileContent(filePath, content, wasmDirectory, languageId) {
@@ -108,7 +116,7 @@ async function parseFileContent(filePath, content, wasmDirectory, languageId) {
108
116
  if (!analyzer)
109
117
  return { functions: [], calls: [] };
110
118
  const treeSitterLang = await (0, treeSitter_1.loadLanguage)(languageId, wasmDirectory);
111
- const parser = new web_tree_sitter_1.default();
119
+ const parser = getSharedParser();
112
120
  parser.setLanguage(treeSitterLang);
113
121
  const tree = parser.parse(content);
114
122
  let functionQuery = null;
@@ -143,7 +151,6 @@ async function parseFileContent(filePath, content, wasmDirectory, languageId) {
143
151
  callQuery.delete();
144
152
  if (tree)
145
153
  tree.delete();
146
- if (parser)
147
- parser.delete();
154
+ // Parser is shared — do NOT delete it here
148
155
  }
149
156
  }
@@ -1,2 +1,2 @@
1
- import { FunctionNode, RawCall, CallEdge } from '../types';
2
- export declare function buildCallGraph(nodes: FunctionNode[], rawCalls: RawCall[]): CallEdge[];
1
+ import { FunctionNode, RawCall, CallEdge, Logger } from '../types';
2
+ export declare function buildCallGraph(nodes: FunctionNode[], rawCalls: RawCall[], log?: Logger): CallEdge[];
@@ -1,9 +1,13 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.buildCallGraph = buildCallGraph;
4
- function buildCallGraph(nodes, rawCalls) {
4
+ function buildCallGraph(nodes, rawCalls, log) {
5
+ const t0 = Date.now();
6
+ log?.(`buildCallGraph: START — ${nodes.length} nodes, ${rawCalls.length} raw calls`);
5
7
  const byName = new Map();
6
8
  const bySuffix = new Map();
9
+ // Index nodes by file path for fast caller lookups
10
+ const byFile = new Map();
7
11
  for (const n of nodes) {
8
12
  byName.set(n.name, n);
9
13
  const dotIdx = n.name.lastIndexOf('.');
@@ -13,14 +17,26 @@ function buildCallGraph(nodes, rawCalls) {
13
17
  bySuffix.set(suffix, []);
14
18
  bySuffix.get(suffix).push(n);
15
19
  }
20
+ if (!byFile.has(n.filePath))
21
+ byFile.set(n.filePath, []);
22
+ byFile.get(n.filePath).push(n);
16
23
  }
24
+ log?.(`buildCallGraph: indexes built in ${Date.now() - t0}ms (byName=${byName.size}, bySuffix=${bySuffix.size}, byFile=${byFile.size})`);
17
25
  const edges = [];
26
+ const edgeSet = new Set();
27
+ const tLoop = Date.now();
28
+ let processed = 0;
29
+ const logInterval = Math.max(1, Math.floor(rawCalls.length / 10)); // log every ~10%
18
30
  for (const call of rawCalls) {
31
+ processed++;
32
+ if (log && processed % logInterval === 0) {
33
+ log(`buildCallGraph: processing call ${processed}/${rawCalls.length} (${edges.length} edges so far, ${Date.now() - tLoop}ms)`);
34
+ }
19
35
  let callee = byName.get(call.calleeName);
20
36
  // Find the innermost function whose range contains the call line,
21
37
  // excluding the callee itself to avoid self-loops (e.g. go func(){...}())
22
- const callerCandidates = nodes.filter(n => n.filePath === call.callerFilePath &&
23
- call.line >= n.startLine &&
38
+ const fileNodes = byFile.get(call.callerFilePath) || [];
39
+ const callerCandidates = fileNodes.filter(n => call.line >= n.startLine &&
24
40
  call.line <= n.endLine &&
25
41
  n !== callee);
26
42
  const caller = callerCandidates.length > 0
@@ -47,12 +63,15 @@ function buildCallGraph(nodes, rawCalls) {
47
63
  if (!callee)
48
64
  continue;
49
65
  // Deduplicate: skip if an edge with same from, to, and line already exists
50
- if (edges.some(e => e.from === caller.id && e.to === callee.id && e.line === call.line))
66
+ const edgeKey = `${caller.id}|${callee.id}|${call.line}`;
67
+ if (edgeSet.has(edgeKey))
51
68
  continue;
69
+ edgeSet.add(edgeKey);
52
70
  const edge = { from: caller.id, to: callee.id, line: call.line };
53
71
  if (call.callType)
54
72
  edge.callType = call.callType;
55
73
  edges.push(edge);
56
74
  }
75
+ log?.(`buildCallGraph: DONE — ${edges.length} edges in ${Date.now() - t0}ms`);
57
76
  return edges;
58
77
  }
@@ -1,2 +1,2 @@
1
- import { FunctionNode, CallEdge } from '../types';
2
- export declare function detectEntryPoints(nodes: FunctionNode[], edges: CallEdge[]): void;
1
+ import { FunctionNode, CallEdge, Logger } from '../types';
2
+ export declare function detectEntryPoints(nodes: FunctionNode[], edges: CallEdge[], log?: Logger): void;
@@ -1,11 +1,17 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.detectEntryPoints = detectEntryPoints;
4
- function detectEntryPoints(nodes, edges) {
4
+ function detectEntryPoints(nodes, edges, log) {
5
+ const t0 = Date.now();
6
+ log?.(`detectEntryPoints: START — ${nodes.length} nodes, ${edges.length} edges`);
5
7
  const nonSelfEdges = edges.filter(e => e.from !== e.to);
6
8
  const calledIds = new Set(nonSelfEdges.map(e => e.to));
7
9
  const callerIds = new Set(nonSelfEdges.map(e => e.from));
10
+ let entryCount = 0;
8
11
  for (const node of nodes) {
9
12
  node.isEntryPoint = node.isEntryPoint || (!calledIds.has(node.id) && callerIds.has(node.id));
13
+ if (node.isEntryPoint)
14
+ entryCount++;
10
15
  }
16
+ log?.(`detectEntryPoints: DONE — ${entryCount} entry points found in ${Date.now() - t0}ms`);
11
17
  }
@@ -1,5 +1,5 @@
1
- import { FunctionNode, CallEdge, Flow } from '../types';
2
- export declare function partitionFlows(nodes: FunctionNode[], edges: CallEdge[]): {
1
+ import { FunctionNode, CallEdge, Flow, Logger } from '../types';
2
+ export declare function partitionFlows(nodes: FunctionNode[], edges: CallEdge[], log?: Logger): {
3
3
  flows: Flow[];
4
4
  orphans: string[];
5
5
  };
@@ -1,50 +1,101 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.partitionFlows = partitionFlows;
4
- function partitionFlows(nodes, edges) {
4
+ function partitionFlows(nodes, edges, log) {
5
+ const t0 = Date.now();
6
+ log?.(`partitionFlows: START — ${nodes.length} nodes, ${edges.length} edges`);
5
7
  const flows = [];
6
8
  const visited = new Set();
7
9
  const nonSelfEdges = edges.filter(e => e.from !== e.to);
10
+ log?.(`partitionFlows: ${nonSelfEdges.length} non-self edges after filtering`);
11
+ // Build adjacency list for fast BFS
12
+ const adj = new Map();
13
+ const adjBi = new Map();
14
+ for (const e of nonSelfEdges) {
15
+ if (!adj.has(e.from))
16
+ adj.set(e.from, []);
17
+ adj.get(e.from).push(e.to);
18
+ // bidirectional
19
+ if (!adjBi.has(e.from))
20
+ adjBi.set(e.from, []);
21
+ adjBi.get(e.from).push(e.to);
22
+ if (!adjBi.has(e.to))
23
+ adjBi.set(e.to, []);
24
+ adjBi.get(e.to).push(e.from);
25
+ }
26
+ log?.(`partitionFlows: adjacency lists built in ${Date.now() - t0}ms`);
8
27
  // 1. BFS from every entry point (in-degree=0, out-degree>0)
28
+ const t1 = Date.now();
9
29
  const entryIds = findEntryIds(new Set(nodes.map(n => n.id)), nonSelfEdges);
30
+ log?.(`partitionFlows: phase 1 — ${entryIds.length} entry points found in ${Date.now() - t1}ms`);
10
31
  for (const eid of entryIds) {
11
- const reachable = bfs(eid, nonSelfEdges);
32
+ const reachable = bfsAdj(eid, adj);
12
33
  flows.push({ id: eid, entryPoint: eid, nodeIds: [...reachable] });
13
34
  reachable.forEach(id => visited.add(id));
14
35
  }
36
+ log?.(`partitionFlows: phase 1 done — ${flows.length} flows, ${visited.size} visited nodes (${Date.now() - t0}ms)`);
15
37
  // 2. Iteratively find mini-flows in unvisited subgraph
38
+ const t2 = Date.now();
16
39
  let remaining = new Set(nodes.map(n => n.id).filter(id => !visited.has(id)));
40
+ log?.(`partitionFlows: phase 2 — ${remaining.size} remaining nodes`);
41
+ let phase2Iters = 0;
17
42
  while (remaining.size > 0) {
43
+ phase2Iters++;
18
44
  const subEdges = nonSelfEdges.filter(e => remaining.has(e.from) && remaining.has(e.to));
45
+ const subAdj = new Map();
46
+ for (const e of subEdges) {
47
+ if (!subAdj.has(e.from))
48
+ subAdj.set(e.from, []);
49
+ subAdj.get(e.from).push(e.to);
50
+ }
19
51
  const subEntries = findEntryIds(remaining, subEdges);
20
52
  if (subEntries.length === 0)
21
53
  break;
22
54
  for (const eid of subEntries) {
23
- const reachable = bfs(eid, subEdges);
55
+ const reachable = bfsAdj(eid, subAdj);
24
56
  flows.push({ id: eid, entryPoint: eid, nodeIds: [...reachable] });
25
57
  reachable.forEach(id => visited.add(id));
26
58
  }
27
59
  remaining = new Set([...remaining].filter(id => !visited.has(id)));
60
+ if (phase2Iters % 50 === 0) {
61
+ log?.(`partitionFlows: phase 2 iteration ${phase2Iters}, ${remaining.size} remaining, ${flows.length} flows (${Date.now() - t2}ms)`);
62
+ }
28
63
  }
64
+ log?.(`partitionFlows: phase 2 done — ${phase2Iters} iterations, ${flows.length} flows, ${remaining.size} remaining (${Date.now() - t0}ms)`);
29
65
  // 3. Remaining nodes with edges (pure cycles) → pick one per component
66
+ const t3 = Date.now();
30
67
  remaining = new Set([...remaining]);
31
68
  if (remaining.size > 0) {
32
69
  const subEdges = nonSelfEdges.filter(e => remaining.has(e.from) && remaining.has(e.to));
70
+ const subAdjBi = new Map();
71
+ const subAdj = new Map();
72
+ for (const e of subEdges) {
73
+ if (!subAdj.has(e.from))
74
+ subAdj.set(e.from, []);
75
+ subAdj.get(e.from).push(e.to);
76
+ if (!subAdjBi.has(e.from))
77
+ subAdjBi.set(e.from, []);
78
+ subAdjBi.get(e.from).push(e.to);
79
+ if (!subAdjBi.has(e.to))
80
+ subAdjBi.set(e.to, []);
81
+ subAdjBi.get(e.to).push(e.from);
82
+ }
33
83
  const nodesWithEdges = new Set();
34
84
  for (const e of subEdges) {
35
85
  nodesWithEdges.add(e.from);
36
86
  nodesWithEdges.add(e.to);
37
87
  }
88
+ log?.(`partitionFlows: phase 3 — ${nodesWithEdges.size} cycle-nodes to process`);
38
89
  const componentVisited = new Set();
39
90
  for (const nid of nodesWithEdges) {
40
91
  if (componentVisited.has(nid))
41
92
  continue;
42
93
  // BFS bidirectional to find connected component
43
- const component = bfsBidirectional(nid, subEdges);
94
+ const component = bfsAdj(nid, subAdjBi);
44
95
  component.forEach(id => componentVisited.add(id));
45
96
  // Pick the first node as synthetic entry
46
97
  const syntheticEntry = nid;
47
- const reachable = bfs(syntheticEntry, subEdges);
98
+ const reachable = bfsAdj(syntheticEntry, subAdj);
48
99
  // Include all component members even if not forward-reachable
49
100
  component.forEach(id => reachable.add(id));
50
101
  flows.push({ id: syntheticEntry, entryPoint: syntheticEntry, nodeIds: [...reachable] });
@@ -52,8 +103,10 @@ function partitionFlows(nodes, edges) {
52
103
  }
53
104
  remaining = new Set([...remaining].filter(id => !visited.has(id)));
54
105
  }
106
+ log?.(`partitionFlows: phase 3 done in ${Date.now() - t3}ms`);
55
107
  // 4. True orphans — zero edges in the full graph
56
108
  const orphans = [...remaining];
109
+ log?.(`partitionFlows: DONE — ${flows.length} flows, ${orphans.length} orphans, total ${Date.now() - t0}ms`);
57
110
  return { flows, orphans };
58
111
  }
59
112
  function findEntryIds(nodeIds, edges) {
@@ -67,33 +120,19 @@ function findEntryIds(nodeIds, edges) {
67
120
  }
68
121
  return [...nodeIds].filter(id => !calledIds.has(id) && callerIds.has(id));
69
122
  }
70
- function bfs(startId, edges) {
71
- const visited = new Set([startId]);
72
- const queue = [startId];
73
- while (queue.length) {
74
- const current = queue.shift();
75
- for (const edge of edges) {
76
- if (edge.from === current && !visited.has(edge.to)) {
77
- visited.add(edge.to);
78
- queue.push(edge.to);
79
- }
80
- }
81
- }
82
- return visited;
83
- }
84
- function bfsBidirectional(startId, edges) {
123
+ /** BFS using a pre-built adjacency list (forward or bidirectional). */
124
+ function bfsAdj(startId, adjList) {
85
125
  const visited = new Set([startId]);
86
126
  const queue = [startId];
87
127
  while (queue.length) {
88
128
  const current = queue.shift();
89
- for (const edge of edges) {
90
- if (edge.from === current && !visited.has(edge.to)) {
91
- visited.add(edge.to);
92
- queue.push(edge.to);
93
- }
94
- if (edge.to === current && !visited.has(edge.from)) {
95
- visited.add(edge.from);
96
- queue.push(edge.from);
129
+ const neighbors = adjList.get(current);
130
+ if (neighbors) {
131
+ for (const neighbor of neighbors) {
132
+ if (!visited.has(neighbor)) {
133
+ visited.add(neighbor);
134
+ queue.push(neighbor);
135
+ }
97
136
  }
98
137
  }
99
138
  }
package/dist/types.d.ts CHANGED
@@ -46,6 +46,8 @@ export interface RawCall {
46
46
  callType?: 'direct' | 'ref' | 'concurrent' | 'goroutine';
47
47
  isRef?: boolean;
48
48
  }
49
+ /** Simple logger callback used by core graph functions to report progress. */
50
+ export type Logger = (message: string) => void;
49
51
  export interface LanguageAnalyzer {
50
52
  functionQuery: string;
51
53
  callQuery: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@codeflow-map/core",
3
- "version": "0.1.2",
3
+ "version": "0.2.0",
4
4
  "description": "Language-agnostic call-graph analysis engine powered by Tree-sitter. Parses TypeScript, JavaScript, TSX, JSX, Python, and Go source files into a structured call graph with flows.",
5
5
  "keywords": [
6
6
  "callsight",
@@ -18,10 +18,10 @@
18
18
  "author": "devricky-codes",
19
19
  "repository": {
20
20
  "type": "git",
21
- "url": "https://github.com/devricky-codes/callsight-vscode-extension.git",
21
+ "url": "https://github.com/devricky-codes/callsight-vscode.git",
22
22
  "directory": "packages/core"
23
23
  },
24
- "homepage": "https://github.com/devricky-codes/callsight-vscode-extension#readme",
24
+ "homepage": "https://github.com/devricky-codes/callsight-vscode#readme",
25
25
  "main": "dist/index.js",
26
26
  "types": "dist/index.d.ts",
27
27
  "files": [