@optave/codegraph 2.6.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ast.js ADDED
@@ -0,0 +1,418 @@
1
+ /**
2
+ * Stored queryable AST nodes — build-time extraction + query functions.
3
+ *
4
+ * Persists selected AST nodes (calls, new, string, regex, throw, await) in the
5
+ * `ast_nodes` table during build. Queryable via CLI (`codegraph ast`), MCP
6
+ * (`ast_query`), and programmatic API.
7
+ */
8
+
9
+ import path from 'node:path';
10
+ import { openReadonlyOrFail } from './db.js';
11
+ import { debug } from './logger.js';
12
+ import { paginateResult, printNdjson } from './paginate.js';
13
+ import { LANGUAGE_REGISTRY } from './parser.js';
14
+
15
+ // ─── Constants ────────────────────────────────────────────────────────
16
+
17
/** Every node kind this module stores in the `ast_nodes` table. */
export const AST_NODE_KINDS = ['call', 'new', 'string', 'regex', 'throw', 'await'];

/** One-character marker printed in front of each kind in human-readable output. */
const KIND_ICONS = {
  call: '\u0192', // ƒ
  new: '\u2295', // ⊕
  string: '"',
  regex: '/',
  throw: '\u2191', // ↑
  await: '\u22B3', // ⊳
};

/** Upper bound (in characters) applied to values written to the `text` column. */
const TEXT_MAX = 200;

/** Maps tree-sitter node types (JS/TS/TSX grammars) to the kind they are stored as. */
const JS_TS_AST_TYPES = {
  new_expression: 'new',
  throw_statement: 'throw',
  await_expression: 'await',
  string: 'string',
  template_string: 'string',
  regex: 'regex',
};

/**
 * File extensions that get the full AST walk (new/throw/await/string/regex):
 * every extension registered for the javascript, typescript and tsx languages.
 */
const WALK_EXTENSIONS = new Set(
  Object.values(LANGUAGE_REGISTRY)
    .filter((lang) => ['javascript', 'typescript', 'tsx'].includes(lang.id))
    .flatMap((lang) => [...lang.extensions]),
);
48
+
49
+ // ─── Helpers ──────────────────────────────────────────────────────────
50
+
51
/**
 * Limit a string to at most `max` characters.
 *
 * Text longer than `max` is cut to its first `max - 1` characters and gets a
 * trailing ellipsis (U+2026), so the result is exactly `max` characters long.
 *
 * @param {string|null|undefined} s - text to shorten
 * @param {number} [max=TEXT_MAX] - maximum length of the returned string
 * @returns {string|null} the (possibly shortened) text, or `null` for any falsy
 *   input — including the empty string
 */
function truncate(s, max = TEXT_MAX) {
  if (!s) return null;
  if (s.length > max) {
    const head = s.slice(0, max - 1);
    return `${head}\u2026`;
  }
  return s;
}
55
+
56
/**
 * Determine which constructor a `new_expression` node instantiates.
 *
 * The text of the first direct child that is an `identifier` or a
 * `member_expression` is used, so `new Foo()` yields "Foo" and `new a.Foo()`
 * yields "a.Foo". When no such child exists, the name is derived from the node's
 * source text: everything before the first "(", minus a "new " prefix.
 *
 * @param {object} node - tree-sitter `new_expression` node
 * @returns {string} constructor name, or "?" when nothing usable is found
 */
function extractNewName(node) {
  const total = node.childCount;
  let idx = 0;
  while (idx < total) {
    const candidate = node.child(idx);
    idx += 1;
    const { type } = candidate;
    if (type === 'identifier' || type === 'member_expression') {
      return candidate.text;
    }
  }

  // Fallback: strip the argument list and the keyword from the raw source text.
  const beforeArgs = node.text?.split('(')[0];
  return beforeArgs?.replace('new ', '').trim() || '?';
}
71
+
72
/**
 * Get the (truncated) source text of the operand of a throw/await node.
 *
 * The operand is taken to be the first direct child that is not the `throw` or
 * `await` keyword token. If every child is a keyword (or there are none), the
 * text of the whole node is used instead.
 *
 * @param {object} node - tree-sitter `throw_statement` or `await_expression` node
 * @returns {string|null} result of `truncate` on the selected text
 */
function extractExpressionText(node) {
  const keywordTypes = ['throw', 'await'];
  let source = node;
  for (let idx = 0; idx < node.childCount; idx += 1) {
    const candidate = node.child(idx);
    if (!keywordTypes.includes(candidate.type)) {
      source = candidate;
      break;
    }
  }
  return truncate(source.text);
}
85
+
86
/**
 * Derive a short, meaningful name for a throw or await node.
 *
 * The direct children are scanned in order and the first recognised operand decides:
 * - throw: `new X(...)` → constructor name, `f(...)` → callee text, identifier → its text
 * - await: `f(...)` → callee text, identifier or member expression → its text
 *
 * When nothing is recognised — or `kind` is neither "throw" nor "await" — the
 * truncated source text of the whole node is returned.
 *
 * @param {string} kind - AST node kind ("throw" or "await")
 * @param {object} node - tree-sitter node of that kind
 * @returns {string|null} extracted name
 */
function extractName(kind, node) {
  // Text of the `function` field of a call, else whatever precedes the first "(".
  const calleeOf = (callNode) => {
    const callee = callNode.childForFieldName('function');
    if (callee) return callee.text;
    return callNode.text?.split('(')[0] || '?';
  };

  if (kind === 'throw' || kind === 'await') {
    const isThrow = kind === 'throw';
    for (let idx = 0; idx < node.childCount; idx += 1) {
      const operand = node.child(idx);
      switch (operand.type) {
        case 'call_expression':
          return calleeOf(operand);
        case 'identifier':
          return operand.text;
        case 'new_expression':
          // Only throw looks through `new`: throw new Error(...) → "Error"
          if (isThrow) return extractNewName(operand);
          break;
        case 'member_expression':
          // Only await accepts a bare member expression: await this.ready → "this.ready"
          if (!isThrow) return operand.text;
          break;
        default:
          break;
      }
    }
  }

  return truncate(node.text);
}
121
+
122
/**
 * Find the narrowest enclosing definition for a given line.
 *
 * A definition encloses `line` when `def.line <= line` and its `endLine` is either
 * missing (`null`/`undefined`) or `>= line`. Among the enclosing definitions the one
 * covering the fewest lines wins.
 *
 * A definition without an `endLine` has no known extent, so it is treated as
 * infinitely wide: it is only chosen when no bounded definition encloses the line.
 * (Subtracting from a missing `endLine` would otherwise yield a negative number or
 * NaN and let such a definition shadow a properly bounded inner one.)
 * On equal width, the definition that starts later — i.e. closer to `line` — wins.
 *
 * @param {Array<{line: number, endLine?: number|null}>} defs - candidate definitions
 * @param {number} line - line to locate (same indexing as `def.line`)
 * @returns {object|null} the best matching definition, or `null` when none encloses `line`
 */
function findParentDef(defs, line) {
  let best = null;
  let bestSpan = Number.POSITIVE_INFINITY;

  for (const def of defs) {
    const encloses = def.line <= line && (def.endLine == null || def.endLine >= line);
    if (!encloses) continue;

    const span = def.endLine == null ? Number.POSITIVE_INFINITY : def.endLine - def.line;
    const narrower = span < bestSpan;
    const sameWidthButCloser = span === bestSpan && best !== null && def.line > best.line;

    if (best === null || narrower || sameWidthButCloser) {
      best = def;
      bestSpan = span;
    }
  }

  return best;
}
136
+
137
+ // ─── Build ────────────────────────────────────────────────────────────
138
+
139
/**
 * Extract AST nodes from parsed files and persist them to the `ast_nodes` table.
 *
 * For every file in `fileSymbols`:
 * 1. each entry of `symbols.calls` becomes a `call` row (all languages);
 * 2. for files whose extension is in `WALK_EXTENSIONS`, new/throw/await/string/regex
 *    rows are added — by walking `symbols._tree` when a tree-sitter tree is present,
 *    otherwise by copying the pre-extracted `symbols.astNodes`.
 * All rows of one file are inserted in a single transaction.
 *
 * If the insert statement cannot be prepared, the function logs a debug message and
 * returns without doing anything (the `ast_nodes` table is assumed to be missing).
 *
 * @param {object} db - open better-sqlite3 database (read-write)
 * @param {Map<string, object>} fileSymbols - Map<relPath, { definitions, calls, _tree, astNodes }>
 * @param {string} [_rootDir] - absolute project root path (unused)
 * @param {object} [_engineOpts] - engine options (unused)
 * @returns {Promise<void>}
 */
export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) {
  // Ensure table exists (migration may not have run on older DBs)
  let insertStmt;
  try {
    insertStmt = db.prepare(
      'INSERT INTO ast_nodes (file, line, kind, name, text, receiver, parent_node_id) VALUES (?, ?, ?, ?, ?, ?, ?)',
    );
  } catch {
    debug('ast_nodes table not found — skipping AST extraction');
    return;
  }

  // Bulk-fetch all node IDs per file (replaces per-def getNodeId calls)
  const bulkGetNodeIds = db.prepare('SELECT id, name, kind, line FROM nodes WHERE file = ?');

  const tx = db.transaction((rows) => {
    for (const r of rows) {
      insertStmt.run(r.file, r.line, r.kind, r.name, r.text, r.receiver, r.parentNodeId);
    }
  });

  let totalInserted = 0;

  for (const [relPath, symbols] of fileSymbols) {
    const rows = [];
    const defs = symbols.definitions || [];

    // Pre-load all node IDs for this file into a map keyed by name|kind|line
    const nodeIdMap = new Map();
    for (const row of bulkGetNodeIds.all(relPath)) {
      nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
    }

    // ID of the `nodes` row for the narrowest definition enclosing `line`, or null.
    const parentNodeIdAt = (line) => {
      const parentDef = findParentDef(defs, line);
      if (!parentDef) return null;
      return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
    };

    // 1. Call nodes from symbols.calls (all languages)
    if (symbols.calls) {
      for (const call of symbols.calls) {
        rows.push({
          file: relPath,
          line: call.line,
          kind: 'call',
          name: call.name,
          text: call.dynamic ? `[dynamic] ${call.name}` : null,
          receiver: call.receiver || null,
          parentNodeId: parentNodeIdAt(call.line),
        });
      }
    }

    // 2. AST walk for JS/TS/TSX — extract new, throw, await, string, regex
    const ext = path.extname(relPath).toLowerCase();
    if (WALK_EXTENSIONS.has(ext)) {
      if (symbols._tree) {
        // WASM path: walk the tree-sitter AST. walkAst appends straight into `rows`;
        // collecting into a temporary array and spreading it into push() would pass
        // one call argument per node and can throw RangeError on very large files.
        walkAst(symbols._tree.rootNode, defs, relPath, rows, nodeIdMap);
      } else if (symbols.astNodes?.length) {
        // Native path: use pre-extracted AST nodes from Rust
        for (const n of symbols.astNodes) {
          rows.push({
            file: relPath,
            line: n.line,
            kind: n.kind,
            name: n.name,
            text: n.text || null,
            receiver: n.receiver || null,
            parentNodeId: parentNodeIdAt(n.line),
          });
        }
      }
    }

    if (rows.length > 0) {
      tx(rows);
      totalInserted += rows.length;
    }
  }

  debug(`AST extraction: ${totalInserted} nodes stored`);
}
239
+
240
/**
 * Walk a tree-sitter AST and collect new/throw/await/string/regex nodes.
 *
 * Nodes are visited parent-first, children left to right, and one row is appended to
 * `rows` for every node whose type maps to a kind in `JS_TS_AST_TYPES`:
 * - strings whose content (outer quote characters removed) is shorter than two
 *   characters are not recorded, but their children are still visited;
 * - children of recorded new/throw/await nodes are NOT visited — what is needed has
 *   already been extracted and nested strings inside them are noise;
 * - children of recorded string/regex nodes are visited.
 *
 * The traversal keeps its own stack instead of recursing, so a very deeply nested
 * tree cannot exhaust the JavaScript call stack.
 *
 * @param {object} node - tree-sitter node to start from
 * @param {object[]} defs - definitions of the file, passed to `findParentDef`
 * @param {string} relPath - file path stored on every row
 * @param {object[]} rows - output array, appended to in place
 * @param {Map<string, *>} nodeIdMap - lookup from `name|kind|line` to the parent node id
 * @returns {void}
 */
function walkAst(node, defs, relPath, rows, nodeIdMap) {
  const pending = [node];

  while (pending.length > 0) {
    const current = pending.pop();

    // Own-property lookup: a node type that happens to equal an inherited object
    // member (e.g. "constructor") must not be mistaken for a mapped kind.
    const kind = Object.prototype.hasOwnProperty.call(JS_TS_AST_TYPES, current.type)
      ? JS_TS_AST_TYPES[current.type]
      : undefined;

    let descend = true;

    if (kind) {
      // tree-sitter lines are 0-indexed, our DB uses 1-indexed
      const line = current.startPosition.row + 1;

      let name;
      let text = null;
      let record = true;

      if (kind === 'new') {
        name = extractNewName(current);
        text = truncate(current.text);
      } else if (kind === 'throw' || kind === 'await') {
        name = extractName(kind, current);
        text = extractExpressionText(current);
      } else if (kind === 'string') {
        // Skip trivial strings (length < 2 after removing quotes)
        const content = current.text?.replace(/^['"`]|['"`]$/g, '') || '';
        if (content.length < 2) {
          record = false;
        } else {
          name = truncate(content, 100);
          text = truncate(current.text);
        }
      } else if (kind === 'regex') {
        name = current.text || '?';
        text = truncate(current.text);
      }

      if (record) {
        const parentDef = findParentDef(defs, line);
        const parentNodeId = parentDef
          ? nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null
          : null;

        rows.push({
          file: relPath,
          line,
          kind,
          name,
          text,
          receiver: null,
          parentNodeId,
        });

        // Only strings and regexes are searched further once recorded.
        descend = kind === 'string' || kind === 'regex';
      }
    }

    if (descend) {
      // Push right-to-left so children are popped (visited) left-to-right.
      for (let i = current.childCount - 1; i >= 0; i--) {
        pending.push(current.child(i));
      }
    }
  }
}
303
+
304
+ // ─── Query ────────────────────────────────────────────────────────────
305
+
306
/**
 * Query AST nodes — data-returning function.
 *
 * Builds one parameterised SELECT over `ast_nodes` (left-joined to `nodes` for the
 * parent definition), ordered by file and line, then hands the mapped rows to
 * `paginateResult`. The database is always closed again, even when the query throws.
 *
 * @param {string} [pattern] - GLOB pattern for the node name. A pattern without `*`
 *   is wrapped as `*pattern*` (substring match); an empty pattern or `*` matches all.
 * @param {string} [customDbPath] - path to graph.db
 * @param {object} [opts]
 * @param {string} [opts.kind] - only return nodes of this kind
 * @param {string} [opts.file] - only return nodes whose file path contains this text (SQL LIKE)
 * @param {boolean} [opts.noTests] - exclude `.test.`, `.spec.`, `.stories.`, `__test__`
 *   and `__tests__` paths
 * @param {number} [opts.limit] - forwarded to `paginateResult`
 * @param {number} [opts.offset] - forwarded to `paginateResult`
 * @returns {{ pattern, kind, count, results, _pagination? }}
 */
export function astQueryData(pattern, customDbPath, opts = {}) {
  const { kind, file, noTests, limit, offset } = opts;
  const db = openReadonlyOrFail(customDbPath);

  let rows;
  try {
    let where = 'WHERE 1=1';
    const params = [];

    // Pattern matching
    if (pattern && pattern !== '*') {
      // If user already uses wildcards, use as-is; otherwise wrap in *..* for substring
      const globPattern = pattern.includes('*') ? pattern : `*${pattern}*`;
      where += ' AND a.name GLOB ?';
      params.push(globPattern);
    }

    if (kind) {
      where += ' AND a.kind = ?';
      params.push(kind);
    }

    if (file) {
      where += ' AND a.file LIKE ?';
      params.push(`%${file}%`);
    }

    if (noTests) {
      // "_" is a single-character wildcard in LIKE. Without ESCAPE, '%__test__%' also
      // matches ordinary names such as "contest.js" or "latest.js".
      where += ` AND a.file NOT LIKE '%.test.%'
        AND a.file NOT LIKE '%.spec.%'
        AND a.file NOT LIKE '%\\_\\_test\\_\\_%' ESCAPE '\\'
        AND a.file NOT LIKE '%\\_\\_tests\\_\\_%' ESCAPE '\\'
        AND a.file NOT LIKE '%.stories.%'`;
    }

    const sql = `
      SELECT a.kind, a.name, a.file, a.line, a.text, a.receiver, a.parent_node_id,
             p.name AS parent_name, p.kind AS parent_kind, p.file AS parent_file
      FROM ast_nodes a
      LEFT JOIN nodes p ON a.parent_node_id = p.id
      ${where}
      ORDER BY a.file, a.line
    `;

    rows = db.prepare(sql).all(...params);
  } finally {
    // Release the handle on success and on failure alike.
    db.close();
  }

  const results = rows.map((r) => ({
    kind: r.kind,
    name: r.name,
    file: r.file,
    line: r.line,
    text: r.text,
    receiver: r.receiver,
    parent: r.parent_node_id
      ? { name: r.parent_name, kind: r.parent_kind, file: r.parent_file }
      : null,
  }));

  const data = {
    pattern: pattern || '*',
    kind: kind || null,
    count: results.length,
    results,
  };

  return paginateResult(data, 'results', { limit, offset });
}
380
+
381
/**
 * Query AST nodes and print the result.
 *
 * Output format is selected through `opts`:
 * - `opts.ndjson` — delegate to `printNdjson` (one record per result);
 * - `opts.json`   — pretty-printed JSON of the whole result object;
 * - otherwise     — a human-readable list, one line per node, followed by a hint
 *   when the pagination info reports more results.
 *
 * @param {string} [pattern] - name pattern, forwarded to `astQueryData`
 * @param {string} [customDbPath] - path to graph.db
 * @param {object} [opts] - query options (forwarded) plus `ndjson` / `json` output flags
 * @returns {void}
 */
export function astQuery(pattern, customDbPath, opts = {}) {
  const data = astQueryData(pattern, customDbPath, opts);

  if (opts.ndjson) {
    printNdjson(data, 'results');
    return;
  }
  if (opts.json) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }

  // Human-readable output
  const matching = pattern ? ` matching "${pattern}"` : '';

  if (data.results.length === 0) {
    console.log(`No AST nodes found${matching}.`);
    return;
  }

  const kindLabel = opts.kind ? ` (kind: ${opts.kind})` : '';
  console.log(`\n${data.count} AST nodes${matching}${kindLabel}:\n`);

  data.results.forEach(({ kind, name, file, line, parent }) => {
    const icon = KIND_ICONS[kind] || '?';
    const parentInfo = parent ? ` (in ${parent.name})` : '';
    console.log(` ${icon} ${name} -- ${file}:${line}${parentInfo}`);
  });

  const page = data._pagination;
  if (page?.hasMore) {
    const remaining = page.total - page.offset - page.returned;
    const nextOffset = page.offset + page.limit;
    console.log(`\n ... ${remaining} more (use --offset ${nextOffset})`);
  }
  console.log();
}
package/src/batch.js CHANGED
@@ -6,15 +6,16 @@
6
6
  */
7
7
 
8
8
  import { complexityData } from './complexity.js';
9
+ import { dataflowData } from './dataflow.js';
9
10
  import { flowData } from './flow.js';
10
11
  import {
11
12
  contextData,
12
13
  explainData,
14
+ exportsData,
13
15
  fileDepsData,
14
16
  fnDepsData,
15
17
  fnImpactData,
16
18
  impactAnalysisData,
17
- queryNameData,
18
19
  whereData,
19
20
  } from './queries.js';
20
21
 
@@ -31,11 +32,12 @@ export const BATCH_COMMANDS = {
31
32
  context: { fn: contextData, sig: 'name' },
32
33
  explain: { fn: explainData, sig: 'target' },
33
34
  where: { fn: whereData, sig: 'target' },
34
- query: { fn: queryNameData, sig: 'name' },
35
- fn: { fn: fnDepsData, sig: 'name' },
35
+ query: { fn: fnDepsData, sig: 'name' },
36
36
  impact: { fn: impactAnalysisData, sig: 'file' },
37
37
  deps: { fn: fileDepsData, sig: 'file' },
38
+ exports: { fn: exportsData, sig: 'file' },
38
39
  flow: { fn: flowData, sig: 'name' },
40
+ dataflow: { fn: dataflowData, sig: 'name' },
39
41
  complexity: { fn: complexityData, sig: 'dbOnly' },
40
42
  };
41
43
 
@@ -88,3 +90,91 @@ export function batch(command, targets, customDbPath, opts = {}) {
88
90
  const data = batchData(command, targets, customDbPath, opts);
89
91
  console.log(JSON.stringify(data, null, 2));
90
92
  }
93
+
94
/**
 * Flatten comma-separated positional arguments into one entry per target.
 * `['a,b', 'c']` → `['a', 'b', 'c']`.
 *
 * String items are split on `,`; each piece is trimmed and empty pieces are dropped.
 * Items that are not strings (e.g. objects) are kept as they are, in position.
 *
 * @param {Array<string|object>} targets
 * @returns {Array<string|object>} a new array; `targets` is not modified
 */
export function splitTargets(targets) {
  return [...targets].flatMap((item) => {
    if (typeof item !== 'string') {
      // Wrapped so flatMap keeps the item itself, even if it is an array.
      return [item];
    }
    return item
      .split(',')
      .map((piece) => piece.trim())
      .filter((piece) => piece !== '');
  });
}
116
+
117
/**
 * Multi-command batch orchestration — run different commands per target.
 *
 * Each item is executed independently; a failure is recorded in that item's result
 * and never aborts the remaining items. An item fails when its `command` is not an
 * own key of `BATCH_COMMANDS` (this also covers missing/`null` items and inherited
 * names such as "constructor") or when the command function throws.
 *
 * Commands with signature `dbOnly` are called as `fn(customDbPath, { ...opts, target })`,
 * all others as `fn(target, customDbPath, opts)`.
 *
 * @param {Array<{command: string, target: string, opts?: object}>} items
 * @param {string} [customDbPath]
 * @param {object} [sharedOpts] - Default opts merged under per-item opts
 * @returns {{ mode: 'multi', total: number, succeeded: number, failed: number, results: object[] }}
 */
export function multiBatchData(items, customDbPath, sharedOpts = {}) {
  const results = [];
  let succeeded = 0;
  let failed = 0;

  for (const item of items) {
    // A null/undefined item is reported as an unknown command instead of throwing.
    const { command, target, opts: itemOpts } = item ?? {};

    // Own-property check: `BATCH_COMMANDS['constructor']` and friends would otherwise
    // resolve to inherited members and be treated as valid commands.
    const entry = Object.prototype.hasOwnProperty.call(BATCH_COMMANDS, command)
      ? BATCH_COMMANDS[command]
      : undefined;

    if (!entry) {
      results.push({
        command,
        target,
        ok: false,
        error: `Unknown batch command "${command}". Valid commands: ${Object.keys(BATCH_COMMANDS).join(', ')}`,
      });
      failed++;
      continue;
    }

    const merged = { ...sharedOpts, ...itemOpts };

    try {
      let data;
      if (entry.sig === 'dbOnly') {
        data = entry.fn(customDbPath, { ...merged, target });
      } else {
        data = entry.fn(target, customDbPath, merged);
      }
      results.push({ command, target, ok: true, data });
      succeeded++;
    } catch (err) {
      // Anything can be thrown; keep a readable message for non-Error values too.
      const message = err instanceof Error ? err.message : String(err);
      results.push({ command, target, ok: false, error: message });
      failed++;
    }
  }

  return { mode: 'multi', total: items.length, succeeded, failed, results };
}
164
+
165
/**
 * CLI wrapper for batch-query — prints the batch result as pretty-printed JSON.
 *
 * Mode is decided from the FIRST target only:
 * - a non-null object with a truthy `command` → multi-command mode (`multiBatchData`);
 * - anything else (strings, `null`, empty list) → single-command mode (`batchData`)
 *   using `opts.command`, which defaults to 'where'.
 * `opts.command` is consumed here; the remaining options are passed through.
 *
 * @param {Array<string|object>} targets - target names, or `{command, target, opts?}` items
 * @param {string} [customDbPath] - path to graph.db
 * @param {object} [opts] - `command` plus options forwarded to the batch function
 * @returns {void}
 */
export function batchQuery(targets, customDbPath, opts = {}) {
  const { command: defaultCommand = 'where', ...rest } = opts;

  // `typeof null` is 'object', so null has to be excluded before reading `.command`.
  const first = targets[0];
  const isMulti = first !== null && typeof first === 'object' && Boolean(first.command);

  const data = isMulti
    ? multiBatchData(targets, customDbPath, rest)
    : batchData(defaultCommand, targets, customDbPath, rest);

  console.log(JSON.stringify(data, null, 2));
}