@monoes/graph 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/src/analyze.d.ts +23 -0
  2. package/dist/src/analyze.d.ts.map +1 -0
  3. package/dist/src/analyze.js +105 -0
  4. package/dist/src/analyze.js.map +1 -0
  5. package/dist/src/build.d.ts +8 -0
  6. package/dist/src/build.d.ts.map +1 -0
  7. package/dist/src/build.js +59 -0
  8. package/dist/src/build.js.map +1 -0
  9. package/dist/src/cache.d.ts +10 -0
  10. package/dist/src/cache.d.ts.map +1 -0
  11. package/dist/src/cache.js +34 -0
  12. package/dist/src/cache.js.map +1 -0
  13. package/dist/src/cluster.d.ts +8 -0
  14. package/dist/src/cluster.d.ts.map +1 -0
  15. package/dist/src/cluster.js +50 -0
  16. package/dist/src/cluster.js.map +1 -0
  17. package/dist/src/detect.d.ts +8 -0
  18. package/dist/src/detect.d.ts.map +1 -0
  19. package/dist/src/detect.js +108 -0
  20. package/dist/src/detect.js.map +1 -0
  21. package/dist/src/export.d.ts +21 -0
  22. package/dist/src/export.d.ts.map +1 -0
  23. package/dist/src/export.js +68 -0
  24. package/dist/src/export.js.map +1 -0
  25. package/dist/src/extract/index.d.ts +20 -0
  26. package/dist/src/extract/index.d.ts.map +1 -0
  27. package/dist/src/extract/index.js +158 -0
  28. package/dist/src/extract/index.js.map +1 -0
  29. package/dist/src/extract/languages/go.d.ts +3 -0
  30. package/dist/src/extract/languages/go.d.ts.map +1 -0
  31. package/dist/src/extract/languages/go.js +181 -0
  32. package/dist/src/extract/languages/go.js.map +1 -0
  33. package/dist/src/extract/languages/python.d.ts +3 -0
  34. package/dist/src/extract/languages/python.d.ts.map +1 -0
  35. package/dist/src/extract/languages/python.js +230 -0
  36. package/dist/src/extract/languages/python.js.map +1 -0
  37. package/dist/src/extract/languages/rust.d.ts +3 -0
  38. package/dist/src/extract/languages/rust.d.ts.map +1 -0
  39. package/dist/src/extract/languages/rust.js +195 -0
  40. package/dist/src/extract/languages/rust.js.map +1 -0
  41. package/dist/src/extract/languages/typescript.d.ts +3 -0
  42. package/dist/src/extract/languages/typescript.d.ts.map +1 -0
  43. package/dist/src/extract/languages/typescript.js +295 -0
  44. package/dist/src/extract/languages/typescript.js.map +1 -0
  45. package/dist/src/extract/tree-sitter-runner.d.ts +48 -0
  46. package/dist/src/extract/tree-sitter-runner.d.ts.map +1 -0
  47. package/dist/src/extract/tree-sitter-runner.js +128 -0
  48. package/dist/src/extract/tree-sitter-runner.js.map +1 -0
  49. package/dist/src/extract/types.d.ts +7 -0
  50. package/dist/src/extract/types.d.ts.map +1 -0
  51. package/dist/src/extract/types.js +2 -0
  52. package/dist/src/extract/types.js.map +1 -0
  53. package/dist/src/index.d.ts +11 -0
  54. package/dist/src/index.d.ts.map +1 -0
  55. package/dist/src/index.js +9 -0
  56. package/dist/src/index.js.map +1 -0
  57. package/dist/src/pipeline.d.ts +16 -0
  58. package/dist/src/pipeline.d.ts.map +1 -0
  59. package/dist/src/pipeline.js +143 -0
  60. package/dist/src/pipeline.js.map +1 -0
  61. package/dist/src/types.d.ts +99 -0
  62. package/dist/src/types.d.ts.map +1 -0
  63. package/dist/src/types.js +2 -0
  64. package/dist/src/types.js.map +1 -0
  65. package/dist/tsconfig.tsbuildinfo +1 -0
  66. package/package.json +44 -0
  67. package/src/analyze.ts +122 -0
  68. package/src/build.ts +62 -0
  69. package/src/cache.ts +38 -0
  70. package/src/cluster.ts +54 -0
  71. package/src/detect.ts +123 -0
  72. package/src/export.ts +78 -0
  73. package/src/extract/index.ts +190 -0
  74. package/src/extract/languages/go.ts +206 -0
  75. package/src/extract/languages/python.ts +270 -0
  76. package/src/extract/languages/rust.ts +230 -0
  77. package/src/extract/languages/typescript.ts +344 -0
  78. package/src/extract/tree-sitter-runner.ts +165 -0
  79. package/src/extract/types.ts +7 -0
  80. package/src/index.ts +10 -0
  81. package/src/pipeline.ts +166 -0
  82. package/src/types.ts +116 -0
@@ -0,0 +1,190 @@
1
+ import { readFileSync, mkdirSync } from 'fs';
2
+ import { extname } from 'path';
3
+ import type { ClassifiedFile, ExtractionResult, BuildOptions, GraphNode, GraphEdge } from '../types.js';
4
+ import type { LanguageExtractor } from './types.js';
5
+ import { FileCache } from '../cache.js';
6
+ import { typescriptExtractor } from './languages/typescript.js';
7
+ import { pythonExtractor } from './languages/python.js';
8
+ import { goExtractor } from './languages/go.js';
9
+ import { rustExtractor } from './languages/rust.js';
10
+
11
+ // ---- registry ----
12
+
13
/** Every language extractor this module can dispatch to. */
const EXTRACTORS: LanguageExtractor[] = [
  typescriptExtractor,
  pythonExtractor,
  goExtractor,
  rustExtractor,
];

/**
 * Lookup from file extension → extractor.
 *
 * Keys are lower-cased at registration because resolveExtractor() lower-cases
 * the extension before looking it up; without this an extractor declaring
 * e.g. '.PY' could never be found. If two extractors register the same key,
 * the later entry wins (Map constructor semantics).
 */
const EXT_MAP = new Map<string, LanguageExtractor>(
  EXTRACTORS.flatMap((e) =>
    e.extensions.map((ext): [string, LanguageExtractor] => [ext.toLowerCase(), e]),
  ),
);

/**
 * Lookup from language name → extractor.
 *
 * Keys are lower-cased for the same reason as EXT_MAP: callers look languages
 * up case-insensitively.
 */
const LANG_MAP = new Map<string, LanguageExtractor>(
  EXTRACTORS.map((e): [string, LanguageExtractor] => [e.language.toLowerCase(), e]),
);
29
+
30
/**
 * Pick the extractor responsible for a classified file.
 *
 * The file's declared language (compared case-insensitively) is tried first;
 * when it is absent or not registered, the lower-cased file extension is used.
 *
 * @param file - Classified file whose `language` and `path` drive the lookup
 * @returns the matching extractor, or `null` when neither lookup succeeds
 */
function resolveExtractor(file: ClassifiedFile): LanguageExtractor | null {
  const declared = file.language
    ? LANG_MAP.get(file.language.toLowerCase())
    : undefined;
  if (declared) {
    return declared;
  }

  const suffix = extname(file.path).toLowerCase();
  const byExtension = EXT_MAP.get(suffix);
  return byExtension === undefined ? null : byExtension;
}
38
+
39
+ // ---- result merging ----
40
+
41
/**
 * Fold many per-file extraction results into a single result.
 *
 * - Nodes are deduplicated by `id`; the first node seen for an id is kept.
 * - Edges are deduplicated by the `source||target||relation` triple; the
 *   first edge seen for a triple is kept, in encounter order.
 * - `filesProcessed` and `fromCache` are summed; `errors` are concatenated.
 *
 * @param results - Partial results, in the order they should be merged
 * @returns the combined result (all-empty / zero when `results` is empty)
 */
function mergeResults(results: ExtractionResult[]): ExtractionResult {
  const nodesById = new Map<string, GraphNode>();
  const seenEdgeKeys = new Set<string>();
  const uniqueEdges: GraphEdge[] = [];
  const allErrors: string[] = [];
  let processedTotal = 0;
  let cachedTotal = 0;

  results.forEach((partial) => {
    processedTotal += partial.filesProcessed;
    cachedTotal += partial.fromCache;
    for (const message of partial.errors) {
      allErrors.push(message);
    }

    partial.nodes.forEach((candidate) => {
      // First occurrence of an id wins.
      if (nodesById.has(candidate.id)) return;
      nodesById.set(candidate.id, candidate);
    });

    partial.edges.forEach((candidate) => {
      const identity = `${candidate.source}||${candidate.target}||${candidate.relation}`;
      if (seenEdgeKeys.has(identity)) return;
      seenEdgeKeys.add(identity);
      uniqueEdges.push(candidate);
    });
  });

  return {
    nodes: Array.from(nodesById.values()),
    edges: uniqueEdges,
    filesProcessed: processedTotal,
    fromCache: cachedTotal,
    errors: allErrors,
  };
}
79
+
80
+ // ---- public API ----
81
+
82
/**
 * Extract graph nodes and edges from all classified files.
 *
 * For each file, in order:
 *  1. Files larger than `maxFileSizeBytes` (default 500 KB) are not read; they
 *     count as processed and contribute a "Skipped ..." error message.
 *  2. Files excluded by the `languages` filter, or for which no extractor is
 *     registered, are skipped silently and are not counted.
 *  3. The file is read as UTF-8; a read failure is recorded as an error.
 *  4. The cache is consulted with a key derived by `FileCache.key()` from the
 *     file path and content; a hit is returned with `fromCache: 1`.
 *  5. Otherwise the language extractor runs and its result is cached.
 *
 * A result is only cached when the extractor returned normally. If the
 * extractor throws, the failure is reported but NOT cached, so the file is
 * retried on the next run instead of replaying a possibly transient error.
 *
 * All per-file results are merged into one ExtractionResult (nodes
 * deduplicated by id, edges by source+target+relation).
 *
 * @param files - Classified files to process (from the classify layer)
 * @param outputDir - Directory used for cache storage; created if missing
 * @param options - Build options (languages filter, maxFileSizeBytes, etc.)
 * @returns the merged extraction result for all files
 */
export async function extractAll(
  files: ClassifiedFile[],
  outputDir: string,
  options?: BuildOptions,
): Promise<ExtractionResult> {
  const maxSize = options?.maxFileSizeBytes ?? 500 * 1024; // 500 KB
  const langFilter = options?.languages?.map((l) => l.toLowerCase()) ?? [];

  /** Human-readable text for anything that can be thrown. */
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  /** A per-file result that carries only an error message. */
  const failure = (message: string): ExtractionResult => ({
    nodes: [],
    edges: [],
    filesProcessed: 1,
    fromCache: 0,
    errors: [message],
  });

  mkdirSync(outputDir, { recursive: true });
  const cache = new FileCache(outputDir);

  const results: ExtractionResult[] = [];

  for (const file of files) {
    // Skip files that are too large
    if (file.sizeBytes > maxSize) {
      results.push(
        failure(`Skipped ${file.path}: file size ${file.sizeBytes} exceeds limit ${maxSize}`),
      );
      continue;
    }

    // Resolve once; the result serves both the language filter and dispatch.
    const extractor = resolveExtractor(file);

    // Apply language filter: keep the file when either its declared language
    // or its extractor's language is listed (both compared lower-cased).
    if (langFilter.length > 0) {
      const fileLang = (file.language ?? '').toLowerCase();
      const extractorLang = extractor ? extractor.language.toLowerCase() : '';
      if (!langFilter.includes(fileLang) && !langFilter.includes(extractorLang)) {
        continue;
      }
    }

    if (!extractor) {
      // No extractor for this file type — skip silently
      continue;
    }

    // Read content
    let content: string;
    try {
      content = readFileSync(file.path, 'utf8');
    } catch (err) {
      results.push(failure(`Failed to read ${file.path}: ${describeError(err)}`));
      continue;
    }

    // Check cache using FileCache (keyed on filePath + content)
    const cacheKey = cache.key(file.path, content);
    const cached = cache.get(cacheKey);
    if (cached) {
      results.push({
        ...cached,
        filesProcessed: 1,
        fromCache: 1,
      });
      continue;
    }

    // Extract
    let result: ExtractionResult;
    try {
      result = extractor.extract(file.path, content);
    } catch (err) {
      // Deliberately not cached: a thrown error may be transient.
      results.push(failure(`Extractor error for ${file.path}: ${describeError(err)}`));
      continue;
    }

    // Store successful extraction in cache
    cache.set(cacheKey, result);
    results.push(result);
  }

  return mergeResults(results);
}
183
+
184
+ // Re-export types and utilities for consumers
185
+ export type { LanguageExtractor } from './types.js';
186
+ export { typescriptExtractor } from './languages/typescript.js';
187
+ export { pythonExtractor } from './languages/python.js';
188
+ export { goExtractor } from './languages/go.js';
189
+ export { rustExtractor } from './languages/rust.js';
190
+ export { isTreeSitterAvailable, tryLoadParser, walk, parseFile, parseFileFromDisk } from './tree-sitter-runner.js';
@@ -0,0 +1,206 @@
1
+ import { basename } from 'path';
2
+ import type { GraphNode, GraphEdge, ExtractionResult } from '../../types.js';
3
+ import type { LanguageExtractor } from '../types.js';
4
+ import {
5
+ tryLoadParser,
6
+ walk,
7
+ type SyntaxNodeLike,
8
+ } from '../tree-sitter-runner.js';
9
+
10
+ // ---- helpers ----
11
+
12
/**
 * Text of a syntax node's `name` field.
 *
 * @returns the field's text, or '' when the node has no `name` field
 */
function nodeName(node: SyntaxNodeLike): string {
  const named = node.childForFieldName('name');
  if (named === null || named === undefined) {
    return '';
  }
  return named.text ?? '';
}
16
+
17
/** Source location label for a node: `L` followed by its 1-based start row. */
function loc(node: SyntaxNodeLike): string {
  const oneBasedRow = node.startPosition.row + 1;
  return 'L' + String(oneBasedRow);
}
20
+
21
+ // ---- tree-sitter extraction ----
22
+
23
/**
 * Extract Go declarations and imports from `content` via the tree-sitter parser.
 *
 * Emits:
 *  - one node per function; methods are qualified as `ReceiverType.Method`
 *  - one node per `type_spec`, with `nodeKind` 'struct', 'interface' or 'type'
 *  - one 'imports' edge per import spec, sourced at the file's basename
 *
 * When no Go parser can be loaded the result is empty and error-free, which
 * lets the caller fall back to the regex extractor. A parse failure is
 * reported through `errors`.
 *
 * @param filePath - Path of the file, recorded as `sourceFile` on every item
 * @param content - Full source text of the file
 */
function extractWithTreeSitter(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const errors: string[] = [];

  const parser = tryLoadParser('go');
  if (!parser) {
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  let tree: { rootNode: SyntaxNodeLike };
  try {
    tree = parser.parse(content);
  } catch (err) {
    errors.push(`tree-sitter parse error in ${filePath}: ${String(err)}`);
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  const fileId = basename(filePath);

  /**
   * Bare type name from receiver text. Handles every receiver shape:
   * "(r *T)", "(r T)", "(*T)", "(T)" and generic "(l *List[T])" → "List".
   * The variable name and pointer star are optional, and type parameters are
   * dropped so the id matches the id of the type's own node.
   */
  const receiverTypeName = (receiverText: string): string => {
    const match = receiverText.match(/^\(\s*(?:\w+\s+)?\*?\s*(\w+)/);
    return match?.[1] ?? '';
  };

  /** Record an 'imports' edge for one import_spec node. */
  const addImport = (spec: SyntaxNodeLike): void => {
    const pathNode = spec.childForFieldName('path');
    if (!pathNode) return;
    edges.push({
      source: fileId,
      // Import paths may be quoted with "..." or written as raw `...` strings.
      target: pathNode.text.replace(/^["`]|["`]$/g, ''),
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: loc(spec),
    });
  };

  walk(tree.rootNode, (n) => {
    // ---- function and method declarations ----
    if (n.type === 'function_declaration' || n.type === 'method_declaration') {
      const name = nodeName(n);
      if (name) {
        let qualifiedName = name;
        if (n.type === 'method_declaration') {
          const receiver = n.childForFieldName('receiver');
          const receiverType = receiver ? receiverTypeName(receiver.text) : '';
          // Fall back to the bare method name if the receiver is unreadable.
          if (receiverType) qualifiedName = `${receiverType}.${name}`;
        }
        nodes.push({
          id: qualifiedName,
          label: qualifiedName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
      }
      return;
    }

    // ---- type declarations (struct, interface, other) ----
    if (n.type === 'type_declaration') {
      for (const child of n.children) {
        if (child.type !== 'type_spec') continue;

        const typeName = child.childForFieldName('name')?.text ?? '';
        if (!typeName) continue;

        const typeNode = child.childForFieldName('type');
        const typeKind =
          typeNode?.type === 'struct_type' ? 'struct'
          : typeNode?.type === 'interface_type' ? 'interface'
          : 'type';

        nodes.push({
          id: typeName,
          label: typeName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(child),
          nodeKind: typeKind,
        });
      }
      return;
    }

    // ---- import declarations: single spec or parenthesised spec list ----
    if (n.type === 'import_declaration') {
      for (const child of n.children) {
        if (child.type === 'import_spec') {
          addImport(child);
        } else if (child.type === 'import_spec_list') {
          for (const spec of child.children) {
            if (spec.type === 'import_spec') addImport(spec);
          }
        }
      }
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
}
136
+
137
+ // ---- regex fallback ----
138
+
139
/**
 * Line-based regex fallback for Go files, used when tree-sitter yields nothing.
 *
 * Recognises, one line at a time (after trimming):
 *  - `import ( ... )` blocks and single-line imports, including aliased,
 *    blank (`_`) and dot (`.`) imports, with "..." or raw `...` paths
 *  - `func Name(` and methods with any receiver shape: `(r *T)`, `(r T)`,
 *    `(*T)`, `(T)` and generic `(l *List[T])`; methods become `Type.Method`
 *  - `type Name struct` / `type Name interface`, optionally with type params
 *
 * This is a heuristic: declarations split across lines, and `type ( ... )`
 * groups, are not detected.
 *
 * @param filePath - Path of the file, recorded as `sourceFile` on every item
 * @param content - Full source text of the file
 */
function extractWithRegex(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const fileId = basename(filePath);

  // Hoisted so the patterns are built once rather than once per line.
  const quotedPath = /["`]([^"`]+)["`]/;
  const singleImport = /^import\s+(?:[\w.]+\s+)?["`]([^"`]+)["`]/;
  const funcDecl = /^func\s+(?:\(\s*(?:\w+\s+)?\*?\s*(\w+)(?:\[[^\]]*\])?\s*\)\s*)?(\w+)/;
  const structDecl = /^type\s+(\w+)(?:\[[^\]]*\])?\s+struct/;
  const ifaceDecl = /^type\s+(\w+)(?:\[[^\]]*\])?\s+interface/;

  const addImport = (target: string, location: string): void => {
    edges.push({ source: fileId, target, relation: 'imports', confidence: 'EXTRACTED', sourceFile: filePath, sourceLocation: location });
  };

  let inImportBlock = false;

  content.split('\n').forEach((line, idx) => {
    const location = `L${idx + 1}`;
    const trimmed = line.trim();

    // Detect import blocks
    if (trimmed === 'import (') { inImportBlock = true; return; }

    if (inImportBlock) {
      if (trimmed === ')') { inImportBlock = false; return; }
      // Commented-out entries must not be reported as imports.
      if (!trimmed.startsWith('//')) {
        const blockMatch = quotedPath.exec(trimmed);
        if (blockMatch?.[1]) addImport(blockMatch[1], location);
      }
      return;
    }

    // Single-line import: import "pkg", import alias "pkg", import _ "pkg"
    const singleMatch = singleImport.exec(trimmed);
    if (singleMatch?.[1]) addImport(singleMatch[1], location);

    // Function or method
    const funcMatch = funcDecl.exec(trimmed);
    if (funcMatch?.[2]) {
      const receiver = funcMatch[1];
      const name = funcMatch[2];
      const qualifiedName = receiver ? `${receiver}.${name}` : name;
      nodes.push({ id: qualifiedName, label: qualifiedName, fileType: 'code', sourceFile: filePath, sourceLocation: location });
    }

    // Struct: type Name struct
    const structMatch = structDecl.exec(trimmed);
    if (structMatch?.[1]) {
      nodes.push({ id: structMatch[1], label: structMatch[1], fileType: 'code', sourceFile: filePath, sourceLocation: location, nodeKind: 'struct' });
    }

    // Interface: type Name interface
    const ifaceMatch = ifaceDecl.exec(trimmed);
    if (ifaceMatch?.[1]) {
      nodes.push({ id: ifaceMatch[1], label: ifaceMatch[1], fileType: 'code', sourceFile: filePath, sourceLocation: location, nodeKind: 'interface' });
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors: [] };
}
192
+
193
+ // ---- extractor implementation ----
194
+
195
/**
 * Go language extractor for `.go` files.
 *
 * `extract` tries the tree-sitter extractor first and keeps its result when it
 * produced any node, edge or error; an entirely empty result triggers the
 * regex fallback instead.
 */
export const goExtractor: LanguageExtractor = {
  language: 'go',
  extensions: ['.go'],

  extract(filePath: string, content: string): ExtractionResult {
    const parsed = extractWithTreeSitter(filePath, content);
    const producedNothing =
      parsed.nodes.length === 0 &&
      parsed.edges.length === 0 &&
      parsed.errors.length === 0;
    return producedNothing ? extractWithRegex(filePath, content) : parsed;
  },
};
@@ -0,0 +1,270 @@
1
+ import { basename } from 'path';
2
+ import type { GraphNode, GraphEdge, ExtractionResult } from '../../types.js';
3
+ import type { LanguageExtractor } from '../types.js';
4
+ import {
5
+ tryLoadParser,
6
+ walk,
7
+ type SyntaxNodeLike,
8
+ } from '../tree-sitter-runner.js';
9
+
10
+ // ---- helpers ----
11
+
12
/**
 * Read the text of the child stored under a node's `name` field.
 *
 * @returns that child's text, or '' if the field is absent
 */
function nodeName(node: SyntaxNodeLike): string {
  const nameChild = node.childForFieldName('name');
  const label = nameChild ? nameChild.text : undefined;
  return label ?? '';
}
16
+
17
/** Format a node's start row (0-based in the tree) as a 1-based `L<row>` label. */
function loc(node: SyntaxNodeLike): string {
  const { row } = node.startPosition;
  return ['L', row + 1].join('');
}
20
+
21
+ // ---- tree-sitter extraction ----
22
+
23
/**
 * Extract Python definitions, imports, inheritance and calls via tree-sitter.
 *
 * Emits:
 *  - a node per class and per function; a function lexically inside a class
 *    is qualified as `Class.function` using the nearest enclosing class
 *  - 'extends' edges for identifier / attribute entries in a class's bases
 *  - 'imports' edges (source: the file's basename) for `import X` and for both
 *    the module and the imported names of `from X import Y`
 *  - 'calls' edges (INFERRED, score 0.7) from the innermost enclosing function,
 *    else the enclosing class, else the file's basename
 *
 * Scope is tracked per subtree: the class/function context applies only to the
 * descendants of the defining node, so a definition never leaks its name onto
 * later siblings.
 *
 * When no Python parser can be loaded the result is empty and error-free,
 * which lets the caller fall back to the regex extractor. A parse failure is
 * reported through `errors`.
 *
 * @param filePath - Path of the file, recorded as `sourceFile` on every item
 * @param content - Full source text of the file
 */
function extractWithTreeSitter(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const errors: string[] = [];

  const parser = tryLoadParser('python');
  if (!parser) {
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  let tree: { rootNode: SyntaxNodeLike };
  try {
    tree = parser.parse(content);
  } catch (err) {
    errors.push(`tree-sitter parse error in ${filePath}: ${String(err)}`);
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  const fileId = basename(filePath);

  /** Record an 'imports' edge from this file, located at statement `at`. */
  const addImport = (target: string, at: SyntaxNodeLike): void => {
    edges.push({
      source: fileId,
      target,
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: loc(at),
    });
  };

  // ---- pass 1: imports (scope-independent, so a flat walk is enough) ----
  walk(tree.rootNode, (n) => {
    // import X, import X as Y
    if (n.type === 'import_statement') {
      for (const child of n.children) {
        if (child.type === 'dotted_name') {
          if (child.text) addImport(child.text, n);
        } else if (child.type === 'aliased_import') {
          const importedName = child.childForFieldName('name')?.text ?? child.text;
          if (importedName) addImport(importedName, n);
        }
      }
      return;
    }

    // from X import Y
    if (n.type === 'import_from_statement') {
      const moduleNode = n.childForFieldName('module_name');
      const moduleName = moduleNode?.text ?? '';
      if (moduleName) addImport(moduleName, n);

      // The module itself also appears among the children. Skip exactly one
      // child that is the module (by identity, or by text in case the parser
      // hands out fresh wrapper objects), so `from a import a` still reports
      // the imported name.
      let moduleSkipped = !moduleNode;
      for (const child of n.children) {
        if (child.type === 'dotted_name' || child.type === 'identifier') {
          if (!moduleSkipped && (child === moduleNode || child.text === moduleName)) {
            moduleSkipped = true;
            continue;
          }
          addImport(child.text, n);
        } else if (child.type === 'aliased_import') {
          const importedName = child.childForFieldName('name')?.text ?? child.text;
          if (importedName) addImport(importedName, n);
        }
      }
    }
  });

  // ---- pass 2: definitions and calls (need lexical scope) ----
  // An explicit work stack carries the scope of each subtree and avoids deep
  // recursion on very deeply nested syntax trees.
  interface Frame {
    node: SyntaxNodeLike;
    enclosingClass: string | undefined;
    enclosingFunction: string | undefined;
  }
  const pending: Frame[] = [
    { node: tree.rootNode, enclosingClass: undefined, enclosingFunction: undefined },
  ];

  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const { node: n, enclosingClass, enclosingFunction } = frame;
    // Scope handed down to this node's children; changed only by definitions.
    let childClass = enclosingClass;
    let childFunction = enclosingFunction;

    if (n.type === 'class_definition') {
      const name = nodeName(n);
      if (name) {
        nodes.push({
          id: name,
          label: name,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
        childClass = name;

        // Inheritance: class A(B, C):
        const argList = n.childForFieldName('superclasses');
        if (argList) {
          for (const base of argList.children) {
            if (base.type === 'identifier' || base.type === 'attribute') {
              edges.push({
                source: name,
                target: base.text,
                relation: 'extends',
                confidence: 'EXTRACTED',
                sourceFile: filePath,
                sourceLocation: loc(argList),
              });
            }
          }
        }
      }
    } else if (n.type === 'function_definition') {
      // Decorated functions need no special case: the inner
      // function_definition is reached as a descendant and handled here once.
      const name = nodeName(n);
      if (name) {
        const qualifiedName =
          enclosingClass !== undefined ? `${enclosingClass}.${name}` : name;
        nodes.push({
          id: qualifiedName,
          label: qualifiedName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
        childFunction = qualifiedName;
      }
    } else if (n.type === 'call') {
      const calleeName = n.childForFieldName('function')?.text;
      const caller = enclosingFunction ?? enclosingClass ?? fileId;
      // Direct self-recursion is not reported as an edge.
      if (calleeName && caller && calleeName !== caller) {
        edges.push({
          source: caller,
          target: calleeName,
          relation: 'calls',
          confidence: 'INFERRED',
          confidenceScore: 0.7,
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
      }
    }

    // Push children in reverse so they are popped in source order.
    for (const child of n.children.slice().reverse()) {
      pending.push({
        node: child,
        enclosingClass: childClass,
        enclosingFunction: childFunction,
      });
    }
  }

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
}
204
+
205
+ // ---- regex fallback ----
206
+
207
/**
 * Line-based regex fallback for Python files, used when tree-sitter yields nothing.
 *
 * Only statements that start at column 0 are recognised:
 *  - `class Name` → node, plus an 'extends' edge per plain base name
 *    (identifier or dotted name). Keyword arguments such as `metaclass=...`,
 *    subscripted bases such as `Generic[K, V]`, and `object` are not reported.
 *  - `def name` / `async def name` → node
 *  - `import a, b as c` → one 'imports' edge per module (`a` and `b`)
 *  - `from pkg import x` → one 'imports' edge for `pkg`
 *
 * This is a heuristic: indented definitions (methods, nested functions) and
 * statements continued across lines are not detected.
 *
 * @param filePath - Path of the file, recorded as `sourceFile` on every item
 * @param content - Full source text of the file
 */
function extractWithRegex(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const fileId = basename(filePath);

  // An identifier or dotted name; rejects `metaclass=X`, `Generic[K`, `\`, etc.
  const plainName = /^[\w.]+$/;

  content.split('\n').forEach((line, idx) => {
    const location = `L${idx + 1}`;

    // class
    const classMatch = line.match(/^class\s+(\w+)/);
    if (classMatch) {
      const name = classMatch[1];
      nodes.push({ id: name, label: name, fileType: 'code', sourceFile: filePath, sourceLocation: location });

      // inheritance: class A(B, C):
      const inheritMatch = line.match(/^class\s+\w+\(([^)]+)\)/);
      if (inheritMatch) {
        for (const base of inheritMatch[1].split(',')) {
          const baseName = base.trim();
          if (plainName.test(baseName) && baseName !== 'object') {
            edges.push({ source: name, target: baseName, relation: 'extends', confidence: 'EXTRACTED', sourceFile: filePath, sourceLocation: location });
          }
        }
      }
    }

    // function
    const funcMatch = line.match(/^(?:async\s+)?def\s+(\w+)/);
    if (funcMatch) {
      const name = funcMatch[1];
      nodes.push({ id: name, label: name, fileType: 'code', sourceFile: filePath, sourceLocation: location });
    }

    // import a, b as c  → one edge per module, aliases and comments ignored
    const importMatch = line.match(/^import\s+(.+)/);
    if (importMatch) {
      // Drop a trailing comment or a following `;`-separated statement.
      const clause = importMatch[1].split(/[#;]/)[0] ?? '';
      for (const part of clause.split(',')) {
        // First token is the module; anything after it is the `as alias`.
        const moduleName = part.trim().split(/\s+/)[0] ?? '';
        if (plainName.test(moduleName)) {
          edges.push({ source: fileId, target: moduleName, relation: 'imports', confidence: 'EXTRACTED', sourceFile: filePath, sourceLocation: location });
        }
      }
    }

    // from X import Y
    const fromImportMatch = line.match(/^from\s+(\S+)\s+import/);
    if (fromImportMatch) {
      edges.push({ source: fileId, target: fromImportMatch[1], relation: 'imports', confidence: 'EXTRACTED', sourceFile: filePath, sourceLocation: location });
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors: [] };
}
256
+
257
+ // ---- extractor implementation ----
258
+
259
/**
 * Python language extractor for `.py` and `.pyw` files.
 *
 * `extract` runs the tree-sitter extractor and returns its result if it holds
 * at least one node, edge or error; otherwise the regex fallback is used.
 */
export const pythonExtractor: LanguageExtractor = {
  language: 'python',
  extensions: ['.py', '.pyw'],

  extract(filePath: string, content: string): ExtractionResult {
    const parsed = extractWithTreeSitter(filePath, content);
    const hasOutput = [parsed.nodes, parsed.edges, parsed.errors].some(
      (items) => items.length > 0,
    );
    if (hasOutput) {
      return parsed;
    }
    return extractWithRegex(filePath, content);
  },
};