@monoes/graph 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/analyze.d.ts +23 -0
- package/dist/src/analyze.d.ts.map +1 -0
- package/dist/src/analyze.js +105 -0
- package/dist/src/analyze.js.map +1 -0
- package/dist/src/build.d.ts +8 -0
- package/dist/src/build.d.ts.map +1 -0
- package/dist/src/build.js +59 -0
- package/dist/src/build.js.map +1 -0
- package/dist/src/cache.d.ts +10 -0
- package/dist/src/cache.d.ts.map +1 -0
- package/dist/src/cache.js +34 -0
- package/dist/src/cache.js.map +1 -0
- package/dist/src/cluster.d.ts +8 -0
- package/dist/src/cluster.d.ts.map +1 -0
- package/dist/src/cluster.js +50 -0
- package/dist/src/cluster.js.map +1 -0
- package/dist/src/detect.d.ts +8 -0
- package/dist/src/detect.d.ts.map +1 -0
- package/dist/src/detect.js +108 -0
- package/dist/src/detect.js.map +1 -0
- package/dist/src/export.d.ts +21 -0
- package/dist/src/export.d.ts.map +1 -0
- package/dist/src/export.js +68 -0
- package/dist/src/export.js.map +1 -0
- package/dist/src/extract/index.d.ts +20 -0
- package/dist/src/extract/index.d.ts.map +1 -0
- package/dist/src/extract/index.js +158 -0
- package/dist/src/extract/index.js.map +1 -0
- package/dist/src/extract/languages/go.d.ts +3 -0
- package/dist/src/extract/languages/go.d.ts.map +1 -0
- package/dist/src/extract/languages/go.js +181 -0
- package/dist/src/extract/languages/go.js.map +1 -0
- package/dist/src/extract/languages/python.d.ts +3 -0
- package/dist/src/extract/languages/python.d.ts.map +1 -0
- package/dist/src/extract/languages/python.js +230 -0
- package/dist/src/extract/languages/python.js.map +1 -0
- package/dist/src/extract/languages/rust.d.ts +3 -0
- package/dist/src/extract/languages/rust.d.ts.map +1 -0
- package/dist/src/extract/languages/rust.js +195 -0
- package/dist/src/extract/languages/rust.js.map +1 -0
- package/dist/src/extract/languages/typescript.d.ts +3 -0
- package/dist/src/extract/languages/typescript.d.ts.map +1 -0
- package/dist/src/extract/languages/typescript.js +295 -0
- package/dist/src/extract/languages/typescript.js.map +1 -0
- package/dist/src/extract/tree-sitter-runner.d.ts +48 -0
- package/dist/src/extract/tree-sitter-runner.d.ts.map +1 -0
- package/dist/src/extract/tree-sitter-runner.js +128 -0
- package/dist/src/extract/tree-sitter-runner.js.map +1 -0
- package/dist/src/extract/types.d.ts +7 -0
- package/dist/src/extract/types.d.ts.map +1 -0
- package/dist/src/extract/types.js +2 -0
- package/dist/src/extract/types.js.map +1 -0
- package/dist/src/index.d.ts +11 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +9 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/pipeline.d.ts +16 -0
- package/dist/src/pipeline.d.ts.map +1 -0
- package/dist/src/pipeline.js +143 -0
- package/dist/src/pipeline.js.map +1 -0
- package/dist/src/types.d.ts +99 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +44 -0
- package/src/analyze.ts +122 -0
- package/src/build.ts +62 -0
- package/src/cache.ts +38 -0
- package/src/cluster.ts +54 -0
- package/src/detect.ts +123 -0
- package/src/export.ts +78 -0
- package/src/extract/index.ts +190 -0
- package/src/extract/languages/go.ts +206 -0
- package/src/extract/languages/python.ts +270 -0
- package/src/extract/languages/rust.ts +230 -0
- package/src/extract/languages/typescript.ts +344 -0
- package/src/extract/tree-sitter-runner.ts +165 -0
- package/src/extract/types.ts +7 -0
- package/src/index.ts +10 -0
- package/src/pipeline.ts +166 -0
- package/src/types.ts +116 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { readFileSync, mkdirSync } from 'fs';
|
|
2
|
+
import { extname } from 'path';
|
|
3
|
+
import type { ClassifiedFile, ExtractionResult, BuildOptions, GraphNode, GraphEdge } from '../types.js';
|
|
4
|
+
import type { LanguageExtractor } from './types.js';
|
|
5
|
+
import { FileCache } from '../cache.js';
|
|
6
|
+
import { typescriptExtractor } from './languages/typescript.js';
|
|
7
|
+
import { pythonExtractor } from './languages/python.js';
|
|
8
|
+
import { goExtractor } from './languages/go.js';
|
|
9
|
+
import { rustExtractor } from './languages/rust.js';
|
|
10
|
+
|
|
11
|
+
// ---- registry ----
|
|
12
|
+
|
|
13
|
+
/** Every language extractor the dispatcher knows about, in registration order. */
const EXTRACTORS: LanguageExtractor[] = [typescriptExtractor, pythonExtractor, goExtractor, rustExtractor];

/** Lookup from file extension → extractor. */
const EXT_MAP = new Map<string, LanguageExtractor>();

/** Lookup from language name → extractor. */
const LANG_MAP = new Map<string, LanguageExtractor>();

// Fill both lookups in a single pass over the registry. When two extractors
// claim the same key, the one registered later wins.
for (const extractor of EXTRACTORS) {
  LANG_MAP.set(extractor.language, extractor);
  for (const extension of extractor.extensions) {
    EXT_MAP.set(extension, extractor);
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Pick the extractor responsible for a classified file.
 *
 * The file's declared language (compared case-insensitively) is tried first.
 * When the language is absent or has no registered extractor, the lower-cased
 * extension of the file path is looked up instead.
 *
 * @param file - Classified file to resolve
 * @returns The matching extractor, or `null` when neither lookup succeeds
 */
function resolveExtractor(file: ClassifiedFile): LanguageExtractor | null {
  const declared = file.language ? LANG_MAP.get(file.language.toLowerCase()) : undefined;
  if (declared) {
    return declared;
  }

  const extension = extname(file.path).toLowerCase();
  const byExtension = EXT_MAP.get(extension);
  return byExtension ?? null;
}
|
|
38
|
+
|
|
39
|
+
// ---- result merging ----
|
|
40
|
+
|
|
41
|
+
/**
 * Fold per-file extraction results into one combined result.
 *
 * Counters are summed and error lists are concatenated in input order.
 * Nodes are deduplicated by `id` and edges by the `source`/`target`/`relation`
 * triple; in both cases the first occurrence is the one that is kept.
 *
 * @param results - Per-file results, in the order they were produced
 * @returns A single merged result
 */
function mergeResults(results: ExtractionResult[]): ExtractionResult {
  const firstNodeById = new Map<string, GraphNode>();
  const seenEdgeKeys = new Set<string>();
  const uniqueEdges: GraphEdge[] = [];
  const allErrors: string[] = [];
  let processedTotal = 0;
  let cachedTotal = 0;

  for (const partial of results) {
    processedTotal += partial.filesProcessed;
    cachedTotal += partial.fromCache;
    allErrors.push(...partial.errors);

    for (const candidate of partial.nodes) {
      // An id that was already recorded keeps its earlier node.
      if (firstNodeById.has(candidate.id)) continue;
      firstNodeById.set(candidate.id, candidate);
    }

    for (const candidate of partial.edges) {
      const { source, target, relation } = candidate;
      const signature = `${source}||${target}||${relation}`;
      if (seenEdgeKeys.has(signature)) continue;
      seenEdgeKeys.add(signature);
      uniqueEdges.push(candidate);
    }
  }

  return {
    nodes: [...firstNodeById.values()],
    edges: uniqueEdges,
    filesProcessed: processedTotal,
    fromCache: cachedTotal,
    errors: allErrors,
  };
}
|
|
79
|
+
|
|
80
|
+
// ---- public API ----
|
|
81
|
+
|
|
82
|
+
/**
 * Extract graph nodes and edges from all classified files.
 *
 * For each file, in order:
 *  1. files larger than the size limit are recorded as skipped (with an error entry);
 *  2. files excluded by the language filter, or with no registered extractor,
 *     are dropped silently;
 *  3. the content is read and looked up in the on-disk cache, using the key
 *     that `FileCache.key(path, content)` produces;
 *  4. on a cache miss the language extractor runs, and its result is cached
 *     only when it reports no errors.
 *
 * All per-file results are then merged into a single ExtractionResult
 * (nodes deduplicated by id, edges by source+target+relation).
 *
 * @param files - Classified files to process (from the classify layer)
 * @param outputDir - Directory used for cache storage; created if it does not exist
 * @param options - Build options (`languages` filter, `maxFileSizeBytes`)
 * @returns The merged extraction result for every file that was considered
 */
export async function extractAll(
  files: ClassifiedFile[],
  outputDir: string,
  options?: BuildOptions,
): Promise<ExtractionResult> {
  const maxSize = options?.maxFileSizeBytes ?? 500 * 1024; // 500 KB
  const langFilter = options?.languages?.map((l) => l.toLowerCase());

  mkdirSync(outputDir, { recursive: true });
  const cache = new FileCache(outputDir);

  const results: ExtractionResult[] = [];

  for (const file of files) {
    // Skip files that are too large
    if (file.sizeBytes > maxSize) {
      results.push({
        nodes: [],
        edges: [],
        filesProcessed: 1,
        fromCache: 0,
        errors: [`Skipped ${file.path}: file size ${file.sizeBytes} exceeds limit ${maxSize}`],
      });
      continue;
    }

    // Resolve once; both the language filter and the dispatch below use it.
    const extractor = resolveExtractor(file);

    // Apply language filter: keep the file when either its declared language
    // or the resolved extractor's language is in the filter.
    if (langFilter && langFilter.length > 0) {
      const fileLang = (file.language ?? '').toLowerCase();
      const extractorLang = extractor?.language ?? '';
      if (!langFilter.includes(fileLang) && !langFilter.includes(extractorLang)) {
        continue;
      }
    }

    if (!extractor) {
      // No extractor for this file type — skip silently
      continue;
    }

    // Read content
    let content: string;
    try {
      content = readFileSync(file.path, 'utf8');
    } catch (err) {
      results.push({
        nodes: [],
        edges: [],
        filesProcessed: 1,
        fromCache: 0,
        errors: [`Failed to read ${file.path}: ${err instanceof Error ? err.message : String(err)}`],
      });
      continue;
    }

    // Check cache using FileCache (keyed on filePath + content)
    const cacheKey = cache.key(file.path, content);
    const cached = cache.get(cacheKey);
    if (cached) {
      results.push({
        ...cached,
        filesProcessed: 1,
        fromCache: 1,
      });
      continue;
    }

    // Extract
    let result: ExtractionResult;
    try {
      result = extractor.extract(file.path, content);
    } catch (err) {
      result = {
        nodes: [],
        edges: [],
        filesProcessed: 1,
        fromCache: 0,
        errors: [`Extractor error for ${file.path}: ${err instanceof Error ? err.message : String(err)}`],
      };
    }

    // Only cache clean results. The key depends on path and content alone, so
    // a cached failure would be served again on every run until the file
    // changes, even if the cause (e.g. a parser that failed to load) is gone.
    if (result.errors.length === 0) {
      cache.set(cacheKey, result);
    }
    results.push(result);
  }

  return mergeResults(results);
}
|
|
183
|
+
|
|
184
|
+
// Re-export types and utilities for consumers
|
|
185
|
+
export type { LanguageExtractor } from './types.js';
|
|
186
|
+
export { typescriptExtractor } from './languages/typescript.js';
|
|
187
|
+
export { pythonExtractor } from './languages/python.js';
|
|
188
|
+
export { goExtractor } from './languages/go.js';
|
|
189
|
+
export { rustExtractor } from './languages/rust.js';
|
|
190
|
+
export { isTreeSitterAvailable, tryLoadParser, walk, parseFile, parseFileFromDisk } from './tree-sitter-runner.js';
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { basename } from 'path';
|
|
2
|
+
import type { GraphNode, GraphEdge, ExtractionResult } from '../../types.js';
|
|
3
|
+
import type { LanguageExtractor } from '../types.js';
|
|
4
|
+
import {
|
|
5
|
+
tryLoadParser,
|
|
6
|
+
walk,
|
|
7
|
+
type SyntaxNodeLike,
|
|
8
|
+
} from '../tree-sitter-runner.js';
|
|
9
|
+
|
|
10
|
+
// ---- helpers ----
|
|
11
|
+
|
|
12
|
+
/** Text of the node's `name` field, or an empty string when that field is missing. */
function nodeName(node: SyntaxNodeLike): string {
  return node.childForFieldName('name')?.text ?? '';
}
|
|
16
|
+
|
|
17
|
+
/** Format the node's start row as a 1-based location label such as `L12`. */
function loc(node: SyntaxNodeLike): string {
  const oneBasedRow = node.startPosition.row + 1;
  return 'L' + oneBasedRow;
}
|
|
20
|
+
|
|
21
|
+
// ---- tree-sitter extraction ----
|
|
22
|
+
|
|
23
|
+
/**
 * Extract Go functions, methods, type declarations and imports with tree-sitter.
 *
 * Returns an empty result (no nodes, edges or errors) when the Go parser
 * cannot be loaded, and a result carrying a single error when parsing throws.
 *
 * Emitted graph items:
 *  - one node per `function_declaration` / `method_declaration`; methods are
 *    qualified as `ReceiverType.name`;
 *  - one node per `type_spec`, with `nodeKind` of `struct`, `interface` or `type`;
 *  - one `imports` edge per `import_spec`, from the file's base name to the
 *    import path.
 *
 * @param filePath - Path of the file being analysed (used for node/edge provenance)
 * @param content - Full source text of the file
 * @returns The extraction result for this single file
 */
function extractWithTreeSitter(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const errors: string[] = [];

  const parser = tryLoadParser('go');
  if (!parser) {
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  let tree: { rootNode: SyntaxNodeLike };
  try {
    tree = parser.parse(content);
  } catch (err) {
    errors.push(`tree-sitter parse error in ${filePath}: ${String(err)}`);
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  const importSource = basename(filePath);

  // Record one `imports` edge for an import_spec node (shared by the grouped
  // and the single-import forms).
  const addImportEdge = (spec: SyntaxNodeLike): void => {
    const pathNode = spec.childForFieldName('path');
    if (!pathNode) return;
    edges.push({
      source: importSource,
      // Import paths may be interpreted ("...") or raw (`...`) string literals.
      target: pathNode.text.replace(/^["`]|["`]$/g, ''),
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: loc(spec),
    });
  };

  walk(tree.rootNode, (n) => {
    // ---- function declarations ----
    if (n.type === 'function_declaration' || n.type === 'method_declaration') {
      const name = nodeName(n);
      if (name) {
        // For methods, qualify with receiver type
        let qualifiedName = name;
        if (n.type === 'method_declaration') {
          const receiver = n.childForFieldName('receiver');
          if (receiver) {
            // Receiver text looks like "(r *MyType)", "(r MyType)", "(*MyType)"
            // or "(MyType)". Drop the parentheses, then the optional receiver
            // name, then the optional pointer star.
            const receiverType = receiver.text
              .replace(/^\(|\)$/g, '')
              .trim()
              .replace(/^\w+\s+/, '')
              .replace(/^\*\s*/, '');
            if (receiverType) qualifiedName = `${receiverType}.${name}`;
          }
        }
        nodes.push({
          id: qualifiedName,
          label: qualifiedName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
      }
      return;
    }

    // ---- type declarations (struct, interface) ----
    if (n.type === 'type_declaration') {
      for (const child of n.children) {
        if (child.type !== 'type_spec') continue;

        const typeName = child.childForFieldName('name')?.text ?? '';
        if (!typeName) continue;

        const typeNode = child.childForFieldName('type');
        const typeKind =
          typeNode?.type === 'struct_type' ? 'struct'
          : typeNode?.type === 'interface_type' ? 'interface'
          : 'type';

        nodes.push({
          id: typeName,
          label: typeName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(child),
          nodeKind: typeKind,
        });
      }
      return;
    }

    // ---- import declarations ----
    if (n.type === 'import_declaration') {
      for (const child of n.children) {
        if (child.type === 'import_spec_list') {
          // Grouped form: import ( "a"; "b" )
          for (const spec of child.children) {
            if (spec.type === 'import_spec') addImportEdge(spec);
          }
        } else if (child.type === 'import_spec') {
          // Single form: import "a"
          addImportEdge(child);
        }
      }
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
}
|
|
136
|
+
|
|
137
|
+
// ---- regex fallback ----
|
|
138
|
+
|
|
139
|
+
/**
 * Line-based fallback extractor for Go source, used when tree-sitter yields nothing.
 *
 * Recognises, one trimmed line at a time:
 *  - grouped imports (`import ( ... )`), skipping `//` comment lines inside the group;
 *  - single-line imports, including aliased, blank (`_`) and dot (`.`) imports;
 *  - `func` declarations, qualifying methods as `ReceiverType.name` for named,
 *    unnamed, pointer and generic receivers;
 *  - `type Name struct` and `type Name interface` declarations.
 *
 * @param filePath - Path of the file being analysed (used for node/edge provenance)
 * @param content - Full source text of the file
 * @returns The extraction result for this single file; `errors` is always empty
 */
function extractWithRegex(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const fileName = basename(filePath);

  // Patterns are created once per call rather than once per line.
  const importBlockStart = /^import\s*\($/;
  const quotedPath = /["`]([^"`]+)["`]/;
  // Optional alias (`name`, `_` or `.`) before the quoted path.
  const singleImport = /^import\s+(?:[\w.]+\s+)?["`]([^"`]+)["`]/;
  // Group 1: receiver type (optional receiver name, optional `*`, optional
  // type arguments such as `[T]`). Group 2: function or method name.
  const funcDecl = /^func\s+(?:\(\s*(?:\w+\s+)?\*?(\w+)[^)]*\)\s*)?(\w+)/;
  const structDecl = /^type\s+(\w+)\s+struct/;
  const ifaceDecl = /^type\s+(\w+)\s+interface/;

  const addImport = (target: string, location: string): void => {
    edges.push({
      source: fileName,
      target,
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: location,
    });
  };

  let inImportBlock = false;

  content.split('\n').forEach((line, idx) => {
    const location = `L${idx + 1}`;
    const trimmed = line.trim();

    // Detect import blocks
    if (importBlockStart.test(trimmed)) { inImportBlock = true; return; }
    if (inImportBlock && trimmed === ')') { inImportBlock = false; return; }

    if (inImportBlock) {
      // A commented-out import must not produce an edge.
      if (trimmed.startsWith('//')) return;
      const blockImport = trimmed.match(quotedPath);
      if (blockImport) addImport(blockImport[1], location);
      return;
    }

    // Single-line import: import "pkg", import alias "pkg", import _ "pkg"
    const single = trimmed.match(singleImport);
    if (single) addImport(single[1], location);

    // Function: func (r *Recv) MethodName(...) or func FuncName(...)
    const func = trimmed.match(funcDecl);
    if (func) {
      const receiver = func[1];
      const name = func[2];
      const qualifiedName = receiver ? `${receiver}.${name}` : name;
      nodes.push({ id: qualifiedName, label: qualifiedName, fileType: 'code', sourceFile: filePath, sourceLocation: location });
    }

    // Struct: type Name struct
    const struct = trimmed.match(structDecl);
    if (struct) {
      nodes.push({ id: struct[1], label: struct[1], fileType: 'code', sourceFile: filePath, sourceLocation: location, nodeKind: 'struct' });
    }

    // Interface: type Name interface
    const iface = trimmed.match(ifaceDecl);
    if (iface) {
      nodes.push({ id: iface[1], label: iface[1], fileType: 'code', sourceFile: filePath, sourceLocation: location, nodeKind: 'interface' });
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors: [] };
}
|
|
192
|
+
|
|
193
|
+
// ---- extractor implementation ----
|
|
194
|
+
|
|
195
|
+
/**
 * Go language extractor for `.go` files.
 *
 * Runs the tree-sitter extraction first and returns its result whenever it
 * produced anything at all (nodes, edges or errors). A completely empty
 * result hands over to the regex extraction.
 */
export const goExtractor: LanguageExtractor = {
  language: 'go',
  extensions: ['.go'],

  extract(filePath: string, content: string): ExtractionResult {
    const parsed = extractWithTreeSitter(filePath, content);
    const parsedNothing =
      parsed.nodes.length === 0 && parsed.edges.length === 0 && parsed.errors.length === 0;
    return parsedNothing ? extractWithRegex(filePath, content) : parsed;
  },
};
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import { basename } from 'path';
|
|
2
|
+
import type { GraphNode, GraphEdge, ExtractionResult } from '../../types.js';
|
|
3
|
+
import type { LanguageExtractor } from '../types.js';
|
|
4
|
+
import {
|
|
5
|
+
tryLoadParser,
|
|
6
|
+
walk,
|
|
7
|
+
type SyntaxNodeLike,
|
|
8
|
+
} from '../tree-sitter-runner.js';
|
|
9
|
+
|
|
10
|
+
// ---- helpers ----
|
|
11
|
+
|
|
12
|
+
/** Read the text of the node's `name` field; yields `''` when the node has no such field. */
function nodeName(node: SyntaxNodeLike): string {
  const identifier = node.childForFieldName('name');
  if (identifier == null) {
    return '';
  }
  return identifier.text ?? '';
}
|
|
16
|
+
|
|
17
|
+
/** Build the `L<n>` location label for a node, where `n` is its start row counted from 1. */
function loc(node: SyntaxNodeLike): string {
  const { row } = node.startPosition;
  return `L${row + 1}`;
}
|
|
20
|
+
|
|
21
|
+
// ---- tree-sitter extraction ----
|
|
22
|
+
|
|
23
|
+
/**
 * Extract Python classes, functions, imports, inheritance and calls with tree-sitter.
 *
 * Returns an empty result (no nodes, edges or errors) when the Python parser
 * cannot be loaded, and a result carrying a single error when parsing throws.
 *
 * Emitted graph items:
 *  - one node per `class_definition`, plus an `extends` edge per base class
 *    (the implicit `object` base is skipped);
 *  - one node per `function_definition`, qualified as `Class.name` while the
 *    definition lies inside a class body;
 *  - `imports` edges from the file's base name for `import` and `from ... import`;
 *  - `calls` edges (confidence `INFERRED`) from the innermost enclosing
 *    function, else the innermost enclosing class, else the file's base name.
 *
 * Scope tracking: each class/function scope is remembered together with the
 * last source row it covers and is discarded once the traversal reaches a node
 * that starts on a later row. This assumes `walk` visits nodes in document
 * (pre-order) order — TODO confirm against tree-sitter-runner. Being
 * row-based, it cannot separate two statements that share one physical line.
 *
 * @param filePath - Path of the file being analysed (used for node/edge provenance)
 * @param content - Full source text of the file
 * @returns The extraction result for this single file
 */
function extractWithTreeSitter(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const errors: string[] = [];

  const parser = tryLoadParser('python');
  if (!parser) {
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  let tree: { rootNode: SyntaxNodeLike };
  try {
    tree = parser.parse(content);
  } catch (err) {
    errors.push(`tree-sitter parse error in ${filePath}: ${String(err)}`);
    return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
  }

  type Scope = { name: string; lastRow: number };

  const fileName = basename(filePath);

  // Track enclosing definitions for method qualification and call attribution
  const functionStack: Scope[] = [];
  const classStack: Scope[] = [];

  // Last row a node covers: its start row plus the line breaks inside its
  // text (trailing whitespace is ignored so it cannot stretch the scope).
  const lastRowOf = (node: SyntaxNodeLike): number =>
    node.startPosition.row + (node.text.trimEnd().split('\n').length - 1);

  // Pop every scope that ended before `row`.
  const leaveFinishedScopes = (stack: Scope[], row: number): void => {
    for (
      let top = stack[stack.length - 1];
      top !== undefined && top.lastRow < row;
      top = stack[stack.length - 1]
    ) {
      stack.pop();
    }
  };

  const addImportEdge = (target: string, at: SyntaxNodeLike): void => {
    edges.push({
      source: fileName,
      target,
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: loc(at),
    });
  };

  walk(tree.rootNode, (n) => {
    const row = n.startPosition.row;
    leaveFinishedScopes(functionStack, row);
    leaveFinishedScopes(classStack, row);

    // ---- class definitions ----
    if (n.type === 'class_definition') {
      const name = nodeName(n);
      if (name) {
        nodes.push({
          id: name,
          label: name,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
        classStack.push({ name, lastRow: lastRowOf(n) });

        // Inheritance: class A(B, C):
        const argList = n.childForFieldName('superclasses');
        if (argList) {
          for (const child of argList.children) {
            const isBase = child.type === 'identifier' || child.type === 'attribute';
            // `object` is skipped to match the regex fallback.
            if (!isBase || child.text === 'object') continue;
            edges.push({
              source: name,
              target: child.text,
              relation: 'extends',
              confidence: 'EXTRACTED',
              sourceFile: filePath,
              sourceLocation: loc(argList),
            });
          }
        }
      }
      return;
    }

    // ---- function / async function definitions ----
    // A `decorated_definition` wrapper needs no branch of its own: the wrapped
    // function_definition is visited as one of its descendants, and handling
    // the wrapper as well would record the function twice.
    if (n.type === 'function_definition') {
      const name = nodeName(n);
      if (name) {
        const enclosingClass = classStack[classStack.length - 1];
        const qualifiedName = enclosingClass ? `${enclosingClass.name}.${name}` : name;

        nodes.push({
          id: qualifiedName,
          label: qualifiedName,
          fileType: 'code',
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
        functionStack.push({ name: qualifiedName, lastRow: lastRowOf(n) });
      }
      return;
    }

    // ---- import statements ----
    if (n.type === 'import_statement') {
      // import X, import X as Y
      for (const child of n.children) {
        if (child.type === 'dotted_name') {
          if (child.text) addImportEdge(child.text, n);
        } else if (child.type === 'aliased_import') {
          const importedName = child.childForFieldName('name')?.text ?? child.text;
          if (importedName) addImportEdge(importedName, n);
        }
      }
      return;
    }

    // ---- from X import Y ----
    if (n.type === 'import_from_statement') {
      const moduleNode = n.childForFieldName('module_name');
      const moduleName = moduleNode?.text ?? '';

      if (moduleName) addImportEdge(moduleName, n);

      // Also emit edges for individual imported names. The module name itself
      // is the first matching child; it is recognised by identity or by text
      // so the skip does not depend on the binding returning the same wrapper
      // object from `children` and `childForFieldName`.
      let moduleChildSkipped = false;
      for (const child of n.children) {
        if (child.type === 'dotted_name' || child.type === 'identifier') {
          if (
            !moduleChildSkipped &&
            moduleNode &&
            (child === moduleNode || child.text === moduleNode.text)
          ) {
            moduleChildSkipped = true;
            continue;
          }
          addImportEdge(child.text, n);
        }
        if (child.type === 'aliased_import') {
          const importedName = child.childForFieldName('name')?.text ?? child.text;
          if (importedName) addImportEdge(importedName, n);
        }
      }
      return;
    }

    // ---- call expressions ----
    if (n.type === 'call') {
      const fnNode = n.childForFieldName('function');
      if (!fnNode) return;

      const calleeName = fnNode.text;
      const caller =
        functionStack[functionStack.length - 1]?.name ??
        classStack[classStack.length - 1]?.name ??
        fileName;

      if (calleeName && caller && calleeName !== caller) {
        edges.push({
          source: caller,
          target: calleeName,
          relation: 'calls',
          confidence: 'INFERRED',
          confidenceScore: 0.7,
          sourceFile: filePath,
          sourceLocation: loc(n),
        });
      }
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors };
}
|
|
204
|
+
|
|
205
|
+
// ---- regex fallback ----
|
|
206
|
+
|
|
207
|
+
/**
 * Line-based fallback extractor for Python source, used when tree-sitter yields nothing.
 *
 * Only statements that start in column 0 are recognised:
 *  - `class Name(...)` → a node, plus one `extends` edge per positional base
 *    (`object` and keyword arguments such as `metaclass=...` are skipped;
 *    subscripted bases like `Generic[K, V]` are kept whole);
 *  - `def name` / `async def name` → a node;
 *  - `import a, b as c` → one `imports` edge per module, without aliases;
 *  - `from pkg import x` → one `imports` edge to `pkg`.
 *
 * @param filePath - Path of the file being analysed (used for node/edge provenance)
 * @param content - Full source text of the file
 * @returns The extraction result for this single file; `errors` is always empty
 */
function extractWithRegex(filePath: string, content: string): ExtractionResult {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const fileName = basename(filePath);

  // Patterns are created once per call rather than once per line.
  const classDecl = /^class\s+(\w+)(?:\(([^)]*)\))?/;
  const funcDecl = /^(?:async\s+)?def\s+(\w+)/;
  const importStmt = /^import\s+(.+)/;
  const fromImportStmt = /^from\s+(\S+)\s+import/;

  // Split on commas that are not nested inside [...] or (...), so that
  // `Generic[K, V], Base` yields two entries rather than three.
  const splitTopLevel = (list: string): string[] => {
    const parts: string[] = [];
    let depth = 0;
    let current = '';
    for (const ch of list) {
      if (ch === '[' || ch === '(') depth++;
      else if (ch === ']' || ch === ')') depth--;
      if (ch === ',' && depth === 0) {
        parts.push(current);
        current = '';
      } else {
        current += ch;
      }
    }
    parts.push(current);
    return parts;
  };

  const addImport = (target: string, location: string): void => {
    edges.push({
      source: fileName,
      target,
      relation: 'imports',
      confidence: 'EXTRACTED',
      sourceFile: filePath,
      sourceLocation: location,
    });
  };

  content.split('\n').forEach((line, idx) => {
    const location = `L${idx + 1}`;

    // class
    const classMatch = line.match(classDecl);
    if (classMatch) {
      const name = classMatch[1];
      nodes.push({ id: name, label: name, fileType: 'code', sourceFile: filePath, sourceLocation: location });

      // inheritance: class A(B, C):
      for (const base of splitTopLevel(classMatch[2] ?? '')) {
        const baseName = base.trim();
        // Keyword arguments (e.g. metaclass=Meta) are not base classes.
        if (!baseName || baseName === 'object' || baseName.includes('=')) continue;
        edges.push({ source: name, target: baseName, relation: 'extends', confidence: 'EXTRACTED', sourceFile: filePath, sourceLocation: location });
      }
    }

    // function
    const funcMatch = line.match(funcDecl);
    if (funcMatch) {
      const name = funcMatch[1];
      nodes.push({ id: name, label: name, fileType: 'code', sourceFile: filePath, sourceLocation: location });
    }

    // import a, b as c
    const importMatch = line.match(importStmt);
    if (importMatch) {
      // Drop a trailing comment, then take the module name of each
      // comma-separated clause (the first token, i.e. before any `as alias`).
      const clauses = importMatch[1].split('#')[0].split(',');
      for (const clause of clauses) {
        const moduleName = clause.trim().split(/\s+/)[0];
        if (moduleName) addImport(moduleName, location);
      }
    }

    // from X import Y
    const fromImportMatch = line.match(fromImportStmt);
    if (fromImportMatch) {
      addImport(fromImportMatch[1], location);
    }
  });

  return { nodes, edges, filesProcessed: 1, fromCache: 0, errors: [] };
}
|
|
256
|
+
|
|
257
|
+
// ---- extractor implementation ----
|
|
258
|
+
|
|
259
|
+
/**
 * Python language extractor for `.py` and `.pyw` files.
 *
 * The tree-sitter extraction is preferred: its result is returned as soon as
 * it contains any node, edge or error. Only a completely empty result falls
 * through to the regex extraction.
 */
export const pythonExtractor: LanguageExtractor = {
  language: 'python',
  extensions: ['.py', '.pyw'],

  extract(filePath: string, content: string): ExtractionResult {
    const parsed = extractWithTreeSitter(filePath, content);
    const { nodes, edges, errors } = parsed;
    if (nodes.length === 0 && edges.length === 0 && errors.length === 0) {
      return extractWithRegex(filePath, content);
    }
    return parsed;
  },
};
|