@kernlang/review-python 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/LICENSE +661 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +7 -0
- package/dist/mapper.d.ts +8 -0
- package/dist/mapper.js +522 -0
- package/jest.config.js +10 -0
- package/package.json +28 -0
- package/src/index.ts +8 -0
- package/src/mapper.ts +618 -0
- package/tests/bilingual-v2.test.ts +153 -0
- package/tests/bilingual.test.ts +112 -0
- package/tsconfig.json +19 -0
- package/tsconfig.tsbuildinfo +1 -0
package/src/mapper.ts
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Python Concept Mapper — tree-sitter based.
|
|
3
|
+
*
|
|
4
|
+
* Maps Python syntax → universal KERN concepts.
|
|
5
|
+
* Phase 1: error_raise, error_handle, effect
* Also extracted here: entrypoint, guard, state_mutation nodes and dependency edges.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import Parser from 'tree-sitter';
|
|
9
|
+
import Python from 'tree-sitter-python';
|
|
10
|
+
import type {
|
|
11
|
+
ConceptMap, ConceptNode, ConceptEdge, ConceptSpan,
|
|
12
|
+
ErrorHandlePayload, EntrypointPayload, GuardPayload, StateMutationPayload, DependencyPayload,
|
|
13
|
+
} from '@kernlang/core';
|
|
14
|
+
import { conceptId, conceptSpan } from '@kernlang/core';
|
|
15
|
+
|
|
16
|
+
// Version stamp copied into every returned ConceptMap as `extractorVersion`.
const EXTRACTOR_VERSION = '1.0.0';
|
|
17
|
+
|
|
18
|
+
// ── Network call patterns ────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
// Receiver names (the `x` in `x.method()`) that extractEffects treats as HTTP client modules.
const NETWORK_MODULES = new Set(['requests', 'httpx', 'aiohttp', 'urllib']);
|
|
21
|
+
// Method names that count as a network effect when called on a NETWORK_MODULES receiver.
const NETWORK_METHODS = new Set(['get', 'post', 'put', 'patch', 'delete', 'head', 'options', 'request', 'fetch']);
|
|
22
|
+
|
|
23
|
+
// Receiver names that extractEffects treats as database libraries.
const DB_MODULES = new Set(['psycopg2', 'asyncpg', 'pymongo', 'sqlalchemy', 'django']);
|
|
24
|
+
// Method names that count as a DB effect when the receiver is a DB_MODULES name
// or matches the cursor/conn/db/session/collection pattern used in extractEffects.
const DB_METHODS = new Set(['execute', 'executemany', 'fetchone', 'fetchall', 'fetchmany', 'query', 'find', 'find_one', 'insert_one', 'insert_many', 'update_one', 'delete_one']);
|
|
25
|
+
|
|
26
|
+
// File-related function names.
// NOTE(review): nothing visible in this file reads this set — extractEffects only
// matches a bare `open(...)` call by text. Confirm whether it is still needed.
const FS_FUNCTIONS = new Set(['open', 'read', 'write', 'readlines', 'writelines']);
|
|
27
|
+
|
|
28
|
+
// Top-level Python standard-library module names. extractDependencyEdges looks up
// the first dotted segment of an import specifier here to label the edge `stdlib`;
// anything absent (and not relative) is labelled `external`.
// The list is a curated subset, not the full standard library.
const STDLIB_MODULES = new Set([
  // Original entries (order preserved).
  'os', 'sys', 'json', 're', 'math', 'datetime', 'time', 'logging', 'argparse',
  'collections', 'itertools', 'functools', 'pathlib', 'shutil', 'subprocess',
  'threading', 'multiprocessing', 'abc', 'typing', 'io', 'pickle', 'random',
  'hashlib', 'hmac', 'base64', 'csv', 'sqlite3', 'zlib', 'gzip', 'tarfile', 'zipfile',
  'enum', 'struct', 'tempfile', 'unittest', 'urllib', 'uuid', 'xml',
  // Commonly imported stdlib modules that were previously misreported as `external`.
  '__future__', 'asyncio', 'ast', 'bisect', 'calendar', 'concurrent', 'configparser',
  'contextlib', 'copy', 'ctypes', 'dataclasses', 'decimal', 'difflib', 'email',
  'fnmatch', 'fractions', 'getpass', 'glob', 'heapq', 'html', 'http', 'importlib',
  'inspect', 'ipaddress', 'locale', 'mimetypes', 'operator', 'platform', 'pprint',
  'queue', 'secrets', 'select', 'selectors', 'shlex', 'signal', 'smtplib', 'socket',
  'ssl', 'statistics', 'string', 'textwrap', 'traceback', 'types', 'warnings', 'weakref',
]);
|
|
35
|
+
|
|
36
|
+
// ── Parser setup ─────────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
// Module-level cache for the tree-sitter parser; stays null until getParser() first runs.
let parser: Parser | null = null;
|
|
39
|
+
|
|
40
|
+
/**
 * Returns the shared tree-sitter parser, creating it and loading the Python
 * grammar the first time it is requested.
 *
 * @returns The cached `Parser` instance stored in the module-level `parser`.
 */
function getParser(): Parser {
  if (parser !== null) return parser;

  const fresh = new Parser();
  // Cache before configuring, matching the original assignment order.
  parser = fresh;
  fresh.setLanguage(Python as unknown as Parser.Language);
  return fresh;
}
|
|
47
|
+
|
|
48
|
+
// ── Main Extractor ───────────────────────────────────────────────────────
|
|
49
|
+
|
|
50
|
+
/**
 * Parses Python source and collects every concept this module knows how to
 * extract.
 *
 * Node extractors run in a fixed order (error_raise, error_handle, effect,
 * entrypoint, guard, state_mutation) and append to one shared list, so the
 * order of `nodes` follows that sequence. Dependency edges are collected
 * separately into `edges`.
 *
 * @param source   Python source text to parse.
 * @param filePath Path recorded on the map and passed to every extractor.
 * @returns A ConceptMap tagged with language `'py'` and EXTRACTOR_VERSION.
 */
export function extractPythonConcepts(source: string, filePath: string): ConceptMap {
  const root = getParser().parse(source).rootNode;
  const nodes: ConceptNode[] = [];
  const edges: ConceptEdge[] = [];

  const nodeExtractors = [
    extractErrorRaise,
    extractErrorHandle,
    extractEffects,
    extractEntrypoints,
    extractGuards,
    extractStateMutation,
  ];
  for (const extract of nodeExtractors) {
    extract(root, source, filePath, nodes);
  }

  extractDependencyEdges(root, source, filePath, edges);

  return {
    filePath,
    language: 'py',
    nodes,
    edges,
    extractorVersion: EXTRACTOR_VERSION,
  };
}
|
|
72
|
+
|
|
73
|
+
// ── error_raise ──────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
/**
 * Appends one `error_raise` concept per Python `raise` statement under `root`.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for the evidence snippet.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractErrorRaise(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  const onRaise = (stmt: Parser.SyntaxNode): void => {
    const raisedType = extractRaiseType(stmt);
    const concept: ConceptNode = {
      id: conceptId(filePath, 'error_raise', stmt.startIndex),
      kind: 'error_raise',
      primarySpan: nodeSpan(filePath, stmt),
      evidence: nodeText(source, stmt, 100),
      confidence: 1.0,
      language: 'py',
      containerId: getContainerId(stmt, filePath),
      payload: {
        kind: 'error_raise',
        // A Python `raise` is reported with the universal `throw` subtype.
        subtype: 'throw',
        errorType: raisedType,
      },
    };
    nodes.push(concept);
  };

  walkNodes(root, 'raise_statement', onRaise);
}
|
|
100
|
+
|
|
101
|
+
// ── error_handle ─────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
 * Appends one `error_handle` concept per Python `except` clause under `root`.
 *
 * The clause's `block` child is classified by classifyPythonDisposition; both
 * the disposition and its confidence are copied onto the concept.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for evidence and classification.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractErrorHandle(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  const onExcept = (clause: Parser.SyntaxNode): void => {
    const body = clause.children.find((c) => c.type === 'block');
    const { type: dispositionType, confidence } = classifyPythonDisposition(body, source);
    const boundName = extractExceptVar(clause);

    const concept: ConceptNode = {
      id: conceptId(filePath, 'error_handle', clause.startIndex),
      kind: 'error_handle',
      primarySpan: nodeSpan(filePath, clause),
      evidence: nodeText(source, clause, 150),
      confidence,
      language: 'py',
      containerId: getContainerId(clause, filePath),
      payload: {
        kind: 'error_handle',
        disposition: dispositionType,
        errorVariable: boundName,
      },
    };
    nodes.push(concept);
  };

  walkNodes(root, 'except_clause', onExcept);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Classifies what an `except` body does with the caught error.
 *
 * Checks run in this order and the first match wins:
 *   1. no block, no statements, a lone `pass`, or a lone `...`  → `ignored`
 *   2. body mentions the `raise` keyword                        → `rethrown` when a
 *      bare `raise` ends a line, otherwise `wrapped`
 *   3. body mentions the `return` keyword                        → `returned`
 *   4. body mentions logging/logger/log/print                    → `logged`
 *   5. anything else                                             → `wrapped` (low confidence)
 *
 * Steps 2-4 are text heuristics over the block's source, so keywords inside
 * strings or comments can still match. Keywords are matched on word
 * boundaries, so identifiers such as `raise_alert` or `return_code` do not.
 *
 * @param block  The `block` child of an except clause, if one was found.
 * @param source Full source text the block's indices refer to.
 * @returns The disposition and a confidence between 0 and 1.
 */
function classifyPythonDisposition(
  block: Parser.SyntaxNode | undefined,
  source: string,
): { type: ErrorHandlePayload['disposition']; confidence: number } {
  if (!block) return { type: 'ignored', confidence: 1.0 };

  // Comments appear as named children; drop them so `# note` + `pass` still
  // counts as an empty handler.
  const statements = block.namedChildren.filter((c) => c.type !== 'comment');

  if (statements.length === 0) {
    return { type: 'ignored', confidence: 1.0 };
  }

  if (statements.length === 1) {
    const only = statements[0];
    // except: pass
    if (only.type === 'pass_statement') {
      return { type: 'ignored', confidence: 1.0 };
    }
    // except: ...
    if (only.type === 'expression_statement') {
      const text = source.substring(only.startIndex, only.endIndex).trim();
      if (text === '...') return { type: 'ignored', confidence: 1.0 };
    }
  }

  const bodyText = source.substring(block.startIndex, block.endIndex);

  if (/\braise\b/.test(bodyText)) {
    // Bare `raise` (optionally followed by a trailing comment) re-raises the
    // caught exception; `raise Something(...)` is treated as wrapping it.
    if (/\braise[ \t\r]*(?:#.*)?$/m.test(bodyText)) {
      return { type: 'rethrown', confidence: 0.95 };
    }
    return { type: 'wrapped', confidence: 0.9 };
  }

  if (/\breturn\b/.test(bodyText)) {
    return { type: 'returned', confidence: 0.85 };
  }

  if (/\b(logging|logger|log|print)\b/.test(bodyText)) {
    // A single logging statement is a stronger signal than logging mixed
    // with other work.
    return { type: 'logged', confidence: statements.length === 1 ? 0.9 : 0.7 };
  }

  return { type: 'wrapped', confidence: 0.5 };
}
|
|
180
|
+
|
|
181
|
+
// ── effect ───────────────────────────────────────────────────────────────
|
|
182
|
+
|
|
183
|
+
/**
 * Appends an `effect` concept for each Python call that looks like network,
 * database or file-system I/O.
 *
 * Matching rules, per `call` node:
 *   - `<module>.<method>(...)` with module in NETWORK_MODULES and method in
 *     NETWORK_METHODS → network (0.95); no further rules are tried.
 *   - `<obj>.<method>(...)` with method in DB_METHODS and obj either in
 *     DB_MODULES or matching cursor/conn/db/session/collection → db (0.85);
 *     no further rules are tried.
 *   - callee text exactly `open` → fs (0.9), always reported as not async.
 *   - callee text exactly `fetch` or `aiohttp.request` → network (0.8),
 *     always reported as async.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for callee text and evidence.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractEffects(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  walkNodes(root, 'call', (call) => {
    const callee = call.childForFieldName('function');
    if (!callee) return;

    // All effect concepts share everything except subtype, confidence and async flag.
    const record = (
      subtype: 'network' | 'db' | 'fs',
      confidence: number,
      isAsync: boolean,
    ): void => {
      nodes.push({
        id: conceptId(filePath, 'effect', call.startIndex),
        kind: 'effect',
        primarySpan: nodeSpan(filePath, call),
        evidence: nodeText(source, call, 120),
        confidence,
        language: 'py',
        containerId: getContainerId(call, filePath),
        payload: { kind: 'effect', subtype, async: isAsync },
      });
    };

    const calleeText = source.substring(callee.startIndex, callee.endIndex);

    if (callee.type === 'attribute') {
      const receiver = callee.childForFieldName('object');
      const member = callee.childForFieldName('attribute');

      if (receiver && member) {
        const receiverName = source.substring(receiver.startIndex, receiver.endIndex);
        const memberName = source.substring(member.startIndex, member.endIndex);

        // Network: requests.get(), httpx.post(), etc.
        if (NETWORK_MODULES.has(receiverName) && NETWORK_METHODS.has(memberName)) {
          record('network', 0.95, isInAsyncDef(call));
          return;
        }

        // DB: cursor.execute(), db.query(), etc.
        const looksLikeDbHandle =
          DB_MODULES.has(receiverName) || /cursor|conn|db|session|collection/i.test(receiverName);
        if (DB_METHODS.has(memberName) && looksLikeDbHandle) {
          record('db', 0.85, isInAsyncDef(call));
          return;
        }
      }
    }

    // FS: open()
    if (calleeText === 'open') {
      record('fs', 0.9, false);
    }

    // fetch() in async context (aiohttp pattern)
    if (calleeText === 'fetch' || calleeText === 'aiohttp.request') {
      record('network', 0.8, true);
    }
  });
}
|
|
263
|
+
|
|
264
|
+
// ── entrypoint ──────────────────────────────────────────────────────────
|
|
265
|
+
|
|
266
|
+
/**
 * Appends `entrypoint` concepts for route-decorated functions and for
 * `if __name__ == '__main__':` style blocks.
 *
 * Route detection inspects each decorator on a `decorated_definition` that
 * wraps a function and matches `@app|router|bp . route|get|post|put|delete|patch (`.
 * The concept is anchored on the decorator; its name is the function name,
 * falling back to the first quoted string in the decorator, then `anonymous`.
 * `@x.route(...)` carries no HTTP method; the other verbs are upper-cased.
 *
 * Main detection flags any `if` whose condition text contains both
 * `__name__` and `__main__`.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for decorator text and evidence.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractEntrypoints(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  const routeDecorator = /@(app|router|bp)\.(route|get|post|put|delete|patch)\s*\(/;
  const quotedArgument = /['"]([^'"]+)['"]/;

  // 1. Route decorators: @app.route, @app.get, @router.post, etc.
  walkNodes(root, 'decorated_definition', (decorated) => {
    const fnDef = decorated.children.find((c) => c.type === 'function_definition');
    if (!fnDef) return;

    const decorators = decorated.children.filter((c) => c.type === 'decorator');
    for (const decorator of decorators) {
      const decText = source.substring(decorator.startIndex, decorator.endIndex);
      const route = decText.match(routeDecorator);
      if (!route) continue;

      const verb = route[2].toUpperCase();
      const fnName = fnDef.childForFieldName('name');
      // First quoted string in the decorator, used only as a name fallback.
      const quoted = decText.match(quotedArgument);

      let entryName: string;
      if (fnName) {
        entryName = fnName.text;
      } else {
        entryName = quoted?.[1] || 'anonymous';
      }

      nodes.push({
        id: conceptId(filePath, 'entrypoint', decorator.startIndex),
        kind: 'entrypoint',
        primarySpan: nodeSpan(filePath, decorator),
        evidence: nodeText(source, decorator, 100),
        confidence: 1.0,
        language: 'py',
        containerId: getContainerId(decorated, filePath),
        payload: {
          kind: 'entrypoint',
          subtype: 'route',
          name: entryName,
          httpMethod: verb === 'ROUTE' ? undefined : verb,
        },
      });
    }
  });

  // 2. if __name__ == '__main__':
  walkNodes(root, 'if_statement', (ifStmt) => {
    const condition = ifStmt.childForFieldName('condition');
    if (!condition) return;

    const conditionText = condition.text;
    const isMainCheck = conditionText.includes('__name__') && conditionText.includes('__main__');
    if (!isMainCheck) return;

    nodes.push({
      id: conceptId(filePath, 'entrypoint', ifStmt.startIndex),
      kind: 'entrypoint',
      primarySpan: nodeSpan(filePath, ifStmt),
      evidence: nodeText(source, ifStmt, 100),
      confidence: 1.0,
      language: 'py',
      payload: {
        kind: 'entrypoint',
        subtype: 'main',
        name: 'main',
      },
    });
  });
}
|
|
328
|
+
|
|
329
|
+
// ── guard ───────────────────────────────────────────────────────────────
|
|
330
|
+
|
|
331
|
+
/**
 * Appends `guard` concepts from three independent passes over the tree:
 *
 *   1. Auth decorators (`@login_required`, `@requires_auth`,
 *      `@permission_required`, `@auth_required`, `@authenticated`) → auth
 *      guard named after the decorator text before any `(`.
 *   2. Calls whose callee text contains `model_validate` → validation guard
 *      named `pydantic`.
 *   3. `if` statements whose condition mentions `user`, `auth` or
 *      `request.user` and whose body starts with `return` or `raise` →
 *      unnamed auth guard.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for decorator text and evidence.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractGuards(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  const authDecorator = /@(login_required|requires_auth|permission_required|auth_required|authenticated)/;
  const authCondition = /\b(user|auth|request\.user)\b/;

  // 1. Auth decorators (decorated_definition → decorator + function_definition)
  walkNodes(root, 'decorated_definition', (decorated) => {
    const decorators = decorated.children.filter((c) => c.type === 'decorator');
    for (const decorator of decorators) {
      const decText = source.substring(decorator.startIndex, decorator.endIndex);
      if (!authDecorator.test(decText)) continue;

      nodes.push({
        id: conceptId(filePath, 'guard', decorator.startIndex),
        kind: 'guard',
        primarySpan: nodeSpan(filePath, decorator),
        evidence: nodeText(source, decorator, 100),
        confidence: 1.0,
        language: 'py',
        containerId: getContainerId(decorated, filePath),
        payload: {
          kind: 'guard',
          subtype: 'auth',
          name: decText.replace('@', '').split('(')[0].trim(),
        },
      });
    }
  });

  // 2. Pydantic validation: BaseModel.model_validate()
  walkNodes(root, 'call', (call) => {
    const callee = call.childForFieldName('function');
    if (!callee || !callee.text.includes('model_validate')) return;

    nodes.push({
      id: conceptId(filePath, 'guard', call.startIndex),
      kind: 'guard',
      primarySpan: nodeSpan(filePath, call),
      evidence: nodeText(source, call, 100),
      confidence: 0.9,
      language: 'py',
      containerId: getContainerId(call, filePath),
      payload: { kind: 'guard', subtype: 'validation', name: 'pydantic' },
    });
  });

  // 3. Early return/raise after auth check: if not request.user: raise/return
  walkNodes(root, 'if_statement', (ifStmt) => {
    const cond = ifStmt.childForFieldName('condition');
    if (!cond || !authCondition.test(cond.text)) return;

    const body = ifStmt.namedChildren.find((c) => c.type === 'block');
    if (!body) return;

    const firstStmt = body.namedChildren[0];
    if (!firstStmt) return;
    if (firstStmt.type !== 'return_statement' && firstStmt.type !== 'raise_statement') return;

    nodes.push({
      id: conceptId(filePath, 'guard', ifStmt.startIndex),
      kind: 'guard',
      primarySpan: nodeSpan(filePath, ifStmt),
      evidence: nodeText(source, ifStmt, 100),
      confidence: 0.8,
      language: 'py',
      containerId: getContainerId(ifStmt, filePath),
      payload: { kind: 'guard', subtype: 'auth' },
    });
  });
}
|
|
401
|
+
|
|
402
|
+
// ── state_mutation ───────────────────────────────────────────────────────
|
|
403
|
+
|
|
404
|
+
/**
 * Appends `state_mutation` concepts for Python assignments that touch
 * shared state.
 *
 * A first pass collects every name listed in any `global` statement in the
 * file. A second pass then inspects each `assignment` node:
 *   - `self.<attr> = ...`                         → scope `module`, confidence 0.9
 *   - plain name that appears in a `global` list   → scope `global`, confidence 1.0
 *   - plain name with no enclosing function/class  → scope `module`, confidence 0.8
 * Other assignments are skipped.
 *
 * The set of `global` names is file-wide, not per function, so a name
 * declared global anywhere is treated as global everywhere.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for evidence.
 * @param filePath Path used for ids, spans and container ids.
 * @param nodes    Output list; new concepts are pushed onto it.
 */
function extractStateMutation(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  nodes: ConceptNode[],
): void {
  // Names mentioned in any `global` statement in the file.
  const declaredGlobal = new Set<string>();
  walkNodes(root, 'global_statement', (stmt) => {
    stmt.namedChildren
      .filter((c) => c.type === 'identifier')
      .forEach((c) => declaredGlobal.add(c.text));
  });

  walkNodes(root, 'assignment', (assign) => {
    const target = assign.childForFieldName('left');
    if (!target) return;

    // self.x = ... is reported with scope 'module'.
    const assignsToSelf =
      target.type === 'attribute' && target.childForFieldName('object')?.text === 'self';
    if (assignsToSelf) {
      nodes.push({
        id: conceptId(filePath, 'state_mutation', assign.startIndex),
        kind: 'state_mutation',
        primarySpan: nodeSpan(filePath, assign),
        evidence: nodeText(source, assign, 100),
        confidence: 0.9,
        language: 'py',
        containerId: getContainerId(assign, filePath),
        payload: { kind: 'state_mutation', target: target.text, scope: 'module' },
      });
      return;
    }

    if (target.type !== 'identifier') return;

    const name = target.text;
    const containerId = getContainerId(assign, filePath);

    if (declaredGlobal.has(name)) {
      nodes.push({
        id: conceptId(filePath, 'state_mutation', assign.startIndex),
        kind: 'state_mutation',
        primarySpan: nodeSpan(filePath, assign),
        evidence: nodeText(source, assign, 100),
        confidence: 1.0,
        language: 'py',
        containerId,
        payload: { kind: 'state_mutation', target: name, scope: 'global' },
      });
      return;
    }

    // Anything inside a function or class that is not a declared global is local.
    if (containerId) return;

    // Module level (top level); no containerId key is emitted here.
    nodes.push({
      id: conceptId(filePath, 'state_mutation', assign.startIndex),
      kind: 'state_mutation',
      primarySpan: nodeSpan(filePath, assign),
      evidence: nodeText(source, assign, 100),
      confidence: 0.8,
      language: 'py',
      payload: { kind: 'state_mutation', target: name, scope: 'module' },
    });
  });
}
|
|
471
|
+
|
|
472
|
+
// ── dependency ──────────────────────────────────────────────────────────
|
|
473
|
+
|
|
474
|
+
/**
 * Appends one `dependency` edge per imported module.
 *
 * Each edge runs from `filePath` to the import specifier and is labelled:
 *   - `internal` when the specifier starts with `.` (relative import),
 *   - `stdlib`   when its first dotted segment is in STDLIB_MODULES,
 *   - `external` otherwise.
 *
 * `import a, b as c` yields one edge per module. The first keeps the
 * statement-based id (`<file>#dep@<statement start>`); later modules in the
 * same statement use their own start offset so ids stay unique.
 *
 * For `from X import ...` the specifier is the `module_name` text. Leading
 * dots are prepended from the statement text only when the module text does
 * not already carry them, so relative imports are never double-dotted.
 *
 * @param root     Syntax node whose subtree is searched.
 * @param source   Full source text, used for evidence.
 * @param filePath Path used as the edge source and in ids/spans.
 * @param edges    Output list; new edges are pushed onto it.
 */
function extractDependencyEdges(
  root: Parser.SyntaxNode,
  source: string,
  filePath: string,
  edges: ConceptEdge[],
): void {
  const addDependency = (
    node: Parser.SyntaxNode,
    specifier: string,
    idOffset: number = node.startIndex,
  ): void => {
    let subtype: 'stdlib' | 'external' | 'internal' = 'external';
    if (specifier.startsWith('.')) {
      subtype = 'internal';
    } else {
      const rootModule = specifier.split('.')[0];
      if (STDLIB_MODULES.has(rootModule)) {
        subtype = 'stdlib';
      }
    }

    edges.push({
      id: `${filePath}#dep@${idOffset}`,
      kind: 'dependency',
      sourceId: filePath,
      targetId: specifier,
      // Span and evidence always describe the whole import statement.
      primarySpan: nodeSpan(filePath, node),
      evidence: nodeText(source, node, 100),
      confidence: 1.0,
      language: 'py',
      payload: { kind: 'dependency', subtype, specifier },
    });
  };

  walkNodes(root, 'import_statement', (node) => {
    // import x, y as z
    let isFirst = true;
    for (const child of node.namedChildren) {
      let nameNode: Parser.SyntaxNode | null = null;
      if (child.type === 'dotted_name') {
        nameNode = child;
      } else if (child.type === 'aliased_import') {
        nameNode = child.childForFieldName('name');
      }
      if (!nameNode) continue;

      // Previously every module in one statement shared the statement's id.
      addDependency(node, nameNode.text, isFirst ? node.startIndex : child.startIndex);
      isFirst = false;
    }
  });

  walkNodes(root, 'import_from_statement', (node) => {
    // from x import y
    const moduleNode = node.childForFieldName('module_name');
    let specifier = moduleNode ? moduleNode.text : '';

    // tree-sitter-python exposes a relative module as a `relative_import`
    // node whose text already starts with the dots; only add them when missing.
    if (!specifier.startsWith('.')) {
      const relativeMatch = node.text.match(/^from\s+(\.+)/);
      if (relativeMatch) {
        specifier = relativeMatch[1] + specifier;
      }
    }

    if (specifier) {
      addDependency(node, specifier);
    }
  });
}
|
|
529
|
+
|
|
530
|
+
// ── Tree-sitter Helpers ──────────────────────────────────────────────────
|
|
531
|
+
|
|
532
|
+
/**
 * Visits every node in `root`'s subtree (root included) in pre-order and
 * invokes `callback` for each node whose type equals `type`.
 *
 * Uses a tree cursor rather than recursion: descend to the first child when
 * possible, otherwise move to the next sibling, otherwise climb until an
 * ancestor has a next sibling. The walk ends when the cursor cannot climb
 * any further.
 *
 * @param root     Node whose subtree is traversed.
 * @param type     Node type to match against `cursor.nodeType`.
 * @param callback Called with each matching node.
 */
function walkNodes(
  root: Parser.SyntaxNode,
  type: string,
  callback: (node: Parser.SyntaxNode) => void,
): void {
  const cursor = root.walk();

  for (;;) {
    if (cursor.nodeType === type) {
      callback(cursor.currentNode);
    }

    // Go deeper first, then sideways.
    if (cursor.gotoFirstChild() || cursor.gotoNextSibling()) continue;

    // Leaf with no sibling: back up until some ancestor has one.
    let movedOn = false;
    while (cursor.gotoParent()) {
      if (cursor.gotoNextSibling()) {
        movedOn = true;
        break;
      }
    }
    if (!movedOn) return;
  }
}
|
|
552
|
+
|
|
553
|
+
/**
 * Builds a ConceptSpan for a syntax node.
 *
 * Each row and column from the node's start/end positions is shifted by +1
 * before being handed to `conceptSpan`.
 *
 * @param filePath Path recorded on the span.
 * @param node     Node whose start and end positions are used.
 * @returns The span produced by `conceptSpan`.
 */
function nodeSpan(filePath: string, node: Parser.SyntaxNode): ConceptSpan {
  const { startPosition: from, endPosition: to } = node;
  return conceptSpan(filePath, from.row + 1, from.column + 1, to.row + 1, to.column + 1);
}
|
|
562
|
+
|
|
563
|
+
/**
 * Returns the source text covered by `node`, cut off after `maxLen`
 * characters.
 *
 * @param source Full source text the node's indices refer to.
 * @param node   Node providing `startIndex` and `endIndex`.
 * @param maxLen Maximum number of characters to return.
 * @returns The (possibly truncated) snippet.
 */
function nodeText(source: string, node: Parser.SyntaxNode, maxLen: number): string {
  const begin = node.startIndex;
  const cap = begin + maxLen;
  const stop = Math.min(node.endIndex, cap);
  return source.substring(begin, stop);
}
|
|
566
|
+
|
|
567
|
+
/**
 * Finds the nearest enclosing function or class of `node` and returns an id
 * for it.
 *
 * The search starts at `node.parent`, so a definition is never its own
 * container. The id has the form `<filePath>#fn:<name>@<startIndex>`; the
 * `fn:` prefix is used for classes as well, and a definition without a name
 * child is labelled `anonymous`.
 *
 * @param node     Node whose ancestors are searched.
 * @param filePath Path embedded in the id.
 * @returns The container id, or `undefined` when no ancestor is a function
 *          or class definition.
 */
function getContainerId(node: Parser.SyntaxNode, filePath: string): string | undefined {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const isDefinition =
      ancestor.type === 'function_definition' || ancestor.type === 'class_definition';
    if (!isDefinition) continue;

    const nameNode = ancestor.childForFieldName('name');
    const label = nameNode ? nameNode.text : 'anonymous';
    return `${filePath}#fn:${label}@${ancestor.startIndex}`;
  }
  return undefined;
}
|
|
579
|
+
|
|
580
|
+
/**
 * Returns the name of the exception a `raise` statement raises.
 *
 *   raise ValueError("...")   → "ValueError"
 *   raise errors.NotFound()   → "errors.NotFound"
 *   raise ValueError          → "ValueError"
 *   raise errors.NotFound     → "errors.NotFound"
 *   raise err from make()     → "err"   (the `from` cause is ignored)
 *   raise                     → undefined
 *
 * Only the first non-comment named child is inspected. In a `raise`
 * statement the raised expression comes before any `from` cause, so the
 * cause can never be mistaken for the error type.
 *
 * @param node A `raise_statement` node.
 * @returns The callee/identifier text, or `undefined` for a bare `raise` or
 *          an expression that is not a call, identifier or attribute.
 */
function extractRaiseType(node: Parser.SyntaxNode): string | undefined {
  const raised = node.namedChildren.find((c) => c.type !== 'comment');
  if (!raised) return undefined;

  if (raised.type === 'call') {
    const callee = raised.childForFieldName('function');
    return callee ? callee.text : undefined;
  }

  if (raised.type === 'identifier' || raised.type === 'attribute') {
    return raised.text;
  }

  return undefined;
}
|
|
592
|
+
|
|
593
|
+
/**
 * Returns the variable an `except` clause binds the exception to, e.g. `e`
 * in `except Exception as e`.
 *
 * Two tree shapes are recognised among the clause's direct children: an
 * `as_pattern` child with an `alias` field, or an `identifier` child whose
 * previous sibling has the text `as`. The first match in child order wins.
 *
 * @param node An `except_clause` node.
 * @returns The bound variable name, or `undefined` when the clause binds none.
 */
function extractExceptVar(node: Parser.SyntaxNode): string | undefined {
  for (const part of node.children) {
    switch (part.type) {
      case 'as_pattern': {
        const alias = part.childForFieldName('alias');
        if (alias) return alias.text;
        break;
      }
      case 'identifier': {
        // Flat form: `as` keyword immediately followed by the name.
        if (part.previousSibling?.text === 'as') return part.text;
        break;
      }
      default:
        break;
    }
  }
  return undefined;
}
|
|
607
|
+
|
|
608
|
+
/**
 * Reports whether the nearest enclosing function definition of `node` is
 * declared `async`.
 *
 * Only the closest `function_definition` ancestor is consulted: it counts as
 * async when one of its direct children has type `async`. Outer functions
 * are not examined.
 *
 * @param node Node whose ancestors are searched (the node itself is skipped).
 * @returns `true` inside an async function, `false` inside a sync function
 *          or outside any function.
 */
function isInAsyncDef(node: Parser.SyntaxNode): boolean {
  let ancestor = node.parent;
  while (ancestor && ancestor.type !== 'function_definition') {
    ancestor = ancestor.parent;
  }
  if (!ancestor) return false;

  // The `async` keyword shows up as a child token of the definition.
  return ancestor.children.some((token) => token.type === 'async');
}
|