@kernlang/review-python 3.4.6-canary.44.1.a85ee2e8 → 3.4.6-canary.46.1.19dcfc19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/mapper/extractors/dependency.d.ts +3 -0
  2. package/dist/mapper/extractors/dependency.js +52 -0
  3. package/dist/mapper/extractors/effect.d.ts +3 -0
  4. package/dist/mapper/extractors/effect.js +74 -0
  5. package/dist/mapper/extractors/entrypoint.d.ts +3 -0
  6. package/dist/mapper/extractors/entrypoint.js +225 -0
  7. package/dist/mapper/extractors/error.d.ts +5 -0
  8. package/dist/mapper/extractors/error.js +129 -0
  9. package/dist/mapper/extractors/fastapi-pagination.d.ts +5 -0
  10. package/dist/mapper/extractors/fastapi-pagination.js +119 -0
  11. package/dist/mapper/extractors/fastapi-status.d.ts +6 -0
  12. package/dist/mapper/extractors/fastapi-status.js +115 -0
  13. package/dist/mapper/extractors/guard.d.ts +3 -0
  14. package/dist/mapper/extractors/guard.js +115 -0
  15. package/dist/mapper/extractors/pydantic.d.ts +13 -0
  16. package/dist/mapper/extractors/pydantic.js +61 -0
  17. package/dist/mapper/extractors/state-mutation.d.ts +3 -0
  18. package/dist/mapper/extractors/state-mutation.js +63 -0
  19. package/dist/mapper/helpers/ast.d.ts +9 -0
  20. package/dist/mapper/helpers/ast.js +62 -0
  21. package/dist/mapper/helpers/types.d.ts +7 -0
  22. package/dist/mapper/helpers/types.js +168 -0
  23. package/dist/mapper/index.d.ts +8 -0
  24. package/dist/mapper/index.js +42 -0
  25. package/dist/mapper/signatures.d.ts +17 -0
  26. package/dist/mapper/signatures.js +87 -0
  27. package/dist/mapper.d.ts +1 -8
  28. package/dist/mapper.js +1 -1286
  29. package/package.json +3 -3
  30. package/src/mapper/extractors/dependency.ts +60 -0
  31. package/src/mapper/extractors/effect.ts +84 -0
  32. package/src/mapper/extractors/entrypoint.ts +272 -0
  33. package/src/mapper/extractors/error.ts +152 -0
  34. package/src/mapper/extractors/fastapi-pagination.ts +117 -0
  35. package/src/mapper/extractors/fastapi-status.ts +119 -0
  36. package/src/mapper/extractors/guard.ts +114 -0
  37. package/src/mapper/extractors/pydantic.ts +74 -0
  38. package/src/mapper/extractors/state-mutation.ts +72 -0
  39. package/src/mapper/helpers/ast.ts +72 -0
  40. package/src/mapper/helpers/types.ts +164 -0
  41. package/src/mapper/index.ts +50 -0
  42. package/src/mapper/signatures.ts +94 -0
  43. package/src/mapper.ts +1 -1388
  44. package/tsconfig.tsbuildinfo +1 -1
package/src/mapper.ts CHANGED
@@ -1,1388 +1 @@
1
- /**
2
- * Python Concept Mapper — tree-sitter based.
3
- *
4
- * Maps Python syntax → universal KERN concepts.
5
- * Phase 1: error_raise, error_handle, effect
6
- */
7
-
8
- import type { ConceptEdge, ConceptMap, ConceptNode, ConceptSpan, ErrorHandlePayload } from '@kernlang/core';
9
- import { conceptId, conceptSpan } from '@kernlang/core';
10
- import Parser from 'tree-sitter';
11
- import Python from 'tree-sitter-python';
12
-
13
- const EXTRACTOR_VERSION = '1.0.0';
14
-
15
- // ── Network call patterns ────────────────────────────────────────────────
16
-
17
- const NETWORK_MODULES = new Set(['requests', 'httpx', 'aiohttp', 'urllib']);
18
- const NETWORK_METHODS = new Set(['get', 'post', 'put', 'patch', 'delete', 'head', 'options', 'request', 'fetch']);
19
-
20
- const DB_MODULES = new Set(['psycopg2', 'asyncpg', 'pymongo', 'sqlalchemy', 'django']);
21
- const DB_METHODS = new Set([
22
- 'execute',
23
- 'executemany',
24
- 'fetchone',
25
- 'fetchall',
26
- 'fetchmany',
27
- 'query',
28
- 'find',
29
- 'find_one',
30
- 'insert_one',
31
- 'insert_many',
32
- 'update_one',
33
- 'delete_one',
34
- ]);
35
-
36
- const _FS_FUNCTIONS = new Set(['open', 'read', 'write', 'readlines', 'writelines']);
37
-
38
- type FieldTypeTag = 'string' | 'number' | 'boolean' | 'null' | 'object' | 'array' | 'unknown';
39
- type FieldTypeMap = Readonly<Record<string, FieldTypeTag>>;
40
-
41
- interface PydanticModel {
42
- fields: readonly string[];
43
- types: FieldTypeMap;
44
- }
45
-
46
- interface PythonRouteAnalysis {
47
- errorStatusCodes?: readonly number[];
48
- successStatusCodes?: readonly number[];
49
- successStatusCodesResolved?: boolean;
50
- paginationStrategy?: 'page' | 'offset' | 'cursor' | 'mixed' | 'none';
51
- paginationStrategyResolved?: boolean;
52
- hasUnboundedCollectionQuery?: boolean;
53
- hasDbWrite?: boolean;
54
- hasIdempotencyProtection?: boolean;
55
- hasBodyValidation?: boolean;
56
- validatedBodyFields?: readonly string[];
57
- bodyValidationResolved?: boolean;
58
- validatedBodyFieldTypes?: FieldTypeMap;
59
- }
60
-
61
- const PY_API_ERROR_STATUS_CODES = new Set([401, 403, 404, 422, 500]);
62
- const PY_API_SUCCESS_STATUS_CODES = new Set([200, 201, 202, 204, 206]);
63
- // FastAPI's documented default success status is 200, regardless of HTTP method
64
- // (Codex plan-review #1, FastAPI docs:
65
- // https://fastapi.tiangolo.com/tutorial/response-status-code/). 201 for POST is
66
- // a per-route opt-in via `status_code=201`, not a method-derived default.
67
- const FASTAPI_DEFAULT_SUCCESS_STATUS = 200;
68
- // Pagination anchor families — mirror the TS classification in
69
- // `packages/review/src/concept-rules/cross-stack-utils.ts`. The size keys
70
- // (`limit`, `take`, `page_size`, `per_page`) are intentionally NOT anchors
71
- // — they're compatible with either offset or cursor pagination.
72
- const PY_PAGE_ANCHORS = new Set(['page', 'page_number', 'pageNumber']);
73
- const PY_OFFSET_ANCHORS = new Set(['offset', 'skip']);
74
- const PY_CURSOR_ANCHORS = new Set(['cursor', 'after', 'before', 'next', 'previous']);
75
- const PY_PAGINATION_RE = /\b(limit|offset|skip|cursor|page|page_size|per_page)\b|\.limit\s*\(/i;
76
- const PY_DB_COLLECTION_RE = /\.(find|all|fetchall|to_list|scalars)\s*\(|\bselect\s*\(/i;
77
- const PY_DB_WRITE_RE =
78
- /\.(insert_one|insert_many|update_one|update_many|delete_one|delete_many|add|create|save|commit)\s*\(/i;
79
- const PY_IDEMPOTENCY_RE =
80
- /\b(idempotency(?:[_-]?key)?|Idempotency-Key|transaction|unique|upsert|get_or_create|on_conflict)\b/i;
81
-
82
- const STDLIB_MODULES = new Set([
83
- 'os',
84
- 'sys',
85
- 'json',
86
- 're',
87
- 'math',
88
- 'datetime',
89
- 'time',
90
- 'logging',
91
- 'argparse',
92
- 'collections',
93
- 'itertools',
94
- 'functools',
95
- 'pathlib',
96
- 'shutil',
97
- 'subprocess',
98
- 'threading',
99
- 'multiprocessing',
100
- 'abc',
101
- 'typing',
102
- 'io',
103
- 'pickle',
104
- 'random',
105
- 'hashlib',
106
- 'hmac',
107
- 'base64',
108
- 'csv',
109
- 'sqlite3',
110
- 'zlib',
111
- 'gzip',
112
- 'tarfile',
113
- 'zipfile',
114
- 'enum',
115
- 'struct',
116
- 'tempfile',
117
- 'unittest',
118
- 'urllib',
119
- 'uuid',
120
- 'xml',
121
- ]);
122
-
123
- // ── Parser setup ─────────────────────────────────────────────────────────
124
-
125
- let parser: Parser | null = null;
126
-
127
- function getParser(): Parser {
128
- if (!parser) {
129
- parser = new Parser();
130
- parser.setLanguage(Python as unknown as Parser.Language);
131
- }
132
- return parser;
133
- }
134
-
135
- // ── Main Extractor ───────────────────────────────────────────────────────
136
-
137
- export function extractPythonConcepts(source: string, filePath: string): ConceptMap {
138
- const tree = getParser().parse(source);
139
- const nodes: ConceptNode[] = [];
140
- const edges: ConceptEdge[] = [];
141
-
142
- extractErrorRaise(tree.rootNode, source, filePath, nodes);
143
- extractErrorHandle(tree.rootNode, source, filePath, nodes);
144
- extractEffects(tree.rootNode, source, filePath, nodes);
145
-
146
- extractEntrypoints(tree.rootNode, source, filePath, nodes);
147
- extractGuards(tree.rootNode, source, filePath, nodes);
148
- extractStateMutation(tree.rootNode, source, filePath, nodes);
149
- extractDependencyEdges(tree.rootNode, source, filePath, edges);
150
-
151
- return {
152
- filePath,
153
- language: 'py',
154
- nodes,
155
- edges,
156
- extractorVersion: EXTRACTOR_VERSION,
157
- };
158
- }
159
-
160
- // ── error_raise ──────────────────────────────────────────────────────────
161
-
162
- function extractErrorRaise(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
163
- // raise statements
164
- walkNodes(root, 'raise_statement', (node) => {
165
- const errorType = extractRaiseType(node);
166
- nodes.push({
167
- id: conceptId(filePath, 'error_raise', node.startIndex),
168
- kind: 'error_raise',
169
- primarySpan: nodeSpan(filePath, node),
170
- evidence: nodeText(source, node, 100),
171
- confidence: 1.0,
172
- language: 'py',
173
- containerId: getContainerId(node, filePath),
174
- payload: {
175
- kind: 'error_raise',
176
- subtype: 'throw', // Python raise ≡ throw
177
- errorType,
178
- },
179
- });
180
- });
181
- }
182
-
183
- // ── error_handle ─────────────────────────────────────────────────────────
184
-
185
- function extractErrorHandle(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
186
- // except clauses
187
- walkNodes(root, 'except_clause', (node) => {
188
- const block = node.children.find((c) => c.type === 'block');
189
- const disposition = classifyPythonDisposition(block, source);
190
- const errorVar = extractExceptVar(node);
191
-
192
- nodes.push({
193
- id: conceptId(filePath, 'error_handle', node.startIndex),
194
- kind: 'error_handle',
195
- primarySpan: nodeSpan(filePath, node),
196
- evidence: nodeText(source, node, 150),
197
- confidence: disposition.confidence,
198
- language: 'py',
199
- containerId: getContainerId(node, filePath),
200
- payload: {
201
- kind: 'error_handle',
202
- disposition: disposition.type,
203
- errorVariable: errorVar,
204
- },
205
- });
206
- });
207
- }
208
-
209
- function classifyPythonDisposition(
210
- block: Parser.SyntaxNode | undefined,
211
- source: string,
212
- ): { type: ErrorHandlePayload['disposition']; confidence: number } {
213
- if (!block) return { type: 'ignored', confidence: 1.0 };
214
-
215
- const children = block.namedChildren;
216
-
217
- // except: pass → ignored
218
- if (children.length === 1 && children[0].type === 'pass_statement') {
219
- return { type: 'ignored', confidence: 1.0 };
220
- }
221
-
222
- // except: ... (ellipsis) → ignored
223
- if (children.length === 1 && children[0].type === 'expression_statement') {
224
- const text = source.substring(children[0].startIndex, children[0].endIndex).trim();
225
- if (text === '...') return { type: 'ignored', confidence: 1.0 };
226
- }
227
-
228
- // Empty block
229
- if (children.length === 0) {
230
- return { type: 'ignored', confidence: 1.0 };
231
- }
232
-
233
- const bodyText = source.substring(block.startIndex, block.endIndex);
234
-
235
- // raise → rethrown or wrapped
236
- if (bodyText.includes('raise')) {
237
- // bare `raise` → rethrown
238
- if (/\braise\s*$|\braise\s*\n/m.test(bodyText)) {
239
- return { type: 'rethrown', confidence: 0.95 };
240
- }
241
- return { type: 'wrapped', confidence: 0.9 };
242
- }
243
-
244
- // return → returned
245
- if (bodyText.includes('return')) {
246
- return { type: 'returned', confidence: 0.85 };
247
- }
248
-
249
- // logging
250
- if (/\b(logging|logger|log|print)\b/.test(bodyText)) {
251
- if (children.length === 1) return { type: 'logged', confidence: 0.9 };
252
- return { type: 'logged', confidence: 0.7 };
253
- }
254
-
255
- return { type: 'wrapped', confidence: 0.5 };
256
- }
257
-
258
- // ── effect ───────────────────────────────────────────────────────────────
259
-
260
- function extractEffects(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
261
- walkNodes(root, 'call', (node) => {
262
- const funcNode = node.childForFieldName('function');
263
- if (!funcNode) return;
264
-
265
- const funcText = source.substring(funcNode.startIndex, funcNode.endIndex);
266
-
267
- // Network: requests.get(), httpx.post(), etc.
268
- if (funcNode.type === 'attribute') {
269
- const obj = funcNode.childForFieldName('object');
270
- const attr = funcNode.childForFieldName('attribute');
271
- if (obj && attr) {
272
- const objName = source.substring(obj.startIndex, obj.endIndex);
273
- const methodName = source.substring(attr.startIndex, attr.endIndex);
274
-
275
- if (NETWORK_MODULES.has(objName) && NETWORK_METHODS.has(methodName)) {
276
- nodes.push({
277
- id: conceptId(filePath, 'effect', node.startIndex),
278
- kind: 'effect',
279
- primarySpan: nodeSpan(filePath, node),
280
- evidence: nodeText(source, node, 120),
281
- confidence: 0.95,
282
- language: 'py',
283
- containerId: getContainerId(node, filePath),
284
- payload: { kind: 'effect', subtype: 'network', async: isInAsyncDef(node) },
285
- });
286
- return;
287
- }
288
-
289
- // DB: cursor.execute(), db.query(), etc.
290
- if (
291
- DB_METHODS.has(methodName) &&
292
- (DB_MODULES.has(objName) || /cursor|conn|db|session|collection/i.test(objName))
293
- ) {
294
- nodes.push({
295
- id: conceptId(filePath, 'effect', node.startIndex),
296
- kind: 'effect',
297
- primarySpan: nodeSpan(filePath, node),
298
- evidence: nodeText(source, node, 120),
299
- confidence: 0.85,
300
- language: 'py',
301
- containerId: getContainerId(node, filePath),
302
- payload: { kind: 'effect', subtype: 'db', async: isInAsyncDef(node) },
303
- });
304
- return;
305
- }
306
- }
307
- }
308
-
309
- // FS: open()
310
- if (funcText === 'open') {
311
- nodes.push({
312
- id: conceptId(filePath, 'effect', node.startIndex),
313
- kind: 'effect',
314
- primarySpan: nodeSpan(filePath, node),
315
- evidence: nodeText(source, node, 120),
316
- confidence: 0.9,
317
- language: 'py',
318
- containerId: getContainerId(node, filePath),
319
- payload: { kind: 'effect', subtype: 'fs', async: false },
320
- });
321
- }
322
-
323
- // fetch() in async context (aiohttp pattern)
324
- if (funcText === 'fetch' || funcText === 'aiohttp.request') {
325
- nodes.push({
326
- id: conceptId(filePath, 'effect', node.startIndex),
327
- kind: 'effect',
328
- primarySpan: nodeSpan(filePath, node),
329
- evidence: nodeText(source, node, 120),
330
- confidence: 0.8,
331
- language: 'py',
332
- containerId: getContainerId(node, filePath),
333
- payload: { kind: 'effect', subtype: 'network', async: true },
334
- });
335
- }
336
- });
337
- }
338
-
339
- // ── entrypoint ──────────────────────────────────────────────────────────
340
-
341
- function extractEntrypoints(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
342
- const pydanticModels = collectPydanticModels(source);
343
-
344
- // FastAPI / Flask route decorators.
345
- //
346
- // The route *path* (e.g. `/current`) is what cross-stack rules need to
347
- // match against — not the Python function name. Prior to 2026-04-21 this
348
- // emitted the function name, which `collectRoutes` then silently dropped
349
- // (it filters on paths starting with `/`). The FastAPI router-prefix join
350
- // in `cross-stack-utils.collectRoutes` also needs `routerName` so it can
351
- // pair per-file routes with the `include_router(prefix=…)` call that
352
- // mounts them.
353
- walkNodes(root, 'decorated_definition', (node) => {
354
- const fnDef = node.children.find((c) => c.type === 'function_definition');
355
- if (!fnDef) return;
356
-
357
- for (const child of node.children) {
358
- if (child.type !== 'decorator') continue;
359
- const decText = source.substring(child.startIndex, child.endIndex);
360
-
361
- const routeMatch = decText.match(/@(\w+)\.(route|get|post|put|delete|patch)\s*\(/);
362
- if (!routeMatch) continue;
363
-
364
- const routerName = routeMatch[1];
365
- const method = routeMatch[2].toUpperCase();
366
- const pathMatch = decText.match(/['"]([^'"]+)['"]/);
367
- const routePath = pathMatch?.[1];
368
- // Only surface the decorator as a route when we could extract a URL
369
- // path literal. Mystery decorators with only kwargs (e.g. `@app.get`
370
- // stub) are noise — skip them instead of filling `name` with the
371
- // function name, which cross-stack routes treat as invalid.
372
- if (!routePath?.startsWith('/')) continue;
373
-
374
- const responseModel = extractResponseModel(decText);
375
- const routeContainerId = getSelfContainerId(fnDef, filePath);
376
- const routeAnalysis = analyzePythonRoute(
377
- fnDef,
378
- source,
379
- method,
380
- routePath,
381
- responseModel,
382
- pydanticModels,
383
- decText,
384
- );
385
-
386
- nodes.push({
387
- id: conceptId(filePath, 'entrypoint', child.startIndex),
388
- kind: 'entrypoint',
389
- primarySpan: nodeSpan(filePath, child),
390
- evidence: nodeText(source, child, 100),
391
- confidence: 1.0,
392
- language: 'py',
393
- containerId: routeContainerId,
394
- payload: {
395
- kind: 'entrypoint',
396
- subtype: 'route',
397
- name: routePath,
398
- httpMethod: method === 'ROUTE' ? undefined : method,
399
- responseModel,
400
- isAsync: isAsyncFunction(fnDef),
401
- routerName,
402
- errorStatusCodes: routeAnalysis.errorStatusCodes,
403
- successStatusCodes: routeAnalysis.successStatusCodes,
404
- successStatusCodesResolved: routeAnalysis.successStatusCodesResolved,
405
- paginationStrategy: routeAnalysis.paginationStrategy,
406
- paginationStrategyResolved: routeAnalysis.paginationStrategyResolved,
407
- hasUnboundedCollectionQuery: routeAnalysis.hasUnboundedCollectionQuery,
408
- hasDbWrite: routeAnalysis.hasDbWrite,
409
- hasIdempotencyProtection: routeAnalysis.hasIdempotencyProtection,
410
- hasBodyValidation: routeAnalysis.hasBodyValidation,
411
- validatedBodyFields: routeAnalysis.validatedBodyFields,
412
- bodyValidationResolved: routeAnalysis.bodyValidationResolved,
413
- validatedBodyFieldTypes: routeAnalysis.validatedBodyFieldTypes,
414
- },
415
- });
416
- }
417
- });
418
-
419
- // FastAPI `app.include_router(<module>.<router>, prefix="/api/x")`.
420
- //
421
- // Emitted as a route-mount concept so `collectRoutes` can join it with
422
- // the per-file route nodes: a route declared on `router` in
423
- // `app/api/nutrition_goals.py` and mounted in `main.py` with
424
- // `app.include_router(nutrition_goals.router, prefix="/api/nutrition-goals")`
425
- // should resolve to the full URL `/api/nutrition-goals/<path>`.
426
- walkNodes(root, 'call', (node) => {
427
- const fn = node.childForFieldName('function');
428
- if (!fn) return;
429
- const fnText = source.substring(fn.startIndex, fn.endIndex);
430
- if (!/\.include_router$/.test(fnText)) return;
431
- const argsNode = node.childForFieldName('arguments');
432
- if (!argsNode) return;
433
- const argsText = source.substring(argsNode.startIndex, argsNode.endIndex);
434
-
435
- // First positional arg is the router. Common shapes:
436
- // include_router(router) — local identifier
437
- // include_router(nutrition_goals.router) — imported-module attribute
438
- // include_router(auth_router) — aliased local identifier
439
- const posMatch = argsText.match(/^\(\s*([A-Za-z_][\w.]*)/);
440
- if (!posMatch) return;
441
- const routerRef = posMatch[1];
442
- const dot = routerRef.lastIndexOf('.');
443
- const sourceModule = dot === -1 ? undefined : routerRef.slice(0, dot);
444
- const routerName = dot === -1 ? routerRef : routerRef.slice(dot + 1);
445
-
446
- const prefixMatch = argsText.match(/prefix\s*=\s*['"]([^'"]*)['"]/);
447
- // Prefix defaults to '' when omitted — still valid (the route keeps its
448
- // declared path as-is), so emit the mount either way.
449
- const prefix = prefixMatch?.[1] ?? '';
450
-
451
- nodes.push({
452
- id: conceptId(filePath, 'entrypoint', node.startIndex),
453
- kind: 'entrypoint',
454
- primarySpan: nodeSpan(filePath, node),
455
- evidence: nodeText(source, node, 120),
456
- confidence: 0.95,
457
- language: 'py',
458
- payload: {
459
- kind: 'entrypoint',
460
- subtype: 'route-mount',
461
- name: prefix,
462
- routerName,
463
- sourceModule,
464
- },
465
- });
466
- });
467
-
468
- // `if __name__ == '__main__':`
469
- walkNodes(root, 'if_statement', (node) => {
470
- const condition = node.childForFieldName('condition');
471
- if (condition?.text.includes('__name__') && condition.text.includes('__main__')) {
472
- nodes.push({
473
- id: conceptId(filePath, 'entrypoint', node.startIndex),
474
- kind: 'entrypoint',
475
- primarySpan: nodeSpan(filePath, node),
476
- evidence: nodeText(source, node, 100),
477
- confidence: 1.0,
478
- language: 'py',
479
- payload: {
480
- kind: 'entrypoint',
481
- subtype: 'main',
482
- name: 'main',
483
- },
484
- });
485
- }
486
- });
487
- }
488
-
489
- // ── guard ───────────────────────────────────────────────────────────────
490
-
491
- function extractGuards(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
492
- // 1. Auth decorators (tree-sitter: decorated_definition → decorator + function_definition)
493
- walkNodes(root, 'decorated_definition', (node) => {
494
- for (const child of node.children) {
495
- if (child.type !== 'decorator') continue;
496
- const decText = source.substring(child.startIndex, child.endIndex);
497
- if (/@(login_required|requires_auth|permission_required|auth_required|authenticated)/.test(decText)) {
498
- nodes.push({
499
- id: conceptId(filePath, 'guard', child.startIndex),
500
- kind: 'guard',
501
- primarySpan: nodeSpan(filePath, child),
502
- evidence: nodeText(source, child, 100),
503
- confidence: 1.0,
504
- language: 'py',
505
- containerId: getContainerId(node, filePath),
506
- payload: {
507
- kind: 'guard',
508
- subtype: 'auth',
509
- name: decText.replace('@', '').split('(')[0].trim(),
510
- },
511
- });
512
- }
513
- }
514
- });
515
-
516
- // 2. Pydantic validation: BaseModel.model_validate()
517
- walkNodes(root, 'call', (node) => {
518
- const func = node.childForFieldName('function');
519
- if (func?.text.includes('model_validate')) {
520
- nodes.push({
521
- id: conceptId(filePath, 'guard', node.startIndex),
522
- kind: 'guard',
523
- primarySpan: nodeSpan(filePath, node),
524
- evidence: nodeText(source, node, 100),
525
- confidence: 0.9,
526
- language: 'py',
527
- containerId: getContainerId(node, filePath),
528
- payload: { kind: 'guard', subtype: 'validation', name: 'pydantic' },
529
- });
530
- }
531
- });
532
-
533
- // 3. FastAPI `Depends(...)` injection — route handler parameter with a
534
- // `Depends` default is the idiomatic FastAPI auth/validation guard.
535
- // Example:
536
- // @router.get("/me")
537
- // def me(user: User = Depends(get_current_user)):
538
- // Classified by the dependency function name:
539
- // - `get_current_user` / `current_user` / `require_auth` / `*_user` → 'auth'
540
- // - `verify_*` / `validate_*` → 'validation'
541
- // - `rate_limit_*` / `check_rate_limit` → 'rate-limit'
542
- // - everything else → 'policy'
543
- // Feeds the `auth-drift` cross-stack rule.
544
- walkNodes(root, 'default_parameter', (node) => {
545
- const val = node.childForFieldName('value');
546
- if (!val || val.type !== 'call') return;
547
- const func = val.childForFieldName('function');
548
- if (!func || func.text !== 'Depends') return;
549
- const args = val.childForFieldName('arguments');
550
- if (!args) return;
551
- const posArg = args.namedChildren.find((c) => c.type === 'identifier' || c.type === 'attribute');
552
- const depName = posArg ? posArg.text : 'Depends';
553
- const subtype = classifyDependency(depName);
554
- nodes.push({
555
- id: conceptId(filePath, 'guard', node.startIndex),
556
- kind: 'guard',
557
- primarySpan: nodeSpan(filePath, node),
558
- evidence: nodeText(source, node, 120),
559
- confidence: 0.85,
560
- language: 'py',
561
- containerId: getContainerId(node, filePath),
562
- payload: { kind: 'guard', subtype, name: depName },
563
- });
564
- });
565
-
566
- // 4. Early return/raise after auth check: if not request.user: raise/return
567
- walkNodes(root, 'if_statement', (node) => {
568
- const cond = node.childForFieldName('condition');
569
- if (cond && /\b(user|auth|request\.user)\b/.test(cond.text)) {
570
- const block = node.namedChildren.find((c) => c.type === 'block');
571
- if (block) {
572
- const firstStmt = block.namedChildren[0];
573
- if (firstStmt && (firstStmt.type === 'return_statement' || firstStmt.type === 'raise_statement')) {
574
- nodes.push({
575
- id: conceptId(filePath, 'guard', node.startIndex),
576
- kind: 'guard',
577
- primarySpan: nodeSpan(filePath, node),
578
- evidence: nodeText(source, node, 100),
579
- confidence: 0.8,
580
- language: 'py',
581
- containerId: getContainerId(node, filePath),
582
- payload: { kind: 'guard', subtype: 'auth' },
583
- });
584
- }
585
- }
586
- }
587
- });
588
- }
589
-
590
- function classifyDependency(depName: string): 'auth' | 'validation' | 'rate-limit' | 'policy' {
591
- // Strip module prefix (`auth.get_current_user` → `get_current_user`) so the
592
- // heuristic looks at the final identifier where intent usually lives.
593
- const tail = depName.split('.').pop() ?? depName;
594
- if (/^(get_current_user|current_user|require_auth|authenticated|is_authenticated)$/i.test(tail)) return 'auth';
595
- if (/_user$|^user$|auth/i.test(tail)) return 'auth';
596
- if (/^(verify_|validate_)/i.test(tail)) return 'validation';
597
- if (/rate_?limit/i.test(tail)) return 'rate-limit';
598
- return 'policy';
599
- }
600
-
601
- function analyzePythonRoute(
602
- fnDef: Parser.SyntaxNode,
603
- source: string,
604
- method: string,
605
- routePath: string,
606
- responseModel: string | undefined,
607
- pydanticModels: ReadonlyMap<string, PydanticModel>,
608
- decText: string,
609
- ): PythonRouteAnalysis {
610
- const text = source.substring(fnDef.startIndex, fnDef.endIndex);
611
- const validation = extractFastApiBodyValidation(fnDef, source, pydanticModels);
612
- const success = extractFastApiSuccessStatusCodes(decText, fnDef, source);
613
- const pagination = extractFastApiPaginationStrategy(fnDef, source);
614
- return {
615
- errorStatusCodes: extractPythonHttpExceptionStatusCodes(text),
616
- successStatusCodes: success.codes,
617
- successStatusCodesResolved: success.resolved,
618
- paginationStrategy: pagination.strategy,
619
- paginationStrategyResolved: pagination.resolved,
620
- hasUnboundedCollectionQuery: hasUnboundedPythonCollectionQuery(text, method, routePath, responseModel),
621
- hasDbWrite: PY_DB_WRITE_RE.test(text),
622
- hasIdempotencyProtection: PY_IDEMPOTENCY_RE.test(text),
623
- hasBodyValidation: validation.has,
624
- validatedBodyFields: validation.fields,
625
- bodyValidationResolved: validation.resolved,
626
- validatedBodyFieldTypes: validation.types,
627
- };
628
- }
629
-
630
- // ── FastAPI success status codes ─────────────────────────────────────────
631
- // Phase 2 of cross-stack `status-code-drift`. Populates the
632
- // `successStatusCodes` / `successStatusCodesResolved` payload fields so the
633
- // rule can flag clients checking a 2xx the FastAPI server doesn't emit.
634
- //
635
- // Sources of evidence (per buddy plan-review consensus):
636
- // 1. Decorator `status_code=N` (literal) or `status_code=status.HTTP_NNN_*`.
637
- // 2. Body-side `Response(status_code=N)` / `JSONResponse(...)` returns.
638
- // 3. Body-side `<param>.status_code = N` mutations (FastAPI's documented
639
- // pattern for routes that take a `Response` parameter).
640
- // 4. When the decorator omits status_code AND the body has no explicit
641
- // Response / mutation, default to 200 — FastAPI's documented default
642
- // regardless of HTTP method. Codex caught Gemini's POST→201 premise as
643
- // wrong (FastAPI docs:
644
- // https://fastapi.tiangolo.com/tutorial/response-status-code/).
645
- //
646
- // Marked unresolved when:
647
- // - Decorator status_code is set to a non-literal/non-status-constant
648
- // expression (variable, function call).
649
- // - Any `Response(status_code=...)` / `<x>.status_code = ...` RHS is dynamic.
650
- function extractFastApiSuccessStatusCodes(
651
- decText: string,
652
- fnDef: Parser.SyntaxNode,
653
- source: string,
654
- ): { codes: readonly number[] | undefined; resolved: boolean } {
655
- let sawDynamic = false;
656
-
657
- // 1. Decorator `status_code=N` — applies ONLY to plain `return data` paths.
658
- // For routes whose return paths all use explicit Response/JSONResponse,
659
- // the decorator code is dead (Codex impl-review #1).
660
- const decStatusMatch = decText.match(/\bstatus_code\s*=\s*([^,)]+)/);
661
- let decoratorCode: number | undefined;
662
- if (decStatusMatch) {
663
- const code = parseFastApiStatusValue(decStatusMatch[1].trim());
664
- if (code === undefined) sawDynamic = true;
665
- else if (PY_API_SUCCESS_STATUS_CODES.has(code)) decoratorCode = code;
666
- }
667
-
668
- const body = fnDef.childForFieldName('body') ?? fnDef.namedChildren.find((c) => c.type === 'block');
669
- const bodyText = body ? source.substring(body.startIndex, body.endIndex) : '';
670
-
671
- // 2. Response(status_code=N) / JSONResponse(...) etc. — applies only to
672
- // that specific return path. Multiple Response codes contribute a
673
- // multi-2xx route.
674
- const responseCodes = new Set<number>();
675
- const responseRe =
676
- /\b(?:Response|JSONResponse|HTMLResponse|PlainTextResponse|RedirectResponse|StreamingResponse|FileResponse|ORJSONResponse|UJSONResponse)\s*\([^)]*?\bstatus_code\s*=\s*([^,)\n]+)/g;
677
- for (const match of bodyText.matchAll(responseRe)) {
678
- const code = parseFastApiStatusValue(match[1].trim());
679
- if (code === undefined) sawDynamic = true;
680
- else if (PY_API_SUCCESS_STATUS_CODES.has(code)) responseCodes.add(code);
681
- }
682
-
683
- // 3. `<paramName>.status_code = N` — mutation on the injected Response
684
- // parameter. The parameter name varies (`response`, `resp`, `r`, `out`,
685
- // custom names — Codex impl-review #2). Match any identifier prefix
686
- // rather than a name whitelist; the API_SUCCESS_STATUS_CODES filter
687
- // keeps the noise tax low.
688
- const mutationCodes = new Set<number>();
689
- // `=(?!=)` distinguishes assignment from `==` comparison so
690
- // `if response.status_code == 200:` doesn't masquerade as a dynamic
691
- // mutation (forge round, Claude engine).
692
- const mutateRe = /\b[A-Za-z_]\w*\.status_code\s*=(?!=)\s*([^\n;]+)/g;
693
- for (const match of bodyText.matchAll(mutateRe)) {
694
- const code = parseFastApiStatusValue(match[1].trim());
695
- if (code === undefined) sawDynamic = true;
696
- else if (PY_API_SUCCESS_STATUS_CODES.has(code)) mutationCodes.add(code);
697
- }
698
-
699
- if (sawDynamic) return { codes: undefined, resolved: false };
700
-
701
- // Plain return paths inherit the route's "primary" success code, computed
702
- // as: mutation > decorator > FastAPI default 200. When a mutation is
703
- // present we treat it as the plain-return code (the conditional-mutation
704
- // case is a documented v1 false-negative — would require control-flow
705
- // analysis to disambiguate).
706
- const plainReturnRe =
707
- /\breturn\b(?!\s+(?:Response|JSONResponse|HTMLResponse|PlainTextResponse|RedirectResponse|StreamingResponse|FileResponse|ORJSONResponse|UJSONResponse)\s*\()/;
708
- const hasPlainReturn = plainReturnRe.test(bodyText);
709
-
710
- const final = new Set<number>();
711
-
712
- if (hasPlainReturn) {
713
- if (mutationCodes.size > 0) {
714
- for (const c of mutationCodes) final.add(c);
715
- } else if (decoratorCode !== undefined) {
716
- final.add(decoratorCode);
717
- } else {
718
- final.add(FASTAPI_DEFAULT_SUCCESS_STATUS);
719
- }
720
- } else if (decoratorCode !== undefined && responseCodes.size === 0 && mutationCodes.size === 0) {
721
- // Handler with no plain return, no Response, no mutation — likely an
722
- // implicit-None-return stub or all-raise. Decorator is the only signal.
723
- final.add(decoratorCode);
724
- }
725
-
726
- // Response and mutation codes ALWAYS contribute (they're explicit choices
727
- // for their respective return paths).
728
- for (const c of responseCodes) final.add(c);
729
- for (const c of mutationCodes) final.add(c);
730
-
731
- return {
732
- codes: Array.from(final).sort((a, b) => a - b),
733
- resolved: true,
734
- };
735
- }
736
-
737
/**
 * Resolve the source text of a FastAPI status expression to a numeric code.
 *
 * Two shapes are recognised: a bare three-digit literal (`201`) and any text
 * containing an `HTTP_NNN_` constant name (`status.HTTP_201_CREATED`).
 *
 * @param val - Status expression text; surrounding whitespace is ignored.
 * @returns The numeric code, or `undefined` when neither shape matches.
 */
function parseFastApiStatusValue(val: string): number | undefined {
  const text = val.trim();
  // Whole-string three-digit literal.
  if (/^\d{3}$/.test(text)) return Number(text);
  // Named constant anywhere in the expression (module prefix is irrelevant).
  const constant = /HTTP_(\d{3})_/.exec(text);
  return constant === null ? undefined : Number(constant[1]);
}
747
-
748
// ── FastAPI pagination strategy ──────────────────────────────────────────
/**
 * Classify a FastAPI route handler's pagination style from its parameter list.
 *
 * Each parameter is keyed by its name, or by the literal `Query(alias="...")`
 * alias when one is present, and the key is matched against the
 * page/offset/cursor anchor sets.
 *
 * @param fnDef - The handler's `function_definition` node.
 * @param source - Full source text the node was parsed from.
 * @returns
 *   - `none` / resolved=true — no anchor parameters and nothing opaque.
 *   - `page` | `offset` | `cursor` / resolved=true — exactly one family is read.
 *   - `mixed` / resolved=true — more than one family is read.
 *   - `undefined` / resolved=false — the handler has a `Request` parameter,
 *     `**kwargs` (typed or untyped), or a `Query(alias=<dynamic>)` that
 *     cannot be resolved statically.
 */
function extractFastApiPaginationStrategy(
  fnDef: Parser.SyntaxNode,
  source: string,
): {
  strategy: 'page' | 'offset' | 'cursor' | 'mixed' | 'none' | undefined;
  resolved: boolean;
} {
  const paramsNode = fnDef.childForFieldName('parameters');
  if (!paramsNode) return { strategy: 'none', resolved: true };

  const families = new Set<'page' | 'offset' | 'cursor'>();
  let sawOpaque = false;

  for (const child of paramsNode.namedChildren) {
    // **kwargs — handler may read any query key dynamically; opaque.
    if (child.type === 'dictionary_splat_pattern') {
      sawOpaque = true;
      continue;
    }
    // *args — positional spread, irrelevant for query keys; keep silent.
    if (child.type === 'list_splat_pattern') continue;

    // Annotated splats (`**kwargs: Any`, `*args: int`) are wrapped in a
    // `typed_parameter` whose first named child is the splat pattern, so the
    // bare-splat checks above never see them. Treat them the same way.
    if (child.type === 'typed_parameter') {
      const splat = child.namedChildren.find(
        (c) => c.type === 'dictionary_splat_pattern' || c.type === 'list_splat_pattern',
      );
      if (splat) {
        if (splat.type === 'dictionary_splat_pattern') sawOpaque = true;
        continue;
      }
    }

    // Drop typing wrappers to find the param identifier.
    const paramName = extractParamName(child);
    if (!paramName) continue;

    // `request: Request` — handler may call `request.query_params.get(...)`
    // arbitrarily; mark opaque.
    const typeText = extractParamTypeText(child, source);
    if (typeText && /\bRequest\b/.test(typeText)) {
      sawOpaque = true;
      continue;
    }

    // A `Query(alias="...")` call can live in the default value (classic
    // `x = Query(0, alias="page")`) or inside the annotation (modern
    // `Annotated[int, Query(alias="page")]`). The default-value form wins
    // when both carry an alias.
    const fromDefault = extractQueryAlias(extractParamDefaultText(child, source));
    const fromType = fromDefault.alias === undefined ? extractQueryAlias(typeText) : fromDefault;
    if (fromDefault.opaque || fromType.opaque) {
      sawOpaque = true;
      continue;
    }

    const key = fromDefault.alias || fromType.alias || paramName;
    const family = classifyPyAnchor(key);
    if (family) families.add(family);
  }

  if (sawOpaque) return { strategy: undefined, resolved: false };
  if (families.size === 0) return { strategy: 'none', resolved: true };
  if (families.size === 1) return { strategy: [...families][0], resolved: true };
  return { strategy: 'mixed', resolved: true };
}
819
-
820
/**
 * Return the identifier text of a parameter node.
 *
 * Handles bare identifiers plus the typed / defaulted parameter wrappers; any
 * other node type yields `undefined`.
 *
 * @param node - One named child of a `parameters` node.
 * @returns The parameter name, or `undefined` when none can be found.
 */
function extractParamName(node: Parser.SyntaxNode): string | undefined {
  switch (node.type) {
    case 'identifier':
      return node.text;
    case 'typed_parameter':
    case 'typed_default_parameter':
    case 'default_parameter': {
      // Prefer the grammar's `name` field; fall back to the first direct
      // identifier child for wrappers that do not expose that field.
      const named = node.childForFieldName('name') ?? node.namedChildren.find((c) => c.type === 'identifier');
      return named ? named.text : undefined;
    }
    default:
      return undefined;
  }
}
828
-
829
/**
 * Return the annotation source text of a typed parameter.
 *
 * @param node - A parameter node.
 * @param source - Full source text the node was parsed from.
 * @returns The text of the `type` field, or `undefined` when the node is not
 *   a typed parameter or carries no `type` field.
 */
function extractParamTypeText(node: Parser.SyntaxNode, source: string): string | undefined {
  const carriesType = node.type === 'typed_parameter' || node.type === 'typed_default_parameter';
  if (!carriesType) return undefined;
  const annotation = node.childForFieldName('type');
  return annotation ? source.substring(annotation.startIndex, annotation.endIndex) : undefined;
}
835
-
836
/**
 * Return the default-value source text of a defaulted parameter.
 *
 * @param node - A parameter node.
 * @param source - Full source text the node was parsed from.
 * @returns The text of the `value` field, or `undefined` when the node is not
 *   a defaulted parameter or carries no `value` field.
 */
function extractParamDefaultText(node: Parser.SyntaxNode, source: string): string | undefined {
  const carriesDefault = node.type === 'default_parameter' || node.type === 'typed_default_parameter';
  if (!carriesDefault) return undefined;
  const defaultValue = node.childForFieldName('value');
  return defaultValue ? source.substring(defaultValue.startIndex, defaultValue.endIndex) : undefined;
}
842
-
843
/**
 * Map a query-parameter key to its pagination family.
 *
 * The anchor sets are consulted in page → offset → cursor order, so a key
 * present in several sets resolves to the first of those.
 *
 * @param key - Parameter name or resolved `Query` alias.
 * @returns The matching family, or `undefined` when the key is no anchor.
 */
function classifyPyAnchor(key: string): 'page' | 'offset' | 'cursor' | undefined {
  const lookupOrder = [
    ['page', PY_PAGE_ANCHORS],
    ['offset', PY_OFFSET_ANCHORS],
    ['cursor', PY_CURSOR_ANCHORS],
  ] as const;
  for (const [family, anchors] of lookupOrder) {
    if (anchors.has(key)) return family;
  }
  return undefined;
}
849
-
850
/**
 * Pull the literal `alias` out of a `Query(...)` call found in a parameter's
 * default-value or annotation text.
 *
 * Covers both the classic form (`x = Query(0, alias="p")`) and the annotated
 * form (`x: Annotated[int, Query(alias="p")]`).
 *
 * @param text - Default-value or annotation source text, if any.
 * @returns `{ alias, opaque: false }` for a quoted alias, `{ opaque: true }`
 *   when `alias=` is present but not a quoted literal, and `{ opaque: false }`
 *   when the text is empty, has no `Query(` call, or sets no alias.
 */
function extractQueryAlias(text: string | undefined): { alias?: string; opaque: boolean } {
  if (!text || !/\bQuery\s*\(/.test(text)) return { opaque: false };

  const literal = /\balias\s*=\s*['"]([^'"]+)['"]/.exec(text);
  if (literal !== null) return { alias: literal[1], opaque: false };

  // An `alias=` that is not a quoted literal cannot be resolved statically.
  return { opaque: /\balias\s*=/.test(text) };
}
863
-
864
/**
 * Return the argument text of a call whose opening parenthesis sits just
 * before `start`, honouring nested brackets and quoted strings.
 * Falls back to the rest of `text` when the call is never closed.
 */
function sliceHttpExceptionCallArgs(text: string, start: number): string {
  let depth = 0;
  let quote: string | undefined;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (quote !== undefined) {
      if (ch === '\\') i++; // skip the escaped character
      else if (ch === quote) quote = undefined;
      continue;
    }
    if (ch === '"' || ch === "'") quote = ch;
    else if (ch === '(' || ch === '[' || ch === '{') depth++;
    else if (ch === ')' || ch === ']' || ch === '}') {
      if (depth === 0) return text.slice(start, i);
      depth--;
    }
  }
  return text.slice(start);
}

/**
 * Collect the error status codes passed to `HTTPException(...)` calls.
 *
 * Both `status_code=<value>` and a leading positional value are recognised.
 * A value is either a three-digit literal or an `HTTP_NNN_` constant
 * (optionally module-qualified). The argument list is scanned with balanced
 * brackets, so an earlier nested call such as `detail=str(e)` does not hide a
 * later `status_code=`. Only codes in `PY_API_ERROR_STATUS_CODES` are kept.
 *
 * @param text - Python source text to scan.
 * @returns The distinct codes in ascending order, or `undefined` when none
 *   were found.
 */
function extractPythonHttpExceptionStatusCodes(text: string): readonly number[] | undefined {
  const codes = new Set<number>();
  const keep = (digits: string | undefined): void => {
    if (digits === undefined) return;
    const code = Number(digits);
    if (PY_API_ERROR_STATUS_CODES.has(code)) codes.add(code);
  };

  const keywordRe = /status_code\s*=\s*(?:(\d{3})|(?:[\w.]*\.)?HTTP_(\d{3})_)/g;
  const positionalRe = /^\s*(?:(\d{3})|(?:[\w.]*\.)?HTTP_(\d{3})_)/;

  for (const open of text.matchAll(/HTTPException\s*\(/g)) {
    const args = sliceHttpExceptionCallArgs(text, (open.index ?? 0) + open[0].length);
    for (const kw of args.matchAll(keywordRe)) keep(kw[1] ?? kw[2]);
    const positional = positionalRe.exec(args);
    if (positional !== null) keep(positional[1] ?? positional[2]);
  }

  return codes.size > 0 ? Array.from(codes).sort((a, b) => a - b) : undefined;
}
878
-
879
/**
 * Heuristic: does this handler return a whole collection with no pagination?
 *
 * True only when all of the following hold: the method is `GET`; the route
 * path contains neither `{` nor `:`; the text does not match
 * `PY_PAGINATION_RE`; the text matches `PY_DB_COLLECTION_RE`; and either the
 * response model starts with a list-like generic or a `return` is followed
 * somewhere by `.all(`, `.find(` or `.fetchall(`.
 *
 * @param text - Handler source text.
 * @param method - HTTP method of the route.
 * @param routePath - Route path pattern.
 * @param responseModel - `response_model` expression text, if any.
 */
function hasUnboundedPythonCollectionQuery(
  text: string,
  method: string,
  routePath: string,
  responseModel: string | undefined,
): boolean {
  const isParameterlessGet = method === 'GET' && !/[{:]/.test(routePath);
  if (!isParameterlessGet) return false;
  if (PY_PAGINATION_RE.test(text)) return false;
  if (!PY_DB_COLLECTION_RE.test(text)) return false;

  if (responseModel && /^(list|List|Sequence|Iterable)\s*\[/.test(responseModel)) return true;
  return /\breturn\b[\s\S]*(\.all\s*\(|\.find\s*\(|\.fetchall\s*\()/.test(text);
}
894
-
895
/**
 * Scan Python source for top-level classes whose base list mentions
 * `BaseModel` and collect their annotated fields.
 *
 * A class body runs from the end of the `class ...:` header to the next line
 * that starts in column 0. Only annotations at the class body's own
 * indentation are treated as fields, so annotated locals inside validators or
 * methods (`result: str = v.strip()`) and fields of nested classes are
 * ignored. Multiline annotations are not handled; that loses the type tag but
 * never invents one.
 *
 * @param source - Full Python source text.
 * @returns Map from class name to its sorted field names and coarse type
 *   tags. Classes with no detected fields are omitted.
 */
function collectPydanticModels(source: string): Map<string, PydanticModel> {
  const models = new Map<string, PydanticModel>();
  const classRe = /^class\s+([A-Za-z_]\w*)\s*\([^)]*BaseModel[^)]*\)\s*:/gm;
  // Groups: 1 = indentation, 2 = field name, 3 = annotation (up to `=`, `#` or EOL).
  const fieldRe = /^([ \t]+)([A-Za-z_]\w*)[ \t]*:[ \t]*([^=#\n]+?)(?:[ \t]*=[^\n]*|[ \t]*#[^\n]*)?$/gm;

  for (const match of source.matchAll(classRe)) {
    const name = match[1];
    const start = (match.index ?? 0) + match[0].length;
    const rest = source.slice(start);
    const nextTopLevel = rest.search(/\n\S/);
    const body = nextTopLevel === -1 ? rest : rest.slice(0, nextTopLevel);

    // Indentation of the first real statement after the header line. When
    // the class has no following lines (one-line body) no filter is applied.
    const headerLineEnd = body.indexOf('\n');
    const firstStatement = headerLineEnd === -1 ? null : /^([ \t]+)[^\s#]/m.exec(body.slice(headerLineEnd + 1));
    const classIndent = firstStatement ? firstStatement[1] : undefined;

    const fields: string[] = [];
    const types: Record<string, FieldTypeTag> = {};
    for (const fieldMatch of body.matchAll(fieldRe)) {
      if (classIndent !== undefined && fieldMatch[1] !== classIndent) continue;
      const field = fieldMatch[2];
      if (field === 'model_config' || field === 'Config') continue;
      fields.push(field);
      types[field] = coarsenPythonTypeAnnotation(fieldMatch[3].trim());
    }

    if (fields.length > 0) {
      models.set(name, { fields: fields.sort(), types: Object.freeze({ ...types }) });
    }
  }
  return models;
}
924
-
925
/**
 * Split a type-annotation string at top-level delimiters.
 *
 * Delimiters nested inside `[...]` / `(...)` or inside a quoted string are
 * left alone, so `A, B[C, D]` splits into `['A', 'B[C, D]']` (not
 * `['A', 'B[C', 'D]']`) and `'a,b', 'c'` splits into `["'a,b'", "'c'"]`.
 *
 * Each part is trimmed. Empty parts between delimiters are kept; an empty
 * trailing part is dropped. A stray closing bracket does not push the depth
 * below zero.
 *
 * @param s - Annotation text, e.g. the inside of `Union[...]`.
 * @param delim - Delimiter to split on.
 * @returns The trimmed top-level parts, in order.
 */
function splitTopLevelTypeArgs(s: string, delim: ',' | '|'): string[] {
  const parts: string[] = [];
  let depth = 0;
  let quote: string | undefined;
  let cur = '';
  for (let i = 0; i < s.length; i++) {
    const c = s[i];
    if (quote !== undefined) {
      cur += c;
      if (c === '\\' && i + 1 < s.length) {
        // Copy the escaped character verbatim so `\'` cannot close the string.
        cur += s[++i];
      } else if (c === quote) {
        quote = undefined;
      }
      continue;
    }
    if (c === '"' || c === "'") quote = c;
    else if (c === '[' || c === '(') depth++;
    else if (c === ']' || c === ')') depth = Math.max(0, depth - 1);
    else if (c === delim && depth === 0) {
      parts.push(cur.trim());
      cur = '';
      continue;
    }
    cur += c;
  }
  if (cur.trim()) parts.push(cur.trim());
  return parts;
}
946
-
947
/**
 * Coarsen a Pydantic field annotation to the FieldTypeTag union, so client
 * and server field types can be compared on the same footing.
 *
 * Shapes handled:
 *
 *   `A | B` (PEP 604) / Union[A, B]        → shared tag if every non-null branch agrees
 *   Optional[T] / Annotated[T, ...]        → coarsen T (wrapper dropped)
 *   List[T] / list[T] / Tuple[...] / Set[T] (and bare list/tuple/set) → 'array'
 *   Dict[K, V] / Mapping[K, V] (and bare dict)                        → 'object'
 *   Literal['admin'] / Literal[1]          → primitive shared by all literals
 *   str / int / float / bool / None and the listed Pydantic newtypes
 *
 * Anything else → 'unknown'. This is conservative on purpose: an 'unknown'
 * tag is the "no opinion" answer.
 *
 * The PEP 604 check runs before wrapper stripping. The wrapper patterns only
 * look at the first and last characters, so `Annotated[int, X] | Annotated[str, Y]`
 * would otherwise be read as one `Annotated[...]` and tagged from its first
 * argument alone.
 *
 * @param ann - Annotation source text.
 * @returns The coarse tag for the annotation.
 */
function coarsenPythonTypeAnnotation(ann: string): FieldTypeTag {
  const t = ann.trim();
  if (t === '') return 'unknown';

  // PEP 604 `int | None | str`. Only a `|` OUTSIDE every bracket counts, so
  // `Dict[str, int | None]` is not split here.
  if (containsTopLevelChar(t, '|')) {
    return coarsenUnionParts(splitTopLevelTypeArgs(t, '|'));
  }

  // Optional[T] / typing.Optional[T] — strip and recurse.
  const optMatch = t.match(/^(?:typing\.)?Optional\[([\s\S]+)\]$/);
  if (optMatch) return coarsenPythonTypeAnnotation(optMatch[1]);

  // Annotated[T, ...] — first arg is the underlying type.
  const annoMatch = t.match(/^(?:typing\.)?Annotated\[([\s\S]+)\]$/);
  if (annoMatch) {
    const [underlying] = splitTopLevelTypeArgs(annoMatch[1], ',');
    return underlying === undefined ? 'unknown' : coarsenPythonTypeAnnotation(underlying);
  }

  // Union[A, B, ...] — only stable if every non-null branch agrees.
  // ANY 'unknown' branch poisons the result.
  const unionMatch = t.match(/^(?:typing\.)?Union\[([\s\S]+)\]$/);
  if (unionMatch) {
    return coarsenUnionParts(splitTopLevelTypeArgs(unionMatch[1], ','));
  }

  // Parameterised container types — coarsen to wire shape.
  if (/^(?:typing\.)?(?:List|list|Sequence|Iterable|Tuple|tuple|Set|set|FrozenSet|frozenset)\[/.test(t)) return 'array';
  if (/^(?:typing\.)?(?:Dict|dict|Mapping|MutableMapping)\[/.test(t)) return 'object';

  // Literal[X, Y, ...] — return the shared tag ONLY when all literals agree.
  // `Literal['a', 1]` accepts a string or a number, so tagging it from the
  // first literal alone would flag a valid number client.
  const litMatch = t.match(/^(?:typing\.)?Literal\[([\s\S]+)\]$/);
  if (litMatch) {
    const parts = splitTopLevelTypeArgs(litMatch[1], ',');
    if (parts.length === 0) return 'unknown';
    const tags = parts.map((p) => coarsenLiteralValue(p.trim()));
    if (tags.includes('unknown')) return 'unknown';
    const set = new Set(tags);
    return set.size === 1 ? [...set][0] : 'unknown';
  }

  // Plain primitives, common Pydantic newtypes and bare containers.
  // `bytes` intentionally stays 'unknown' — not a JSON primitive.
  switch (t) {
    case 'str':
    case 'StrictStr':
    case 'EmailStr':
    case 'HttpUrl':
    case 'AnyUrl':
    case 'AnyHttpUrl':
    case 'UUID':
    case 'UUID1':
    case 'UUID3':
    case 'UUID4':
    case 'UUID5':
    case 'SecretStr':
      return 'string';
    case 'int':
    case 'float':
    case 'Decimal':
    case 'StrictInt':
    case 'StrictFloat':
    case 'PositiveInt':
    case 'NegativeInt':
    case 'NonNegativeInt':
    case 'NonPositiveInt':
    case 'PositiveFloat':
    case 'NegativeFloat':
    case 'NonNegativeFloat':
    case 'NonPositiveFloat':
      return 'number';
    case 'bool':
    case 'StrictBool':
      return 'boolean';
    case 'None':
    case 'NoneType':
      return 'null';
    case 'list':
    case 'List':
    case 'tuple':
    case 'Tuple':
    case 'set':
    case 'Set':
    case 'frozenset':
    case 'FrozenSet':
      return 'array';
    case 'dict':
    case 'Dict':
      return 'object';
  }

  // Any other bare identifier could be a nested BaseModel ('object'), a
  // `class Status(str, Enum)` or `Status = Literal[...]` alias ('string'), or
  // a custom newtype. Without symbol resolution these cannot be told apart,
  // so stay at 'unknown' and accept a miss over a false report.
  return 'unknown';
}
1055
-
1056
/**
 * Tag one `Literal[...]` argument token by the JSON primitive it denotes.
 *
 * @param v - Source token such as `'admin'`, `42`, `True` or `None`.
 * @returns 'string' for a token opening with a quote, 'number' for one
 *   opening with an optional minus and a digit, 'boolean' for `True`/`False`,
 *   'null' for `None`, otherwise 'unknown'.
 */
function coarsenLiteralValue(v: string): FieldTypeTag {
  const opener = v.charAt(0);
  if (opener === "'" || opener === '"') return 'string';
  if (/^-?\d/.test(v)) return 'number';
  switch (v) {
    case 'True':
    case 'False':
      return 'boolean';
    case 'None':
      return 'null';
    default:
      return 'unknown';
  }
}
1066
-
1067
/**
 * Reduce the branches of a union annotation to a single tag.
 *
 * @param parts - Source text of each union branch.
 * @returns 'unknown' if any branch is 'unknown' or the non-null branches
 *   disagree; 'null' if no non-null branch remains; otherwise the tag the
 *   non-null branches share.
 */
function coarsenUnionParts(parts: readonly string[]): FieldTypeTag {
  const tags = parts.map((part) => coarsenPythonTypeAnnotation(part));
  if (tags.some((tag) => tag === 'unknown')) return 'unknown';

  // `null` branches only mark optionality; they do not take part in the vote.
  const concrete = new Set(tags.filter((tag) => tag !== 'null'));
  if (concrete.size === 0) return 'null';
  if (concrete.size > 1) return 'unknown';
  return [...concrete][0];
}
1075
-
1076
/**
 * Report whether `ch` occurs in `s` outside every `[...]` / `(...)` group.
 *
 * Bracket characters only adjust the nesting level and are never compared
 * against `ch`. The level is not clamped, so text after a stray closer sits
 * at a negative level and does not count as top-level.
 *
 * @param s - Text to scan.
 * @param ch - Single character to look for.
 */
function containsTopLevelChar(s: string, ch: string): boolean {
  let nesting = 0;
  for (let pos = 0; pos < s.length; pos += 1) {
    const c = s.charAt(pos);
    switch (c) {
      case '[':
      case '(':
        nesting += 1;
        break;
      case ']':
      case ')':
        nesting -= 1;
        break;
      default:
        if (nesting === 0 && c === ch) return true;
    }
  }
  return false;
}
1086
-
1087
/**
 * Detect Pydantic-validated parameters in a handler signature.
 *
 * The header (everything from the start of the definition up to its body) is
 * scanned for `name: Type` pairs; each `Type` that names a known Pydantic
 * model contributes its fields and concrete type tags.
 *
 * @param fnDef - The handler's `function_definition` node.
 * @param source - Full source text the node was parsed from.
 * @param pydanticModels - Models known by class name.
 * @returns `has` — at least one parameter names a known model; `fields` —
 *   sorted union of the models' field names (or `undefined`); `resolved` —
 *   whether any field was collected; `types` — frozen map of concrete tags
 *   (or `undefined`).
 */
function extractFastApiBodyValidation(
  fnDef: Parser.SyntaxNode,
  source: string,
  pydanticModels: ReadonlyMap<string, PydanticModel>,
): {
  has: boolean;
  fields: readonly string[] | undefined;
  resolved: boolean;
  types: FieldTypeMap | undefined;
} {
  const bodyNode = fnDef.childForFieldName('body') ?? fnDef.namedChildren.find((child) => child.type === 'block');
  const header = source.substring(fnDef.startIndex, bodyNode ? bodyNode.startIndex : fnDef.endIndex);

  const fieldNames = new Set<string>();
  const concreteTypes: Record<string, FieldTypeTag> = {};
  let matchedModels = 0;

  for (const [, , typeName] of header.matchAll(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*)/g)) {
    const model = pydanticModels.get(typeName);
    if (!model) continue;
    matchedModels += 1;
    model.fields.forEach((field) => fieldNames.add(field));
    // Record concrete tags only: an 'unknown' must not overwrite a concrete
    // tag contributed by another model parameter of the same handler.
    for (const [name, tag] of Object.entries(model.types)) {
      if (tag !== 'unknown') concreteTypes[name] = tag;
    }
  }

  const hasFields = fieldNames.size > 0;
  const hasTypes = Object.keys(concreteTypes).length > 0;
  return {
    has: matchedModels > 0,
    fields: hasFields ? Array.from(fieldNames).sort() : undefined,
    resolved: hasFields,
    types: hasTypes ? Object.freeze({ ...concreteTypes }) : undefined,
  };
}
1123
-
1124
- // ── state_mutation ───────────────────────────────────────────────────────
1125
-
1126
/**
 * Emit `state_mutation` concept nodes for assignments that touch shared state.
 *
 * Three cases are reported:
 *   - `self.x = ...`                          → scope 'module', confidence 0.9
 *   - assignment to a name that appears in any `global` statement in the file
 *                                              → scope 'global', confidence 1.0
 *   - assignment to a plain name outside any function/class
 *                                              → scope 'module', confidence 0.8
 *
 * @param root - Root node of the parsed file.
 * @param source - Full source text.
 * @param filePath - Path used in ids and spans.
 * @param nodes - Output array; new concept nodes are appended.
 */
function extractStateMutation(root: Parser.SyntaxNode, source: string, filePath: string, nodes: ConceptNode[]): void {
  // Every name mentioned in any `global` statement, file-wide.
  const declaredGlobals = new Set<string>();
  walkNodes(root, 'global_statement', (stmt) => {
    stmt.namedChildren
      .filter((child) => child.type === 'identifier')
      .forEach((child) => declaredGlobals.add(child.text));
  });

  walkNodes(root, 'assignment', (assign) => {
    const target = assign.childForFieldName('left');
    if (!target) return;

    // Fields shared by every node emitted for this assignment.
    const describe = (confidence: number) => ({
      id: conceptId(filePath, 'state_mutation', assign.startIndex),
      kind: 'state_mutation' as const,
      primarySpan: nodeSpan(filePath, assign),
      evidence: nodeText(source, assign, 100),
      confidence,
      language: 'py' as const,
    });

    // self.x = ... → scope 'module' (as requested)
    const isSelfAttribute = target.type === 'attribute' && target.childForFieldName('object')?.text === 'self';
    if (isSelfAttribute) {
      nodes.push({
        ...describe(0.9),
        containerId: getContainerId(assign, filePath),
        payload: { kind: 'state_mutation', target: target.text, scope: 'module' },
      });
      return;
    }

    if (target.type !== 'identifier') return;
    const name = target.text;
    const containerId = getContainerId(assign, filePath);

    if (declaredGlobals.has(name)) {
      nodes.push({
        ...describe(1.0),
        containerId,
        payload: { kind: 'state_mutation', target: name, scope: 'global' },
      });
      return;
    }

    // Module level (top level): no enclosing function or class.
    if (!containerId) {
      nodes.push({
        ...describe(0.8),
        payload: { kind: 'state_mutation', target: name, scope: 'module' },
      });
    }
  });
}
1188
-
1189
- // ── dependency ──────────────────────────────────────────────────────────
1190
-
1191
/**
 * Emit one `dependency` edge per imported module.
 *
 * Specifiers starting with `.` are 'internal'; those whose first dotted
 * segment is in `STDLIB_MODULES` are 'stdlib'; everything else is 'external'.
 *
 * Edge ids are `<file>#dep@<statement start>`. When one statement imports
 * several modules (`import os, sys`) the first edge keeps that id and each
 * further edge gets `:<name start>` appended, so ids stay unique within the
 * file while single-import ids are unchanged.
 *
 * @param root - Root node of the parsed file.
 * @param source - Full source text.
 * @param filePath - Path used as the edge source and in ids/spans.
 * @param edges - Output array; new edges are appended.
 */
function extractDependencyEdges(root: Parser.SyntaxNode, source: string, filePath: string, edges: ConceptEdge[]): void {
  const usedIds = new Set<string>();

  const addDependency = (node: Parser.SyntaxNode, specifier: string, nameNode: Parser.SyntaxNode = node): void => {
    let subtype: 'stdlib' | 'external' | 'internal' = 'external';
    if (specifier.startsWith('.')) {
      subtype = 'internal';
    } else if (STDLIB_MODULES.has(specifier.split('.')[0])) {
      subtype = 'stdlib';
    }

    // Disambiguate only on collision so existing ids stay stable.
    let id = `${filePath}#dep@${node.startIndex}`;
    if (usedIds.has(id)) id = `${id}:${nameNode.startIndex}`;
    usedIds.add(id);

    edges.push({
      id,
      kind: 'dependency',
      sourceId: filePath,
      targetId: specifier,
      primarySpan: nodeSpan(filePath, node),
      evidence: nodeText(source, node, 100),
      confidence: 1.0,
      language: 'py',
      payload: { kind: 'dependency', subtype, specifier },
    });
  };

  walkNodes(root, 'import_statement', (node) => {
    // import x, y as z
    for (const child of node.namedChildren) {
      if (child.type === 'dotted_name') {
        addDependency(node, child.text, child);
      } else if (child.type === 'aliased_import') {
        const name = child.childForFieldName('name');
        if (name) addDependency(node, name.text, name);
      }
    }
  });

  walkNodes(root, 'import_from_statement', (node) => {
    // from x import y
    const moduleNode = node.childForFieldName('module_name');
    const moduleText = moduleNode ? moduleNode.text : '';
    const leadingDots = /^from\s+(\.+)/.exec(node.text);
    // The `module_name` text of a relative import may already include its
    // leading dots; prepend them only when it does not, so `from .foo import x`
    // yields `.foo` rather than `..foo`.
    const specifier =
      leadingDots !== null && !moduleText.startsWith('.') ? leadingDots[1] + moduleText : moduleText;
    if (specifier) {
      addDependency(node, specifier);
    }
  });
}
1241
-
1242
- // ── Tree-sitter Helpers ──────────────────────────────────────────────────
1243
-
1244
/**
 * Visit every node of the given type under `root`, in pre-order, using a
 * tree cursor.
 *
 * @param root - Subtree to traverse (the root itself is also tested).
 * @param type - Node type to match.
 * @param callback - Invoked with each matching node.
 */
function walkNodes(root: Parser.SyntaxNode, type: string, callback: (node: Parser.SyntaxNode) => void): void {
  const cursor = root.walk();
  for (;;) {
    if (cursor.nodeType === type) {
      callback(cursor.currentNode);
    }

    // Descend first, then move sideways.
    if (cursor.gotoFirstChild() || cursor.gotoNextSibling()) continue;

    // Dead end: climb until some ancestor has an unvisited sibling.
    let advanced = false;
    while (cursor.gotoParent()) {
      if (cursor.gotoNextSibling()) {
        advanced = true;
        break;
      }
    }
    // Climbed past the cursor's root without finding one: traversal is done.
    if (!advanced) return;
  }
}
1263
-
1264
/**
 * Build a ConceptSpan for a syntax node, shifting the node's row and column
 * values up by one at both ends.
 *
 * @param filePath - File the node belongs to.
 * @param node - Node to describe.
 */
function nodeSpan(filePath: string, node: Parser.SyntaxNode): ConceptSpan {
  const { startPosition: from, endPosition: to } = node;
  return conceptSpan(filePath, from.row + 1, from.column + 1, to.row + 1, to.column + 1);
}
1273
-
1274
/**
 * Return a node's source text, cut off after `maxLen` characters.
 *
 * @param source - Full source text the node was parsed from.
 * @param node - Node whose text is wanted.
 * @param maxLen - Maximum number of characters to return.
 */
function nodeText(source: string, node: Parser.SyntaxNode, maxLen: number): string {
  const from = node.startIndex;
  const cutoff = from + maxLen;
  const to = node.endIndex < cutoff ? node.endIndex : cutoff;
  return source.substring(from, to);
}
1277
-
1278
/**
 * Find the id of the nearest enclosing function or class definition.
 *
 * The node itself is never considered; only its ancestors are.
 *
 * @param node - Node whose container is wanted.
 * @param filePath - File the node belongs to.
 * @returns `<file>#fn:<name>@<start>` for the closest enclosing
 *   `function_definition` / `class_definition` (name `anonymous` when it has
 *   no `name` field), or `undefined` when there is none.
 */
function getContainerId(node: Parser.SyntaxNode, filePath: string): string | undefined {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const isDefinition = ancestor.type === 'function_definition' || ancestor.type === 'class_definition';
    if (!isDefinition) continue;
    const nameNode = ancestor.childForFieldName('name');
    const label = nameNode ? nameNode.text : 'anonymous';
    return `${filePath}#fn:${label}@${ancestor.startIndex}`;
  }
  return undefined;
}
1290
-
1291
/**
 * Build the container id of a node that is itself a function or class
 * definition.
 *
 * @param node - Candidate definition node.
 * @param filePath - File the node belongs to.
 * @returns `<file>#fn:<name>@<start>` (name `anonymous` when the node has no
 *   `name` field), or `undefined` for any other node type.
 */
function getSelfContainerId(node: Parser.SyntaxNode, filePath: string): string | undefined {
  const isDefinition = node.type === 'function_definition' || node.type === 'class_definition';
  if (!isDefinition) return undefined;
  const nameNode = node.childForFieldName('name');
  const label = nameNode ? nameNode.text : 'anonymous';
  return `${filePath}#fn:${label}@${node.startIndex}`;
}
1297
-
1298
/**
 * Extract the expression passed as `response_model=` in a route decorator.
 *
 * The value is read from just after the `=` up to the first top-level `,` or
 * the `)` that closes the decorator call. Brackets, parentheses, braces and
 * quoted strings are tracked so delimiters inside them do not end the value.
 * Inside a string a backslash escapes the next character, so `"a\\"` closes
 * at its final quote.
 *
 * @param decoratorText - Source text of the decorator.
 * @returns The trimmed expression text, or `undefined` when the keyword is
 *   absent, the value is empty, or the value is `None`.
 */
function extractResponseModel(decoratorText: string): string | undefined {
  const keyword = /\bresponse_model\s*=/.exec(decoratorText);
  if (keyword === null) return undefined;

  let index = keyword.index + keyword[0].length;
  while (/\s/.test(decoratorText[index] ?? '')) index++;

  const start = index;
  let squareDepth = 0;
  let parenDepth = 0;
  let braceDepth = 0;
  let quote: string | undefined;

  for (; index < decoratorText.length; index++) {
    const char = decoratorText[index];

    if (quote !== undefined) {
      if (char === '\\') index++; // skip the escaped character
      else if (char === quote) quote = undefined;
      continue;
    }

    if (char === '"' || char === "'") {
      quote = char;
      continue;
    }

    const atTopLevel = squareDepth === 0 && parenDepth === 0 && braceDepth === 0;
    if (char === '[') squareDepth++;
    else if (char === ']') squareDepth = Math.max(0, squareDepth - 1);
    else if (char === '(') parenDepth++;
    else if (char === ')') {
      // An unmatched `)` at top level closes the decorator call itself.
      if (atTopLevel) break;
      parenDepth = Math.max(0, parenDepth - 1);
    } else if (char === '{') braceDepth++;
    else if (char === '}') braceDepth = Math.max(0, braceDepth - 1);
    else if (char === ',' && atTopLevel) break;
  }

  const model = decoratorText.slice(start, index).trim();
  if (!model || model === 'None') return undefined;
  return model;
}
1346
-
1347
/**
 * Name the exception raised by a `raise` statement.
 *
 * @param node - The raise statement node.
 * @returns The callee text for `raise ValueError("...")`, the identifier for
 *   a bare `raise ValueError`, or `undefined` when neither form is present.
 */
function extractRaiseType(node: Parser.SyntaxNode): string | undefined {
  // raise ValueError("...") → "ValueError"
  const callee = node.namedChildren.find((c) => c.type === 'call')?.childForFieldName('function');
  if (callee) return callee.text;

  // raise ValueError → just identifier
  return node.namedChildren.find((c) => c.type === 'identifier')?.text;
}
1359
-
1360
/**
 * Return the variable bound by `except ... as <name>`.
 *
 * Children are checked in order: an `as_pattern` child yields its `alias`
 * field, and an `identifier` child directly preceded by an `as` token yields
 * its own text.
 *
 * @param node - The except clause node.
 * @returns The bound name, or `undefined` when the clause binds none.
 */
function extractExceptVar(node: Parser.SyntaxNode): string | undefined {
  // except Exception as e → "e"
  for (const child of node.children) {
    switch (child.type) {
      case 'as_pattern': {
        const alias = child.childForFieldName('alias');
        if (alias) return alias.text;
        break;
      }
      case 'identifier':
        // Also try direct identifier after 'as'
        if (child.previousSibling?.text === 'as') return child.text;
        break;
    }
  }
  return undefined;
}
1374
-
1375
/**
 * Report whether the nearest enclosing function definition is async.
 *
 * Only the closest `function_definition` ancestor is consulted; outer
 * functions are not.
 *
 * @param node - Node to test.
 * @returns `false` when the node is not inside any function definition.
 */
function isInAsyncDef(node: Parser.SyntaxNode): boolean {
  let ancestor = node.parent;
  while (ancestor && ancestor.type !== 'function_definition') {
    ancestor = ancestor.parent;
  }
  return ancestor ? isAsyncFunction(ancestor) : false;
}
1385
-
1386
/**
 * Report whether a function definition node has an `async` child token.
 *
 * @param node - A function definition node.
 */
function isAsyncFunction(node: Parser.SyntaxNode): boolean {
  for (const child of node.children) {
    if (child.type === 'async') return true;
  }
  return false;
}
1
+ export { extractPythonConcepts } from './mapper/index.js';