@kernlang/review-python 3.4.6-canary.45.1.130ca3d2 → 3.4.6-canary.46.1.19dcfc19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mapper/extractors/dependency.d.ts +3 -0
- package/dist/mapper/extractors/dependency.js +52 -0
- package/dist/mapper/extractors/effect.d.ts +3 -0
- package/dist/mapper/extractors/effect.js +74 -0
- package/dist/mapper/extractors/entrypoint.d.ts +3 -0
- package/dist/mapper/extractors/entrypoint.js +225 -0
- package/dist/mapper/extractors/error.d.ts +5 -0
- package/dist/mapper/extractors/error.js +129 -0
- package/dist/mapper/extractors/fastapi-pagination.d.ts +5 -0
- package/dist/mapper/extractors/fastapi-pagination.js +119 -0
- package/dist/mapper/extractors/fastapi-status.d.ts +6 -0
- package/dist/mapper/extractors/fastapi-status.js +115 -0
- package/dist/mapper/extractors/guard.d.ts +3 -0
- package/dist/mapper/extractors/guard.js +115 -0
- package/dist/mapper/extractors/pydantic.d.ts +13 -0
- package/dist/mapper/extractors/pydantic.js +61 -0
- package/dist/mapper/extractors/state-mutation.d.ts +3 -0
- package/dist/mapper/extractors/state-mutation.js +63 -0
- package/dist/mapper/helpers/ast.d.ts +9 -0
- package/dist/mapper/helpers/ast.js +62 -0
- package/dist/mapper/helpers/types.d.ts +7 -0
- package/dist/mapper/helpers/types.js +168 -0
- package/dist/mapper/index.d.ts +8 -0
- package/dist/mapper/index.js +42 -0
- package/dist/mapper/signatures.d.ts +17 -0
- package/dist/mapper/signatures.js +87 -0
- package/dist/mapper.d.ts +1 -8
- package/dist/mapper.js +1 -1286
- package/package.json +3 -3
- package/src/mapper/extractors/dependency.ts +60 -0
- package/src/mapper/extractors/effect.ts +84 -0
- package/src/mapper/extractors/entrypoint.ts +272 -0
- package/src/mapper/extractors/error.ts +152 -0
- package/src/mapper/extractors/fastapi-pagination.ts +117 -0
- package/src/mapper/extractors/fastapi-status.ts +119 -0
- package/src/mapper/extractors/guard.ts +114 -0
- package/src/mapper/extractors/pydantic.ts +74 -0
- package/src/mapper/extractors/state-mutation.ts +72 -0
- package/src/mapper/helpers/ast.ts +72 -0
- package/src/mapper/helpers/types.ts +164 -0
- package/src/mapper/index.ts +50 -0
- package/src/mapper/signatures.ts +94 -0
- package/src/mapper.ts +1 -1388
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
 * Coarse, wire-level type tag for a field. `'unknown'` is the conservative
 * fallback returned by the coarsening helpers in this file whenever an
 * annotation cannot be classified.
 */
export type FieldTypeTag = 'string' | 'number' | 'boolean' | 'null' | 'object' | 'array' | 'unknown';
|
|
2
|
+
/** Read-only lookup from a string key (presumably a field name — confirm against callers) to its coarse type tag. */
export type FieldTypeMap = Readonly<Record<string, FieldTypeTag>>;
|
|
3
|
+
|
|
4
|
+
// Coarsen a Pydantic field type annotation to the same FieldTypeTag union
|
|
5
|
+
// the TS mapper uses, so cross-stack rules can compare client TS types
|
|
6
|
+
// against server Pydantic types symmetrically. Handles the common shapes:
|
|
7
|
+
//
|
|
8
|
+
// str / int / float / bool / None / Decimal / UUID / EmailStr
|
|
9
|
+
// Optional[T] / Annotated[T, ...] → coarsen T (drop wrapper)
|
|
10
|
+
// Union[A, B] / `A | B` (PEP 604) → only stable if all agree
|
|
11
|
+
// List[T] / list[T] / Sequence[T] / Tuple[...] → 'array'
|
|
12
|
+
// Dict[K, V] / dict[K, V] / Mapping[K, V] → 'object'
|
|
13
|
+
// Literal['admin'] / Literal[1] / Literal[True] → primitive of literal
|
|
14
|
+
//   <CapitalIdent>                          → 'unknown' (BaseModel / Enum / alias — ambiguous)
|
|
15
|
+
//
|
|
16
|
+
// Anything we don't recognise → 'unknown'. Conservative on purpose:
|
|
17
|
+
// /type rules skip 'unknown' tags.
|
|
18
|
+
/**
 * Coarsen a Python / Pydantic type annotation (given as source text) to a
 * {@link FieldTypeTag}.
 *
 * Resolution order:
 *   1. Top-level PEP 604 union (`A | B`) — split and require agreement.
 *   2. `Optional[T]` → tag of `T` (the null wrapper is dropped).
 *   3. `Annotated[T, ...]` → tag of `T`.
 *   4. `Union[A, B, ...]` → shared tag of the non-null branches.
 *   5. Sequence-like generics → `'array'`; mapping generics → `'object'`.
 *   6. `Literal[...]` → shared primitive tag of the literal values.
 *   7. Known primitive and Pydantic scalar names.
 * Everything else, including bare capitalised identifiers and `bytes`,
 * yields `'unknown'`.
 *
 * @param ann - Annotation source text, e.g. `Optional[List[int]]`.
 * @returns The coarse tag, or `'unknown'` when the annotation is empty,
 *   unrecognised, or a union whose branches disagree.
 */
export function coarsenPythonTypeAnnotation(ann: string): FieldTypeTag {
  const t = ann.trim();
  if (t === '') return 'unknown';

  // PEP 604 `int | None | str`. This MUST run before the wrapper regexes
  // below: they are anchored on the first `Name[` and the LAST `]`, so an
  // annotation like `Annotated[int, Field()] | Annotated[str, Field()]` would
  // otherwise be mis-read as a single `Annotated[...]` and tagged 'number'.
  // Only a `|` OUTSIDE of any `[...]` counts, so `Dict[str, int | None]` is
  // not split here.
  if (containsTopLevelChar(t, '|')) {
    return coarsenUnionParts(splitTopLevelTypeArgs(t, '|'));
  }

  // Optional[T] / typing.Optional[T] — strip the wrapper and recurse.
  const optMatch = t.match(/^(?:typing\.)?Optional\[([\s\S]+)\]$/);
  if (optMatch) return coarsenPythonTypeAnnotation(optMatch[1]);

  // Annotated[T, ...] — the first argument is the underlying type.
  const annoMatch = t.match(/^(?:typing\.)?Annotated\[([\s\S]+)\]$/);
  if (annoMatch) {
    const parts = splitTopLevelTypeArgs(annoMatch[1], ',');
    if (parts.length >= 1) return coarsenPythonTypeAnnotation(parts[0]);
    return 'unknown';
  }

  // Union[A, B, ...] — only stable if every non-null branch agrees.
  // ANY 'unknown' branch poisons the result.
  const unionMatch = t.match(/^(?:typing\.)?Union\[([\s\S]+)\]$/);
  if (unionMatch) {
    return coarsenUnionParts(splitTopLevelTypeArgs(unionMatch[1], ','));
  }

  // Container types — coarsen to wire shape.
  if (/^(?:typing\.)?(?:List|list|Sequence|Iterable|Tuple|tuple|Set|set|FrozenSet|frozenset)\[/.test(t)) return 'array';
  if (/^(?:typing\.)?(?:Dict|dict|Mapping|MutableMapping)\[/.test(t)) return 'object';

  // Literal[X, Y, ...] — coarsen every literal arg and return the shared tag
  // ONLY when all literals agree. Mixed-primitive literals like
  // `Literal['a', 1]` accept either string or number on the wire, so tagging
  // by the first value alone would false-positive against a number client.
  const litMatch = t.match(/^(?:typing\.)?Literal\[([\s\S]+)\]$/);
  if (litMatch) {
    const parts = splitTopLevelTypeArgs(litMatch[1], ',');
    if (parts.length === 0) return 'unknown';
    const tags = parts.map((p) => coarsenLiteralValue(p.trim()));
    if (tags.includes('unknown')) return 'unknown';
    const set = new Set(tags);
    return set.size === 1 ? [...set][0] : 'unknown';
  }

  // Plain primitives + common Pydantic string/number newtypes. `bytes`
  // intentionally stays 'unknown' — it is binary on the wire, not a JSON
  // primitive.
  switch (t) {
    case 'str':
    case 'EmailStr':
    case 'HttpUrl':
    case 'AnyUrl':
    case 'AnyHttpUrl':
    case 'UUID':
    case 'UUID1':
    case 'UUID3':
    case 'UUID4':
    case 'UUID5':
    case 'SecretStr':
      return 'string';
    case 'int':
    case 'float':
    case 'Decimal':
    case 'PositiveInt':
    case 'NegativeInt':
    case 'NonNegativeInt':
    case 'NonPositiveInt':
    case 'PositiveFloat':
    case 'NegativeFloat':
      return 'number';
    case 'bool':
    case 'StrictBool':
      return 'boolean';
    case 'None':
    case 'NoneType':
      return 'null';
  }

  // A capitalised bare identifier could be a nested BaseModel ('object' on
  // the wire), a `class Status(str, Enum)` or a `Literal[...]` alias
  // ('string' on the wire), or a custom newtype. Without symbol resolution
  // these cannot be told apart, so they fall through to 'unknown' together
  // with every other unrecognised annotation: the rule skips, trading a
  // possible false negative for no false positive.
  return 'unknown';
}
|
|
112
|
+
|
|
113
|
+
// Coarsen a single literal-value source token (e.g. `'admin'`, `42`, `True`)
|
|
114
|
+
// to its primitive tag. Anything we don't recognise as one of the four JSON
|
|
115
|
+
// primitives → 'unknown'.
|
|
116
|
+
/**
 * Classify one `Literal[...]` argument, given as source text, by the JSON
 * primitive it denotes.
 *
 * @param v - A single literal token such as `'admin'`, `42`, `True` or `None`.
 * @returns `'string'` for tokens opening with a quote, `'number'` for tokens
 *   opening with a digit (optionally preceded by `-`), `'boolean'` for
 *   `True`/`False`, `'null'` for `None`, otherwise `'unknown'`.
 */
export function coarsenLiteralValue(v: string): FieldTypeTag {
  const lead = v.charAt(0);
  if (lead === "'" || lead === '"') return 'string';

  // Skip one optional minus sign, then require an ASCII digit.
  const digit = v.charAt(lead === '-' ? 1 : 0);
  if (digit >= '0' && digit <= '9') return 'number';

  switch (v) {
    case 'True':
    case 'False':
      return 'boolean';
    case 'None':
      return 'null';
    default:
      return 'unknown';
  }
}
|
|
123
|
+
|
|
124
|
+
/**
 * Reduce the branches of a union annotation to a single tag.
 *
 * Each branch is coarsened with `coarsenPythonTypeAnnotation`. `'null'`
 * branches are ignored when comparing, so `int | None` yields `'number'`.
 *
 * @param parts - The already-split union branches, as source text.
 * @returns `'unknown'` if any branch is unknown or the non-null branches
 *   disagree; `'null'` if there is no non-null branch (including an empty
 *   `parts`); otherwise the tag shared by all non-null branches.
 */
export function coarsenUnionParts(parts: readonly string[]): FieldTypeTag {
  let shared: FieldTypeTag | undefined;
  for (const branch of parts) {
    const tag = coarsenPythonTypeAnnotation(branch);
    // A single unclassifiable branch poisons the whole union.
    if (tag === 'unknown') return 'unknown';
    if (tag === 'null') continue;
    if (shared === undefined) {
      shared = tag;
    } else if (shared !== tag) {
      return 'unknown';
    }
  }
  return shared === undefined ? 'null' : shared;
}
|
|
132
|
+
|
|
133
|
+
// Split a type-annotation string at top-level commas / pipes — respecting
|
|
134
|
+
// nested `[...]` brackets — so `Union[A, B[C, D]]` splits into `[A, B[C, D]]`
|
|
135
|
+
// not into three pieces `A`, `B[C`, `D]`.
|
|
136
|
+
/**
 * Split `s` on every occurrence of `delim` that sits at bracket depth zero.
 *
 * Depth is raised by `[` / `(` and lowered by `]` / `)`; it is not clamped,
 * so an unmatched closer makes the depth negative and suppresses later
 * splits. Each piece is trimmed. Empty pieces before a delimiter are kept;
 * an empty trailing piece is dropped.
 *
 * @param s - Text to split, e.g. the inside of `Union[...]`.
 * @param delim - Separator character: `','` or `'|'`.
 * @returns The trimmed pieces in source order.
 */
export function splitTopLevelTypeArgs(s: string, delim: ',' | '|'): string[] {
  const pieces: string[] = [];
  let nesting = 0;
  let pieceStart = 0;

  for (let pos = 0; pos < s.length; pos += 1) {
    switch (s.charAt(pos)) {
      case '[':
      case '(':
        nesting += 1;
        break;
      case ']':
      case ')':
        nesting -= 1;
        break;
      case delim:
        if (nesting === 0) {
          pieces.push(s.slice(pieceStart, pos).trim());
          pieceStart = pos + 1;
        }
        break;
      default:
        break;
    }
  }

  const tail = s.slice(pieceStart).trim();
  if (tail !== '') pieces.push(tail);
  return pieces;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Report whether `ch` occurs in `s` at bracket depth zero.
 *
 * Depth is raised by `[` / `(` and lowered by `]` / `)`. Bracket characters
 * only adjust the depth and are never themselves reported as a match.
 *
 * @param s - Text to scan.
 * @param ch - Single character to look for.
 * @returns `true` on the first depth-zero occurrence, otherwise `false`.
 */
export function containsTopLevelChar(s: string, ch: string): boolean {
  let nesting = 0;
  let pos = 0;
  while (pos < s.length) {
    const current = s.charAt(pos);
    pos += 1;
    switch (current) {
      case '[':
      case '(':
        nesting += 1;
        break;
      case ']':
      case ')':
        nesting -= 1;
        break;
      default:
        if (nesting === 0 && current === ch) return true;
    }
  }
  return false;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Python Concept Mapper — tree-sitter based.
|
|
3
|
+
*
|
|
4
|
+
* Maps Python syntax → universal KERN concepts.
|
|
5
|
+
* Phase 1: error_raise, error_handle, effect
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { ConceptEdge, ConceptMap, ConceptNode } from '@kernlang/core';
|
|
9
|
+
import Parser from 'tree-sitter';
|
|
10
|
+
import Python from 'tree-sitter-python';
|
|
11
|
+
import { extractDependencyEdges } from './extractors/dependency.js';
|
|
12
|
+
import { extractEffects } from './extractors/effect.js';
|
|
13
|
+
import { extractEntrypoints } from './extractors/entrypoint.js';
|
|
14
|
+
import { extractErrorHandle, extractErrorRaise } from './extractors/error.js';
|
|
15
|
+
import { extractGuards } from './extractors/guard.js';
|
|
16
|
+
import { extractStateMutation } from './extractors/state-mutation.js';
|
|
17
|
+
import { EXTRACTOR_VERSION } from './signatures.js';
|
|
18
|
+
|
|
19
|
+
/** Module-level cache for the tree-sitter parser; `null` until first use. */
let parser: Parser | null = null;

/**
 * Return the shared tree-sitter parser, creating it on first call.
 *
 * The instance is published to the module-level cache only AFTER
 * `setLanguage` has succeeded. If loading the grammar throws, nothing is
 * cached and the next call retries, instead of handing out a parser that has
 * no language set.
 *
 * @returns The cached parser with the Python grammar set.
 */
function getParser(): Parser {
  if (parser !== null) return parser;

  const created = new Parser();
  // NOTE(review): the double cast presumably bridges a typing mismatch between
  // `tree-sitter-python` and `Parser.Language` — confirm it is still needed.
  created.setLanguage(Python as unknown as Parser.Language);
  parser = created;
  return created;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Parse Python source and run every extractor over its syntax tree.
 *
 * The error, effect, entrypoint, guard and state-mutation extractors are
 * handed the shared `nodes` array; the dependency extractor is handed the
 * `edges` array. They run in a fixed order.
 *
 * @param source - Python source text.
 * @param filePath - Path passed through to each extractor and echoed in the result.
 * @returns A concept map with `language: 'py'`, the collected nodes and edges,
 *   and the current extractor version.
 */
export function extractPythonConcepts(source: string, filePath: string): ConceptMap {
  const tree = getParser().parse(source);
  const root = tree.rootNode;

  const nodes: ConceptNode[] = [];
  const edges: ConceptEdge[] = [];

  // Node-producing extractors.
  extractErrorRaise(root, source, filePath, nodes);
  extractErrorHandle(root, source, filePath, nodes);
  extractEffects(root, source, filePath, nodes);
  extractEntrypoints(root, source, filePath, nodes);
  extractGuards(root, source, filePath, nodes);
  extractStateMutation(root, source, filePath, nodes);

  // Edge-producing extractor.
  extractDependencyEdges(root, source, filePath, edges);

  const conceptMap: ConceptMap = {
    filePath,
    language: 'py',
    nodes,
    edges,
    extractorVersion: EXTRACTOR_VERSION,
  };
  return conceptMap;
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
 * Version identifier of this extractor.
 * NOTE(review): presumably bumped whenever extraction output changes — confirm.
 */
export const EXTRACTOR_VERSION = '1.0.0';
|
|
2
|
+
|
|
3
|
+
/** Names of Python HTTP client modules. */
export const NETWORK_MODULES = new Set(['requests', 'httpx', 'aiohttp', 'urllib']);

/** Method names treated as network calls: the HTTP verbs plus generic `request` / `fetch`. */
export const NETWORK_METHODS = new Set([
  // HTTP verbs
  'get', 'post', 'put', 'patch', 'delete', 'head', 'options',
  // generic entry points
  'request', 'fetch',
]);
|
|
15
|
+
|
|
16
|
+
/** Names of Python database driver / ORM modules. */
export const DB_MODULES = new Set(['psycopg2', 'asyncpg', 'pymongo', 'sqlalchemy', 'django']);

/** Method names treated as database calls. */
export const DB_METHODS = new Set([
  // cursor-style execution and fetching
  'execute', 'executemany', 'fetchone', 'fetchall', 'fetchmany',
  // query-style reads
  'query', 'find', 'find_one',
  // single/multi document writes
  'insert_one', 'insert_many', 'update_one', 'delete_one',
]);
|
|
31
|
+
|
|
32
|
+
/**
 * Names of file read/write functions.
 * NOTE(review): the leading underscore suggests this is currently unused by the extractors — confirm.
 */
export const _FS_FUNCTIONS = new Set(['open', 'read', 'write', 'readlines', 'writelines']);
|
|
33
|
+
|
|
34
|
+
/** HTTP error status codes recognised on the Python API side. */
export const PY_API_ERROR_STATUS_CODES = new Set([
  401, 403, 404, 422, 500,
]);

/** HTTP success status codes recognised on the Python API side. */
export const PY_API_SUCCESS_STATUS_CODES = new Set([
  200, 201, 202, 204, 206,
]);

/**
 * FastAPI's documented default success status is 200 regardless of HTTP
 * method (https://fastapi.tiangolo.com/tutorial/response-status-code/).
 * 201 for POST is a per-route opt-in via `status_code=201`, not a
 * method-derived default.
 */
export const FASTAPI_DEFAULT_SUCCESS_STATUS = 200;
|
|
41
|
+
/*
 * Pagination anchor families — mirror the TS classification in
 * `packages/review/src/concept-rules/cross-stack-utils.ts`. The size keys
 * (`limit`, `take`, `page_size`, `per_page`) are intentionally NOT anchors:
 * they are compatible with either offset or cursor pagination.
 */

/** Parameter names that anchor page-number pagination. */
export const PY_PAGE_ANCHORS = new Set(['page', 'page_number', 'pageNumber']);

/** Parameter names that anchor offset pagination. */
export const PY_OFFSET_ANCHORS = new Set(['offset', 'skip']);

/** Parameter names that anchor cursor pagination. */
export const PY_CURSOR_ANCHORS = new Set(['cursor', 'after', 'before', 'next', 'previous']);

/** Matches a pagination keyword as a whole word, or a `.limit(` call (case-insensitive). */
export const PY_PAGINATION_RE = /\b(limit|offset|skip|cursor|page|page_size|per_page)\b|\.limit\s*\(/i;

/** Matches collection-returning calls such as `.find(`, `.all(` or `select(` (case-insensitive). */
export const PY_DB_COLLECTION_RE = /\.(find|all|fetchall|to_list|scalars)\s*\(|\bselect\s*\(/i;

/** Matches write-style method calls such as `.insert_one(`, `.save(` or `.commit(` (case-insensitive). */
export const PY_DB_WRITE_RE = /\.(insert_one|insert_many|update_one|update_many|delete_one|delete_many|add|create|save|commit)\s*\(/i;

/** Matches whole-word idempotency hints: idempotency keys, transactions, upserts, uniqueness (case-insensitive). */
export const PY_IDEMPOTENCY_RE = /\b(idempotency(?:[_-]?key)?|Idempotency-Key|transaction|unique|upsert|get_or_create|on_conflict)\b/i;
|
|
54
|
+
|
|
55
|
+
/** Names of Python standard-library modules recognised by the mapper (not an exhaustive list of the stdlib). */
export const STDLIB_MODULES = new Set([
  'os', 'sys', 'json', 're', 'math', 'datetime', 'time', 'logging',
  'argparse', 'collections', 'itertools', 'functools', 'pathlib', 'shutil',
  'subprocess', 'threading', 'multiprocessing', 'abc', 'typing', 'io',
  'pickle', 'random', 'hashlib', 'hmac', 'base64', 'csv', 'sqlite3', 'zlib',
  'gzip', 'tarfile', 'zipfile', 'enum', 'struct', 'tempfile', 'unittest',
  'urllib', 'uuid', 'xml',
]);
|