@kernlang/review-python 3.4.4 → 3.4.5-canary.15.1.9efc3d4b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mapper.js +193 -6
- package/package.json +3 -3
- package/src/mapper.ts +205 -10
package/dist/mapper.js
CHANGED
|
@@ -315,6 +315,7 @@ function extractEntrypoints(root, source, filePath, nodes) {
|
|
|
315
315
|
hasBodyValidation: routeAnalysis.hasBodyValidation,
|
|
316
316
|
validatedBodyFields: routeAnalysis.validatedBodyFields,
|
|
317
317
|
bodyValidationResolved: routeAnalysis.bodyValidationResolved,
|
|
318
|
+
validatedBodyFieldTypes: routeAnalysis.validatedBodyFieldTypes,
|
|
318
319
|
},
|
|
319
320
|
});
|
|
320
321
|
}
|
|
@@ -513,6 +514,7 @@ function analyzePythonRoute(fnDef, source, method, routePath, responseModel, pyd
|
|
|
513
514
|
hasBodyValidation: validation.has,
|
|
514
515
|
validatedBodyFields: validation.fields,
|
|
515
516
|
bodyValidationResolved: validation.resolved,
|
|
517
|
+
validatedBodyFieldTypes: validation.types,
|
|
516
518
|
};
|
|
517
519
|
}
|
|
518
520
|
function extractPythonHttpExceptionStatusCodes(text) {
|
|
@@ -552,37 +554,222 @@ function collectPydanticModels(source) {
|
|
|
552
554
|
const nextTopLevel = rest.search(/\n\S/);
|
|
553
555
|
const body = nextTopLevel === -1 ? rest : rest.slice(0, nextTopLevel);
|
|
554
556
|
const fields = [];
|
|
555
|
-
const
|
|
557
|
+
const types = {};
|
|
558
|
+
// Capture annotations alongside names. The annotation runs until either
|
|
559
|
+
// an `=` (default value) or end-of-line / inline comment. Multiline
|
|
560
|
+
// annotations (`x: Annotated[\n str, Field(...)\n]`) are not handled —
|
|
561
|
+
// false-negative on the type tag, never false-positive.
|
|
562
|
+
const fieldRe = /^[ \t]+([A-Za-z_]\w*)[ \t]*:[ \t]*([^=#\n]+?)(?:[ \t]*=[^\n]*|[ \t]*#[^\n]*)?$/gm;
|
|
556
563
|
for (const fieldMatch of body.matchAll(fieldRe)) {
|
|
557
564
|
const field = fieldMatch[1];
|
|
558
565
|
if (field === 'model_config' || field === 'Config')
|
|
559
566
|
continue;
|
|
560
567
|
fields.push(field);
|
|
568
|
+
const annotation = fieldMatch[2].trim();
|
|
569
|
+
types[field] = coarsenPythonTypeAnnotation(annotation);
|
|
570
|
+
}
|
|
571
|
+
if (fields.length > 0) {
|
|
572
|
+
models.set(name, { fields: fields.sort(), types: Object.freeze({ ...types }) });
|
|
561
573
|
}
|
|
562
|
-
if (fields.length > 0)
|
|
563
|
-
models.set(name, fields.sort());
|
|
564
574
|
}
|
|
565
575
|
return models;
|
|
566
576
|
}
|
|
577
|
+
// Break a type-argument string into its top-level pieces. `delim` only acts
// as a separator at bracket depth 0, and both `[...]` and `(...)` add depth,
// so the arguments of `Union[A, B[C, D]]` come back as ['A', 'B[C, D]'] and
// not ['A', 'B[C', 'D]']. Every piece is trimmed; an empty trailing piece is
// dropped, while empty pieces in the middle are kept.
function splitTopLevelTypeArgs(s, delim) {
    const pieces = [];
    let nesting = 0;
    let start = 0;
    for (let pos = 0; pos < s.length; pos++) {
        switch (s[pos]) {
            case '[':
            case '(':
                nesting += 1;
                break;
            case ']':
            case ')':
                // May go negative on unbalanced input; separators are then
                // ignored for the rest of the string.
                nesting -= 1;
                break;
            default:
                if (s[pos] === delim && nesting === 0) {
                    pieces.push(s.slice(start, pos).trim());
                    start = pos + 1;
                }
        }
    }
    const tail = s.slice(start).trim();
    if (tail !== '') {
        pieces.push(tail);
    }
    return pieces;
}
|
|
601
|
+
// Coarsen a Pydantic field type annotation to the same FieldTypeTag union
// the TS mapper uses, so cross-stack rules can compare client TS types
// against server Pydantic types symmetrically. Handles the common shapes:
//
//   `A | B` (PEP 604) / Union[A, B]                → shared tag, only if all non-null branches agree
//   Optional[T] / Annotated[T, ...]                → coarsen T (drop wrapper)
//   List[T] / list[T] / Sequence[T] / Tuple[...]   → 'array'
//   Dict[K, V] / dict[K, V] / Mapping[K, V]        → 'object'
//   Literal['admin'] / Literal[1] / Literal[True]  → primitive shared by every literal
//   str / int / float / bool / None / Decimal / UUID / EmailStr → matching primitive
//   <CapitalIdent>                                 → 'unknown' (model, Enum or alias — ambiguous)
//
// Anything we don't recognise → 'unknown'. Conservative on purpose: the
// comparison rules skip 'unknown' tags.
function coarsenPythonTypeAnnotation(ann) {
    const t = ann.trim();
    if (t === '')
        return 'unknown';
    // PEP 604 `int | None | str` is checked FIRST. The wrapper patterns below
    // only test "starts with `Name[`" and "ends with `]`", so an annotation
    // such as `Literal['a'] | Literal[1]` or `Annotated[int, X] | Annotated[str, Y]`
    // would otherwise be swallowed as one wrapper and tagged from its first
    // branch — a false positive. `|` only counts OUTSIDE any `[...]`, so
    // `Dict[str, int | None]` is not split.
    if (containsTopLevelChar(t, '|')) {
        return coarsenUnionParts(splitTopLevelTypeArgs(t, '|'));
    }
    // Optional[T] / typing.Optional[T] — strip and recurse.
    const optMatch = t.match(/^(?:typing\.)?Optional\[([\s\S]+)\]$/);
    if (optMatch)
        return coarsenPythonTypeAnnotation(optMatch[1]);
    // Annotated[T, ...] — first arg is the underlying type.
    const annoMatch = t.match(/^(?:typing\.)?Annotated\[([\s\S]+)\]$/);
    if (annoMatch) {
        const parts = splitTopLevelTypeArgs(annoMatch[1], ',');
        return parts.length >= 1 ? coarsenPythonTypeAnnotation(parts[0]) : 'unknown';
    }
    // Union[A, B, ...] — only stable if every non-null branch agrees.
    // ANY 'unknown' branch poisons the result.
    const unionMatch = t.match(/^(?:typing\.)?Union\[([\s\S]+)\]$/);
    if (unionMatch) {
        return coarsenUnionParts(splitTopLevelTypeArgs(unionMatch[1], ','));
    }
    // Container types — coarsen to wire shape.
    if (/^(?:typing\.)?(?:List|list|Sequence|Iterable|Tuple|tuple|Set|set|FrozenSet|frozenset)\[/.test(t))
        return 'array';
    if (/^(?:typing\.)?(?:Dict|dict|Mapping|MutableMapping)\[/.test(t))
        return 'object';
    // Literal[X, Y, ...] — coarsen every literal arg, return the shared tag
    // ONLY when all literals agree. Mixed-primitive literals like
    // `Literal['a', 1]` accept either string or number on the wire, so
    // tagging it 'string' (first-only) would FP-flag a number client.
    const litMatch = t.match(/^(?:typing\.)?Literal\[([\s\S]+)\]$/);
    if (litMatch) {
        const parts = splitTopLevelTypeArgs(litMatch[1], ',');
        if (parts.length === 0)
            return 'unknown';
        const tags = parts.map((p) => coarsenLiteralValue(p.trim()));
        if (tags.includes('unknown'))
            return 'unknown';
        const set = new Set(tags);
        return set.size === 1 ? [...set][0] : 'unknown';
    }
    // Plain primitives + common Pydantic-string newtypes. `bytes` intentionally
    // stays 'unknown' — it's binary on the wire and not a JSON primitive.
    switch (t) {
        case 'str':
        case 'EmailStr':
        case 'HttpUrl':
        case 'AnyUrl':
        case 'AnyHttpUrl':
        case 'UUID':
        case 'UUID1':
        case 'UUID3':
        case 'UUID4':
        case 'UUID5':
        case 'SecretStr':
            return 'string';
        case 'int':
        case 'float':
        case 'Decimal':
        case 'PositiveInt':
        case 'NegativeInt':
        case 'NonNegativeInt':
        case 'NonPositiveInt':
        case 'PositiveFloat':
        case 'NegativeFloat':
            return 'number';
        case 'bool':
        case 'StrictBool':
            return 'boolean';
        case 'None':
        case 'NoneType':
            return 'null';
    }
    // Everything else is 'unknown', including a capitalized bare identifier,
    // which could be:
    //   - A nested BaseModel ('object' on the wire)
    //   - A `class Status(str, Enum)` ('string' on the wire)
    //   - A `Status = Literal['a','b']` type alias ('string' on the wire)
    //   - A custom newtype like StrictStr / IPvAnyAddress
    // We can't disambiguate without symbol resolution. Tagging 'object'
    // FP'd Enum/Literal aliases against string clients, so the rule skips
    // these instead — trading a false negative for a false positive.
    return 'unknown';
}
|
|
708
|
+
// Map one source token taken from inside `Literal[...]` (e.g. `'admin'`,
// `42`, `True`) to its JSON primitive tag. Keywords are matched exactly;
// strings are recognised by a leading quote and numbers by a leading digit
// (optionally after `-`). Any other token yields 'unknown'.
function coarsenLiteralValue(v) {
    switch (v) {
        case 'True':
        case 'False':
            return 'boolean';
        case 'None':
            return 'null';
    }
    if (/^['"]/.test(v)) {
        return 'string';
    }
    return /^-?\d/.test(v) ? 'number' : 'unknown';
}
|
|
722
|
+
// Collapse the branches of a union into one tag. Any branch that coarsens
// to 'unknown' makes the whole union 'unknown'. 'null' branches are ignored
// when other branches exist; a union of only nulls (or no branches) is
// 'null'. The remaining branches must all share one tag, else 'unknown'.
function coarsenUnionParts(parts) {
    const branchTags = new Set();
    for (const part of parts) {
        const tag = coarsenPythonTypeAnnotation(part);
        if (tag === 'unknown') {
            return 'unknown';
        }
        if (tag !== 'null') {
            branchTags.add(tag);
        }
    }
    if (branchTags.size === 0) {
        return 'null';
    }
    if (branchTags.size > 1) {
        return 'unknown';
    }
    return [...branchTags][0];
}
|
|
732
|
+
// Report whether `ch` occurs in `s` at bracket depth 0, i.e. outside every
// `[...]` and `(...)` group. Brackets themselves only adjust the depth and
// are never reported as a match.
function containsTopLevelChar(s, ch) {
    let nesting = 0;
    let pos = 0;
    while (pos < s.length) {
        const c = s.charAt(pos++);
        if (c === '[' || c === '(') {
            nesting += 1;
            continue;
        }
        if (c === ']' || c === ')') {
            nesting -= 1;
            continue;
        }
        if (nesting === 0 && c === ch) {
            return true;
        }
    }
    return false;
}
|
|
567
745
|
// Inspect a route handler's signature for parameters annotated with a
// collected Pydantic model and merge what those models validate.
//
//   fnDef          - syntax node of the handler's function definition
//   source         - full source text that fnDef's indices refer to
//   pydanticModels - model name → { fields, types }
//
// Returns:
//   has      - true when at least one `name: Model` annotation in the header
//              names a known model
//   fields   - sorted union of those models' field names, or undefined
//   resolved - true when at least one field name was collected
//   types    - frozen field → tag map, or undefined when it would be empty
function extractFastApiBodyValidation(fnDef, source, pydanticModels) {
    // The header is everything from the start of the definition up to its
    // body block (or the whole definition when no body node is found).
    const body = fnDef.childForFieldName('body') ?? fnDef.namedChildren.find((child) => child.type === 'block');
    const headerEnd = body ? body.startIndex : fnDef.endIndex;
    const header = source.substring(fnDef.startIndex, headerEnd);
    const fields = new Set();
    const tagsByField = new Map();
    const conflicted = new Set();
    let has = false;
    // Only bare-identifier annotations (`item: Item`) are matched.
    const annotationRe = /([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*)/g;
    for (const match of header.matchAll(annotationRe)) {
        const model = pydanticModels.get(match[2]);
        if (!model)
            continue;
        has = true;
        for (const field of model.fields)
            fields.add(field);
        for (const [name, tag] of Object.entries(model.types)) {
            // Only record concrete tags. 'unknown' for a key would shadow a
            // concrete tag from another model parameter on the same handler
            // (rare, but multi-arg handlers do exist), so skip them.
            if (tag === 'unknown' || conflicted.has(name))
                continue;
            const seen = tagsByField.get(name);
            if (seen !== undefined && seen !== tag) {
                // Two models disagree on this field's type. Reporting either
                // tag could flag a client that is correct for the other
                // model, so report no tag for the field at all.
                tagsByField.delete(name);
                conflicted.add(name);
            }
            else {
                tagsByField.set(name, tag);
            }
        }
    }
    return {
        has,
        fields: fields.size > 0 ? Array.from(fields).sort() : undefined,
        resolved: fields.size > 0,
        types: tagsByField.size > 0 ? Object.freeze(Object.fromEntries(tagsByField)) : undefined,
    };
}
|
|
588
775
|
// ── state_mutation ───────────────────────────────────────────────────────
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kernlang/review-python",
|
|
3
|
-
"version": "3.4.
|
|
3
|
+
"version": "3.4.5-canary.15.1.9efc3d4b",
|
|
4
4
|
"description": "Python concept mapper for kern review — tree-sitter based",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
"dependencies": {
|
|
9
9
|
"tree-sitter": "^0.25.0",
|
|
10
10
|
"tree-sitter-python": "^0.25.0",
|
|
11
|
-
"@kernlang/core": "3.4.
|
|
12
|
-
"@kernlang/review": "3.4.
|
|
11
|
+
"@kernlang/core": "3.4.5-canary.15.1.9efc3d4b",
|
|
12
|
+
"@kernlang/review": "3.4.5-canary.15.1.9efc3d4b"
|
|
13
13
|
},
|
|
14
14
|
"devDependencies": {
|
|
15
15
|
"ts-morph": "^28.0.0",
|
package/src/mapper.ts
CHANGED
|
@@ -35,6 +35,14 @@ const DB_METHODS = new Set([
|
|
|
35
35
|
|
|
36
36
|
const _FS_FUNCTIONS = new Set(['open', 'read', 'write', 'readlines', 'writelines']);
|
|
37
37
|
|
|
38
|
+
// Coarse, JSON-level type of a field. 'unknown' is the tag used whenever an
// annotation cannot be classified.
type FieldTypeTag =
  | 'string'
  | 'number'
  | 'boolean'
  | 'null'
  | 'object'
  | 'array'
  | 'unknown';

// Read-only lookup from field name to its coarse tag.
type FieldTypeMap = Readonly<Record<string, FieldTypeTag>>;

// What is recorded for one Pydantic model: its field names and one coarse
// tag per field.
interface PydanticModel {
  fields: readonly string[];
  types: FieldTypeMap;
}
|
|
45
|
+
|
|
38
46
|
interface PythonRouteAnalysis {
|
|
39
47
|
errorStatusCodes?: readonly number[];
|
|
40
48
|
hasUnboundedCollectionQuery?: boolean;
|
|
@@ -43,6 +51,7 @@ interface PythonRouteAnalysis {
|
|
|
43
51
|
hasBodyValidation?: boolean;
|
|
44
52
|
validatedBodyFields?: readonly string[];
|
|
45
53
|
bodyValidationResolved?: boolean;
|
|
54
|
+
validatedBodyFieldTypes?: FieldTypeMap;
|
|
46
55
|
}
|
|
47
56
|
|
|
48
57
|
const PY_API_ERROR_STATUS_CODES = new Set([401, 403, 404, 422, 500]);
|
|
@@ -372,6 +381,7 @@ function extractEntrypoints(root: Parser.SyntaxNode, source: string, filePath: s
|
|
|
372
381
|
hasBodyValidation: routeAnalysis.hasBodyValidation,
|
|
373
382
|
validatedBodyFields: routeAnalysis.validatedBodyFields,
|
|
374
383
|
bodyValidationResolved: routeAnalysis.bodyValidationResolved,
|
|
384
|
+
validatedBodyFieldTypes: routeAnalysis.validatedBodyFieldTypes,
|
|
375
385
|
},
|
|
376
386
|
});
|
|
377
387
|
}
|
|
@@ -565,7 +575,7 @@ function analyzePythonRoute(
|
|
|
565
575
|
method: string,
|
|
566
576
|
routePath: string,
|
|
567
577
|
responseModel: string | undefined,
|
|
568
|
-
pydanticModels: ReadonlyMap<string,
|
|
578
|
+
pydanticModels: ReadonlyMap<string, PydanticModel>,
|
|
569
579
|
): PythonRouteAnalysis {
|
|
570
580
|
const text = source.substring(fnDef.startIndex, fnDef.endIndex);
|
|
571
581
|
const validation = extractFastApiBodyValidation(fnDef, source, pydanticModels);
|
|
@@ -577,6 +587,7 @@ function analyzePythonRoute(
|
|
|
577
587
|
hasBodyValidation: validation.has,
|
|
578
588
|
validatedBodyFields: validation.fields,
|
|
579
589
|
bodyValidationResolved: validation.resolved,
|
|
590
|
+
validatedBodyFieldTypes: validation.types,
|
|
580
591
|
};
|
|
581
592
|
}
|
|
582
593
|
|
|
@@ -611,8 +622,8 @@ function hasUnboundedPythonCollectionQuery(
|
|
|
611
622
|
);
|
|
612
623
|
}
|
|
613
624
|
|
|
614
|
-
function collectPydanticModels(source: string): Map<string,
|
|
615
|
-
const models = new Map<string,
|
|
625
|
+
function collectPydanticModels(source: string): Map<string, PydanticModel> {
|
|
626
|
+
const models = new Map<string, PydanticModel>();
|
|
616
627
|
const classRe = /^class\s+([A-Za-z_]\w*)\s*\([^)]*BaseModel[^)]*\)\s*:/gm;
|
|
617
628
|
for (const match of source.matchAll(classRe)) {
|
|
618
629
|
const name = match[1];
|
|
@@ -621,38 +632,222 @@ function collectPydanticModels(source: string): Map<string, readonly string[]> {
|
|
|
621
632
|
const nextTopLevel = rest.search(/\n\S/);
|
|
622
633
|
const body = nextTopLevel === -1 ? rest : rest.slice(0, nextTopLevel);
|
|
623
634
|
const fields: string[] = [];
|
|
624
|
-
const
|
|
635
|
+
const types: Record<string, FieldTypeTag> = {};
|
|
636
|
+
// Capture annotations alongside names. The annotation runs until either
|
|
637
|
+
// an `=` (default value) or end-of-line / inline comment. Multiline
|
|
638
|
+
// annotations (`x: Annotated[\n str, Field(...)\n]`) are not handled —
|
|
639
|
+
// false-negative on the type tag, never false-positive.
|
|
640
|
+
const fieldRe = /^[ \t]+([A-Za-z_]\w*)[ \t]*:[ \t]*([^=#\n]+?)(?:[ \t]*=[^\n]*|[ \t]*#[^\n]*)?$/gm;
|
|
625
641
|
for (const fieldMatch of body.matchAll(fieldRe)) {
|
|
626
642
|
const field = fieldMatch[1];
|
|
627
643
|
if (field === 'model_config' || field === 'Config') continue;
|
|
628
644
|
fields.push(field);
|
|
645
|
+
const annotation = fieldMatch[2].trim();
|
|
646
|
+
types[field] = coarsenPythonTypeAnnotation(annotation);
|
|
647
|
+
}
|
|
648
|
+
if (fields.length > 0) {
|
|
649
|
+
models.set(name, { fields: fields.sort(), types: Object.freeze({ ...types }) });
|
|
629
650
|
}
|
|
630
|
-
if (fields.length > 0) models.set(name, fields.sort());
|
|
631
651
|
}
|
|
632
652
|
return models;
|
|
633
653
|
}
|
|
634
654
|
|
|
655
|
+
// Break a type-argument string into its top-level pieces. `delim` only acts
// as a separator at bracket depth 0, and both `[...]` and `(...)` add depth,
// so the arguments of `Union[A, B[C, D]]` come back as ['A', 'B[C, D]'] and
// not ['A', 'B[C', 'D]']. Every piece is trimmed; an empty trailing piece is
// dropped, while empty pieces in the middle are kept.
function splitTopLevelTypeArgs(s: string, delim: ',' | '|'): string[] {
  const pieces: string[] = [];
  let nesting = 0;
  let start = 0;
  for (let pos = 0; pos < s.length; pos++) {
    switch (s[pos]) {
      case '[':
      case '(':
        nesting += 1;
        break;
      case ']':
      case ')':
        // May go negative on unbalanced input; separators are then ignored
        // for the rest of the string.
        nesting -= 1;
        break;
      default:
        if (s[pos] === delim && nesting === 0) {
          pieces.push(s.slice(start, pos).trim());
          start = pos + 1;
        }
    }
  }
  const tail = s.slice(start).trim();
  if (tail !== '') {
    pieces.push(tail);
  }
  return pieces;
}
|
|
676
|
+
|
|
677
|
+
// Coarsen a Pydantic field type annotation to the same FieldTypeTag union
// the TS mapper uses, so cross-stack rules can compare client TS types
// against server Pydantic types symmetrically. Handles the common shapes:
//
//   `A | B` (PEP 604) / Union[A, B]                → shared tag, only if all non-null branches agree
//   Optional[T] / Annotated[T, ...]                → coarsen T (drop wrapper)
//   List[T] / list[T] / Sequence[T] / Tuple[...]   → 'array'
//   Dict[K, V] / dict[K, V] / Mapping[K, V]        → 'object'
//   Literal['admin'] / Literal[1] / Literal[True]  → primitive shared by every literal
//   str / int / float / bool / None / Decimal / UUID / EmailStr → matching primitive
//   <CapitalIdent>                                 → 'unknown' (model, Enum or alias — ambiguous)
//
// Anything we don't recognise → 'unknown'. Conservative on purpose: the
// comparison rules skip 'unknown' tags.
function coarsenPythonTypeAnnotation(ann: string): FieldTypeTag {
  const t = ann.trim();
  if (t === '') return 'unknown';

  // PEP 604 `int | None | str` is checked FIRST. The wrapper patterns below
  // only test "starts with `Name[`" and "ends with `]`", so an annotation
  // such as `Literal['a'] | Literal[1]` or `Annotated[int, X] | Annotated[str, Y]`
  // would otherwise be swallowed as one wrapper and tagged from its first
  // branch — a false positive. `|` only counts OUTSIDE any `[...]`, so
  // `Dict[str, int | None]` is not split.
  if (containsTopLevelChar(t, '|')) {
    return coarsenUnionParts(splitTopLevelTypeArgs(t, '|'));
  }

  // Optional[T] / typing.Optional[T] — strip and recurse.
  const optMatch = t.match(/^(?:typing\.)?Optional\[([\s\S]+)\]$/);
  if (optMatch) return coarsenPythonTypeAnnotation(optMatch[1]);

  // Annotated[T, ...] — first arg is the underlying type.
  const annoMatch = t.match(/^(?:typing\.)?Annotated\[([\s\S]+)\]$/);
  if (annoMatch) {
    const parts = splitTopLevelTypeArgs(annoMatch[1], ',');
    return parts.length >= 1 ? coarsenPythonTypeAnnotation(parts[0]) : 'unknown';
  }

  // Union[A, B, ...] — only stable if every non-null branch agrees.
  // ANY 'unknown' branch poisons the result.
  const unionMatch = t.match(/^(?:typing\.)?Union\[([\s\S]+)\]$/);
  if (unionMatch) {
    return coarsenUnionParts(splitTopLevelTypeArgs(unionMatch[1], ','));
  }

  // Container types — coarsen to wire shape.
  if (/^(?:typing\.)?(?:List|list|Sequence|Iterable|Tuple|tuple|Set|set|FrozenSet|frozenset)\[/.test(t)) return 'array';
  if (/^(?:typing\.)?(?:Dict|dict|Mapping|MutableMapping)\[/.test(t)) return 'object';

  // Literal[X, Y, ...] — coarsen every literal arg, return the shared tag
  // ONLY when all literals agree. Mixed-primitive literals like
  // `Literal['a', 1]` accept either string or number on the wire, so
  // tagging it 'string' (first-only) would FP-flag a number client.
  const litMatch = t.match(/^(?:typing\.)?Literal\[([\s\S]+)\]$/);
  if (litMatch) {
    const parts = splitTopLevelTypeArgs(litMatch[1], ',');
    if (parts.length === 0) return 'unknown';
    const tags = parts.map((p) => coarsenLiteralValue(p.trim()));
    if (tags.includes('unknown')) return 'unknown';
    const set = new Set(tags);
    return set.size === 1 ? [...set][0] : 'unknown';
  }

  // Plain primitives + common Pydantic-string newtypes. `bytes` intentionally
  // stays 'unknown' — it's binary on the wire and not a JSON primitive.
  switch (t) {
    case 'str':
    case 'EmailStr':
    case 'HttpUrl':
    case 'AnyUrl':
    case 'AnyHttpUrl':
    case 'UUID':
    case 'UUID1':
    case 'UUID3':
    case 'UUID4':
    case 'UUID5':
    case 'SecretStr':
      return 'string';
    case 'int':
    case 'float':
    case 'Decimal':
    case 'PositiveInt':
    case 'NegativeInt':
    case 'NonNegativeInt':
    case 'NonPositiveInt':
    case 'PositiveFloat':
    case 'NegativeFloat':
      return 'number';
    case 'bool':
    case 'StrictBool':
      return 'boolean';
    case 'None':
    case 'NoneType':
      return 'null';
  }

  // Everything else is 'unknown', including a capitalized bare identifier,
  // which could be:
  //   - A nested BaseModel ('object' on the wire)
  //   - A `class Status(str, Enum)` ('string' on the wire)
  //   - A `Status = Literal['a','b']` type alias ('string' on the wire)
  //   - A custom newtype like StrictStr / IPvAnyAddress
  // We can't disambiguate without symbol resolution. Tagging 'object'
  // FP'd Enum/Literal aliases against string clients, so the rule skips
  // these instead — trading a false negative for a false positive.
  return 'unknown';
}
|
|
785
|
+
|
|
786
|
+
// Map one source token taken from inside `Literal[...]` (e.g. `'admin'`,
// `42`, `True`) to its JSON primitive tag. Keywords are matched exactly;
// strings are recognised by a leading quote and numbers by a leading digit
// (optionally after `-`). Any other token yields 'unknown'.
function coarsenLiteralValue(v: string): FieldTypeTag {
  switch (v) {
    case 'True':
    case 'False':
      return 'boolean';
    case 'None':
      return 'null';
  }
  if (/^['"]/.test(v)) {
    return 'string';
  }
  return /^-?\d/.test(v) ? 'number' : 'unknown';
}
|
|
796
|
+
|
|
797
|
+
// Collapse the branches of a union into one tag. Any branch that coarsens
// to 'unknown' makes the whole union 'unknown'. 'null' branches are ignored
// when other branches exist; a union of only nulls (or no branches) is
// 'null'. The remaining branches must all share one tag, else 'unknown'.
function coarsenUnionParts(parts: readonly string[]): FieldTypeTag {
  const branchTags = new Set<FieldTypeTag>();
  for (const part of parts) {
    const tag = coarsenPythonTypeAnnotation(part);
    if (tag === 'unknown') {
      return 'unknown';
    }
    if (tag !== 'null') {
      branchTags.add(tag);
    }
  }
  if (branchTags.size === 0) {
    return 'null';
  }
  if (branchTags.size > 1) {
    return 'unknown';
  }
  return [...branchTags][0];
}
|
|
805
|
+
|
|
806
|
+
// Report whether `ch` occurs in `s` at bracket depth 0, i.e. outside every
// `[...]` and `(...)` group. Brackets themselves only adjust the depth and
// are never reported as a match.
function containsTopLevelChar(s: string, ch: string): boolean {
  let nesting = 0;
  let pos = 0;
  while (pos < s.length) {
    const c = s.charAt(pos++);
    if (c === '[' || c === '(') {
      nesting += 1;
      continue;
    }
    if (c === ']' || c === ')') {
      nesting -= 1;
      continue;
    }
    if (nesting === 0 && c === ch) {
      return true;
    }
  }
  return false;
}
|
|
816
|
+
|
|
635
817
|
// Inspect a route handler's signature for parameters annotated with a
// collected Pydantic model and merge what those models validate.
//
//   fnDef          - syntax node of the handler's function definition
//   source         - full source text that fnDef's indices refer to
//   pydanticModels - model name → { fields, types }
//
// Returns:
//   has      - true when at least one `name: Model` annotation in the header
//              names a known model
//   fields   - sorted union of those models' field names, or undefined
//   resolved - true when at least one field name was collected
//   types    - frozen field → tag map, or undefined when it would be empty
function extractFastApiBodyValidation(
  fnDef: Parser.SyntaxNode,
  source: string,
  pydanticModels: ReadonlyMap<string, PydanticModel>,
): {
  has: boolean;
  fields: readonly string[] | undefined;
  resolved: boolean;
  types: FieldTypeMap | undefined;
} {
  // The header is everything from the start of the definition up to its
  // body block (or the whole definition when no body node is found).
  const body = fnDef.childForFieldName('body') ?? fnDef.namedChildren.find((child) => child.type === 'block');
  const headerEnd = body ? body.startIndex : fnDef.endIndex;
  const header = source.substring(fnDef.startIndex, headerEnd);
  const fields = new Set<string>();
  const tagsByField = new Map<string, FieldTypeTag>();
  const conflicted = new Set<string>();
  let has = false;
  // Only bare-identifier annotations (`item: Item`) are matched.
  const annotationRe = /([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*)/g;
  for (const match of header.matchAll(annotationRe)) {
    const model = pydanticModels.get(match[2]);
    if (!model) continue;
    has = true;
    for (const field of model.fields) fields.add(field);
    for (const [name, tag] of Object.entries(model.types)) {
      // Only record concrete tags. 'unknown' for a key would shadow a
      // concrete tag from another model parameter on the same handler
      // (rare, but multi-arg handlers do exist), so skip them.
      if (tag === 'unknown' || conflicted.has(name)) continue;
      const seen = tagsByField.get(name);
      if (seen !== undefined && seen !== tag) {
        // Two models disagree on this field's type. Reporting either tag
        // could flag a client that is correct for the other model, so
        // report no tag for the field at all.
        tagsByField.delete(name);
        conflicted.add(name);
      } else {
        tagsByField.set(name, tag);
      }
    }
  }
  return {
    has,
    fields: fields.size > 0 ? Array.from(fields).sort() : undefined,
    resolved: fields.size > 0,
    types: tagsByField.size > 0 ? Object.freeze(Object.fromEntries(tagsByField)) : undefined,
  };
}
|
|
658
853
|
|