sarj-python-lint 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/PKG-INFO +1 -1
  2. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/pyproject.toml +1 -1
  3. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/__init__.py +2 -0
  4. sarj_python_lint-0.4.1/src/sarj_python_lint/rules/no_isinstance_union_chain.py +183 -0
  5. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/prefer_discriminated_union.py +37 -0
  6. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/prefer_str_enum.py +7 -15
  7. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/pydantic_at_boundaries.py +33 -24
  8. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/.gitignore +0 -0
  9. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/README.md +0 -0
  10. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/__init__.py +0 -0
  11. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/__main__.py +0 -0
  12. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/py.typed +0 -0
  13. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rule_base.py +0 -0
  14. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/inefficient_string_concat_in_loop.py +0 -0
  15. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/no_fat_try_blocks.py +0 -0
  16. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/no_secret_in_log.py +0 -0
  17. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/no_sentinel_return_on_except.py +0 -0
  18. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/no_sequential_await.py +0 -0
  19. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/no_unreachable_after_terminal.py +0 -0
  20. {sarj_python_lint-0.3.0 → sarj_python_lint-0.4.1}/src/sarj_python_lint/rules/prefer_constant_time_secret_compare.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sarj-python-lint
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Custom Python lint rules — AST-based, pre-commit-friendly, hypermodern defaults
5
5
  Project-URL: Homepage, https://github.com/sarj-ai/standards/tree/main/packages/python
6
6
  Project-URL: Repository, https://github.com/sarj-ai/standards
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sarj-python-lint"
3
- version = "0.3.0"
3
+ version = "0.4.1"
4
4
  description = "Custom Python lint rules — AST-based, pre-commit-friendly, hypermodern defaults"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "sarj-ai" }]
@@ -5,6 +5,7 @@ from sarj_python_lint.rules.inefficient_string_concat_in_loop import (
5
5
  InefficientStringConcatInLoop,
6
6
  )
7
7
  from sarj_python_lint.rules.no_fat_try_blocks import NoFatTryBlocks
8
+ from sarj_python_lint.rules.no_isinstance_union_chain import NoIsinstanceUnionChain
8
9
  from sarj_python_lint.rules.no_secret_in_log import NoSecretInLog
9
10
  from sarj_python_lint.rules.no_sentinel_return_on_except import NoSentinelReturnOnExcept
10
11
  from sarj_python_lint.rules.no_sequential_await import NoSequentialAwait
@@ -25,6 +26,7 @@ REGISTRY: dict[str, type[Rule]] = {
25
26
  PreferDiscriminatedUnion.id: PreferDiscriminatedUnion,
26
27
  PreferStrEnum.id: PreferStrEnum,
27
28
  NoFatTryBlocks.id: NoFatTryBlocks,
29
+ NoIsinstanceUnionChain.id: NoIsinstanceUnionChain,
28
30
  PydanticAtBoundaries.id: PydanticAtBoundaries,
29
31
  NoSentinelReturnOnExcept.id: NoSentinelReturnOnExcept,
30
32
  NoUnreachableAfterTerminal.id: NoUnreachableAfterTerminal,
@@ -0,0 +1,183 @@
1
+ """SARJ003: flag `if/elif isinstance(...)` chains that dispatch over a closed union.
2
+
3
+ A chain of `if isinstance(x, A): ... elif isinstance(x, B): ...` (2+ branches, same
4
+ target, each branch testing one locally-defined class) is almost always dispatch over a
5
+ closed discriminated union. `match`/`case` with `assert_never` in the fallthrough is
6
+ strictly better: pyright reports an error the moment a new variant is added and a branch
7
+ is missed — a plain `isinstance` chain silently falls through.
8
+
9
+ # flagged
10
+ if isinstance(subject, ApiKeySubject):
11
+ ...
12
+ elif isinstance(subject, JwtSubject):
13
+ ...
14
+
15
+ # preferred
16
+ match subject:
17
+ case ApiKeySubject():
18
+ ...
19
+ case JwtSubject():
20
+ ...
21
+ case _:
22
+ assert_never(subject)
23
+
24
+ This is a heuristic, not a proof the union is closed — so it accepts some false positives.
25
+ Suppress a deliberate boundary chain with `# sarj-noqa: SARJ003 — <reason>`.
26
+
27
+ Deliberately NOT flagged (boundary/runtime checks, not closed-union dispatch):
28
+ - a single `isinstance` guard (no chain),
29
+ - `isinstance(x, (A, B))` tuple-membership (one check, not a dispatch chain),
30
+ - any chain whose branches test builtins/stdlib types (`dict`, `str`, `list`, `Exception`,
31
+ `datetime`, ...), the generated-SDK `Unset` sentinel, or `collections.abc`/`typing` ABCs,
32
+ - any chain mixing `isinstance` with a non-`isinstance` condition (e.g. `hasattr`, a
33
+ comparison, a boolean combination) — a defensive guard, not a clean dispatch.
34
+
35
+ References:
36
+ - https://docs.python.org/3/library/typing.html#typing.assert_never
37
+ - https://typing.python.org/en/latest/spec/narrowing.html#assert-never-and-exhaustiveness-checking
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import ast
43
+ from pathlib import Path
44
+
45
+ from sarj_python_lint.rule_base import Diagnostic, Rule
46
+
47
+ _EXCLUDED_TYPE_NAMES = frozenset(
48
+ {
49
+ "dict",
50
+ "str",
51
+ "list",
52
+ "tuple",
53
+ "set",
54
+ "frozenset",
55
+ "int",
56
+ "float",
57
+ "bool",
58
+ "complex",
59
+ "bytes",
60
+ "bytearray",
61
+ "type",
62
+ "object",
63
+ "Exception",
64
+ "BaseException",
65
+ "NoneType",
66
+ "Unset",
67
+ "datetime",
68
+ "date",
69
+ "time",
70
+ "timedelta",
71
+ "Mapping",
72
+ "MutableMapping",
73
+ "Sequence",
74
+ "MutableSequence",
75
+ "Iterable",
76
+ "Iterator",
77
+ "Collection",
78
+ "Container",
79
+ "Set",
80
+ "Hashable",
81
+ "Callable",
82
+ }
83
+ )
84
+
85
+
86
class NoIsinstanceUnionChain(Rule):
    """`if/elif isinstance` chains over local classes — prefer match/case + assert_never.

    "Local" is a heuristic: a chain qualifies when every branch is a bare
    `isinstance(<same target>, <Name or Attribute>)` test and none of the tested
    class names appears in `_EXCLUDED_TYPE_NAMES`.
    """

    id = "no-isinstance-union-chain"
    code = "SARJ003"
    description = (
        "if/elif isinstance chain over local classes — prefer match/case with "
        "assert_never for compile-time exhaustiveness."
    )

    def check(self, path: Path, source: str) -> list[Diagnostic]:
        """Return one diagnostic per qualifying `if/elif isinstance` chain in `source`.

        `path` is used as the parse filename and stamped on every diagnostic.
        Each chain is reported once, at its leading `if`; `elif` arms are never
        treated as chain heads. Source that cannot be parsed yields no
        diagnostics.
        """
        try:
            tree = ast.parse(source, filename=str(path))
        except (SyntaxError, ValueError):
            # ValueError: Python < 3.12 raises it (not SyntaxError) when the
            # source contains null bytes. Treat it like any other unparseable
            # file instead of crashing the rule.
            return []
        elif_nodes = _collect_elif_nodes(tree)
        diags: list[Diagnostic] = []
        for node in ast.walk(tree):
            # Only the leading `if` of a chain is a candidate head.
            if not isinstance(node, ast.If) or id(node) in elif_nodes:
                continue
            count = _qualifying_chain_length(node)
            if count < 2:
                # 0 = not a clean isinstance chain, 1 = a single guard.
                continue
            diags.append(
                Diagnostic(
                    path=path,
                    line=node.lineno,
                    col=node.col_offset + 1,  # AST columns are 0-based
                    code=self.code,
                    message=(
                        f"if/elif isinstance chain over {count} types — prefer "
                        "match/case with assert_never for exhaustiveness."
                    ),
                )
            )
        return diags
121
+
122
+
123
+ def _collect_elif_nodes(tree: ast.AST) -> set[int]:
124
+ """ids of `If` nodes that are the sole `orelse` of another `If` (i.e. `elif` arms)."""
125
+ elifs: set[int] = set()
126
+ for node in ast.walk(tree):
127
+ if isinstance(node, ast.If) and len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If):
128
+ elifs.add(id(node.orelse[0]))
129
+ return elifs
130
+
131
+
132
def _qualifying_chain_length(head: ast.If) -> int:
    """Count the branches of the `if/elif` chain starting at `head`, or return 0.

    The chain qualifies only when every arm's test is
    `isinstance(<target>, <single Name/Attribute class>)`, every arm tests the
    same target expression, and no tested class name is in
    `_EXCLUDED_TYPE_NAMES`. Any arm that breaks one of these rules disqualifies
    the whole chain. A trailing plain `else` is allowed and is not counted.
    """
    expected_target: str | None = None
    branches = 0
    arm = head
    while True:
        parsed = _isinstance_single_type(arm.test)
        if parsed is None:
            return 0
        subject, class_name = parsed
        if class_name in _EXCLUDED_TYPE_NAMES:
            return 0
        # Targets are compared structurally, via their AST dump.
        fingerprint = ast.dump(subject)
        if expected_target is None:
            expected_target = fingerprint
        if fingerprint != expected_target:
            return 0
        branches += 1
        tail = arm.orelse
        # An `elif` is an `orelse` holding exactly one nested `If`; anything
        # else (no else, or a plain else body) ends the chain.
        if len(tail) != 1 or not isinstance(tail[0], ast.If):
            return branches
        arm = tail[0]
158
+
159
+
160
+ def _isinstance_single_type(test: ast.expr) -> tuple[ast.expr, str] | None:
161
+ """If `test` is `isinstance(x, SomeClass)` with a single Name/Attribute class, return
162
+ (target, class_name); else None. Tuple-form `isinstance(x, (A, B))` returns None."""
163
+ if not isinstance(test, ast.Call):
164
+ return None
165
+ if not (isinstance(test.func, ast.Name) and test.func.id == "isinstance"):
166
+ return None
167
+ if len(test.args) != 2 or test.keywords:
168
+ return None
169
+ target, type_node = test.args
170
+ name = _class_name(type_node)
171
+ if name is None:
172
+ return None
173
+ return target, name
174
+
175
+
176
+ def _class_name(node: ast.expr) -> str | None:
177
+ """The trailing name of a class reference: `Foo` / `mod.Foo` -> 'Foo'. None for tuples
178
+ or anything that isn't a plain Name/Attribute (e.g. a subscript or tuple-membership)."""
179
+ if isinstance(node, ast.Name):
180
+ return node.id
181
+ if isinstance(node, ast.Attribute):
182
+ return node.attr
183
+ return None
@@ -43,6 +43,10 @@ Three triggers:
43
43
  (`*Input` / `*Params` / `*Filter` / `*Query` / `Update*` / `Patch*` /
44
44
  `Upsert*`) are excluded from this trigger.
45
45
 
46
+ A single-value `Literal` tag (e.g. `type: Literal["complete"]`) marks a model
47
+ that is already an arm of a discriminated union, so it is excluded too — a
48
+ multi-value `Literal[...]` is still treated as a poor-man's discriminator.
49
+
46
50
  References:
47
51
  - https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions
48
52
  - https://en.wikipedia.org/wiki/Tagged_union
@@ -131,6 +135,7 @@ class PreferDiscriminatedUnion(Rule):
131
135
  if not (is_model or is_dc):
132
136
  return None
133
137
  has_status_bool = False
138
+ has_literal_tag = False
134
139
  optional_fields: list[str] = []
135
140
  discriminator_fields: list[str] = []
136
141
  for stmt in node.body:
@@ -146,6 +151,8 @@ class PreferDiscriminatedUnion(Rule):
146
151
  stmt.annotation, str_enum_names
147
152
  ):
148
153
  discriminator_fields.append(name)
154
+ if _is_single_value_literal(stmt.annotation):
155
+ has_literal_tag = True
149
156
  if _is_optional(stmt.annotation):
150
157
  if name not in IGNORED_OPTIONAL_FIELDS:
151
158
  optional_fields.append(name)
@@ -164,10 +171,13 @@ class PreferDiscriminatedUnion(Rule):
164
171
  ),
165
172
  )
166
173
  # Nullable-cluster trigger: discriminator-ish field + 3 or more nullables.
174
+ # A single-value `Literal` tag (e.g. `type: Literal["complete"]`) marks a
175
+ # model that is already a discriminated-union arm, not a poor-man's result.
167
176
  if (
168
177
  discriminator_fields
169
178
  and len(optional_fields) >= NULLABLE_CLUSTER_THRESHOLD
170
179
  and not _is_dto_class_name(node.name)
180
+ and not has_literal_tag
171
181
  ):
172
182
  return Diagnostic(
173
183
  path=path,
@@ -190,6 +200,33 @@ def _is_dto_class_name(name: str) -> bool:
190
200
  return name.endswith(DTO_CLASS_NAME_SUFFIXES) or name.startswith(DTO_CLASS_NAME_PREFIXES)
191
201
 
192
202
 
203
+ def _is_single_value_literal(node: ast.AST | None) -> bool:
204
+ """Detect a single-constant `Literal[X]` annotation.
205
+
206
+ A one-element `Literal` (e.g. `type: Literal["complete"]`) is the canonical
207
+ tag of a discriminated-union arm, so a model carrying one is already modelled
208
+ correctly. A multi-value `Literal[...]` is still a poor-man's discriminator.
209
+ """
210
+ if node is None:
211
+ return False
212
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
213
+ try:
214
+ parsed = ast.parse(node.value, mode="eval")
215
+ except SyntaxError:
216
+ return False
217
+ return _is_single_value_literal(parsed.body)
218
+ if not isinstance(node, ast.Subscript):
219
+ return False
220
+ if _get_name_flat(node.value).rsplit(".", 1)[-1] != "Literal":
221
+ return False
222
+ slice_node = node.slice
223
+ if type(slice_node).__name__ == "Index":
224
+ slice_node = getattr(slice_node, "value", slice_node)
225
+ if isinstance(slice_node, ast.Tuple):
226
+ return len(slice_node.elts) == 1
227
+ return True
228
+
229
+
193
230
  def _inherits_basemodel(node: ast.ClassDef) -> bool:
194
231
  for base in node.bases:
195
232
  if isinstance(base, ast.Name) and base.id == "BaseModel":
@@ -40,30 +40,22 @@ from pathlib import Path
40
40
  from sarj_python_lint.rule_base import Diagnostic, Rule
41
41
 
42
42
  #: Field / variable name tokens that strongly suggest a closed enumeration.
43
+ #: Kept deliberately HIGH-PRECISION — only words that are almost always a fixed
44
+ #: set. Broader/free-form-prone tokens (type, provider, level, mode, category,
45
+ #: channel, method, strategy, format, source, language, environment, …) were
46
+ #: removed: they over-fired on free-form strings. Those cases are still caught
47
+ #: when corroborated — via a sibling `choices`/`states` attribute or a
48
+ #: comparison cluster against literal values.
43
49
  CHOICE_NAME_TOKENS = frozenset(
44
50
  {
45
51
  "status",
46
52
  "state",
47
- "type",
48
53
  "kind",
49
- "provider",
50
- "language",
51
- "lang",
52
54
  "role",
53
55
  "priority",
54
- "level",
55
- "mode",
56
- "category",
56
+ "severity",
57
57
  "direction",
58
- "environment",
59
- "env",
60
58
  "tier",
61
- "severity",
62
- "channel",
63
- "method",
64
- "strategy",
65
- "format",
66
- "source",
67
59
  "stage",
68
60
  }
69
61
  )
@@ -27,15 +27,16 @@ Purely annotation-based (no type inference), checked on function definitions
27
27
  (sync + async):
28
28
 
29
29
  1. Return annotation that is ``dict[str, Any]`` / ``dict[str, object]`` /
30
- bare ``dict`` / ``Dict``, ``list[dict[str, Any]]``, or a ``tuple[...]``
31
- with 2+ distinct element types.
30
+ bare ``dict`` / ``Dict``, or ``list[dict[str, Any]]``.
32
31
  2. FastAPI route handlers (``@router.get(...)`` / ``@app.post(...)`` etc.)
33
32
  with no return annotation and no ``response_model=`` in the decorator.
34
33
 
35
- Not flagged: fully-concrete dict value types (``dict[str, str]``),
36
- homogeneous tuples (``tuple[int, ...]``, ``tuple[str, str]``), heterogeneous
37
- tuple returns from private (``_``-prefixed) non-route functions, ``@overload``
38
- stubs, and test files.
34
+ Deliberately NOT flagged (kept high-precision for real boundaries):
35
+ private / ``_``-prefixed functions (internal, not a public contract), pydantic
36
+ ``@model_validator`` / ``@field_validator`` hooks (raw dict in/out is their
37
+ API), ``tuple[...]`` returns (multiple return values are idiomatic Python),
38
+ fully-concrete dict value types (``dict[str, str]``), ``@overload`` stubs, and
39
+ test files.
39
40
 
40
41
  References:
41
42
  - https://docs.pydantic.dev/latest/concepts/models/
@@ -53,7 +54,6 @@ from sarj_python_lint.rule_base import Diagnostic, Rule
53
54
  _HTTP_METHODS = {"get", "post", "put", "patch", "delete"}
54
55
  _DICT_NAMES = {"dict", "Dict"}
55
56
  _LIST_NAMES = {"list", "List"}
56
- _TUPLE_NAMES = {"tuple", "Tuple"}
57
57
  _ANY_VALUE_NAMES = {"Any", "object"}
58
58
 
59
59
 
@@ -65,12 +65,12 @@ class _RouteInfo:
65
65
 
66
66
 
67
67
  class PydanticAtBoundaries(Rule):
68
- """Untyped dict / heterogeneous tuple return — define a pydantic model."""
68
+ """Untyped dict return at a public boundary — define a pydantic model."""
69
69
 
70
70
  id = "pydantic-at-boundaries"
71
71
  code = "SARJ008"
72
72
  description = (
73
- "Function returns an untyped dict or heterogeneous tuple — "
73
+ "Public function/route returns an untyped dict — "
74
74
  "define a pydantic model (or frozen dataclass)."
75
75
  )
76
76
 
@@ -87,6 +87,15 @@ class PydanticAtBoundaries(Rule):
87
87
  continue
88
88
  if _is_overload(node):
89
89
  continue
90
+ # Private/internal functions are not public boundaries — their
91
+ # return shape is an implementation detail, not a data contract.
92
+ if node.name.startswith("_"):
93
+ continue
94
+ # Pydantic validator hooks (`@model_validator`/`@field_validator`)
95
+ # take and return raw dict/values by contract — that's the API, not
96
+ # a missing model.
97
+ if _is_validator(node):
98
+ continue
90
99
  route = _route_info(node)
91
100
  returns = _resolve_annotation(node.returns)
92
101
  if returns is None:
@@ -108,10 +117,6 @@ class PydanticAtBoundaries(Rule):
108
117
  kind = _classify_return(returns)
109
118
  if kind is None:
110
119
  continue
111
- # Private functions returning heterogeneous tuples are common and
112
- # fine-ish; untyped dicts are flagged everywhere.
113
- if kind == "tuple" and route is None and node.name.startswith("_"):
114
- continue
115
120
  ann_text = ast.unparse(returns)
116
121
  diags.append(
117
122
  Diagnostic(
@@ -141,6 +146,19 @@ def _is_overload(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
141
146
  return False
142
147
 
143
148
 
149
+ _VALIDATOR_DECORATORS = {"model_validator", "field_validator", "validator", "root_validator"}
150
+
151
+
152
+ def _is_validator(node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
153
+ """A pydantic validator hook — dict/value in-and-out is its required contract."""
154
+ for dec in node.decorator_list:
155
+ target = dec.func if isinstance(dec, ast.Call) else dec
156
+ name = _flat_name(target) if isinstance(target, (ast.Name, ast.Attribute)) else ""
157
+ if name in _VALIDATOR_DECORATORS:
158
+ return True
159
+ return False
160
+
161
+
144
162
  def _route_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> _RouteInfo | None:
145
163
  """Detect a FastAPI route decorator: `@<router|app|*_router>.<method>(...)`."""
146
164
  for dec in node.decorator_list:
@@ -199,8 +217,8 @@ def _classify_return(node: ast.expr) -> str | None:
199
217
  return "dict" if inner == "dict" else None
200
218
  if base in _DICT_NAMES:
201
219
  return "dict" if _is_untyped_dict_args(node.slice) else None
202
- if base in _TUPLE_NAMES:
203
- return "tuple" if _is_heterogeneous_tuple_args(node.slice) else None
220
+ # Heterogeneous tuple returns are NOT flagged — multiple return values are
221
+ # idiomatic Python, not a missing data contract.
204
222
  return None
205
223
 
206
224
 
@@ -211,15 +229,6 @@ def _is_untyped_dict_args(slice_node: ast.expr) -> bool:
211
229
  return _flat_name(slice_node.elts[1]) in _ANY_VALUE_NAMES
212
230
 
213
231
 
214
- def _is_heterogeneous_tuple_args(slice_node: ast.expr) -> bool:
215
- """`tuple[...]` is flagged when it has 2+ distinct element types."""
216
- if not isinstance(slice_node, ast.Tuple):
217
- return False # single-element `tuple[X]`
218
- # `tuple[X, ...]` is a homogeneous variadic tuple.
219
- if any(isinstance(elt, ast.Constant) and elt.value is Ellipsis for elt in slice_node.elts):
220
- return False
221
- distinct = {ast.unparse(elt) for elt in slice_node.elts}
222
- return len(distinct) >= 2
223
232
 
224
233
 
225
234
  def _flat_name(node: ast.expr) -> str: