sarj-python-lint 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/PKG-INFO +5 -1
  2. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/README.md +4 -0
  3. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/pyproject.toml +5 -2
  4. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/__main__.py +11 -4
  5. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rule_base.py +13 -6
  6. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/__init__.py +6 -0
  7. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/_logging.py +35 -0
  8. sarj_python_lint-0.5.0/src/sarj_python_lint/rules/__init__.py → sarj_python_lint-0.6.0/src/sarj_python_lint/rules/_registry.py +30 -1
  9. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/inefficient_string_concat_in_loop.py +48 -29
  10. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_aggregation_in_store_query.py +134 -0
  11. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_comment_cruft.py +252 -0
  12. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/no_fat_try_blocks.py +10 -4
  13. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_fstring_in_log.py +106 -0
  14. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/no_isinstance_union_chain.py +23 -10
  15. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_query_with_many_joins.py +110 -0
  16. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/no_secret_in_log.py +15 -22
  17. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_select_star.py +114 -0
  18. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/no_sentinel_return_on_except.py +14 -19
  19. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/no_sequential_await.py +94 -0
  20. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/no_unreachable_after_terminal.py +14 -10
  21. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/prefer_class_row.py +11 -6
  22. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/prefer_constant_time_secret_compare.py +11 -5
  23. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/prefer_discriminated_union.py +41 -23
  24. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/prefer_str_enum.py +13 -7
  25. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/prefer_struct_over_namedtuple.py +100 -0
  26. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/prefer_timedelta_for_durations.py +196 -0
  27. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/rules/pydantic_at_boundaries.py +13 -7
  28. sarj_python_lint-0.6.0/src/sarj_python_lint/rules/store_insert_requires_on_conflict.py +106 -0
  29. sarj_python_lint-0.5.0/src/sarj_python_lint/rules/no_sequential_await.py +0 -71
  30. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/.gitignore +0 -0
  31. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/__init__.py +0 -0
  32. {sarj_python_lint-0.5.0 → sarj_python_lint-0.6.0}/src/sarj_python_lint/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sarj-python-lint
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Custom Python lint rules — AST-based, pre-commit-friendly, hypermodern defaults
5
5
  Project-URL: Homepage, https://github.com/sarj-ai/standards/tree/main/packages/python
6
6
  Project-URL: Repository, https://github.com/sarj-ai/standards
@@ -37,6 +37,10 @@ uv tool install sarj-python-lint
37
37
  - id: sarj-no-fat-try-blocks
38
38
  - id: sarj-pydantic-at-boundaries
39
39
  - id: sarj-prefer-class-row
40
+ - id: sarj-prefer-timedelta-for-durations
41
+ - id: sarj-prefer-struct-over-namedtuple
42
+ - id: sarj-no-comment-cruft
43
+ - id: sarj-no-fstring-in-log
40
44
  ```
41
45
 
42
46
  ## CLI
@@ -19,6 +19,10 @@ uv tool install sarj-python-lint
19
19
  - id: sarj-no-fat-try-blocks
20
20
  - id: sarj-pydantic-at-boundaries
21
21
  - id: sarj-prefer-class-row
22
+ - id: sarj-prefer-timedelta-for-durations
23
+ - id: sarj-prefer-struct-over-namedtuple
24
+ - id: sarj-no-comment-cruft
25
+ - id: sarj-no-fstring-in-log
22
26
  ```
23
27
 
24
28
  ## CLI
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sarj-python-lint"
3
- version = "0.5.0"
3
+ version = "0.6.0"
4
4
  description = "Custom Python lint rules — AST-based, pre-commit-friendly, hypermodern defaults"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "sarj-ai" }]
@@ -27,7 +27,6 @@ Issues = "https://github.com/sarj-ai/standards/issues"
27
27
  [dependency-groups]
28
28
  dev = [
29
29
  "pytest>=9.0",
30
- "pytest-benchmark>=5.2",
31
30
  "ruff>=0.15",
32
31
  "basedpyright>=1.39",
33
32
  ]
@@ -53,3 +52,7 @@ exclude = [
53
52
 
54
53
  [tool.pytest.ini_options]
55
54
  testpaths = ["tests"]
55
+
56
+ # Dogfooding: linted/formatted by this repo's own published config (root-synced).
57
+ [tool.ruff]
58
+ extend = "../../.ruff-strict.toml"
@@ -2,8 +2,8 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import argparse
5
- import sys
6
5
  from pathlib import Path
6
+ import sys
7
7
 
8
8
  from sarj_python_lint import __version__
9
9
  from sarj_python_lint.rule_base import Diagnostic, is_suppressed
@@ -16,6 +16,10 @@ SKIP_DIR_NAMES = {
16
16
  ".turbo", ".yarn", ".pnpm-store",
17
17
  }
18
18
 
19
+ # Skip files larger than this — they are almost always generated/vendored, not
20
+ # hand-written source worth linting.
21
+ _MAX_FILE_BYTES = 500_000
22
+
19
23
 
20
24
  def _expand_paths(paths: list[Path]) -> list[Path]:
21
25
  out: list[Path] = []
@@ -31,7 +35,7 @@ def _expand_paths(paths: list[Path]) -> list[Path]:
31
35
  if any(part in SKIP_DIR_NAMES for part in child.parts):
32
36
  continue
33
37
  try:
34
- if child.stat().st_size > 500_000:
38
+ if child.stat().st_size > _MAX_FILE_BYTES:
35
39
  continue
36
40
  except OSError:
37
41
  continue
@@ -82,14 +86,17 @@ def main(argv: list[str] | None = None) -> int:
82
86
  sub.add_parser("list-rules", help="List available rule IDs.")
83
87
 
84
88
  args = parser.parse_args(argv)
89
+ cmd: str | None = args.cmd
85
90
 
86
- if args.cmd == "list-rules":
91
+ if cmd == "list-rules":
87
92
  for rid, cls in sorted(REGISTRY.items()):
88
93
  inst = cls()
89
94
  sys.stdout.write(f"{inst.code:8} {rid:40} {inst.description}\n")
90
95
  return 0
91
96
 
92
- diags = _check(args.rule, args.files)
97
+ rule_ids: list[str] = args.rule
98
+ files: list[Path] = args.files
99
+ diags = _check(rule_ids, files)
93
100
  for d in diags:
94
101
  sys.stdout.write(d.format() + "\n")
95
102
  return 1 if diags else 0
@@ -2,24 +2,31 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import re
6
5
  from abc import ABC, abstractmethod
7
6
  from dataclasses import dataclass
8
- from pathlib import Path
7
+ import re
8
+ from typing import TYPE_CHECKING
9
+
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Sequence
13
+ from pathlib import Path
14
+
9
15
 
10
16
  # Suppression syntax. Two forms supported:
11
17
  # # sarj-noqa: SARJ001 — reason
12
18
  # # sarj-noqa: SARJ001, SARJ002 — reason
13
- # We deliberately do NOT use `# noqa` because ruff aggressively cleans
14
- # unrecognized noqa codes (RUF100/RUF102) even with `external` set, which
15
- # silently breaks suppressions across runs. Distinct prefix = no conflict.
19
+ # We deliberately do NOT reuse ruff's own suppression comment because ruff
20
+ # aggressively cleans unrecognized codes (RUF100/RUF102) even with `external`
21
+ # set, which silently breaks suppressions across runs. A distinct prefix
22
+ # (sarj-noqa) shares no syntax with ruff, so the two never collide.
16
23
  _SARJ_NOQA_RE = re.compile(
17
24
  r"#\s*sarj-noqa(?::\s*([A-Za-z0-9_, ]+))?",
18
25
  re.IGNORECASE,
19
26
  )
20
27
 
21
28
 
22
- def is_suppressed(source_lines: list[str], line: int, code: str) -> bool:
29
+ def is_suppressed(source_lines: Sequence[str], line: int, code: str) -> bool:
23
30
  """Return True if the diagnostic's line carries a `# sarj-noqa[: CODE]` comment.
24
31
 
25
32
  `line` is 1-based to match Diagnostic.line.
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from sarj_python_lint.rules._registry import REGISTRY
4
+
5
+
6
+ __all__ = ["REGISTRY"]
@@ -0,0 +1,35 @@
1
+ """Shared logging-receiver detection for SARJ012/SARJ017.
2
+
3
+ A single resolver for "does this receiver expression evaluate to a logger?",
4
+ used by both the secret-in-log and f-string-in-log rules so they recognise the
5
+ same factory/builder forms.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import ast
11
+
12
+
13
+ _LOGGER_NAMES = frozenset({"logger", "log", "logging", "loguru", "_logger", "_log"})
14
+
15
+ _LOGGER_FACTORIES = frozenset({"getlogger", "getchild"})
16
+
17
+
18
+ def is_logger_expr(expr: ast.expr) -> bool:
19
+ """True if `expr` evaluates to a logger.
20
+
21
+ Resolves the whole receiver chain so adapter/builder/factory calls are
22
+ caught: `logger.bind(...).info(...)`, `logger.opt(lazy=True).debug(...)`,
23
+ `logging.getLogger(__name__).info(...)`, `self.logger.error(...)`.
24
+ """
25
+ if isinstance(expr, ast.Name):
26
+ return expr.id.lower() in _LOGGER_NAMES
27
+ if isinstance(expr, ast.Attribute):
28
+ if expr.attr.lower() in _LOGGER_NAMES or expr.attr.lower() in _LOGGER_FACTORIES:
29
+ return True
30
+ return is_logger_expr(expr.value)
31
+ if isinstance(expr, ast.Call):
32
+ if isinstance(expr.func, ast.Attribute) and expr.func.attr.lower() in _LOGGER_FACTORIES:
33
+ return True
34
+ return is_logger_expr(expr.func)
35
+ return False
@@ -1,12 +1,20 @@
1
1
  from __future__ import annotations
2
2
 
3
- from sarj_python_lint.rule_base import Rule
3
+ from typing import TYPE_CHECKING
4
+
4
5
  from sarj_python_lint.rules.inefficient_string_concat_in_loop import (
5
6
  InefficientStringConcatInLoop,
6
7
  )
8
+ from sarj_python_lint.rules.no_aggregation_in_store_query import (
9
+ NoAggregationInStoreQuery,
10
+ )
11
+ from sarj_python_lint.rules.no_comment_cruft import NoCommentCruft
7
12
  from sarj_python_lint.rules.no_fat_try_blocks import NoFatTryBlocks
13
+ from sarj_python_lint.rules.no_fstring_in_log import NoFstringInLog
8
14
  from sarj_python_lint.rules.no_isinstance_union_chain import NoIsinstanceUnionChain
15
+ from sarj_python_lint.rules.no_query_with_many_joins import NoQueryWithManyJoins
9
16
  from sarj_python_lint.rules.no_secret_in_log import NoSecretInLog
17
+ from sarj_python_lint.rules.no_select_star import NoSelectStar
10
18
  from sarj_python_lint.rules.no_sentinel_return_on_except import NoSentinelReturnOnExcept
11
19
  from sarj_python_lint.rules.no_sequential_await import NoSequentialAwait
12
20
  from sarj_python_lint.rules.no_unreachable_after_terminal import (
@@ -18,7 +26,20 @@ from sarj_python_lint.rules.prefer_constant_time_secret_compare import (
18
26
  )
19
27
  from sarj_python_lint.rules.prefer_discriminated_union import PreferDiscriminatedUnion
20
28
  from sarj_python_lint.rules.prefer_str_enum import PreferStrEnum
29
+ from sarj_python_lint.rules.prefer_struct_over_namedtuple import (
30
+ PreferStructOverNamedtuple,
31
+ )
32
+ from sarj_python_lint.rules.prefer_timedelta_for_durations import (
33
+ PreferTimedeltaForDurations,
34
+ )
21
35
  from sarj_python_lint.rules.pydantic_at_boundaries import PydanticAtBoundaries
36
+ from sarj_python_lint.rules.store_insert_requires_on_conflict import (
37
+ StoreInsertRequiresOnConflict,
38
+ )
39
+
40
+
41
+ if TYPE_CHECKING:
42
+ from sarj_python_lint.rule_base import Rule
22
43
 
23
44
 
24
45
  REGISTRY: dict[str, type[Rule]] = {
@@ -34,6 +55,14 @@ REGISTRY: dict[str, type[Rule]] = {
34
55
  NoUnreachableAfterTerminal.id: NoUnreachableAfterTerminal,
35
56
  PreferConstantTimeSecretCompare.id: PreferConstantTimeSecretCompare,
36
57
  NoSecretInLog.id: NoSecretInLog,
58
+ PreferTimedeltaForDurations.id: PreferTimedeltaForDurations,
59
+ PreferStructOverNamedtuple.id: PreferStructOverNamedtuple,
60
+ NoCommentCruft.id: NoCommentCruft,
61
+ NoFstringInLog.id: NoFstringInLog,
62
+ StoreInsertRequiresOnConflict.id: StoreInsertRequiresOnConflict,
63
+ NoQueryWithManyJoins.id: NoQueryWithManyJoins,
64
+ NoAggregationInStoreQuery.id: NoAggregationInStoreQuery,
65
+ NoSelectStar.id: NoSelectStar,
37
66
  }
38
67
 
39
68
  __all__ = ["REGISTRY"]
@@ -12,48 +12,67 @@ References:
12
12
  from __future__ import annotations
13
13
 
14
14
  import ast
15
- from pathlib import Path
15
+ from typing import TYPE_CHECKING, override
16
16
 
17
17
  from sarj_python_lint.rule_base import Diagnostic, Rule
18
18
 
19
19
 
20
+ if TYPE_CHECKING:
21
+ from pathlib import Path
22
+
23
+
20
24
  class InefficientStringConcatInLoop(Rule):
21
25
  """O(n²) string concatenation in a loop."""
22
26
 
23
- id = "inefficient-string-concat-in-loop"
24
- code = "SARJ002"
25
- description = "`s += '...'` in a loop is O(n²); append to a list and join."
27
+ id: str = "inefficient-string-concat-in-loop"
28
+ code: str = "SARJ002"
29
+ description: str = "`s += '...'` in a loop is O(n²); append to a list and join."
26
30
 
31
+ @override
27
32
  def check(self, path: Path, source: str) -> list[Diagnostic]:
28
33
  try:
29
34
  tree = ast.parse(source, filename=str(path))
30
35
  except SyntaxError:
31
36
  return []
32
- diags: list[Diagnostic] = []
33
- for loop in ast.walk(tree):
34
- if not isinstance(loop, (ast.For, ast.While)):
35
- continue
36
- for node in ast.walk(loop):
37
- if not isinstance(node, ast.AugAssign):
38
- continue
39
- if not isinstance(node.op, ast.Add):
40
- continue
41
- # Heuristic: the RHS is a string-like value
42
- if not _looks_like_string(node.value):
43
- continue
44
- diags.append(
45
- Diagnostic(
46
- path=path,
47
- line=node.lineno,
48
- col=node.col_offset + 1,
49
- code=self.code,
50
- message=(
51
- "`+=` string concat in a loop is O(n²). "
52
- "Append to a list and `''.join(...)`."
53
- ),
54
- )
55
- )
56
- return diags
37
+ visitor = _ConcatVisitor()
38
+ visitor.visit(tree)
39
+ return [
40
+ Diagnostic(
41
+ path=path,
42
+ line=node.lineno,
43
+ col=node.col_offset + 1,
44
+ code=self.code,
45
+ message=(
46
+ "`+=` string concat in a loop is O(n²). "
47
+ "Append to a list and `''.join(...)`."
48
+ ),
49
+ )
50
+ for node in visitor.hits
51
+ ]
52
+
53
+
54
+ class _ConcatVisitor(ast.NodeVisitor):
55
+ """Single O(n) pass flagging each in-loop string `+=` exactly once."""
56
+
57
+ def __init__(self) -> None:
58
+ self._loop_depth: int = 0
59
+ self.hits: list[ast.AugAssign] = []
60
+
61
+ @override
62
+ def generic_visit(self, node: ast.AST) -> None:
63
+ if isinstance(node, (ast.For, ast.While)):
64
+ self._loop_depth += 1
65
+ super().generic_visit(node)
66
+ self._loop_depth -= 1
67
+ return
68
+ if (
69
+ self._loop_depth
70
+ and isinstance(node, ast.AugAssign)
71
+ and isinstance(node.op, ast.Add)
72
+ and _looks_like_string(node.value)
73
+ ):
74
+ self.hits.append(node)
75
+ super().generic_visit(node)
57
76
 
58
77
 
59
78
  def _looks_like_string(node: ast.AST) -> bool:
@@ -0,0 +1,134 @@
1
+ """SARJ020: no DISTINCT / GROUP BY / COUNT in a store query — aggregate elsewhere.
2
+
3
+ Heavy aggregation (`COUNT`, `GROUP BY`, `DISTINCT`) does not belong in the
4
+ transactional Postgres store layer: it scans, sorts, and hashes large row sets
5
+ on the primary, competing with the latency-critical OLTP path. The house
6
+ direction is to push aggregate/analytical reads to the columnar mirror
7
+ (ClickHouse / BigQuery), where they are cheap, and keep Postgres queries to
8
+ point lookups and small bounded reads.
9
+
10
+ This rule walks SQL string literals embedded in `.py` (`*_store.py`) and flags
11
+ any query (`SELECT ... FROM`, `UPDATE ... SET`, or `DELETE FROM`) that uses
12
+ `COUNT(`, `GROUP BY`, or `DISTINCT`. `--` and `/* */` comments are stripped first.
13
+
14
+ # flagged
15
+ "SELECT status, COUNT(*) FROM call GROUP BY status"
16
+ "SELECT DISTINCT org_id FROM call"
17
+
18
+ # preferred
19
+ point/bounded reads in Postgres; aggregate in ClickHouse/BigQuery.
20
+
21
+ If an aggregate genuinely must run on Postgres (e.g. a tiny bounded admin
22
+ count), suppress with `# sarj-noqa: SARJ020 — <reason>`.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import ast
28
+ import re
29
+ from typing import TYPE_CHECKING, override
30
+
31
+ from sarj_python_lint.rule_base import Diagnostic, Rule
32
+
33
+
34
+ if TYPE_CHECKING:
35
+ from pathlib import Path
36
+
37
+
38
+ _LINE_COMMENT = re.compile(r"--.*?$", re.MULTILINE)
39
+ _BLOCK_COMMENT = re.compile(r"/\*.*?\*/", re.DOTALL)
40
+ # A real SQL query shape — not just the word "from", so prose/LLM-prompt strings
41
+ # (e.g. "distinct from unexpected exceptions") are not mistaken for queries.
42
+ _QUERY_SHAPE = re.compile(
43
+ r"\bSELECT\b[\s\S]*?\bFROM\b|\bUPDATE\b[\s\S]*?\bSET\b|\bDELETE\b\s+FROM\b",
44
+ re.IGNORECASE,
45
+ )
46
+
47
+ # ClickHouse IS the place for aggregation. A file that talks to ClickHouse (the
48
+ # columnar mirror) is exempt — only Postgres store queries are in scope.
49
+ _CLICKHOUSE_FILE = re.compile(
50
+ r"\bclickhouse_connect\b|\bclickhouse_driver\b|^\s*import\s+clickhouse\b",
51
+ re.MULTILINE,
52
+ )
53
+ # Belt-and-braces: a single query using ClickHouse-only functions is ClickHouse.
54
+ _CLICKHOUSE_SQL = re.compile(
55
+ r"\barg(?:Max|Min)\b|\b_peerdb|\bJSONExtract|\buniqExact\b|\bgroupArray\b"
56
+ r"|\barrayJoin\b|\bquantile\w*\(",
57
+ )
58
+
59
+ _AGGREGATIONS: tuple[tuple[str, re.Pattern[str]], ...] = (
60
+ ("COUNT(", re.compile(r"\bCOUNT\s*\(", re.IGNORECASE)),
61
+ ("GROUP BY", re.compile(r"\bGROUP\s+BY\b", re.IGNORECASE)),
62
+ ("DISTINCT", re.compile(r"\bDISTINCT\b", re.IGNORECASE)),
63
+ )
64
+
65
+
66
+ def _string_value(node: ast.expr) -> str | None:
67
+ """Reconstruct a (possibly `+`-concatenated) string literal, else None."""
68
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
69
+ return node.value
70
+ if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
71
+ left = _string_value(node.left)
72
+ right = _string_value(node.right)
73
+ if left is not None and right is not None:
74
+ return left + right
75
+ return None
76
+
77
+
78
+ def _strip_sql_comments(text: str) -> str:
79
+ return _BLOCK_COMMENT.sub(" ", _LINE_COMMENT.sub("", text))
80
+
81
+
82
+ class NoAggregationInStoreQuery(Rule):
83
+ """DISTINCT / GROUP BY / COUNT in a store query — aggregate in ClickHouse."""
84
+
85
+ id = "no-aggregation-in-store-query"
86
+ code = "SARJ020"
87
+ description = (
88
+ "DISTINCT / GROUP BY / COUNT in a Postgres store query — push heavy "
89
+ "aggregation to the columnar mirror (ClickHouse / BigQuery)."
90
+ )
91
+
92
+ @override
93
+ def check(self, path: Path, source: str) -> list[Diagnostic]:
94
+ if _CLICKHOUSE_FILE.search(source):
95
+ return []
96
+ try:
97
+ tree = ast.parse(source, filename=str(path))
98
+ except SyntaxError:
99
+ return []
100
+
101
+ diags: list[Diagnostic] = []
102
+ consumed: set[int] = set()
103
+ for node in ast.walk(tree):
104
+ if not isinstance(node, ast.Constant | ast.BinOp):
105
+ continue
106
+ if id(node) in consumed:
107
+ continue
108
+ text = _string_value(node)
109
+ if text is None:
110
+ continue
111
+ consumed.update(id(sub) for sub in ast.walk(node))
112
+
113
+ sql = _strip_sql_comments(text)
114
+ if _QUERY_SHAPE.search(sql) is None or _CLICKHOUSE_SQL.search(sql):
115
+ continue
116
+ found = [label for label, pat in _AGGREGATIONS if pat.search(sql)]
117
+ if not found:
118
+ continue
119
+
120
+ diags.append(
121
+ Diagnostic(
122
+ path=path,
123
+ line=node.lineno,
124
+ col=node.col_offset + 1,
125
+ code=self.code,
126
+ message=(
127
+ f"Store query uses {', '.join(found)} — push heavy "
128
+ "aggregation to ClickHouse / BigQuery, keep Postgres to "
129
+ "point/bounded reads. Suppress with `# sarj-noqa: SARJ020`."
130
+ ),
131
+ )
132
+ )
133
+ diags.sort(key=lambda d: (d.line, d.col))
134
+ return diags