dataxplan 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataxplan/__init__.py +45 -0
- dataxplan/__main__.py +6 -0
- dataxplan/_result.py +83 -0
- dataxplan/_version.py +1 -0
- dataxplan/cli.py +66 -0
- dataxplan/compare.py +115 -0
- dataxplan/context.py +62 -0
- dataxplan/findings.py +164 -0
- dataxplan/metrics.py +104 -0
- dataxplan/parse.py +195 -0
- dataxplan/py.typed +0 -0
- dataxplan/render.py +83 -0
- dataxplan/report.py +112 -0
- dataxplan/run.py +43 -0
- dataxplan-0.1.0.dist-info/METADATA +210 -0
- dataxplan-0.1.0.dist-info/RECORD +20 -0
- dataxplan-0.1.0.dist-info/WHEEL +5 -0
- dataxplan-0.1.0.dist-info/entry_points.txt +2 -0
- dataxplan-0.1.0.dist-info/licenses/LICENSE +21 -0
- dataxplan-0.1.0.dist-info/top_level.txt +1 -0
dataxplan/__init__.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""dataxplan - read PostgreSQL EXPLAIN plans, locally and deterministically.
|
|
2
|
+
|
|
3
|
+
Give it the output of ``EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ...`` and it
|
|
4
|
+
parses the plan, computes the metrics people misread (self time, estimation
|
|
5
|
+
error, disk spills), and flags documented problems. No database connection is
|
|
6
|
+
required and nothing leaves your machine.
|
|
7
|
+
|
|
8
|
+
import dataxplan
|
|
9
|
+
|
|
10
|
+
report = dataxplan.analyze(explain_json)
|
|
11
|
+
print(report.summary())
|
|
12
|
+
|
|
13
|
+
# guard a plan in a test (fail CI if it regresses)
|
|
14
|
+
assert not report.has_seq_scan_on("orders")
|
|
15
|
+
assert report.max_estimation_error < 100
|
|
16
|
+
assert not report.spilled_to_disk
|
|
17
|
+
|
|
18
|
+
# compare two plans (before/after an index)
|
|
19
|
+
print(dataxplan.compare(before_json, after_json).summary())
|
|
20
|
+
|
|
21
|
+
The findings are documented heuristics, not guarantees, and the analysis is of
|
|
22
|
+
the plan you provide; it does not run your queries or read your schema unless you
|
|
23
|
+
choose to supply catalog context.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from ._result import Finding
|
|
27
|
+
from ._version import __version__
|
|
28
|
+
from .compare import Comparison, compare
|
|
29
|
+
from .context import Context, TableInfo
|
|
30
|
+
from .metrics import NodeMetrics
|
|
31
|
+
from .parse import Plan, PlanNode, parse
|
|
32
|
+
from .render import plan_tree_chart, text_tree
|
|
33
|
+
from .report import Report, analyze
|
|
34
|
+
from .run import run_explain
|
|
35
|
+
|
|
36
|
+
# Names re-exported as the package's public API, grouped by role.
__all__ = [
    # core flow
    "parse",
    "analyze",
    "compare",
    # types
    "Plan",
    "PlanNode",
    "Report",
    "NodeMetrics",
    "Finding",
    "Comparison",
    "Context",
    "TableInfo",
    # render and helpers
    "text_tree",
    "plan_tree_chart",
    "run_explain",
    "__version__",
]
|
dataxplan/__main__.py
ADDED
dataxplan/_result.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Shared plumbing: provenance (version, input hash, timestamp), the ``Finding``
|
|
2
|
+
type, and small formatting helpers. Every public result carries a meta block so
|
|
3
|
+
an analysis can be reproduced and audited later.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import json
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
|
|
13
|
+
from ._version import __version__
|
|
14
|
+
|
|
15
|
+
# Schema number; presumably the version of the serialised result layout
# (it is not referenced elsewhere in the visible code).
SCHEMA = 1

# Finding severities, most to least serious.
HIGH, MEDIUM, LOW, INFO = "high", "medium", "low", "info"

# Sort rank per severity: the most serious severity gets the lowest rank.
_ORDER = {
    severity: position
    for position, severity in enumerate((HIGH, MEDIUM, LOW, INFO))
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def utcnow() -> str:
    """Return the current UTC time as an ISO 8601 string, to the second."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat(timespec="seconds")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def data_hash(obj: object) -> str:
    """Return a short, deterministic fingerprint of ``obj``.

    The object is serialised as JSON with sorted keys (values JSON cannot
    encode are passed through ``str``), hashed with SHA-256, and the first
    16 hex digits of the digest are returned behind a ``sha256:`` prefix.
    """
    canonical = json.dumps(obj, sort_keys=True, default=str)
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"sha256:{digest[:16]}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def make_meta(inputs: dict) -> dict:
    """Build the provenance block stamped onto every result.

    The block names the library and its version, records when it was
    computed (``utcnow``) and carries a fingerprint of ``inputs``
    (``data_hash``).
    """
    meta = {"library": "dataxplan", "version": __version__}
    meta["computed_at"] = utcnow()
    meta["input_hash"] = data_hash(inputs)
    return meta
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class Finding:
    """One observation about a plan: a documented heuristic, not a guarantee."""

    id: str
    severity: str                        # high | medium | low | info
    title: str
    detail: str
    node: str | None = None              # e.g. "Seq Scan on orders"
    path: tuple[int, ...] | None = None
    suggestion: str | None = None

    @property
    def rank(self) -> int:
        """Sort rank of the severity; unknown severities rank last (9)."""
        return _ORDER.get(self.severity, 9)

    def __str__(self) -> str:
        """Render the finding as a headline, its detail and any suggestion."""
        head = f"[{self.severity.upper()}] {self.title}"
        if self.node:
            head += f" ({self.node})"
        lines = [head, f" {self.detail}"]
        if self.suggestion:
            lines.append(f" -> {self.suggestion}")
        return "\n".join(lines)

    def to_dict(self) -> dict:
        """Return a JSON-friendly dict of the finding's fields.

        ``path`` becomes a list. Only a missing path (``None``) maps to
        ``None``: an empty path tuple is a real path and is kept as ``[]``
        rather than being mistaken for "no path" by a truthiness check.
        """
        return {
            "id": self.id,
            "severity": self.severity,
            "title": self.title,
            "detail": self.detail,
            "node": self.node,
            "path": None if self.path is None else list(self.path),
            "suggestion": self.suggestion,
        }
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def ms(value: float | None) -> str:
    """Format a duration in milliseconds, or ``-`` when it is unknown."""
    if value is None:
        return "-"
    return f"{value:,.2f} ms"
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def num(value: float | None, places: int = 0) -> str:
    """Format a number with thousands separators and ``places`` decimals.

    Returns ``-`` when the value is unknown (``None``).
    """
    if value is None:
        return "-"
    return format(value, f",.{places}f")
|
dataxplan/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
dataxplan/cli.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Command-line interface: analyse a plan from a file or stdin.
|
|
2
|
+
|
|
3
|
+
dataxplan plan.json
|
|
4
|
+
dataxplan plan.json --tree
|
|
5
|
+
dataxplan plan.json --json
|
|
6
|
+
dataxplan before.json --compare after.json
|
|
7
|
+
psql -XqAt -c "EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ..." | dataxplan
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from . import analyze, compare, text_tree
|
|
17
|
+
from ._version import __version__
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _read(source: str) -> str:
    """Return the text of ``source``.

    ``None`` or ``"-"`` selects standard input; anything else is opened as
    a UTF-8 text file.
    """
    from_stdin = source is None or source == "-"
    if from_stdin:
        return sys.stdin.read()
    with open(source, encoding="utf-8") as stream:
        text = stream.read()
    return text
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main(argv=None) -> int:
    """Run the command-line interface and return the exit status.

    ``argv`` is handed to ``argparse`` (``None`` means the process
    arguments). Returns 0 on success and 2 when the input cannot be read,
    decoded or analysed; the error is reported on standard error.
    """
    cli = argparse.ArgumentParser(
        prog="dataxplan",
        description="Analyse a PostgreSQL EXPLAIN (FORMAT JSON) plan, locally.")
    cli.add_argument(
        "plan", nargs="?", default="-",
        help="plan file, or - for stdin (the default)")
    cli.add_argument(
        "--tree", action="store_true",
        help="also print the annotated plan tree")
    cli.add_argument(
        "--json", action="store_true",
        help="print the full report (or comparison) as JSON")
    cli.add_argument(
        "--compare", metavar="OTHER",
        help="compare the plan against another plan file")
    cli.add_argument(
        "--version", action="version", version=f"dataxplan {__version__}")
    options = cli.parse_args(argv)

    def as_json(payload) -> str:
        # default=str stringifies any value the json module cannot encode.
        return json.dumps(payload, indent=2, default=str)

    try:
        primary = json.loads(_read(options.plan))
        if options.compare:
            other = json.loads(_read(options.compare))
            outcome = compare(primary, other)
            if options.json:
                print(as_json(outcome.to_dict()))
            else:
                print(outcome.summary())
            return 0
        report = analyze(primary)
    except (ValueError, TypeError, OSError, json.JSONDecodeError) as exc:
        print(f"dataxplan: {exc}", file=sys.stderr)
        return 2

    if options.json:
        print(as_json(report.to_dict()))
        return 0

    print(report.summary())
    if options.tree:
        # The tree accompanies the text summary only, so --json output
        # stays a single parseable JSON document.
        print("\nplan tree:")
        print(text_tree(report))
    return 0
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Script entry point: exit with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
dataxplan/compare.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Compare two plans for regression: did a change make a query slower, change
|
|
2
|
+
its shape, or worsen its estimates? Useful before/after an index, a query
|
|
3
|
+
rewrite or a schema change, and as a guard in CI.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
from ._result import make_meta, ms
|
|
11
|
+
from .report import Report, analyze
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _as_report(x) -> Report:
    """Return ``x`` unchanged if it is already a ``Report``, else analyse it."""
    if isinstance(x, Report):
        return x
    return analyze(x)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _self_by_label(report: Report) -> dict:
    """Total the self time of a report's nodes, grouped by node label.

    Nodes that share a label are summed; a node without a self time
    contributes 0.0.
    """
    totals: dict = {}
    for metric in report.metrics:
        contribution = metric.self_time or 0.0
        totals[metric.label] = totals.get(metric.label, 0.0) + contribution
    return totals
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class Comparison:
    """Outcome of comparing a "before" plan with an "after" plan."""

    before_time_ms: float | None
    after_time_ms: float | None
    delta_ms: float | None
    delta_pct: float | None
    node_deltas: tuple[dict, ...]
    appeared: tuple[str, ...]
    disappeared: tuple[str, ...]
    new_findings: tuple[str, ...]
    resolved_findings: tuple[str, ...]
    before_max_error: float
    after_max_error: float
    verdict: str                         # improved | regressed | similar
    meta: dict

    def summary(self) -> str:
        """Render the comparison as a short, human-readable text block.

        The verdict always comes first. The timing line needs both
        execution times; every other section is printed only when it has
        content, and at most five non-zero self-time changes are listed.
        """
        out = [f"dataxplan compare - {self.verdict.upper()}"]

        times_known = not (self.before_time_ms is None
                           or self.after_time_ms is None)
        if times_known:
            change = ""
            if self.delta_pct is not None:
                change = f" ({self.delta_pct:+.0%})"
            out.append(f" execution time {ms(self.before_time_ms)} -> "
                       f"{ms(self.after_time_ms)}{change}")

        sections = (
            (" new nodes ", self.appeared),
            (" gone nodes ", self.disappeared),
            (" new findings ", self.new_findings),
            (" resolved ", self.resolved_findings),
        )
        for heading, names in sections:
            if names:
                out.append(heading + ", ".join(names))

        changed = [row for row in self.node_deltas
                   if abs(row["delta_ms"] or 0) > 0]
        shown = changed[:5]
        if shown:
            out.append(" largest self-time changes:")
            out.extend(f" {row['label']:<32} {row['delta_ms']:+,.2f} ms"
                       for row in shown)
        return "\n".join(out)

    def to_dict(self) -> dict:
        """Return a JSON-friendly dict; tuple fields become lists."""
        return {
            "schema": 1,
            "verdict": self.verdict,
            "before_time_ms": self.before_time_ms,
            "after_time_ms": self.after_time_ms,
            "delta_ms": self.delta_ms,
            "delta_pct": self.delta_pct,
            "node_deltas": list(self.node_deltas),
            "appeared": list(self.appeared),
            "disappeared": list(self.disappeared),
            "new_findings": list(self.new_findings),
            "resolved_findings": list(self.resolved_findings),
            "before_max_error": self.before_max_error,
            "after_max_error": self.after_max_error,
            "meta": self.meta,
        }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def compare(before, after) -> Comparison:
    """Compare two plans (``Report`` objects or raw EXPLAIN output).

    The verdict comes from execution time when both plans have one: a
    change of more than 5% of the "before" time is "regressed" or
    "improved", anything smaller is "similar". Without both timings the
    same 5% band is applied to the root nodes' total cost.
    """
    old, new = _as_report(before), _as_report(after)
    old_ms, new_ms = old.execution_time_ms, new.execution_time_ms

    delta = dpct = None
    if old_ms is None or new_ms is None:
        # No timing on at least one side: judge by the root's total cost.
        old_cost, new_cost = old.plan.root.total_cost, new.plan.root.total_cost
        if new_cost > 1.05 * old_cost:
            verdict = "regressed"
        elif new_cost < 0.95 * old_cost:
            verdict = "improved"
        else:
            verdict = "similar"
    else:
        delta = new_ms - old_ms
        if old_ms:
            # A relative change is undefined for a zero baseline.
            dpct = delta / old_ms
        tolerance = 0.05 * old_ms
        if delta > tolerance:
            verdict = "regressed"
        elif delta < -tolerance:
            verdict = "improved"
        else:
            verdict = "similar"

    # Self time per label on each side; a label missing from one side
    # counts as 0.0 in the delta but is reported as None for that side.
    old_self, new_self = _self_by_label(old), _self_by_label(new)
    rows = [
        {
            "label": label,
            "before_ms": old_self.get(label),
            "after_ms": new_self.get(label),
            "delta_ms": new_self.get(label, 0.0) - old_self.get(label, 0.0),
        }
        for label in sorted(set(old_self) | set(new_self))
    ]
    # Largest absolute change first; the stable sort keeps ties in label order.
    rows.sort(key=lambda row: abs(row["delta_ms"] or 0), reverse=True)

    old_labels = {metric.label for metric in old.metrics}
    new_labels = {metric.label for metric in new.metrics}
    # The "clean" placeholder is not a real finding, so it is excluded.
    old_ids = {finding.id for finding in old.findings if finding.id != "clean"}
    new_ids = {finding.id for finding in new.findings if finding.id != "clean"}

    return Comparison(
        before_time_ms=old_ms,
        after_time_ms=new_ms,
        delta_ms=delta,
        delta_pct=dpct,
        node_deltas=tuple(rows),
        appeared=tuple(sorted(new_labels - old_labels)),
        disappeared=tuple(sorted(old_labels - new_labels)),
        new_findings=tuple(sorted(new_ids - old_ids)),
        resolved_findings=tuple(sorted(old_ids - new_ids)),
        before_max_error=old.max_estimation_error,
        after_max_error=new.max_estimation_error,
        verdict=verdict,
        meta=make_meta({"before_time": old_ms, "after_time": new_ms}))
|
dataxplan/context.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Optional catalog context.
|
|
2
|
+
|
|
3
|
+
The core analysis works from the plan alone. When the caller can supply a little
|
|
4
|
+
metadata from the database catalog (table sizes, the columns that are indexed,
|
|
5
|
+
the server's ``work_mem``), the findings get sharper: a sequential scan on a
|
|
6
|
+
known-large table with no index on the filtered column is a stronger signal than
|
|
7
|
+
a sequential scan in isolation. Context is data the caller provides; the library
|
|
8
|
+
never connects to a database to fetch it.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Mapping
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class TableInfo:
    """Catalog facts the caller may know about a single table."""

    name: str
    row_count: float | None = None
    indexed_columns: tuple[str, ...] = ()    # columns that appear in some index
    analyzed: bool = True                    # False if statistics look stale

    def has_index_on(self, column: str) -> bool:
        """Report whether ``column`` is listed in ``indexed_columns``."""
        covered = column in self.indexed_columns
        return covered
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class Context:
    """Caller-supplied catalog metadata, with tables looked up by name."""

    tables: dict = field(default_factory=dict)   # name -> TableInfo
    work_mem_mb: float | None = None

    def table(self, name: str | None) -> TableInfo | None:
        """Return the entry stored for ``name``, or None.

        None is returned both for a ``None`` name and for a name that has
        no entry in ``tables``.
        """
        return None if name is None else self.tables.get(name)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def as_context(value) -> Context | None:
    """Accept a :class:`Context`, a mapping, or None.

    A ``Context`` or ``None`` is returned unchanged. A mapping is converted:
    its optional ``"tables"`` entry maps table names to either
    :class:`TableInfo` objects or mappings with the optional keys
    ``row_count``, ``indexed_columns`` and ``analyzed``; its optional
    ``"work_mem_mb"`` entry is copied across.

    Raises:
        TypeError: if ``value``, its ``"tables"`` entry, or one table's info
            has an unsupported type.
    """
    if value is None or isinstance(value, Context):
        return value
    if not isinstance(value, Mapping):
        raise TypeError("context must be a Context, a mapping, or None")

    raw_tables = value.get("tables")
    if raw_tables is None:
        # A missing or explicitly null "tables" entry means "no tables".
        raw_tables = {}
    elif not isinstance(raw_tables, Mapping):
        # Fail with the documented exception type, not an AttributeError
        # from calling .items() on the wrong kind of object.
        raise TypeError("context 'tables' must be a mapping")

    tables = {}
    for name, info in raw_tables.items():
        if isinstance(info, TableInfo):
            tables[name] = info
        elif isinstance(info, Mapping):
            columns = info.get("indexed_columns") or ()
            if isinstance(columns, str):
                # tuple("id") would yield ('i', 'd'); a bare string names
                # one column.
                columns = (columns,)
            tables[name] = TableInfo(
                name=name,
                row_count=info.get("row_count"),
                indexed_columns=tuple(columns),
                analyzed=info.get("analyzed", True))
        else:
            raise TypeError(f"table info for '{name}' must be a mapping")
    return Context(tables=tables, work_mem_mb=value.get("work_mem_mb"))
|
dataxplan/findings.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Heuristic rules that turn metrics into findings.
|
|
2
|
+
|
|
3
|
+
Each rule encodes a documented PostgreSQL behaviour (see the references in the
|
|
4
|
+
README). A finding is an observation with an explanation and, where reasonable,
|
|
5
|
+
a suggestion; it is never a promise that a change will help. When catalog
|
|
6
|
+
context is supplied the messages are sharpened, but every rule works from the
|
|
7
|
+
plan alone.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from ._result import HIGH, INFO, LOW, MEDIUM, Finding
|
|
13
|
+
from .context import Context
|
|
14
|
+
from .metrics import NodeMetrics
|
|
15
|
+
from .parse import Plan, PlanNode
|
|
16
|
+
|
|
17
|
+
# Default trigger levels for the heuristic rules; callers may override any
# of them through the ``thresholds`` argument of ``run_findings``.
DEFAULT_THRESHOLDS = dict(
    estimation_error_high=100.0,   # x off -> high severity
    estimation_error_med=10.0,     # x off -> medium (if the node matters)
    seq_scan_pct=0.30,             # share of execution time
    filter_discard_ratio=10.0,     # rows removed vs rows kept
    nested_loop_loops=1000.0,      # inner executions
    heap_fetch_ratio=0.10,         # heap fetches vs rows
    jit_pct=0.25,                  # JIT time vs execution time
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _pct(value: float | None) -> str:
    """Format a 0-1 fraction as a whole-number percentage, or ``-`` for None."""
    if value is None:
        return "-"
    return f"{100 * value:.0f}%"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def run_findings(plan: Plan, metrics: list[NodeMetrics],
                 context: Context | None, thresholds: dict) -> list[Finding]:
    """Apply every heuristic rule to a plan and return the sorted findings.

    Args:
        plan: the parsed plan. Each entry of ``metrics`` is matched to its
            plan node by ``path``, so every metric path must exist in it.
        metrics: per-node metrics for ``plan``.
        context: optional catalog context; when given, messages mention
            table sizes, existing indexes, stale statistics and work_mem.
        thresholds: overrides merged over ``DEFAULT_THRESHOLDS``; may be
            empty or None.

    Returns:
        The findings ordered by severity rank, then by the flagged node's
        share of execution time (largest first). When no rule fires the
        list holds a single INFO finding with id ``"clean"``.
    """
    t = {**DEFAULT_THRESHOLDS, **(thresholds or {})}
    by_path: dict[tuple, PlanNode] = {n.path: n for n in plan.root.walk()}
    found: list[Finding] = []

    for m in metrics:
        node = by_path[m.path]
        ctx_table = context.table(m.relation) if context else None

        # 1. Row estimate far from reality (the usual root cause).
        if m.estimation_error is not None:
            sev = None
            if m.estimation_error >= t["estimation_error_high"]:
                sev = HIGH
            elif (m.estimation_error >= t["estimation_error_med"]
                  and (m.pct_self or 0) >= 0.10):
                # A medium-sized miss only matters on a node that accounts
                # for at least 10% of the time.
                sev = MEDIUM
            if sev:
                direction = "under" if (m.estimation_factor or 1) > 1 else "over"
                detail = (f"estimated {m.plan_rows:,.0f} rows, actual "
                          f"{m.actual_rows:,.0f} ({m.estimation_error:.0f}x "
                          f"{direction}-estimate)")
                sug = ("run ANALYZE on the table; if the columns are correlated "
                       "consider extended statistics (CREATE STATISTICS)")
                if ctx_table is not None and not ctx_table.analyzed:
                    sug = "statistics look stale; run ANALYZE on " + m.relation
                found.append(Finding("estimate_off", sev, "Row estimate is far off",
                                     detail, m.label, m.path, sug))

        # 2. Sequential scan taking a large share of the time.
        if (m.node_type == "Seq Scan" and m.relation
                and (m.pct_self or 0) >= t["seq_scan_pct"]):
            size = (f" ({ctx_table.row_count:,.0f} rows)"
                    if ctx_table and ctx_table.row_count else "")
            detail = (f"sequential scan{size} is {_pct(m.pct_self)} of execution "
                      f"time, reading {m.actual_rows:,.0f} rows"
                      if m.actual_rows is not None else
                      f"sequential scan{size} is {_pct(m.pct_self)} of execution time")
            sug = f"consider an index supporting the filter or join on {m.relation}"
            if ctx_table is not None and ctx_table.indexed_columns:
                sug += (f" (existing indexes cover: "
                        f"{', '.join(ctx_table.indexed_columns)})")
            found.append(Finding("seq_scan_hot", HIGH, "Hot sequential scan",
                                 detail, m.label, m.path, sug))

        # 3. A sort or hash spilled to disk.
        if m.spilled:
            if node.sort_method and "external" in node.sort_method.lower():
                what = f"sort spilled to disk ({node.sort_method})"
            elif (node.hash_batches or 0) > 1:
                what = f"hash spilled to disk ({node.hash_batches:.0f} batches)"
            else:
                what = f"wrote {node.temp_written:,.0f} temp blocks to disk"
            mem = (f"; work_mem is {context.work_mem_mb:.0f} MB"
                   if context and context.work_mem_mb else "")
            found.append(Finding(
                "disk_spill", MEDIUM, "Operation spilled to disk",
                what + mem, m.label, m.path,
                "raise work_mem for this query, or reduce the rows being "
                "sorted or hashed"))

        # 4. Reading many rows and discarding most (non-sargable / missing index).
        removed = m.rows_removed_by_filter
        if (removed is not None and m.actual_rows is not None
                # +1 keeps the ratio meaningful when no rows were kept.
                and removed >= t["filter_discard_ratio"] * (m.actual_rows + 1)
                and "Scan" in m.node_type):
            detail = (f"removed {removed:,.0f} rows by filter but kept only "
                      f"{m.actual_rows:,.0f}")
            found.append(Finding(
                "filter_discard", MEDIUM, "Filter discards most rows read", detail,
                m.label, m.path,
                "the predicate is not selective via the current access path; an "
                "index on the filtered column may help"))

        # 5. Nested loop driving its inner side many times.
        if m.node_type == "Nested Loop" and node.children:
            inner_loops = max((c.actual_loops for c in node.children), default=0)
            if inner_loops >= t["nested_loop_loops"]:
                found.append(Finding(
                    "nested_loop_blowup", MEDIUM, "Nested loop with many iterations",
                    f"the inner side executed {inner_loops:,.0f} times", m.label,
                    m.path,
                    "usually an under-estimate upstream; check the row estimates, a "
                    "hash or merge join may be cheaper"))

        # 6. Index-only scan still hitting the heap (visibility map not set).
        if (m.node_type == "Index Only Scan" and m.heap_fetches
                and m.actual_rows is not None
                and m.heap_fetches >= t["heap_fetch_ratio"] * (m.actual_rows + 1)):
            found.append(Finding(
                "index_only_heap_fetches", LOW,
                "Index-only scan with many heap fetches",
                f"{m.heap_fetches:,.0f} heap fetches for {m.actual_rows:,.0f} rows",
                m.label, m.path,
                "VACUUM the table so the visibility map lets the scan skip the heap"))

        # 7. Lossy bitmap heap scan (work_mem too small for the bitmap).
        recheck = node.raw.get("Rows Removed by Index Recheck")
        if m.node_type == "Bitmap Heap Scan" and recheck:
            found.append(Finding(
                "lossy_bitmap", LOW, "Bitmap heap scan went lossy",
                f"{float(recheck):,.0f} rows rechecked after a lossy bitmap",
                m.label, m.path,
                "raise work_mem so the bitmap stays exact"))

    # 8. JIT overhead on a short query (a plan-level check, not per node).
    jit = plan.jit
    if jit and plan.execution_time:
        total = (jit.get("Timing", {}) or {}).get("Total")
        if total and plan.execution_time and total >= t["jit_pct"] * plan.execution_time:
            found.append(Finding(
                "jit_overhead", LOW, "JIT compilation is a large share of the time",
                f"JIT took {total:,.1f} ms of {plan.execution_time:,.1f} ms total",
                None, None,
                "for short, frequent queries consider raising jit_above_cost or "
                "turning JIT off"))

    if not found:
        found.append(Finding("clean", INFO, "No issues flagged",
                             "no heuristic flagged this plan", None, None, None))

    # Share of execution time per node path, built once so the sort key is a
    # dict lookup rather than a scan of ``metrics`` for every finding.
    # setdefault keeps the first entry for a path, matching a front-to-back
    # search; findings without a path fall back to 0.0.
    pct_by_path: dict = {}
    for m in metrics:
        pct_by_path.setdefault(m.path, m.pct_self or 0.0)

    found.sort(key=lambda f: (f.rank, -pct_by_path.get(f.path, 0.0)))
    return found
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _self_pct(metrics: list[NodeMetrics], path) -> float:
    """Return the ``pct_self`` of the first metric at ``path``.

    Falls back to 0.0 when ``path`` is None, when no metric has that path,
    or when the matching metric has no (or a zero) ``pct_self``.
    """
    if path is not None:
        match = next((m for m in metrics if m.path == path), None)
        if match is not None:
            return match.pct_self or 0.0
    return 0.0
|
dataxplan/metrics.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Derived metrics for a parsed plan.
|
|
2
|
+
|
|
3
|
+
The two numbers people most often get wrong when reading a plan are computed
|
|
4
|
+
here: the *self* (exclusive) time of a node, and the *estimation error*.
|
|
5
|
+
|
|
6
|
+
* Postgres reports ``Actual Total Time`` per loop and inclusive of children, so a
|
|
7
|
+
node's total time is ``Actual Total Time x Actual Loops`` and its self time is
|
|
8
|
+
that minus the total time of its children. Self time shows where the work
|
|
9
|
+
actually happens.
|
|
10
|
+
* ``Plan Rows`` (estimated) against ``Actual Rows`` gives the estimation error,
|
|
11
|
+
the usual root cause of a bad plan.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
from .parse import Plan
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class NodeMetrics:
    """Derived metrics for a single plan node."""

    path: tuple[int, ...]
    label: str
    node_type: str
    relation: str | None
    loops: float
    inclusive_time: float | None
    self_time: float | None
    pct_self: float | None
    plan_rows: float
    actual_rows: float | None
    estimation_factor: float | None   # actual / estimated, per loop (>1 under-estimate)
    estimation_error: float | None    # how many times off, max(factor, 1/factor)
    spilled: bool
    shared_read: float
    rows_removed_by_filter: float | None
    heap_fetches: float | None

    def to_dict(self) -> dict:
        """Return a JSON-friendly dict of the metrics.

        ``path`` becomes a list, the two timing fields gain an ``_ms``
        suffix and ``shared_read`` is exported as ``shared_read_blocks``.
        """
        serialised = {"path": list(self.path)}
        serialised.update(
            label=self.label,
            node_type=self.node_type,
            relation=self.relation,
            loops=self.loops,
            inclusive_time_ms=self.inclusive_time,
            self_time_ms=self.self_time,
            pct_self=self.pct_self,
            plan_rows=self.plan_rows,
            actual_rows=self.actual_rows,
            estimation_factor=self.estimation_factor,
            estimation_error=self.estimation_error,
            spilled=self.spilled,
            shared_read_blocks=self.shared_read,
            rows_removed_by_filter=self.rows_removed_by_filter,
            heap_fetches=self.heap_fetches,
        )
        return serialised
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def compute_metrics(plan: Plan) -> list[NodeMetrics]:
    """Compute the derived metrics of every node in ``plan``.

    Nodes are visited in the order ``plan.root.walk()`` yields them, one
    ``NodeMetrics`` per node. Shares are relative to the plan's execution
    time, falling back to the root's inclusive time when that is missing.
    """
    total = plan.execution_time or plan.root.inclusive_time
    results: list[NodeMetrics] = []

    for node in plan.root.walk():
        # Self time: the node's inclusive time minus its children's,
        # clamped at zero so it never goes negative.
        inclusive = node.inclusive_time
        self_time = share = None
        if inclusive is not None:
            children_time = sum(child.inclusive_time or 0.0
                                for child in node.children)
            self_time = max(0.0, inclusive - children_time)
            if total:
                share = self_time / total

        # Estimation error: both sides are floored at one row so the ratio
        # is always defined.
        rows = node.actual_rows
        factor = error = None
        if rows is not None:
            factor = max(rows, 1.0) / max(node.plan_rows, 1.0)
            error = max(factor, 1.0 / factor)

        results.append(NodeMetrics(
            path=node.path,
            label=node.label,
            node_type=node.node_type,
            relation=node.relation,
            loops=node.actual_loops,
            inclusive_time=inclusive,
            self_time=self_time,
            pct_self=share,
            plan_rows=node.plan_rows,
            actual_rows=rows,
            estimation_factor=factor,
            estimation_error=error,
            spilled=node.spilled_to_disk,
            shared_read=node.shared_read,
            rows_removed_by_filter=node.rows_removed_by_filter,
            heap_fetches=node.heap_fetches))
    return results
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def rollups(plan: Plan, metrics: list[NodeMetrics]) -> dict:
    """Summarise a plan and its node metrics as plan-level figures.

    The result carries the plan's timings, node count and depth, the worst
    estimation error (None when no node has one), whether any node spilled,
    the total of shared blocks read, and the five nodes with the largest
    self time.
    """
    with_time = [m for m in metrics if m.self_time is not None]
    with_time.sort(key=lambda m: m.self_time, reverse=True)
    hottest = with_time[:5]

    known_errors = [m.estimation_error for m in metrics
                    if m.estimation_error is not None]
    worst_error = max(known_errors) if known_errors else None

    summary = {
        "execution_time_ms": plan.execution_time,
        "planning_time_ms": plan.planning_time,
        "has_actuals": plan.has_actuals,
        "node_count": len(metrics),
        "max_depth": max((len(m.path) for m in metrics), default=0),
        "max_estimation_error": worst_error,
        "spilled_to_disk": any(m.spilled for m in metrics),
        "total_shared_read_blocks": sum(m.shared_read for m in metrics),
    }
    summary["top_self_time"] = [
        {"label": m.label, "self_time_ms": m.self_time, "pct_self": m.pct_self}
        for m in hottest
    ]
    return summary
|