npm - @query-doctor/core - Versions diffs - 0.10.4 → 0.10.5 - Mend

@query-doctor/core 0.10.4 → 0.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/dist/findings/query-findings.cjs +709 -0
package/dist/findings/query-findings.cjs.map +1 -0
package/dist/findings/query-findings.d.cts +59 -0
package/dist/findings/query-findings.d.cts.map +1 -0
package/dist/findings/query-findings.d.mts +59 -0
package/dist/findings/query-findings.d.mts.map +1 -0
package/dist/findings/query-findings.mjs +709 -0
package/dist/findings/query-findings.mjs.map +1 -0
package/dist/index.cjs +2 -0
package/dist/index.d.cts +2 -1
package/dist/index.d.mts +2 -1
package/dist/index.mjs +2 -1
package/dist/optimizer/statistics.cjs +17 -11
package/dist/optimizer/statistics.cjs.map +1 -1
package/dist/optimizer/statistics.d.cts +2 -0
package/dist/optimizer/statistics.d.cts.map +1 -1
package/dist/optimizer/statistics.d.mts +2 -0
package/dist/optimizer/statistics.d.mts.map +1 -1
package/dist/optimizer/statistics.mjs +17 -11
package/dist/optimizer/statistics.mjs.map +1 -1
package/package.json +1 -1

package/dist/findings/query-findings.mjs ADDED Viewed

@@ -0,0 +1,709 @@
+"use client";
+//#region src/findings/query-findings.ts
+const MIN_QUERY_COST = 100;
+const INDEX_SERVED_NODES = new Set([
+	"Index Only Scan",
+	"Index Scan",
+	"Bitmap Heap Scan",
+	"Bitmap Index Scan"
+]);
+const COST_CONCENTRATION_SHARE = .5;
+const COST_CONCENTRATION_WARN_SHARE = .8;
+const SEQ_SCAN_COST_SHARE = .3;
+const SEQ_SCAN_ROWS = 5e4;
+const LOOP_RATIO = 10;
+const LOOP_SHARE = .25;
+const MAX_LOOP_FINDINGS = 3;
+const SORT_COST_SHARE = .2;
+const SORT_WARN_SHARE = .4;
+const WIDE_RESULT_BYTES = 1e5;
+const WIDE_RESULT_WARN_BYTES = 256e3;
+const COST_DRIVER_SHARE = .4;
+const SELECTIVE_SCAN_MIN_SCANNED = 1e4;
+const SELECTIVE_SCAN_MAX_RATIO = .05;
+function asNode(plan) {
+	return plan;
+}
+function num(node, key) {
+	const value = node[key];
+	return typeof value === "number" ? value : void 0;
+}
+function str(node, key) {
+	const value = node[key];
+	return typeof value === "string" ? value : void 0;
+}
+function childNodes(node) {
+	return Array.isArray(node.Plans) ? node.Plans : [];
+}
+function walk(node, visit) {
+	visit(node);
+	for (const child of childNodes(node)) walk(child, visit);
+}
+/**
+* Postgres reports cumulative Total Cost (a node's cost includes its children).
+* A node's own contribution is therefore its Total Cost minus its children's —
+* which telescopes so the self-costs across the tree sum back to the root total.
+*/
+function selfCost(node) {
+	const total = num(node, "Total Cost") ?? 0;
+	const childTotal = childNodes(node).reduce((sum, child) => sum + (num(child, "Total Cost") ?? 0), 0);
+	return Math.max(0, total - childTotal);
+}
+function nodeLabel(node) {
+	const type = str(node, "Node Type") ?? "node";
+	const relation = str(node, "Relation Name") ?? str(node, "Alias");
+	return relation ? `${type} on ${relation}` : type;
+}
+function formatBytes(bytes) {
+	if (bytes >= 1e6) return `${(bytes / 1e6).toFixed(1)} MB`;
+	if (bytes >= 1e3) return `${Math.round(bytes / 1e3)} KB`;
+	return `${Math.round(bytes)} B`;
+}
+function costBreakdown(node) {
+	const raw = node["Cost Breakdown"];
+	if (!Array.isArray(raw)) return [];
+	const components = [];
+	for (const entry of raw) {
+		if (typeof entry !== "object" || entry === null) continue;
+		const record = entry;
+		const cost = record["Cost"];
+		const reason = record["Reason"];
+		if (typeof cost !== "number" || typeof reason !== "string") continue;
+		const variables = typeof record["Variables"] === "object" && record["Variables"] !== null ? record["Variables"] : {};
+		components.push({
+			cost,
+			reason,
+			variables
+		});
+	}
+	return components;
+}
+/** Plain-language name for a cost component, citing its load-bearing cardinality
+* (the planner's assumed relpages / reltuples). Maps only the Reasons QD's
+* planner actually emits; an unmapped one falls back to a generic phrase so a new
+* component never breaks the sentence. The distinction that matters to a reader
+* is disk I/O (the table or index is big on disk — fewer pages or a covering
+* index helps) vs per-row CPU (many rows flow through — better selectivity helps). */
+function costComponentPhrase(component) {
+	const cardinality = (key) => {
+		const value = component.variables[key];
+		return typeof value === "number" ? value.toLocaleString("en-US") : void 0;
+	};
+	switch (component.reason) {
+		case "RUNTIME:DISK_IO": {
+			const pages = cardinality("relpages");
+			return pages ? `reading the table off disk (~${pages} pages)` : "sequential disk reads";
+		}
+		case "RUNTIME:DISK_ACCESS": {
+			const pages = cardinality("index_pages_fetched");
+			return pages ? `random index-page reads (~${pages} pages)` : "random index-page reads";
+		}
+		case "RUNTIME:WORST_CASE_IO": {
+			const pages = cardinality("pages_fetched");
+			return pages ? `worst-case random heap reads (~${pages} pages, assuming low page visibility)` : "worst-case random heap reads";
+		}
+		case "RUNTIME:HEAP_FETCH_AND_FILTER": {
+			const rows = cardinality("reltuples");
+			return rows ? `fetching and filtering ~${rows} rows from the heap` : "per-row heap fetch and filter";
+		}
+		case "RUNTIME:HEAP_FILTER": {
+			const rows = cardinality("reltuples");
+			return rows ? `filtering ~${rows} rows` : "per-row filtering";
+		}
+		case "RUNTIME:INDEX_FILTER": return "scanning index entries";
+		case "RUNTIME:BITMAP": return "building the row bitmap";
+		default: return "its main cost component";
+	}
+}
+/** A one-sentence "where this node's cost goes", derived from its Cost Breakdown:
+* the single largest positive component, in plain language. Discounts (negative
+* parallel-worker / I/O-correlation adjustments) and ~zero startup terms aren't
+* where the cost lands, so they're dropped. Returns "" when the node has no
+* breakdown (vanilla plan) or no component clearly dominates — callers append it
+* unconditionally and get nothing when there's nothing certain to add. */
+function whyExpensive(node) {
+	const positive = costBreakdown(node).filter((c) => c.cost > 0);
+	if (positive.length === 0) return "";
+	const total = positive.reduce((sum, c) => sum + c.cost, 0);
+	const dominant = positive.reduce((a, b) => b.cost > a.cost ? b : a);
+	if (total <= 0 || dominant.cost / total < COST_DRIVER_SHARE) return "";
+	return ` Most of that node's cost is ${costComponentPhrase(dominant)}.`;
+}
+/** Rows a scan node reads, from its Cost Breakdown's `reltuples` (the planner's
+* table-size estimate) — distinct from Plan Rows, which is the *post-filter*
+* output. Undefined when the node carries no breakdown cardinality (vanilla plan). */
+function rowsScanned(node) {
+	let max;
+	for (const component of costBreakdown(node)) {
+		const value = component.variables["reltuples"];
+		if (typeof value === "number" && (max === void 0 || value > max)) max = value;
+	}
+	return max;
+}
+/** A diagnosis sentence for a scan that reads far more rows than it returns: the
+* filter throws away nearly everything it touched, which is exactly the work an
+* index avoids. Pure diagnosis — it states the waste, not a fix; the optimizer's
+* index verdict (handled by the caller) decides whether an index is the answer.
+* "" when the node isn't a selective scan or has no breakdown to measure it from. */
+function selectivityClause(node) {
+	const scanned = rowsScanned(node);
+	const returned = num(node, "Plan Rows");
+	if (scanned === void 0 || returned === void 0) return "";
+	if (scanned < SELECTIVE_SCAN_MIN_SCANNED) return "";
+	const ratio = scanned > 0 ? returned / scanned : 1;
+	if (ratio >= SELECTIVE_SCAN_MAX_RATIO) return "";
+	const pct = ratio < 1e-4 ? "<0.01%" : `~${(ratio * 100).toFixed(2)}%`;
+	return ` It scans ~${scanned.toLocaleString("en-US")} rows and returns ~${returned.toLocaleString("en-US")} (${pct}), so nearly all of that work is rows the filter discards.`;
+}
+/** A cost as a fraction of the query total, clamped to [0,1]. Parallel plans
+* (Gather / Gather Merge) report a child node's cost as the per-worker figure —
+* which can exceed the gathered root total, since the Gather divides the work.
+* Left unclamped that yields a nonsensical >100% share. */
+function clampShare(cost, total) {
+	if (total <= 0) return 0;
+	return Math.min(1, cost / total);
+}
+/** A node that reads an entire relation with nothing to narrow it: a Seq Scan or
+* full Index Only Scan carrying no Filter / Index Cond / Recheck Cond. The cost
+* is the whole table by definition — and since there's no predicate, no index can
+* avoid the pass (which is why the optimizer returns no_improvement_found). */
+function isUnfilteredFullRead(node) {
+	const type = str(node, "Node Type");
+	if (type !== "Seq Scan" && type !== "Index Only Scan") return false;
+	return node["Filter"] == null && node["Index Cond"] == null && node["Recheck Cond"] == null;
+}
+/** Whether the plan aggregates, and how — a scalar aggregate (count(*) over the
+* whole table, no grouping) can be approximated or maintained out of band, a
+* grouped one (GROUP BY / HAVING) genuinely needs every row, and neither rules
+* the full read. Grouped wins when both appear (e.g. `count(*)` over a grouped
+* subquery), since the grouping is what forces the whole-table pass. */
+function aggregateShape(root) {
+	let scalar = false;
+	let grouped = false;
+	walk(root, (node) => {
+		if (str(node, "Node Type") !== "Aggregate") return;
+		const groupKey = node["Group Key"];
+		if (Array.isArray(groupKey) && groupKey.length > 0) grouped = true;
+		else scalar = true;
+	});
+	return grouped ? "grouped" : scalar ? "scalar" : "none";
+}
+/** FULL_TABLE_SCAN: an unfiltered whole-relation read that dominates the cost
+* while the optimizer found no index that helps — because there's no predicate to
+* index. Without this the query shows *zero* findings (the dominant node is
+* index-served, so the concentration pass suppresses it), which reads as "QD
+* didn't look". This fills that silence with the reason and an honest lead. */
+function buildFullTableReadFinding(node, share, shape) {
+	const pct = Math.round(share * 100);
+	const relation = relationOf(node);
+	const target = relation ? `\`${relation}\`` : "The table";
+	let detail;
+	let lead;
+	if (shape === "scalar") {
+		detail = `${target} is read end to end to compute the aggregate — there's no filter to narrow it, so no index avoids the full pass (the optimizer confirmed none helps).`;
+		lead = `If this runs often, an approximate count (\`pg_class.reltuples\`) or a maintained tally avoids re-reading the whole table each call.`;
+	} else if (shape === "grouped") detail = `${target} is read end to end to group every row — there's no filter to narrow it, so the whole-table pass is inherent to the query.`;
+	else {
+		detail = `${target} is read end to end — there's no filter to narrow it, so no index avoids the full pass (the optimizer confirmed none helps).`;
+		lead = `If you don't need every row, a WHERE filter or LIMIT lets an index read only part of the table.`;
+	}
+	return {
+		code: "FULL_TABLE_SCAN",
+		severity: "info",
+		impact: share,
+		title: relation ? `Whole-table read of ${relation}` : "Whole-table read",
+		detail: `${detail}${whyExpensive(node)}`,
+		...lead ? { lead } : {},
+		evidence: {
+			...relation ? { relation } : {},
+			costShare: `${pct}%`
+		}
+	};
+}
+/** Sort nodes a LIMIT bounds directly (top-N) — `Limit → Sort`, possibly through
+* pass-through nodes (Result / Gather Merge / Gather). For these the win from a
+* matching index is bigger than skipping the sort: the LIMIT also lets the scan
+* stop after the first n rows instead of sorting the whole set. A Limit above an
+* Aggregate or a Join doesn't bound the sort below it, so the descent stops at
+* any non-pass-through node. */
+const LIMIT_PASSTHROUGH = new Set([
+	"Result",
+	"Gather",
+	"Gather Merge",
+	"LockRows"
+]);
+function sortsBoundedByLimit(root) {
+	const bounded = /* @__PURE__ */ new Set();
+	walk(root, (node) => {
+		if (str(node, "Node Type") !== "Limit") return;
+		let cursor = node;
+		const seen = /* @__PURE__ */ new Set();
+		while (cursor && !seen.has(cursor)) {
+			seen.add(cursor);
+			const child = childNodes(cursor)[0];
+			if (!child) break;
+			const type = str(child, "Node Type");
+			if (type === "Sort") {
+				bounded.add(child);
+				break;
+			}
+			if (!LIMIT_PASSTHROUGH.has(type ?? "")) break;
+			cursor = child;
+		}
+	});
+	return bounded;
+}
+/** Advice for a dominant Sort node. Seq Scan index advice keys off the
+* optimizer's verdict (handled inline); a Nested Loop is a multiplication (its
+* own finding); everything else has no generic hint — hence Sort-only here. */
+function sortHint(nodeType) {
+	return nodeType === "Sort" ? " It's an in-memory sort — an index matching the ORDER BY could avoid it." : "";
+}
+function relationOf(node) {
+	return str(node, "Relation Name") ?? str(node, "Alias");
+}
+/** A nested loop's two inputs: the outer side (driven once, its row count is the
+* loop count) and the inner side (re-run once per outer row). Postgres tags them
+* via Parent Relationship; fall back to child order when the tag is absent. */
+function loopSides(node) {
+	const kids = childNodes(node);
+	return {
+		outer: kids.find((k) => str(k, "Parent Relationship") === "Outer") ?? kids[0],
+		inner: kids.find((k) => str(k, "Parent Relationship") === "Inner") ?? kids[1]
+	};
+}
+function setBasedLead(relation) {
+	return `This subquery might fold into one set-based pass: aggregate or join ${relation ? `\`${relation}\`` : "the subquery's table"} once, then filter. Often much faster, but it depends on the data, so check before committing.`;
+}
+/** A node's SubPlan children — correlated subqueries (EXISTS, scalar) run once
+* per row, whether in the Filter (EXISTS) or the SELECT list (a scalar COUNT).
+* Parent Relationship "SubPlan" is the reliable tell; an InitPlan (also carries
+* a Subplan Name) runs once, so it's deliberately excluded. */
+function subplanChildren(node) {
+	return childNodes(node).filter(isSubplanChild);
+}
+/** REPEATED_INNER_LOOP for the SubPlan-on-a-node shape: a Seq/Index Scan (or
+* other node) that evaluates correlated subqueries once per row. The planner
+* folds that multiplied cost into the node's own total, so a naive reading
+* blames the scan and says "add an index" — but the cost is the per-row
+* repetition, not the scan. Covers both the d2armory count(*) (EXISTS subplans
+* in the Filter) and the migration backfill (scalar subplans in the SELECT). */
+function buildSubplanFinding(node, self, rootTotal) {
+	const share = clampShare(self, rootTotal);
+	const pct = Math.round(share * 100);
+	const relation = relationOf(node);
+	const nodeType = str(node, "Node Type") ?? "scan";
+	const subplans = subplanChildren(node);
+	const loops = num(node, "Plan Rows");
+	const loopsText = loops !== void 0 ? ` (~${loops.toLocaleString("en-US")} rows)` : "";
+	const count = subplans.length;
+	const noun = count === 1 ? "a correlated subquery" : `${count} correlated subqueries`;
+	const indexClause = relation ? ` An index on \`${relation}\` can shrink the scan but not the per-row subqueries.` : ``;
+	let subqueryRelation;
+	for (const subplan of subplans) {
+		const driver = heaviestDriver(subplan);
+		const rel = driver ? relationOf(driver) : void 0;
+		if (rel) {
+			subqueryRelation = rel;
+			break;
+		}
+	}
+	return {
+		code: "REPEATED_INNER_LOOP",
+		severity: share >= COST_CONCENTRATION_WARN_SHARE ? "warning" : "info",
+		impact: share,
+		title: "Correlated subquery runs once per row",
+		detail: `This ${nodeType}${relation ? ` over \`${relation}\`` : ""} runs ${noun} once per row${loopsText}, so about ${pct}% of the cost is that repetition, not the scan.${indexClause}`,
+		lead: setBasedLead(subqueryRelation),
+		evidence: {
+			...relation ? { relation } : {},
+			...loops !== void 0 ? { loops } : {},
+			subqueries: count,
+			costShare: `${pct}%`
+		}
+	};
+}
+/** The inner side is a correlated subquery (re-evaluated per outer row) rather
+* than a plain table probe — the tell is a Memoize/Subquery/SubPlan in its
+* subtree. Postgres only inserts Memoize to cache a repeatedly-run inner, so its
+* presence is itself the signal that work is being multiplied per row. */
+function innerIsCorrelatedSubquery(inner) {
+	let found = false;
+	walk(inner, (n) => {
+		const type = str(n, "Node Type");
+		if (type === "Memoize" || type === "Subquery Scan") found = true;
+		if (str(n, "Parent Relationship") === "SubPlan") found = true;
+		if (typeof n["Subplan Name"] === "string") found = true;
+	});
+	return found;
+}
+/** The single heaviest node within a subtree (by own cost), used to name what
+* actually drives an inner side's per-loop cost. */
+function heaviestDriver(subtree) {
+	let best;
+	let bestSelf = -1;
+	walk(subtree, (n) => {
+		const s = selfCost(n);
+		if (s > bestSelf) {
+			bestSelf = s;
+			best = n;
+		}
+	});
+	return best;
+}
+/** True when a child node is a correlated SubPlan (e.g. a scalar COUNT subquery
+* in the SELECT list), which re-runs per row of its parent rather than once. */
+function isSubplanChild(node) {
+	return str(node, "Parent Relationship") === "SubPlan";
+}
+/**
+* The node whose *multiplied* cost (own cost × how many times it runs) is
+* largest within a loop's per-row work — i.e. where the repeated cost truly
+* lands. A node cheap per run (a 631-cost heap fetch, an 8-cost COUNT) can
+* dominate once run thousands of times; a plain "heaviest node" reading misses
+* it. Per-row work hangs off the loop two ways: the inner side (run per outer
+* row) and correlated SubPlan children (run per *output* row, e.g. scalar
+* subqueries in the SELECT). Both are walked. No-EXPLAIN-ANALYZE: executions
+* come from Plan Rows × nested-loop / subplan multipliers, never real loops.
+*/
+function multipliedDriver(loopNode, baseLoops) {
+	const { inner } = loopSides(loopNode);
+	const subplans = subplanChildren(loopNode);
+	const outputRows = num(loopNode, "Plan Rows") ?? baseLoops;
+	let best;
+	let bestCost = -1;
+	const visit = (node, executions) => {
+		const cost = selfCost(node) * executions;
+		if (cost > bestCost) {
+			bestCost = cost;
+			best = node;
+		}
+		let innerChild;
+		let outerRows = 1;
+		if (str(node, "Node Type") === "Nested Loop") {
+			const sides = loopSides(node);
+			innerChild = sides.inner;
+			outerRows = sides.outer ? num(sides.outer, "Plan Rows") ?? 1 : 1;
+		}
+		const rows = num(node, "Plan Rows") ?? 1;
+		for (const child of childNodes(node)) visit(child, child === innerChild ? executions * outerRows : isSubplanChild(child) ? executions * rows : executions);
+	};
+	if (inner) visit(inner, baseLoops);
+	for (const subplan of subplans) visit(subplan, outputRows);
+	return best ? {
+		node: best,
+		multiplied: bestCost
+	} : void 0;
+}
+/** Ratio of a node's Total Cost to its children's combined single-pass cost.
+* >LOOP_RATIO means the node re-runs a child per row (loop multiplication). */
+function loopRatio(node) {
+	const childTotal = childNodes(node).reduce((sum, child) => sum + (num(child, "Total Cost") ?? 0), 0);
+	const total = num(node, "Total Cost") ?? 0;
+	return childTotal > 0 ? total / childTotal : 0;
+}
+/** Identity of what a nested loop repeats, so twin loops across subplans (same
+* correlated inner) collapse to one finding instead of several. */
+function loopSignature(node) {
+	const { inner } = loopSides(node);
+	const driver = inner ? heaviestDriver(inner) : void 0;
+	const driverRel = driver ? relationOf(driver) : void 0;
+	const correlated = inner ? innerIsCorrelatedSubquery(inner) : false;
+	return `${driverRel ?? "?"}|${correlated}`;
+}
+/** Build the REPEATED_INNER_LOOP verdict for a nested loop. Diagnose, don't
+* prescribe: decompose the cost into loops × per-loop, name what drives the
+* per-loop cost, and — when the inner is a correlated subquery — state the
+* load-bearing fact that an index can't remove the per-row repetition. No
+* rewrite advice: whether a rewrite helps is conditional and a human's call. */
+function buildLoopFinding(node, self, rootTotal) {
+	const share = clampShare(self, rootTotal);
+	const pct = Math.round(share * 100);
+	const { outer, inner } = loopSides(node);
+	const loops = outer ? num(outer, "Plan Rows") : void 0;
+	const loopsText = loops !== void 0 ? `~${loops.toLocaleString("en-US")}` : "many";
+	const correlated = (inner ? innerIsCorrelatedSubquery(inner) : false) || childNodes(node).some(isSubplanChild);
+	const driver = loops !== void 0 ? multipliedDriver(node, loops) : void 0;
+	const driverNode = driver?.node;
+	const driverRel = driverNode ? relationOf(driverNode) : void 0;
+	const driverType = driverNode ? str(driverNode, "Node Type") : void 0;
+	const driverShare = driver ? clampShare(driver.multiplied, rootTotal) : void 0;
+	const driverPct = driverShare !== void 0 ? Math.round(driverShare * 100) : void 0;
+	const driverText = driverRel && driverType && driverPct !== void 0 ? ` Most of it is the ${driverType} on \`${driverRel}\`, run ${loopsText} times (about ${driverPct}% of the query).` : ``;
+	return {
+		code: "REPEATED_INNER_LOOP",
+		severity: share >= COST_CONCENTRATION_WARN_SHARE ? "warning" : "info",
+		impact: share,
+		title: correlated ? "Correlated subquery runs once per row" : "Inner side re-runs once per row",
+		detail: `A nested loop re-runs its inner side once per row (${loopsText} rows), so about ${pct}% of the cost is that repetition, not one node.${driverText}${correlated ? ` It's a correlated subquery, so an index can speed each run but won't cut the repeats.` : ``}`,
+		...correlated ? { lead: setBasedLead(driverRel) } : {},
+		evidence: {
+			...loops !== void 0 ? { loops } : {},
+			...correlated ? { innerKind: "correlated subquery" } : {},
+			...driverRel ? { driver: driverRel } : {},
+			...driverPct !== void 0 ? { driverShare: `${driverPct}%` } : {},
+			costShare: `${pct}%`
+		}
+	};
+}
+function escapeRegExp(value) {
+	return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+const CONDITION_KEYS = [ // plan-node properties whose string values relationsWrappedInFunction collects as condition text
+	"Hash Cond",
+	"Join Filter",
+	"Filter",
+	"Recheck Cond",
+	"Merge Cond",
+	"Index Cond"
+];
+/**
+* Relations whose columns are wrapped in a function (or cast) inside some plan
+* condition — i.e. the relations a function-on-column actually blocks an index
+* on. The function-on-column nudge is AST-only and doesn't know which table is
+* affected; reading it off the plan's condition strings (e.g. `lower(cfg.repo)`,
+* mapping the alias back to its relation) is what stops us from blaming an
+* unrelated table that just happens to be sequentially scanned.
+*/
+function relationsWrappedInFunction(root) {
+	const conditions = [];
+	const aliasToRelation = /* @__PURE__ */ new Map();
+	walk(root, (node) => {
+		for (const key of CONDITION_KEYS) {
+			const value = node[key];
+			if (typeof value === "string") conditions.push(value);
+		}
+		const relation = str(node, "Relation Name");
+		if (relation) {
+			aliasToRelation.set(relation, relation);
+			const alias = str(node, "Alias");
+			if (alias) aliasToRelation.set(alias, relation);
+		}
+	});
+	const wrapped = /* @__PURE__ */ new Set();
+	for (const [aliasOrName, relation] of aliasToRelation) {
+		const ref = escapeRegExp(aliasOrName);
+		const inFunction = new RegExp(`[a-z_][a-z0-9_]*\\([^()]*\\b${ref}\\.`, "i");
+		const inCast = new RegExp(`\\b${ref}\\.[a-z_][a-z0-9_]*\\s*::`, "i");
+		if (conditions.some((c) => inFunction.test(c) || inCast.test(c))) wrapped.add(relation);
+	}
+	return wrapped;
+}
+function planOf(optimization) {
+	if (!optimization) return void 0;
+	if (optimization.state === "improvements_available" || optimization.state === "no_improvement_found") return optimization.explainPlan;
+}
+/**
+* Compute the plan-aware findings for one query from its stored optimization and
+* nudges. Returns [] when there's no plan to reason over (waiting, optimizing,
+* not_supported, timeout, error) — the syntactic nudges still stand on their own.
+*
+* Passes, in order: repeated nested loops, cost concentration on the single
+* heaviest node, sequential scans grouped per relation, unbounded wide results,
+* function-wrapped columns, and in-memory sorts. An EXPENSIVE_SEQ_SCAN whose
+* relation is already named by a function-on-column or repeated-loop finding is
+* dropped at the end, and the rest are returned sorted by descending impact.
+*
+* @param optimization Stored optimization result; only the
+*   "improvements_available" and "no_improvement_found" states carry a plan.
+* @param nudges Syntactic nudges for the query (default []); only `kind` is read.
+* @returns Array of findings, highest impact first.
+*/
+function analyzeQueryFindings(optimization, nudges = []) {
+	const plan = planOf(optimization);
+	if (!plan) return [];
+	const root = asNode(plan);
+	const rootTotal = num(root, "Total Cost") ?? 0;
+	if (rootTotal < MIN_QUERY_COST) return []; // plans estimated cheaper than MIN_QUERY_COST produce no findings
+	const recommendedDefs = optimization?.state === "improvements_available" ? optimization.indexRecommendations.map((rec) => rec.definition).filter((def) => typeof def === "string" && def !== "") : [];
+	const recommendedClause = recommendedDefs.length > 0 ? ` The recommended index ${recommendedDefs.map((def) => `\`${def}\``).join(" / ")} would help this.` : "";
+	const scanIndexClause = recommendedClause || (optimization?.state === "no_improvement_found" ? " The optimizer checked for an index and found none that helps here." : " An index on the filtered or joined columns would likely remove it.");
+	const nodes = []; // every plan node paired with its own (self) cost, in pre-order
+	walk(root, (node) => nodes.push({
+		node,
+		self: selfCost(node)
+	}));
+	const executions = /* @__PURE__ */ new Map(); // node -> estimated number of runs
+	const countExecutions = (node, runs) => {
+		executions.set(node, runs);
+		let innerChild;
+		let outerRows = 1;
+		if (str(node, "Node Type") === "Nested Loop") {
+			const sides = loopSides(node);
+			innerChild = sides.inner;
+			outerRows = sides.outer ? num(sides.outer, "Plan Rows") ?? 1 : 1;
+		}
+		for (const child of childNodes(node)) countExecutions(child, child === innerChild ? runs * outerRows : runs); // only a nested loop's inner side is multiplied, by the outer side's Plan Rows
+	};
+	countExecutions(root, 1);
+	const multipliedCost = (node) => selfCost(node) * (executions.get(node) ?? 1);
+	const findings = [];
+	const reported = /* @__PURE__ */ new Set(); // nodes already covered by a finding; later passes skip them
+	if (rootTotal > 0) { // pass 1: nested loops that multiply their inner side
+		const loopNodes = nodes.filter(({ node, self }) => str(node, "Node Type") === "Nested Loop" && clampShare(self, rootTotal) >= LOOP_SHARE && loopRatio(node) > LOOP_RATIO).sort((a, b) => b.self - a.self);
+		const seenSignatures = /* @__PURE__ */ new Set();
+		for (const { node, self } of loopNodes) {
+			if (reported.size >= MAX_LOOP_FINDINGS) break; // cap on loop findings
+			const sig = loopSignature(node);
+			if (seenSignatures.has(sig)) continue; // loops repeating the same inner collapse into one finding
+			seenSignatures.add(sig);
+			findings.push(buildLoopFinding(node, self, rootTotal));
+			reported.add(node);
+		}
+	}
+	if (rootTotal > 0 && nodes.length > 1) { // pass 2: cost concentrated in the single heaviest node
+		const top = nodes.reduce((best, candidate) => candidate.self > best.self ? candidate : best);
+		const share = clampShare(top.self, rootTotal);
+		if (share >= COST_CONCENTRATION_SHARE && !reported.has(top.node)) {
+			const nodeType = str(top.node, "Node Type");
+			if (nodeType === "Nested Loop") findings.push(buildLoopFinding(top.node, top.self, rootTotal));
+			else if (subplanChildren(top.node).length > 0) findings.push(buildSubplanFinding(top.node, top.self, rootTotal));
+			else if (INDEX_SERVED_NODES.has(nodeType ?? "") && optimization?.state === "no_improvement_found") { // already index-served and no better index found: only an unfiltered full read is reported
+				if (isUnfilteredFullRead(top.node)) findings.push(buildFullTableReadFinding(top.node, share, aggregateShape(root)));
+			} else {
+				const pct = Math.round(share * 100);
+				findings.push({
+					code: "COST_CONCENTRATION",
+					severity: share >= COST_CONCENTRATION_WARN_SHARE ? "warning" : "info",
+					impact: share,
+					title: `${pct}% of the cost is one node`,
+					detail: `About ${pct}% of this query's estimated cost is a single ${nodeLabel(top.node)}. ${share >= COST_CONCENTRATION_WARN_SHARE ? "It's the one thing worth tuning here." : "It's the biggest single contributor."}${whyExpensive(top.node)}${nodeType === "Seq Scan" ? selectivityClause(top.node) + scanIndexClause : sortHint(nodeType) + recommendedClause}`,
+					evidence: {
+						node: nodeLabel(top.node),
+						costShare: `${pct}%`,
+						nodeCost: Math.round(top.self),
+						totalCost: Math.round(rootTotal)
+					}
+				});
+			}
+			reported.add(top.node); // marked even when the index-served branch pushed no finding
+		}
+	}
+	const seqScansByRelation = /* @__PURE__ */ new Map(); // pass 3: unreported Seq Scans, aggregated per relation
+	for (const { node, self } of nodes) {
+		if (str(node, "Node Type") !== "Seq Scan") continue;
+		if (reported.has(node)) continue;
+		const relation = str(node, "Relation Name") ?? str(node, "Alias") ?? "?";
+		const rows = num(node, "Plan Rows");
+		const entry = seqScansByRelation.get(relation) ?? {
+			self: 0,
+			rows: void 0,
+			topNode: node,
+			topSelf: -1
+		};
+		entry.self += self;
+		if (self > entry.topSelf) { // remember the costliest scan of this relation for the wording below
+			entry.topSelf = self;
+			entry.topNode = node;
+		}
+		if (rows !== void 0) entry.rows = Math.max(entry.rows ?? 0, rows);
+		seqScansByRelation.set(relation, entry);
+	}
+	for (const [relation, { self, rows, topNode }] of seqScansByRelation) {
+		const share = clampShare(self, rootTotal);
+		if (!(share >= SEQ_SCAN_COST_SHARE) && !(rows !== void 0 && rows >= SEQ_SCAN_ROWS)) continue; // reported on cost share OR on row count
+		const pct = Math.round(share * 100);
+		const named = relation !== "?";
+		const selectivity = selectivityClause(topNode);
+		const rowsText = !selectivity && rows !== void 0 ? ` (~${rows.toLocaleString("en-US")} rows)` : ""; // the selectivity sentence already cites row counts
+		const scanned = selectivity ? rowsScanned(topNode) : void 0;
+		findings.push({
+			code: "EXPENSIVE_SEQ_SCAN",
+			severity: "warning",
+			impact: share,
+			title: `Sequential scan${named ? ` on ${relation}` : ""}`,
+			detail: `${named ? `\`${relation}\`` : "A table"} is read with a full sequential scan${rowsText}, about ${pct}% of the query's cost.${whyExpensive(topNode)}${selectivity}${scanIndexClause}`,
+			evidence: {
+				...named ? { relation } : {},
+				...rows !== void 0 ? { rows } : {},
+				...scanned !== void 0 ? { scanned } : {},
+				costShare: `${pct}%`
+			}
+		});
+	}
+	if (nudges.some((nudge) => nudge.kind === "MISSING_LIMIT_CLAUSE")) { // pass 4: unbounded result, sized as root Plan Rows x Plan Width
+		const rows = num(root, "Plan Rows");
+		const width = num(root, "Plan Width");
+		if (rows !== void 0 && width !== void 0) {
+			const bytes = rows * width;
+			if (bytes >= WIDE_RESULT_BYTES) findings.push({
+				code: "WIDE_RESULT_NO_LIMIT",
+				severity: bytes >= WIDE_RESULT_WARN_BYTES ? "warning" : "info",
+				impact: Math.min(.9, bytes / 2e6),
+				title: "Unbounded result set",
+				detail: `No LIMIT, and the query returns ~${rows.toLocaleString("en-US")} rows of ~${width} bytes (~${formatBytes(bytes)} total). That width is a planner estimate and undercounts jsonb and large-text columns, so the real payload is likely bigger. To shrink it, drop columns from the SELECT or bound the rows.`,
+				evidence: {
+					rows,
+					rowWidth: width,
+					estimatedPayload: formatBytes(bytes)
+				}
+			});
+		}
+	}
+	if (nudges.some((nudge) => nudge.kind === "AVOID_FUNCTIONS_ON_COLUMNS_IN_WHERE")) { // pass 5: Seq Scans on relations whose columns a condition wraps in a function or cast
+		const wrapped = relationsWrappedInFunction(root);
+		const blocked = [...new Set(nodes.filter(({ node }) => str(node, "Node Type") === "Seq Scan").map(({ node }) => str(node, "Relation Name")).filter((relation) => relation !== void 0))].filter((relation) => wrapped.has(relation));
+		if (blocked.length > 0) {
+			const relationList = blocked.map((r) => `\`${r}\``).join(", ");
+			const blockedCost = nodes.filter(({ node }) => str(node, "Node Type") === "Seq Scan" && blocked.includes(str(node, "Relation Name") ?? "")).reduce((sum, { node }) => sum + multipliedCost(node), 0);
+			findings.push({
+				code: "FUNCTION_ON_COLUMN_BLOCKS_INDEX",
+				severity: "warning",
+				impact: clampShare(blockedCost, rootTotal),
+				title: "A function on a column blocks an index",
+				detail: `A condition wraps ${relationList}'s column in a function (e.g. \`lower(col)\`), so Postgres scans the table instead of using an index. Compare the bare column, or add an expression index for the function.${recommendedClause}`,
+				evidence: { sequentialScans: relationList }
+			});
+		}
+	}
+	const reportedSortKeys = /* @__PURE__ */ new Set(); // pass 6: in-memory sorts, one finding per distinct sort key
+	const topNSorts = sortsBoundedByLimit(root);
+	for (const { node, self } of nodes) {
+		if (str(node, "Node Type") !== "Sort") continue;
+		if (reported.has(node)) continue;
+		const share = clampShare(self, rootTotal);
+		if (share < SORT_COST_SHARE) continue;
+		const sortKey = sortKeyText(node);
+		const dedupeKey = sortKey ?? "";
+		if (reportedSortKeys.has(dedupeKey)) continue;
+		reportedSortKeys.add(dedupeKey);
+		const pct = Math.round(share * 100);
+		const advice = sortKeyIsIndexable(sortKey) ? topNSorts.has(node) ? "An index in that order would skip the sort and, with the LIMIT, let Postgres stop after the first rows instead of ordering the whole set." : "An index in that order would let Postgres skip the sort." : "The sort key is computed at runtime (an aggregate or subquery result), so no index can pre-sort it.";
+		findings.push({
+			code: "SORT_WITHOUT_INDEX",
+			severity: share >= SORT_WARN_SHARE ? "warning" : "info",
+			impact: share,
+			title: "In-memory sort",
+			detail: `Rows are sorted in memory${sortKey ? ` by \`${sortKey}\`` : ""}, about ${pct}% of the query's cost. ${advice}`,
+			evidence: {
+				...sortKey ? { sortKey } : {},
+				costShare: `${pct}%`
+			}
+		});
+	}
+	const explained = /* @__PURE__ */ new Set(); // relations another finding already accounts for
+	for (const finding of findings) {
+		if (finding.code === "FUNCTION_ON_COLUMN_BLOCKS_INDEX") {
+			const list = String(finding.evidence?.sequentialScans ?? "");
+			for (const match of list.matchAll(/`([^`]+)`/g)) explained.add(match[1]); // recover the relation names from the backtick-quoted list
+		}
+		if (finding.code === "REPEATED_INNER_LOOP" && finding.evidence?.driver) explained.add(String(finding.evidence.driver));
+	}
+	const deduped = findings.filter((finding) => !(finding.code === "EXPENSIVE_SEQ_SCAN" && explained.has(String(finding.evidence?.relation ?? "")))); // drop seq-scan findings for explained relations
+	deduped.sort((a, b) => b.impact - a.impact); // highest impact first
+	return deduped;
+}
+/** Whether an index could pre-order this sort. A plain column (or a function of
+* one) can be served by an index; a sort on an aggregate, a subquery result, or
+* a computed comparison is produced at runtime, so no index can pre-order it —
+* promising one would be false advice. Unknown keys keep the generic advice. */
+function sortKeyIsIndexable(sortKey) {
+	if (!sortKey) return true;
+	if (/\bSubPlan\b/i.test(sortKey)) return false;
+	if (/\b(count|sum|avg|min|max|jsonb_agg|array_agg|string_agg|bool_or|bool_and|row_number|rank|dense_rank|ntile)\s*\(/i.test(sortKey)) return false;
+	if (/\s(=|<|>|<=|>=|<>)\s/.test(sortKey)) return false;
+	return true;
+}
+/** The Sort node's ORDER BY, as a compact string. `Sort Key` is a string array
+* (e.g. ["created_at DESC", "id DESC"]); join it and trim runaway length. */
+function sortKeyText(node) {
+	const raw = node["Sort Key"];
+	if (!Array.isArray(raw)) return void 0;
+	const keys = raw.filter((k) => typeof k === "string");
+	if (keys.length === 0) return void 0;
+	const joined = keys.join(", ");
+	return joined.length > 120 ? `${joined.slice(0, 117)}…` : joined;
+}
+//#endregion
+export { analyzeQueryFindings };
+//# sourceMappingURL=query-findings.mjs.map