@query-doctor/core 0.4.2 → 0.5.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -52,6 +52,14 @@ const findFuncCallsOnColumns = (whereClause) => {
52
52
  location: node.FuncCall.location
53
53
  });
54
54
  });
55
+ Walker.shallowMatch(whereClause, "CoalesceExpr", (node) => {
56
+ if (node.CoalesceExpr.args && containsColumnRef(node.CoalesceExpr.args)) nudges.push({
57
+ kind: "AVOID_FUNCTIONS_ON_COLUMNS_IN_WHERE",
58
+ severity: "WARNING",
59
+ message: "Avoid using functions on columns in WHERE clause",
60
+ location: node.CoalesceExpr.location
61
+ });
62
+ });
55
63
  return nudges;
56
64
  };
57
65
  /**
@@ -78,6 +86,12 @@ function parseNudges(node, stack) {
78
86
  location: star.ResTarget.location
79
87
  });
80
88
  }
89
+ for (const target of node.SelectStmt.targetList ?? []) if (is$1(target, "ResTarget") && target.ResTarget.val && is$1(target.ResTarget.val, "SubLink") && target.ResTarget.val.SubLink.subLinkType === "EXPR_SUBLINK") nudges.push({
90
+ kind: "AVOID_SCALAR_SUBQUERY_IN_SELECT",
91
+ severity: "WARNING",
92
+ message: "Avoid correlated scalar subqueries in SELECT; consider rewriting as a JOIN",
93
+ location: target.ResTarget.val.SubLink.location
94
+ });
81
95
  }
82
96
  if (is$1(node, "SelectStmt")) {
83
97
  if (!stack.some((item) => item === "RangeSubselect" || item === "SubLink" || item === "CommonTableExpr")) {
@@ -102,6 +116,22 @@ function parseNudges(node, stack) {
102
116
  }
103
117
  }
104
118
  }
119
+ if (is$1(node, "SelectStmt") && node.SelectStmt.sortClause) for (const sortItem of node.SelectStmt.sortClause) {
120
+ if (!is$1(sortItem, "SortBy")) continue;
121
+ const sortDir = sortItem.SortBy.sortby_dir ?? "SORTBY_DEFAULT";
122
+ const sortNulls = sortItem.SortBy.sortby_nulls ?? "SORTBY_NULLS_DEFAULT";
123
+ if (sortDir === "SORTBY_DESC" && sortNulls === "SORTBY_NULLS_DEFAULT") {
124
+ if (sortItem.SortBy.node && is$1(sortItem.SortBy.node, "ColumnRef")) {
125
+ const sortColumnName = getLastColumnRefField(sortItem.SortBy.node);
126
+ if (!(sortColumnName !== null && whereHasIsNotNull(node.SelectStmt.whereClause, sortColumnName))) nudges.push({
127
+ kind: "NULLS_FIRST_IN_DESC_ORDER",
128
+ severity: "INFO",
129
+ message: "ORDER BY … DESC sorts NULLs first — add NULLS LAST to push them to the end",
130
+ location: sortItem.SortBy.node.ColumnRef.location
131
+ });
132
+ }
133
+ }
134
+ }
105
135
  if (is$1(node, "A_Expr")) {
106
136
  if (node.A_Expr.kind === "AEXPR_OP" && node.A_Expr.name && node.A_Expr.name.length > 0 && is$1(node.A_Expr.name[0], "String") && (node.A_Expr.name[0].String.sval === "=" || node.A_Expr.name[0].String.sval === "!=" || node.A_Expr.name[0].String.sval === "<>")) {
107
137
  const leftIsNull = isNullConstant(node.A_Expr.lexpr);
@@ -121,12 +151,20 @@ function parseNudges(node, stack) {
121
151
  nudges.push({
122
152
  kind: "AVOID_LEADING_WILDCARD_LIKE",
123
153
  severity: "WARNING",
124
- message: "Avoid using LIKE with leading wildcards",
154
+ message: "Leading wildcard in LIKE/ILIKE prevents index usage — consider a GIN trigram index (pg_trgm) or full-text search",
125
155
  location: stringNode?.location
126
156
  });
127
157
  }
128
158
  }
129
159
  }
160
+ if (is$1(node, "SelectStmt") && node.SelectStmt.sortClause) {
161
+ for (const sortItem of node.SelectStmt.sortClause) if (is$1(sortItem, "SortBy") && sortItem.SortBy.node && is$1(sortItem.SortBy.node, "FuncCall") && sortItem.SortBy.node.FuncCall.funcname?.some((name) => is$1(name, "String") && name.String.sval === "random")) nudges.push({
162
+ kind: "AVOID_ORDER_BY_RANDOM",
163
+ severity: "WARNING",
164
+ message: "Avoid using ORDER BY random()",
165
+ location: sortItem.SortBy.node.FuncCall.location
166
+ });
167
+ }
130
168
  if (is$1(node, "SelectStmt") && node.SelectStmt.distinctClause) nudges.push({
131
169
  kind: "AVOID_DISTINCT_WITHOUT_REASON",
132
170
  severity: "WARNING",
@@ -168,6 +206,29 @@ function parseNudges(node, stack) {
168
206
  });
169
207
  }
170
208
  }
209
+ if (is$1(node, "FuncCall")) {
210
+ const funcName = node.FuncCall.funcname;
211
+ if (funcName && funcName.length === 1 && is$1(funcName[0], "String") && funcName[0].String.sval === "count" && node.FuncCall.args && !node.FuncCall.agg_star && !node.FuncCall.agg_distinct) nudges.push({
212
+ kind: "PREFER_COUNT_STAR_OVER_COUNT_COLUMN",
213
+ severity: "INFO",
214
+ message: "Prefer COUNT(*) over COUNT(column) or COUNT(1) — COUNT(*) counts rows without checking for NULLs. If you need to count non-NULL values, COUNT(column) is correct.",
215
+ location: node.FuncCall.location
216
+ });
217
+ }
218
+ if (is$1(node, "SelectStmt") && node.SelectStmt.havingClause) {
219
+ if (!containsAggregate(node.SelectStmt.havingClause)) {
220
+ const having = node.SelectStmt.havingClause;
221
+ let location;
222
+ if (is$1(having, "A_Expr")) location = having.A_Expr.location;
223
+ else if (is$1(having, "BoolExpr")) location = having.BoolExpr.location;
224
+ nudges.push({
225
+ kind: "PREFER_WHERE_OVER_HAVING_FOR_NON_AGGREGATES",
226
+ severity: "INFO",
227
+ message: "Non-aggregate condition in HAVING should be in WHERE",
228
+ location
229
+ });
230
+ }
231
+ }
171
232
  if (is$1(node, "A_Expr")) {
172
233
  if (node.A_Expr.kind === "AEXPR_IN") {
173
234
  let list;
@@ -181,6 +242,45 @@ function parseNudges(node, stack) {
181
242
  });
182
243
  }
183
244
  }
245
+ if (is$1(node, "FuncCall")) {
246
+ const funcname = node.FuncCall.funcname?.[0] && is$1(node.FuncCall.funcname[0], "String") && node.FuncCall.funcname[0].String.sval;
247
+ if (funcname && [
248
+ "sum",
249
+ "count",
250
+ "avg",
251
+ "min",
252
+ "max"
253
+ ].includes(funcname.toLowerCase())) {
254
+ const firstArg = node.FuncCall.args?.[0];
255
+ if (firstArg && isANode$1(firstArg) && is$1(firstArg, "CaseExpr")) {
256
+ const caseExpr = firstArg.CaseExpr;
257
+ if (caseExpr.args && caseExpr.args.length === 1) {
258
+ const defresult = caseExpr.defresult;
259
+ if (!defresult || isANode$1(defresult) && is$1(defresult, "A_Const") && (defresult.A_Const.isnull !== void 0 || defresult.A_Const.ival !== void 0 && (defresult.A_Const.ival.ival === 0 || defresult.A_Const.ival.ival === void 0))) nudges.push({
260
+ kind: "PREFER_FILTER_OVER_CASE_IN_AGGREGATE",
261
+ severity: "INFO",
262
+ message: "Use FILTER (WHERE ...) instead of CASE inside aggregate functions",
263
+ location: node.FuncCall.location
264
+ });
265
+ }
266
+ }
267
+ }
268
+ }
269
+ if (is$1(node, "SelectStmt") && node.SelectStmt.op === "SETOP_UNION" && !node.SelectStmt.all) nudges.push({
270
+ kind: "PREFER_UNION_ALL_OVER_UNION",
271
+ severity: "INFO",
272
+ message: "UNION removes duplicates with an implicit sort — use UNION ALL if deduplication is not needed"
273
+ });
274
+ if (is$1(node, "A_Expr") && node.A_Expr.kind === "AEXPR_OP" && node.A_Expr.name && node.A_Expr.name.length > 0) {
275
+ const opNode = node.A_Expr.name[0];
276
+ const op = is$1(opNode, "String") ? opNode.String.sval : null;
277
+ if (op && isExistenceCheckPattern(node.A_Expr.lexpr, node.A_Expr.rexpr, op)) nudges.push({
278
+ kind: "USE_EXISTS_NOT_COUNT_FOR_EXISTENCE_CHECK",
279
+ severity: "INFO",
280
+ message: "Use EXISTS instead of COUNT for existence checks",
281
+ location: node.A_Expr.location
282
+ });
283
+ }
184
284
  return nudges;
185
285
  }
186
286
  function containsColumnRef(args) {
@@ -216,6 +316,48 @@ function getStringConstantValue(node) {
216
316
  if (isANode$1(node) && is$1(node, "A_Const") && node.A_Const.sval) return node.A_Const.sval.sval || null;
217
317
  return null;
218
318
  }
319
/**
 * Returns the trailing name component of a ColumnRef node.
 *
 * @param columnRef - Node wrapper exposing `ColumnRef.fields`.
 * @returns The `sval` of the last field when that field is a `String` node
 *   with a non-empty value; otherwise `null` (no fields, a non-String last
 *   field, or an empty/missing `sval`).
 */
function getLastColumnRefField(columnRef) {
  const parts = columnRef.ColumnRef.fields;
  if (!parts || parts.length === 0) {
    return null;
  }
  const tail = parts[parts.length - 1];
  const tailIsName = isANode$1(tail) && is$1(tail, "String");
  // An empty string is treated the same as "no name".
  return tailIsName ? tail.String.sval || null : null;
}
326
/**
 * Reports whether a WHERE clause contains an `IS NOT NULL` test on a column
 * whose last name component equals `columnName`.
 *
 * @param whereClause - WHERE clause node; a falsy value yields `false`.
 * @param columnName - Unqualified column name to look for.
 * @returns `true` if at least one matching NullTest was visited.
 */
function whereHasIsNotNull(whereClause, columnName) {
  if (!whereClause) {
    return false;
  }
  let matched = false;
  // NOTE(review): only the last ColumnRef field is compared, so a qualifier
  // (table alias) is ignored. Whether shallowMatch also visits NullTests
  // nested under OR/NOT is not visible from here — confirm against Walker.
  Walker.shallowMatch(whereClause, "NullTest", (hit) => {
    const test = hit.NullTest;
    const isNotNullOnColumn = test.nulltesttype === "IS_NOT_NULL" && test.arg && is$1(test.arg, "ColumnRef");
    if (isNotNullOnColumn && getLastColumnRefField(test.arg) === columnName) {
      matched = true;
    }
  });
  return matched;
}
336
/**
 * Lower-cased names of built-in PostgreSQL aggregate functions.
 *
 * Lookups are expected to lower-case the candidate name first. The original
 * ten entries are kept; the list is extended with the remaining
 * general-purpose, statistical and ordered-set aggregates so that an
 * expression such as `stddev(x) > 1` or `jsonb_agg(...)` is recognised as an
 * aggregate rather than as a plain function call.
 */
const AGGREGATE_FUNCTIONS = new Set([
  // General-purpose aggregates
  "any_value",
  "array_agg",
  "avg",
  "bit_and",
  "bit_or",
  "bit_xor",
  "bool_and",
  "bool_or",
  "count",
  "every",
  "json_agg",
  "json_object_agg",
  "jsonb_agg",
  "jsonb_object_agg",
  "max",
  "min",
  "range_agg",
  "range_intersect_agg",
  "string_agg",
  "sum",
  "xmlagg",
  // Statistical aggregates
  "corr",
  "covar_pop",
  "covar_samp",
  "regr_avgx",
  "regr_avgy",
  "regr_count",
  "regr_intercept",
  "regr_r2",
  "regr_slope",
  "regr_sxx",
  "regr_sxy",
  "regr_syy",
  "stddev",
  "stddev_pop",
  "stddev_samp",
  "var_pop",
  "var_samp",
  "variance",
  // Ordered-set aggregates (used with WITHIN GROUP)
  "mode",
  "percentile_cont",
  "percentile_disc"
]);
348
/**
 * Recursively searches a parse-tree fragment for a call to a known aggregate
 * function.
 *
 * A FuncCall counts as an aggregate when any `String` component of its
 * `funcname`, lower-cased, is present in AGGREGATE_FUNCTIONS.
 *
 * @param node - Node wrapper, array of nodes, plain object, or primitive.
 * @returns `true` if an aggregate call is found anywhere beneath `node`;
 *   `false` otherwise, including for falsy or non-object input.
 */
function containsAggregate(node) {
  if (!node || typeof node !== "object") {
    return false;
  }
  if (Array.isArray(node)) {
    return node.some((entry) => containsAggregate(entry));
  }
  const wrapsNode = isANode$1(node);
  if (wrapsNode && is$1(node, "FuncCall")) {
    const nameParts = node.FuncCall.funcname;
    if (nameParts) {
      for (const part of nameParts) {
        if (!isANode$1(part) || !is$1(part, "String")) continue;
        const lowered = part.String.sval?.toLowerCase() ?? "";
        if (AGGREGATE_FUNCTIONS.has(lowered)) return true;
      }
    }
  }
  if (wrapsNode) {
    // Descend into the value stored under the wrapper's first key.
    const [payloadKey] = Object.keys(node);
    return containsAggregate(node[payloadKey]);
  }
  return Object.values(node).some((child) => containsAggregate(child));
}
219
361
  function countBoolOrConditions(node) {
220
362
  if (node.BoolExpr.boolop !== "OR_EXPR" || !node.BoolExpr.args) return 1;
221
363
  let count = 0;
@@ -223,6 +365,54 @@ function countBoolOrConditions(node) {
223
365
  else count += 1;
224
366
  return count;
225
367
  }
368
/**
 * Tests whether `node` is a `count(*)` call or a `count(<constant>)` call.
 *
 * @param node - Candidate node; falsy or non-object values yield `false`.
 * @returns `true` when the node is a FuncCall with a `String` name component
 *   equal to "count" and either `agg_star` is set or its only argument is an
 *   `A_Const`; `false` otherwise.
 */
function isCountFuncCall(node) {
  const isFuncCall = Boolean(node) && typeof node === "object" && isANode$1(node) && is$1(node, "FuncCall");
  if (!isFuncCall) {
    return false;
  }
  const call = node.FuncCall;
  const namedCount = call.funcname?.some((part) => is$1(part, "String") && part.String.sval === "count") ?? false;
  if (!namedCount) {
    return false;
  }
  if (call.agg_star) {
    return true;
  }
  const callArgs = call.args;
  // count(1)-style: exactly one argument and it is a constant.
  return Boolean(callArgs && callArgs.length === 1 && isANode$1(callArgs[0]) && is$1(callArgs[0], "A_Const"));
}
377
/**
 * Tests whether `node` is a SubLink whose subselect is a SelectStmt with a
 * single target that satisfies `isCountFuncCall`.
 *
 * @param node - Candidate node; falsy or non-object values yield `false`.
 * @returns `true` only when every structural check passes.
 */
function isSubLinkWithCount(node) {
  if (!node || typeof node !== "object" || !isANode$1(node) || !is$1(node, "SubLink")) {
    return false;
  }
  const inner = node.SubLink.subselect;
  if (!inner || !isANode$1(inner) || !is$1(inner, "SelectStmt")) {
    return false;
  }
  const outputs = inner.SelectStmt.targetList;
  if (!outputs || outputs.length !== 1) {
    return false;
  }
  const [only] = [outputs[0]];
  const hasValue = isANode$1(only) && is$1(only, "ResTarget") && only.ResTarget.val;
  if (!hasValue) {
    return false;
  }
  return isCountFuncCall(only.ResTarget.val);
}
388
/**
 * Tests whether `node` is either a direct count call (`isCountFuncCall`) or a
 * sub-select producing one (`isSubLinkWithCount`).
 *
 * @param node - Candidate expression node.
 * @returns `true` if either check succeeds.
 */
function isCountExpression(node) {
  if (isCountFuncCall(node)) {
    return true;
  }
  return isSubLinkWithCount(node);
}
391
/**
 * Extracts the integer carried by an `A_Const` node.
 *
 * @param node - Candidate node; falsy or non-object values yield `null`.
 * @returns `null` when `node` is not an `A_Const` or has no `ival` entry;
 *   otherwise `ival.ival`, with a missing inner value read as 0.
 */
function getIntegerConstantValue(node) {
  const isConst = Boolean(node) && typeof node === "object" && isANode$1(node) && is$1(node, "A_Const");
  if (!isConst) {
    return null;
  }
  const wrapped = node.A_Const.ival;
  // An `ival` entry without an inner `ival` is treated as zero.
  return wrapped === void 0 ? null : wrapped.ival ?? 0;
}
397
/**
 * Detects comparisons that use a count only to test for existence.
 *
 * Recognised shapes, where `count` is anything accepted by
 * `isCountExpression`:
 *   count > 0, count >= 1, count != 0, count <> 0
 *   0 < count, 1 <= count, 0 != count, 0 <> count
 *
 * @param lexpr - Left operand node.
 * @param rexpr - Right operand node.
 * @param op - Operator text, e.g. ">" or "<>".
 * @returns `true` if the comparison matches one of the shapes above.
 */
function isExistenceCheckPattern(lexpr, rexpr, op) {
  const isInequality = op === "!=" || op === "<>";
  if (isCountExpression(lexpr)) {
    // Count on the left: the constant is the right operand.
    const bound = getIntegerConstantValue(rexpr);
    if (bound === 0 && (op === ">" || isInequality)) return true;
    if (bound === 1 && op === ">=") return true;
  }
  if (isCountExpression(rexpr)) {
    // Count on the right: same tests with the operator mirrored.
    const bound = getIntegerConstantValue(lexpr);
    if (bound === 0 && (op === "<" || isInequality)) return true;
    if (bound === 1 && op === "<=") return true;
  }
  return false;
}
226
416
 
227
417
  //#endregion
228
418
  //#region \0@oxc-project+runtime@0.112.0/helpers/typeof.js
@@ -686,6 +876,11 @@ var Analyzer = class {
686
876
  tags: [],
687
877
  queryWithoutTags: trimmedQuery
688
878
  };
879
+ const afterComment = trimmedQuery.slice(endPosition + 2).trim();
880
+ if (afterComment && afterComment !== ";") return {
881
+ tags: [],
882
+ queryWithoutTags: trimmedQuery
883
+ };
689
884
  const queryWithoutTags = trimmedQuery.slice(0, startPosition);
690
885
  const tagString = trimmedQuery.slice(startPosition + 2, endPosition).trim();
691
886
  if (!tagString || typeof tagString !== "string") return {
@@ -1586,6 +1781,9 @@ var Statistics = class Statistics {
1586
1781
  });
1587
1782
  }
1588
1783
  }
1784
+ const sample = columnStatsValues.find((v) => v.table_name === "users" && v.stakind3 !== 0);
1785
+ if (sample) console.log("[stats debug] sample stakind3:", sample.stakind3, "stanumbers3:", sample.stanumbers3);
1786
+ else console.log("[stats debug] no users column found with non-zero stakind3");
1589
1787
  /**
1590
1788
  * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
1591
1789
  * number of statistic types to choose from. Each code in `stakindN` can mean different things.
@@ -1794,7 +1992,9 @@ var Statistics = class Statistics {
1794
1992
  returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
1795
1993
  )
1796
1994
  select * from updated union all (select * from inserted); -- @qd_introspection`;
1797
- columnStatsUpdatePromise = tx.exec(sql, [columnStatsValues]).catch((err) => {
1995
+ columnStatsUpdatePromise = tx.exec(sql, [columnStatsValues]).then((rows) => {
1996
+ console.log("[stats debug] restore result rows:", JSON.stringify(rows));
1997
+ }).catch((err) => {
1798
1998
  console.error("Something wrong wrong updating column stats");
1799
1999
  console.error(err);
1800
2000
  throw err;
@@ -1889,24 +2089,27 @@ var Statistics = class Statistics {
1889
2089
  'columnName', c.column_name,
1890
2090
  'stats', (
1891
2091
  SELECT json_build_object(
1892
- 'starelid', s.starelid,
1893
- 'staattnum', s.staattnum,
1894
- 'stanullfrac', s.stanullfrac,
1895
- 'stawidth', s.stawidth,
1896
- 'stadistinct', s.stadistinct,
1897
- 'stakind1', s.stakind1, 'staop1', s.staop1, 'stacoll1', s.stacoll1, 'stanumbers1', s.stanumbers1,
1898
- 'stakind2', s.stakind2, 'staop2', s.staop2, 'stacoll2', s.stacoll2, 'stanumbers2', s.stanumbers2,
1899
- 'stakind3', s.stakind3, 'staop3', s.staop3, 'stacoll3', s.stacoll3, 'stanumbers3', s.stanumbers3,
1900
- 'stakind4', s.stakind4, 'staop4', s.staop4, 'stacoll4', s.stacoll4, 'stanumbers4', s.stanumbers4,
1901
- 'stakind5', s.stakind5, 'staop5', s.staop5, 'stacoll5', s.stacoll5, 'stanumbers5', s.stanumbers5,
1902
- 'stavalues1', CASE WHEN $1 THEN s.stavalues1 ELSE NULL END,
1903
- 'stavalues2', CASE WHEN $1 THEN s.stavalues2 ELSE NULL END,
1904
- 'stavalues3', CASE WHEN $1 THEN s.stavalues3 ELSE NULL END,
1905
- 'stavalues4', CASE WHEN $1 THEN s.stavalues4 ELSE NULL END,
1906
- 'stavalues5', CASE WHEN $1 THEN s.stavalues5 ELSE NULL END
2092
+ 'starelid', cl.oid,
2093
+ 'staattnum', a.attnum,
2094
+ 'stanullfrac', ps.null_frac,
2095
+ 'stawidth', ps.avg_width,
2096
+ 'stadistinct', ps.n_distinct,
2097
+ 'stakind1', CASE WHEN ps.most_common_vals IS NOT NULL THEN 1 ELSE 0 END, 'staop1', 0, 'stacoll1', 0, 'stanumbers1', ps.most_common_freqs,
2098
+ 'stakind2', CASE WHEN ps.histogram_bounds IS NOT NULL THEN 2 ELSE 0 END, 'staop2', 0, 'stacoll2', 0, 'stanumbers2', NULL,
2099
+ 'stakind3', CASE WHEN ps.correlation IS NOT NULL THEN 3 ELSE 0 END, 'staop3', 0, 'stacoll3', 0, 'stanumbers3', CASE WHEN ps.correlation IS NOT NULL THEN ARRAY[ps.correlation]::float4[] ELSE NULL END,
2100
+ 'stakind4', CASE WHEN ps.most_common_elems IS NOT NULL THEN 4 ELSE 0 END, 'staop4', 0, 'stacoll4', 0,
2101
+ 'stanumbers4', ps.most_common_elem_freqs,
2102
+ 'stakind5', 0, 'staop5', 0, 'stacoll5', 0, 'stanumbers5', NULL,
2103
+ 'stavalues1', CASE WHEN $1 THEN ps.most_common_vals ELSE NULL END,
2104
+ 'stavalues2', CASE WHEN $1 THEN ps.histogram_bounds ELSE NULL END,
2105
+ 'stavalues3', NULL,
2106
+ 'stavalues4', CASE WHEN $1 THEN ps.most_common_elems ELSE NULL END,
2107
+ 'stavalues5', NULL
1907
2108
  )
1908
- FROM pg_statistic s
1909
- WHERE s.starelid = a.attrelid AND s.staattnum = a.attnum
2109
+ FROM pg_stats ps
2110
+ WHERE ps.schemaname = c.table_schema
2111
+ AND ps.tablename = c.table_name
2112
+ AND ps.attname = c.column_name
1910
2113
  )
1911
2114
  )
1912
2115
  ORDER BY c.ordinal_position