@query-doctor/core 0.8.2-rc.1 → 0.8.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/defineProperty.cjs +1 -1
  2. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/defineProperty.mjs +1 -1
  3. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPrimitive.cjs +1 -1
  4. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPrimitive.mjs +1 -1
  5. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPropertyKey.cjs +1 -1
  6. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPropertyKey.mjs +1 -1
  7. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/typeof.cjs +1 -1
  8. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/typeof.mjs +1 -1
  9. package/dist/index.cjs +3 -0
  10. package/dist/index.d.cts +2 -2
  11. package/dist/index.d.mts +2 -2
  12. package/dist/index.mjs +2 -2
  13. package/dist/optimizer/genalgo.cjs +3 -2
  14. package/dist/optimizer/genalgo.cjs.map +1 -1
  15. package/dist/optimizer/genalgo.mjs +3 -2
  16. package/dist/optimizer/genalgo.mjs.map +1 -1
  17. package/dist/optimizer/pss-rewriter.cjs +1 -1
  18. package/dist/optimizer/pss-rewriter.mjs +1 -1
  19. package/dist/optimizer/statistics.cjs +396 -338
  20. package/dist/optimizer/statistics.cjs.map +1 -1
  21. package/dist/optimizer/statistics.d.cts +109 -1
  22. package/dist/optimizer/statistics.d.cts.map +1 -1
  23. package/dist/optimizer/statistics.d.mts +109 -1
  24. package/dist/optimizer/statistics.d.mts.map +1 -1
  25. package/dist/optimizer/statistics.mjs +393 -338
  26. package/dist/optimizer/statistics.mjs.map +1 -1
  27. package/dist/sql/builder.cjs +1 -1
  28. package/dist/sql/builder.mjs +1 -1
  29. package/dist/sql/pg-identifier.cjs +1 -1
  30. package/dist/sql/pg-identifier.mjs +1 -1
  31. package/dist/sql/walker.cjs +1 -1
  32. package/dist/sql/walker.mjs +1 -1
  33. package/package.json +3 -3
@@ -1,10 +1,10 @@
1
1
  "use client";
2
2
  const require_runtime = require("../_virtual/_rolldown/runtime.cjs");
3
- const require_defineProperty = require("../_virtual/_@oxc-project_runtime@0.122.0/helpers/defineProperty.cjs");
3
+ const require_defineProperty = require("../_virtual/_@oxc-project_runtime@0.126.0/helpers/defineProperty.cjs");
4
4
  let colorette = require("colorette");
5
5
  let zod = require("zod");
6
6
  let dedent = require("dedent");
7
- dedent = require_runtime.__toESM(dedent);
7
+ dedent = require_runtime.__toESM(dedent, 1);
8
8
  //#region src/optimizer/statistics.ts
9
9
  const StatisticsSource = zod.z.union([zod.z.object({
10
10
  kind: zod.z.literal("path"),
@@ -44,6 +44,7 @@ const ExportedStatsStatistics = zod.z.object({
44
44
  const ExportedStatsColumns = zod.z.object({
45
45
  columnName: zod.z.string(),
46
46
  attlen: zod.z.number().nullable(),
47
+ dataType: zod.z.string().optional(),
47
48
  stats: ExportedStatsStatistics.nullable()
48
49
  });
49
50
  const ExportedStatsIndex = zod.z.object({
@@ -75,7 +76,59 @@ const StatisticsMode = zod.z.discriminatedUnion("kind", [zod.z.object({
75
76
  stats: zod.z.array(ExportedStats),
76
77
  source: StatisticsSource
77
78
  })]);
78
- const DEFAULT_RELTUPLES = 1e4;
79
+ const ComputedColumnStats = zod.z.object({
80
+ schema_name: zod.z.string(),
81
+ table_name: zod.z.string(),
82
+ column_name: zod.z.string(),
83
+ data_type: zod.z.string().optional(),
84
+ stainherit: zod.z.boolean(),
85
+ stanullfrac: zod.z.number(),
86
+ stawidth: zod.z.number(),
87
+ stadistinct: zod.z.number(),
88
+ stakind1: zod.z.number(),
89
+ stakind2: zod.z.number(),
90
+ stakind3: zod.z.number(),
91
+ stakind4: zod.z.number(),
92
+ stakind5: zod.z.number(),
93
+ staop1: zod.z.string(),
94
+ staop2: zod.z.string(),
95
+ staop3: zod.z.string(),
96
+ staop4: zod.z.string(),
97
+ staop5: zod.z.string(),
98
+ stacoll1: zod.z.string(),
99
+ stacoll2: zod.z.string(),
100
+ stacoll3: zod.z.string(),
101
+ stacoll4: zod.z.string(),
102
+ stacoll5: zod.z.string(),
103
+ stanumbers1: zod.z.array(zod.z.number()).nullable(),
104
+ stanumbers2: zod.z.array(zod.z.number()).nullable(),
105
+ stanumbers3: zod.z.array(zod.z.number()).nullable(),
106
+ stanumbers4: zod.z.array(zod.z.number()).nullable(),
107
+ stanumbers5: zod.z.array(zod.z.number()).nullable(),
108
+ stavalues1: zod.z.array(zod.z.any()).nullable(),
109
+ stavalues2: zod.z.array(zod.z.any()).nullable(),
110
+ stavalues3: zod.z.array(zod.z.any()).nullable(),
111
+ stavalues4: zod.z.array(zod.z.any()).nullable(),
112
+ stavalues5: zod.z.array(zod.z.any()).nullable(),
113
+ _value_type1: zod.z.string().nullable(),
114
+ _value_type2: zod.z.string().nullable(),
115
+ _value_type3: zod.z.string().nullable(),
116
+ _value_type4: zod.z.string().nullable(),
117
+ _value_type5: zod.z.string().nullable()
118
+ });
119
+ const ComputedReltuples = zod.z.object({
120
+ relname: zod.z.string(),
121
+ schema_name: zod.z.string(),
122
+ reltuples: zod.z.number(),
123
+ relpages: zod.z.number(),
124
+ relallvisible: zod.z.number(),
125
+ relallfrozen: zod.z.number().optional()
126
+ });
127
+ const ComputedStats = zod.z.object({
128
+ columnStats: zod.z.array(ComputedColumnStats),
129
+ reltuples: zod.z.array(ComputedReltuples)
130
+ });
131
+ const DEFAULT_RELTUPLES = 1e7;
79
132
  const DEFAULT_RELPAGES = 1;
80
133
  const DEFAULT_PAGE_SIZE = 2 ** 13;
81
134
  function estimateStawidth(col) {
@@ -96,87 +149,29 @@ var Statistics = class Statistics {
96
149
  this.postgresVersion = postgresVersion;
97
150
  this.ownMetadata = ownMetadata;
98
151
  require_defineProperty._defineProperty(this, "mode", void 0);
152
+ require_defineProperty._defineProperty(this, "computedStats", void 0);
99
153
  require_defineProperty._defineProperty(this, "exportedMetadata", void 0);
100
154
  if (statsMode) {
101
155
  this.mode = statsMode;
102
156
  if (statsMode.kind === "fromStatisticsExport") this.exportedMetadata = statsMode.stats;
103
157
  } else this.mode = Statistics.defaultStatsMode;
158
+ this.computedStats = this.buildComputedStats();
104
159
  }
105
- static statsModeFromAssumption({ reltuples }) {
106
- return {
107
- kind: "fromAssumption",
108
- reltuples
109
- };
110
- }
111
- /**
112
- * Create a statistic mode from stats exported from another database
113
- **/
114
- static statsModeFromExport(stats) {
115
- return {
116
- kind: "fromStatisticsExport",
117
- source: { kind: "inline" },
118
- stats
119
- };
120
- }
121
- static async fromPostgres(db, statsMode) {
122
- const version = await db.serverNum();
123
- return new Statistics(db, version, await Statistics.dumpStats(db, version, "full"), statsMode);
124
- }
125
- restoreStats(tx) {
126
- return this.restoreStats17(tx);
127
- }
128
- approximateTotalRows() {
129
- if (!this.exportedMetadata) return 0;
130
- let totalRows = 0;
131
- for (const table of this.exportedMetadata) totalRows += table.reltuples;
132
- return totalRows;
133
- }
134
- /**
135
- * We have to cast stavaluesN to the correct type
136
- * This derives that type for us so it can be used in `array_in`
137
- */
138
- stavalueKind(values) {
139
- if (!values || values.length === 0) return null;
140
- const [elem] = values;
141
- if (typeof elem === "number") return "real";
142
- else if (typeof elem === "boolean") return "boolean";
143
- return "text";
144
- }
145
- /**
146
- * PostgreSQL's anyarray columns in pg_statistic can hold arrays of arrays
147
- * for columns with array types (e.g. text[], int4[]). These create
148
- * multidimensional arrays that can be "ragged" (sub-arrays with different
149
- * lengths). jsonb_to_recordset can't reconstruct ragged multidimensional
150
- * arrays from JSON, so we need to drop these values.
151
- */
152
- static safeStavalues(values) {
153
- if (!values || values.length === 0) return values;
154
- if (values.some((v) => Array.isArray(v))) {
155
- console.warn("Discarding ragged multidimensional stavalues array");
156
- return null;
157
- }
158
- return values;
159
- }
160
- async restoreStats17(tx) {
161
- const warnings = {
162
- tablesNotInExports: [],
163
- tablesNotInTest: [],
164
- tableNotAnalyzed: [],
165
- statsMissing: []
166
- };
167
- const processedTables = /* @__PURE__ */ new Set();
168
- let columnStatsUpdatePromise;
169
- const columnStatsValues = [];
160
+ buildComputedStats() {
161
+ const columnStats = [];
162
+ const reltuples = [];
170
163
  for (const table of this.ownMetadata) {
171
- const target = (this.exportedMetadata?.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName))?.columns ?? table.columns;
164
+ const targetTable = this.exportedMetadata?.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName);
165
+ const target = targetTable?.columns ?? table.columns;
172
166
  for (const column of target) {
173
167
  const { stats } = column;
174
168
  if (!stats || this.mode.kind === "fromAssumption") {
175
169
  const stawidth = stats?.stawidth || estimateStawidth(column);
176
- columnStatsValues.push({
170
+ columnStats.push({
177
171
  schema_name: table.schemaName,
178
172
  table_name: table.tableName,
179
173
  column_name: column.columnName,
174
+ data_type: column.dataType,
180
175
  stainherit: false,
181
176
  stanullfrac: .04,
182
177
  stawidth,
@@ -212,12 +207,11 @@ var Statistics = class Statistics {
212
207
  _value_type4: "real",
213
208
  _value_type5: "real"
214
209
  });
215
- continue;
216
- }
217
- columnStatsValues.push({
210
+ } else columnStats.push({
218
211
  schema_name: table.schemaName,
219
212
  table_name: table.tableName,
220
213
  column_name: column.columnName,
214
+ data_type: column.dataType,
221
215
  stainherit: stats.stainherit ?? false,
222
216
  stanullfrac: stats.stanullfrac,
223
217
  stawidth: stats.stawidth,
@@ -254,271 +248,148 @@ var Statistics = class Statistics {
254
248
  _value_type5: this.stavalueKind(Statistics.safeStavalues(stats.stavalues5))
255
249
  });
256
250
  }
251
+ let tableReltuples;
252
+ let tableRelpages;
253
+ let relallvisible = 0;
254
+ let relallfrozen;
255
+ if (this.mode.kind === "fromAssumption") {
256
+ tableReltuples = this.mode.reltuples;
257
+ tableRelpages = estimateRelpages(tableReltuples, table.columns);
258
+ } else if (targetTable) {
259
+ tableReltuples = targetTable.reltuples;
260
+ tableRelpages = targetTable.relpages;
261
+ relallvisible = targetTable.relallvisible;
262
+ relallfrozen = targetTable.relallfrozen;
263
+ } else {
264
+ tableReltuples = DEFAULT_RELTUPLES;
265
+ tableRelpages = DEFAULT_RELPAGES;
266
+ }
267
+ reltuples.push({
268
+ relname: table.tableName,
269
+ schema_name: table.schemaName,
270
+ reltuples: tableReltuples,
271
+ relpages: tableRelpages,
272
+ relallfrozen,
273
+ relallvisible
274
+ });
275
+ if (this.mode.kind === "fromAssumption") for (const index of table.indexes) {
276
+ const indexRelpages = estimateIndexRelpages(this.mode.reltuples, index.columns, index.fillfactor / 100, index.amname, tableRelpages);
277
+ reltuples.push({
278
+ relname: index.indexName,
279
+ schema_name: table.schemaName,
280
+ reltuples: this.mode.reltuples,
281
+ relpages: indexRelpages,
282
+ relallfrozen: 0,
283
+ relallvisible: indexRelpages
284
+ });
285
+ }
286
+ else if (targetTable) for (const index of targetTable.indexes) reltuples.push({
287
+ relname: index.indexName,
288
+ schema_name: targetTable.schemaName,
289
+ reltuples: index.reltuples,
290
+ relpages: index.relpages,
291
+ relallfrozen: index.relallfrozen,
292
+ relallvisible: index.relallvisible
293
+ });
257
294
  }
258
- /**
259
- * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
260
- * number of statistic types to choose from. Each code in `stakindN` can mean different things.
261
- * Some statistics are just numerical values such as `n_distinct` and `correlation`, meaning
262
- * they're only derived from `stanumbersN` and the value of `stavaluesN` is never read.
263
- * Others take advantage of the `stavaluesN` columns which use `anyarray` type to store
264
- * concrete values internally for things like histogram bounds.
265
- * Unfortunately we cannot change anyarrays without a C extension.
266
- *
267
- * (1) = most common values
268
- * (2) = scalar histogram
269
- * (3) = correlation <- can change
270
- * (4) = most common elements
271
- * (5) = distinct elem count histogram <- can change
272
- * (6) = length histogram (?) These don't appear in pg_stats
273
- * (7) = bounds histogram (?) These don't appear in pg_stats
274
- * (N) = potentially many more kinds of statistics. But postgres <=18 only uses these 7.
275
- *
276
- * What we're doing here is setting ANY statistic we cannot directly control
277
- * (anything that relies on stavaluesN) to 0 to make sure the planner isn't influenced by what
278
- * the db collected from the test data.
279
- * Because we do our tests with `generic_plan` it seems it's already unlikely that the planner will be
280
- * using things like common values or histogram bounds to make the planning decisions we care about.
281
- * This is a just in case.
282
- */
283
- const sql = dedent.default`
284
- WITH input AS (
285
- SELECT
286
- c.oid AS starelid,
287
- a.attnum AS staattnum,
288
- v.stainherit,
289
- v.stanullfrac,
290
- v.stawidth,
291
- v.stadistinct,
292
- v.stakind1,
293
- v.stakind2,
294
- v.stakind3,
295
- v.stakind4,
296
- v.stakind5,
297
- v.staop1,
298
- v.staop2,
299
- v.staop3,
300
- v.staop4,
301
- v.staop5,
302
- v.stacoll1,
303
- v.stacoll2,
304
- v.stacoll3,
305
- v.stacoll4,
306
- v.stacoll5,
307
- v.stanumbers1,
308
- v.stanumbers2,
309
- v.stanumbers3,
310
- v.stanumbers4,
311
- v.stanumbers5,
312
- v.stavalues1,
313
- v.stavalues2,
314
- v.stavalues3,
315
- v.stavalues4,
316
- v.stavalues5,
317
- _value_type1,
318
- _value_type2,
319
- _value_type3,
320
- _value_type4,
321
- _value_type5
322
- FROM jsonb_to_recordset($1::jsonb) AS v(
323
- schema_name text,
324
- table_name text,
325
- column_name text,
326
- stainherit boolean,
327
- stanullfrac real,
328
- stawidth integer,
329
- stadistinct real,
330
- stakind1 real,
331
- stakind2 real,
332
- stakind3 real,
333
- stakind4 real,
334
- stakind5 real,
335
- staop1 oid,
336
- staop2 oid,
337
- staop3 oid,
338
- staop4 oid,
339
- staop5 oid,
340
- stacoll1 oid,
341
- stacoll2 oid,
342
- stacoll3 oid,
343
- stacoll4 oid,
344
- stacoll5 oid,
345
- stanumbers1 real[],
346
- stanumbers2 real[],
347
- stanumbers3 real[],
348
- stanumbers4 real[],
349
- stanumbers5 real[],
350
- stavalues1 text[],
351
- stavalues2 text[],
352
- stavalues3 text[],
353
- stavalues4 text[],
354
- stavalues5 text[],
355
- _value_type1 text,
356
- _value_type2 text,
357
- _value_type3 text,
358
- _value_type4 text,
359
- _value_type5 text
360
- )
361
- JOIN pg_class c ON c.relname = v.table_name
362
- JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = v.schema_name
363
- JOIN pg_attribute a ON a.attrelid = c.oid AND a.attname = v.column_name
364
- ),
365
- updated AS (
366
- UPDATE pg_statistic s
367
- SET
368
- stanullfrac = i.stanullfrac,
369
- stawidth = i.stawidth,
370
- stadistinct = i.stadistinct,
371
- stakind1 = i.stakind1,
372
- stakind2 = i.stakind2,
373
- stakind3 = i.stakind3,
374
- stakind4 = i.stakind4,
375
- stakind5 = i.stakind5,
376
- staop1 = i.staop1,
377
- staop2 = i.staop2,
378
- staop3 = i.staop3,
379
- staop4 = i.staop4,
380
- staop5 = i.staop5,
381
- stacoll1 = i.stacoll1,
382
- stacoll2 = i.stacoll2,
383
- stacoll3 = i.stacoll3,
384
- stacoll4 = i.stacoll4,
385
- stacoll5 = i.stacoll5,
386
- stanumbers1 = i.stanumbers1,
387
- stanumbers2 = i.stanumbers2,
388
- stanumbers3 = i.stanumbers3,
389
- stanumbers4 = i.stanumbers4,
390
- stanumbers5 = i.stanumbers5,
391
- stavalues1 = case
392
- when i.stavalues1 is null then null
393
- else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
394
- end,
395
- stavalues2 = case
396
- when i.stavalues2 is null then null
397
- else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
398
- end,
399
- stavalues3 = case
400
- when i.stavalues3 is null then null
401
- else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
402
- end,
403
- stavalues4 = case
404
- when i.stavalues4 is null then null
405
- else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
406
- end,
407
- stavalues5 = case
408
- when i.stavalues5 is null then null
409
- else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
410
- end
411
- -- stavalues1 = i.stavalues1,
412
- -- stavalues2 = i.stavalues2,
413
- -- stavalues3 = i.stavalues3,
414
- -- stavalues4 = i.stavalues4,
415
- -- stavalues5 = i.stavalues5
416
- FROM input i
417
- WHERE s.starelid = i.starelid AND s.staattnum = i.staattnum AND s.stainherit = i.stainherit
418
- RETURNING s.starelid, s.staattnum, s.stainherit, s.stakind1, s.stakind2, s.stakind3, s.stakind4, s.stakind5
419
- ),
420
- inserted as (
421
- INSERT INTO pg_statistic (
422
- starelid, staattnum, stainherit,
423
- stanullfrac, stawidth, stadistinct,
424
- stakind1, stakind2, stakind3, stakind4, stakind5,
425
- staop1, staop2, staop3, staop4, staop5,
426
- stacoll1, stacoll2, stacoll3, stacoll4, stacoll5,
427
- stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5,
428
- stavalues1, stavalues2, stavalues3, stavalues4, stavalues5
429
- )
430
- SELECT
431
- i.starelid, i.staattnum, i.stainherit,
432
- i.stanullfrac, i.stawidth, i.stadistinct,
433
- i.stakind1, i.stakind2, i.stakind3, i.stakind4, i.stakind5,
434
- i.staop1, i.staop2, i.staop3, i.staop4, i.staop5,
435
- i.stacoll1, i.stacoll2, i.stacoll3, i.stacoll4, i.stacoll5,
436
- i.stanumbers1, i.stanumbers2, i.stanumbers3, i.stanumbers4, i.stanumbers5,
437
- -- i.stavalues1, i.stavalues2, i.stavalues3, i.stavalues4, i.stavalues5,
438
- case
439
- when i.stavalues1 is null then null
440
- else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
441
- end,
442
- case
443
- when i.stavalues2 is null then null
444
- else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
445
- end,
446
- case
447
- when i.stavalues3 is null then null
448
- else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
449
- end,
450
- case
451
- when i.stavalues4 is null then null
452
- else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
453
- end,
454
- case
455
- when i.stavalues5 is null then null
456
- else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
457
- end
458
- -- i._value_type1, i._value_type2, i._value_type3, i._value_type4, i._value_type5
459
- FROM input i
460
- LEFT JOIN updated u
461
- ON i.starelid = u.starelid AND i.staattnum = u.staattnum AND i.stainherit = u.stainherit
462
- WHERE u.starelid IS NULL
463
- returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
464
- )
465
- select * from updated union all (select * from inserted); -- @qd_introspection`;
466
- columnStatsUpdatePromise = tx.exec(sql, [columnStatsValues]).catch((err) => {
295
+ return {
296
+ columnStats,
297
+ reltuples
298
+ };
299
+ }
300
+ static statsModeFromAssumption({ reltuples }) {
301
+ return {
302
+ kind: "fromAssumption",
303
+ reltuples
304
+ };
305
+ }
306
+ /**
307
+ * Create a statistic mode from stats exported from another database
308
+ **/
309
+ static statsModeFromExport(stats) {
310
+ return {
311
+ kind: "fromStatisticsExport",
312
+ source: { kind: "inline" },
313
+ stats
314
+ };
315
+ }
316
+ static async fromPostgres(db, statsMode) {
317
+ const version = await db.serverNum();
318
+ return new Statistics(db, version, await Statistics.dumpStats(db, version, "full"), statsMode);
319
+ }
320
+ restoreStats(tx) {
321
+ return this.restoreStats17(tx);
322
+ }
323
+ approximateTotalRows() {
324
+ if (!this.exportedMetadata) return 0;
325
+ let totalRows = 0;
326
+ for (const table of this.exportedMetadata) totalRows += table.reltuples;
327
+ return totalRows;
328
+ }
329
+ /**
330
+ * We have to cast stavaluesN to the correct type
331
+ * This derives that type for us so it can be used in `array_in`
332
+ */
333
+ stavalueKind(values) {
334
+ if (!values || values.length === 0) return null;
335
+ const [elem] = values;
336
+ if (typeof elem === "number") return "real";
337
+ else if (typeof elem === "boolean") return "boolean";
338
+ return "text";
339
+ }
340
+ /**
341
+ * PostgreSQL's anyarray columns in pg_statistic can hold arrays of arrays
342
+ * for columns with array types (e.g. text[], int4[]). These create
343
+ * multidimensional arrays that can be "ragged" (sub-arrays with different
344
+ * lengths). jsonb_to_recordset can't reconstruct ragged multidimensional
345
+ * arrays from JSON, so we need to drop these values.
346
+ */
347
+ static safeStavalues(values) {
348
+ if (!values || values.length === 0) return values;
349
+ if (values.some((v) => Array.isArray(v))) {
350
+ console.warn("Discarding ragged multidimensional stavalues array");
351
+ return null;
352
+ }
353
+ return values;
354
+ }
355
+ async restoreStats17(tx) {
356
+ const warnings = {
357
+ tablesNotInExports: [],
358
+ tablesNotInTest: [],
359
+ tableNotAnalyzed: [],
360
+ statsMissing: []
361
+ };
362
+ const processedTables = new Set(this.ownMetadata.map((t) => `${t.schemaName}.${t.tableName}`));
363
+ const columnStatsUpdatePromise = tx.exec(Statistics.columnStatsSQL, [this.computedStats.columnStats]).catch((err) => {
467
364
  console.error("Something wrong wrong updating column stats");
468
365
  console.error(err);
469
366
  throw err;
470
367
  });
471
- const reltuplesValues = [];
472
- for (const table of this.ownMetadata) {
473
- processedTables.add(`${table.schemaName}.${table.tableName}`);
474
- let targetTable;
475
- if (this.exportedMetadata) targetTable = this.exportedMetadata.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName);
476
- else targetTable = table;
477
- let reltuples;
478
- let relpages;
479
- let relallvisible = 0;
480
- let relallfrozen;
481
- if (this.mode.kind === "fromAssumption") {
482
- reltuples = this.mode.reltuples;
483
- relpages = estimateRelpages(reltuples, table.columns);
484
- } else if (targetTable) {
485
- reltuples = targetTable.reltuples;
486
- relpages = targetTable.relpages;
487
- relallvisible = targetTable.relallvisible;
488
- relallfrozen = targetTable.relallfrozen;
489
- } else {
490
- warnings.tablesNotInExports.push(`${table.schemaName}.${table.tableName}`);
491
- reltuples = DEFAULT_RELTUPLES;
492
- relpages = DEFAULT_RELPAGES;
493
- }
494
- reltuplesValues.push({
495
- relname: table.tableName,
496
- schema_name: table.schemaName,
497
- reltuples,
498
- relpages,
499
- relallfrozen,
500
- relallvisible
501
- });
502
- if (this.mode.kind === "fromAssumption") for (const index of table.indexes) {
503
- const indexRelpages = estimateIndexRelpages(this.mode.reltuples, index.columns, index.fillfactor / 100, index.amname, relpages);
504
- reltuplesValues.push({
505
- relname: index.indexName,
506
- schema_name: table.schemaName,
507
- reltuples: this.mode.reltuples,
508
- relpages: indexRelpages,
509
- relallfrozen: 0,
510
- relallvisible: indexRelpages
511
- });
512
- }
513
- else if (targetTable) for (const index of targetTable.indexes) reltuplesValues.push({
514
- relname: index.indexName,
515
- schema_name: targetTable.schemaName,
516
- reltuples: index.reltuples,
517
- relpages: index.relpages,
518
- relallfrozen: index.relallfrozen,
519
- relallvisible: index.relallvisible
520
- });
521
- }
368
+ /**
369
+ * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
370
+ * number of statistic types to choose from. Each code in `stakindN` can mean different things.
371
+ * Some statistics are just numerical values such as `n_distinct` and `correlation`, meaning
372
+ * they're only derived from `stanumbersN` and the value of `stavaluesN` is never read.
373
+ * Others take advantage of the `stavaluesN` columns which use `anyarray` type to store
374
+ * concrete values internally for things like histogram bounds.
375
+ * Unfortunately we cannot change anyarrays without a C extension.
376
+ *
377
+ * (1) = most common values
378
+ * (2) = scalar histogram
379
+ * (3) = correlation <- can change
380
+ * (4) = most common elements
381
+ * (5) = distinct elem count histogram <- can change
382
+ * (6) = length histogram (?) These don't appear in pg_stats
383
+ * (7) = bounds histogram (?) These don't appear in pg_stats
384
+ * (N) = potentially many more kinds of statistics. But postgres <=18 only uses these 7.
385
+ *
386
+ * What we're doing here is setting ANY statistic we cannot directly control
387
+ * (anything that relies on stavaluesN) to 0 to make sure the planner isn't influenced by what
388
+ * the db collected from the test data.
389
+ * Because we do our tests with `generic_plan` it seems it's already unlikely that the planner will be
390
+ * using things like common values or histogram bounds to make the planning decisions we care about.
391
+ * This is a just in case.
392
+ */
522
393
  const reltuplesQuery = dedent.default`
523
394
  update pg_class p
524
395
  set reltuples = v.reltuples,
@@ -531,7 +402,7 @@ var Statistics = class Statistics {
531
402
  and p.relnamespace = (select oid from pg_namespace where nspname = v.schema_name)
532
403
  returning p.relname, p.relnamespace, p.reltuples, p.relpages;
533
404
  `;
534
- const reltuplesPromise = tx.exec(reltuplesQuery, [reltuplesValues]).catch((err) => {
405
+ const reltuplesPromise = tx.exec(reltuplesQuery, [this.computedStats.reltuples]).catch((err) => {
535
406
  console.error("Something went wrong updating reltuples/relpages");
536
407
  console.error(err);
537
408
  return err;
@@ -546,8 +417,8 @@ var Statistics = class Statistics {
546
417
  warnings.tablesNotInTest.push(`${table.schemaName}.${table.tableName}`);
547
418
  }
548
419
  const [statsUpdates, reltuplesUpdates] = await Promise.all([columnStatsUpdatePromise, reltuplesPromise]);
549
- if (!(statsUpdates ? statsUpdates.length === columnStatsValues.length : true)) console.error(`Did not update expected column stats`);
550
- if (reltuplesUpdates.length !== reltuplesValues.length) console.error(`Did not update expected reltuples/relpages`);
420
+ if (!(statsUpdates ? statsUpdates.length === this.computedStats.columnStats.length : true)) console.error(`Did not update expected column stats`);
421
+ if (reltuplesUpdates.length !== this.computedStats.reltuples.length) console.error(`Did not update expected reltuples/relpages`);
551
422
  return warnings;
552
423
  }
553
424
  static async dumpStats(db, postgresVersion, kind) {
@@ -566,6 +437,7 @@ var Statistics = class Statistics {
566
437
  json_build_object(
567
438
  'columnName', a.attname,
568
439
  'attlen', CASE WHEN a.attlen > 0 THEN a.attlen ELSE NULL END,
440
+ 'dataType', t.typname,
569
441
  'stats', (
570
442
  SELECT json_build_object(
571
443
  'starelid', s.starelid,
@@ -593,6 +465,7 @@ var Statistics = class Statistics {
593
465
  FROM pg_class cl
594
466
  JOIN pg_namespace n ON n.oid = cl.relnamespace
595
467
  JOIN pg_attribute a ON a.attrelid = cl.oid AND a.attnum > 0 AND NOT a.attisdropped
468
+ JOIN pg_type t ON t.oid = a.atttypid
596
469
  WHERE cl.relkind = 'r'
597
470
  AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'tiger', 'tiger_data', 'topology')
598
471
  AND cl.relname NOT IN ('pg_stat_statements', 'pg_stat_statements_info')
@@ -722,10 +595,195 @@ var Statistics = class Statistics {
722
595
  };
723
596
  require_defineProperty._defineProperty(Statistics, "defaultStatsMode", Object.freeze({
724
597
  kind: "fromAssumption",
725
- reltuples: DEFAULT_RELTUPLES,
726
- relpages: DEFAULT_RELPAGES
598
+ reltuples: DEFAULT_RELTUPLES
727
599
  }));
600
+ require_defineProperty._defineProperty(Statistics, "columnStatsSQL", dedent.default`
601
+ WITH input AS (
602
+ SELECT
603
+ c.oid AS starelid,
604
+ a.attnum AS staattnum,
605
+ v.stainherit,
606
+ v.stanullfrac,
607
+ v.stawidth,
608
+ v.stadistinct,
609
+ v.stakind1,
610
+ v.stakind2,
611
+ v.stakind3,
612
+ v.stakind4,
613
+ v.stakind5,
614
+ v.staop1,
615
+ v.staop2,
616
+ v.staop3,
617
+ v.staop4,
618
+ v.staop5,
619
+ v.stacoll1,
620
+ v.stacoll2,
621
+ v.stacoll3,
622
+ v.stacoll4,
623
+ v.stacoll5,
624
+ v.stanumbers1,
625
+ v.stanumbers2,
626
+ v.stanumbers3,
627
+ v.stanumbers4,
628
+ v.stanumbers5,
629
+ v.stavalues1,
630
+ v.stavalues2,
631
+ v.stavalues3,
632
+ v.stavalues4,
633
+ v.stavalues5,
634
+ _value_type1,
635
+ _value_type2,
636
+ _value_type3,
637
+ _value_type4,
638
+ _value_type5
639
+ FROM jsonb_to_recordset($1::jsonb) AS v(
640
+ schema_name text,
641
+ table_name text,
642
+ column_name text,
643
+ stainherit boolean,
644
+ stanullfrac real,
645
+ stawidth integer,
646
+ stadistinct real,
647
+ stakind1 real,
648
+ stakind2 real,
649
+ stakind3 real,
650
+ stakind4 real,
651
+ stakind5 real,
652
+ staop1 oid,
653
+ staop2 oid,
654
+ staop3 oid,
655
+ staop4 oid,
656
+ staop5 oid,
657
+ stacoll1 oid,
658
+ stacoll2 oid,
659
+ stacoll3 oid,
660
+ stacoll4 oid,
661
+ stacoll5 oid,
662
+ stanumbers1 real[],
663
+ stanumbers2 real[],
664
+ stanumbers3 real[],
665
+ stanumbers4 real[],
666
+ stanumbers5 real[],
667
+ stavalues1 text[],
668
+ stavalues2 text[],
669
+ stavalues3 text[],
670
+ stavalues4 text[],
671
+ stavalues5 text[],
672
+ _value_type1 text,
673
+ _value_type2 text,
674
+ _value_type3 text,
675
+ _value_type4 text,
676
+ _value_type5 text
677
+ )
678
+ JOIN pg_class c ON c.relname = v.table_name
679
+ JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = v.schema_name
680
+ JOIN pg_attribute a ON a.attrelid = c.oid AND a.attname = v.column_name
681
+ ),
682
+ updated AS (
683
+ UPDATE pg_statistic s
684
+ SET
685
+ stanullfrac = i.stanullfrac,
686
+ stawidth = i.stawidth,
687
+ stadistinct = i.stadistinct,
688
+ stakind1 = i.stakind1,
689
+ stakind2 = i.stakind2,
690
+ stakind3 = i.stakind3,
691
+ stakind4 = i.stakind4,
692
+ stakind5 = i.stakind5,
693
+ staop1 = i.staop1,
694
+ staop2 = i.staop2,
695
+ staop3 = i.staop3,
696
+ staop4 = i.staop4,
697
+ staop5 = i.staop5,
698
+ stacoll1 = i.stacoll1,
699
+ stacoll2 = i.stacoll2,
700
+ stacoll3 = i.stacoll3,
701
+ stacoll4 = i.stacoll4,
702
+ stacoll5 = i.stacoll5,
703
+ stanumbers1 = i.stanumbers1,
704
+ stanumbers2 = i.stanumbers2,
705
+ stanumbers3 = i.stanumbers3,
706
+ stanumbers4 = i.stanumbers4,
707
+ stanumbers5 = i.stanumbers5,
708
+ stavalues1 = case
709
+ when i.stavalues1 is null then null
710
+ else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
711
+ end,
712
+ stavalues2 = case
713
+ when i.stavalues2 is null then null
714
+ else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
715
+ end,
716
+ stavalues3 = case
717
+ when i.stavalues3 is null then null
718
+ else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
719
+ end,
720
+ stavalues4 = case
721
+ when i.stavalues4 is null then null
722
+ else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
723
+ end,
724
+ stavalues5 = case
725
+ when i.stavalues5 is null then null
726
+ else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
727
+ end
728
+ -- stavalues1 = i.stavalues1,
729
+ -- stavalues2 = i.stavalues2,
730
+ -- stavalues3 = i.stavalues3,
731
+ -- stavalues4 = i.stavalues4,
732
+ -- stavalues5 = i.stavalues5
733
+ FROM input i
734
+ WHERE s.starelid = i.starelid AND s.staattnum = i.staattnum AND s.stainherit = i.stainherit
735
+ RETURNING s.starelid, s.staattnum, s.stainherit, s.stakind1, s.stakind2, s.stakind3, s.stakind4, s.stakind5
736
+ ),
737
+ inserted as (
738
+ INSERT INTO pg_statistic (
739
+ starelid, staattnum, stainherit,
740
+ stanullfrac, stawidth, stadistinct,
741
+ stakind1, stakind2, stakind3, stakind4, stakind5,
742
+ staop1, staop2, staop3, staop4, staop5,
743
+ stacoll1, stacoll2, stacoll3, stacoll4, stacoll5,
744
+ stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5,
745
+ stavalues1, stavalues2, stavalues3, stavalues4, stavalues5
746
+ )
747
+ SELECT
748
+ i.starelid, i.staattnum, i.stainherit,
749
+ i.stanullfrac, i.stawidth, i.stadistinct,
750
+ i.stakind1, i.stakind2, i.stakind3, i.stakind4, i.stakind5,
751
+ i.staop1, i.staop2, i.staop3, i.staop4, i.staop5,
752
+ i.stacoll1, i.stacoll2, i.stacoll3, i.stacoll4, i.stacoll5,
753
+ i.stanumbers1, i.stanumbers2, i.stanumbers3, i.stanumbers4, i.stanumbers5,
754
+ -- i.stavalues1, i.stavalues2, i.stavalues3, i.stavalues4, i.stavalues5,
755
+ case
756
+ when i.stavalues1 is null then null
757
+ else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
758
+ end,
759
+ case
760
+ when i.stavalues2 is null then null
761
+ else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
762
+ end,
763
+ case
764
+ when i.stavalues3 is null then null
765
+ else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
766
+ end,
767
+ case
768
+ when i.stavalues4 is null then null
769
+ else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
770
+ end,
771
+ case
772
+ when i.stavalues5 is null then null
773
+ else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
774
+ end
775
+ -- i._value_type1, i._value_type2, i._value_type3, i._value_type4, i._value_type5
776
+ FROM input i
777
+ LEFT JOIN updated u
778
+ ON i.starelid = u.starelid AND i.staattnum = u.staattnum AND i.stainherit = u.stainherit
779
+ WHERE u.starelid IS NULL
780
+ returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
781
+ )
782
+ select * from updated union all (select * from inserted); -- @qd_introspection`);
728
783
  //#endregion
784
+ exports.ComputedColumnStats = ComputedColumnStats;
785
+ exports.ComputedReltuples = ComputedReltuples;
786
+ exports.ComputedStats = ComputedStats;
729
787
  exports.ExportedStats = ExportedStats;
730
788
  exports.ExportedStatsColumns = ExportedStatsColumns;
731
789
  exports.ExportedStatsIndex = ExportedStatsIndex;