@query-doctor/core 0.8.2 → 0.8.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/defineProperty.cjs +1 -1
  2. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/defineProperty.mjs +1 -1
  3. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPrimitive.cjs +1 -1
  4. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPrimitive.mjs +1 -1
  5. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPropertyKey.cjs +1 -1
  6. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/toPropertyKey.mjs +1 -1
  7. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/typeof.cjs +1 -1
  8. package/dist/_virtual/{_@oxc-project_runtime@0.122.0 → _@oxc-project_runtime@0.126.0}/helpers/typeof.mjs +1 -1
  9. package/dist/index.cjs +3 -0
  10. package/dist/index.d.cts +2 -2
  11. package/dist/index.d.mts +2 -2
  12. package/dist/index.mjs +2 -2
  13. package/dist/optimizer/genalgo.cjs +3 -2
  14. package/dist/optimizer/genalgo.cjs.map +1 -1
  15. package/dist/optimizer/genalgo.mjs +3 -2
  16. package/dist/optimizer/genalgo.mjs.map +1 -1
  17. package/dist/optimizer/pss-rewriter.cjs +1 -1
  18. package/dist/optimizer/pss-rewriter.mjs +1 -1
  19. package/dist/optimizer/statistics.cjs +396 -338
  20. package/dist/optimizer/statistics.cjs.map +1 -1
  21. package/dist/optimizer/statistics.d.cts +109 -1
  22. package/dist/optimizer/statistics.d.cts.map +1 -1
  23. package/dist/optimizer/statistics.d.mts +109 -1
  24. package/dist/optimizer/statistics.d.mts.map +1 -1
  25. package/dist/optimizer/statistics.mjs +393 -338
  26. package/dist/optimizer/statistics.mjs.map +1 -1
  27. package/dist/sql/builder.cjs +1 -1
  28. package/dist/sql/builder.mjs +1 -1
  29. package/dist/sql/pg-identifier.cjs +1 -1
  30. package/dist/sql/pg-identifier.mjs +1 -1
  31. package/dist/sql/walker.cjs +1 -1
  32. package/dist/sql/walker.mjs +1 -1
  33. package/package.json +3 -3
@@ -1,5 +1,5 @@
1
1
  "use client";
2
- import { _defineProperty } from "../_virtual/_@oxc-project_runtime@0.122.0/helpers/defineProperty.mjs";
2
+ import { _defineProperty } from "../_virtual/_@oxc-project_runtime@0.126.0/helpers/defineProperty.mjs";
3
3
  import { gray } from "colorette";
4
4
  import { z } from "zod";
5
5
  import dedent from "dedent";
@@ -42,6 +42,7 @@ const ExportedStatsStatistics = z.object({
42
42
  const ExportedStatsColumns = z.object({
43
43
  columnName: z.string(),
44
44
  attlen: z.number().nullable(),
45
+ dataType: z.string().optional(),
45
46
  stats: ExportedStatsStatistics.nullable()
46
47
  });
47
48
  const ExportedStatsIndex = z.object({
@@ -73,7 +74,59 @@ const StatisticsMode = z.discriminatedUnion("kind", [z.object({
73
74
  stats: z.array(ExportedStats),
74
75
  source: StatisticsSource
75
76
  })]);
76
- const DEFAULT_RELTUPLES = 1e4;
77
+ const ComputedColumnStats = z.object({
78
+ schema_name: z.string(),
79
+ table_name: z.string(),
80
+ column_name: z.string(),
81
+ data_type: z.string().optional(),
82
+ stainherit: z.boolean(),
83
+ stanullfrac: z.number(),
84
+ stawidth: z.number(),
85
+ stadistinct: z.number(),
86
+ stakind1: z.number(),
87
+ stakind2: z.number(),
88
+ stakind3: z.number(),
89
+ stakind4: z.number(),
90
+ stakind5: z.number(),
91
+ staop1: z.string(),
92
+ staop2: z.string(),
93
+ staop3: z.string(),
94
+ staop4: z.string(),
95
+ staop5: z.string(),
96
+ stacoll1: z.string(),
97
+ stacoll2: z.string(),
98
+ stacoll3: z.string(),
99
+ stacoll4: z.string(),
100
+ stacoll5: z.string(),
101
+ stanumbers1: z.array(z.number()).nullable(),
102
+ stanumbers2: z.array(z.number()).nullable(),
103
+ stanumbers3: z.array(z.number()).nullable(),
104
+ stanumbers4: z.array(z.number()).nullable(),
105
+ stanumbers5: z.array(z.number()).nullable(),
106
+ stavalues1: z.array(z.any()).nullable(),
107
+ stavalues2: z.array(z.any()).nullable(),
108
+ stavalues3: z.array(z.any()).nullable(),
109
+ stavalues4: z.array(z.any()).nullable(),
110
+ stavalues5: z.array(z.any()).nullable(),
111
+ _value_type1: z.string().nullable(),
112
+ _value_type2: z.string().nullable(),
113
+ _value_type3: z.string().nullable(),
114
+ _value_type4: z.string().nullable(),
115
+ _value_type5: z.string().nullable()
116
+ });
117
+ const ComputedReltuples = z.object({
118
+ relname: z.string(),
119
+ schema_name: z.string(),
120
+ reltuples: z.number(),
121
+ relpages: z.number(),
122
+ relallvisible: z.number(),
123
+ relallfrozen: z.number().optional()
124
+ });
125
+ const ComputedStats = z.object({
126
+ columnStats: z.array(ComputedColumnStats),
127
+ reltuples: z.array(ComputedReltuples)
128
+ });
129
+ const DEFAULT_RELTUPLES = 1e7;
77
130
  const DEFAULT_RELPAGES = 1;
78
131
  const DEFAULT_PAGE_SIZE = 2 ** 13;
79
132
  function estimateStawidth(col) {
@@ -94,87 +147,29 @@ var Statistics = class Statistics {
94
147
  this.postgresVersion = postgresVersion;
95
148
  this.ownMetadata = ownMetadata;
96
149
  _defineProperty(this, "mode", void 0);
150
+ _defineProperty(this, "computedStats", void 0);
97
151
  _defineProperty(this, "exportedMetadata", void 0);
98
152
  if (statsMode) {
99
153
  this.mode = statsMode;
100
154
  if (statsMode.kind === "fromStatisticsExport") this.exportedMetadata = statsMode.stats;
101
155
  } else this.mode = Statistics.defaultStatsMode;
156
+ this.computedStats = this.buildComputedStats();
102
157
  }
103
- static statsModeFromAssumption({ reltuples }) {
104
- return {
105
- kind: "fromAssumption",
106
- reltuples
107
- };
108
- }
109
- /**
110
- * Create a statistic mode from stats exported from another database
111
- **/
112
- static statsModeFromExport(stats) {
113
- return {
114
- kind: "fromStatisticsExport",
115
- source: { kind: "inline" },
116
- stats
117
- };
118
- }
119
- static async fromPostgres(db, statsMode) {
120
- const version = await db.serverNum();
121
- return new Statistics(db, version, await Statistics.dumpStats(db, version, "full"), statsMode);
122
- }
123
- restoreStats(tx) {
124
- return this.restoreStats17(tx);
125
- }
126
- approximateTotalRows() {
127
- if (!this.exportedMetadata) return 0;
128
- let totalRows = 0;
129
- for (const table of this.exportedMetadata) totalRows += table.reltuples;
130
- return totalRows;
131
- }
132
- /**
133
- * We have to cast stavaluesN to the correct type
134
- * This derives that type for us so it can be used in `array_in`
135
- */
136
- stavalueKind(values) {
137
- if (!values || values.length === 0) return null;
138
- const [elem] = values;
139
- if (typeof elem === "number") return "real";
140
- else if (typeof elem === "boolean") return "boolean";
141
- return "text";
142
- }
143
- /**
144
- * PostgreSQL's anyarray columns in pg_statistic can hold arrays of arrays
145
- * for columns with array types (e.g. text[], int4[]). These create
146
- * multidimensional arrays that can be "ragged" (sub-arrays with different
147
- * lengths). jsonb_to_recordset can't reconstruct ragged multidimensional
148
- * arrays from JSON, so we need to drop these values.
149
- */
150
- static safeStavalues(values) {
151
- if (!values || values.length === 0) return values;
152
- if (values.some((v) => Array.isArray(v))) {
153
- console.warn("Discarding ragged multidimensional stavalues array");
154
- return null;
155
- }
156
- return values;
157
- }
158
- async restoreStats17(tx) {
159
- const warnings = {
160
- tablesNotInExports: [],
161
- tablesNotInTest: [],
162
- tableNotAnalyzed: [],
163
- statsMissing: []
164
- };
165
- const processedTables = /* @__PURE__ */ new Set();
166
- let columnStatsUpdatePromise;
167
- const columnStatsValues = [];
158
+ buildComputedStats() {
159
+ const columnStats = [];
160
+ const reltuples = [];
168
161
  for (const table of this.ownMetadata) {
169
- const target = (this.exportedMetadata?.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName))?.columns ?? table.columns;
162
+ const targetTable = this.exportedMetadata?.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName);
163
+ const target = targetTable?.columns ?? table.columns;
170
164
  for (const column of target) {
171
165
  const { stats } = column;
172
166
  if (!stats || this.mode.kind === "fromAssumption") {
173
167
  const stawidth = stats?.stawidth || estimateStawidth(column);
174
- columnStatsValues.push({
168
+ columnStats.push({
175
169
  schema_name: table.schemaName,
176
170
  table_name: table.tableName,
177
171
  column_name: column.columnName,
172
+ data_type: column.dataType,
178
173
  stainherit: false,
179
174
  stanullfrac: .04,
180
175
  stawidth,
@@ -210,12 +205,11 @@ var Statistics = class Statistics {
210
205
  _value_type4: "real",
211
206
  _value_type5: "real"
212
207
  });
213
- continue;
214
- }
215
- columnStatsValues.push({
208
+ } else columnStats.push({
216
209
  schema_name: table.schemaName,
217
210
  table_name: table.tableName,
218
211
  column_name: column.columnName,
212
+ data_type: column.dataType,
219
213
  stainherit: stats.stainherit ?? false,
220
214
  stanullfrac: stats.stanullfrac,
221
215
  stawidth: stats.stawidth,
@@ -252,271 +246,148 @@ var Statistics = class Statistics {
252
246
  _value_type5: this.stavalueKind(Statistics.safeStavalues(stats.stavalues5))
253
247
  });
254
248
  }
249
+ let tableReltuples;
250
+ let tableRelpages;
251
+ let relallvisible = 0;
252
+ let relallfrozen;
253
+ if (this.mode.kind === "fromAssumption") {
254
+ tableReltuples = this.mode.reltuples;
255
+ tableRelpages = estimateRelpages(tableReltuples, table.columns);
256
+ } else if (targetTable) {
257
+ tableReltuples = targetTable.reltuples;
258
+ tableRelpages = targetTable.relpages;
259
+ relallvisible = targetTable.relallvisible;
260
+ relallfrozen = targetTable.relallfrozen;
261
+ } else {
262
+ tableReltuples = DEFAULT_RELTUPLES;
263
+ tableRelpages = DEFAULT_RELPAGES;
264
+ }
265
+ reltuples.push({
266
+ relname: table.tableName,
267
+ schema_name: table.schemaName,
268
+ reltuples: tableReltuples,
269
+ relpages: tableRelpages,
270
+ relallfrozen,
271
+ relallvisible
272
+ });
273
+ if (this.mode.kind === "fromAssumption") for (const index of table.indexes) {
274
+ const indexRelpages = estimateIndexRelpages(this.mode.reltuples, index.columns, index.fillfactor / 100, index.amname, tableRelpages);
275
+ reltuples.push({
276
+ relname: index.indexName,
277
+ schema_name: table.schemaName,
278
+ reltuples: this.mode.reltuples,
279
+ relpages: indexRelpages,
280
+ relallfrozen: 0,
281
+ relallvisible: indexRelpages
282
+ });
283
+ }
284
+ else if (targetTable) for (const index of targetTable.indexes) reltuples.push({
285
+ relname: index.indexName,
286
+ schema_name: targetTable.schemaName,
287
+ reltuples: index.reltuples,
288
+ relpages: index.relpages,
289
+ relallfrozen: index.relallfrozen,
290
+ relallvisible: index.relallvisible
291
+ });
255
292
  }
256
- /**
257
- * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
258
- * number of statistic types to choose from. Each code in `stakindN` can mean different things.
259
- * Some statistics are just numerical values such as `n_distinct` and `correlation`, meaning
260
- * they're only derived from `stanumbersN` and the value of `stavaluesN` is never read.
261
- * Others take advantage of the `stavaluesN` columns which use `anyarray` type to store
262
- * concrete values internally for things like histogram bounds.
263
- * Unfortunately we cannot change anyarrays without a C extension.
264
- *
265
- * (1) = most common values
266
- * (2) = scalar histogram
267
- * (3) = correlation <- can change
268
- * (4) = most common elements
269
- * (5) = distinct elem count histogram <- can change
270
- * (6) = length histogram (?) These don't appear in pg_stats
271
- * (7) = bounds histogram (?) These don't appear in pg_stats
272
- * (N) = potentially many more kinds of statistics. But postgres <=18 only uses these 7.
273
- *
274
- * What we're doing here is setting ANY statistic we cannot directly control
275
- * (anything that relies on stavaluesN) to 0 to make sure the planner isn't influenced by what
276
- * the db collected from the test data.
277
- * Because we do our tests with `generic_plan` it seems it's already unlikely that the planner will be
278
- * using things like common values or histogram bounds to make the planning decisions we care about.
279
- * This is a just in case.
280
- */
281
- const sql = dedent`
282
- WITH input AS (
283
- SELECT
284
- c.oid AS starelid,
285
- a.attnum AS staattnum,
286
- v.stainherit,
287
- v.stanullfrac,
288
- v.stawidth,
289
- v.stadistinct,
290
- v.stakind1,
291
- v.stakind2,
292
- v.stakind3,
293
- v.stakind4,
294
- v.stakind5,
295
- v.staop1,
296
- v.staop2,
297
- v.staop3,
298
- v.staop4,
299
- v.staop5,
300
- v.stacoll1,
301
- v.stacoll2,
302
- v.stacoll3,
303
- v.stacoll4,
304
- v.stacoll5,
305
- v.stanumbers1,
306
- v.stanumbers2,
307
- v.stanumbers3,
308
- v.stanumbers4,
309
- v.stanumbers5,
310
- v.stavalues1,
311
- v.stavalues2,
312
- v.stavalues3,
313
- v.stavalues4,
314
- v.stavalues5,
315
- _value_type1,
316
- _value_type2,
317
- _value_type3,
318
- _value_type4,
319
- _value_type5
320
- FROM jsonb_to_recordset($1::jsonb) AS v(
321
- schema_name text,
322
- table_name text,
323
- column_name text,
324
- stainherit boolean,
325
- stanullfrac real,
326
- stawidth integer,
327
- stadistinct real,
328
- stakind1 real,
329
- stakind2 real,
330
- stakind3 real,
331
- stakind4 real,
332
- stakind5 real,
333
- staop1 oid,
334
- staop2 oid,
335
- staop3 oid,
336
- staop4 oid,
337
- staop5 oid,
338
- stacoll1 oid,
339
- stacoll2 oid,
340
- stacoll3 oid,
341
- stacoll4 oid,
342
- stacoll5 oid,
343
- stanumbers1 real[],
344
- stanumbers2 real[],
345
- stanumbers3 real[],
346
- stanumbers4 real[],
347
- stanumbers5 real[],
348
- stavalues1 text[],
349
- stavalues2 text[],
350
- stavalues3 text[],
351
- stavalues4 text[],
352
- stavalues5 text[],
353
- _value_type1 text,
354
- _value_type2 text,
355
- _value_type3 text,
356
- _value_type4 text,
357
- _value_type5 text
358
- )
359
- JOIN pg_class c ON c.relname = v.table_name
360
- JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = v.schema_name
361
- JOIN pg_attribute a ON a.attrelid = c.oid AND a.attname = v.column_name
362
- ),
363
- updated AS (
364
- UPDATE pg_statistic s
365
- SET
366
- stanullfrac = i.stanullfrac,
367
- stawidth = i.stawidth,
368
- stadistinct = i.stadistinct,
369
- stakind1 = i.stakind1,
370
- stakind2 = i.stakind2,
371
- stakind3 = i.stakind3,
372
- stakind4 = i.stakind4,
373
- stakind5 = i.stakind5,
374
- staop1 = i.staop1,
375
- staop2 = i.staop2,
376
- staop3 = i.staop3,
377
- staop4 = i.staop4,
378
- staop5 = i.staop5,
379
- stacoll1 = i.stacoll1,
380
- stacoll2 = i.stacoll2,
381
- stacoll3 = i.stacoll3,
382
- stacoll4 = i.stacoll4,
383
- stacoll5 = i.stacoll5,
384
- stanumbers1 = i.stanumbers1,
385
- stanumbers2 = i.stanumbers2,
386
- stanumbers3 = i.stanumbers3,
387
- stanumbers4 = i.stanumbers4,
388
- stanumbers5 = i.stanumbers5,
389
- stavalues1 = case
390
- when i.stavalues1 is null then null
391
- else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
392
- end,
393
- stavalues2 = case
394
- when i.stavalues2 is null then null
395
- else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
396
- end,
397
- stavalues3 = case
398
- when i.stavalues3 is null then null
399
- else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
400
- end,
401
- stavalues4 = case
402
- when i.stavalues4 is null then null
403
- else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
404
- end,
405
- stavalues5 = case
406
- when i.stavalues5 is null then null
407
- else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
408
- end
409
- -- stavalues1 = i.stavalues1,
410
- -- stavalues2 = i.stavalues2,
411
- -- stavalues3 = i.stavalues3,
412
- -- stavalues4 = i.stavalues4,
413
- -- stavalues5 = i.stavalues5
414
- FROM input i
415
- WHERE s.starelid = i.starelid AND s.staattnum = i.staattnum AND s.stainherit = i.stainherit
416
- RETURNING s.starelid, s.staattnum, s.stainherit, s.stakind1, s.stakind2, s.stakind3, s.stakind4, s.stakind5
417
- ),
418
- inserted as (
419
- INSERT INTO pg_statistic (
420
- starelid, staattnum, stainherit,
421
- stanullfrac, stawidth, stadistinct,
422
- stakind1, stakind2, stakind3, stakind4, stakind5,
423
- staop1, staop2, staop3, staop4, staop5,
424
- stacoll1, stacoll2, stacoll3, stacoll4, stacoll5,
425
- stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5,
426
- stavalues1, stavalues2, stavalues3, stavalues4, stavalues5
427
- )
428
- SELECT
429
- i.starelid, i.staattnum, i.stainherit,
430
- i.stanullfrac, i.stawidth, i.stadistinct,
431
- i.stakind1, i.stakind2, i.stakind3, i.stakind4, i.stakind5,
432
- i.staop1, i.staop2, i.staop3, i.staop4, i.staop5,
433
- i.stacoll1, i.stacoll2, i.stacoll3, i.stacoll4, i.stacoll5,
434
- i.stanumbers1, i.stanumbers2, i.stanumbers3, i.stanumbers4, i.stanumbers5,
435
- -- i.stavalues1, i.stavalues2, i.stavalues3, i.stavalues4, i.stavalues5,
436
- case
437
- when i.stavalues1 is null then null
438
- else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
439
- end,
440
- case
441
- when i.stavalues2 is null then null
442
- else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
443
- end,
444
- case
445
- when i.stavalues3 is null then null
446
- else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
447
- end,
448
- case
449
- when i.stavalues4 is null then null
450
- else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
451
- end,
452
- case
453
- when i.stavalues5 is null then null
454
- else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
455
- end
456
- -- i._value_type1, i._value_type2, i._value_type3, i._value_type4, i._value_type5
457
- FROM input i
458
- LEFT JOIN updated u
459
- ON i.starelid = u.starelid AND i.staattnum = u.staattnum AND i.stainherit = u.stainherit
460
- WHERE u.starelid IS NULL
461
- returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
462
- )
463
- select * from updated union all (select * from inserted); -- @qd_introspection`;
464
- columnStatsUpdatePromise = tx.exec(sql, [columnStatsValues]).catch((err) => {
293
+ return {
294
+ columnStats,
295
+ reltuples
296
+ };
297
+ }
298
+ static statsModeFromAssumption({ reltuples }) {
299
+ return {
300
+ kind: "fromAssumption",
301
+ reltuples
302
+ };
303
+ }
304
+ /**
305
+ * Create a statistic mode from stats exported from another database
306
+ **/
307
+ static statsModeFromExport(stats) {
308
+ return {
309
+ kind: "fromStatisticsExport",
310
+ source: { kind: "inline" },
311
+ stats
312
+ };
313
+ }
314
+ static async fromPostgres(db, statsMode) {
315
+ const version = await db.serverNum();
316
+ return new Statistics(db, version, await Statistics.dumpStats(db, version, "full"), statsMode);
317
+ }
318
+ restoreStats(tx) {
319
+ return this.restoreStats17(tx);
320
+ }
321
+ approximateTotalRows() {
322
+ if (!this.exportedMetadata) return 0;
323
+ let totalRows = 0;
324
+ for (const table of this.exportedMetadata) totalRows += table.reltuples;
325
+ return totalRows;
326
+ }
327
+ /**
328
+ * We have to cast stavaluesN to the correct type
329
+ * This derives that type for us so it can be used in `array_in`
330
+ */
331
+ stavalueKind(values) {
332
+ if (!values || values.length === 0) return null;
333
+ const [elem] = values;
334
+ if (typeof elem === "number") return "real";
335
+ else if (typeof elem === "boolean") return "boolean";
336
+ return "text";
337
+ }
338
+ /**
339
+ * PostgreSQL's anyarray columns in pg_statistic can hold arrays of arrays
340
+ * for columns with array types (e.g. text[], int4[]). These create
341
+ * multidimensional arrays that can be "ragged" (sub-arrays with different
342
+ * lengths). jsonb_to_recordset can't reconstruct ragged multidimensional
343
+ * arrays from JSON, so we need to drop these values.
344
+ */
345
+ static safeStavalues(values) {
346
+ if (!values || values.length === 0) return values;
347
+ if (values.some((v) => Array.isArray(v))) {
348
+ console.warn("Discarding ragged multidimensional stavalues array");
349
+ return null;
350
+ }
351
+ return values;
352
+ }
353
+ async restoreStats17(tx) {
354
+ const warnings = {
355
+ tablesNotInExports: [],
356
+ tablesNotInTest: [],
357
+ tableNotAnalyzed: [],
358
+ statsMissing: []
359
+ };
360
+ const processedTables = new Set(this.ownMetadata.map((t) => `${t.schemaName}.${t.tableName}`));
361
+ const columnStatsUpdatePromise = tx.exec(Statistics.columnStatsSQL, [this.computedStats.columnStats]).catch((err) => {
465
362
  console.error("Something wrong wrong updating column stats");
466
363
  console.error(err);
467
364
  throw err;
468
365
  });
469
- const reltuplesValues = [];
470
- for (const table of this.ownMetadata) {
471
- processedTables.add(`${table.schemaName}.${table.tableName}`);
472
- let targetTable;
473
- if (this.exportedMetadata) targetTable = this.exportedMetadata.find((m) => m.tableName === table.tableName && m.schemaName === table.schemaName);
474
- else targetTable = table;
475
- let reltuples;
476
- let relpages;
477
- let relallvisible = 0;
478
- let relallfrozen;
479
- if (this.mode.kind === "fromAssumption") {
480
- reltuples = this.mode.reltuples;
481
- relpages = estimateRelpages(reltuples, table.columns);
482
- } else if (targetTable) {
483
- reltuples = targetTable.reltuples;
484
- relpages = targetTable.relpages;
485
- relallvisible = targetTable.relallvisible;
486
- relallfrozen = targetTable.relallfrozen;
487
- } else {
488
- warnings.tablesNotInExports.push(`${table.schemaName}.${table.tableName}`);
489
- reltuples = DEFAULT_RELTUPLES;
490
- relpages = DEFAULT_RELPAGES;
491
- }
492
- reltuplesValues.push({
493
- relname: table.tableName,
494
- schema_name: table.schemaName,
495
- reltuples,
496
- relpages,
497
- relallfrozen,
498
- relallvisible
499
- });
500
- if (this.mode.kind === "fromAssumption") for (const index of table.indexes) {
501
- const indexRelpages = estimateIndexRelpages(this.mode.reltuples, index.columns, index.fillfactor / 100, index.amname, relpages);
502
- reltuplesValues.push({
503
- relname: index.indexName,
504
- schema_name: table.schemaName,
505
- reltuples: this.mode.reltuples,
506
- relpages: indexRelpages,
507
- relallfrozen: 0,
508
- relallvisible: indexRelpages
509
- });
510
- }
511
- else if (targetTable) for (const index of targetTable.indexes) reltuplesValues.push({
512
- relname: index.indexName,
513
- schema_name: targetTable.schemaName,
514
- reltuples: index.reltuples,
515
- relpages: index.relpages,
516
- relallfrozen: index.relallfrozen,
517
- relallvisible: index.relallvisible
518
- });
519
- }
366
+ /**
367
+ * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
368
+ * number of statistic types to choose from. Each code in `stakindN` can mean different things.
369
+ * Some statistics are just numerical values such as `n_distinct` and `correlation`, meaning
370
+ * they're only derived from `stanumbersN` and the value of `stavaluesN` is never read.
371
+ * Others take advantage of the `stavaluesN` columns which use `anyarray` type to store
372
+ * concrete values internally for things like histogram bounds.
373
+ * Unfortunately we cannot change anyarrays without a C extension.
374
+ *
375
+ * (1) = most common values
376
+ * (2) = scalar histogram
377
+ * (3) = correlation <- can change
378
+ * (4) = most common elements
379
+ * (5) = distinct elem count histogram <- can change
380
+ * (6) = length histogram (?) These don't appear in pg_stats
381
+ * (7) = bounds histogram (?) These don't appear in pg_stats
382
+ * (N) = potentially many more kinds of statistics. But postgres <=18 only uses these 7.
383
+ *
384
+ * What we're doing here is setting ANY statistic we cannot directly control
385
+ * (anything that relies on stavaluesN) to 0 to make sure the planner isn't influenced by what
386
+ * the db collected from the test data.
387
+ * Because we do our tests with `generic_plan` it seems it's already unlikely that the planner will be
388
+ * using things like common values or histogram bounds to make the planning decisions we care about.
389
+ * This is a just in case.
390
+ */
520
391
  const reltuplesQuery = dedent`
521
392
  update pg_class p
522
393
  set reltuples = v.reltuples,
@@ -529,7 +400,7 @@ var Statistics = class Statistics {
529
400
  and p.relnamespace = (select oid from pg_namespace where nspname = v.schema_name)
530
401
  returning p.relname, p.relnamespace, p.reltuples, p.relpages;
531
402
  `;
532
- const reltuplesPromise = tx.exec(reltuplesQuery, [reltuplesValues]).catch((err) => {
403
+ const reltuplesPromise = tx.exec(reltuplesQuery, [this.computedStats.reltuples]).catch((err) => {
533
404
  console.error("Something went wrong updating reltuples/relpages");
534
405
  console.error(err);
535
406
  return err;
@@ -544,8 +415,8 @@ var Statistics = class Statistics {
544
415
  warnings.tablesNotInTest.push(`${table.schemaName}.${table.tableName}`);
545
416
  }
546
417
  const [statsUpdates, reltuplesUpdates] = await Promise.all([columnStatsUpdatePromise, reltuplesPromise]);
547
- if (!(statsUpdates ? statsUpdates.length === columnStatsValues.length : true)) console.error(`Did not update expected column stats`);
548
- if (reltuplesUpdates.length !== reltuplesValues.length) console.error(`Did not update expected reltuples/relpages`);
418
+ if (!(statsUpdates ? statsUpdates.length === this.computedStats.columnStats.length : true)) console.error(`Did not update expected column stats`);
419
+ if (reltuplesUpdates.length !== this.computedStats.reltuples.length) console.error(`Did not update expected reltuples/relpages`);
549
420
  return warnings;
550
421
  }
551
422
  static async dumpStats(db, postgresVersion, kind) {
@@ -564,6 +435,7 @@ var Statistics = class Statistics {
564
435
  json_build_object(
565
436
  'columnName', a.attname,
566
437
  'attlen', CASE WHEN a.attlen > 0 THEN a.attlen ELSE NULL END,
438
+ 'dataType', t.typname,
567
439
  'stats', (
568
440
  SELECT json_build_object(
569
441
  'starelid', s.starelid,
@@ -591,6 +463,7 @@ var Statistics = class Statistics {
591
463
  FROM pg_class cl
592
464
  JOIN pg_namespace n ON n.oid = cl.relnamespace
593
465
  JOIN pg_attribute a ON a.attrelid = cl.oid AND a.attnum > 0 AND NOT a.attisdropped
466
+ JOIN pg_type t ON t.oid = a.atttypid
594
467
  WHERE cl.relkind = 'r'
595
468
  AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'tiger', 'tiger_data', 'topology')
596
469
  AND cl.relname NOT IN ('pg_stat_statements', 'pg_stat_statements_info')
@@ -720,10 +593,192 @@ var Statistics = class Statistics {
720
593
  };
721
594
  _defineProperty(Statistics, "defaultStatsMode", Object.freeze({
722
595
  kind: "fromAssumption",
723
- reltuples: DEFAULT_RELTUPLES,
724
- relpages: DEFAULT_RELPAGES
596
+ reltuples: DEFAULT_RELTUPLES
725
597
  }));
598
+ _defineProperty(Statistics, "columnStatsSQL", dedent`
599
+ WITH input AS (
600
+ SELECT
601
+ c.oid AS starelid,
602
+ a.attnum AS staattnum,
603
+ v.stainherit,
604
+ v.stanullfrac,
605
+ v.stawidth,
606
+ v.stadistinct,
607
+ v.stakind1,
608
+ v.stakind2,
609
+ v.stakind3,
610
+ v.stakind4,
611
+ v.stakind5,
612
+ v.staop1,
613
+ v.staop2,
614
+ v.staop3,
615
+ v.staop4,
616
+ v.staop5,
617
+ v.stacoll1,
618
+ v.stacoll2,
619
+ v.stacoll3,
620
+ v.stacoll4,
621
+ v.stacoll5,
622
+ v.stanumbers1,
623
+ v.stanumbers2,
624
+ v.stanumbers3,
625
+ v.stanumbers4,
626
+ v.stanumbers5,
627
+ v.stavalues1,
628
+ v.stavalues2,
629
+ v.stavalues3,
630
+ v.stavalues4,
631
+ v.stavalues5,
632
+ _value_type1,
633
+ _value_type2,
634
+ _value_type3,
635
+ _value_type4,
636
+ _value_type5
637
+ FROM jsonb_to_recordset($1::jsonb) AS v(
638
+ schema_name text,
639
+ table_name text,
640
+ column_name text,
641
+ stainherit boolean,
642
+ stanullfrac real,
643
+ stawidth integer,
644
+ stadistinct real,
645
+ stakind1 real,
646
+ stakind2 real,
647
+ stakind3 real,
648
+ stakind4 real,
649
+ stakind5 real,
650
+ staop1 oid,
651
+ staop2 oid,
652
+ staop3 oid,
653
+ staop4 oid,
654
+ staop5 oid,
655
+ stacoll1 oid,
656
+ stacoll2 oid,
657
+ stacoll3 oid,
658
+ stacoll4 oid,
659
+ stacoll5 oid,
660
+ stanumbers1 real[],
661
+ stanumbers2 real[],
662
+ stanumbers3 real[],
663
+ stanumbers4 real[],
664
+ stanumbers5 real[],
665
+ stavalues1 text[],
666
+ stavalues2 text[],
667
+ stavalues3 text[],
668
+ stavalues4 text[],
669
+ stavalues5 text[],
670
+ _value_type1 text,
671
+ _value_type2 text,
672
+ _value_type3 text,
673
+ _value_type4 text,
674
+ _value_type5 text
675
+ )
676
+ JOIN pg_class c ON c.relname = v.table_name
677
+ JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = v.schema_name
678
+ JOIN pg_attribute a ON a.attrelid = c.oid AND a.attname = v.column_name
679
+ ),
680
+ updated AS (
681
+ UPDATE pg_statistic s
682
+ SET
683
+ stanullfrac = i.stanullfrac,
684
+ stawidth = i.stawidth,
685
+ stadistinct = i.stadistinct,
686
+ stakind1 = i.stakind1,
687
+ stakind2 = i.stakind2,
688
+ stakind3 = i.stakind3,
689
+ stakind4 = i.stakind4,
690
+ stakind5 = i.stakind5,
691
+ staop1 = i.staop1,
692
+ staop2 = i.staop2,
693
+ staop3 = i.staop3,
694
+ staop4 = i.staop4,
695
+ staop5 = i.staop5,
696
+ stacoll1 = i.stacoll1,
697
+ stacoll2 = i.stacoll2,
698
+ stacoll3 = i.stacoll3,
699
+ stacoll4 = i.stacoll4,
700
+ stacoll5 = i.stacoll5,
701
+ stanumbers1 = i.stanumbers1,
702
+ stanumbers2 = i.stanumbers2,
703
+ stanumbers3 = i.stanumbers3,
704
+ stanumbers4 = i.stanumbers4,
705
+ stanumbers5 = i.stanumbers5,
706
+ stavalues1 = case
707
+ when i.stavalues1 is null then null
708
+ else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
709
+ end,
710
+ stavalues2 = case
711
+ when i.stavalues2 is null then null
712
+ else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
713
+ end,
714
+ stavalues3 = case
715
+ when i.stavalues3 is null then null
716
+ else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
717
+ end,
718
+ stavalues4 = case
719
+ when i.stavalues4 is null then null
720
+ else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
721
+ end,
722
+ stavalues5 = case
723
+ when i.stavalues5 is null then null
724
+ else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
725
+ end
726
+ -- stavalues1 = i.stavalues1,
727
+ -- stavalues2 = i.stavalues2,
728
+ -- stavalues3 = i.stavalues3,
729
+ -- stavalues4 = i.stavalues4,
730
+ -- stavalues5 = i.stavalues5
731
+ FROM input i
732
+ WHERE s.starelid = i.starelid AND s.staattnum = i.staattnum AND s.stainherit = i.stainherit
733
+ RETURNING s.starelid, s.staattnum, s.stainherit, s.stakind1, s.stakind2, s.stakind3, s.stakind4, s.stakind5
734
+ ),
735
+ inserted as (
736
+ INSERT INTO pg_statistic (
737
+ starelid, staattnum, stainherit,
738
+ stanullfrac, stawidth, stadistinct,
739
+ stakind1, stakind2, stakind3, stakind4, stakind5,
740
+ staop1, staop2, staop3, staop4, staop5,
741
+ stacoll1, stacoll2, stacoll3, stacoll4, stacoll5,
742
+ stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5,
743
+ stavalues1, stavalues2, stavalues3, stavalues4, stavalues5
744
+ )
745
+ SELECT
746
+ i.starelid, i.staattnum, i.stainherit,
747
+ i.stanullfrac, i.stawidth, i.stadistinct,
748
+ i.stakind1, i.stakind2, i.stakind3, i.stakind4, i.stakind5,
749
+ i.staop1, i.staop2, i.staop3, i.staop4, i.staop5,
750
+ i.stacoll1, i.stacoll2, i.stacoll3, i.stacoll4, i.stacoll5,
751
+ i.stanumbers1, i.stanumbers2, i.stanumbers3, i.stanumbers4, i.stanumbers5,
752
+ -- i.stavalues1, i.stavalues2, i.stavalues3, i.stavalues4, i.stavalues5,
753
+ case
754
+ when i.stavalues1 is null then null
755
+ else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
756
+ end,
757
+ case
758
+ when i.stavalues2 is null then null
759
+ else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
760
+ end,
761
+ case
762
+ when i.stavalues3 is null then null
763
+ else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
764
+ end,
765
+ case
766
+ when i.stavalues4 is null then null
767
+ else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
768
+ end,
769
+ case
770
+ when i.stavalues5 is null then null
771
+ else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
772
+ end
773
+ -- i._value_type1, i._value_type2, i._value_type3, i._value_type4, i._value_type5
774
+ FROM input i
775
+ LEFT JOIN updated u
776
+ ON i.starelid = u.starelid AND i.staattnum = u.staattnum AND i.stainherit = u.stainherit
777
+ WHERE u.starelid IS NULL
778
+ returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
779
+ )
780
+ select * from updated union all (select * from inserted); -- @qd_introspection`);
726
781
  //#endregion
727
- export { ExportedStats, ExportedStatsColumns, ExportedStatsIndex, ExportedStatsStatistics, ExportedStatsV1, Statistics, StatisticsMode, StatisticsSource };
782
+ export { ComputedColumnStats, ComputedReltuples, ComputedStats, ExportedStats, ExportedStatsColumns, ExportedStatsIndex, ExportedStatsStatistics, ExportedStatsV1, Statistics, StatisticsMode, StatisticsSource };
728
783
 
729
784
  //# sourceMappingURL=statistics.mjs.map