@query-doctor/core 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/index.cjs +222 -97
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +3 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +218 -97
  6. package/dist/index.js.map +1 -1
  7. package/dist/optimizer/genalgo.d.ts +9 -5
  8. package/dist/optimizer/genalgo.d.ts.map +1 -1
  9. package/dist/optimizer/genalgo.js +304 -0
  10. package/dist/optimizer/statistics.d.ts +1 -24
  11. package/dist/optimizer/statistics.d.ts.map +1 -1
  12. package/dist/optimizer/statistics.js +700 -0
  13. package/dist/package.json +25 -0
  14. package/dist/sql/analyzer.d.ts +3 -3
  15. package/dist/sql/analyzer.js +270 -0
  16. package/dist/sql/analyzer.test.d.ts +2 -0
  17. package/dist/sql/analyzer.test.d.ts.map +1 -0
  18. package/dist/sql/analyzer_test.js +584 -0
  19. package/dist/sql/builder.js +77 -0
  20. package/dist/sql/database.d.ts +5 -0
  21. package/dist/sql/database.d.ts.map +1 -1
  22. package/dist/sql/database.js +20 -0
  23. package/dist/sql/indexes.d.ts +8 -0
  24. package/dist/sql/indexes.d.ts.map +1 -0
  25. package/dist/sql/indexes.js +12 -0
  26. package/dist/sql/nudges.js +241 -0
  27. package/dist/sql/permutations.test.d.ts +2 -0
  28. package/dist/sql/permutations.test.d.ts.map +1 -0
  29. package/dist/sql/permutations_test.js +53 -0
  30. package/dist/sql/pg-identifier.d.ts +9 -0
  31. package/dist/sql/pg-identifier.d.ts.map +1 -0
  32. package/dist/sql/pg-identifier.test.d.ts +2 -0
  33. package/dist/sql/pg-identifier.test.d.ts.map +1 -0
  34. package/dist/sql/walker.d.ts +2 -2
  35. package/dist/sql/walker.js +295 -0
  36. package/package.json +2 -2
  37. package/dist/index.mjs +0 -24297
  38. package/dist/index.mjs.map +0 -1
  39. package/dist/sql/schema_dump.d.ts +0 -132
  40. package/dist/sql/schema_dump.d.ts.map +0 -1
  41. package/dist/sql/trace.d.ts +0 -1
  42. package/dist/sql/trace.d.ts.map +0 -1
@@ -0,0 +1,700 @@
1
+ import { gray } from "colorette";
2
+ import dedent from "dedent";
3
+ import { z } from "zod";
4
// Statistics loaded from a file: `path` must be a non-empty string.
const statisticsSourceFromPath = z.object({
  kind: z.literal("path"),
  path: z.string().min(1),
});

// Statistics supplied directly alongside the configuration.
const statisticsSourceInline = z.object({
  kind: z.literal("inline"),
});

/** Describes where an exported statistics payload came from. */
export const StatisticsSource = z.union([
  statisticsSourceFromPath,
  statisticsSourceInline,
]);
13
// Every per-slot field (stakindN, staopN, ...) is repeated for these slot numbers.
const EXPORTED_STATISTIC_SLOTS = [1, 2, 3, 4, 5];

// Builds `{ [prefix1]: schema, ..., [prefix5]: schema }`, one fresh schema per slot.
const exportedSlotFields = (prefix, makeSchema) =>
  Object.fromEntries(
    EXPORTED_STATISTIC_SLOTS.map((slot) => [`${prefix}${slot}`, makeSchema()]),
  );

/**
 * Shape of one exported per-column statistics row: the scalar fields
 * followed by the five `stakind`/`staop`/`stacoll`/`stanumbers`/`stavalues` slots.
 */
export const ExportedStatsStatistics = z.object({
  stawidth: z.number(),
  stainherit: z.boolean().default(false),
  // 0 representing unknown
  stadistinct: z.number(),
  // this has no "nullable" state
  stanullfrac: z.number(),
  ...exportedSlotFields("stakind", () => z.number().min(0)),
  ...exportedSlotFields("staop", () => z.string()),
  ...exportedSlotFields("stacoll", () => z.string()),
  ...exportedSlotFields("stanumbers", () => z.array(z.number()).nullable()),
  // theoretically... this could only be strings and numbers
  // but we don't have a crystal ball
  ...exportedSlotFields("stavalues", () => z.array(z.any()).nullable()),
});
48
// A column name paired with its exported statistics; `stats` is null when
// the export carries no statistics for the column.
const exportedStatsColumnShape = {
  columnName: z.string(),
  stats: ExportedStatsStatistics.nullable(),
};

export const ExportedStatsColumns = z.object(exportedStatsColumnShape);
52
// Relation-level counters exported for a single index.
const exportedStatsIndexShape = {
  indexName: z.string(),
  relpages: z.number(),
  reltuples: z.number(),
  relallvisible: z.number(),
  // may be absent from the export
  relallfrozen: z.number().optional(),
};

export const ExportedStatsIndex = z.object(exportedStatsIndexShape);
59
// This should match the output of the `_qd_dump_stats` function in the analyzer README.md.
// It needs to stay versioned so that it accepts ALL potential outputs from every
// version of the dump functions that have been made public.
const exportedStatsV1Shape = {
  tableName: z.string(),
  schemaName: z.string(),
  // can be negative
  relpages: z.number(),
  // can be negative
  reltuples: z.number(),
  relallvisible: z.number(),
  // only postgres 18+
  relallfrozen: z.number().optional(),
  columns: z.array(ExportedStatsColumns).nullable(),
  indexes: z.array(ExportedStatsIndex),
};

export const ExportedStatsV1 = z.object(exportedStatsV1Shape);
75
// Every supported export format version; currently only V1 exists.
const exportedStatsVersions = [ExportedStatsV1];

export const ExportedStats = z.union(exportedStatsVersions);
76
// Use the same assumed (non-negative) reltuples/relpages values.
const statisticsModeFromAssumption = z.object({
  kind: z.literal("fromAssumption"),
  reltuples: z.number().min(0),
  relpages: z.number().min(0),
});

// Use statistics that were exported from another database.
const statisticsModeFromExport = z.object({
  kind: z.literal("fromStatisticsExport"),
  stats: z.array(ExportedStats),
  source: StatisticsSource,
});

/** How statistics are chosen, discriminated on `kind`. */
export const StatisticsMode = z.discriminatedUnion("kind", [
  statisticsModeFromAssumption,
  statisticsModeFromExport,
]);
88
// Fallback relation size used when neither an export nor an explicit assumption applies.
const DEFAULT_RELTUPLES = 10_000;
const DEFAULT_RELPAGES = 1;

/**
 * Reads planner statistics from a database and overwrites them with either
 * assumed values or values exported from another database.
 */
export class Statistics {
  db;
  postgresVersion;
  ownMetadata;
  mode;
  exportedMetadata;
  // preventing accidental internal mutations
  static defaultStatsMode = Object.freeze({
    kind: "fromAssumption",
    reltuples: DEFAULT_RELTUPLES,
    relpages: DEFAULT_RELPAGES,
  });
  // pg_statistic keeps five numbered slots per column (stakind1..stakind5, ...).
  static #SLOTS = [1, 2, 3, 4, 5];

  /**
   * @param db handle used by `getExistingIndexes` (must expose `exec`)
   * @param postgresVersion server version as returned by `db.serverNum()`
   * @param ownMetadata tables dumped from the database under test (see `dumpStats`);
   *   a nullish value is treated as "no tables"
   * @param statsMode optional statistics mode; defaults to `Statistics.defaultStatsMode`
   */
  constructor(db, postgresVersion, ownMetadata, statsMode) {
    this.db = db;
    this.postgresVersion = postgresVersion;
    this.ownMetadata = ownMetadata ?? [];
    if (statsMode) {
      this.mode = statsMode;
      if (statsMode.kind === "fromStatisticsExport") {
        this.exportedMetadata = statsMode.stats;
      }
    } else {
      this.mode = Statistics.defaultStatsMode;
    }
  }

  /** Create a statistics mode that assumes the given size for tables. */
  static statsModeFromAssumption({ reltuples, relpages }) {
    return {
      kind: "fromAssumption",
      reltuples,
      relpages,
    };
  }

  /**
   * Create a statistic mode from stats exported from another database
   **/
  static statsModeFromExport(stats) {
    return {
      kind: "fromStatisticsExport",
      source: { kind: "inline" },
      stats,
    };
  }

  /** Build a `Statistics` instance from a live connection by dumping its current stats. */
  static async fromPostgres(db, statsMode) {
    const version = await db.serverNum();
    const ownStats = await Statistics.dumpStats(db, version, "full");
    return new Statistics(db, version, ownStats, statsMode);
  }

  /**
   * Apply the configured statistics inside the given transaction.
   * A version-specific path (`restoreStats18`) is not wired up; every server
   * version currently goes through `restoreStats17`.
   */
  restoreStats(tx) {
    return this.restoreStats17(tx);
  }

  /**
   * We have to cast stavaluesN to the correct type
   * This derives that type for us so it can be used in `array_in`
   *
   * @returns `null` for a missing/empty array, otherwise "real", "boolean" or "text"
   *   depending on the JavaScript type of the first element
   */
  stavalueKind(values) {
    if (!values || values.length === 0) {
      return null;
    }
    const [elem] = values;
    if (typeof elem === "number") {
      return "real";
    }
    if (typeof elem === "boolean") {
      return "boolean";
    }
    // is everything else a text? What about stringified dates?
    // we might need column metadata access here if we do
    return "text";
  }

  // Unambiguous lookup key: a plain "schema.table" string would make
  // ("a.b", "c") and ("a", "b.c") indistinguishable.
  static #tableKey(schemaName, tableName) {
    return JSON.stringify([schemaName, tableName]);
  }

  // Human readable name used in warnings.
  static #displayName(table) {
    return `${table.schemaName}.${table.tableName}`;
  }

  // Builds one record for the jsonb payload consumed by the column stats query.
  #columnStatsRow(table, columnName, stats) {
    const row = {
      schema_name: table.schemaName,
      table_name: table.tableName,
      column_name: columnName,
      stainherit: stats.stainherit ?? false,
      stanullfrac: stats.stanullfrac,
      stawidth: stats.stawidth,
      stadistinct: stats.stadistinct,
    };
    for (const slot of Statistics.#SLOTS) {
      row[`stakind${slot}`] = stats[`stakind${slot}`];
      row[`staop${slot}`] = stats[`staop${slot}`];
      row[`stacoll${slot}`] = stats[`stacoll${slot}`];
      row[`stanumbers${slot}`] = stats[`stanumbers${slot}`];
      row[`stavalues${slot}`] = stats[`stavalues${slot}`];
      row[`_value_type${slot}`] = this.stavalueKind(stats[`stavalues${slot}`]);
    }
    return row;
  }

  /**
   * Writes column statistics (when an export is configured) and
   * reltuples/relpages/relallvisible for every table of the test database.
   *
   * @param tx transaction-like object exposing `exec(sql, params)`
   * @returns warnings collected while matching exported tables to test tables
   * @throws whatever `tx.exec` rejects with for the column statistics query;
   *   a failure of the reltuples query is only logged
   */
  async restoreStats17(tx) {
    const warnings = {
      tablesNotInExports: [],
      tablesNotInTest: [],
      tableNotAnalyzed: [],
      statsMissing: [],
    };

    // Index the export once; the first entry wins, like Array.prototype.find did.
    const exportedByKey = new Map();
    for (const exported of this.exportedMetadata ?? []) {
      const key = Statistics.#tableKey(exported.schemaName, exported.tableName);
      if (!exportedByKey.has(key)) {
        exportedByKey.set(key, exported);
      }
    }
    const findExported = (table) =>
      exportedByKey.get(Statistics.#tableKey(table.schemaName, table.tableName));

    let columnStatsUpdatePromise;
    const columnStatsValues = [];
    if (this.exportedMetadata) {
      for (const table of this.ownMetadata) {
        const exportedColumns = findExported(table)?.columns;
        if (!exportedColumns) {
          continue;
        }
        for (const column of exportedColumns) {
          if (!column.stats) {
            continue;
          }
          // TODO: track processed columns too
          columnStatsValues.push(
            this.#columnStatsRow(table, column.columnName, column.stats),
          );
        }
      }
      /**
       * Postgres has 5 different slots for storing statistics per column and a potentially unlimited
       * number of statistic types to choose from. Each code in `stakindN` can mean different things.
       * Some statistics are just numerical values such as `n_distinct` and `correlation`, meaning
       * they're only derived from `stanumbersN` and the value of `stavaluesN` is never read.
       * Others take advantage of the `stavaluesN` columns which use `anyarray` type to store
       * concrete values internally for things like histogram bounds.
       *
       * (1) = most common values
       * (2) = scalar histogram
       * (3) = correlation
       * (4) = most common elements
       * (5) = distinct elem count histogram
       * (6) = length histogram (?) These don't appear in pg_stats
       * (7) = bounds histogram (?) These don't appear in pg_stats
       * (N) = potentially many more kinds of statistics. But postgres <=18 only uses these 7.
       *
       * `anyarray` columns cannot be assigned directly, so the query rebuilds every
       * `stavaluesN` through `array_in`, using the element type guessed by
       * `stavalueKind` ("real", "boolean" or "text").
       * NOTE(review): an earlier version of this comment described zeroing the
       * stavalues-based statistics instead; the query below does not do that.
       *
       * Existing pg_statistic rows are updated; rows that do not exist yet are inserted.
       */
      const sql = dedent`
        WITH input AS (
          SELECT
            c.oid AS starelid,
            a.attnum AS staattnum,
            v.stainherit,
            v.stanullfrac,
            v.stawidth,
            v.stadistinct,
            v.stakind1,
            v.stakind2,
            v.stakind3,
            v.stakind4,
            v.stakind5,
            v.staop1,
            v.staop2,
            v.staop3,
            v.staop4,
            v.staop5,
            v.stacoll1,
            v.stacoll2,
            v.stacoll3,
            v.stacoll4,
            v.stacoll5,
            v.stanumbers1,
            v.stanumbers2,
            v.stanumbers3,
            v.stanumbers4,
            v.stanumbers5,
            v.stavalues1,
            v.stavalues2,
            v.stavalues3,
            v.stavalues4,
            v.stavalues5,
            _value_type1,
            _value_type2,
            _value_type3,
            _value_type4,
            _value_type5
          FROM jsonb_to_recordset($1::jsonb) AS v(
            schema_name text,
            table_name text,
            column_name text,
            stainherit boolean,
            stanullfrac real,
            stawidth integer,
            stadistinct real,
            stakind1 real,
            stakind2 real,
            stakind3 real,
            stakind4 real,
            stakind5 real,
            staop1 oid,
            staop2 oid,
            staop3 oid,
            staop4 oid,
            staop5 oid,
            stacoll1 oid,
            stacoll2 oid,
            stacoll3 oid,
            stacoll4 oid,
            stacoll5 oid,
            stanumbers1 real[],
            stanumbers2 real[],
            stanumbers3 real[],
            stanumbers4 real[],
            stanumbers5 real[],
            stavalues1 text[],
            stavalues2 text[],
            stavalues3 text[],
            stavalues4 text[],
            stavalues5 text[],
            _value_type1 text,
            _value_type2 text,
            _value_type3 text,
            _value_type4 text,
            _value_type5 text
          )
          JOIN pg_class c ON c.relname = v.table_name
          JOIN pg_namespace n ON n.oid = c.relnamespace AND n.nspname = v.schema_name
          JOIN pg_attribute a ON a.attrelid = c.oid AND a.attname = v.column_name
        ),
        updated AS (
          UPDATE pg_statistic s
          SET
            stanullfrac = i.stanullfrac,
            stawidth = i.stawidth,
            stadistinct = i.stadistinct,
            stakind1 = i.stakind1,
            stakind2 = i.stakind2,
            stakind3 = i.stakind3,
            stakind4 = i.stakind4,
            stakind5 = i.stakind5,
            staop1 = i.staop1,
            staop2 = i.staop2,
            staop3 = i.staop3,
            staop4 = i.staop4,
            staop5 = i.staop5,
            stacoll1 = i.stacoll1,
            stacoll2 = i.stacoll2,
            stacoll3 = i.stacoll3,
            stacoll4 = i.stacoll4,
            stacoll5 = i.stacoll5,
            stanumbers1 = i.stanumbers1,
            stanumbers2 = i.stanumbers2,
            stanumbers3 = i.stanumbers3,
            stanumbers4 = i.stanumbers4,
            stanumbers5 = i.stanumbers5,
            stavalues1 = case
              when i.stavalues1 is null then null
              else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
            end,
            stavalues2 = case
              when i.stavalues2 is null then null
              else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
            end,
            stavalues3 = case
              when i.stavalues3 is null then null
              else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
            end,
            stavalues4 = case
              when i.stavalues4 is null then null
              else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
            end,
            stavalues5 = case
              when i.stavalues5 is null then null
              else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
            end
          FROM input i
          WHERE s.starelid = i.starelid AND s.staattnum = i.staattnum AND s.stainherit = i.stainherit
          RETURNING s.starelid, s.staattnum, s.stainherit, s.stakind1, s.stakind2, s.stakind3, s.stakind4, s.stakind5
        ),
        inserted as (
          INSERT INTO pg_statistic (
            starelid, staattnum, stainherit,
            stanullfrac, stawidth, stadistinct,
            stakind1, stakind2, stakind3, stakind4, stakind5,
            staop1, staop2, staop3, staop4, staop5,
            stacoll1, stacoll2, stacoll3, stacoll4, stacoll5,
            stanumbers1, stanumbers2, stanumbers3, stanumbers4, stanumbers5,
            stavalues1, stavalues2, stavalues3, stavalues4, stavalues5
          )
          SELECT
            i.starelid, i.staattnum, i.stainherit,
            i.stanullfrac, i.stawidth, i.stadistinct,
            i.stakind1, i.stakind2, i.stakind3, i.stakind4, i.stakind5,
            i.staop1, i.staop2, i.staop3, i.staop4, i.staop5,
            i.stacoll1, i.stacoll2, i.stacoll3, i.stacoll4, i.stacoll5,
            i.stanumbers1, i.stanumbers2, i.stanumbers3, i.stanumbers4, i.stanumbers5,
            case
              when i.stavalues1 is null then null
              else array_in(i.stavalues1::text::cstring, i._value_type1::regtype::oid, -1)
            end,
            case
              when i.stavalues2 is null then null
              else array_in(i.stavalues2::text::cstring, i._value_type2::regtype::oid, -1)
            end,
            case
              when i.stavalues3 is null then null
              else array_in(i.stavalues3::text::cstring, i._value_type3::regtype::oid, -1)
            end,
            case
              when i.stavalues4 is null then null
              else array_in(i.stavalues4::text::cstring, i._value_type4::regtype::oid, -1)
            end,
            case
              when i.stavalues5 is null then null
              else array_in(i.stavalues5::text::cstring, i._value_type5::regtype::oid, -1)
            end
          FROM input i
          LEFT JOIN updated u
            ON i.starelid = u.starelid AND i.staattnum = u.staattnum AND i.stainherit = u.stainherit
          WHERE u.starelid IS NULL
          returning starelid, staattnum, stainherit, stakind1, stakind2, stakind3, stakind4, stakind5
        )
        select * from updated union all (select * from inserted); -- @qd_introspection`;
      columnStatsUpdatePromise = tx
        .exec(sql, [columnStatsValues])
        .catch((err) => {
          console.error("Something went wrong updating column stats");
          console.error(err);
          throw err;
        });
    }

    const processedTables = new Set();
    const reltuplesValues = [];
    for (const table of this.ownMetadata) {
      if (!table.columns) {
        continue;
      }
      processedTables.add(
        Statistics.#tableKey(table.schemaName, table.tableName),
      );
      const targetTable = findExported(table);
      let reltuples;
      let relpages;
      let relallvisible = 0;
      let relallfrozen;
      if (targetTable) {
        // Exported values are applied as-is, even -1 reltuples;
        // the user is warned about unanalyzed tables further down.
        reltuples = targetTable.reltuples;
        relpages = targetTable.relpages;
        relallvisible = targetTable.relallvisible;
        relallfrozen = targetTable.relallfrozen;
      } else if (this.mode.kind === "fromAssumption") {
        reltuples = this.mode.reltuples;
        relpages = this.mode.relpages;
      } else {
        // we want to warn about tables that are in the test but not in the exported stats
        // this can happen in case a new table is created in a PR
        warnings.tablesNotInExports.push(Statistics.#displayName(table));
        reltuples = DEFAULT_RELTUPLES;
        relpages = DEFAULT_RELPAGES;
      }
      reltuplesValues.push({
        relname: table.tableName,
        schema_name: table.schemaName,
        reltuples,
        relpages,
        relallfrozen,
        relallvisible,
      });
      for (const index of targetTable?.indexes ?? []) {
        reltuplesValues.push({
          relname: index.indexName,
          schema_name: targetTable.schemaName,
          reltuples: index.reltuples,
          relpages: index.relpages,
          relallfrozen: index.relallfrozen,
          relallvisible: index.relallvisible,
        });
      }
    }

    // relallfrozen is part of the payload but intentionally not written here.
    const reltuplesQuery = dedent`
      update pg_class p
      set reltuples = v.reltuples,
        relpages = v.relpages,
        relallvisible = case when v.relallvisible is null then p.relallvisible else v.relallvisible end
      from jsonb_to_recordset($1::jsonb)
        as v(reltuples real, relpages integer, relallfrozen integer, relallvisible integer, relname text, schema_name text)
      where p.relname = v.relname
        and p.relnamespace = (select oid from pg_namespace where nspname = v.schema_name)
      returning p.relname, p.relnamespace, p.reltuples, p.relpages;
    `;
    // Best-effort, as before: a failure is logged and reported as "no rows"
    // instead of rejecting the whole restore.
    const reltuplesPromise = tx
      .exec(reltuplesQuery, [reltuplesValues])
      .catch((err) => {
        console.error("Something went wrong updating reltuples/relpages");
        console.error(err);
        return undefined;
      });

    for (const table of this.exportedMetadata ?? []) {
      const displayName = Statistics.#displayName(table);
      const tableExists = processedTables.has(
        Statistics.#tableKey(table.schemaName, table.tableName),
      );
      if (!tableExists) {
        // there's a LOT of tables in statistics exports for things like timescaledb
        // that might not show up in the test data. This check might be too strict.
        warnings.tablesNotInTest.push(displayName);
        continue;
      }
      if (table.reltuples === -1) {
        console.warn(`Table ${table.tableName} has reltuples -1. Your production database is probably not analyzed properly`);
        // we expect production stats to have real numbers
        warnings.tableNotAnalyzed.push(displayName);
      }
    }

    const [statsUpdates, reltuplesUpdates] = await Promise.all([
      columnStatsUpdatePromise,
      reltuplesPromise,
    ]);
    const updatedColumnsProperly = statsUpdates
      ? statsUpdates.length === columnStatsValues.length
      : true;
    if (!updatedColumnsProperly) {
      console.error(`Did not update expected column stats`);
    }
    if (
      !Array.isArray(reltuplesUpdates) ||
      reltuplesUpdates.length !== reltuplesValues.length
    ) {
      console.error(`Did not update expected reltuples/relpages`);
    }
    return warnings;
  }

  /**
   * Dumps table, column and index statistics of the connected database as an
   * array of `{ tableName, schemaName, reltuples, relpages, relallvisible, columns, indexes }`.
   *
   * @param db object exposing `exec(sql, params)`
   * @param postgresVersion only used for the log line
   * @param kind "full" also exports `stavaluesN`; any other value exports them as NULL
   * @returns the dumped tables, or an empty array when the query yields none
   */
  static async dumpStats(db, postgresVersion, kind) {
    const fullDump = kind === "full";
    console.log(`dumping stats for postgres ${gray(postgresVersion)}`);
    // Notes on the query:
    // - `_` is a wildcard in LIKE, so the "pg_" prefix is matched with an
    //   explicit ESCAPE character; otherwise names like "pgbench_accounts" are dropped too.
    // - pg_statistic can hold two rows per column (stainherit false/true); the
    //   scalar subquery must return at most one, so the inherited row is
    //   preferred and `stainherit` is exported alongside it.
    //   NOTE(review): confirm that preferring the inherited row is the desired choice.
    // - indexes are matched to tables on schema AND table name so same-named
    //   tables in different schemas do not share index entries.
    const stats = await db.exec(`
      WITH table_columns AS (
        SELECT
          c.table_name,
          c.table_schema,
          cl.reltuples,
          cl.relpages,
          cl.relallvisible,
          -- cl.relallfrozen,
          n.nspname AS schema_name,
          json_agg(
            json_build_object(
              'columnName', c.column_name,
              'stats', (
                SELECT json_build_object(
                  'starelid', s.starelid,
                  'staattnum', s.staattnum,
                  'stainherit', s.stainherit,
                  'stanullfrac', s.stanullfrac,
                  'stawidth', s.stawidth,
                  'stadistinct', s.stadistinct,
                  'stakind1', s.stakind1, 'staop1', s.staop1, 'stacoll1', s.stacoll1, 'stanumbers1', s.stanumbers1,
                  'stakind2', s.stakind2, 'staop2', s.staop2, 'stacoll2', s.stacoll2, 'stanumbers2', s.stanumbers2,
                  'stakind3', s.stakind3, 'staop3', s.staop3, 'stacoll3', s.stacoll3, 'stanumbers3', s.stanumbers3,
                  'stakind4', s.stakind4, 'staop4', s.staop4, 'stacoll4', s.stacoll4, 'stanumbers4', s.stanumbers4,
                  'stakind5', s.stakind5, 'staop5', s.staop5, 'stacoll5', s.stacoll5, 'stanumbers5', s.stanumbers5,
                  'stavalues1', CASE WHEN $1 THEN s.stavalues1 ELSE NULL END,
                  'stavalues2', CASE WHEN $1 THEN s.stavalues2 ELSE NULL END,
                  'stavalues3', CASE WHEN $1 THEN s.stavalues3 ELSE NULL END,
                  'stavalues4', CASE WHEN $1 THEN s.stavalues4 ELSE NULL END,
                  'stavalues5', CASE WHEN $1 THEN s.stavalues5 ELSE NULL END
                )
                FROM pg_statistic s
                WHERE s.starelid = a.attrelid AND s.staattnum = a.attnum
                ORDER BY s.stainherit DESC
                LIMIT 1
              )
            )
            ORDER BY c.ordinal_position
          ) AS columns
        FROM information_schema.columns c
        JOIN pg_attribute a
          ON a.attrelid = (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass
          AND a.attname = c.column_name
        JOIN pg_class cl
          ON cl.oid = a.attrelid
        JOIN pg_namespace n
          ON n.oid = cl.relnamespace
        WHERE c.table_name NOT LIKE 'pg!_%' ESCAPE '!'
          AND n.nspname <> 'information_schema'
          AND c.table_name NOT IN ('pg_stat_statements', 'pg_stat_statements_info')
        GROUP BY c.table_name, c.table_schema, cl.reltuples, cl.relpages, cl.relallvisible, n.nspname
      ),
      table_indexes AS (
        SELECT
          n.nspname AS schema_name,
          t.relname AS table_name,
          json_agg(
            json_build_object(
              'indexName', i.relname,
              'reltuples', i.reltuples,
              'relpages', i.relpages,
              'relallvisible', i.relallvisible
              -- 'relallfrozen', i.relallfrozen
            )
          ) AS indexes
        FROM pg_class t
        JOIN pg_index ix ON ix.indrelid = t.oid
        JOIN pg_class i ON i.oid = ix.indexrelid
        JOIN pg_namespace n ON n.oid = t.relnamespace
        WHERE t.relname NOT LIKE 'pg!_%' ESCAPE '!'
          AND n.nspname <> 'information_schema'
        GROUP BY n.nspname, t.relname
      )
      SELECT json_agg(
        json_build_object(
          'tableName', tc.table_name,
          'schemaName', tc.table_schema,
          'reltuples', tc.reltuples,
          'relpages', tc.relpages,
          'relallvisible', tc.relallvisible,
          -- 'relallfrozen', tc.relallfrozen,
          'columns', tc.columns,
          'indexes', COALESCE(ti.indexes, '[]'::json)
        )
      )
      FROM table_columns tc
      LEFT JOIN table_indexes ti
        ON ti.table_name = tc.table_name
        AND ti.schema_name = tc.schema_name;
    `, [fullDump]);
    // json_agg over zero rows yields NULL; hand callers an empty list instead.
    return stats[0]?.json_agg ?? [];
  }

  /**
   * Returns the indexes found outside the `pg_`-prefixed and `information_schema`
   * schemas, one row per index with its columns/expressions and sort order.
   * The access method is reported as `index_type`.
   * ONLY handles regular btree indexes
   * (NOTE(review): the query itself does not filter by access method).
   */
  async getExistingIndexes() {
    // `_` is a LIKE wildcard, so the "pg_" schema prefix is matched with an
    // explicit ESCAPE character; otherwise schemas such as "pgboss" are dropped too.
    const indexes = await this.db.exec(`
      WITH partitioned_tables AS (
        SELECT
          inhparent::regclass AS parent_table,
          inhrelid::regclass AS partition_table
        FROM
          pg_inherits
      )
      SELECT
        n.nspname AS schema_name,
        COALESCE(pt.parent_table::text, t.relname) AS table_name,
        i.relname AS index_name,
        ix.indisprimary as is_primary,
        ix.indisunique as is_unique,
        am.amname AS index_type,
        array_agg(
          CASE
            -- Handle regular columns
            WHEN a.attname IS NOT NULL THEN
              json_build_object('name', a.attname, 'order',
                CASE
                  WHEN (indoption[array_position(ix.indkey, a.attnum)] & 1) = 1 THEN 'DESC'
                  ELSE 'ASC'
                END)
            -- Handle expressions
            ELSE
              json_build_object('name', pg_get_expr((ix.indexprs)::pg_node_tree, t.oid), 'order',
                CASE
                  WHEN (indoption[array_position(ix.indkey, k.attnum)] & 1) = 1 THEN 'DESC'
                  ELSE 'ASC'
                END)
          END
          ORDER BY array_position(ix.indkey, k.attnum)
        ) AS index_columns
      FROM
        pg_class t
        LEFT JOIN partitioned_tables pt ON t.oid = pt.partition_table
        JOIN pg_index ix ON t.oid = ix.indrelid
        JOIN pg_class i ON i.oid = ix.indexrelid
        JOIN pg_am am ON i.relam = am.oid
        LEFT JOIN LATERAL unnest(ix.indkey) WITH ORDINALITY k(attnum, ordinality) ON true
        LEFT JOIN pg_attribute a ON a.attnum = k.attnum AND a.attrelid = t.oid
        JOIN pg_namespace n ON t.relnamespace = n.oid
      WHERE
        n.nspname not like 'pg!_%' escape '!' and
        n.nspname <> 'information_schema'
      GROUP BY
        n.nspname, COALESCE(pt.parent_table::text, t.relname), i.relname, am.amname, ix.indisprimary, ix.indisunique
      ORDER BY
        COALESCE(pt.parent_table::text, t.relname), i.relname; -- @qd_introspection
    `);
    return indexes;
  }
}