@uwdata/mosaic-core 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +1 -1
  2. package/dist/mosaic-core.js +12960 -21458
  3. package/dist/mosaic-core.min.js +7 -16
  4. package/dist/types/Coordinator.d.ts +169 -0
  5. package/dist/types/MosaicClient.d.ts +94 -0
  6. package/dist/types/Param.d.ts +47 -0
  7. package/dist/types/QueryConsolidator.d.ts +9 -0
  8. package/dist/types/QueryManager.d.ts +64 -0
  9. package/dist/types/Selection.d.ts +224 -0
  10. package/dist/types/SelectionClause.d.ts +105 -0
  11. package/dist/types/connectors/rest.d.ts +17 -0
  12. package/dist/types/connectors/socket.d.ts +18 -0
  13. package/dist/types/connectors/wasm.d.ts +16 -0
  14. package/dist/types/index.d.ts +25 -0
  15. package/dist/types/preagg/PreAggregator.d.ts +178 -0
  16. package/dist/types/preagg/preagg-columns.d.ts +14 -0
  17. package/dist/types/preagg/sufficient-statistics.d.ts +13 -0
  18. package/dist/types/util/AsyncDispatch.d.ts +100 -0
  19. package/dist/types/util/cache.d.ts +13 -0
  20. package/dist/types/util/decode-ipc.d.ts +7 -0
  21. package/dist/types/util/distinct.d.ts +2 -0
  22. package/dist/types/util/field-info.d.ts +13 -0
  23. package/dist/types/util/hash.d.ts +1 -0
  24. package/dist/types/util/is-arrow-table.d.ts +8 -0
  25. package/dist/types/util/js-type.d.ts +1 -0
  26. package/dist/types/util/priority-queue.d.ts +37 -0
  27. package/dist/types/util/query-result.d.ts +44 -0
  28. package/dist/types/util/selection-types.d.ts +114 -0
  29. package/dist/types/util/synchronizer.d.ts +29 -0
  30. package/dist/types/util/throttle.d.ts +11 -0
  31. package/dist/types/util/to-data-columns.d.ts +29 -0
  32. package/dist/types/util/void-logger.d.ts +7 -0
  33. package/jsconfig.json +11 -0
  34. package/package.json +10 -8
  35. package/src/Coordinator.js +66 -41
  36. package/src/MosaicClient.js +14 -4
  37. package/src/QueryConsolidator.js +32 -39
  38. package/src/QueryManager.js +85 -48
  39. package/src/Selection.js +49 -15
  40. package/src/SelectionClause.js +19 -22
  41. package/src/connectors/rest.js +6 -4
  42. package/src/connectors/socket.js +7 -4
  43. package/src/connectors/wasm.js +20 -4
  44. package/src/index.js +16 -8
  45. package/src/preagg/PreAggregator.js +407 -0
  46. package/src/preagg/preagg-columns.js +103 -0
  47. package/src/preagg/sufficient-statistics.js +439 -0
  48. package/src/util/decode-ipc.js +11 -0
  49. package/src/util/field-info.js +19 -16
  50. package/src/util/hash.js +1 -1
  51. package/src/util/is-arrow-table.js +10 -0
  52. package/src/util/priority-queue.js +75 -76
  53. package/src/util/query-result.js +44 -2
  54. package/src/util/selection-types.ts +3 -3
  55. package/src/util/throttle.js +21 -9
  56. package/src/util/to-data-columns.js +4 -15
  57. package/src/util/void-logger.js +6 -5
  58. package/tsconfig.json +11 -0
  59. package/src/DataCubeIndexer.js +0 -320
  60. package/src/util/convert-arrow.js +0 -145
  61. package/src/util/index-columns.js +0 -540
@@ -1,4 +1,4 @@
1
- import { tableFromIPC } from 'apache-arrow';
1
+ import { decodeIPC } from '../util/decode-ipc.js';
2
2
 
3
3
  export function socketConnector(uri = 'ws://localhost:3000/') {
4
4
  const queue = [];
@@ -47,7 +47,7 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
47
47
  } else if (query.type === 'exec') {
48
48
  resolve();
49
49
  } else if (query.type === 'arrow') {
50
- resolve(tableFromIPC(data.arrayBuffer()));
50
+ resolve(decodeIPC(data));
51
51
  } else {
52
52
  throw new Error(`Unexpected socket data: ${data}`);
53
53
  }
@@ -59,6 +59,7 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
59
59
 
60
60
  function init() {
61
61
  ws = new WebSocket(uri);
62
+ ws.binaryType = 'arraybuffer';
62
63
  for (const type in events) {
63
64
  ws.addEventListener(type, events[type]);
64
65
  }
@@ -84,8 +85,10 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
84
85
  /**
85
86
  * Query the DuckDB server.
86
87
  * @param {object} query
87
- * @param {'exec' | 'arrow' | 'json'} [query.type] The query type: 'exec', 'arrow', or 'json'.
88
- * @param {string} query.sql A SQL query string.
88
+ * @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type.
89
+ * @param {string} [query.sql] A SQL query string.
90
+ * @param {string[]} [query.queries] The queries used to create a bundle.
91
+ * @param {string} [query.name] The name of a bundle to create or load.
89
92
  * @returns the query result
90
93
  */
91
94
  query(query) {
@@ -1,4 +1,20 @@
1
1
  import * as duckdb from '@duckdb/duckdb-wasm';
2
+ import { decodeIPC } from '../util/decode-ipc.js';
3
+
4
+ // bypass duckdb-wasm query method to get Arrow IPC bytes directly
5
+ // https://github.com/duckdb/duckdb-wasm/issues/267#issuecomment-2252749509
6
+ function getArrowIPC(con, query) {
7
+ return new Promise((resolve, reject) => {
8
+ con.useUnsafe(async (bindings, conn) => {
9
+ try {
10
+ const buffer = await bindings.runQuery(conn, query);
11
+ resolve(buffer);
12
+ } catch (error) {
13
+ reject(error);
14
+ }
15
+ });
16
+ });
17
+ }
2
18
 
3
19
  export function wasmConnector(options = {}) {
4
20
  const { duckdb, connection, ...opts } = options;
@@ -45,17 +61,17 @@ export function wasmConnector(options = {}) {
45
61
  /**
46
62
  * Query the DuckDB-WASM instance.
47
63
  * @param {object} query
48
- * @param {'exec' | 'arrow' | 'json'} [query.type] The query type: 'exec', 'arrow', or 'json'.
64
+ * @param {'exec' | 'arrow' | 'json'} [query.type] The query type.
49
65
  * @param {string} query.sql A SQL query string.
50
66
  * @returns the query result
51
67
  */
52
68
  query: async query => {
53
69
  const { type, sql } = query;
54
70
  const con = await getConnection();
55
- const result = await con.query(sql);
71
+ const result = await getArrowIPC(con, sql);
56
72
  return type === 'exec' ? undefined
57
- : type === 'arrow' ? result
58
- : result.toArray();
73
+ : type === 'arrow' ? decodeIPC(result)
74
+ : decodeIPC(result).toArray();
59
75
  }
60
76
  };
61
77
  }
package/src/index.js CHANGED
@@ -16,14 +16,22 @@ export {
16
16
  clauseMatch
17
17
  } from './SelectionClause.js';
18
18
 
19
- export {
20
- isArrowTable,
21
- convertArrowArrayType,
22
- convertArrowValue,
23
- convertArrowColumn
24
- } from './util/convert-arrow.js'
25
-
19
+ export { decodeIPC } from './util/decode-ipc.js';
26
20
  export { distinct } from './util/distinct.js';
21
+ export { isArrowTable } from './util/is-arrow-table.js';
27
22
  export { synchronizer } from './util/synchronizer.js';
28
23
  export { throttle } from './util/throttle.js';
29
- export { toDataColumns } from './util/to-data-columns.js'
24
+ export { toDataColumns } from './util/to-data-columns.js';
25
+
26
+ /**
27
+ * @typedef {import('./util/selection-types.js').ClauseMetadata} ClauseMetadata
28
+ * @typedef {import('./util/selection-types.js').PointMetadata} PointMetadata
29
+ * @typedef {import('./util/selection-types.js').MatchMethod} MatchMethod
30
+ * @typedef {import('./util/selection-types.js').MatchMetadata} MatchMetadata
31
+ * @typedef {import('./util/selection-types.js').ScaleType} ScaleType
32
+ * @typedef {import('./util/selection-types.js').Extent} Extent
33
+ * @typedef {import('./util/selection-types.js').Scale} Scale
34
+ * @typedef {import('./util/selection-types.js').BinMethod} BinMethod
35
+ * @typedef {import('./util/selection-types.js').IntervalMetadata} IntervalMetadata
36
+ * @typedef {import('./util/selection-types.js').SelectionClause} SelectionClause
37
+ */
@@ -0,0 +1,407 @@
1
+ import { Query, and, asNode, ceil, collectColumns, createTable, float64, floor, isBetween, int32, mul, round, scaleTransform, sub, isSelectQuery, ExprNode, SelectQuery } from '@uwdata/mosaic-sql';
2
+ import { preaggColumns } from './preagg-columns.js';
3
+ import { fnv_hash } from '../util/hash.js';
4
+
5
+ const Skip = { skip: true, result: null };
6
+
7
+ /**
8
+ * @typedef {object} PreAggregateOptions
9
+ * @property {string} [schema] Database schema (namespace) in which to write
10
+ * pre-aggregated materialized views (default 'mosaic').
11
+ * @property {boolean} [enabled=true] Flag to enable or disable the
12
+ * pre-aggregation. This flag can be updated later via the `enabled` property.
13
+ */
14
+
15
+ /**
16
+ * Build and query optimized pre-aggregated materialized views, for fast
17
+ * computation of groupby aggregate queries over compatible client queries
18
+ * and selections. The materialized views contain pre-aggregated data for a
19
+ * Mosaic client, subdivided by possible query values from an active selection
20
+ * clause. These materialized views are database tables that can be queried
21
+ * for rapid updates.
22
+ *
23
+ * Compatible client queries must consist of only groupby dimensions and
24
+ * supported aggregate functions. Compatible selections must contain an active
25
+ * clause that exposes metadata for an interval or point value predicate.
26
+ *
27
+ * Materialized views are written to a dedicated schema (namespace) that
28
+ * can be set using the *schema* constructor option. This schema acts as a
29
+ * persistent cache, and materialized view tables may be used across sessions.
30
+ * The `dropSchema` method issues a query to remove *all* tables within this
31
+ * schema. This may be needed if the original tables have updated data, but
32
+ * should be used with care.
33
+ */
34
+ export class PreAggregator {
35
+ /**
36
+ * Create a new manager of materialized views of pre-aggregated data.
37
+ * @param {import('../Coordinator.js').Coordinator} coordinator A Mosaic coordinator.
38
+ * @param {PreAggregateOptions} [options] Pre-aggregation options.
39
+ */
40
+ constructor(coordinator, {
41
+ schema = 'mosaic',
42
+ enabled = true
43
+ } = {}) {
44
+ /** @type {Map<import('../MosaicClient.js').MosaicClient, PreAggregateInfo | Skip | null>} */
45
+ this.entries = new Map();
46
+ this.active = null;
47
+ this.mc = coordinator;
48
+ this._schema = schema;
49
+ this._enabled = enabled;
50
+ }
51
+
52
+ /**
53
+ * Set the enabled state of this manager. If false, any local state is
54
+ * cleared and subsequent request calls will return null until re-enabled.
55
+ * This method has no effect on any pre-aggregated tables already in the
56
+ * database.
57
+ * @param {boolean} [state] The enabled state to set.
58
+ */
59
+ set enabled(state) {
60
+ if (this._enabled !== state) {
61
+ if (!state) this.clear();
62
+ this._enabled = state;
63
+ }
64
+ }
65
+
66
+ /**
67
+ * Get the enabled state of this manager.
68
+ * @returns {boolean} The current enabled state.
69
+ */
70
+ get enabled() {
71
+ return this._enabled;
72
+ }
73
+
74
+ /**
75
+ * Set the database schema used for pre-aggregated materialized view tables.
76
+ * Upon changes, any local state is cleared. This method does _not_ drop any
77
+ * existing materialized views; use `dropSchema` before changing the schema
78
+ * to also remove existing materialized views in the database.
79
+ * @param {string} [schema] The schema name to set.
80
+ */
81
+ set schema(schema) {
82
+ if (this._schema !== schema) {
83
+ this.clear();
84
+ this._schema = schema;
85
+ }
86
+ }
87
+
88
+ /**
89
+ * Get the database schema used for pre-aggregated materialized view tables.
90
+ * @returns {string} The current schema name.
91
+ */
92
+ get schema() {
93
+ return this._schema;
94
+ }
95
+
96
+ /**
97
+ * Issues a query through the coordinator to drop the current schema for
98
+ * pre-aggregated materialized views. *All* materialized view tables in the
99
+ * schema will be removed and local state is cleared. Call this method if
100
+ * the underlying base tables have been updated, causing materialized views
101
+ * to become stale and inaccurate. Use this method with care! Once dropped,
102
+ * the schema will be repopulated by future pre-aggregation requests.
103
+ * @returns A query result promise.
104
+ */
105
+ dropSchema() {
106
+ this.clear();
107
+ return this.mc.exec(`DROP SCHEMA IF EXISTS "${this.schema}" CASCADE`);
108
+ }
109
+
110
+ /**
111
+ * Clear the cache of pre-aggregation entries for the current active
112
+ * selection clause. This method does _not_ drop any existing materialized
113
+ * views. Use `dropSchema` to remove existing materialized view tables from
114
+ * the database.
115
+ */
116
+ clear() {
117
+ this.entries.clear();
118
+ this.active = null;
119
+ }
120
+
121
+ /**
122
+ * Return pre-aggregation information for the active state of a
123
+ * client-selection pair, or null if the client has unstable filters.
124
+ * This method has multiple possible side effects, including materialized
125
+ * view creation and updating internal caches.
126
+ * @param {import('../MosaicClient.js').MosaicClient} client A Mosaic client.
127
+ * @param {import('../Selection.js').Selection} selection A Mosaic selection
128
+ * to filter the client by.
129
+ * @param {import('../util/selection-types.js').SelectionClause} activeClause
130
+ * A representative active selection clause for which to (possibly) generate
131
+ * materialized views of pre-aggregates.
132
+ * @returns {PreAggregateInfo | Skip | null} Information and query generator
133
+ * for pre-aggregated tables, or null if the client has unstable filters.
134
+ */
135
+ request(client, selection, activeClause) {
136
+ // if not enabled, do nothing
137
+ if (!this.enabled) return null;
138
+
139
+ const { entries, mc, schema } = this;
140
+ const { source } = activeClause;
141
+
142
+ // if there is no clause source to track, do nothing
143
+ if (!source) return null;
144
+
145
+ // if we have cached active columns, check for updates or exit
146
+ if (this.active) {
147
+ // if the active clause source has changed, clear the state
148
+ // this cancels outstanding requests and clears the local cache
149
+ // a clear also sets this.active to null
150
+ if (this.active.source !== source) this.clear();
151
+ // if we've seen this source and it has unstable filters, do nothing
152
+ if (this.active?.source === null) return null;
153
+ }
154
+
155
+ // the current active columns cache value
156
+ let { active } = this;
157
+
158
+ // if cached active columns are unset, analyze the active clause
159
+ if (!active) {
160
+ // generate active dimension columns to select over
161
+ // will return an object with null source if it has unstable filters
162
+ this.active = active = activeColumns(activeClause);
163
+ // if the active clause has unstable filters, exit now
164
+ if (active.source === null) return null;
165
+ }
166
+
167
+ // if we have cached pre-aggregate info, return that
168
+ if (entries.has(client)) {
169
+ return entries.get(client);
170
+ }
171
+
172
+ // get non-active materialized view columns
173
+ const preaggCols = preaggColumns(client);
174
+
175
+ let info;
176
+ if (!preaggCols) {
177
+ // if client is not indexable, record null info
178
+ info = null;
179
+ } else if (selection.skip(client, activeClause)) {
180
+ // skip client if untouched by cross-filtering
181
+ info = Skip;
182
+ } else {
183
+ // generate materialized view table
184
+ const filter = selection.remove(source).predicate(client);
185
+ info = preaggregateInfo(client.query(filter), active, preaggCols, schema);
186
+ info.result = mc.exec([
187
+ `CREATE SCHEMA IF NOT EXISTS ${schema}`,
188
+ createTable(info.table, info.create, { temp: false })
189
+ ]);
190
+ info.result.catch(e => mc.logger().error(e));
191
+ }
192
+
193
+ entries.set(client, info);
194
+ return info;
195
+ }
196
+ }
197
+
198
+ /**
199
+ * Determines the active dimension columns to select over. Returns an object
200
+ * with the clause source, column definitions, and a predicate generator
201
+ * function for the active dimensions of a pre-aggregated materialized view.
202
+ * If the active clause is not indexable or is missing metadata, this method
203
+ * returns an object with a null source property.
204
+ * @param {import('../util/selection-types.js').SelectionClause} clause
205
+ * The active selection clause to analyze.
206
+ */
207
+ function activeColumns(clause) {
208
+ const { source, meta } = clause;
209
+ const clausePred = clause.predicate;
210
+ const clauseCols = collectColumns(clausePred).map(c => c.column);
211
+ let predicate;
212
+ let columns;
213
+
214
+ if (!meta || !clauseCols) {
215
+ return { source: null, columns, predicate };
216
+ }
217
+
218
+ // @ts-ignore
219
+ const { type, scales, bin, pixelSize = 1 } = meta;
220
+
221
+ if (type === 'point') {
222
+ predicate = x => x;
223
+ columns = Object.fromEntries(
224
+ clauseCols.map(col => [`${col}`, asNode(col)])
225
+ );
226
+ } else if (type === 'interval' && scales) {
227
+ // determine pixel-level binning
228
+ const bins = scales.map(s => binInterval(s, pixelSize, bin));
229
+
230
+ if (bins.some(b => !b)) {
231
+ // bail if a scale type is unsupported
232
+ } else if (bins.length === 1) {
233
+ // selection clause predicate has type BetweenOpNode
234
+ // single interval selection
235
+ predicate = p => p ? isBetween('active0', p.extent.map(bins[0])) : [];
236
+ // @ts-ignore
237
+ columns = { active0: bins[0](clausePred.expr) };
238
+ } else {
239
+ // selection clause predicate has type AndNode<BetweenOpNode>
240
+ // multiple interval selection
241
+ predicate = p => p
242
+ ? and(p.clauses.map(
243
+ (c, i) => isBetween(`active${i}`, c.extent.map(bins[i]))
244
+ ))
245
+ : [];
246
+ columns = Object.fromEntries(
247
+ // @ts-ignore
248
+ clausePred.clauses.map((p, i) => [`active${i}`, bins[i](p.expr)])
249
+ );
250
+ }
251
+ }
252
+
253
+ return { source: columns ? source : null, columns, predicate };
254
+ }
255
+
256
+ const BIN = { ceil, round };
257
+
258
+ /**
259
+ * Returns a bin function generator to discretize a selection interval domain.
260
+ * @param {import('../util/selection-types.js').Scale} scale A scale that maps
261
+ * domain values to the output range (typically pixels).
262
+ * @param {number} pixelSize The interactive pixel size. This value indicates
263
+ * the bin step size and may be greater than an actual screen pixel.
264
+ * @param {import('../util/selection-types.js').BinMethod} bin The binning
265
+ * method to apply, one of `floor`, `ceil`, or `round`.
266
+ * @returns {(value: any) => ExprNode} A bin function generator.
267
+ */
268
+ function binInterval(scale, pixelSize, bin) {
269
+ const { type, domain, range, apply, sqlApply } = scaleTransform(scale);
270
+ if (!apply) return; // unsupported scale type
271
+ const binFn = BIN[`${bin}`.toLowerCase()] || floor;
272
+ const lo = apply(Math.min(...domain));
273
+ const hi = apply(Math.max(...domain));
274
+ const s = (type === 'identity'
275
+ ? 1
276
+ : Math.abs(range[1] - range[0]) / (hi - lo)) / pixelSize;
277
+ const scalar = s === 1
278
+ ? x => x
279
+ : x => mul(float64(s), x);
280
+ const diff = lo === 0
281
+ ? x => x
282
+ : x => sub(x, float64(lo));
283
+ return value => int32(binFn(scalar(diff(sqlApply(value)))));
284
+ }
285
+
286
+ /**
287
+ * Generate pre-aggregate query information.
288
+ * @param {SelectQuery} clientQuery The original client query.
289
+ * @param {ReturnType<activeColumns>} active Active (selected) columns.
290
+ * @param {ReturnType<preaggColumns>} preaggCols Pre-aggregation columns.
291
+ * @returns {PreAggregateInfo}
292
+ */
293
+ function preaggregateInfo(clientQuery, active, preaggCols, schema) {
294
+ const { group, output, preagg } = preaggCols;
295
+ const { columns } = active;
296
+
297
+ // build materialized view construction query
298
+ const query = clientQuery
299
+ .setSelect({ ...preagg, ...columns })
300
+ .groupby(Object.keys(columns));
301
+
302
+ // ensure active clause columns are selected by subqueries
303
+ const [subq] = query.subqueries;
304
+ if (subq) {
305
+ const cols = Object.values(columns)
306
+ .flatMap(c => collectColumns(c).map(c => c.column));
307
+ subqueryPushdown(subq, cols);
308
+ }
309
+
310
+ // push any having or orderby criteria to output queries
311
+ const having = query._having;
312
+ const order = query._orderby;
313
+ query._having = [];
314
+ query._orderby = [];
315
+
316
+ // generate creation query string and hash id
317
+ const create = query.toString();
318
+ const id = (fnv_hash(create) >>> 0).toString(16);
319
+ const table = `${schema}.preagg_${id}`;
320
+
321
+ // generate preaggregate select query
322
+ const select = Query
323
+ .select(group, output)
324
+ .from(table)
325
+ .groupby(group)
326
+ .having(having)
327
+ .orderby(order);
328
+
329
+ return new PreAggregateInfo({ table, create, active, select });
330
+ }
331
+
332
+ /**
333
+ * Push column selections down to subqueries.
334
+ */
335
+ function subqueryPushdown(query, cols) {
336
+ const memo = new Set;
337
+ const pushdown = q => {
338
+ if (memo.has(q)) return;
339
+ memo.add(q);
340
+ if (isSelectQuery(q) && q._from.length) {
341
+ q.select(cols);
342
+ }
343
+ q.subqueries.forEach(pushdown);
344
+ };
345
+ pushdown(query);
346
+ }
347
+
348
+ /**
349
+ * Metadata and query generator for materialized views of pre-aggregated data.
350
+ * This object provides the information needed to generate and query the
351
+ * materialized views for a client-selection pair relative to a specific
352
+ * active clause and selection state.
353
+ */
354
+ export class PreAggregateInfo {
355
+ /**
356
+ * Create a new pre-aggregation information instance.
357
+ * @param {object} options Options object.
358
+ * @param {string} options.table The materialized view table name.
359
+ * @param {string} options.create The table creation query.
360
+ * @param {*} options.active Active column information.
361
+ * @param {SelectQuery} options.select Base query for requesting updates
362
+ * using a pre-aggregated materialized view.
363
+ */
364
+ constructor({ table, create, active, select }) {
365
+ /**
366
+ * The name of the materialized view.
367
+ * @type {string}
368
+ */
369
+ this.table = table;
370
+ /**
371
+ * The SQL query used to generate the materialized view.
372
+ * @type {string}
373
+ */
374
+ this.create = create;
375
+ /**
376
+ * A result promise returned for the materialized view creation query.
377
+ * @type {Promise | null}
378
+ */
379
+ this.result = null;
380
+ /**
381
+ * Definitions and predicate function for the active columns,
382
+ * which are dynamically filtered by the active clause.
383
+ */
384
+ this.active = active;
385
+ /**
386
+ * Select query (sans where clause) for materialized views.
387
+ * @type {SelectQuery}
388
+ */
389
+ this.select = select;
390
+ /**
391
+ * Boolean flag indicating a client that should be skipped.
392
+ * This value is always false for a created materialized view.
393
+ * @type {boolean}
394
+ */
395
+ this.skip = false;
396
+ }
397
+
398
+ /**
399
+ * Generate a materialized view query for the given predicate.
400
+ * @param {import('@uwdata/mosaic-sql').ExprNode} predicate The current
401
+ * active clause predicate.
402
+ * @returns {SelectQuery} A materialized view query.
403
+ */
404
+ query(predicate) {
405
+ return this.select.clone().where(this.active.predicate(predicate));
406
+ }
407
+ }
@@ -0,0 +1,103 @@
1
+ import { AggregateNode, ExprNode, Query, SelectQuery, collectAggregates, isAggregateExpression, isSelectQuery, isTableRef, rewrite, sql } from '@uwdata/mosaic-sql';
2
+ import { MosaicClient } from '../MosaicClient.js';
3
+ import { sufficientStatistics } from './sufficient-statistics.js';
4
+
5
+ /**
6
+ * Determine pre-aggregation columns for a given Mosaic client.
7
+ * @param {MosaicClient} client The Mosaic client.
8
+ * @returns An object with necessary column data to generate pre-aggregated
9
+ * columns, or null if the client can't be optimized or the client query
10
+ * contains an invalid or unsupported expression.
11
+ */
12
+ export function preaggColumns(client) {
13
+ if (!client.filterStable) return null;
14
+ const q = client.query();
15
+
16
+ // bail if query is not analyzable
17
+ if (!isSelectQuery(q)) return null;
18
+
19
+ // bail if no base table
20
+ const from = getBase(q, q => {
21
+ const ref = q._from[0]?.expr;
22
+ return isTableRef(ref) ? ref.name : ref;
23
+ });
24
+ if (typeof from !== 'string') return null;
25
+
26
+ /** @type {Map<AggregateNode, ExprNode>} */
27
+ const aggrs = new Map;
28
+ /** @type {Record<string, ExprNode>} */
29
+ const preagg = {};
30
+ /** @type {Record<string, ExprNode>} */
31
+ const output = {};
32
+ /** @type {string[]} */
33
+ const group = []; // list of grouping dimension columns
34
+
35
+ // generate a scalar subquery for a global average
36
+ const avg = ref => {
37
+ const name = ref.column;
38
+ const expr = getBase(q, q => q._select.find(c => c.alias === name)?.expr);
39
+ return sql`(SELECT avg(${expr ?? ref}) FROM "${from}")`;
40
+ };
41
+
42
+ // iterate over select clauses and analyze expressions
43
+ for (const { alias, expr } of q._select) {
44
+ // bail if there is an aggregate we can't analyze
45
+ // a value > 1 indicates an aggregate in verbatim text
46
+ if (isAggregateExpression(expr) > 1) return null;
47
+
48
+ const nodes = collectAggregates(expr);
49
+ if (nodes.length === 0) {
50
+ // if no aggregates, expr is a groupby dimension
51
+ group.push(alias);
52
+ preagg[alias] = expr;
53
+ } else {
54
+ for (const node of nodes) {
55
+ // bail if distinct aggregate
56
+ if (node.isDistinct) return null;
57
+
58
+ // bail if aggregate function is unsupported
59
+ // otherwise add output aggregate to rewrite map
60
+ const agg = sufficientStatistics(node, preagg, avg);
61
+ if (!agg) return null;
62
+ aggrs.set(node, agg);
63
+ }
64
+
65
+ // rewrite original select clause to use preaggregates
66
+ output[alias] = rewrite(expr, aggrs);
67
+ }
68
+ }
69
+
70
+ // bail if the query has no aggregates
71
+ if (!aggrs.size) return null;
72
+
73
+ return { group, preagg, output };
74
+ }
75
+
76
+ /**
77
+ * Identify a shared base (source) query and extract a value from it.
78
+ * This method is used to find a shared base table name or extract
79
+ * the original column name within a base table.
80
+ * @param {Query} query The input query.
81
+ * @param {(q: SelectQuery) => any} get A getter function to extract
82
+ * a value from a base query.
83
+ * @returns {string | undefined | NaN} the base query value, or
84
+ * `undefined` if there is no source table, or `NaN` if the
85
+ * query operates over multiple source tables.
86
+ */
87
+ function getBase(query, get) {
88
+ const subq = query.subqueries;
89
+
90
+ // select query
91
+ if (isSelectQuery(query) && subq.length === 0) {
92
+ return get(query);
93
+ }
94
+
95
+ // handle set operations / subqueries
96
+ const base = getBase(subq[0], get);
97
+ for (let i = 1; i < subq.length; ++i) {
98
+ const value = getBase(subq[i], get);
99
+ if (value === undefined) continue;
100
+ if (value !== base) return NaN;
101
+ }
102
+ return base;
103
+ }