@uwdata/mosaic-core 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/mosaic-core.js +11613 -10856
- package/dist/mosaic-core.min.js +7 -7
- package/dist/types/Coordinator.d.ts +169 -0
- package/dist/types/MosaicClient.d.ts +94 -0
- package/dist/types/Param.d.ts +47 -0
- package/dist/types/QueryConsolidator.d.ts +9 -0
- package/dist/types/QueryManager.d.ts +64 -0
- package/dist/types/Selection.d.ts +224 -0
- package/dist/types/SelectionClause.d.ts +105 -0
- package/dist/types/connectors/rest.d.ts +17 -0
- package/dist/types/connectors/socket.d.ts +18 -0
- package/dist/types/connectors/wasm.d.ts +16 -0
- package/dist/types/index.d.ts +25 -0
- package/dist/types/preagg/PreAggregator.d.ts +178 -0
- package/dist/types/preagg/preagg-columns.d.ts +14 -0
- package/dist/types/preagg/sufficient-statistics.d.ts +13 -0
- package/dist/types/util/AsyncDispatch.d.ts +100 -0
- package/dist/types/util/cache.d.ts +13 -0
- package/dist/types/util/decode-ipc.d.ts +7 -0
- package/dist/types/util/distinct.d.ts +2 -0
- package/dist/types/util/field-info.d.ts +13 -0
- package/dist/types/util/hash.d.ts +1 -0
- package/dist/types/util/is-arrow-table.d.ts +8 -0
- package/dist/types/util/js-type.d.ts +1 -0
- package/dist/types/util/priority-queue.d.ts +37 -0
- package/dist/types/util/query-result.d.ts +44 -0
- package/dist/types/util/selection-types.d.ts +114 -0
- package/dist/types/util/synchronizer.d.ts +29 -0
- package/dist/types/util/throttle.d.ts +11 -0
- package/dist/types/util/to-data-columns.d.ts +29 -0
- package/dist/types/util/void-logger.d.ts +7 -0
- package/jsconfig.json +11 -0
- package/package.json +10 -8
- package/src/Coordinator.js +14 -14
- package/src/MosaicClient.js +5 -4
- package/src/QueryConsolidator.js +22 -33
- package/src/QueryManager.js +76 -45
- package/src/Selection.js +8 -5
- package/src/SelectionClause.js +20 -23
- package/src/connectors/rest.js +3 -1
- package/src/connectors/socket.js +3 -1
- package/src/connectors/wasm.js +1 -1
- package/src/index.js +13 -0
- package/src/preagg/PreAggregator.js +407 -0
- package/src/preagg/preagg-columns.js +103 -0
- package/src/preagg/sufficient-statistics.js +439 -0
- package/src/util/field-info.js +16 -5
- package/src/util/hash.js +1 -1
- package/src/util/query-result.js +44 -2
- package/src/util/selection-types.ts +3 -3
- package/src/util/throttle.js +11 -9
- package/src/util/void-logger.js +6 -5
- package/tsconfig.json +11 -0
- package/src/DataCubeIndexer.js +0 -378
- package/src/util/index-columns.js +0 -537
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
import { Query, and, asNode, ceil, collectColumns, createTable, float64, floor, isBetween, int32, mul, round, scaleTransform, sub, isSelectQuery, ExprNode, SelectQuery } from '@uwdata/mosaic-sql';
|
|
2
|
+
import { preaggColumns } from './preagg-columns.js';
|
|
3
|
+
import { fnv_hash } from '../util/hash.js';
|
|
4
|
+
|
|
5
|
+
const Skip = { skip: true, result: null };
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* @typedef {object} PreAggregateOptions
|
|
9
|
+
* @property {string} [schema] Database schema (namespace) in which to write
|
|
10
|
+
* pre-aggregated materialized views (default 'mosaic').
|
|
11
|
+
* @property {boolean} [enabled=true] Flag to enable or disable the
|
|
12
|
+
* pre-aggregation. This flag can be updated later via the `enabled` property.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Build and query optimized pre-aggregated materialized views, for fast
|
|
17
|
+
* computation of groupby aggregate queries over compatible client queries
|
|
18
|
+
* and selections. The materialized views contain pre-aggregated data for a
|
|
19
|
+
* Mosaic client, subdivided by possible query values from an active selection
|
|
20
|
+
* clause. These materialized views are database tables that can be queried
|
|
21
|
+
* for rapid updates.
|
|
22
|
+
*
|
|
23
|
+
* Compatible client queries must consist of only groupby dimensions and
|
|
24
|
+
* supported aggregate functions. Compatible selections must contain an active
|
|
25
|
+
* clause that exposes metadata for an interval or point value predicate.
|
|
26
|
+
*
|
|
27
|
+
* Materialized views are written to a dedicated schema (namespace) that
|
|
28
|
+
* can be set using the *schema* constructor option. This schema acts as a
|
|
29
|
+
* persistent cache, and materialized view tables may be used across sessions.
|
|
30
|
+
* The `dropSchema` method issues a query to remove *all* tables within this
|
|
31
|
+
* schema. This may be needed if the original tables have updated data, but
|
|
32
|
+
* should be used with care.
|
|
33
|
+
*/
|
|
34
|
+
export class PreAggregator {
  /**
   * Create a new manager of materialized views of pre-aggregated data.
   * @param {import('../Coordinator.js').Coordinator} coordinator A Mosaic
   *  coordinator, used to issue queries and to access the logger.
   * @param {PreAggregateOptions} [options] Pre-aggregation options.
   */
  constructor(coordinator, { schema = 'mosaic', enabled = true } = {}) {
    /** @type {Map<import('../MosaicClient.js').MosaicClient, PreAggregateInfo | Skip | null>} */
    this.entries = new Map();
    this.active = null;
    this.mc = coordinator;
    this._schema = schema;
    this._enabled = enabled;
  }

  /**
   * Set the enabled state of this manager. Switching to a falsy state
   * clears the local state first; while disabled, `request` returns null.
   * Pre-aggregated tables already in the database are not touched.
   * @param {boolean} [state] The enabled state to set.
   */
  set enabled(state) {
    if (state === this._enabled) return;
    if (!state) this.clear();
    this._enabled = state;
  }

  /**
   * Get the enabled state of this manager.
   * @returns {boolean} The current enabled state.
   */
  get enabled() {
    return this._enabled;
  }

  /**
   * Set the database schema used for pre-aggregated materialized view
   * tables. A change of schema clears the local state. Existing
   * materialized views are _not_ dropped; call `dropSchema` before changing
   * the schema to also remove them from the database.
   * @param {string} [schema] The schema name to set.
   */
  set schema(schema) {
    if (schema === this._schema) return;
    this.clear();
    this._schema = schema;
  }

  /**
   * Get the database schema used for pre-aggregated materialized view tables.
   * @returns {string} The current schema name.
   */
  get schema() {
    return this._schema;
  }

  /**
   * Clear local state, then issue a query through the coordinator to drop
   * the current schema (with CASCADE), removing *all* materialized view
   * tables within it. Call this method if the underlying base tables have
   * been updated, causing materialized views to become stale. Use with care!
   * Once dropped, the schema is re-created by later pre-aggregation requests.
   * @returns A query result promise.
   */
  dropSchema() {
    this.clear();
    const { mc, schema } = this;
    return mc.exec(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
  }

  /**
   * Clear the cache of pre-aggregation entries and forget the current
   * active selection clause. No materialized views are dropped; use
   * `dropSchema` to remove materialized view tables from the database.
   */
  clear() {
    this.entries.clear();
    this.active = null;
  }

  /**
   * Return pre-aggregation information for the active state of a
   * client-selection pair, or null if the client has unstable filters.
   * This method has multiple possible side effects, including materialized
   * view creation and updating internal caches.
   * @param {import('../MosaicClient.js').MosaicClient} client A Mosaic client.
   * @param {import('../Selection.js').Selection} selection A Mosaic selection
   *  to filter the client by.
   * @param {import('../util/selection-types.js').SelectionClause} activeClause
   *  A representative active selection clause for which to (possibly) generate
   *  materialized views of pre-aggregates.
   * @returns {PreAggregateInfo | Skip | null} Information and query generator
   *  for pre-aggregated tables, or null if the client has unstable filters.
   */
  request(client, selection, activeClause) {
    // nothing to do while disabled
    if (!this.enabled) return null;

    // nothing to do without a clause source to track
    const { source } = activeClause;
    if (!source) return null;

    if (this.active) {
      // a different clause source invalidates all cached state;
      // clear() also resets this.active to null
      if (this.active.source !== source) this.clear();
      // a cached null source marks a clause that could not be analyzed
      if (this.active?.source === null) return null;
    }

    // analyze the active clause if no analysis is cached
    let active = this.active;
    if (!active) {
      active = activeColumns(activeClause);
      this.active = active;
      // a null source means the clause cannot drive pre-aggregation
      if (active.source === null) return null;
    }

    // reuse the cached entry for this client, whatever its value
    const { entries } = this;
    if (entries.has(client)) {
      return entries.get(client);
    }

    // null is recorded when no pre-aggregation columns can be derived
    let info = null;
    const preaggCols = preaggColumns(client);
    if (preaggCols) {
      if (selection.skip(client, activeClause)) {
        // the selection reports that this client should be skipped
        info = Skip;
      } else {
        // build the view from the client query filtered by all clauses
        // except the active one, then create schema and table
        const { mc, schema } = this;
        const filter = selection.remove(source).predicate(client);
        info = preaggregateInfo(client.query(filter), active, preaggCols, schema);
        info.result = mc.exec([
          `CREATE SCHEMA IF NOT EXISTS ${schema}`,
          createTable(info.table, info.create, { temp: false })
        ]);
        // log creation failures rather than leaving the rejection unhandled
        info.result.catch(e => mc.logger().error(e));
      }
    }

    entries.set(client, info);
    return info;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Determines the active dimension columns to select over. Returns an object
 * with the clause source, column definitions, and a predicate generator
 * function for the active dimensions of a pre-aggregated materialized view.
 * If the clause has no metadata, its predicate references no columns, or
 * its metadata describes an unsupported selection or scale type, this method
 * returns an object with a null source property.
 * @param {import('../util/selection-types.js').SelectionClause} clause
 *  The active selection clause to analyze.
 * @returns {{ source: any, columns: Record<string, any> | undefined,
 *  predicate: Function | undefined }} The analysis result; `columns` and
 *  `predicate` are undefined whenever `source` is null.
 */
function activeColumns(clause) {
  const { source, meta } = clause;
  const clausePred = clause.predicate;
  const clauseCols = collectColumns(clausePred).map(c => c.column);
  let predicate;
  let columns;

  // An array is always truthy, so test its length: a predicate that
  // references no columns provides nothing to pre-aggregate over.
  if (!meta || clauseCols.length === 0) {
    return { source: null, columns, predicate };
  }

  // @ts-ignore
  const { type, scales, bin, pixelSize = 1 } = meta;

  if (type === 'point') {
    // point selections pass the clause predicate through unchanged and
    // select the referenced columns directly
    predicate = x => x;
    columns = Object.fromEntries(
      clauseCols.map(col => [`${col}`, asNode(col)])
    );
  } else if (type === 'interval' && scales) {
    // determine pixel-level binning, one bin function per scale
    const bins = scales.map(s => binInterval(s, pixelSize, bin));

    if (bins.some(b => !b)) {
      // bail if a scale type is unsupported (columns stays undefined)
    } else if (bins.length === 1) {
      // selection clause predicate has type BetweenOpNode
      // single interval selection
      predicate = p => p ? isBetween('active0', p.extent.map(bins[0])) : [];
      // @ts-ignore
      columns = { active0: bins[0](clausePred.expr) };
    } else {
      // selection clause predicate has type AndNode<BetweenOpNode>
      // multiple interval selection
      predicate = p => p
        ? and(p.clauses.map(
            (c, i) => isBetween(`active${i}`, c.extent.map(bins[i]))
          ))
        : [];
      columns = Object.fromEntries(
        // @ts-ignore
        clausePred.clauses.map((p, i) => [`active${i}`, bins[i](p.expr)])
      );
    }
  }

  // without column definitions the clause cannot be pre-aggregated
  return { source: columns ? source : null, columns, predicate };
}
|
|
255
|
+
|
|
256
|
+
const BIN = { ceil, round };
|
|
257
|
+
|
|
258
|
+
/**
 * Returns a bin function generator to discretize a selection interval domain.
 * @param {import('../util/selection-types.js').Scale} scale A scale that maps
 *  domain values to the output range (typically pixels).
 * @param {number} pixelSize The interactive pixel size. This value indicates
 *  the bin step size and may be greater than an actual screen pixel.
 * @param {import('../util/selection-types.js').BinMethod} bin The binning
 *  method to apply, one of `floor`, `ceil`, or `round`. Matching is
 *  case-insensitive; any other value falls back to `floor`.
 * @returns {((value: any) => ExprNode) | undefined} A bin function
 *  generator, or undefined if the scale is unsupported or degenerate
 *  (the scale factor is not a finite number).
 */
function binInterval(scale, pixelSize, bin) {
  // tolerate a nullish transform so that an unsupported scale takes the
  // same early exit as a transform without an `apply` function
  const { type, domain, range, apply, sqlApply } = scaleTransform(scale) ?? {};
  if (!apply) return; // unsupported scale type

  // only own keys of BIN name a binning method; inherited properties
  // such as `constructor` must fall back to floor
  const method = `${bin}`.toLowerCase();
  const binFn = Object.prototype.hasOwnProperty.call(BIN, method)
    ? BIN[method]
    : floor;

  const lo = apply(Math.min(...domain));
  const hi = apply(Math.max(...domain));
  const s = (type === 'identity'
    ? 1
    : Math.abs(range[1] - range[0]) / (hi - lo)) / pixelSize;

  // a zero-width domain or zero pixel size gives Infinity or NaN, which
  // cannot be used as a SQL scale factor: report the scale as unusable
  if (!Number.isFinite(s)) return;

  // skip no-op arithmetic in the generated expression
  const scalar = s === 1
    ? x => x
    : x => mul(float64(s), x);
  const diff = lo === 0
    ? x => x
    : x => sub(x, float64(lo));
  return value => int32(binFn(scalar(diff(sqlApply(value)))));
}
|
|
285
|
+
|
|
286
|
+
/**
 * Generate pre-aggregate query information: the query that creates the
 * materialized view, the name of the table it is written to, and the base
 * query used to read aggregate results back from that table.
 * @param {SelectQuery} clientQuery The original client query. It is
 *  modified by this function.
 * @param {ReturnType<activeColumns>} active Active (selected) columns.
 * @param {ReturnType<preaggColumns>} preaggCols Pre-aggregation columns.
 * @param {string} schema The database schema in which the table is named.
 * @returns {PreAggregateInfo}
 */
function preaggregateInfo(clientQuery, active, preaggCols, schema) {
  const { columns } = active;

  // the view selects the pre-aggregate columns plus the active columns,
  // and is additionally grouped by the active columns
  const viewQuery = clientQuery
    .setSelect({ ...preaggCols.preagg, ...columns })
    .groupby(Object.keys(columns));

  // ensure active clause columns are selected by subqueries
  const firstSubquery = viewQuery.subqueries[0];
  if (firstSubquery) {
    const names = [];
    for (const expr of Object.values(columns)) {
      for (const ref of collectColumns(expr)) {
        names.push(ref.column);
      }
    }
    subqueryPushdown(firstSubquery, names);
  }

  // having and orderby criteria are applied by the output query instead,
  // so detach them from the view construction query
  const having = viewQuery._having;
  const order = viewQuery._orderby;
  viewQuery._having = [];
  viewQuery._orderby = [];

  // the table name is derived from a hash of the creation query text
  const create = viewQuery.toString();
  const hash = fnv_hash(create) >>> 0;
  const table = `${schema}.preagg_${hash.toString(16)}`;

  // base query over the materialized view
  const { group, output } = preaggCols;
  const select = Query
    .select(group, output)
    .from(table)
    .groupby(group)
    .having(having)
    .orderby(order);

  return new PreAggregateInfo({ table, create, active, select });
}
|
|
331
|
+
|
|
332
|
+
/**
 * Push column selections down to subqueries. Starting at the given query,
 * every reachable query is visited once (depth-first, in subquery order);
 * each select query with a non-empty from clause has the columns added to
 * its select list.
 * @param {*} query The query at which to start.
 * @param {string[]} cols The column names to select.
 */
function subqueryPushdown(query, cols) {
  // queries already handled, so shared subqueries are only updated once
  const visited = new Set();

  function visit(q) {
    if (visited.has(q)) return;
    visited.add(q);

    const selectable = isSelectQuery(q) && q._from.length;
    if (selectable) {
      q.select(cols);
    }

    for (const child of q.subqueries) {
      visit(child);
    }
  }

  visit(query);
}
|
|
347
|
+
|
|
348
|
+
/**
 * Metadata and query generator for materialized views of pre-aggregated
 * data. An instance bundles the view's table name, the query that creates
 * it, the active column information, and a base select query from which
 * filtered queries against the view are derived.
 */
export class PreAggregateInfo {
  /**
   * Create a new pre-aggregation information instance.
   * @param {object} options Options object.
   * @param {string} options.table The materialized view table name.
   * @param {string} options.create The table creation query.
   * @param {*} options.active Active column information.
   * @param {SelectQuery} options.select Base query for requesting updates
   *  using a pre-aggregated materialized view.
   */
  constructor(options) {
    /**
     * The name of the materialized view.
     * @type {string}
     */
    this.table = options.table;

    /**
     * The SQL query used to generate the materialized view.
     * @type {string}
     */
    this.create = options.create;

    /**
     * A result promise returned for the materialized view creation query.
     * Initialized to null by this constructor.
     * @type {Promise | null}
     */
    this.result = null;

    /**
     * Definitions and predicate function for the active columns,
     * which are dynamically filtered by the active clause.
     */
    this.active = options.active;

    /**
     * Select query (sans where clause) for materialized views.
     * @type {SelectQuery}
     */
    this.select = options.select;

    /**
     * Boolean flag indicating a client that should be skipped.
     * This value is always false for a created materialized view.
     * @type {boolean}
     */
    this.skip = false;
  }

  /**
   * Generate a materialized view query for the given predicate. The base
   * select query is cloned, so it is never modified by this method.
   * @param {import('@uwdata/mosaic-sql').ExprNode} predicate The current
   *  active clause predicate.
   * @returns {SelectQuery} A materialized view query.
   */
  query(predicate) {
    const viewQuery = this.select.clone();
    // translate the clause predicate into one over the active columns
    const filter = this.active.predicate(predicate);
    return viewQuery.where(filter);
  }
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { AggregateNode, ExprNode, Query, SelectQuery, collectAggregates, isAggregateExpression, isSelectQuery, isTableRef, rewrite, sql } from '@uwdata/mosaic-sql';
|
|
2
|
+
import { MosaicClient } from '../MosaicClient.js';
|
|
3
|
+
import { sufficientStatistics } from './sufficient-statistics.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Determine pre-aggregation columns for a given Mosaic client.
 * @param {MosaicClient} client The Mosaic client.
 * @returns An object with the groupby dimension names (`group`), the
 *  columns to materialize (`preagg`), and the rewritten output expressions
 *  (`output`); or null if the client can't be optimized or the client query
 *  contains an invalid or unsupported expression.
 */
export function preaggColumns(client) {
  if (!client.filterStable) return null;

  // bail if query is not analyzable
  const q = client.query();
  if (!isSelectQuery(q)) return null;

  // bail unless the base queries resolve to a single table name
  const from = getBase(q, base => {
    const ref = base._from[0]?.expr;
    return isTableRef(ref) ? ref.name : ref;
  });
  if (typeof from !== 'string') return null;

  /** @type {Map<AggregateNode, ExprNode>} */
  const aggrs = new Map();
  /** @type {Record<string, ExprNode>} */
  const preagg = {};
  /** @type {Record<string, ExprNode>} */
  const output = {};
  /** @type {string[]} */
  const group = []; // names of the grouping dimension columns

  // generate a scalar subquery for a global average, using the base query
  // expression aliased to the referenced column when one is found
  const avg = ref => {
    const name = ref.column;
    const expr = getBase(
      q,
      base => base._select.find(c => c.alias === name)?.expr
    );
    return sql`(SELECT avg(${expr ?? ref}) FROM "${from}")`;
  };

  // analyze each select clause in turn
  for (const { alias, expr } of q._select) {
    // bail if there is an aggregate we can't analyze
    // a value > 1 indicates an aggregate in verbatim text
    if (isAggregateExpression(expr) > 1) return null;

    const nodes = collectAggregates(expr);

    // without aggregates, the expression is a groupby dimension
    if (nodes.length === 0) {
      group.push(alias);
      preagg[alias] = expr;
      continue;
    }

    for (const node of nodes) {
      // bail if distinct aggregate
      if (node.isDistinct) return null;

      // bail if aggregate function is unsupported,
      // otherwise add the output aggregate to the rewrite map
      const agg = sufficientStatistics(node, preagg, avg);
      if (!agg) return null;
      aggrs.set(node, agg);
    }

    // rewrite original select clause to use preaggregates
    output[alias] = rewrite(expr, aggrs);
  }

  // bail if the query has no aggregates
  return aggrs.size ? { group, preagg, output } : null;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Identify a shared base (source) query and extract a value from it.
 * This method is used to find a shared base table name or extract
 * the original column name within a base table.
 * @param {Query} query The input query.
 * @param {(q: SelectQuery) => any} get A getter function to extract
 *  a value from a base query.
 * @returns {string | undefined | NaN} the base query value, or
 *  `undefined` if there is no source table, or `NaN` if the
 *  query operates over multiple source tables.
 */
function getBase(query, get) {
  const subq = query.subqueries;

  // leaf query: a select query is a base query; any other query without
  // subqueries has no source to inspect (recursing into a missing first
  // subquery would throw)
  if (subq.length === 0) {
    return isSelectQuery(query) ? get(query) : undefined;
  }

  // handle set operations / subqueries: every later subquery must agree
  // with the value of the first, though undefined values are ignored
  const base = getBase(subq[0], get);
  for (let i = 1; i < subq.length; ++i) {
    const value = getBase(subq[i], get);
    if (value === undefined) continue;
    if (value !== base) return NaN;
  }
  return base;
}
|