@uwdata/mosaic-core 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,252 +1,268 @@
1
- import { Query, and, create, isBetween, scaleTransform, sql } from '@uwdata/mosaic-sql';
1
+ import {
2
+ Query, and, asColumn, create, isBetween, scaleTransform, sql
3
+ } from '@uwdata/mosaic-sql';
4
+ import { indexColumns } from './util/index-columns.js';
2
5
  import { fnv_hash } from './util/hash.js';
3
6
 
7
+ const Skip = { skip: true, result: null };
8
+
4
9
  /**
5
10
  * Build and query optimized indices ("data cubes") for fast computation of
6
11
  * groupby aggregate queries over compatible client queries and selections.
7
12
  * A data cube contains pre-aggregated data for a Mosaic client, subdivided
8
- * by possible query values from an active view. Indexes are realized as
9
- * as temporary database tables that can be queried for rapid updates.
10
- * Compatible client queries must pull data from the same backing table and
11
- * must consist of only groupby dimensions and supported aggregates.
12
- * Compatible selections must contain an active clause that exposes a schema
13
- * for an interval or point value predicate.
13
+ * by possible query values from an active selection clause. These cubes are
14
+ * realized as database tables that can be queried for rapid updates.
15
+ * Compatible client queries must consist of only groupby dimensions and
16
+ * supported aggregate functions. Compatible selections must contain an active
17
+ * clause that exposes metadata for an interval or point value predicate.
14
18
  */
15
19
  export class DataCubeIndexer {
16
20
  /**
17
- *
18
- * @param {import('./Coordinator.js').Coordinator} mc a Mosaic coordinator
19
- * @param {*} options Options hash to configure the data cube indexes and pass selections to the coordinator.
21
+ * Create a new data cube index table manager.
22
+ * @param {import('./Coordinator.js').Coordinator} coordinator A Mosaic coordinator.
23
+ * @param {object} [options] Indexer options.
24
+ * @param {boolean} [options.enabled=true] Flag to enable/disable indexer.
25
+ * @param {boolean} [options.temp=true] Flag to indicate if generated data
26
+ * cube index tables should be temporary tables.
20
27
  */
21
- constructor(mc, { selection, temp = true }) {
22
- /** @type import('./Coordinator.js').Coordinator */
23
- this.mc = mc;
24
- this.selection = selection;
28
+ constructor(coordinator, {
29
+ enabled = true,
30
+ temp = true
31
+ } = {}) {
32
+ /** @type {Map<import('./MosaicClient.js').MosaicClient, DataCubeInfo | Skip | null>} */
33
+ this.indexes = new Map();
34
+ this.active = null;
25
35
  this.temp = temp;
26
- this.reset();
36
+ this.mc = coordinator;
37
+ this._enabled = enabled;
27
38
  }
28
39
 
29
- reset() {
30
- this.enabled = false;
31
- this.clients = null;
32
- this.indices = null;
33
- this.activeView = null;
40
+ /**
41
+ * Set the enabled state of this indexer. If false, any cached state is
42
+ * cleared and subsequent index calls will return null until re-enabled.
43
+ * @param {boolean} state The enabled state.
44
+ */
45
+ enabled(state) {
46
+ if (state === undefined) {
47
+ return this._enabled;
48
+ } else if (this._enabled !== state) {
49
+ if (!state) this.clear();
50
+ this._enabled = state;
51
+ }
34
52
  }
35
53
 
54
+ /**
55
+ * Clear the cache of data cube index table entries for the current active
56
+ * selection clause. This method will also cancel any queued data cube table
57
+ * creation queries that have not yet been submitted to the database. This
58
+ * method does _not_ drop any existing data cube tables.
59
+ */
36
60
  clear() {
37
- if (this.indices) {
38
- this.mc.cancel(Array.from(this.indices.values(), index => index.result));
39
- this.indices = null;
40
- }
61
+ this.mc.cancel(Array.from(this.indexes.values(), info => info?.result));
62
+ this.indexes.clear();
63
+ this.active = null;
41
64
  }
42
65
 
43
- index(clients, active) {
44
- if (this.clients !== clients) {
45
- // test client views for compatibility
46
- const cols = Array.from(clients, getIndexColumns);
47
- const from = cols[0]?.from;
48
- this.enabled = cols.every(c => c && c.from === from);
49
- this.clients = clients;
50
- this.activeView = null;
51
- this.clear();
52
- }
53
- if (!this.enabled) return false; // client views are not indexable
54
-
55
- active = active || this.selection.active;
56
- const { source } = active;
57
- // exit early if indexes already set up for active view
58
- if (source && source === this.activeView?.source) return true;
59
-
60
- this.clear();
61
- if (!source) return false; // nothing to work with
62
- const activeView = this.activeView = getActiveView(active);
63
- if (!activeView) return false; // active selection clause not compatible
64
-
65
- this.mc.logger().warn('DATA CUBE INDEX CONSTRUCTION');
66
-
67
- // create a selection with the active source removed
68
- const sel = this.selection.remove(source);
69
-
70
- // generate data cube indices
71
- const indices = this.indices = new Map;
72
- const { mc, temp } = this;
73
- for (const client of clients) {
74
- if (sel.skip(client, active)) continue;
75
- const index = getIndexColumns(client);
76
-
77
- // build index construction query
78
- const query = client.query(sel.predicate(client))
79
- .select({ ...activeView.columns, ...index.aux })
80
- .groupby(Object.keys(activeView.columns));
81
-
82
- // ensure active view columns are selected by subqueries
83
- const [subq] = query.subqueries;
84
- if (subq) {
85
- const cols = Object.values(activeView.columns).map(c => c.columns[0]);
86
- subqueryPushdown(subq, cols);
87
- }
88
-
89
- // push orderby criteria to later cube queries
90
- const order = query.orderby();
91
- query.query.orderby = [];
92
-
93
- const sql = query.toString();
94
- const id = (fnv_hash(sql) >>> 0).toString(16);
95
- const table = `cube_index_${id}`;
96
- const result = mc.exec(create(table, sql, { temp }));
97
- indices.set(client, { table, result, order, ...index });
66
+ /**
67
+ * Return data cube index table information for the active state of a
68
+ * client-selection pair, or null if the client is not indexable. This
69
+ * method has multiple possible side effects, including data cube table
70
+ * generation and updating internal caches.
71
+ * @param {import('./MosaicClient.js').MosaicClient} client A Mosaic client.
72
+ * @param {import('./Selection.js').Selection} selection A Mosaic selection
73
+ * to filter the client by.
74
+ * @param {import('./util/selection-types.js').SelectionClause} activeClause
75
+ * A representative active selection clause for which to (possibly) generate
76
+ * data cube index tables.
77
+ * @returns {DataCubeInfo | Skip | null} Data cube index table
78
+ * information and query generator, or null if the client is not indexable.
79
+ */
80
+ index(client, selection, activeClause) {
81
+ // if not enabled, do nothing
82
+ if (!this._enabled) return null;
83
+
84
+ const { indexes, mc, temp } = this;
85
+ const { source } = activeClause;
86
+
87
+ // if there is no clause source to track, do nothing
88
+ if (!source) return null;
89
+
90
+ // if we have cached active columns, check for updates or exit
91
+ if (this.active) {
92
+ // if the active clause source has changed, clear indexer state
93
+ // this cancels outstanding requests and clears the index cache
94
+ // a clear also sets this.active to null
95
+ if (this.active.source !== source) this.clear();
96
+ // if we've seen this source and it's not indexable, do nothing
97
+ if (this.active?.source === null) return null;
98
98
  }
99
99
 
100
- // index creation successful
101
- return true;
102
- }
100
+ // the current active columns cache value
101
+ let { active } = this;
103
102
 
104
- async update() {
105
- const { clients, selection, activeView } = this;
106
- const filter = activeView.predicate(selection.active.predicate);
107
- return Promise.all(
108
- Array.from(clients).map(client => this.updateClient(client, filter))
109
- );
110
- }
103
+ // if cached active columns are unset, analyze the active clause
104
+ if (!active) {
105
+ // generate active data cube dimension columns to select over
106
+ // will return an object with null source if not indexable
107
+ this.active = active = activeColumns(activeClause);
108
+ // if the active clause is not indexable, exit now
109
+ if (active.source === null) return null;
110
+ }
111
+
112
+ // if we have cached data cube index table info, return that
113
+ if (indexes.has(client)) {
114
+ return indexes.get(client);
115
+ }
111
116
 
112
- async updateClient(client, filter) {
113
- const index = this.indices.get(client);
114
- if (!index) return;
117
+ // get non-active data cube index table columns
118
+ const indexCols = indexColumns(client);
115
119
 
116
- if (!filter) {
117
- filter = this.activeView.predicate(this.selection.active.predicate);
120
+ let info;
121
+ if (!indexCols) {
122
+ // if client is not indexable, record null index
123
+ info = null;
124
+ } else if (selection.skip(client, activeClause)) {
125
+ // skip client if untouched by cross-filtering
126
+ info = Skip;
127
+ } else {
128
+ // generate data cube index table
129
+ const filter = selection.remove(source).predicate(client);
130
+ info = dataCubeInfo(client.query(filter), active, indexCols);
131
+ info.result = mc.exec(create(info.table, info.create, { temp }));
132
+ info.result.catch(e => mc.logger().error(e));
118
133
  }
119
134
 
120
- const { table, dims, aggr, order = [] } = index;
121
- const query = Query
122
- .select(dims, aggr)
123
- .from(table)
124
- .groupby(dims)
125
- .where(filter)
126
- .orderby(order);
127
- return this.mc.updateClient(client, query);
135
+ indexes.set(client, info);
136
+ return info;
128
137
  }
129
138
  }
130
139
 
131
- function getActiveView(clause) {
132
- const { source, schema } = clause;
133
- let columns = clause.predicate?.columns;
134
- if (!schema || !columns) return null;
135
- const { type, scales, pixelSize = 1 } = schema;
140
+ /**
141
+ * Determines the active data cube dimension columns to select over. Returns
142
+ * an object with the clause source, column definitions, and a predicate
143
+ * generator function for the active dimensions of a data cube index table. If
144
+ * the active clause is not indexable or is missing metadata, this method
145
+ * returns an object with a null source property.
146
+ * @param {import('./util/selection-types.js').SelectionClause} clause The
147
+ * active selection clause to analyze.
148
+ */
149
+ function activeColumns(clause) {
150
+ const { source, meta } = clause;
151
+ const clausePred = clause.predicate;
152
+ const clauseCols = clausePred?.columns;
136
153
  let predicate;
154
+ let columns;
155
+
156
+ if (!meta || !clauseCols) {
157
+ return { source: null, columns, predicate };
158
+ }
137
159
 
138
- if (type === 'interval' && scales) {
139
- const bins = scales.map(s => binInterval(s, pixelSize));
140
- if (bins.some(b => b == null)) return null; // unsupported scale type
160
+ // @ts-ignore
161
+ const { type, scales, bin, pixelSize = 1 } = meta;
141
162
 
142
- if (bins.length === 1) {
163
+ if (type === 'point') {
164
+ predicate = x => x;
165
+ columns = Object.fromEntries(
166
+ clauseCols.map(col => [`${col}`, asColumn(col)])
167
+ );
168
+ } else if (type === 'interval' && scales) {
169
+ // determine pixel-level binning
170
+ const bins = scales.map(s => binInterval(s, pixelSize, bin));
171
+
172
+ if (bins.some(b => !b)) {
173
+ // bail if a scale type is unsupported
174
+ } else if (bins.length === 1) {
175
+ // single interval selection
143
176
  predicate = p => p ? isBetween('active0', p.range.map(bins[0])) : [];
144
- columns = { active0: bins[0](clause.predicate.field) };
177
+ // @ts-ignore
178
+ columns = { active0: bins[0](clausePred.field) };
145
179
  } else {
180
+ // multiple interval selection
146
181
  predicate = p => p
147
- ? and(p.children.map(({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))))
182
+ ? and(p.children.map(
183
+ ({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))
184
+ ))
148
185
  : [];
149
186
  columns = Object.fromEntries(
150
- clause.predicate.children.map((p, i) => [`active${i}`, bins[i](p.field)])
187
+ // @ts-ignore
188
+ clausePred.children.map((p, i) => [`active${i}`, bins[i](p.field)])
151
189
  );
152
190
  }
153
- } else if (type === 'point') {
154
- predicate = x => x;
155
- columns = Object.fromEntries(columns.map(col => [col.toString(), col]));
156
- } else {
157
- return null; // unsupported type
158
191
  }
159
192
 
160
- return { source, columns, predicate };
193
+ return { source: columns ? source : null, columns, predicate };
161
194
  }
162
195
 
163
- function binInterval(scale, pixelSize) {
164
- const { apply, sqlApply } = scaleTransform(scale);
165
- if (apply) {
166
- const { domain, range } = scale;
167
- const lo = apply(Math.min(...domain));
168
- const hi = apply(Math.max(...domain));
169
- const a = (Math.abs(range[1] - range[0]) / (hi - lo)) / pixelSize;
170
- const s = pixelSize === 1 ? '' : `${pixelSize}::INTEGER * `;
171
- return value => sql`${s}FLOOR(${a}::DOUBLE * (${sqlApply(value)} - ${lo}::DOUBLE))::INTEGER`;
172
- }
196
+ const BIN = { ceil: 'CEIL', round: 'ROUND' };
197
+
198
+ /**
199
+ * Returns a bin function generator to discretize a selection interval domain.
200
+ * @param {import('./util/selection-types.js').Scale} scale A scale that maps
201
+ * domain values to the output range (typically pixels).
202
+ * @param {number} pixelSize The interactive pixel size. This value indicates
203
+ * the bin step size and may be greater than an actual screen pixel.
204
+ * @param {import('./util/selection-types.js').BinMethod} bin The binning
205
+ * method to apply, one of `floor`, `ceil`, or `round`.
206
+ * @returns {(value: any) => import('@uwdata/mosaic-sql').SQLExpression}
207
+ * A bin function generator.
208
+ */
209
+ function binInterval(scale, pixelSize, bin) {
210
+ const { type, domain, range, apply, sqlApply } = scaleTransform(scale);
211
+ if (!apply) return; // unsupported scale type
212
+ const fn = BIN[`${bin}`.toLowerCase()] || 'FLOOR';
213
+ const lo = apply(Math.min(...domain));
214
+ const hi = apply(Math.max(...domain));
215
+ const a = type === 'identity' ? 1 : Math.abs(range[1] - range[0]) / (hi - lo);
216
+ const s = a / pixelSize === 1 ? '' : `${a / pixelSize}::DOUBLE * `;
217
+ const d = lo === 0 ? '' : ` - ${lo}::DOUBLE`;
218
+ return value => sql`${fn}(${s}(${sqlApply(value)}${d}))::INTEGER`;
173
219
  }
174
220
 
175
- const NO_INDEX = { from: NaN };
176
-
177
- function getIndexColumns(client) {
178
- if (!client.filterIndexable) return NO_INDEX;
179
- const q = client.query();
180
- const from = getBaseTable(q);
181
- if (!from || !q.groupby) return NO_INDEX;
182
- const g = new Set(q.groupby().map(c => c.column));
183
-
184
- const aggr = [];
185
- const dims = [];
186
- const aux = {}; // auxiliary columns needed by aggregates
187
- let auxAs;
188
-
189
- for (const entry of q.select()) {
190
- const { as, expr: { aggregate, args } } = entry;
191
- const op = aggregate?.toUpperCase?.();
192
- switch (op) {
193
- case 'COUNT':
194
- case 'SUM':
195
- aggr.push({ [as]: sql`SUM("${as}")::DOUBLE` });
196
- break;
197
- case 'AVG':
198
- aux[auxAs = '__count__'] = sql`COUNT(*)`;
199
- aggr.push({ [as]: sql`(SUM("${as}" * ${auxAs}) / SUM(${auxAs}))::DOUBLE` });
200
- break;
201
- case 'ARG_MAX':
202
- aux[auxAs = `__max_${as}__`] = sql`MAX(${args[1]})`;
203
- aggr.push({ [as]: sql`ARG_MAX("${as}", ${auxAs})` });
204
- break;
205
- case 'ARG_MIN':
206
- aux[auxAs = `__min_${as}__`] = sql`MIN(${args[1]})`;
207
- aggr.push({ [as]: sql`ARG_MIN("${as}", ${auxAs})` });
208
- break;
209
-
210
- // aggregates that commute directly
211
- case 'MAX':
212
- case 'MIN':
213
- case 'BIT_AND':
214
- case 'BIT_OR':
215
- case 'BIT_XOR':
216
- case 'BOOL_AND':
217
- case 'BOOL_OR':
218
- case 'PRODUCT':
219
- aggr.push({ [as]: sql`${op}("${as}")` });
220
- break;
221
- default:
222
- if (g.has(as)) dims.push(as);
223
- else return null;
224
- }
221
+ /**
222
+ * Generate data cube table query information.
223
+ * @param {Query} clientQuery The original client query.
224
+ * @param {*} active Active (selected) column definitions.
225
+ * @param {*} indexCols Data cube index column definitions.
226
+ * @returns {DataCubeInfo}
227
+ */
228
+ function dataCubeInfo(clientQuery, active, indexCols) {
229
+ const { dims, aggr, aux } = indexCols;
230
+ const { columns } = active;
231
+
232
+ // build index table construction query
233
+ const query = clientQuery
234
+ .select({ ...columns, ...aux })
235
+ .groupby(Object.keys(columns));
236
+
237
+ // ensure active clause columns are selected by subqueries
238
+ const [subq] = query.subqueries;
239
+ if (subq) {
240
+ const cols = Object.values(columns).flatMap(c => c.columns);
241
+ subqueryPushdown(subq, cols);
225
242
  }
226
243
 
227
- return { aggr, dims, aux, from };
228
- }
244
+ // push orderby criteria to later cube queries
245
+ const order = query.orderby();
246
+ query.query.orderby = [];
229
247
 
230
- function getBaseTable(query) {
231
- const subq = query.subqueries;
248
+ // generate creation query string and hash id
249
+ const create = query.toString();
250
+ const id = (fnv_hash(create) >>> 0).toString(16);
251
+ const table = `cube_index_${id}`;
232
252
 
233
- // select query
234
- if (query.select) {
235
- const from = query.from();
236
- if (!from.length) return undefined;
237
- if (subq.length === 0) return from[0].from.table;
238
- }
253
+ // generate data cube select query
254
+ const select = Query
255
+ .select(dims, aggr)
256
+ .from(table)
257
+ .groupby(dims)
258
+ .orderby(order);
239
259
 
240
- // handle set operations / subqueries
241
- const base = getBaseTable(subq[0]);
242
- for (let i = 1; i < subq.length; ++i) {
243
- const from = getBaseTable(subq[i]);
244
- if (from === undefined) continue;
245
- if (from !== base) return NaN;
246
- }
247
- return base;
260
+ return new DataCubeInfo({ table, create, active, select });
248
261
  }
249
262
 
263
+ /**
264
+ * Push column selections down to subqueries.
265
+ */
250
266
  function subqueryPushdown(query, cols) {
251
267
  const memo = new Set;
252
268
  const pushdown = q => {
@@ -259,3 +275,46 @@ function subqueryPushdown(query, cols) {
259
275
  };
260
276
  pushdown(query);
261
277
  }
278
+
279
+ /**
280
+ * Metadata and query generator for a data cube index table. This
281
+ * object provides the information needed to generate and query
282
+ * a data cube index table for a client-selection pair relative to
283
+ * a specific active clause and selection state.
284
+ */
285
+ export class DataCubeInfo {
286
+ /**
287
+ * Create a new DataCubeInfo instance.
288
+ * @param {object} options
289
+ */
290
+ constructor({ table, create, active, select } = {}) {
291
+ /** The name of the data cube index table. */
292
+ this.table = table;
293
+ /** The SQL query used to generate the data cube index table. */
294
+ this.create = create;
295
+ /** A result promise returned for the data cube creation query. */
296
+ this.result = null;
297
+ /**
298
+ * Definitions and predicate function for the active columns,
299
+ * which are dynamically filtered by the active clause.
300
+ */
301
+ this.active = active;
302
+ /** Select query (sans where clause) for data cube tables. */
303
+ this.select = select;
304
+ /**
305
+ * Boolean flag indicating a client that should be skipped.
306
+ * This value is always false for completed data cube info.
307
+ */
308
+ this.skip = false;
309
+ }
310
+
311
+ /**
312
+ * Generate a data cube index table query for the given predicate.
313
+ * @param {import('@uwdata/mosaic-sql').SQLExpression} predicate The current
314
+ * active clause predicate.
315
+ * @returns {Query} A data cube index table query.
316
+ */
317
+ query(predicate) {
318
+ return this.select.clone().where(this.active.predicate(predicate));
319
+ }
320
+ }
@@ -94,8 +94,8 @@ export class MosaicClient {
94
94
  * @param {*} error
95
95
  * @returns {this}
96
96
  */
97
- queryError(error) {
98
- console.error(error);
97
+ queryError(error) { // eslint-disable-line no-unused-vars
98
+ // do nothing, the coordinator logs the error
99
99
  return this;
100
100
  }
101
101
 
@@ -122,7 +122,7 @@ export class MosaicClient {
122
122
  /**
123
123
  * Requests a client update.
124
124
  * For example to (re-)render an interface component.
125
- *
125
+ *
126
126
  * @returns {this | Promise<any>}
127
127
  */
128
128
  update() {
@@ -1,5 +1,5 @@
1
1
  import { Query, Ref, isDescribeQuery } from '@uwdata/mosaic-sql';
2
- import { queryResult } from './util/query-result.js';
2
+ import { QueryResult } from './util/query-result.js';
3
3
 
4
4
  function wait(callback) {
5
5
  const method = typeof requestAnimationFrame !== 'undefined'
@@ -133,7 +133,7 @@ function consolidate(group, enqueue, record) {
133
133
  record: false,
134
134
  query: (group.query = consolidatedQuery(group, record))
135
135
  },
136
- result: (group.result = queryResult())
136
+ result: (group.result = new QueryResult())
137
137
  });
138
138
  } else {
139
139
  // issue queries directly