@uwdata/mosaic-core 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +3 -1
  2. package/dist/mosaic-core.js +11613 -10856
  3. package/dist/mosaic-core.min.js +7 -7
  4. package/dist/types/Coordinator.d.ts +169 -0
  5. package/dist/types/MosaicClient.d.ts +94 -0
  6. package/dist/types/Param.d.ts +47 -0
  7. package/dist/types/QueryConsolidator.d.ts +9 -0
  8. package/dist/types/QueryManager.d.ts +64 -0
  9. package/dist/types/Selection.d.ts +224 -0
  10. package/dist/types/SelectionClause.d.ts +105 -0
  11. package/dist/types/connectors/rest.d.ts +17 -0
  12. package/dist/types/connectors/socket.d.ts +18 -0
  13. package/dist/types/connectors/wasm.d.ts +16 -0
  14. package/dist/types/index.d.ts +25 -0
  15. package/dist/types/preagg/PreAggregator.d.ts +178 -0
  16. package/dist/types/preagg/preagg-columns.d.ts +14 -0
  17. package/dist/types/preagg/sufficient-statistics.d.ts +13 -0
  18. package/dist/types/util/AsyncDispatch.d.ts +100 -0
  19. package/dist/types/util/cache.d.ts +13 -0
  20. package/dist/types/util/decode-ipc.d.ts +7 -0
  21. package/dist/types/util/distinct.d.ts +2 -0
  22. package/dist/types/util/field-info.d.ts +13 -0
  23. package/dist/types/util/hash.d.ts +1 -0
  24. package/dist/types/util/is-arrow-table.d.ts +8 -0
  25. package/dist/types/util/js-type.d.ts +1 -0
  26. package/dist/types/util/priority-queue.d.ts +37 -0
  27. package/dist/types/util/query-result.d.ts +44 -0
  28. package/dist/types/util/selection-types.d.ts +114 -0
  29. package/dist/types/util/synchronizer.d.ts +29 -0
  30. package/dist/types/util/throttle.d.ts +11 -0
  31. package/dist/types/util/to-data-columns.d.ts +29 -0
  32. package/dist/types/util/void-logger.d.ts +7 -0
  33. package/jsconfig.json +11 -0
  34. package/package.json +10 -8
  35. package/src/Coordinator.js +14 -14
  36. package/src/MosaicClient.js +5 -4
  37. package/src/QueryConsolidator.js +22 -33
  38. package/src/QueryManager.js +76 -45
  39. package/src/Selection.js +8 -5
  40. package/src/SelectionClause.js +20 -23
  41. package/src/connectors/rest.js +3 -1
  42. package/src/connectors/socket.js +3 -1
  43. package/src/connectors/wasm.js +1 -1
  44. package/src/index.js +13 -0
  45. package/src/preagg/PreAggregator.js +407 -0
  46. package/src/preagg/preagg-columns.js +103 -0
  47. package/src/preagg/sufficient-statistics.js +439 -0
  48. package/src/util/field-info.js +16 -5
  49. package/src/util/hash.js +1 -1
  50. package/src/util/query-result.js +44 -2
  51. package/src/util/selection-types.ts +3 -3
  52. package/src/util/throttle.js +11 -9
  53. package/src/util/void-logger.js +6 -5
  54. package/tsconfig.json +11 -0
  55. package/src/DataCubeIndexer.js +0 -378
  56. package/src/util/index-columns.js +0 -537
@@ -1,378 +0,0 @@
1
- import {
2
- Query, and, asColumn, create, isBetween, scaleTransform, sql
3
- } from '@uwdata/mosaic-sql';
4
- import { indexColumns } from './util/index-columns.js';
5
- import { fnv_hash } from './util/hash.js';
6
-
7
- const Skip = { skip: true, result: null };
8
-
9
- /**
10
- * @typedef {object} DataCubeIndexerOptions
11
- * @property {string} [schema] Database schema (namespace) in which to write
12
- * data cube index tables (default 'mosaic').
13
- * @property {boolean} [enabled=true] Flag to enable or disable the
14
- * indexer. This setting can later be updated via the `enabled` method.
15
- */
16
-
17
- /**
18
- * Build and query optimized indices ("data cubes") for fast computation of
19
- * groupby aggregate queries over compatible client queries and selections.
20
- * A data cube contains pre-aggregated data for a Mosaic client, subdivided
21
- * by possible query values from an active selection clause. These cubes are
22
- * realized as database tables that can be queried for rapid updates.
23
- *
24
- * Compatible client queries must consist of only groupby dimensions and
25
- * supported aggregate functions. Compatible selections must contain an active
26
- * clause that exposes metadata for an interval or point value predicate.
27
- *
28
- * Data cube index tables are written to a dedicated schema (namespace) that
29
- * can be set using the *schema* constructor option. This schema acts as a
30
- * persistent cache, and index tables may be used across sessions. The
31
- * `dropIndexTables` method issues a query to remove *all* tables within
32
- * this schema. This may be needed if the original tables have updated data,
33
- * but should be used with care.
34
- */
35
- export class DataCubeIndexer {
36
- /**
37
- * Create a new data cube index table manager.
38
- * @param {import('./Coordinator.js').Coordinator} coordinator A Mosaic coordinator.
39
- * @param {DataCubeIndexerOptions} [options] Data cube indexer options.
40
- */
41
- constructor(coordinator, {
42
- schema = 'mosaic',
43
- enabled = true
44
- } = {}) {
45
- /** @type {Map<import('./MosaicClient.js').MosaicClient, DataCubeInfo | Skip | null>} */
46
- this.indexes = new Map();
47
- this.active = null;
48
- this.mc = coordinator;
49
- this._schema = schema;
50
- this._enabled = enabled;
51
- }
52
-
53
- /**
54
- * Set the enabled state of this indexer. If false, any local state is
55
- * cleared and subsequent index calls will return null until re-enabled.
56
- * This method has no effect on any index tables already in the database.
57
- * @param {boolean} [state] The enabled state to set.
58
- */
59
- set enabled(state) {
60
- if (this._enabled !== state) {
61
- if (!state) this.clear();
62
- this._enabled = state;
63
- }
64
- }
65
-
66
- /**
67
- * Get the enabled state of this indexer.
68
- * @returns {boolean} The current enabled state.
69
- */
70
- get enabled() {
71
- return this._enabled;
72
- }
73
-
74
- /**
75
- * Set the database schema used by this indexer. Upon changes, any local
76
- * state is cleared. This method does _not_ drop any existing data cube
77
- * tables; use `dropIndexTables` before changing the schema to also remove
78
- * existing index tables in the database.
79
- * @param {string} [schema] The schema name to set.
80
- */
81
- set schema(schema) {
82
- if (this._schema !== schema) {
83
- this.clear();
84
- this._schema = schema;
85
- }
86
- }
87
-
88
- /**
89
- * Get the database schema used by this indexer.
90
- * @returns {string} The current schema name.
91
- */
92
- get schema() {
93
- return this._schema;
94
- }
95
-
96
- /**
97
- * Issues a query through the coordinator to drop the current index table
98
- * schema. *All* tables in the schema will be removed and local state is
99
- * cleared. Call this method if the underlying base tables have been updated,
100
- * causing derived index tables to become stale and inaccurate. Use this
101
- * method with care! Once dropped, the schema will be repopulated by future
102
- * data cube indexer requests.
103
- * @returns A query result promise.
104
- */
105
- dropIndexTables() {
106
- this.clear();
107
- return this.mc.exec(`DROP SCHEMA IF EXISTS "${this.schema}" CASCADE`);
108
- }
109
-
110
- /**
111
- * Clear the cache of data cube index table entries for the current active
112
- * selection clause. This method does _not_ drop any existing data cube
113
- * tables. Use `dropIndexTables` to remove existing index tables from the
114
- * database.
115
- */
116
- clear() {
117
- this.indexes.clear();
118
- this.active = null;
119
- }
120
-
121
- /**
122
- * Return data cube index table information for the active state of a
123
- * client-selection pair, or null if the client is not indexable. This
124
- * method has multiple possible side effects, including data cube table
125
- * generation and updating internal caches.
126
- * @param {import('./MosaicClient.js').MosaicClient} client A Mosaic client.
127
- * @param {import('./Selection.js').Selection} selection A Mosaic selection
128
- * to filter the client by.
129
- * @param {import('./util/selection-types.js').SelectionClause} activeClause
130
- * A representative active selection clause for which to (possibly) generate
131
- * data cube index tables.
132
- * @returns {DataCubeInfo | Skip | null} Data cube index table
133
- * information and query generator, or null if the client is not indexable.
134
- */
135
- index(client, selection, activeClause) {
136
- // if not enabled, do nothing
137
- if (!this.enabled) return null;
138
-
139
- const { indexes, mc, schema } = this;
140
- const { source } = activeClause;
141
-
142
- // if there is no clause source to track, do nothing
143
- if (!source) return null;
144
-
145
- // if we have cached active columns, check for updates or exit
146
- if (this.active) {
147
- // if the active clause source has changed, clear indexer state
148
- // this cancels outstanding requests and clears the index cache
149
- // a clear also sets this.active to null
150
- if (this.active.source !== source) this.clear();
151
- // if we've seen this source and it's not indexable, do nothing
152
- if (this.active?.source === null) return null;
153
- }
154
-
155
- // the current active columns cache value
156
- let { active } = this;
157
-
158
- // if cached active columns are unset, analyze the active clause
159
- if (!active) {
160
- // generate active data cube dimension columns to select over
161
- // will return an object with null source if not indexable
162
- this.active = active = activeColumns(activeClause);
163
- // if the active clause is not indexable, exit now
164
- if (active.source === null) return null;
165
- }
166
-
167
- // if we have cached data cube index table info, return that
168
- if (indexes.has(client)) {
169
- return indexes.get(client);
170
- }
171
-
172
- // get non-active data cube index table columns
173
- const indexCols = indexColumns(client);
174
-
175
- let info;
176
- if (!indexCols) {
177
- // if client is not indexable, record null index
178
- info = null;
179
- } else if (selection.skip(client, activeClause)) {
180
- // skip client if untouched by cross-filtering
181
- info = Skip;
182
- } else {
183
- // generate data cube index table
184
- const filter = selection.remove(source).predicate(client);
185
- info = dataCubeInfo(client.query(filter), active, indexCols, schema);
186
- info.result = mc.exec([
187
- `CREATE SCHEMA IF NOT EXISTS ${schema}`,
188
- create(info.table, info.create, { temp: false })
189
- ]);
190
- info.result.catch(e => mc.logger().error(e));
191
- }
192
-
193
- indexes.set(client, info);
194
- return info;
195
- }
196
- }
197
-
198
- /**
199
- * Determines the active data cube dimension columns to select over. Returns
200
- * an object with the clause source, column definitions, and a predicate
201
- * generator function for the active dimensions of a data cube index table. If
202
- * the active clause is not indexable or is missing metadata, this method
203
- * returns an object with a null source property.
204
- * @param {import('./util/selection-types.js').SelectionClause} clause The
205
- * active selection clause to analyze.
206
- */
207
- function activeColumns(clause) {
208
- const { source, meta } = clause;
209
- const clausePred = clause.predicate;
210
- const clauseCols = clausePred?.columns;
211
- let predicate;
212
- let columns;
213
-
214
- if (!meta || !clauseCols) {
215
- return { source: null, columns, predicate };
216
- }
217
-
218
- // @ts-ignore
219
- const { type, scales, bin, pixelSize = 1 } = meta;
220
-
221
- if (type === 'point') {
222
- predicate = x => x;
223
- columns = Object.fromEntries(
224
- clauseCols.map(col => [`${col}`, asColumn(col)])
225
- );
226
- } else if (type === 'interval' && scales) {
227
- // determine pixel-level binning
228
- const bins = scales.map(s => binInterval(s, pixelSize, bin));
229
-
230
- if (bins.some(b => !b)) {
231
- // bail if a scale type is unsupported
232
- } else if (bins.length === 1) {
233
- // single interval selection
234
- predicate = p => p ? isBetween('active0', p.range.map(bins[0])) : [];
235
- // @ts-ignore
236
- columns = { active0: bins[0](clausePred.field) };
237
- } else {
238
- // multiple interval selection
239
- predicate = p => p
240
- ? and(p.children.map(
241
- ({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))
242
- ))
243
- : [];
244
- columns = Object.fromEntries(
245
- // @ts-ignore
246
- clausePred.children.map((p, i) => [`active${i}`, bins[i](p.field)])
247
- );
248
- }
249
- }
250
-
251
- return { source: columns ? source : null, columns, predicate };
252
- }
253
-
254
- const BIN = { ceil: 'CEIL', round: 'ROUND' };
255
-
256
- /**
257
- * Returns a bin function generator to discretize a selection interval domain.
258
- * @param {import('./util/selection-types.js').Scale} scale A scale that maps
259
- * domain values to the output range (typically pixels).
260
- * @param {number} pixelSize The interactive pixel size. This value indicates
261
- * the bin step size and may be greater than an actual screen pixel.
262
- * @param {import('./util/selection-types.js').BinMethod} bin The binning
263
- * method to apply, one of `floor`, `ceil`, or `round`.
264
- * @returns {(value: any) => import('@uwdata/mosaic-sql').SQLExpression}
265
- * A bin function generator.
266
- */
267
- function binInterval(scale, pixelSize, bin) {
268
- const { type, domain, range, apply, sqlApply } = scaleTransform(scale);
269
- if (!apply) return; // unsupported scale type
270
- const fn = BIN[`${bin}`.toLowerCase()] || 'FLOOR';
271
- const lo = apply(Math.min(...domain));
272
- const hi = apply(Math.max(...domain));
273
- const a = type === 'identity' ? 1 : Math.abs(range[1] - range[0]) / (hi - lo);
274
- const s = a / pixelSize === 1 ? '' : `${a / pixelSize}::DOUBLE * `;
275
- const d = lo === 0 ? '' : ` - ${lo}::DOUBLE`;
276
- return value => sql`${fn}(${s}(${sqlApply(value)}${d}))::INTEGER`;
277
- }
278
-
279
- /**
280
- * Generate data cube table query information.
281
- * @param {Query} clientQuery The original client query.
282
- * @param {*} active Active (selected) column definitions.
283
- * @param {*} indexCols Data cube index column definitions.
284
- * @returns {DataCubeInfo}
285
- */
286
- function dataCubeInfo(clientQuery, active, indexCols, schema) {
287
- const { dims, aggr, aux } = indexCols;
288
- const { columns } = active;
289
-
290
- // build index table construction query
291
- const query = clientQuery
292
- .select({ ...columns, ...aux })
293
- .groupby(Object.keys(columns));
294
-
295
- // ensure active clause columns are selected by subqueries
296
- const [subq] = query.subqueries;
297
- if (subq) {
298
- const cols = Object.values(columns).flatMap(c => c.columns);
299
- subqueryPushdown(subq, cols);
300
- }
301
-
302
- // push orderby criteria to later cube queries
303
- const order = query.orderby();
304
- query.query.orderby = [];
305
-
306
- // generate creation query string and hash id
307
- const create = query.toString();
308
- const id = (fnv_hash(create) >>> 0).toString(16);
309
- const table = `${schema}.cube_${id}`;
310
-
311
- // generate data cube select query
312
- const select = Query
313
- .select(dims, aggr)
314
- .from(table)
315
- .groupby(dims)
316
- .orderby(order);
317
-
318
- return new DataCubeInfo({ id, table, create, active, select });
319
- }
320
-
321
- /**
322
- * Push column selections down to subqueries.
323
- */
324
- function subqueryPushdown(query, cols) {
325
- const memo = new Set;
326
- const pushdown = q => {
327
- if (memo.has(q)) return;
328
- memo.add(q);
329
- if (q.select && q.from().length) {
330
- q.select(cols);
331
- }
332
- q.subqueries.forEach(pushdown);
333
- };
334
- pushdown(query);
335
- }
336
-
337
- /**
338
- * Metadata and query generator for a data cube index table. This
339
- * object provides the information needed to generate and query
340
- * a data cube index table for a client-selection pair relative to
341
- * a specific active clause and selection state.
342
- */
343
- export class DataCubeInfo {
344
- /**
345
- * Create a new DataCubeInfo instance.
346
- * @param {object} options
347
- */
348
- constructor({ table, create, active, select } = {}) {
349
- /** The name of the data cube index table. */
350
- this.table = table;
351
- /** The SQL query used to generate the data cube index table. */
352
- this.create = create;
353
- /** A result promise returned for the data cube creation query. */
354
- this.result = null;
355
- /**
356
- * Definitions and predicate function for the active columns,
357
- * which are dynamically filtered by the active clause.
358
- */
359
- this.active = active;
360
- /** Select query (sans where clause) for data cube tables. */
361
- this.select = select;
362
- /**
363
- * Boolean flag indicating a client that should be skipped.
364
- * This value is always false for completed data cube info.
365
- */
366
- this.skip = false;
367
- }
368
-
369
- /**
370
- * Generate a data cube index table query for the given predicate.
371
- * @param {import('@uwdata/mosaic-sql').SQLExpression} predicate The current
372
- * active clause predicate.
373
- * @returns {Query} A data cube index table query.
374
- */
375
- query(predicate) {
376
- return this.select.clone().where(this.active.predicate(predicate));
377
- }
378
- }