@uwdata/mosaic-core 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/mosaic-core.js +11613 -10856
- package/dist/mosaic-core.min.js +7 -7
- package/dist/types/Coordinator.d.ts +169 -0
- package/dist/types/MosaicClient.d.ts +94 -0
- package/dist/types/Param.d.ts +47 -0
- package/dist/types/QueryConsolidator.d.ts +9 -0
- package/dist/types/QueryManager.d.ts +64 -0
- package/dist/types/Selection.d.ts +224 -0
- package/dist/types/SelectionClause.d.ts +105 -0
- package/dist/types/connectors/rest.d.ts +17 -0
- package/dist/types/connectors/socket.d.ts +18 -0
- package/dist/types/connectors/wasm.d.ts +16 -0
- package/dist/types/index.d.ts +25 -0
- package/dist/types/preagg/PreAggregator.d.ts +178 -0
- package/dist/types/preagg/preagg-columns.d.ts +14 -0
- package/dist/types/preagg/sufficient-statistics.d.ts +13 -0
- package/dist/types/util/AsyncDispatch.d.ts +100 -0
- package/dist/types/util/cache.d.ts +13 -0
- package/dist/types/util/decode-ipc.d.ts +7 -0
- package/dist/types/util/distinct.d.ts +2 -0
- package/dist/types/util/field-info.d.ts +13 -0
- package/dist/types/util/hash.d.ts +1 -0
- package/dist/types/util/is-arrow-table.d.ts +8 -0
- package/dist/types/util/js-type.d.ts +1 -0
- package/dist/types/util/priority-queue.d.ts +37 -0
- package/dist/types/util/query-result.d.ts +44 -0
- package/dist/types/util/selection-types.d.ts +114 -0
- package/dist/types/util/synchronizer.d.ts +29 -0
- package/dist/types/util/throttle.d.ts +11 -0
- package/dist/types/util/to-data-columns.d.ts +29 -0
- package/dist/types/util/void-logger.d.ts +7 -0
- package/jsconfig.json +11 -0
- package/package.json +10 -8
- package/src/Coordinator.js +14 -14
- package/src/MosaicClient.js +5 -4
- package/src/QueryConsolidator.js +22 -33
- package/src/QueryManager.js +76 -45
- package/src/Selection.js +8 -5
- package/src/SelectionClause.js +20 -23
- package/src/connectors/rest.js +3 -1
- package/src/connectors/socket.js +3 -1
- package/src/connectors/wasm.js +1 -1
- package/src/index.js +13 -0
- package/src/preagg/PreAggregator.js +407 -0
- package/src/preagg/preagg-columns.js +103 -0
- package/src/preagg/sufficient-statistics.js +439 -0
- package/src/util/field-info.js +16 -5
- package/src/util/hash.js +1 -1
- package/src/util/query-result.js +44 -2
- package/src/util/selection-types.ts +3 -3
- package/src/util/throttle.js +11 -9
- package/src/util/void-logger.js +6 -5
- package/tsconfig.json +11 -0
- package/src/DataCubeIndexer.js +0 -378
- package/src/util/index-columns.js +0 -537
package/src/DataCubeIndexer.js
DELETED
|
@@ -1,378 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
Query, and, asColumn, create, isBetween, scaleTransform, sql
|
|
3
|
-
} from '@uwdata/mosaic-sql';
|
|
4
|
-
import { indexColumns } from './util/index-columns.js';
|
|
5
|
-
import { fnv_hash } from './util/hash.js';
|
|
6
|
-
|
|
7
|
-
const Skip = { skip: true, result: null };

/**
 * @typedef {object} DataCubeIndexerOptions
 * @property {string} [schema] Database schema (namespace) in which to write
 *  data cube index tables (default 'mosaic').
 * @property {boolean} [enabled=true] Flag to enable or disable the
 *  indexer. This setting can later be updated via the `enabled` property.
 */

/**
 * Build and query optimized indices ("data cubes") for fast computation of
 * groupby aggregate queries over compatible client queries and selections.
 * A data cube contains pre-aggregated data for a Mosaic client, subdivided
 * by possible query values from an active selection clause. These cubes are
 * realized as database tables that can be queried for rapid updates.
 *
 * Compatible client queries must consist of only groupby dimensions and
 * supported aggregate functions. Compatible selections must contain an active
 * clause that exposes metadata for an interval or point value predicate.
 *
 * Data cube index tables are written to a dedicated schema (namespace) that
 * can be set using the *schema* constructor option. This schema acts as a
 * persistent cache, and index tables may be used across sessions. The
 * `dropIndexTables` method issues a query to remove *all* tables within
 * this schema. This may be needed if the original tables have updated data,
 * but should be used with care.
 */
export class DataCubeIndexer {
  /**
   * Create a new data cube index table manager.
   * @param {import('./Coordinator.js').Coordinator} coordinator A Mosaic coordinator.
   * @param {DataCubeIndexerOptions} [options] Data cube indexer options.
   */
  constructor(coordinator, options = {}) {
    const { schema = 'mosaic', enabled = true } = options;
    /** @type {Map<import('./MosaicClient.js').MosaicClient, DataCubeInfo | typeof Skip | null>} */
    this.indexes = new Map();
    this.active = null;
    this.mc = coordinator;
    this._schema = schema;
    this._enabled = enabled;
  }

  /**
   * Set the enabled state of this indexer. Switching to a falsy state clears
   * any local state, and subsequent `index` calls return null until the
   * indexer is re-enabled. Index tables already in the database are left
   * untouched.
   * @param {boolean} state The enabled state to set.
   */
  set enabled(state) {
    if (this._enabled === state) return;
    if (!state) this.clear();
    this._enabled = state;
  }

  /**
   * Get the enabled state of this indexer.
   * @returns {boolean} The current enabled state.
   */
  get enabled() {
    return this._enabled;
  }

  /**
   * Set the database schema used by this indexer. A change of schema clears
   * any local state. This setter does _not_ drop existing data cube tables;
   * call `dropIndexTables` before changing the schema to also remove the
   * existing index tables from the database.
   * @param {string} schema The schema name to set.
   */
  set schema(schema) {
    if (this._schema === schema) return;
    this.clear();
    this._schema = schema;
  }

  /**
   * Get the database schema used by this indexer.
   * @returns {string} The current schema name.
   */
  get schema() {
    return this._schema;
  }

  /**
   * Issues a query through the coordinator to drop the current index table
   * schema. *All* tables in the schema will be removed and local state is
   * cleared. Call this method if the underlying base tables have been updated,
   * causing derived index tables to become stale and inaccurate. Use this
   * method with care! Once dropped, the schema will be repopulated by future
   * data cube indexer requests.
   * @returns A query result promise.
   */
  dropIndexTables() {
    this.clear();
    const { schema } = this;
    return this.mc.exec(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
  }

  /**
   * Clear the local cache of data cube index table entries and forget the
   * analyzed active selection clause. This method does _not_ drop any
   * existing data cube tables. Use `dropIndexTables` to remove existing
   * index tables from the database.
   */
  clear() {
    this.indexes.clear();
    this.active = null;
  }

  /**
   * Return data cube index table information for the active state of a
   * client-selection pair, or null if the client is not indexable. This
   * method has multiple possible side effects, including data cube table
   * generation and updating internal caches.
   * @param {import('./MosaicClient.js').MosaicClient} client A Mosaic client.
   * @param {import('./Selection.js').Selection} selection A Mosaic selection
   *  to filter the client by.
   * @param {import('./util/selection-types.js').SelectionClause} activeClause
   *  A representative active selection clause for which to (possibly) generate
   *  data cube index tables.
   * @returns {DataCubeInfo | typeof Skip | null} Data cube index table
   *  information and query generator, the `Skip` sentinel if the client is
   *  untouched by the selection, or null if the client is not indexable.
   */
  index(client, selection, activeClause) {
    // a disabled indexer never produces index tables
    if (!this.enabled) return null;

    const { indexes, mc, schema } = this;
    const source = activeClause.source;

    // without a clause source there is nothing to track
    if (!source) return null;

    if (this.active) {
      // a different clause source invalidates all cached state;
      // clear() also resets this.active to null
      if (this.active.source !== source) this.clear();
      // a cached analysis with a null source marks a non-indexable clause
      if (this.active?.source === null) return null;
    }

    // analyze the active clause when no cached analysis remains
    let active = this.active;
    if (!active) {
      // activeColumns reports a null source if the clause is not indexable
      active = activeColumns(activeClause);
      this.active = active;
      if (active.source === null) return null;
    }

    // reuse previously computed index info for this client
    if (indexes.has(client)) {
      return indexes.get(client);
    }

    // non-active data cube index table columns; falsy if not indexable
    const indexCols = indexColumns(client);

    let info = null; // null records a client that is not indexable
    if (indexCols) {
      if (selection.skip(client, activeClause)) {
        // client is untouched by cross-filtering
        info = Skip;
      } else {
        // generate the data cube index table
        const filter = selection.remove(source).predicate(client);
        info = dataCubeInfo(client.query(filter), active, indexCols, schema);
        info.result = mc.exec([
          `CREATE SCHEMA IF NOT EXISTS ${schema}`,
          create(info.table, info.create, { temp: false })
        ]);
        info.result.catch(err => mc.logger().error(err));
      }
    }

    indexes.set(client, info);
    return info;
  }
}
|
|
197
|
-
|
|
198
|
-
/**
 * Analyze an active selection clause and derive the active data cube
 * dimension columns to select over. The result carries the clause source,
 * the column definitions, and a function that maps an active clause
 * predicate to a predicate over the data cube index table columns.
 * If the clause lacks metadata or predicate columns, has an unrecognized
 * type, or uses an unsupported scale, the result has a null source and
 * undefined columns and predicate.
 * @param {import('./util/selection-types.js').SelectionClause} clause The
 *  active selection clause to analyze.
 */
function activeColumns(clause) {
  const { source, meta, predicate: clausePred } = clause;
  const clauseCols = clausePred?.columns;

  // result shape shared by every non-indexable outcome
  const unindexable = () => (
    { source: null, columns: undefined, predicate: undefined }
  );

  if (!meta || !clauseCols) return unindexable();

  // @ts-ignore
  const { type, scales, bin, pixelSize = 1 } = meta;

  if (type === 'point') {
    // point selections pass the predicate through unchanged
    const columns = Object.fromEntries(
      clauseCols.map(col => [`${col}`, asColumn(col)])
    );
    return { source, columns, predicate: x => x };
  }

  if (type !== 'interval' || !scales) return unindexable();

  // determine pixel-level binning, one bin function per scale
  const bins = scales.map(s => binInterval(s, pixelSize, bin));

  // bail if any scale type is unsupported
  if (bins.some(b => !b)) return unindexable();

  if (bins.length === 1) {
    // single interval selection
    const [toBin] = bins;
    return {
      source,
      // @ts-ignore
      columns: { active0: toBin(clausePred.field) },
      predicate: p => p ? isBetween('active0', p.range.map(toBin)) : []
    };
  }

  // multiple interval selection: one active column per child predicate
  const columns = Object.fromEntries(
    // @ts-ignore
    clausePred.children.map((p, i) => [`active${i}`, bins[i](p.field)])
  );
  const predicate = p => {
    if (!p) return [];
    return and(p.children.map(
      ({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))
    ));
  };
  return { source, columns, predicate };
}
|
|
253
|
-
|
|
254
|
-
const BIN = { ceil: 'CEIL', round: 'ROUND' };

/**
 * Returns a bin function generator to discretize a selection interval domain.
 * Returns undefined when the scale cannot be binned: either the scale type
 * is unsupported, or the transformed domain is degenerate or non-finite
 * (which would otherwise place `Infinity`/`NaN` literals in the SQL).
 * @param {import('./util/selection-types.js').Scale} scale A scale that maps
 *  domain values to the output range (typically pixels).
 * @param {number} pixelSize The interactive pixel size. This value indicates
 *  the bin step size and may be greater than an actual screen pixel.
 * @param {import('./util/selection-types.js').BinMethod} bin The binning
 *  method to apply, one of `floor`, `ceil`, or `round`. Unrecognized values
 *  fall back to `floor`.
 * @returns {(value: any) => import('@uwdata/mosaic-sql').SQLExpression}
 *  A bin function generator.
 */
function binInterval(scale, pixelSize, bin) {
  const { type, domain, range, apply, sqlApply } = scaleTransform(scale);
  if (!apply) return; // unsupported scale type

  // own-property lookup only: inherited keys such as 'constructor' or
  // 'toString' must not be mistaken for SQL function names
  const key = `${bin}`.toLowerCase();
  const fn = Object.prototype.hasOwnProperty.call(BIN, key) ? BIN[key] : 'FLOOR';

  const lo = apply(Math.min(...domain));
  const hi = apply(Math.max(...domain));

  // scale factor from transformed domain units to range (pixel) units
  const a = type === 'identity' ? 1 : Math.abs(range[1] - range[0]) / (hi - lo);

  // an empty, zero-width or unbounded domain has no usable binning
  if (!Number.isFinite(a) || !Number.isFinite(lo)) return;

  // omit the multiplier and offset terms when they are no-ops
  const s = a / pixelSize === 1 ? '' : `${a / pixelSize}::DOUBLE * `;
  const d = lo === 0 ? '' : ` - ${lo}::DOUBLE`;
  return value => sql`${fn}(${s}(${sqlApply(value)}${d}))::INTEGER`;
}
|
|
278
|
-
|
|
279
|
-
/**
 * Generate data cube table query information: the table construction
 * query, a table name derived from a hash of that query, and the select
 * query later used to read from the table.
 * Note that the provided client query is modified in place.
 * @param {Query} clientQuery The original client query.
 * @param {*} active Active (selected) column definitions.
 * @param {*} indexCols Data cube index column definitions.
 * @param {string} schema The database schema in which to place the table.
 * @returns {DataCubeInfo}
 */
function dataCubeInfo(clientQuery, active, indexCols, schema) {
  const activeCols = active.columns;

  // build index table construction query, grouped by the active columns
  const query = clientQuery
    .select({ ...activeCols, ...indexCols.aux })
    .groupby(Object.keys(activeCols));

  // ensure active clause columns are selected by subqueries
  const [innerQuery] = query.subqueries;
  if (innerQuery) {
    const needed = Object.values(activeCols).flatMap(col => col.columns);
    subqueryPushdown(innerQuery, needed);
  }

  // move orderby criteria from the creation query to later cube queries
  const order = query.orderby();
  query.query.orderby = [];

  // the creation query text doubles as the hash input for the table id
  const create = query.toString();
  const id = (fnv_hash(create) >>> 0).toString(16);
  const table = `${schema}.cube_${id}`;

  // generate data cube select query
  const { dims, aggr } = indexCols;
  const select = Query
    .select(dims, aggr)
    .from(table)
    .groupby(dims)
    .orderby(order);

  return new DataCubeInfo({ id, table, create, active, select });
}
|
|
320
|
-
|
|
321
|
-
/**
 * Push column selections down to subqueries. Starting at the given query,
 * visits each reachable query once and adds the columns to every query
 * that has a `select` method and a non-empty `from` list.
 * @param {*} query The query at which to start.
 * @param {*} cols The columns to add to each selectable query.
 */
function subqueryPushdown(query, cols) {
  // queries already handled; guards against shared or cyclic subqueries
  const visited = new Set();

  function visit(node) {
    if (visited.has(node)) return;
    visited.add(node);

    const selectable = node.select && node.from().length;
    if (selectable) {
      node.select(cols);
    }

    for (const child of node.subqueries) {
      visit(child);
    }
  }

  visit(query);
}
|
|
336
|
-
|
|
337
|
-
/**
 * Metadata and query generator for a data cube index table. This
 * object provides the information needed to generate and query
 * a data cube index table for a client-selection pair relative to
 * a specific active clause and selection state.
 */
export class DataCubeInfo {
  /**
   * Create a new DataCubeInfo instance.
   * @param {object} [options] Data cube info properties.
   * @param {string} [options.id] The identifier of the data cube index
   *  table, as supplied by the creator of this instance.
   * @param {string} [options.table] The name of the data cube index table.
   * @param {string} [options.create] The SQL query used to generate the
   *  data cube index table.
   * @param {*} [options.active] Definitions and predicate function for the
   *  active columns.
   * @param {*} [options.select] Select query (sans where clause) for the
   *  data cube index table; `query` calls its `clone` method.
   */
  constructor({ id, table, create, active, select } = {}) {
    /**
     * The identifier of the data cube index table. Retained rather than
     * discarded, since callers compute and pass it alongside the table name.
     */
    this.id = id;
    /** The name of the data cube index table. */
    this.table = table;
    /** The SQL query used to generate the data cube index table. */
    this.create = create;
    /** A result promise returned for the data cube creation query. */
    this.result = null;
    /**
     * Definitions and predicate function for the active columns,
     * which are dynamically filtered by the active clause.
     */
    this.active = active;
    /** Select query (sans where clause) for data cube tables. */
    this.select = select;
    /**
     * Boolean flag indicating a client that should be skipped.
     * This value is always false for completed data cube info.
     */
    this.skip = false;
  }

  /**
   * Generate a data cube index table query for the given predicate.
   * The stored select query is cloned, so it is not modified.
   * @param {import('@uwdata/mosaic-sql').SQLExpression} predicate The current
   *  active clause predicate.
   * @returns {Query} A data cube index table query.
   */
  query(predicate) {
    return this.select.clone().where(this.active.predicate(predicate));
  }
}
|