@uwdata/mosaic-core 0.11.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +3 -1
  2. package/dist/mosaic-core.js +11613 -10856
  3. package/dist/mosaic-core.min.js +7 -7
  4. package/dist/types/Coordinator.d.ts +169 -0
  5. package/dist/types/MosaicClient.d.ts +94 -0
  6. package/dist/types/Param.d.ts +47 -0
  7. package/dist/types/QueryConsolidator.d.ts +9 -0
  8. package/dist/types/QueryManager.d.ts +64 -0
  9. package/dist/types/Selection.d.ts +224 -0
  10. package/dist/types/SelectionClause.d.ts +105 -0
  11. package/dist/types/connectors/rest.d.ts +17 -0
  12. package/dist/types/connectors/socket.d.ts +18 -0
  13. package/dist/types/connectors/wasm.d.ts +16 -0
  14. package/dist/types/index.d.ts +25 -0
  15. package/dist/types/preagg/PreAggregator.d.ts +178 -0
  16. package/dist/types/preagg/preagg-columns.d.ts +14 -0
  17. package/dist/types/preagg/sufficient-statistics.d.ts +13 -0
  18. package/dist/types/util/AsyncDispatch.d.ts +100 -0
  19. package/dist/types/util/cache.d.ts +13 -0
  20. package/dist/types/util/decode-ipc.d.ts +7 -0
  21. package/dist/types/util/distinct.d.ts +2 -0
  22. package/dist/types/util/field-info.d.ts +13 -0
  23. package/dist/types/util/hash.d.ts +1 -0
  24. package/dist/types/util/is-arrow-table.d.ts +8 -0
  25. package/dist/types/util/js-type.d.ts +1 -0
  26. package/dist/types/util/priority-queue.d.ts +37 -0
  27. package/dist/types/util/query-result.d.ts +44 -0
  28. package/dist/types/util/selection-types.d.ts +114 -0
  29. package/dist/types/util/synchronizer.d.ts +29 -0
  30. package/dist/types/util/throttle.d.ts +11 -0
  31. package/dist/types/util/to-data-columns.d.ts +29 -0
  32. package/dist/types/util/void-logger.d.ts +7 -0
  33. package/jsconfig.json +11 -0
  34. package/package.json +10 -8
  35. package/src/Coordinator.js +14 -14
  36. package/src/MosaicClient.js +5 -4
  37. package/src/QueryConsolidator.js +22 -33
  38. package/src/QueryManager.js +76 -45
  39. package/src/Selection.js +8 -5
  40. package/src/SelectionClause.js +20 -23
  41. package/src/connectors/rest.js +3 -1
  42. package/src/connectors/socket.js +3 -1
  43. package/src/connectors/wasm.js +1 -1
  44. package/src/index.js +13 -0
  45. package/src/preagg/PreAggregator.js +407 -0
  46. package/src/preagg/preagg-columns.js +103 -0
  47. package/src/preagg/sufficient-statistics.js +439 -0
  48. package/src/util/field-info.js +16 -5
  49. package/src/util/hash.js +1 -1
  50. package/src/util/query-result.js +44 -2
  51. package/src/util/selection-types.ts +3 -3
  52. package/src/util/throttle.js +11 -9
  53. package/src/util/void-logger.js +6 -5
  54. package/tsconfig.json +11 -0
  55. package/src/DataCubeIndexer.js +0 -378
  56. package/src/util/index-columns.js +0 -537
@@ -0,0 +1,439 @@
1
+ import { AggregateNode, and, argmax, argmin, count, div, ExprNode, isNotNull, max, min, mul, pow, regrAvgX, regrAvgY, regrCount, sql, sqrt, sub, sum } from '@uwdata/mosaic-sql';
2
+ import { fnv_hash } from '../util/hash.js';
3
+
4
+ /**
5
+ * Determine sufficient statistics to preaggregate the given node. This
6
+ * method populates the *preagg* argument with the necessary
7
+ * information for preaggregation optimization.
8
+ * @param {AggregateNode} node An aggregate function.
9
+ * @param {Record<string, ExprNode>} preagg Map of column names to
10
+ * expressions to include in the preaggregation table.
11
+ * @returns {ExprNode} Output aggregate expression that uses preaggregated
12
+ * sufficient statistics to service updates.
13
+ */
14
+ export function sufficientStatistics(node, preagg, avg) {
15
+ switch (node.name) {
16
+ case 'count':
17
+ case 'sum':
18
+ return sumExpr(preagg, node);
19
+ case 'avg':
20
+ return avgExpr(preagg, node);
21
+ case 'arg_max':
22
+ return argmaxExpr(preagg, node);
23
+ case 'arg_min':
24
+ return argminExpr(preagg, node);
25
+
26
+ // variance statistics drop the original aggregate operation
27
+ // in favor of tracking sufficient statistics
28
+ case 'variance':
29
+ case 'var_samp':
30
+ return varianceExpr(preagg, node, avg);
31
+ case 'var_pop':
32
+ return varianceExpr(preagg, node, avg, false);
33
+ case 'stddev':
34
+ case 'stddev_samp':
35
+ return sqrt(varianceExpr(preagg, node, avg));
36
+ case 'stddev_pop':
37
+ return sqrt(varianceExpr(preagg, node, avg, false));
38
+ case 'covar_samp':
39
+ return covarianceExpr(preagg, node, avg);
40
+ case 'covar_pop':
41
+ return covarianceExpr(preagg, node, avg, false);
42
+ case 'corr':
43
+ return corrExpr(preagg, node, avg);
44
+
45
+ // regression statistics
46
+ case 'regr_count':
47
+ return regrCountExpr(preagg, node).expr;
48
+ case 'regr_avgx':
49
+ return regrAvgXExpr(preagg, node);
50
+ case 'regr_avgy':
51
+ return regrAvgYExpr(preagg, node);
52
+ case 'regr_syy':
53
+ return regrVarExpr(preagg, 0, node, avg);
54
+ case 'regr_sxx':
55
+ return regrVarExpr(preagg, 1, node, avg);
56
+ case 'regr_sxy':
57
+ return covarianceExpr(preagg, node, avg, null);
58
+ case 'regr_slope':
59
+ return regrSlopeExpr(preagg, node, avg);
60
+ case 'regr_intercept':
61
+ return regrInterceptExpr(preagg, node, avg);
62
+ case 'regr_r2':
63
+ return pow(corrExpr(preagg, node, avg), 2);
64
+
65
+ // aggregates that commute directly
66
+ case 'max':
67
+ case 'min':
68
+ case 'bit_and':
69
+ case 'bit_or':
70
+ case 'bit_xor':
71
+ case 'bool_and':
72
+ case 'bool_or':
73
+ case 'product': {
74
+ const name = colName(node);
75
+ preagg[name] = node;
76
+ return sql`${node.name}("${name}")`;
77
+ }
78
+
79
+ // unsupported aggregate, return null to indicate failure
80
+ default: return null;
81
+ }
82
+ }
83
+
84
+ /**
85
+ * Generate a column name for the given aggregate node. The name is
86
+ * made from a hash of the string-serialized SQL expression.
87
+ * @param {AggregateNode} node The aggregate node to name.
88
+ * @returns {string} The generated column name.
89
+ */
90
+ function colName(node) {
91
+ return 'pre_' + fnv_hash(`${node}`).toString(16);
92
+ }
93
+
94
+ /**
95
+ * Add a sufficient statistic to the preaggregation column set.
96
+ * Generates a unique column name for the statistic and propagates
97
+ * a FILTER clause if one exists on the original aggregate node.
98
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
99
+ * sufficient statistics) to pre-aggregate.
100
+ * @param {AggregateNode} expr The aggregate statistic to add.
101
+ * @param {AggregateNode} [node] The originating aggregate function call.
102
+ * @returns {string} The name of the statistic column.
103
+ */
104
+ function addStat(preagg, expr, node) {
105
+ const filter = node?.filter;
106
+ if (filter) {
107
+ // push filter clause to preaggregate expr
108
+ expr = expr.filter
109
+ ? expr.where(and(filter, expr.filter))
110
+ : expr.where(filter);
111
+ }
112
+ const name = colName(expr);
113
+ preagg[name] = expr;
114
+ return name;
115
+ }
116
+
117
+ /**
118
+ * Generate an expression for calculating counts over data dimensions.
119
+ * As a side effect, this method adds a column to the input *preagg* object
120
+ * to track the count of non-null values per-partition.
121
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
122
+ * sufficient statistics) to pre-aggregate.
123
+ * @param {AggregateNode} node The originating aggregate function call.
124
+ * @returns {{ expr: ExprNode, name: string }} An aggregate expression over
125
+ * pre-aggregated dimensions and associated column name.
126
+ */
127
+ function countExpr(preagg, node) {
128
+ const name = addStat(preagg, count(node.args[0]), node);
129
+ return { expr: sum(name), name };
130
+ }
131
+
132
+ /**
133
+ * Generate an expression for calculating counts or sums over data dimensions.
134
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
135
+ * sufficient statistics) to pre-aggregate.
136
+ * @param {AggregateNode} node The originating aggregate function call.
137
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
138
+ */
139
+ function sumExpr(preagg, node) {
140
+ return sum(addStat(preagg, node));
141
+ }
142
+
143
+ /**
144
+ * Generate an expression for calculating averages over data dimensions.
145
+ * As a side effect, this method adds a column to the input *preagg* object
146
+ * to track the count of non-null values per-partition.
147
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
148
+ * sufficient statistics) to pre-aggregate.
149
+ * @param {AggregateNode} [node] The originating aggregate function call.
150
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
151
+ */
152
+ function avgExpr(preagg, node) {
153
+ const as = addStat(preagg, node);
154
+ const { expr, name } = countExpr(preagg, node);
155
+ return div(sum(mul(as, name)), expr);
156
+ }
157
+
158
+ /**
159
+ * Generate an expression for calculating argmax over data dimensions.
160
+ * As a side effect, this method adds a column to the input *preagg* object
161
+ * to track a maximum value per-partition.
162
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
163
+ * sufficient statistics) to pre-aggregate.
164
+ * @param {AggregateNode} node The originating aggregate function call.
165
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
166
+ */
167
+ function argmaxExpr(preagg, node) {
168
+ const expr = addStat(preagg, node);
169
+ const maxy = addStat(preagg, max(node.args[1]), node);
170
+ return argmax(expr, maxy);
171
+ }
172
+
173
+ /**
174
+ * Generate an expression for calculating argmin over data dimensions.
175
+ * As a side effect, this method adds a column to the input *preagg* object
176
+ * to track a minimum value per-partition.
177
+ * @param {object} preagg An object for columns (such as
178
+ * sufficient statistics) to include in the pre-aggregation.
179
+ * @param {AggregateNode} node The originating aggregate function call,
180
+ * whose second argument is the value to minimize.
181
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
182
+ */
183
+ function argminExpr(preagg, node) {
184
+ const expr = addStat(preagg, node);
185
+ const miny = addStat(preagg, min(node.args[1]), node);
186
+ return argmin(expr, miny);
187
+ }
188
+
189
+ /**
190
+ * Generate an expression for calculating variance over data dimensions.
191
+ * This method uses the "textbook" definition of variance (E[X^2] - E[X]^2),
192
+ * but on mean-centered data to reduce floating point error. The variance
193
+ * calculation uses three sufficient statistics: the count of non-null values,
194
+ * the residual sum of squares and the sum of residual (mean-centered) values.
195
+ * As a side effect, this method adds columns for these statistics to the
196
+ * input *preagg* object.
197
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
198
+ * sufficient statistics) to pre-aggregate.
199
+ * @param {AggregateNode} node The originating aggregate function call.
200
+ * @param {(field: any) => ExprNode} avg Global average query generator.
201
+ * @param {boolean} [correction=true] A flag for whether a Bessel
202
+ * correction should be applied to compute the sample variance
203
+ * rather than the population variance.
204
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
205
+ */
206
+ function varianceExpr(preagg, node, avg, correction = true) {
207
+ const x = node.args[0];
208
+ const { expr: n } = countExpr(preagg, node);
209
+ const delta = sub(x, avg(x));
210
+ const rssq = addStat(preagg, sum(pow(delta, 2)), node); // residual sum of squares
211
+ const rsum = addStat(preagg, sum(delta), node); // residual sum
212
+ const denom = correction ? sub(n, 1) : n; // Bessel correction
213
+ return div(sub(sum(rssq), div(pow(sum(rsum), 2), n)), denom);
214
+ }
215
+
216
+ /**
217
+ * Generate an expression for calculating covariance over data dimensions.
218
+ * This method uses mean-centered data to reduce floating point error. The
219
+ * covariance calculation uses four sufficient statistics: the count of
220
+ * non-null value pairs, the sum of residual products, and residual sums
221
+ * (of mean-centered values) for x and y. As a side effect, this method
222
+ * adds columns for these statistics to the input *preagg* object.
223
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
224
+ * sufficient statistics) to pre-aggregate.
225
+ * @param {AggregateNode} node The originating aggregate function call.
226
+ * @param {(field: any) => ExprNode} avg Global average query generator.
227
+ * @param {boolean|null} [correction=true] A flag for whether a Bessel
228
+ * correction should be applied to compute the sample covariance rather
229
+ * than the population covariance. If null, an expression for the
230
+ * unnormalized covariance (no division by sample count) is returned.
231
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
232
+ */
233
+ function covarianceExpr(preagg, node, avg, correction = true) {
234
+ const { expr: n } = regrCountExpr(preagg, node);
235
+ const sxy = regrSumXYExpr(preagg, node, avg);
236
+ const sx = regrSumExpr(preagg, 1, node, avg);
237
+ const sy = regrSumExpr(preagg, 0, node, avg);
238
+ const num = sub(sxy, div(mul(sx, sy), n));
239
+ return correction === null ? num // do not divide by count
240
+ : correction ? div(num, sub(n, 1)) // Bessel correction (sample)
241
+ : div(num, n); // no correction (population)
242
+ }
243
+
244
+ /**
245
+ * Generate an expression for calculating Pearson product-moment correlation
246
+ * coefficients over data dimensions. This method uses mean-centered data
247
+ * to reduce floating point error. The correlation calculation uses six
248
+ * sufficient statistics: the count of non-null value pairs, the sum of
249
+ * residual products, and both residual sums and sums of squares for x and y.
250
+ * As a side effect, this method adds columns for these statistics to the
251
+ * input *preagg* object.
252
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
253
+ * sufficient statistics) to pre-aggregate.
254
+ * @param {AggregateNode} node The originating aggregate function call.
255
+ * @param {(field: any) => ExprNode} avg Global average query generator.
256
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
257
+ */
258
+ function corrExpr(preagg, node, avg) {
259
+ const { expr: n } = regrCountExpr(preagg, node);
260
+ const sxy = regrSumXYExpr(preagg, node, avg);
261
+ const sxx = regrSumSqExpr(preagg, 1, node, avg);
262
+ const syy = regrSumSqExpr(preagg, 0, node, avg);
263
+ const sx = regrSumExpr(preagg, 1, node, avg);
264
+ const sy = regrSumExpr(preagg, 0, node, avg);
265
+ const vx = sub(sxx, div(pow(sx, 2), n));
266
+ const vy = sub(syy, div(pow(sy, 2), n));
267
+ return div(
268
+ sub(sxy, div(mul(sx, sy), n)),
269
+ sqrt(mul(vx, vy))
270
+ );
271
+ }
272
+
273
+ /**
274
+ * Generate an expression for the count of non-null (x, y) pairs. As a side
275
+ * effect, this method adds columns to the input *preagg* object to track the
276
+ * partition-level count of non-null pairs.
277
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
278
+ * sufficient statistics) to pre-aggregate.
279
+ * @param {AggregateNode} node The originating aggregate function call.
280
+ * @returns {{ expr: ExprNode, name: string }} An aggregate expression over
281
+ * pre-aggregated dimensions and associated column name.
282
+ */
283
+ function regrCountExpr(preagg, node) {
284
+ const [x, y] = node.args;
285
+ const n = addStat(preagg, regrCount(x, y), node);
286
+ return { expr: sum(n), name: n };
287
+ }
288
+
289
+ /**
290
+ * Generate an expression for calculating sums of residual values for use in
291
+ * covariance and regression queries. Only values corresponding to non-null
292
+ * (x, y) pairs are included. This method uses mean-centered data to reduce
293
+ * floating point error. As a side effect, this method adds a column for
294
+ * partition-level sums to the input *preagg* object.
295
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
296
+ * sufficient statistics) to pre-aggregate.
297
+ * @param {number} i An index indicating which argument column to sum.
298
+ * @param {AggregateNode} node The originating aggregate function call.
299
+ * @param {(field: any) => ExprNode} avg Global average query generator.
300
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
301
+ */
302
+ function regrSumExpr(preagg, i, node, avg) {
303
+ const args = node.args;
304
+ const v = args[i];
305
+ const o = args[1 - i];
306
+ const rsum = sum(sub(v, avg(v))).where(isNotNull(o));
307
+ return sum(addStat(preagg, rsum, node));
308
+ }
309
+
310
+ /**
311
+ * Generate an expression for calculating sums of squared residual values for
312
+ * use in covariance and regression queries. Only values corresponding to
313
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
314
+ * reduce floating point error. As a side effect, this method adds a column
315
+ * for partition-level sums to the input *preagg* object.
316
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
317
+ * sufficient statistics) to pre-aggregate.
318
+ * @param {number} i An index indicating which argument column to sum.
319
+ * @param {AggregateNode} node The originating aggregate function call.
320
+ * @param {(field: any) => ExprNode} avg Global average query generator.
321
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
322
+ */
323
+ function regrSumSqExpr(preagg, i, node, avg) {
324
+ const args = node.args;
325
+ const v = args[i];
326
+ const u = args[1 - i];
327
+ const ssq = sum(pow(sub(v, avg(v)), 2)).where(isNotNull(u));
328
+ return sum(addStat(preagg, ssq, node));
329
+ }
330
+
331
+ /**
332
+ * Generate an expression for calculating sums of residual product values for
333
+ * use in covariance and regression queries. Only values corresponding to
334
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
335
+ * reduce floating point error. As a side effect, this method adds a column
336
+ * for partition-level sums to the input *preagg* object.
337
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
338
+ * sufficient statistics) to pre-aggregate.
339
+ * @param {AggregateNode} node The originating aggregate function call.
340
+ * @param {(field: any) => ExprNode} avg Global average query generator.
341
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
342
+ */
343
+ function regrSumXYExpr(preagg, node, avg) {
344
+ const [y, x] = node.args;
345
+ const sxy = sum(mul(sub(x, avg(x)), sub(y, avg(y))));
346
+ return sum(addStat(preagg, sxy, node));
347
+ }
348
+
349
+ /**
350
+ * Generate an expression for the average x value in a regression context.
351
+ * Only values corresponding to non-null (x, y) pairs are included. As a side
352
+ * effect, this method adds columns to the input *preagg* object to track both
353
+ * the count of non-null pairs and partition-level averages.
354
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
355
+ * sufficient statistics) to pre-aggregate.
356
+ * @param {AggregateNode} node The originating aggregate function call.
357
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
358
+ */
359
+ function regrAvgXExpr(preagg, node) {
360
+ const [y, x] = node.args;
361
+ const { expr: n, name } = regrCountExpr(preagg, node);
362
+ const a = addStat(preagg, regrAvgX(y, x), node);
363
+ return div(sum(mul(a, name)), n);
364
+ }
365
+
366
+ /**
367
+ * Generate an expression for the average y value in a regression context.
368
+ * Only values corresponding to non-null (x, y) pairs are included. As a side
369
+ * effect, this method adds columns to the input *preagg* object to track both
370
+ * the count of non-null pairs and partition-level averages.
371
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
372
+ * sufficient statistics) to pre-aggregate.
373
+ * @param {AggregateNode} node The originating aggregate function call.
374
+ * @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
375
+ */
376
+ function regrAvgYExpr(preagg, node) {
377
+ const [y, x] = node.args;
378
+ const { expr: n, name } = regrCountExpr(preagg, node);
379
+ const a = addStat(preagg, regrAvgY(y, x), node);
380
+ return div(sum(mul(a, name)), n);
381
+ }
382
+
383
+ /**
384
+ * Generate an expression for calculating variance over data dimensions for
385
+ * use in covariance and regression queries. Only values corresponding to
386
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
387
+ * reduce floating point error. As a side effect, this method adds columns
388
+ * for partition-level count and sums to the input *preagg* object.
389
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
390
+ * sufficient statistics) to pre-aggregate.
391
+ * @param {number} i The index of the argument to compute the variance for.
392
+ * @param {AggregateNode} node The originating aggregate function call.
393
+ * @param {(field: any) => ExprNode} avg Global average query generator.
394
+ * @returns {ExprNode} An aggregate expression for calculating variance
395
+ * over pre-aggregated data dimensions.
396
+ */
397
+ function regrVarExpr(preagg, i, node, avg) {
398
+ const { expr: n } = regrCountExpr(preagg, node);
399
+ const sum = regrSumExpr(preagg, i, node, avg);
400
+ const ssq = regrSumSqExpr(preagg, i, node, avg);
401
+ return sub(ssq, div(pow(sum, 2), n));
402
+ }
403
+
404
+ /**
405
+ * Generate an expression for calculating a regression slope. The slope is
406
+ * computed as the covariance divided by the variance of the x variable. As a
407
+ * side effect, this method adds columns for sufficient statistics to the
408
+ * input *preagg* object.
409
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
410
+ * sufficient statistics) to pre-aggregate.
411
+ * @param {AggregateNode} node The originating aggregate function call.
412
+ * @param {(field: any) => ExprNode} avg Global average query generator.
413
+ * @returns {ExprNode} An aggregate expression for calculating regression
414
+ * slopes over pre-aggregated data dimensions.
415
+ */
416
+ function regrSlopeExpr(preagg, node, avg) {
417
+ const cov = covarianceExpr(preagg, node, avg, null);
418
+ const varx = regrVarExpr(preagg, 1, node, avg);
419
+ return div(cov, varx);
420
+ }
421
+
422
+ /**
423
+ * Generate an expression for calculating a regression intercept. The intercept
424
+ * is derived from the regression slope and average x and y values. As a
425
+ * side effect, this method adds columns for sufficient statistics to the
426
+ * input *preagg* object.
427
+ * @param {Record<string, ExprNode>} preagg A map of columns (such as
428
+ * sufficient statistics) to pre-aggregate.
429
+ * @param {AggregateNode} node The originating aggregate function call.
430
+ * @param {(field: any) => ExprNode} avg Global average query generator.
431
+ * @returns {ExprNode} An aggregate expression for calculating regression
432
+ * intercepts over pre-aggregated data dimensions.
433
+ */
434
+ function regrInterceptExpr(preagg, node, avg) {
435
+ const ax = regrAvgXExpr(preagg, node);
436
+ const ay = regrAvgYExpr(preagg, node);
437
+ const m = regrSlopeExpr(preagg, node, avg);
438
+ return sub(ay, mul(m, ax));
439
+ }
@@ -1,4 +1,4 @@
1
- import { Query, asRelation, count, isNull, max, min, sql } from '@uwdata/mosaic-sql';
1
+ import { AggregateNode, Query, asTableRef, count, isNull, max, min, sql } from '@uwdata/mosaic-sql';
2
2
  import { jsType } from './js-type.js';
3
3
 
4
4
  export const Count = 'count';
@@ -8,6 +8,9 @@ export const Min = 'min';
8
8
  export const Distinct = 'distinct';
9
9
  export const Stats = { Count, Nulls, Max, Min, Distinct };
10
10
 
11
+ /**
12
+ * @type {Record<string, (column: string) => AggregateNode>}
13
+ */
11
14
  const statMap = {
12
15
  [Count]: count,
13
16
  [Distinct]: column => count(column).distinct(),
@@ -16,14 +19,21 @@ const statMap = {
16
19
  [Nulls]: column => count().where(isNull(column))
17
20
  };
18
21
 
22
+ /**
23
+ * Generate a query that computes the requested summary statistics for a column.
24
+ * @param {string} table
25
+ * @param {string} column
26
+ * @param {string[]|Set<string>} stats
27
+ * @returns A query over *table* that selects one aggregate of *column* per entry in *stats*.
28
+ */
19
29
  function summarize(table, column, stats) {
20
30
  return Query
21
31
  .from(table)
22
- .select(Array.from(stats, s => [s, statMap[s](column)]));
32
+ .select(Array.from(stats, s => ({[s]: statMap[s](column)})));
23
33
  }
24
34
 
25
35
  export async function queryFieldInfo(mc, fields) {
26
- if (fields.length === 1 && `${fields[0].column}` === '*') {
36
+ if (fields.length === 1 && fields[0].column === '*') {
27
37
  return getTableInfo(mc, fields[0].table);
28
38
  } else {
29
39
  return (await Promise
@@ -35,7 +45,8 @@ export async function queryFieldInfo(mc, fields) {
35
45
  async function getFieldInfo(mc, { table, column, stats }) {
36
46
  // generate and issue a query for field metadata info
37
47
  // use GROUP BY ALL to differentiate & consolidate aggregates
38
- const q = Query.from({ source: table })
48
+ const q = Query
49
+ .from({ source: table })
39
50
  .select({ column })
40
51
  .groupby(column.aggregate ? sql`ALL` : []);
41
52
  const [desc] = Array.from(await mc.query(Query.describe(q)));
@@ -61,7 +72,7 @@ async function getFieldInfo(mc, { table, column, stats }) {
61
72
  }
62
73
 
63
74
  async function getTableInfo(mc, table) {
64
- const result = await mc.query(`DESCRIBE ${asRelation(table)}`);
75
+ const result = await mc.query(`DESCRIBE ${asTableRef(table)}`);
65
76
  return Array.from(result).map(desc => ({
66
77
  table,
67
78
  column: desc.column_name,
package/src/util/hash.js CHANGED
@@ -7,7 +7,7 @@ export function fnv_hash(v) {
7
7
  if (d) a = fnv_multiply(a ^ d >> 8);
8
8
  a = fnv_multiply(a ^ c & 0xff);
9
9
  }
10
- return fnv_mix(a);
10
+ return fnv_mix(a) >>> 0; // ensure non-negative (unsigned 32-bit) value
11
11
  }
12
12
 
13
13
  // a * 16777619 mod 2**32
@@ -1,3 +1,10 @@
1
+ export const QueryState = Object.freeze({
2
+ pending: Symbol('pending'),
3
+ ready: Symbol('ready'),
4
+ error: Symbol('error'),
5
+ done: Symbol('done')
6
+ });
7
+
1
8
  /**
2
9
  * A query result Promise that allows external callers
3
10
  * to resolve or reject the Promise.
@@ -15,15 +22,41 @@ export class QueryResult extends Promise {
15
22
  });
16
23
  this._resolve = resolve;
17
24
  this._reject = reject;
25
+ this._state = QueryState.pending;
26
+ this._value = undefined;
18
27
  }
19
28
 
20
29
  /**
21
- * Resolve the result Promise with the provided value.
30
+ * Resolve the result Promise with a prepared value or the provided value.
31
+ * This method throws unless exactly one of these holds: a value is provided, or a value was prepared via ready().
22
32
  * @param {*} value The result value.
23
33
  * @returns {this}
24
34
  */
25
35
  fulfill(value) {
26
- this._resolve(value);
36
+ if (this._value !== undefined) {
37
+ if (value !== undefined) {
38
+ throw Error('Promise is ready and fulfill has a provided value');
39
+ }
40
+ this._resolve(this._value);
41
+ } else if (value === undefined) {
42
+ throw Error('Promise is neither ready nor has provided value');
43
+ } else {
44
+ this._resolve(value);
45
+ }
46
+
47
+ this._state = QueryState.done;
48
+
49
+ return this;
50
+ }
51
+
52
+ /**
53
+ * Prepare to resolve with the provided value.
54
+ * @param {*} value The result value.
55
+ * @returns {this}
56
+ */
57
+ ready(value) {
58
+ this._state = QueryState.ready;
59
+ this._value = value;
27
60
  return this;
28
61
  }
29
62
 
@@ -33,9 +66,18 @@ export class QueryResult extends Promise {
33
66
  * @returns {this}
34
67
  */
35
68
  reject(error) {
69
+ this._state = QueryState.error;
36
70
  this._reject(error);
37
71
  return this;
38
72
  }
73
+
74
+ /**
75
+ * Returns the state of this query result.
76
+ * @returns {symbol}
77
+ */
78
+ get state() {
79
+ return this._state;
80
+ }
39
81
  }
40
82
 
41
83
  // necessary to make Promise subclass act like a Promise
@@ -1,4 +1,4 @@
1
- import { SQLExpression } from '@uwdata/mosaic-sql';
1
+ import { ExprNode } from '@uwdata/mosaic-sql';
2
2
  import { MosaicClient } from '../MosaicClient.js';
3
3
 
4
4
  /**
@@ -127,11 +127,11 @@ export interface SelectionClause {
127
127
  * The predicate should apply filtering criteria consistent with this
128
128
  * clause's *value* property.
129
129
  */
130
- predicate: SQLExpression | null;
130
+ predicate: ExprNode | null;
131
131
  /**
132
132
  * Optional clause metadata that varies based on the selection type.
133
133
  * The metadata can be used to optimize selection queries, for example
134
- * by creating pre-aggregated data cubes when applicable.
134
+ * by creating materialized views of pre-aggregated data when applicable.
135
135
  */
136
136
  meta?: ClauseMetadata;
137
137
  }
@@ -16,15 +16,17 @@ export function throttle(callback, debounce = false) {
16
16
  let pending = NIL;
17
17
 
18
18
  function invoke(event) {
19
- curr = callback(event).finally(() => {
20
- if (next) {
21
- const { value } = next;
22
- next = null;
23
- invoke(value);
24
- } else {
25
- curr = null;
26
- }
27
- });
19
+ curr = callback(event)
20
+ .catch(() => {})
21
+ .finally(() => {
22
+ if (next) {
23
+ const { value } = next;
24
+ next = null;
25
+ invoke(value);
26
+ } else {
27
+ curr = null;
28
+ }
29
+ });
28
30
  }
29
31
 
30
32
  function enqueue(event) {
@@ -1,9 +1,10 @@
1
+ /* eslint-disable no-unused-vars */
1
2
  export function voidLogger() {
2
3
  return {
3
- debug() {},
4
- info() {},
5
- log() {},
6
- warn() {},
7
- error() {}
4
+ debug(..._) {},
5
+ info(..._) {},
6
+ log(..._) {},
7
+ warn(..._) {},
8
+ error(..._) {}
8
9
  };
9
10
  }
package/tsconfig.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "include": ["src/**/*.js", "src/**/*.ts"],
3
+ "compilerOptions": {
4
+ "allowJs": true,
5
+ "declaration": true,
6
+ "emitDeclarationOnly": true,
7
+ "outDir": "dist/types",
8
+ "module": "node16",
9
+ "skipLibCheck": true
10
+ }
11
+ }