@uwdata/mosaic-core 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  import { DataType } from 'apache-arrow';
2
2
 
3
+ /**
4
+ * @typedef {import('apache-arrow').Vector} Vector
5
+ */
6
+
3
7
  /**
4
8
  * Test if a value is an Apache Arrow table.
5
9
  * As sometimes multiple Arrow versions may be used simultaneously,
@@ -58,7 +62,7 @@ export function convertArrowValue(type) {
58
62
  * Large integers (BigInt) are converted to Float64 numbers.
59
63
  * Fixed-point decimal values are converted to Float64 numbers.
60
64
  * Otherwise, the default Arrow values are used.
61
- * @param {*} column An Apache Arrow column
65
+ * @param {Vector} column An Apache Arrow column
62
66
  * @returns an array of values
63
67
  */
64
68
  export function convertArrowColumn(column) {
@@ -78,10 +82,10 @@ export function convertArrowColumn(column) {
78
82
  // map bigint to number
79
83
  if (DataType.isInt(type) && type.bitWidth >= 64) {
80
84
  const size = column.length;
81
- const array = new Float64Array(size);
85
+ const array = column.nullCount ? new Array(size) : new Float64Array(size);
82
86
  for (let row = 0; row < size; ++row) {
83
87
  const v = column.get(row);
84
- array[row] = v == null ? NaN : Number(v);
88
+ array[row] = v == null ? null : Number(v);
85
89
  }
86
90
  return array;
87
91
  }
@@ -90,14 +94,19 @@ export function convertArrowColumn(column) {
90
94
  if (DataType.isDecimal(type)) {
91
95
  const scale = 1 / Math.pow(10, type.scale);
92
96
  const size = column.length;
93
- const array = new Float64Array(size);
97
+ const array = column.nullCount ? new Array(size) : new Float64Array(size);
94
98
  for (let row = 0; row < size; ++row) {
95
99
  const v = column.get(row);
96
- array[row] = v == null ? NaN : decimalToNumber(v, scale);
100
+ array[row] = v == null ? null : decimalToNumber(v, scale);
97
101
  }
98
102
  return array;
99
103
  }
100
104
 
105
+ // if there are null values, use a standard array
106
+ if (column.nullCount) {
107
+ return Array.from(column);
108
+ }
109
+
101
110
  // otherwise use Arrow JS defaults
102
111
  return column.toArray();
103
112
  }
@@ -0,0 +1,540 @@
1
+ import { Query, agg, sql } from '@uwdata/mosaic-sql';
2
+ import { MosaicClient } from '../MosaicClient.js';
3
+
4
+ /**
5
+ * Determine data cube index columns for a given Mosaic client.
6
+ * @param {MosaicClient} client The Mosaic client.
7
+ * @returns An object with necessary column data to generate data
8
+ * cube index columns, or null if the client is not indexable or
9
+ * the client query contains an invalid or unsupported expression.
10
+ */
11
+ export function indexColumns(client) {
12
+ if (!client.filterIndexable) return null;
13
+ const q = client.query();
14
+ const from = getBaseTable(q);
15
+
16
+ // bail if no base table or the query is not analyzable
17
+ if (typeof from !== 'string' || !q.select) return null;
18
+
19
+ const aggr = []; // list of output aggregate columns
20
+ const dims = []; // list of grouping dimension columns
21
+ const aux = {}; // auxiliary columns needed by aggregates
22
+
23
+ for (const entry of q.select()) {
24
+ const { as, expr: { aggregate, args } } = entry;
25
+ const op = aggregate?.toUpperCase?.();
26
+ switch (op) {
27
+ case 'COUNT':
28
+ case 'SUM':
29
+ // TODO: revisit this DOUBLE cast in the future
30
+ // for now, this sidesteps client-side conversions
31
+ // of bignum and fixed decimal types to JS numbers
32
+ aggr.push({ [as]: agg`SUM("${as}")::DOUBLE` });
33
+ break;
34
+ case 'AVG':
35
+ aggr.push({ [as]: avgExpr(aux, as, args[0]) });
36
+ break;
37
+ case 'ARG_MAX':
38
+ aggr.push({ [as]: argmaxExpr(aux, as, args) });
39
+ break;
40
+ case 'ARG_MIN':
41
+ aggr.push({ [as]: argminExpr(aux, as, args) });
42
+ break;
43
+
44
+ // variance statistics drop the original aggregate operation
45
+ // in favor of tracking auxiliary sufficient statistics
46
+ case 'VARIANCE':
47
+ case 'VAR_SAMP':
48
+ aux[as] = null;
49
+ aggr.push({ [as]: varianceExpr(aux, args[0], from) });
50
+ break;
51
+ case 'VAR_POP':
52
+ aux[as] = null;
53
+ aggr.push({ [as]: varianceExpr(aux, args[0], from, false) });
54
+ break;
55
+ case 'STDDEV':
56
+ case 'STDDEV_SAMP':
57
+ aux[as] = null;
58
+ aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], from)})` });
59
+ break;
60
+ case 'STDDEV_POP':
61
+ aux[as] = null;
62
+ aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], from, false)})` });
63
+ break;
64
+ case 'COVAR_SAMP':
65
+ aux[as] = null;
66
+ aggr.push({ [as]: covarianceExpr(aux, args, from) });
67
+ break;
68
+ case 'COVAR_POP':
69
+ aux[as] = null;
70
+ aggr.push({ [as]: covarianceExpr(aux, args, from, false) });
71
+ break;
72
+ case 'CORR':
73
+ aux[as] = null;
74
+ aggr.push({ [as]: corrExpr(aux, args, from) });
75
+ break;
76
+
77
+ // regression statistics
78
+ case 'REGR_COUNT':
79
+ aux[as] = null;
80
+ aggr.push({ [as]: agg`${regrCountExpr(aux, args)}::DOUBLE` });
81
+ break;
82
+ case 'REGR_AVGX':
83
+ aux[as] = null;
84
+ aggr.push({ [as]: regrAvgXExpr(aux, args) });
85
+ break;
86
+ case 'REGR_AVGY':
87
+ aux[as] = null;
88
+ aggr.push({ [as]: regrAvgYExpr(aux, args) });
89
+ break;
90
+ case 'REGR_SYY':
91
+ aux[as] = null;
92
+ aggr.push({ [as]: regrVarExpr(aux, 0, args, from) });
93
+ break;
94
+ case 'REGR_SXX':
95
+ aux[as] = null;
96
+ aggr.push({ [as]: regrVarExpr(aux, 1, args, from) });
97
+ break;
98
+ case 'REGR_SXY':
99
+ aux[as] = null;
100
+ aggr.push({ [as]: covarianceExpr(aux, args, from, null) });
101
+ break;
102
+ case 'REGR_SLOPE':
103
+ aux[as] = null;
104
+ aggr.push({ [as]: regrSlopeExpr(aux, args, from) });
105
+ break;
106
+ case 'REGR_INTERCEPT':
107
+ aux[as] = null;
108
+ aggr.push({ [as]: regrInterceptExpr(aux, args, from) });
109
+ break;
110
+ case 'REGR_R2':
111
+ aux[as] = null;
112
+ aggr.push({ [as]: agg`(${corrExpr(aux, args, from)}) ** 2` });
113
+ break;
114
+
115
+ // aggregates that commute directly
116
+ case 'MAX':
117
+ case 'MIN':
118
+ case 'BIT_AND':
119
+ case 'BIT_OR':
120
+ case 'BIT_XOR':
121
+ case 'BOOL_AND':
122
+ case 'BOOL_OR':
123
+ case 'PRODUCT':
124
+ aggr.push({ [as]: agg`${op}("${as}")` });
125
+ break;
126
+
127
+ // otherwise, check if dimension
128
+ default:
129
+ if (!aggregate) dims.push(as);
130
+ else return null; // unsupported aggregate
131
+ }
132
+ }
133
+
134
+ // bail if the query has no aggregates
135
+ if (!aggr.length) return null;
136
+
137
+ return { from, dims, aggr, aux };
138
+ }
139
+
140
+ /**
141
+ * Generate an output column name for use as an auxiliary column
142
+ * (e.g., for sufficient statistics) within a data cube index.
143
+ * @param {string} type The operation type.
144
+ * @param {...any} args The input column arguments.
145
+ * @returns {string} A sanitized auxiliary column name.
146
+ */
147
+ function auxName(type, ...args) {
148
+ const cols = args.length ? '_' + args.map(sanitize).join('_') : '';
149
+ return `__${type}${cols}__`;
150
+ }
151
+
152
+ /**
153
+ * Sanitize a table column reference as a "safe" string value to
154
+ * use as part of derived column names.
155
+ * @param {*} col The source data table column. This may be a string,
156
+ * column reference, SQL expression, or other string-coercible value.
157
+ * @returns {string} The sanitized column name.
158
+ */
159
+ function sanitize(col) {
160
+ return `${col}`
161
+ .replaceAll('"', '')
162
+ .replaceAll(' ', '_');
163
+ }
164
+
165
+ /**
166
+ * Identify a single base (source) table of a query.
167
+ * @param {Query} query The input query.
168
+ * @returns {string | undefined | NaN} the base table name, or
169
+ * `undefined` if there is no source table, or `NaN` if the
170
+ * query operates over multiple source tables.
171
+ */
172
+ function getBaseTable(query) {
173
+ const subq = query.subqueries;
174
+
175
+ // select query
176
+ if (query.select) {
177
+ const from = query.from();
178
+ // @ts-ignore
179
+ if (!from.length) return undefined;
180
+ if (subq.length === 0) return from[0].from.table;
181
+ }
182
+
183
+ // handle set operations / subqueries
184
+ const base = getBaseTable(subq[0]);
185
+ for (let i = 1; i < subq.length; ++i) {
186
+ const from = getBaseTable(subq[i]);
187
+ if (from === undefined) continue;
188
+ if (from !== base) return NaN;
189
+ }
190
+ return base;
191
+ }
192
+
193
+ /**
194
+ * Generate an expression for calculating counts over data partitions.
195
+ * As a side effect, this method adds a column to the input *aux* object
196
+ * to track the count of non-null values per-partition.
197
+ * @param {object} aux An object for auxiliary columns (such as
198
+ * sufficient statistics) to include in the data cube aggregation.
199
+ * @param {any} arg Source data table column. This value may be a string,
200
+ * column reference, SQL expression, or other string-coercible value.
201
+ * @returns An aggregate expression for calculating counts over
202
+ * pre-aggregated data partitions.
203
+ */
204
+ function countExpr(aux, arg) {
205
+ const n = auxName('count', arg);
206
+ aux[n] = agg`COUNT(${arg})`;
207
+ return agg`SUM(${n})`.annotate({ name: n });
208
+ }
209
+
210
+ /**
211
+ * Generate an expression for calculating averages over data partitions.
212
+ * As a side effect, this method adds a column to the input *aux* object
213
+ * to track the count of non-null values per-partition.
214
+ * @param {object} aux An object for auxiliary columns (such as
215
+ * sufficient statistics) to include in the data cube aggregation.
216
+ * @param {string} as The output column for the original aggregate.
217
+ * @param {any} arg Source data table column. This value may be a string,
218
+ * column reference, SQL expression, or other string-coercible value.
219
+ * @returns An aggregate expression for calculating averages over
220
+ * pre-aggregated data partitions.
221
+ */
222
+ function avgExpr(aux, as, arg) {
223
+ const n = countExpr(aux, arg);
224
+ return agg`(SUM("${as}" * ${n.name}) / ${n})`;
225
+ }
226
+
227
+ /**
228
+ * Generate a scalar subquery for a global average.
229
+ * This value can be used to mean-center data.
230
+ * @param {*} x Source data table column.
231
+ * @param {string} from The source data table name.
232
+ * @returns A scalar aggregate query
233
+ */
234
+ function avg(x, from) {
235
+ return sql`(SELECT AVG(${x}) FROM "${from}")`;
236
+ }
237
+
238
+ /**
239
+ * Generate an expression for calculating argmax over data partitions.
240
+ * As a side effect, this method adds a column to the input *aux* object
241
+ * to track a maximum value per-partition.
242
+ * @param {object} aux An object for auxiliary columns (such as
243
+ * sufficient statistics) to include in the data cube aggregation.
244
+ * @param {string} as The output column for the original aggregate.
245
+ * @param {any[]} args Source data table columns. The entries may be strings,
246
+ * column references, SQL expressions, or other string-coercible values.
247
+ * @returns An aggregate expression for calculating argmax over
248
+ * pre-aggregated data partitions.
249
+ */
250
+ function argmaxExpr(aux, as, [, y]) {
251
+ const max = auxName('max', y);
252
+ aux[max] = agg`MAX(${y})`;
253
+ return agg`ARG_MAX("${as}", ${max})`;
254
+ }
255
+
256
+ /**
257
+ * Generate an expression for calculating argmin over data partitions.
258
+ * As a side effect, this method adds a column to the input *aux* object
259
+ * to track a minimum value per-partition.
260
+ * @param {object} aux An object for auxiliary columns (such as
261
+ * sufficient statistics) to include in the data cube aggregation.
262
+ * @param {string} as The output column for the original aggregate.
263
+ * @param {any[]} args Source data table columns. The entries may be strings,
264
+ * column references, SQL expressions, or other string-coercible values.
265
+ * @returns An aggregate expression for calculating argmin over
266
+ * pre-aggregated data partitions.
267
+ */
268
+ function argminExpr(aux, as, [, y]) {
269
+ const min = auxName('min', y);
270
+ aux[min] = agg`MIN(${y})`;
271
+ return agg`ARG_MIN("${as}", ${min})`;
272
+ }
273
+
274
+ /**
275
+ * Generate an expression for calculating variance over data partitions.
276
+ * This method uses the "textbook" definition of variance (E[X^2] - E[X]^2),
277
+ * but on mean-centered data to reduce floating point error. The variance
278
+ * calculation uses three sufficient statistics: the count of non-null values,
279
+ * the residual sum of squares and the sum of residual (mean-centered) values.
280
+ * As a side effect, this method adds columns for these statistics to the
281
+ * input *aux* object.
282
+ * @param {object} aux An object for auxiliary columns (such as
283
+ * sufficient statistics) to include in the data cube aggregation.
284
+ * @param {*} x The source data table column. This may be a string,
285
+ * column reference, SQL expression, or other string-coercible value.
286
+ * @param {string} from The source data table name.
287
+ * @param {boolean} [correction=true] A flag for whether a Bessel
288
+ * correction should be applied to compute the sample variance
289
+ * rather than the population variance.
290
+ * @returns An aggregate expression for calculating variance over
291
+ * pre-aggregated data partitions.
292
+ */
293
+ function varianceExpr(aux, x, from, correction = true) {
294
+ const n = countExpr(aux, x);
295
+ const ssq = auxName('rssq', x); // residual sum of squares
296
+ const sum = auxName('rsum', x); // residual sum
297
+ const delta = sql`${x} - ${avg(x, from)}`;
298
+ aux[ssq] = agg`SUM((${delta}) ** 2)`;
299
+ aux[sum] = agg`SUM(${delta})`;
300
+ const adj = correction ? ` - 1` : ''; // Bessel correction
301
+ return agg`(SUM(${ssq}) - (SUM(${sum}) ** 2 / ${n})) / (${n}${adj})`;
302
+ }
303
+
304
+ /**
305
+ * Generate an expression for calculating covariance over data partitions.
306
+ * This method uses mean-centered data to reduce floating point error. The
307
+ * covariance calculation uses four sufficient statistics: the count of
308
+ * non-null value pairs, the sum of residual products, and residual sums
309
+ * (of mean-centered values) for x and y. As a side effect, this method
310
+ * adds columns for these statistics to the input *aux* object.
311
+ * @param {object} aux An object for auxiliary columns (such as
312
+ * sufficient statistics) to include in the data cube aggregation.
313
+ * @param {any[]} args Source data table columns. The entries may be strings,
314
+ * column references, SQL expressions, or other string-coercible values.
315
+ * @param {string} from The source data table name.
316
+ * @param {boolean|null} [correction=true] A flag for whether a Bessel
317
+ * correction should be applied to compute the sample covariance rather
318
+ * than the population covariance. If null, an expression for the
319
+ * unnormalized covariance (no division by sample count) is returned.
320
+ * @returns An aggregate expression for calculating covariance over
321
+ * pre-aggregated data partitions.
322
+ */
323
+ function covarianceExpr(aux, args, from, correction = true) {
324
+ const n = regrCountExpr(aux, args);
325
+ const sxy = regrSumXYExpr(aux, args, from);
326
+ const sx = regrSumExpr(aux, 1, args, from);
327
+ const sy = regrSumExpr(aux, 0, args, from);
328
+ const adj = correction === null ? '' // do not divide by count
329
+ : correction ? ` / (${n} - 1)` // Bessel correction (sample)
330
+ : ` / ${n}`; // no correction (population)
331
+ return agg`(${sxy} - ${sx} * ${sy} / ${n})${adj}`;
332
+ }
333
+
334
+ /**
335
+ * Generate an expression for calculating Pearson product-moment correlation
336
+ * coefficients over data partitions. This method uses mean-centered data
337
+ * to reduce floating point error. The correlation calculation uses six
338
+ * sufficient statistics: the count of non-null value pairs, the sum of
339
+ * residual products, and both residual sums and sums of squares for x and y.
340
+ * As a side effect, this method adds columns for these statistics to the
341
+ * input *aux* object.
342
+ * @param {object} aux An object for auxiliary columns (such as
343
+ * sufficient statistics) to include in the data cube aggregation.
344
+ * @param {any[]} args Source data table columns. The entries may be strings,
345
+ * column references, SQL expressions, or other string-coercible values.
346
+ * @param {string} from The source data table name.
347
+ * @returns An aggregate expression for calculating correlation over
348
+ * pre-aggregated data partitions.
349
+ */
350
+ function corrExpr(aux, args, from) {
351
+ const n = regrCountExpr(aux, args);
352
+ const sxy = regrSumXYExpr(aux, args, from);
353
+ const sxx = regrSumSqExpr(aux, 1, args, from);
354
+ const syy = regrSumSqExpr(aux, 0, args, from);
355
+ const sx = regrSumExpr(aux, 1, args, from);
356
+ const sy = regrSumExpr(aux, 0, args, from);
357
+ const vx = agg`(${sxx} - (${sx} ** 2) / ${n})`;
358
+ const vy = agg`(${syy} - (${sy} ** 2) / ${n})`;
359
+ return agg`(${sxy} - ${sx} * ${sy} / ${n}) / SQRT(${vx} * ${vy})`;
360
+ }
361
+
362
+ /**
363
+ * Generate an expression for the count of non-null (x, y) pairs. As a side
364
+ * effect, this method adds columns to the input *aux* object to track the
365
+ * partition-level count of non-null pairs.
366
+ * @param {object} aux An object for auxiliary columns (such as
367
+ * sufficient statistics) to include in the data cube aggregation.
368
+ * @param {any[]} args Source data table columns. The entries may be strings,
369
+ * column references, SQL expressions, or other string-coercible values.
370
+ * @returns An aggregate expression for calculating regression pair counts
371
+ * over pre-aggregated data partitions.
372
+ */
373
+ function regrCountExpr(aux, [y, x]) {
374
+ const n = auxName('count', y, x);
375
+ aux[n] = agg`REGR_COUNT(${y}, ${x})`;
376
+ return agg`SUM(${n})`.annotate({ name: n });
377
+ }
378
+
379
+ /**
380
+ * Generate an expression for calculating sums of residual values for use in
381
+ * covariance and regression queries. Only values corresponding to non-null
382
+ * (x, y) pairs are included. This method uses mean-centered data to reduce
383
+ * floating point error. As a side effect, this method adds a column for
384
+ * partition-level sums to the input *aux* object.
385
+ * @param {object} aux An object for auxiliary columns (such as
386
+ * sufficient statistics) to include in the data cube aggregation.
387
+ * @param {number} i An index indicating which argument column to sum.
388
+ * @param {any[]} args Source data table columns. The entries may be strings,
389
+ * column references, SQL expressions, or other string-coercible values.
390
+ * @param {string} from The source data table name.
391
+ * @returns An aggregate expression over pre-aggregated data partitions.
392
+ */
393
+ function regrSumExpr(aux, i, args, from) {
394
+ const v = args[i];
395
+ const o = args[1 - i];
396
+ const sum = auxName('rs', v);
397
+ aux[sum] = agg`SUM(${v} - ${avg(v, from)}) FILTER (${o} IS NOT NULL)`;
398
+ return agg`SUM(${sum})`
399
+ }
400
+
401
+ /**
402
+ * Generate an expression for calculating sums of squared residual values for
403
+ * use in covariance and regression queries. Only values corresponding to
404
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
405
+ * reduce floating point error. As a side effect, this method adds a column
406
+ * for partition-level sums to the input *aux* object.
407
+ * @param {object} aux An object for auxiliary columns (such as
408
+ * sufficient statistics) to include in the data cube aggregation.
409
+ * @param {number} i An index indicating which argument column to sum.
410
+ * @param {any[]} args Source data table columns. The entries may be strings,
411
+ * column references, SQL expressions, or other string-coercible values.
412
+ * @param {string} from The source data table name.
413
+ * @returns An aggregate expression over pre-aggregated data partitions.
414
+ */
415
+ function regrSumSqExpr(aux, i, args, from) {
416
+ const v = args[i];
417
+ const u = args[1 - i];
418
+ const ssq = auxName('rss', v);
419
+ aux[ssq] = agg`SUM((${v} - ${avg(v, from)}) ** 2) FILTER (${u} IS NOT NULL)`;
420
+ return agg`SUM(${ssq})`
421
+ }
422
+
423
+ /**
424
+ * Generate an expression for calculating sums of residual product values for
425
+ * use in covariance and regression queries. Only values corresponding to
426
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
427
+ * reduce floating point error. As a side effect, this method adds a column
428
+ * for partition-level sums to the input *aux* object.
429
+ * @param {object} aux An object for auxiliary columns (such as
430
+ * sufficient statistics) to include in the data cube aggregation.
431
+ * @param {any[]} args Source data table columns. The entries may be strings,
432
+ * column references, SQL expressions, or other string-coercible values.
433
+ * @param {string} from The source data table name.
434
+ * @returns An aggregate expression over pre-aggregated data partitions.
435
+ */
436
+ function regrSumXYExpr(aux, args, from) {
437
+ const [y, x] = args;
438
+ const sxy = auxName('sxy', y, x);
439
+ aux[sxy] = agg`SUM((${x} - ${avg(x, from)}) * (${y} - ${avg(y, from)}))`;
440
+ return agg`SUM(${sxy})`;
441
+ }
442
+
443
+ /**
444
+ * Generate an expression for the average x value in a regression context.
445
+ * Only values corresponding to non-null (x, y) pairs are included. As a side
446
+ * effect, this method adds columns to the input *aux* object to track both
447
+ * the count of non-null pairs and partition-level averages.
448
+ * @param {object} aux An object for auxiliary columns (such as
449
+ * sufficient statistics) to include in the data cube aggregation.
450
+ * @param {any[]} args Source data table columns. The entries may be strings,
451
+ * column references, SQL expressions, or other string-coercible values.
452
+ * @returns An aggregate expression over pre-aggregated data partitions.
453
+ */
454
+ function regrAvgXExpr(aux, args) {
455
+ const [y, x] = args;
456
+ const n = regrCountExpr(aux, args);
457
+ const a = auxName('avg', x, y);
458
+ aux[a] = agg`REGR_AVGX(${y}, ${x})`;
459
+ return agg`(SUM(${a} * ${n.name}) / ${n})`;
460
+ }
461
+
462
+ /**
463
+ * Generate an expression for the average y value in a regression context.
464
+ * Only values corresponding to non-null (x, y) pairs are included. As a side
465
+ * effect, this method adds columns to the input *aux* object to track both
466
+ * the count of non-null pairs and partition-level averages.
467
+ * @param {object} aux An object for auxiliary columns (such as
468
+ * sufficient statistics) to include in the data cube aggregation.
469
+ * @param {any[]} args Source data table columns. The entries may be strings,
470
+ * column references, SQL expressions, or other string-coercible values.
471
+ * @returns An aggregate expression over pre-aggregated data partitions.
472
+ */
473
+ function regrAvgYExpr(aux, args) {
474
+ const [y, x] = args;
475
+ const n = regrCountExpr(aux, args);
476
+ const a = auxName('avg', y, x);
477
+ aux[a] = agg`REGR_AVGY(${y}, ${x})`;
478
+ return agg`(SUM(${a} * ${n.name}) / ${n})`;
479
+ }
480
+
481
+ /**
482
+ * Generate an expression for calculating variance over data partitions for
483
+ * use in covariance and regression queries. Only values corresponding to
484
+ * non-null (x, y) pairs are included. This method uses mean-centered data to
485
+ * reduce floating point error. As a side effect, this method adds columns
486
+ * for partition-level count and sums to the input *aux* object.
487
+ * @param {object} aux An object for auxiliary columns (such as
488
+ * sufficient statistics) to include in the data cube aggregation.
489
+ * @param {number} i The index of the argument to compute the variance for.
490
+ * @param {any[]} args Source data table columns. The entries may be strings,
491
+ * column references, SQL expressions, or other string-coercible values.
492
+ * @param {string} from The source data table name.
493
+ * @returns An aggregate expression for calculating variance over
494
+ * pre-aggregated data partitions.
495
+ */
496
+ function regrVarExpr(aux, i, args, from) {
497
+ const n = regrCountExpr(aux, args);
498
+ const sum = regrSumExpr(aux, i, args, from);
499
+ const ssq = regrSumSqExpr(aux, i, args, from);
500
+ return agg`(${ssq} - (${sum} ** 2 / ${n}))`;
501
+ }
502
+
503
+ /**
504
+ * Generate an expression for calculating a regression slope. The slope is
505
+ * computed as the covariance divided by the variance of the x variable. As a
506
+ * side effect, this method adds columns for sufficient statistics to the
507
+ * input *aux* object.
508
+ * @param {object} aux An object for auxiliary columns (such as
509
+ * sufficient statistics) to include in the data cube aggregation.
510
+ * @param {any[]} args Source data table columns. The entries may be strings,
511
+ * column references, SQL expressions, or other string-coercible values.
512
+ * @param {string} from The source data table name.
513
+ * @returns An aggregate expression for calculating regression slopes over
514
+ * pre-aggregated data partitions.
515
+ */
516
+ function regrSlopeExpr(aux, args, from) {
517
+ const cov = covarianceExpr(aux, args, from, null);
518
+ const varx = regrVarExpr(aux, 1, args, from);
519
+ return agg`(${cov}) / ${varx}`;
520
+ }
521
+
522
+ /**
523
+ * Generate an expression for calculating a regression intercept. The intercept
524
+ * is derived from the regression slope and average x and y values. As a
525
+ * side effect, this method adds columns for sufficient statistics to the
526
+ * input *aux* object.
527
+ * @param {object} aux An object for auxiliary columns (such as
528
+ * sufficient statistics) to include in the data cube aggregation.
529
+ * @param {any[]} args Source data table columns. The entries may be strings,
530
+ * column references, SQL expressions, or other string-coercible values.
531
+ * @param {string} from The source data table name.
532
+ * @returns An aggregate expression for calculating regression intercepts over
533
+ * pre-aggregated data partitions.
534
+ */
535
+ function regrInterceptExpr(aux, args, from) {
536
+ const ax = regrAvgXExpr(aux, args);
537
+ const ay = regrAvgYExpr(aux, args);
538
+ const m = regrSlopeExpr(aux, args, from);
539
+ return agg`${ay} - (${m}) * ${ax}`;
540
+ }
@@ -1,9 +1,42 @@
1
- export function queryResult() {
2
- let resolve;
3
- let reject;
4
- const p = new Promise((r, e) => { resolve = r; reject = e; });
5
- return Object.assign(p, {
6
- fulfill: value => (resolve(value), p),
7
- reject: err => (reject(err), p)
8
- });
1
+ /**
2
+ * A query result Promise that allows external callers
3
+ * to resolve or reject the Promise.
4
+ */
5
+ export class QueryResult extends Promise {
6
+ /**
7
+ * Create a new query result Promise.
8
+ */
9
+ constructor() {
10
+ let resolve;
11
+ let reject;
12
+ super((r, e) => {
13
+ resolve = r;
14
+ reject = e;
15
+ });
16
+ this._resolve = resolve;
17
+ this._reject = reject;
18
+ }
19
+
20
+ /**
21
+ * Resolve the result Promise with the provided value.
22
+ * @param {*} value The result value.
23
+ * @returns {this}
24
+ */
25
+ fulfill(value) {
26
+ this._resolve(value);
27
+ return this;
28
+ }
29
+
30
+ /**
31
+ * Rejects the result Promise with the provided error.
32
+ * @param {*} error The error value.
33
+ * @returns {this}
34
+ */
35
+ reject(error) {
36
+ this._reject(error);
37
+ return this;
38
+ }
9
39
  }
40
+
41
+ // necessary to make Promise subclass act like a Promise
42
+ QueryResult.prototype.constructor = Promise;