@uwdata/mosaic-core 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mosaic-core.js +5951 -15206
- package/dist/mosaic-core.min.js +6 -15
- package/package.json +7 -7
- package/src/Coordinator.js +56 -31
- package/src/DataCubeIndexer.js +82 -24
- package/src/MosaicClient.js +9 -0
- package/src/QueryConsolidator.js +13 -9
- package/src/QueryManager.js +9 -3
- package/src/Selection.js +41 -10
- package/src/connectors/rest.js +3 -3
- package/src/connectors/socket.js +4 -3
- package/src/connectors/wasm.js +20 -4
- package/src/index.js +3 -8
- package/src/util/decode-ipc.js +11 -0
- package/src/util/field-info.js +3 -11
- package/src/util/index-columns.js +69 -72
- package/src/util/is-arrow-table.js +10 -0
- package/src/util/priority-queue.js +75 -76
- package/src/util/throttle.js +11 -1
- package/src/util/to-data-columns.js +4 -15
- package/src/util/convert-arrow.js +0 -145
package/src/connectors/socket.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { decodeIPC } from '../util/decode-ipc.js';
|
|
2
2
|
|
|
3
3
|
export function socketConnector(uri = 'ws://localhost:3000/') {
|
|
4
4
|
const queue = [];
|
|
@@ -47,7 +47,7 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
|
|
|
47
47
|
} else if (query.type === 'exec') {
|
|
48
48
|
resolve();
|
|
49
49
|
} else if (query.type === 'arrow') {
|
|
50
|
-
resolve(
|
|
50
|
+
resolve(decodeIPC(data));
|
|
51
51
|
} else {
|
|
52
52
|
throw new Error(`Unexpected socket data: ${data}`);
|
|
53
53
|
}
|
|
@@ -59,6 +59,7 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
|
|
|
59
59
|
|
|
60
60
|
function init() {
|
|
61
61
|
ws = new WebSocket(uri);
|
|
62
|
+
ws.binaryType = 'arraybuffer';
|
|
62
63
|
for (const type in events) {
|
|
63
64
|
ws.addEventListener(type, events[type]);
|
|
64
65
|
}
|
|
@@ -84,7 +85,7 @@ export function socketConnector(uri = 'ws://localhost:3000/') {
|
|
|
84
85
|
/**
|
|
85
86
|
* Query the DuckDB server.
|
|
86
87
|
* @param {object} query
|
|
87
|
-
* @param {'exec' | 'arrow' | 'json'} [query.type] The query type
|
|
88
|
+
* @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type.
|
|
88
89
|
* @param {string} query.sql A SQL query string.
|
|
89
90
|
* @returns the query result
|
|
90
91
|
*/
|
package/src/connectors/wasm.js
CHANGED
|
@@ -1,4 +1,20 @@
|
|
|
1
1
|
import * as duckdb from '@duckdb/duckdb-wasm';
|
|
2
|
+
import { decodeIPC } from '../util/decode-ipc.js';
|
|
3
|
+
|
|
4
|
+
// bypass duckdb-wasm query method to get Arrow IPC bytes directly
|
|
5
|
+
// https://github.com/duckdb/duckdb-wasm/issues/267#issuecomment-2252749509
|
|
6
|
+
function getArrowIPC(con, query) {
|
|
7
|
+
return new Promise((resolve, reject) => {
|
|
8
|
+
con.useUnsafe(async (bindings, conn) => {
|
|
9
|
+
try {
|
|
10
|
+
const buffer = await bindings.runQuery(conn, query);
|
|
11
|
+
resolve(buffer);
|
|
12
|
+
} catch (error) {
|
|
13
|
+
reject(error);
|
|
14
|
+
}
|
|
15
|
+
});
|
|
16
|
+
});
|
|
17
|
+
}
|
|
2
18
|
|
|
3
19
|
export function wasmConnector(options = {}) {
|
|
4
20
|
const { duckdb, connection, ...opts } = options;
|
|
@@ -45,17 +61,17 @@ export function wasmConnector(options = {}) {
|
|
|
45
61
|
/**
|
|
46
62
|
* Query the DuckDB-WASM instance.
|
|
47
63
|
* @param {object} query
|
|
48
|
-
* @param {'exec' | 'arrow' | 'json'} [query.type] The query type
|
|
64
|
+
* @param {'exec' | 'arrow' | 'json' | 'create-bundle' | 'load-bundle'} [query.type] The query type.
|
|
49
65
|
* @param {string} query.sql A SQL query string.
|
|
50
66
|
* @returns the query result
|
|
51
67
|
*/
|
|
52
68
|
query: async query => {
|
|
53
69
|
const { type, sql } = query;
|
|
54
70
|
const con = await getConnection();
|
|
55
|
-
const result = await con
|
|
71
|
+
const result = await getArrowIPC(con, sql);
|
|
56
72
|
return type === 'exec' ? undefined
|
|
57
|
-
: type === 'arrow' ? result
|
|
58
|
-
: result.toArray();
|
|
73
|
+
: type === 'arrow' ? decodeIPC(result)
|
|
74
|
+
: decodeIPC(result).toArray();
|
|
59
75
|
}
|
|
60
76
|
};
|
|
61
77
|
}
|
package/src/index.js
CHANGED
|
@@ -16,14 +16,9 @@ export {
|
|
|
16
16
|
clauseMatch
|
|
17
17
|
} from './SelectionClause.js';
|
|
18
18
|
|
|
19
|
-
export {
|
|
20
|
-
isArrowTable,
|
|
21
|
-
convertArrowArrayType,
|
|
22
|
-
convertArrowValue,
|
|
23
|
-
convertArrowColumn
|
|
24
|
-
} from './util/convert-arrow.js'
|
|
25
|
-
|
|
19
|
+
export { decodeIPC } from './util/decode-ipc.js';
|
|
26
20
|
export { distinct } from './util/distinct.js';
|
|
21
|
+
export { isArrowTable } from './util/is-arrow-table.js';
|
|
27
22
|
export { synchronizer } from './util/synchronizer.js';
|
|
28
23
|
export { throttle } from './util/throttle.js';
|
|
29
|
-
export { toDataColumns } from './util/to-data-columns.js'
|
|
24
|
+
export { toDataColumns } from './util/to-data-columns.js';
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { tableFromIPC } from '@uwdata/flechette';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Decode Arrow IPC bytes to a table instance, mapping date and
|
|
5
|
+
* timestamp values to JS Date objects.
|
|
6
|
+
* @param {ArrayBuffer | Uint8Array} data Arrow IPC bytes.
|
|
7
|
+
* @returns {import('@uwdata/flechette').Table} A table instance.
|
|
8
|
+
*/
|
|
9
|
+
export function decodeIPC(data) {
|
|
10
|
+
return tableFromIPC(data, { useDate: true });
|
|
11
|
+
}
|
package/src/util/field-info.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { Query, asRelation, count, isNull, max, min, sql } from '@uwdata/mosaic-sql';
|
|
2
2
|
import { jsType } from './js-type.js';
|
|
3
|
-
import { convertArrowValue } from './convert-arrow.js';
|
|
4
3
|
|
|
5
4
|
export const Count = 'count';
|
|
6
5
|
export const Nulls = 'nulls';
|
|
@@ -52,20 +51,13 @@ async function getFieldInfo(mc, { table, column, stats }) {
|
|
|
52
51
|
if (!(stats?.length || stats?.size)) return info;
|
|
53
52
|
|
|
54
53
|
// query for summary stats
|
|
55
|
-
const result = await mc.query(
|
|
54
|
+
const [result] = await mc.query(
|
|
56
55
|
summarize(table, column, stats),
|
|
57
56
|
{ persist: true }
|
|
58
57
|
);
|
|
59
58
|
|
|
60
|
-
// extract summary stats, copy to field info
|
|
61
|
-
|
|
62
|
-
const { name } = result.schema.fields[i];
|
|
63
|
-
const child = result.getChildAt(i);
|
|
64
|
-
const convert = convertArrowValue(child.type);
|
|
65
|
-
info[name] = convert(child.get(0));
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
return info;
|
|
59
|
+
// extract summary stats, copy to field info, and return
|
|
60
|
+
return Object.assign(info, result);
|
|
69
61
|
}
|
|
70
62
|
|
|
71
63
|
async function getTableInfo(mc, table) {
|
|
@@ -11,7 +11,7 @@ import { MosaicClient } from '../MosaicClient.js';
|
|
|
11
11
|
export function indexColumns(client) {
|
|
12
12
|
if (!client.filterIndexable) return null;
|
|
13
13
|
const q = client.query();
|
|
14
|
-
const from =
|
|
14
|
+
const from = getBase(q, q => q.from()?.[0].from.table);
|
|
15
15
|
|
|
16
16
|
// bail if no base table or the query is not analyzable
|
|
17
17
|
if (typeof from !== 'string' || !q.select) return null;
|
|
@@ -20,6 +20,13 @@ export function indexColumns(client) {
|
|
|
20
20
|
const dims = []; // list of grouping dimension columns
|
|
21
21
|
const aux = {}; // auxiliary columns needed by aggregates
|
|
22
22
|
|
|
23
|
+
const avg = ref => {
|
|
24
|
+
const name = ref.column;
|
|
25
|
+
// @ts-ignore
|
|
26
|
+
const expr = getBase(q, q => q.select().find(c => c.as === name)?.expr);
|
|
27
|
+
return `(SELECT AVG(${expr ?? ref}) FROM "${from}")`;
|
|
28
|
+
};
|
|
29
|
+
|
|
23
30
|
for (const entry of q.select()) {
|
|
24
31
|
const { as, expr: { aggregate, args } } = entry;
|
|
25
32
|
const op = aggregate?.toUpperCase?.();
|
|
@@ -46,32 +53,32 @@ export function indexColumns(client) {
|
|
|
46
53
|
case 'VARIANCE':
|
|
47
54
|
case 'VAR_SAMP':
|
|
48
55
|
aux[as] = null;
|
|
49
|
-
aggr.push({ [as]: varianceExpr(aux, args[0],
|
|
56
|
+
aggr.push({ [as]: varianceExpr(aux, args[0], avg) });
|
|
50
57
|
break;
|
|
51
58
|
case 'VAR_POP':
|
|
52
59
|
aux[as] = null;
|
|
53
|
-
aggr.push({ [as]: varianceExpr(aux, args[0],
|
|
60
|
+
aggr.push({ [as]: varianceExpr(aux, args[0], avg, false) });
|
|
54
61
|
break;
|
|
55
62
|
case 'STDDEV':
|
|
56
63
|
case 'STDDEV_SAMP':
|
|
57
64
|
aux[as] = null;
|
|
58
|
-
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0],
|
|
65
|
+
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], avg)})` });
|
|
59
66
|
break;
|
|
60
67
|
case 'STDDEV_POP':
|
|
61
68
|
aux[as] = null;
|
|
62
|
-
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0],
|
|
69
|
+
aggr.push({ [as]: agg`SQRT(${varianceExpr(aux, args[0], avg, false)})` });
|
|
63
70
|
break;
|
|
64
71
|
case 'COVAR_SAMP':
|
|
65
72
|
aux[as] = null;
|
|
66
|
-
aggr.push({ [as]: covarianceExpr(aux, args,
|
|
73
|
+
aggr.push({ [as]: covarianceExpr(aux, args, avg) });
|
|
67
74
|
break;
|
|
68
75
|
case 'COVAR_POP':
|
|
69
76
|
aux[as] = null;
|
|
70
|
-
aggr.push({ [as]: covarianceExpr(aux, args,
|
|
77
|
+
aggr.push({ [as]: covarianceExpr(aux, args, avg, false) });
|
|
71
78
|
break;
|
|
72
79
|
case 'CORR':
|
|
73
80
|
aux[as] = null;
|
|
74
|
-
aggr.push({ [as]: corrExpr(aux, args,
|
|
81
|
+
aggr.push({ [as]: corrExpr(aux, args, avg) });
|
|
75
82
|
break;
|
|
76
83
|
|
|
77
84
|
// regression statistics
|
|
@@ -89,27 +96,27 @@ export function indexColumns(client) {
|
|
|
89
96
|
break;
|
|
90
97
|
case 'REGR_SYY':
|
|
91
98
|
aux[as] = null;
|
|
92
|
-
aggr.push({ [as]: regrVarExpr(aux, 0, args,
|
|
99
|
+
aggr.push({ [as]: regrVarExpr(aux, 0, args, avg) });
|
|
93
100
|
break;
|
|
94
101
|
case 'REGR_SXX':
|
|
95
102
|
aux[as] = null;
|
|
96
|
-
aggr.push({ [as]: regrVarExpr(aux, 1, args,
|
|
103
|
+
aggr.push({ [as]: regrVarExpr(aux, 1, args, avg) });
|
|
97
104
|
break;
|
|
98
105
|
case 'REGR_SXY':
|
|
99
106
|
aux[as] = null;
|
|
100
|
-
aggr.push({ [as]: covarianceExpr(aux, args,
|
|
107
|
+
aggr.push({ [as]: covarianceExpr(aux, args, avg, null) });
|
|
101
108
|
break;
|
|
102
109
|
case 'REGR_SLOPE':
|
|
103
110
|
aux[as] = null;
|
|
104
|
-
aggr.push({ [as]: regrSlopeExpr(aux, args,
|
|
111
|
+
aggr.push({ [as]: regrSlopeExpr(aux, args, avg) });
|
|
105
112
|
break;
|
|
106
113
|
case 'REGR_INTERCEPT':
|
|
107
114
|
aux[as] = null;
|
|
108
|
-
aggr.push({ [as]: regrInterceptExpr(aux, args,
|
|
115
|
+
aggr.push({ [as]: regrInterceptExpr(aux, args, avg) });
|
|
109
116
|
break;
|
|
110
117
|
case 'REGR_R2':
|
|
111
118
|
aux[as] = null;
|
|
112
|
-
aggr.push({ [as]: agg`(${corrExpr(aux, args,
|
|
119
|
+
aggr.push({ [as]: agg`(${corrExpr(aux, args, avg)}) ** 2` });
|
|
113
120
|
break;
|
|
114
121
|
|
|
115
122
|
// aggregates that commute directly
|
|
@@ -163,29 +170,30 @@ function sanitize(col) {
|
|
|
163
170
|
}
|
|
164
171
|
|
|
165
172
|
/**
|
|
166
|
-
* Identify a
|
|
173
|
+
* Identify a shared base (source) query and extract a value from it.
|
|
174
|
+
* This method is used to find a shared base table name or extract
|
|
175
|
+
* the original column name within a base table.
|
|
167
176
|
* @param {Query} query The input query.
|
|
168
|
-
* @
|
|
177
|
+
* @param {(q: Query) => any} get A getter function to extract
|
|
178
|
+
* a value from a base query.
|
|
179
|
+
* @returns {string | undefined | NaN} the base query value, or
|
|
169
180
|
* `undefined` if there is no source table, or `NaN` if the
|
|
170
181
|
* query operates over multiple source tables.
|
|
171
182
|
*/
|
|
172
|
-
function
|
|
183
|
+
function getBase(query, get) {
|
|
173
184
|
const subq = query.subqueries;
|
|
174
185
|
|
|
175
186
|
// select query
|
|
176
|
-
if (query.select) {
|
|
177
|
-
|
|
178
|
-
// @ts-ignore
|
|
179
|
-
if (!from.length) return undefined;
|
|
180
|
-
if (subq.length === 0) return from[0].from.table;
|
|
187
|
+
if (query.select && subq.length === 0) {
|
|
188
|
+
return get(query);
|
|
181
189
|
}
|
|
182
190
|
|
|
183
191
|
// handle set operations / subqueries
|
|
184
|
-
const base =
|
|
192
|
+
const base = getBase(subq[0], get);
|
|
185
193
|
for (let i = 1; i < subq.length; ++i) {
|
|
186
|
-
const
|
|
187
|
-
if (
|
|
188
|
-
if (
|
|
194
|
+
const value = getBase(subq[i], get);
|
|
195
|
+
if (value === undefined) continue;
|
|
196
|
+
if (value !== base) return NaN;
|
|
189
197
|
}
|
|
190
198
|
return base;
|
|
191
199
|
}
|
|
@@ -224,17 +232,6 @@ function avgExpr(aux, as, arg) {
|
|
|
224
232
|
return agg`(SUM("${as}" * ${n.name}) / ${n})`;
|
|
225
233
|
}
|
|
226
234
|
|
|
227
|
-
/**
|
|
228
|
-
* Generate a scalar subquery for a global average.
|
|
229
|
-
* This value can be used to mean-center data.
|
|
230
|
-
* @param {*} x Souce data table column.
|
|
231
|
-
* @param {string} from The source data table name.
|
|
232
|
-
* @returns A scalar aggregate query
|
|
233
|
-
*/
|
|
234
|
-
function avg(x, from) {
|
|
235
|
-
return sql`(SELECT AVG(${x}) FROM "${from}")`;
|
|
236
|
-
}
|
|
237
|
-
|
|
238
235
|
/**
|
|
239
236
|
* Generate an expression for calculating argmax over data partitions.
|
|
240
237
|
* As a side effect, this method adds a column to the input *aux* object
|
|
@@ -283,18 +280,18 @@ function argminExpr(aux, as, [, y]) {
|
|
|
283
280
|
* sufficient statistics) to include in the data cube aggregation.
|
|
284
281
|
* @param {*} x The source data table column. This may be a string,
|
|
285
282
|
* column reference, SQL expression, or other string-coercible value.
|
|
286
|
-
* @param {string}
|
|
283
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
287
284
|
* @param {boolean} [correction=true] A flag for whether a Bessel
|
|
288
285
|
* correction should be applied to compute the sample variance
|
|
289
286
|
* rather than the populatation variance.
|
|
290
287
|
* @returns An aggregate expression for calculating variance over
|
|
291
288
|
* pre-aggregated data partitions.
|
|
292
289
|
*/
|
|
293
|
-
function varianceExpr(aux, x,
|
|
290
|
+
function varianceExpr(aux, x, avg, correction = true) {
|
|
294
291
|
const n = countExpr(aux, x);
|
|
295
292
|
const ssq = auxName('rssq', x); // residual sum of squares
|
|
296
293
|
const sum = auxName('rsum', x); // residual sum
|
|
297
|
-
const delta = sql`${x} - ${avg(x
|
|
294
|
+
const delta = sql`${x} - ${avg(x)}`;
|
|
298
295
|
aux[ssq] = agg`SUM((${delta}) ** 2)`;
|
|
299
296
|
aux[sum] = agg`SUM(${delta})`;
|
|
300
297
|
const adj = correction ? ` - 1` : ''; // Bessel correction
|
|
@@ -312,7 +309,7 @@ function varianceExpr(aux, x, from, correction = true) {
|
|
|
312
309
|
* sufficient statistics) to include in the data cube aggregation.
|
|
313
310
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
314
311
|
* column references, SQL expressions, or other string-coercible values.
|
|
315
|
-
* @param {string}
|
|
312
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
316
313
|
* @param {boolean|null} [correction=true] A flag for whether a Bessel
|
|
317
314
|
* correction should be applied to compute the sample covariance rather
|
|
318
315
|
* than the population covariance. If null, an expression for the
|
|
@@ -320,11 +317,11 @@ function varianceExpr(aux, x, from, correction = true) {
|
|
|
320
317
|
* @returns An aggregate expression for calculating covariance over
|
|
321
318
|
* pre-aggregated data partitions.
|
|
322
319
|
*/
|
|
323
|
-
function covarianceExpr(aux, args,
|
|
320
|
+
function covarianceExpr(aux, args, avg, correction = true) {
|
|
324
321
|
const n = regrCountExpr(aux, args);
|
|
325
|
-
const sxy = regrSumXYExpr(aux, args,
|
|
326
|
-
const sx = regrSumExpr(aux, 1, args,
|
|
327
|
-
const sy = regrSumExpr(aux, 0, args,
|
|
322
|
+
const sxy = regrSumXYExpr(aux, args, avg);
|
|
323
|
+
const sx = regrSumExpr(aux, 1, args, avg);
|
|
324
|
+
const sy = regrSumExpr(aux, 0, args, avg);
|
|
328
325
|
const adj = correction === null ? '' // do not divide by count
|
|
329
326
|
: correction ? ` / (${n} - 1)` // Bessel correction (sample)
|
|
330
327
|
: ` / ${n}`; // no correction (population)
|
|
@@ -343,17 +340,17 @@ function covarianceExpr(aux, args, from, correction = true) {
|
|
|
343
340
|
* sufficient statistics) to include in the data cube aggregation.
|
|
344
341
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
345
342
|
* column references, SQL expressions, or other string-coercible values.
|
|
346
|
-
* @param {string}
|
|
343
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
347
344
|
* @returns An aggregate expression for calculating correlation over
|
|
348
345
|
* pre-aggregated data partitions.
|
|
349
346
|
*/
|
|
350
|
-
function corrExpr(aux, args,
|
|
347
|
+
function corrExpr(aux, args, avg) {
|
|
351
348
|
const n = regrCountExpr(aux, args);
|
|
352
|
-
const sxy = regrSumXYExpr(aux, args,
|
|
353
|
-
const sxx = regrSumSqExpr(aux, 1, args,
|
|
354
|
-
const syy = regrSumSqExpr(aux, 0, args,
|
|
355
|
-
const sx = regrSumExpr(aux, 1, args,
|
|
356
|
-
const sy = regrSumExpr(aux, 0, args,
|
|
349
|
+
const sxy = regrSumXYExpr(aux, args, avg);
|
|
350
|
+
const sxx = regrSumSqExpr(aux, 1, args, avg);
|
|
351
|
+
const syy = regrSumSqExpr(aux, 0, args, avg);
|
|
352
|
+
const sx = regrSumExpr(aux, 1, args, avg);
|
|
353
|
+
const sy = regrSumExpr(aux, 0, args, avg);
|
|
357
354
|
const vx = agg`(${sxx} - (${sx} ** 2) / ${n})`;
|
|
358
355
|
const vy = agg`(${syy} - (${sy} ** 2) / ${n})`;
|
|
359
356
|
return agg`(${sxy} - ${sx} * ${sy} / ${n}) / SQRT(${vx} * ${vy})`;
|
|
@@ -387,14 +384,14 @@ function regrCountExpr(aux, [y, x]) {
|
|
|
387
384
|
* @param {number} i An index indicating which argument column to sum.
|
|
388
385
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
389
386
|
* column references, SQL expressions, or other string-coercible values.
|
|
390
|
-
* @param {string}
|
|
387
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
391
388
|
* @returns An aggregate expression over pre-aggregated data partitions.
|
|
392
389
|
*/
|
|
393
|
-
function regrSumExpr(aux, i, args,
|
|
390
|
+
function regrSumExpr(aux, i, args, avg) {
|
|
394
391
|
const v = args[i];
|
|
395
392
|
const o = args[1 - i];
|
|
396
393
|
const sum = auxName('rs', v);
|
|
397
|
-
aux[sum] = agg`SUM(${v} - ${avg(v
|
|
394
|
+
aux[sum] = agg`SUM(${v} - ${avg(v)}) FILTER (${o} IS NOT NULL)`;
|
|
398
395
|
return agg`SUM(${sum})`
|
|
399
396
|
}
|
|
400
397
|
|
|
@@ -409,14 +406,14 @@ function regrSumExpr(aux, i, args, from) {
|
|
|
409
406
|
* @param {number} i An index indicating which argument column to sum.
|
|
410
407
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
411
408
|
* column references, SQL expressions, or other string-coercible values.
|
|
412
|
-
* @param {string}
|
|
409
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
413
410
|
* @returns An aggregate expression over pre-aggregated data partitions.
|
|
414
411
|
*/
|
|
415
|
-
function regrSumSqExpr(aux, i, args,
|
|
412
|
+
function regrSumSqExpr(aux, i, args, avg) {
|
|
416
413
|
const v = args[i];
|
|
417
414
|
const u = args[1 - i];
|
|
418
415
|
const ssq = auxName('rss', v);
|
|
419
|
-
aux[ssq] = agg`SUM((${v} - ${avg(v
|
|
416
|
+
aux[ssq] = agg`SUM((${v} - ${avg(v)}) ** 2) FILTER (${u} IS NOT NULL)`;
|
|
420
417
|
return agg`SUM(${ssq})`
|
|
421
418
|
}
|
|
422
419
|
|
|
@@ -430,13 +427,13 @@ function regrSumSqExpr(aux, i, args, from) {
|
|
|
430
427
|
* sufficient statistics) to include in the data cube aggregation.
|
|
431
428
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
432
429
|
* column references, SQL expressions, or other string-coercible values.
|
|
433
|
-
* @param {string}
|
|
430
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
434
431
|
* @returns An aggregate expression over pre-aggregated data partitions.
|
|
435
432
|
*/
|
|
436
|
-
function regrSumXYExpr(aux, args,
|
|
433
|
+
function regrSumXYExpr(aux, args, avg) {
|
|
437
434
|
const [y, x] = args;
|
|
438
435
|
const sxy = auxName('sxy', y, x);
|
|
439
|
-
aux[sxy] = agg`SUM((${x} - ${avg(x
|
|
436
|
+
aux[sxy] = agg`SUM((${x} - ${avg(x)}) * (${y} - ${avg(y)}))`;
|
|
440
437
|
return agg`SUM(${sxy})`;
|
|
441
438
|
}
|
|
442
439
|
|
|
@@ -489,14 +486,14 @@ function regrAvgYExpr(aux, args) {
|
|
|
489
486
|
* @param {number} i The index of the argument to compute the variance for.
|
|
490
487
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
491
488
|
* column references, SQL expressions, or other string-coercible values.
|
|
492
|
-
* @param {string}
|
|
489
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
493
490
|
* @returns An aggregate expression for calculating variance over
|
|
494
491
|
* pre-aggregated data partitions.
|
|
495
492
|
*/
|
|
496
|
-
function regrVarExpr(aux, i, args,
|
|
493
|
+
function regrVarExpr(aux, i, args, avg) {
|
|
497
494
|
const n = regrCountExpr(aux, args);
|
|
498
|
-
const sum = regrSumExpr(aux, i, args,
|
|
499
|
-
const ssq = regrSumSqExpr(aux, i, args,
|
|
495
|
+
const sum = regrSumExpr(aux, i, args, avg);
|
|
496
|
+
const ssq = regrSumSqExpr(aux, i, args, avg);
|
|
500
497
|
return agg`(${ssq} - (${sum} ** 2 / ${n}))`;
|
|
501
498
|
}
|
|
502
499
|
|
|
@@ -509,13 +506,13 @@ function regrVarExpr(aux, i, args, from) {
|
|
|
509
506
|
* sufficient statistics) to include in the data cube aggregation.
|
|
510
507
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
511
508
|
* column references, SQL expressions, or other string-coercible values.
|
|
512
|
-
* @param {string}
|
|
509
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
513
510
|
* @returns An aggregate expression for calculating regression slopes over
|
|
514
511
|
* pre-aggregated data partitions.
|
|
515
512
|
*/
|
|
516
|
-
function regrSlopeExpr(aux, args,
|
|
517
|
-
const cov = covarianceExpr(aux, args,
|
|
518
|
-
const varx = regrVarExpr(aux, 1, args,
|
|
513
|
+
function regrSlopeExpr(aux, args, avg) {
|
|
514
|
+
const cov = covarianceExpr(aux, args, avg, null);
|
|
515
|
+
const varx = regrVarExpr(aux, 1, args, avg);
|
|
519
516
|
return agg`(${cov}) / ${varx}`;
|
|
520
517
|
}
|
|
521
518
|
|
|
@@ -528,13 +525,13 @@ function regrSlopeExpr(aux, args, from) {
|
|
|
528
525
|
* sufficient statistics) to include in the data cube aggregation.
|
|
529
526
|
* @param {any[]} args Source data table columns. The entries may be strings,
|
|
530
527
|
* column references, SQL expressions, or other string-coercible values.
|
|
531
|
-
* @param {string}
|
|
528
|
+
* @param {(field: any) => string} avg Global average query generator.
|
|
532
529
|
* @returns An aggregate expression for calculating regression intercepts over
|
|
533
530
|
* pre-aggregated data partitions.
|
|
534
531
|
*/
|
|
535
|
-
function regrInterceptExpr(aux, args,
|
|
532
|
+
function regrInterceptExpr(aux, args, avg) {
|
|
536
533
|
const ax = regrAvgXExpr(aux, args);
|
|
537
534
|
const ay = regrAvgYExpr(aux, args);
|
|
538
|
-
const m = regrSlopeExpr(aux, args,
|
|
535
|
+
const m = regrSlopeExpr(aux, args, avg);
|
|
539
536
|
return agg`${ay} - (${m}) * ${ax}`;
|
|
540
537
|
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test if a value is a Flechette Arrow table.
|
|
3
|
+
* We use a "duck typing" approach and check for a getChild function.
|
|
4
|
+
* @param {*} values The value to test
|
|
5
|
+
* @returns {values is import('@uwdata/flechette').Table}
|
|
6
|
+
* true if the value duck types as Arrow data
|
|
7
|
+
*/
|
|
8
|
+
export function isArrowTable(values) {
|
|
9
|
+
return typeof values?.getChild === 'function';
|
|
10
|
+
}
|
|
@@ -1,85 +1,84 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
export class PriorityQueue {
|
|
2
|
+
/**
|
|
3
|
+
* Create a new priority queue instance.
|
|
4
|
+
* @param {number} ranks An integer number of rank-order priority levels.
|
|
5
|
+
*/
|
|
6
|
+
constructor(ranks) {
|
|
7
|
+
// one list for each integer priority level
|
|
8
|
+
this.queue = Array.from(
|
|
9
9
|
{ length: ranks },
|
|
10
10
|
() => ({ head: null, tail: null })
|
|
11
11
|
);
|
|
12
|
+
}
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
},
|
|
14
|
+
/**
|
|
15
|
+
* Indicate if the queue is empty.
|
|
16
|
+
* @returns {boolean} true if empty, false otherwise.
|
|
17
|
+
*/
|
|
18
|
+
isEmpty() {
|
|
19
|
+
return this.queue.every(list => !list.head);
|
|
20
|
+
}
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Insert an item into the queue with a given priority rank.
|
|
24
|
+
* @param {*} item The item to add.
|
|
25
|
+
* @param {number} rank The integer priority rank.
|
|
26
|
+
* Priority ranks are integers starting at zero.
|
|
27
|
+
* Lower ranks indicate higher priority.
|
|
28
|
+
*/
|
|
29
|
+
insert(item, rank) {
|
|
30
|
+
const list = this.queue[rank];
|
|
31
|
+
if (!list) {
|
|
32
|
+
throw new Error(`Invalid queue priority rank: ${rank}`);
|
|
33
|
+
}
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
35
|
+
const node = { item, next: null };
|
|
36
|
+
if (list.head === null) {
|
|
37
|
+
list.head = list.tail = node;
|
|
38
|
+
} else {
|
|
39
|
+
list.tail = list.tail.next = node;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
43
|
+
/**
|
|
44
|
+
* Remove a set of items from the queue, regardless of priority rank.
|
|
45
|
+
* Items that do not satisfy the predicate are left in the queue.
|
|
46
|
+
* @param {(item: *) => boolean} test A predicate function to test
|
|
47
|
+
* if an item should be removed (true to drop, false to keep).
|
|
48
|
+
*/
|
|
49
|
+
remove(test) {
|
|
50
|
+
for (const list of this.queue) {
|
|
51
|
+
let { head, tail } = list;
|
|
52
|
+
for (let prev = null, curr = head; curr; prev = curr, curr = curr.next) {
|
|
53
|
+
if (test(curr.item)) {
|
|
54
|
+
if (curr === head) {
|
|
55
|
+
head = curr.next;
|
|
56
|
+
} else {
|
|
57
|
+
prev.next = curr.next;
|
|
58
|
+
}
|
|
59
|
+
if (curr === tail) tail = prev || head;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
list.head = head;
|
|
63
|
+
list.tail = tail;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
};
|
|
67
|
+
/**
|
|
68
|
+
* Remove and return the next highest priority item.
|
|
69
|
+
* @returns {*} The next item in the queue,
|
|
70
|
+
* or undefined if this queue is empty.
|
|
71
|
+
*/
|
|
72
|
+
next() {
|
|
73
|
+
for (const list of this.queue) {
|
|
74
|
+
const { head } = list;
|
|
75
|
+
if (head !== null) {
|
|
76
|
+
list.head = head.next;
|
|
77
|
+
if (list.tail === head) {
|
|
78
|
+
list.tail = null;
|
|
79
|
+
}
|
|
80
|
+
return head.item;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
85
84
|
}
|