@uwdata/mosaic-core 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +47 -0
- package/README.md +0 -1
- package/dist/src/Coordinator.d.ts +147 -0
- package/dist/src/Coordinator.d.ts.map +1 -0
- package/dist/src/Coordinator.js +269 -0
- package/dist/src/Coordinator.js.map +1 -0
- package/dist/src/MosaicClient.d.ts +138 -0
- package/dist/src/MosaicClient.d.ts.map +1 -0
- package/dist/src/MosaicClient.js +213 -0
- package/dist/src/MosaicClient.js.map +1 -0
- package/dist/src/Param.d.ts +56 -0
- package/dist/src/Param.d.ts.map +1 -0
- package/dist/src/Param.js +89 -0
- package/dist/src/Param.js.map +1 -0
- package/dist/src/QueryConsolidator.d.ts +11 -0
- package/dist/src/QueryConsolidator.d.ts.map +1 -0
- package/dist/src/QueryConsolidator.js +249 -0
- package/dist/src/QueryConsolidator.js.map +1 -0
- package/dist/src/QueryManager.d.ts +77 -0
- package/dist/src/QueryManager.d.ts.map +1 -0
- package/dist/src/QueryManager.js +174 -0
- package/dist/src/QueryManager.js.map +1 -0
- package/dist/src/Selection.d.ts +222 -0
- package/dist/src/Selection.d.ts.map +1 -0
- package/dist/src/Selection.js +319 -0
- package/dist/src/Selection.js.map +1 -0
- package/dist/src/SelectionClause.d.ts +192 -0
- package/dist/src/SelectionClause.d.ts.map +1 -0
- package/dist/src/SelectionClause.js +126 -0
- package/dist/src/SelectionClause.js.map +1 -0
- package/dist/src/connectors/Connector.d.ts +26 -0
- package/dist/src/connectors/Connector.d.ts.map +1 -0
- package/dist/src/connectors/Connector.js +2 -0
- package/dist/src/connectors/Connector.js.map +1 -0
- package/dist/src/connectors/rest.d.ts +24 -0
- package/dist/src/connectors/rest.d.ts.map +1 -0
- package/dist/src/connectors/rest.js +37 -0
- package/dist/src/connectors/rest.js.map +1 -0
- package/dist/src/connectors/socket.d.ts +40 -0
- package/dist/src/connectors/socket.d.ts.map +1 -0
- package/dist/src/connectors/socket.js +115 -0
- package/dist/src/connectors/socket.js.map +1 -0
- package/dist/src/connectors/wasm.d.ts +53 -0
- package/dist/src/connectors/wasm.d.ts.map +1 -0
- package/dist/src/connectors/wasm.js +113 -0
- package/dist/src/connectors/wasm.js.map +1 -0
- package/dist/src/index.d.ts +28 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +25 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/make-client.d.ts +35 -0
- package/dist/src/make-client.d.ts.map +1 -0
- package/dist/src/make-client.js +52 -0
- package/dist/src/make-client.js.map +1 -0
- package/dist/src/preagg/PreAggregator.d.ts +150 -0
- package/dist/src/preagg/PreAggregator.d.ts.map +1 -0
- package/dist/src/preagg/PreAggregator.js +382 -0
- package/dist/src/preagg/PreAggregator.js.map +1 -0
- package/dist/src/preagg/preagg-columns.d.ts +16 -0
- package/dist/src/preagg/preagg-columns.d.ts.map +1 -0
- package/dist/src/preagg/preagg-columns.js +95 -0
- package/dist/src/preagg/preagg-columns.js.map +1 -0
- package/dist/src/preagg/sufficient-statistics.d.ts +14 -0
- package/dist/src/preagg/sufficient-statistics.d.ts.map +1 -0
- package/dist/src/preagg/sufficient-statistics.js +446 -0
- package/dist/src/preagg/sufficient-statistics.js.map +1 -0
- package/dist/src/types.d.ts +77 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/util/AsyncDispatch.d.ts +121 -0
- package/dist/src/util/AsyncDispatch.d.ts.map +1 -0
- package/dist/src/util/AsyncDispatch.js +188 -0
- package/dist/src/util/AsyncDispatch.js.map +1 -0
- package/dist/src/util/cache.d.ts +19 -0
- package/dist/src/util/cache.d.ts.map +1 -0
- package/dist/src/util/cache.js +66 -0
- package/dist/src/util/cache.js.map +1 -0
- package/dist/src/util/decode-ipc.d.ts +12 -0
- package/dist/src/util/decode-ipc.d.ts.map +1 -0
- package/{src → dist/src}/util/decode-ipc.js +5 -6
- package/dist/src/util/decode-ipc.js.map +1 -0
- package/dist/src/util/distinct.d.ts +3 -0
- package/dist/src/util/distinct.d.ts.map +1 -0
- package/dist/src/util/distinct.js +16 -0
- package/dist/src/util/distinct.js.map +1 -0
- package/dist/src/util/field-info.d.ts +26 -0
- package/dist/src/util/field-info.d.ts.map +1 -0
- package/dist/src/util/field-info.js +91 -0
- package/dist/src/util/field-info.js.map +1 -0
- package/dist/src/util/hash.d.ts +2 -0
- package/dist/src/util/hash.d.ts.map +1 -0
- package/dist/src/util/hash.js +26 -0
- package/dist/src/util/hash.js.map +1 -0
- package/dist/src/util/is-activatable.d.ts +8 -0
- package/dist/src/util/is-activatable.d.ts.map +1 -0
- package/dist/src/util/is-activatable.js +10 -0
- package/dist/src/util/is-activatable.js.map +1 -0
- package/dist/src/util/is-arrow-table.d.ts +9 -0
- package/dist/src/util/is-arrow-table.d.ts.map +1 -0
- package/dist/src/util/is-arrow-table.js +11 -0
- package/dist/src/util/is-arrow-table.js.map +1 -0
- package/dist/src/util/js-type.d.ts +9 -0
- package/dist/src/util/js-type.d.ts.map +1 -0
- package/dist/src/util/js-type.js +59 -0
- package/dist/src/util/js-type.js.map +1 -0
- package/dist/src/util/priority-queue.d.ts +35 -0
- package/dist/src/util/priority-queue.d.ts.map +1 -0
- package/dist/src/util/priority-queue.js +81 -0
- package/dist/src/util/priority-queue.js.map +1 -0
- package/dist/src/util/query-result.d.ts +47 -0
- package/dist/src/util/query-result.d.ts.map +1 -0
- package/dist/src/util/query-result.js +83 -0
- package/dist/src/util/query-result.js.map +1 -0
- package/dist/src/util/synchronizer.d.ts +36 -0
- package/dist/src/util/synchronizer.d.ts.map +1 -0
- package/dist/src/util/synchronizer.js +52 -0
- package/dist/src/util/synchronizer.js.map +1 -0
- package/dist/src/util/throttle.d.ts +12 -0
- package/dist/src/util/throttle.d.ts.map +1 -0
- package/dist/src/util/throttle.js +51 -0
- package/dist/src/util/throttle.js.map +1 -0
- package/dist/src/util/to-data-columns.d.ts +22 -0
- package/dist/src/util/to-data-columns.d.ts.map +1 -0
- package/dist/src/util/to-data-columns.js +51 -0
- package/dist/src/util/to-data-columns.js.map +1 -0
- package/dist/src/util/void-logger.d.ts +13 -0
- package/dist/src/util/void-logger.d.ts.map +1 -0
- package/dist/src/util/void-logger.js +13 -0
- package/dist/src/util/void-logger.js.map +1 -0
- package/package.json +16 -10
- package/src/Coordinator.ts +367 -0
- package/src/{MosaicClient.js → MosaicClient.ts} +49 -43
- package/src/{Param.js → Param.ts} +29 -28
- package/src/{QueryConsolidator.js → QueryConsolidator.ts} +81 -58
- package/src/{QueryManager.js → QueryManager.ts} +61 -54
- package/src/Selection.ts +388 -0
- package/src/SelectionClause.ts +275 -0
- package/src/connectors/Connector.ts +6 -6
- package/src/connectors/rest.ts +56 -0
- package/src/connectors/{socket.js → socket.ts} +53 -42
- package/src/connectors/{wasm.js → wasm.ts} +46 -62
- package/src/{index.js → index.ts} +13 -1
- package/src/make-client.ts +93 -0
- package/src/preagg/{PreAggregator.js → PreAggregator.ts} +164 -145
- package/src/preagg/{preagg-columns.js → preagg-columns.ts} +27 -24
- package/src/preagg/{sufficient-statistics.js → sufficient-statistics.ts} +160 -110
- package/src/types.ts +24 -9
- package/src/util/{AsyncDispatch.js → AsyncDispatch.ts} +62 -43
- package/src/util/{cache.js → cache.ts} +25 -15
- package/src/util/decode-ipc.ts +15 -0
- package/src/util/{distinct.js → distinct.ts} +3 -3
- package/src/util/{field-info.js → field-info.ts} +31 -32
- package/src/util/{hash.js → hash.ts} +4 -4
- package/src/util/is-activatable.ts +11 -0
- package/src/util/is-arrow-table.ts +12 -0
- package/src/util/{js-type.js → js-type.ts} +7 -5
- package/src/util/{priority-queue.js → priority-queue.ts} +32 -20
- package/src/util/{query-result.js → query-result.ts} +24 -17
- package/src/util/synchronizer.ts +56 -0
- package/src/util/throttle.ts +59 -0
- package/src/util/to-data-columns.ts +65 -0
- package/src/util/void-logger.ts +23 -0
- package/src/Coordinator.js +0 -313
- package/src/Selection.js +0 -380
- package/src/SelectionClause.js +0 -159
- package/src/connectors/rest.js +0 -38
- package/src/index-types.ts +0 -5
- package/src/make-client.js +0 -101
- package/src/util/is-activatable.js +0 -8
- package/src/util/is-arrow-table.js +0 -10
- package/src/util/selection-types.ts +0 -137
- package/src/util/synchronizer.js +0 -47
- package/src/util/throttle.js +0 -54
- package/src/util/to-data-columns.js +0 -60
- package/src/util/void-logger.js +0 -13
- package/tsconfig.json +0 -9
- package/vitest.config.ts +0 -3
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import type { AggregateNode, ColumnRefNode, ExprNode } from '@uwdata/mosaic-sql';
|
|
2
2
|
import { and, argmax, argmin, coalesce, count, div, exp, isNotNull, ln, max, min, mul, pow, regrAvgX, regrAvgY, regrCount, sql, sqrt, sub, sum } from '@uwdata/mosaic-sql';
|
|
3
3
|
import { fnv_hash } from '../util/hash.js';
|
|
4
4
|
|
|
@@ -6,13 +6,18 @@ import { fnv_hash } from '../util/hash.js';
|
|
|
6
6
|
* Determine sufficient statistics to preaggregate the given node. This
|
|
7
7
|
* method populates the *preagg* and *aggrs* arguments with necessary
|
|
8
8
|
* information for preaggregation optimization.
|
|
9
|
-
* @param
|
|
10
|
-
* @param
|
|
9
|
+
* @param node An aggregate function.
|
|
10
|
+
* @param preagg Map of column names to
|
|
11
11
|
* expressions to include in the preaggregation table.
|
|
12
|
-
* @
|
|
12
|
+
* @param avg Global average query generator.
|
|
13
|
+
* @returns Output aggregate expression that uses preaggregated
|
|
13
14
|
* sufficient statistics to service updates.
|
|
14
15
|
*/
|
|
15
|
-
export function sufficientStatistics(
|
|
16
|
+
export function sufficientStatistics(
|
|
17
|
+
node: AggregateNode,
|
|
18
|
+
preagg: Record<string, ExprNode>,
|
|
19
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
20
|
+
): ExprNode | null {
|
|
16
21
|
switch (node.name) {
|
|
17
22
|
case 'count':
|
|
18
23
|
case 'count_star':
|
|
@@ -89,10 +94,10 @@ export function sufficientStatistics(node, preagg, avg) {
|
|
|
89
94
|
/**
|
|
90
95
|
* Generate a column name for the given aggregate node. The name is
|
|
91
96
|
* made from a hash of the string-serialized SQL expression.
|
|
92
|
-
* @param
|
|
93
|
-
* @returns
|
|
97
|
+
* @param node The aggregate node to name.
|
|
98
|
+
* @returns The generated column name.
|
|
94
99
|
*/
|
|
95
|
-
function colName(node) {
|
|
100
|
+
function colName(node: AggregateNode): string {
|
|
96
101
|
return 'pre_' + fnv_hash(`${node}`).toString(16);
|
|
97
102
|
}
|
|
98
103
|
|
|
@@ -100,13 +105,13 @@ function colName(node) {
|
|
|
100
105
|
* Add a sufficient statistic to the preaggregation column set.
|
|
101
106
|
* Generates a unique column name for the statistic and propagates
|
|
102
107
|
* a FILTER clause if one exists on the original aggregate node.
|
|
103
|
-
* @param
|
|
108
|
+
* @param preagg A map of columns (such as
|
|
104
109
|
* sufficient statistics) to pre-aggregate.
|
|
105
|
-
* @param
|
|
106
|
-
* @param
|
|
107
|
-
* @returns
|
|
110
|
+
* @param expr The aggregate statistic to add.
|
|
111
|
+
* @param node The originating aggregate function call.
|
|
112
|
+
* @returns The name of the statistic column.
|
|
108
113
|
*/
|
|
109
|
-
function addStat(preagg, expr, node) {
|
|
114
|
+
function addStat(preagg: Record<string, ExprNode>, expr: AggregateNode, node?: AggregateNode): string {
|
|
110
115
|
const filter = node?.filter;
|
|
111
116
|
if (filter) {
|
|
112
117
|
// push filter clause to preaggregate expr
|
|
@@ -123,13 +128,13 @@ function addStat(preagg, expr, node) {
|
|
|
123
128
|
* Generate an expression for calculating counts over data dimensions.
|
|
124
129
|
* As a side effect, this method adds a column to the input *preagg* object
|
|
125
130
|
* to track the count of non-null values per-partition.
|
|
126
|
-
* @param
|
|
131
|
+
* @param preagg A map of columns (such as
|
|
127
132
|
* sufficient statistics) to pre-aggregate.
|
|
128
|
-
* @param
|
|
129
|
-
* @returns
|
|
133
|
+
* @param node The originating aggregate function call.
|
|
134
|
+
* @returns An aggregate expression over
|
|
130
135
|
* pre-aggregated dimensions and associated column name.
|
|
131
136
|
*/
|
|
132
|
-
function countExpr(preagg, node) {
|
|
137
|
+
function countExpr(preagg: Record<string, ExprNode>, node: AggregateNode): { expr: ExprNode; name: string } {
|
|
133
138
|
const name = addStat(preagg, count(node.args[0]), node);
|
|
134
139
|
return { expr: coalesce(sum(name), 0), name };
|
|
135
140
|
}
|
|
@@ -138,23 +143,23 @@ function countExpr(preagg, node) {
|
|
|
138
143
|
* Generate an expression for calculating counts over data dimensions.
|
|
139
144
|
* The expression is a summation with an additional coalesce operation
|
|
140
145
|
* to map null sums to zero-valued counts.
|
|
141
|
-
* @param
|
|
146
|
+
* @param preagg A map of columns (such as
|
|
142
147
|
* sufficient statistics) to pre-aggregate.
|
|
143
|
-
* @param
|
|
144
|
-
* @returns
|
|
148
|
+
* @param node The originating aggregate function call.
|
|
149
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
145
150
|
*/
|
|
146
|
-
function sumCountExpr(preagg, node) {
|
|
151
|
+
function sumCountExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
147
152
|
return coalesce(sumExpr(preagg, node), 0);
|
|
148
153
|
}
|
|
149
154
|
|
|
150
155
|
/**
|
|
151
156
|
* Generate an expression for calculating sums over data dimensions.
|
|
152
|
-
* @param
|
|
157
|
+
* @param preagg A map of columns (such as
|
|
153
158
|
* sufficient statistics) to pre-aggregate.
|
|
154
|
-
* @param
|
|
155
|
-
* @returns
|
|
159
|
+
* @param node The originating aggregate function call.
|
|
160
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
156
161
|
*/
|
|
157
|
-
function sumExpr(preagg, node) {
|
|
162
|
+
function sumExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
158
163
|
return sum(addStat(preagg, node));
|
|
159
164
|
}
|
|
160
165
|
|
|
@@ -162,12 +167,12 @@ function sumExpr(preagg, node) {
|
|
|
162
167
|
* Generate an expression for calculating averages over data dimensions.
|
|
163
168
|
* As a side effect, this method adds a column to the input *preagg* object
|
|
164
169
|
* to track the count of non-null values per-partition.
|
|
165
|
-
* @param
|
|
170
|
+
* @param preagg A map of columns (such as
|
|
166
171
|
* sufficient statistics) to pre-aggregate.
|
|
167
|
-
* @param
|
|
168
|
-
* @returns
|
|
172
|
+
* @param node The originating aggregate function call.
|
|
173
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
169
174
|
*/
|
|
170
|
-
function avgExpr(preagg, node) {
|
|
175
|
+
function avgExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
171
176
|
const as = addStat(preagg, node);
|
|
172
177
|
const { expr, name } = countExpr(preagg, node);
|
|
173
178
|
return div(sum(mul(as, name)), expr);
|
|
@@ -179,12 +184,12 @@ function avgExpr(preagg, node) {
|
|
|
179
184
|
* geomean calculation uses two sufficient statistics: the sum of log values
|
|
180
185
|
* and the count of non-null values. As a side effect, this method adds columns
|
|
181
186
|
* for these statistics to the input *preagg* object.
|
|
182
|
-
* @param
|
|
187
|
+
* @param preagg A map of columns (such as
|
|
183
188
|
* sufficient statistics) to pre-aggregate.
|
|
184
|
-
* @param
|
|
185
|
-
* @returns
|
|
189
|
+
* @param node The originating aggregate function call.
|
|
190
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
186
191
|
*/
|
|
187
|
-
function geomeanExpr(preagg, node) {
|
|
192
|
+
function geomeanExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
188
193
|
const x = node.args[0];
|
|
189
194
|
const expr = addStat(preagg, sum(ln(x)), node);
|
|
190
195
|
const { expr: n } = countExpr(preagg, node);
|
|
@@ -195,12 +200,12 @@ function geomeanExpr(preagg, node) {
|
|
|
195
200
|
* Generate an expression for calculating argmax over data dimensions.
|
|
196
201
|
* As a side effect, this method adds a column to the input *preagg* object
|
|
197
202
|
* to track a maximum value per-partition.
|
|
198
|
-
* @param
|
|
203
|
+
* @param preagg A map of columns (such as
|
|
199
204
|
* sufficient statistics) to pre-aggregate.
|
|
200
|
-
* @param
|
|
201
|
-
* @returns
|
|
205
|
+
* @param node The originating aggregate function call.
|
|
206
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
202
207
|
*/
|
|
203
|
-
function argmaxExpr(preagg, node) {
|
|
208
|
+
function argmaxExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
204
209
|
const expr = addStat(preagg, node);
|
|
205
210
|
const maxy = addStat(preagg, max(node.args[1]), node);
|
|
206
211
|
return argmax(expr, maxy);
|
|
@@ -210,13 +215,12 @@ function argmaxExpr(preagg, node) {
|
|
|
210
215
|
* Generate an expression for calculating argmin over data dimensions.
|
|
211
216
|
* As a side effect, this method adds a column to the input *preagg* object
|
|
212
217
|
* to track a minimum value per-partition.
|
|
213
|
-
* @param
|
|
214
|
-
* sufficient statistics) to
|
|
215
|
-
* @param
|
|
216
|
-
*
|
|
217
|
-
* @returns {ExprNode} An aggregate expression over pre-aggregated dimensions.
|
|
218
|
+
* @param preagg A map of columns (such as
|
|
219
|
+
* sufficient statistics) to pre-aggregate.
|
|
220
|
+
* @param node The originating aggregate function call.
|
|
221
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
218
222
|
*/
|
|
219
|
-
function argminExpr(preagg, node) {
|
|
223
|
+
function argminExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
220
224
|
const expr = addStat(preagg, node);
|
|
221
225
|
const miny = addStat(preagg, min(node.args[1]), node);
|
|
222
226
|
return argmin(expr, miny);
|
|
@@ -230,19 +234,24 @@ function argminExpr(preagg, node) {
|
|
|
230
234
|
* the residual sum of squares and the sum of residual (mean-centered) values.
|
|
231
235
|
* As a side effect, this method adds columns for these statistics to the
|
|
232
236
|
* input *preagg* object.
|
|
233
|
-
* @param
|
|
237
|
+
* @param preagg A map of columns (such as
|
|
234
238
|
* sufficient statistics) to pre-aggregate.
|
|
235
|
-
* @param
|
|
236
|
-
* @param
|
|
237
|
-
* @param
|
|
239
|
+
* @param node The originating aggregate function call.
|
|
240
|
+
* @param avg Global average query generator.
|
|
241
|
+
* @param correction A flag for whether a Bessel
|
|
238
242
|
* correction should be applied to compute the sample variance
|
|
239
243
|
* rather than the population variance.
|
|
240
|
-
* @returns
|
|
244
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
241
245
|
*/
|
|
242
|
-
function varianceExpr(
|
|
246
|
+
function varianceExpr(
|
|
247
|
+
preagg: Record<string, ExprNode>,
|
|
248
|
+
node: AggregateNode,
|
|
249
|
+
avg: (field: ColumnRefNode) => ExprNode,
|
|
250
|
+
correction: boolean = true
|
|
251
|
+
): ExprNode {
|
|
243
252
|
const x = node.args[0];
|
|
244
253
|
const { expr: n } = countExpr(preagg, node);
|
|
245
|
-
const delta = sub(x, avg(x));
|
|
254
|
+
const delta = sub(x, avg(x as ColumnRefNode));
|
|
246
255
|
const rssq = addStat(preagg, sum(pow(delta, 2)), node); // residual sum of squares
|
|
247
256
|
const rsum = addStat(preagg, sum(delta), node); // residual sum
|
|
248
257
|
const denom = correction ? sub(n, 1) : n; // Bessel correction
|
|
@@ -256,17 +265,22 @@ function varianceExpr(preagg, node, avg, correction = true) {
|
|
|
256
265
|
* non-null value pairs, the sum of residual products, and residual sums
|
|
257
266
|
* (of mean-centered values) for x and y. As a side effect, this method
|
|
258
267
|
* adds columns for these statistics to the input *preagg* object.
|
|
259
|
-
* @param
|
|
268
|
+
* @param preagg A map of columns (such as
|
|
260
269
|
* sufficient statistics) to pre-aggregate.
|
|
261
|
-
* @param
|
|
262
|
-
* @param
|
|
263
|
-
* @param
|
|
270
|
+
* @param node The originating aggregate function call.
|
|
271
|
+
* @param avg Global average query generator.
|
|
272
|
+
* @param correction A flag for whether a Bessel
|
|
264
273
|
* correction should be applied to compute the sample covariance rather
|
|
265
274
|
* than the population covariance. If null, an expression for the
|
|
266
275
|
* unnormalized covariance (no division by sample count) is returned.
|
|
267
|
-
* @returns
|
|
276
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
268
277
|
*/
|
|
269
|
-
function covarianceExpr(
|
|
278
|
+
function covarianceExpr(
|
|
279
|
+
preagg: Record<string, ExprNode>,
|
|
280
|
+
node: AggregateNode,
|
|
281
|
+
avg: (field: ColumnRefNode) => ExprNode,
|
|
282
|
+
correction: boolean | null = true
|
|
283
|
+
): ExprNode {
|
|
270
284
|
const { expr: n } = regrCountExpr(preagg, node);
|
|
271
285
|
const sxy = regrSumXYExpr(preagg, node, avg);
|
|
272
286
|
const sx = regrSumExpr(preagg, 1, node, avg);
|
|
@@ -285,13 +299,17 @@ function covarianceExpr(preagg, node, avg, correction = true) {
|
|
|
285
299
|
* residual products, and both residual sums and sums of squares for x and y.
|
|
286
300
|
* As a side effect, this method adds columns for these statistics to the
|
|
287
301
|
* input *preagg* object.
|
|
288
|
-
* @param
|
|
302
|
+
* @param preagg A map of columns (such as
|
|
289
303
|
* sufficient statistics) to pre-aggregate.
|
|
290
|
-
* @param
|
|
291
|
-
* @param
|
|
292
|
-
* @returns
|
|
304
|
+
* @param node The originating aggregate function call.
|
|
305
|
+
* @param avg Global average query generator.
|
|
306
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
293
307
|
*/
|
|
294
|
-
function corrExpr(
|
|
308
|
+
function corrExpr(
|
|
309
|
+
preagg: Record<string, ExprNode>,
|
|
310
|
+
node: AggregateNode,
|
|
311
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
312
|
+
): ExprNode {
|
|
295
313
|
const { expr: n } = regrCountExpr(preagg, node);
|
|
296
314
|
const sxy = regrSumXYExpr(preagg, node, avg);
|
|
297
315
|
const sxx = regrSumSqExpr(preagg, 1, node, avg);
|
|
@@ -310,13 +328,13 @@ function corrExpr(preagg, node, avg) {
|
|
|
310
328
|
* Generate an expression for the count of non-null (x, y) pairs. As a side
|
|
311
329
|
* effect, this method adds columns to the input *preagg* object to track the
|
|
312
330
|
* partition-level count of non-null pairs.
|
|
313
|
-
* @param
|
|
331
|
+
* @param preagg A map of columns (such as
|
|
314
332
|
* sufficient statistics) to pre-aggregate.
|
|
315
|
-
* @param
|
|
316
|
-
* @returns
|
|
333
|
+
* @param node The originating aggregate function call.
|
|
334
|
+
* @returns An aggregate expression over
|
|
317
335
|
* pre-aggregated dimensions and associated column name.
|
|
318
336
|
*/
|
|
319
|
-
function regrCountExpr(preagg, node) {
|
|
337
|
+
function regrCountExpr(preagg: Record<string, ExprNode>, node: AggregateNode): { expr: ExprNode; name: string } {
|
|
320
338
|
const [x, y] = node.args;
|
|
321
339
|
const n = addStat(preagg, regrCount(x, y), node);
|
|
322
340
|
return { expr: sum(n), name: n };
|
|
@@ -328,18 +346,23 @@ function regrCountExpr(preagg, node) {
|
|
|
328
346
|
* (x, y) pairs are included. This method uses mean-centered data to reduce
|
|
329
347
|
* floating point error. As a side effect, this method adds a column for
|
|
330
348
|
* partition-level sums to the input *preagg* object.
|
|
331
|
-
* @param
|
|
349
|
+
* @param preagg A map of columns (such as
|
|
332
350
|
* sufficient statistics) to pre-aggregate.
|
|
333
|
-
* @param
|
|
334
|
-
* @param
|
|
335
|
-
* @param
|
|
336
|
-
* @returns
|
|
351
|
+
* @param i An index indicating which argument column to sum.
|
|
352
|
+
* @param node The originating aggregate function call.
|
|
353
|
+
* @param avg Global average query generator.
|
|
354
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
337
355
|
*/
|
|
338
|
-
function regrSumExpr(
|
|
356
|
+
function regrSumExpr(
|
|
357
|
+
preagg: Record<string, ExprNode>,
|
|
358
|
+
i: number,
|
|
359
|
+
node: AggregateNode,
|
|
360
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
361
|
+
): ExprNode {
|
|
339
362
|
const args = node.args;
|
|
340
363
|
const v = args[i];
|
|
341
364
|
const o = args[1 - i];
|
|
342
|
-
const rsum = sum(sub(v, avg(v))).where(isNotNull(o));
|
|
365
|
+
const rsum = sum(sub(v, avg(v as ColumnRefNode))).where(isNotNull(o));
|
|
343
366
|
return sum(addStat(preagg, rsum, node));
|
|
344
367
|
}
|
|
345
368
|
|
|
@@ -349,18 +372,23 @@ function regrSumExpr(preagg, i, node, avg) {
|
|
|
349
372
|
* non-null (x, y) pairs are included. This method uses mean-centered data to
|
|
350
373
|
* reduce floating point error. As a side effect, this method adds a column
|
|
351
374
|
* for partition-level sums to the input *preagg* object.
|
|
352
|
-
* @param
|
|
375
|
+
* @param preagg A map of columns (such as
|
|
353
376
|
* sufficient statistics) to pre-aggregate.
|
|
354
|
-
* @param
|
|
355
|
-
* @param
|
|
356
|
-
* @param
|
|
357
|
-
* @returns
|
|
377
|
+
* @param i An index indicating which argument column to sum.
|
|
378
|
+
* @param node The originating aggregate function call.
|
|
379
|
+
* @param avg Global average query generator.
|
|
380
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
358
381
|
*/
|
|
359
|
-
function regrSumSqExpr(
|
|
382
|
+
function regrSumSqExpr(
|
|
383
|
+
preagg: Record<string, ExprNode>,
|
|
384
|
+
i: number,
|
|
385
|
+
node: AggregateNode,
|
|
386
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
387
|
+
): ExprNode {
|
|
360
388
|
const args = node.args;
|
|
361
389
|
const v = args[i];
|
|
362
390
|
const u = args[1 - i];
|
|
363
|
-
const ssq = sum(pow(sub(v, avg(v)), 2)).where(isNotNull(u));
|
|
391
|
+
const ssq = sum(pow(sub(v, avg(v as ColumnRefNode)), 2)).where(isNotNull(u));
|
|
364
392
|
return sum(addStat(preagg, ssq, node));
|
|
365
393
|
}
|
|
366
394
|
|
|
@@ -370,15 +398,24 @@ function regrSumSqExpr(preagg, i, node, avg) {
|
|
|
370
398
|
* non-null (x, y) pairs are included. This method uses mean-centered data to
|
|
371
399
|
* reduce floating point error. As a side effect, this method adds a column
|
|
372
400
|
* for partition-level sums to the input *preagg* object.
|
|
373
|
-
* @param
|
|
401
|
+
* @param preagg A map of columns (such as
|
|
374
402
|
* sufficient statistics) to pre-aggregate.
|
|
375
|
-
* @param
|
|
376
|
-
* @param
|
|
377
|
-
* @returns
|
|
403
|
+
* @param node The originating aggregate function call.
|
|
404
|
+
* @param avg Global average query generator.
|
|
405
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
378
406
|
*/
|
|
379
|
-
function regrSumXYExpr(
|
|
407
|
+
function regrSumXYExpr(
|
|
408
|
+
preagg: Record<string, ExprNode>,
|
|
409
|
+
node: AggregateNode,
|
|
410
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
411
|
+
): ExprNode {
|
|
380
412
|
const [y, x] = node.args;
|
|
381
|
-
const sxy = sum(
|
|
413
|
+
const sxy = sum(
|
|
414
|
+
mul(
|
|
415
|
+
sub(x, avg(x as ColumnRefNode)),
|
|
416
|
+
sub(y, avg(y as ColumnRefNode))
|
|
417
|
+
)
|
|
418
|
+
);
|
|
382
419
|
return sum(addStat(preagg, sxy, node));
|
|
383
420
|
}
|
|
384
421
|
|
|
@@ -387,12 +424,12 @@ function regrSumXYExpr(preagg, node, avg) {
|
|
|
387
424
|
* Only values corresponding to non-null (x, y) pairs are included. As a side
|
|
388
425
|
* effect, this method adds columns to the input *preagg* object to track both
|
|
389
426
|
* the count of non-null pairs and partition-level averages.
|
|
390
|
-
* @param
|
|
427
|
+
* @param preagg A map of columns (such as
|
|
391
428
|
* sufficient statistics) to pre-aggregate.
|
|
392
|
-
* @param
|
|
393
|
-
* @returns
|
|
429
|
+
* @param node The originating aggregate function call.
|
|
430
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
394
431
|
*/
|
|
395
|
-
function regrAvgXExpr(preagg, node) {
|
|
432
|
+
function regrAvgXExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
396
433
|
const [y, x] = node.args;
|
|
397
434
|
const { expr: n, name } = regrCountExpr(preagg, node);
|
|
398
435
|
const a = addStat(preagg, regrAvgX(y, x), node);
|
|
@@ -404,12 +441,12 @@ function regrAvgXExpr(preagg, node) {
|
|
|
404
441
|
* Only values corresponding to non-null (x, y) pairs are included. As a side
|
|
405
442
|
* effect, this method adds columns to the input *preagg* object to track both
|
|
406
443
|
* the count of non-null pairs and partition-level averages.
|
|
407
|
-
* @param
|
|
444
|
+
* @param preagg A map of columns (such as
|
|
408
445
|
* sufficient statistics) to pre-aggregate.
|
|
409
|
-
* @param
|
|
410
|
-
* @returns
|
|
446
|
+
* @param node The originating aggregate function call.
|
|
447
|
+
* @returns An aggregate expression over pre-aggregated dimensions.
|
|
411
448
|
*/
|
|
412
|
-
function regrAvgYExpr(preagg, node) {
|
|
449
|
+
function regrAvgYExpr(preagg: Record<string, ExprNode>, node: AggregateNode): ExprNode {
|
|
413
450
|
const [y, x] = node.args;
|
|
414
451
|
const { expr: n, name } = regrCountExpr(preagg, node);
|
|
415
452
|
const a = addStat(preagg, regrAvgY(y, x), node);
|
|
@@ -422,15 +459,20 @@ function regrAvgYExpr(preagg, node) {
|
|
|
422
459
|
* non-null (x, y) pairs are included. This method uses mean-centered data to
|
|
423
460
|
* reduce floating point error. As a side effect, this method adds columns
|
|
424
461
|
* for partition-level count and sums to the input *preagg* object.
|
|
425
|
-
* @param
|
|
462
|
+
* @param preagg A map of columns (such as
|
|
426
463
|
* sufficient statistics) to pre-aggregate.
|
|
427
|
-
* @param
|
|
428
|
-
* @param
|
|
429
|
-
* @param
|
|
430
|
-
* @returns
|
|
464
|
+
* @param i The index of the argument to compute the variance for.
|
|
465
|
+
* @param node The originating aggregate function call.
|
|
466
|
+
* @param avg Global average query generator.
|
|
467
|
+
* @returns An aggregate expression for calculating variance
|
|
431
468
|
* over pre-aggregated data dimensions.
|
|
432
469
|
*/
|
|
433
|
-
function regrVarExpr(
|
|
470
|
+
function regrVarExpr(
|
|
471
|
+
preagg: Record<string, ExprNode>,
|
|
472
|
+
i: number,
|
|
473
|
+
node: AggregateNode,
|
|
474
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
475
|
+
): ExprNode {
|
|
434
476
|
const { expr: n } = regrCountExpr(preagg, node);
|
|
435
477
|
const sum = regrSumExpr(preagg, i, node, avg);
|
|
436
478
|
const ssq = regrSumSqExpr(preagg, i, node, avg);
|
|
@@ -442,14 +484,18 @@ function regrVarExpr(preagg, i, node, avg) {
|
|
|
442
484
|
* computed as the covariance divided by the variance of the x variable. As a
|
|
443
485
|
* side effect, this method adds columns for sufficient statistics to the
|
|
444
486
|
* input *preagg* object.
|
|
445
|
-
* @param
|
|
487
|
+
* @param preagg A map of columns (such as
|
|
446
488
|
* sufficient statistics) to pre-aggregate.
|
|
447
|
-
* @param
|
|
448
|
-
* @param
|
|
449
|
-
* @returns
|
|
489
|
+
* @param node The originating aggregate function call.
|
|
490
|
+
* @param avg Global average query generator.
|
|
491
|
+
* @returns An aggregate expression for calculating regression
|
|
450
492
|
* slopes over pre-aggregated data dimensions.
|
|
451
493
|
*/
|
|
452
|
-
function regrSlopeExpr(
|
|
494
|
+
function regrSlopeExpr(
|
|
495
|
+
preagg: Record<string, ExprNode>,
|
|
496
|
+
node: AggregateNode,
|
|
497
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
498
|
+
): ExprNode {
|
|
453
499
|
const cov = covarianceExpr(preagg, node, avg, null);
|
|
454
500
|
const varx = regrVarExpr(preagg, 1, node, avg);
|
|
455
501
|
return div(cov, varx);
|
|
@@ -460,16 +506,20 @@ function regrSlopeExpr(preagg, node, avg) {
|
|
|
460
506
|
* is derived from the regression slope and average x and y values. As a
|
|
461
507
|
* side effect, this method adds columns for sufficient statistics to the
|
|
462
508
|
* input *preagg* object.
|
|
463
|
-
* @param
|
|
509
|
+
* @param preagg A map of columns (such as
|
|
464
510
|
* sufficient statistics) to pre-aggregate.
|
|
465
|
-
* @param
|
|
466
|
-
* @param
|
|
467
|
-
* @returns
|
|
511
|
+
* @param node The originating aggregate function call.
|
|
512
|
+
* @param avg Global average query generator.
|
|
513
|
+
* @returns An aggregate expression for calculating regression
|
|
468
514
|
* intercepts over pre-aggregated data dimensions.
|
|
469
515
|
*/
|
|
470
|
-
function regrInterceptExpr(
|
|
516
|
+
function regrInterceptExpr(
|
|
517
|
+
preagg: Record<string, ExprNode>,
|
|
518
|
+
node: AggregateNode,
|
|
519
|
+
avg: (field: ColumnRefNode) => ExprNode
|
|
520
|
+
): ExprNode {
|
|
471
521
|
const ax = regrAvgXExpr(preagg, node);
|
|
472
522
|
const ay = regrAvgYExpr(preagg, node);
|
|
473
523
|
const m = regrSlopeExpr(preagg, node, avg);
|
|
474
524
|
return sub(ay, mul(m, ax));
|
|
475
|
-
}
|
|
525
|
+
}
|
package/src/types.ts
CHANGED
|
@@ -1,4 +1,19 @@
|
|
|
1
1
|
import type { DescribeQuery, ExprNode, Query } from '@uwdata/mosaic-sql';
|
|
2
|
+
import type { QueryResult } from './util/query-result.js';
|
|
3
|
+
|
|
4
|
+
/** Type for a query request. */
|
|
5
|
+
export interface QueryRequest {
|
|
6
|
+
type: 'exec' | 'json' | 'arrow';
|
|
7
|
+
query: string | Query | DescribeQuery;
|
|
8
|
+
cache?: boolean;
|
|
9
|
+
options?: Record<string, unknown>;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/** Type for an entry within a query manager. */
|
|
13
|
+
export interface QueryEntry {
|
|
14
|
+
request: QueryRequest;
|
|
15
|
+
result: QueryResult;
|
|
16
|
+
}
|
|
2
17
|
|
|
3
18
|
/** Query type accepted by a coordinator. */
|
|
4
19
|
export type QueryType =
|
|
@@ -67,8 +82,8 @@ export interface Activatable {
|
|
|
67
82
|
* Interface for cache implementations.
|
|
68
83
|
*/
|
|
69
84
|
export interface Cache {
|
|
70
|
-
get(key: string):
|
|
71
|
-
set(key: string, value:
|
|
85
|
+
get(key: string): unknown;
|
|
86
|
+
set(key: string, value: unknown): unknown;
|
|
72
87
|
clear(): void;
|
|
73
88
|
}
|
|
74
89
|
|
|
@@ -76,12 +91,12 @@ export interface Cache {
|
|
|
76
91
|
* Interface for logger implementations
|
|
77
92
|
*/
|
|
78
93
|
export interface Logger {
|
|
79
|
-
debug(...args:
|
|
80
|
-
info(...args:
|
|
81
|
-
log(...args:
|
|
82
|
-
warn(...args:
|
|
83
|
-
error(...args:
|
|
84
|
-
group(label?:
|
|
85
|
-
groupCollapsed(label?:
|
|
94
|
+
debug(...args: unknown[]): void;
|
|
95
|
+
info(...args: unknown[]): void;
|
|
96
|
+
log(...args: unknown[]): void;
|
|
97
|
+
warn(...args: unknown[]): void;
|
|
98
|
+
error(...args: unknown[]): void;
|
|
99
|
+
group(label?: unknown): void;
|
|
100
|
+
groupCollapsed(label?: unknown): void;
|
|
86
101
|
groupEnd(): void;
|
|
87
102
|
}
|