@uwdata/mosaic-core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -0
- package/dist/mosaic-core.js +233 -146
- package/dist/mosaic-core.min.js +8 -8
- package/package.json +4 -4
- package/src/Coordinator.js +4 -6
- package/src/QueryConsolidator.js +38 -15
- package/src/index.js +6 -0
- package/src/util/convert-arrow.js +148 -0
- package/src/util/field-info.js +80 -0
- package/src/util/js-type.js +9 -1
- package/src/Catalog.js +0 -88
- package/src/util/summarize.js +0 -23
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uwdata/mosaic-core",
|
|
3
|
-
"version": "0.6.1",
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Scalable and extensible linked data views.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mosaic",
|
|
@@ -28,9 +28,9 @@
|
|
|
28
28
|
"prepublishOnly": "npm run test && npm run lint && npm run build"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@duckdb/duckdb-wasm": "^1.28.1-
|
|
32
|
-
"@uwdata/mosaic-sql": "^0.
|
|
31
|
+
"@duckdb/duckdb-wasm": "^1.28.1-dev109.0",
|
|
32
|
+
"@uwdata/mosaic-sql": "^0.7.0",
|
|
33
33
|
"apache-arrow": "^15.0.0"
|
|
34
34
|
},
|
|
35
|
-
"gitHead": "
|
|
35
|
+
"gitHead": "4680b922f15579b7b527f31507ed71a12230ec35"
|
|
36
36
|
}
|
package/src/Coordinator.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { socketConnector } from './connectors/socket.js';
|
|
2
|
-
import { Catalog } from './Catalog.js';
|
|
3
2
|
import { FilterGroup } from './FilterGroup.js';
|
|
4
3
|
import { QueryManager, Priority } from './QueryManager.js';
|
|
4
|
+
import { queryFieldInfo } from './util/field-info.js';
|
|
5
5
|
import { voidLogger } from './util/void-logger.js';
|
|
6
6
|
|
|
7
7
|
let _instance;
|
|
@@ -27,7 +27,6 @@ export class Coordinator {
|
|
|
27
27
|
logger = console,
|
|
28
28
|
manager = QueryManager()
|
|
29
29
|
} = options;
|
|
30
|
-
this.catalog = new Catalog(this);
|
|
31
30
|
this.manager = manager;
|
|
32
31
|
this.logger(logger);
|
|
33
32
|
this.configure(options);
|
|
@@ -49,7 +48,7 @@ export class Coordinator {
|
|
|
49
48
|
this.indexes = indexes;
|
|
50
49
|
}
|
|
51
50
|
|
|
52
|
-
clear({ clients = true, cache = true, catalog = false } = {}) {
|
|
51
|
+
clear({ clients = true, cache = true } = {}) {
|
|
53
52
|
this.manager.clear();
|
|
54
53
|
if (clients) {
|
|
55
54
|
this.clients?.forEach(client => this.disconnect(client));
|
|
@@ -58,7 +57,6 @@ export class Coordinator {
|
|
|
58
57
|
this.filterGroups = new Map;
|
|
59
58
|
}
|
|
60
59
|
if (cache) this.manager.cache().clear();
|
|
61
|
-
if (catalog) this.catalog.clear();
|
|
62
60
|
}
|
|
63
61
|
|
|
64
62
|
databaseConnector(db) {
|
|
@@ -122,7 +120,7 @@ export class Coordinator {
|
|
|
122
120
|
* @param {import('./MosaicClient.js').MosaicClient} client the client to disconnect
|
|
123
121
|
*/
|
|
124
122
|
async connect(client) {
|
|
125
|
-
const { catalog, clients, filterGroups, indexes } = this;
|
|
123
|
+
const { clients, filterGroups, indexes } = this;
|
|
126
124
|
|
|
127
125
|
if (clients.has(client)) {
|
|
128
126
|
throw new Error('Client already connected.');
|
|
@@ -133,7 +131,7 @@ export class Coordinator {
|
|
|
133
131
|
// retrieve field statistics
|
|
134
132
|
const fields = client.fields();
|
|
135
133
|
if (fields?.length) {
|
|
136
|
-
client.fieldInfo(await catalog.queryFields(fields));
|
|
134
|
+
client.fieldInfo(await queryFieldInfo(this, fields));
|
|
137
135
|
}
|
|
138
136
|
|
|
139
137
|
// connect filters
|
package/src/QueryConsolidator.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Query, Ref } from '@uwdata/mosaic-sql';
|
|
1
|
+
import { Query, Ref, isDescribeQuery } from '@uwdata/mosaic-sql';
|
|
2
2
|
import { queryResult } from './util/query-result.js';
|
|
3
3
|
|
|
4
4
|
function wait(callback) {
|
|
@@ -125,7 +125,7 @@ function consolidate(group, enqueue, record) {
|
|
|
125
125
|
type: 'arrow',
|
|
126
126
|
cache: false,
|
|
127
127
|
record: false,
|
|
128
|
-
query: consolidatedQuery(group, record)
|
|
128
|
+
query: (group.query = consolidatedQuery(group, record))
|
|
129
129
|
},
|
|
130
130
|
result: (group.result = queryResult())
|
|
131
131
|
});
|
|
@@ -192,7 +192,7 @@ function consolidatedQuery(group, record) {
|
|
|
192
192
|
query.$groupby(groupby.map(e => (e instanceof Ref && map[e.column]) || e));
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
-
// update select
|
|
195
|
+
// update select statement and return
|
|
196
196
|
return query.$select(Array.from(fields.values()));
|
|
197
197
|
}
|
|
198
198
|
|
|
@@ -202,9 +202,13 @@ function consolidatedQuery(group, record) {
|
|
|
202
202
|
* @param {*} cache Client-side query cache (sql -> data)
|
|
203
203
|
*/
|
|
204
204
|
async function processResults(group, cache) {
|
|
205
|
-
const { maps, result } = group;
|
|
206
|
-
if (!maps) return; // no consolidation performed
|
|
205
|
+
const { maps, query, result } = group;
|
|
207
206
|
|
|
207
|
+
// exit early if no consolidation performed
|
|
208
|
+
// in this case results are passed directly
|
|
209
|
+
if (!maps) return;
|
|
210
|
+
|
|
211
|
+
// await consolidated query result, pass errors if needed
|
|
208
212
|
let data;
|
|
209
213
|
try {
|
|
210
214
|
data = await result;
|
|
@@ -216,13 +220,19 @@ async function processResults(group, cache) {
|
|
|
216
220
|
return;
|
|
217
221
|
}
|
|
218
222
|
|
|
223
|
+
// extract result for each query in the consolidation group
|
|
224
|
+
// update cache and pass extract to original issuer
|
|
225
|
+
const describe = isDescribeQuery(query);
|
|
219
226
|
group.forEach(({ entry }, index) => {
|
|
220
227
|
const { request, result } = entry;
|
|
221
|
-
const
|
|
228
|
+
const map = maps[index];
|
|
229
|
+
const extract = describe && map ? filterResult(data, map)
|
|
230
|
+
: map ? projectResult(data, map)
|
|
231
|
+
: data;
|
|
222
232
|
if (request.cache) {
|
|
223
|
-
cache.set(String(request.query),
|
|
233
|
+
cache.set(String(request.query), extract);
|
|
224
234
|
}
|
|
225
|
-
result.fulfill(
|
|
235
|
+
result.fulfill(extract);
|
|
226
236
|
});
|
|
227
237
|
}
|
|
228
238
|
|
|
@@ -233,13 +243,26 @@ async function processResults(group, cache) {
|
|
|
233
243
|
* @returns the projected Apache Arrow table
|
|
234
244
|
*/
|
|
235
245
|
function projectResult(data, map) {
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
246
|
+
const cols = {};
|
|
247
|
+
for (const [name, as] of map) {
|
|
248
|
+
cols[as] = data.getChild(name);
|
|
249
|
+
}
|
|
250
|
+
return new data.constructor(cols);
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
/**
|
|
254
|
+
* Filter a consolidated describe query result to a client result
|
|
255
|
+
* @param {*} data Consolidated query result
|
|
256
|
+
* @param {*} map Column name map as [source, target] pairs
|
|
257
|
+
* @returns the filtered table data
|
|
258
|
+
*/
|
|
259
|
+
function filterResult(data, map) {
|
|
260
|
+
const lookup = new Map(map);
|
|
261
|
+
const result = [];
|
|
262
|
+
for (const d of data) {
|
|
263
|
+
if (lookup.has(d.column_name)) {
|
|
264
|
+
result.push({ ...d, column_name: lookup.get(d.column_name) })
|
|
240
265
|
}
|
|
241
|
-
return new data.constructor(cols);
|
|
242
|
-
} else {
|
|
243
|
-
return data;
|
|
244
266
|
}
|
|
267
|
+
return result;
|
|
245
268
|
}
|
package/src/index.js
CHANGED
|
@@ -8,6 +8,12 @@ export { restConnector } from './connectors/rest.js';
|
|
|
8
8
|
export { socketConnector } from './connectors/socket.js';
|
|
9
9
|
export { wasmConnector } from './connectors/wasm.js';
|
|
10
10
|
|
|
11
|
+
export {
|
|
12
|
+
isArrowTable,
|
|
13
|
+
convertArrowArrayType,
|
|
14
|
+
convertArrowValue,
|
|
15
|
+
convertArrowColumn
|
|
16
|
+
} from './util/convert-arrow.js'
|
|
11
17
|
export { distinct } from './util/distinct.js';
|
|
12
18
|
export { synchronizer } from './util/synchronizer.js';
|
|
13
19
|
export { throttle } from './util/throttle.js';
|
package/src/util/convert-arrow.js
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// arrow type ids
|
|
2
|
+
const INTEGER = 2;
|
|
3
|
+
const FLOAT = 3;
|
|
4
|
+
const DECIMAL = 7;
|
|
5
|
+
const TIMESTAMP = 10;
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Test if a value is an Apache Arrow table.
|
|
9
|
+
* As sometimes multiple Arrow versions may be used simultaneously,
|
|
10
|
+
* we use a "duck typing" approach and check for a getChild function.
|
|
11
|
+
* @param {*} values The value to test
|
|
12
|
+
* @returns true if the value duck types as Apache Arrow data
|
|
13
|
+
*/
|
|
14
|
+
export function isArrowTable(values) {
|
|
15
|
+
return typeof values?.getChild === 'function';
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Return a JavaScript array type for an Apache Arrow column type.
|
|
20
|
+
* @param {*} type an Apache Arrow column type
|
|
21
|
+
* @returns a JavaScript array constructor
|
|
22
|
+
*/
|
|
23
|
+
export function convertArrowArrayType(type) {
|
|
24
|
+
switch (type.typeId) {
|
|
25
|
+
case INTEGER:
|
|
26
|
+
case FLOAT:
|
|
27
|
+
case DECIMAL:
|
|
28
|
+
return Float64Array;
|
|
29
|
+
default:
|
|
30
|
+
return Array;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Return a function that converts Apache Arrow values to JavaScript values.
|
|
36
|
+
* Timestamps are converted to Date values.
|
|
37
|
+
* Large integers (BigInt) are converted to Float64 numbers.
|
|
38
|
+
* Fixed-point decimal values are convert to Float64 numbers.
|
|
39
|
+
* Otherwise, the default Arrow values are used.
|
|
40
|
+
* @param {*} type an Apache Arrow column type
|
|
41
|
+
* @returns a value conversion function
|
|
42
|
+
*/
|
|
43
|
+
export function convertArrowValue(type) {
|
|
44
|
+
const { typeId } = type;
|
|
45
|
+
|
|
46
|
+
// map timestamp numbers to date objects
|
|
47
|
+
if (typeId === TIMESTAMP) {
|
|
48
|
+
return v => v == null ? v : new Date(v);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// map bigint to number
|
|
52
|
+
if (typeId === INTEGER && type.bitWidth >= 64) {
|
|
53
|
+
return v => v == null ? v : Number(v);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// map decimal to number
|
|
57
|
+
if (typeId === DECIMAL) {
|
|
58
|
+
const scale = 1 / Math.pow(10, type.scale);
|
|
59
|
+
return v => v == null ? v : decimalToNumber(v, scale);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// otherwise use Arrow JS defaults
|
|
63
|
+
return v => v;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Convert an Apache Arrow column to a JavaScript array.
|
|
68
|
+
* Timestamps are converted to Date values.
|
|
69
|
+
* Large integers (BigInt) are converted to Float64 numbers.
|
|
70
|
+
* Fixed-point decimal values are convert to Float64 numbers.
|
|
71
|
+
* Otherwise, the default Arrow values are used.
|
|
72
|
+
* @param {*} column An Apache Arrow column
|
|
73
|
+
* @returns an array of values
|
|
74
|
+
*/
|
|
75
|
+
export function convertArrowColumn(column) {
|
|
76
|
+
const { type } = column;
|
|
77
|
+
const { typeId } = type;
|
|
78
|
+
|
|
79
|
+
// map timestamp numbers to date objects
|
|
80
|
+
if (typeId === TIMESTAMP) {
|
|
81
|
+
const size = column.length;
|
|
82
|
+
const array = new Array(size);
|
|
83
|
+
for (let row = 0; row < size; ++row) {
|
|
84
|
+
const v = column.get(row);
|
|
85
|
+
array[row] = v == null ? null : new Date(v);
|
|
86
|
+
}
|
|
87
|
+
return array;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// map bigint to number
|
|
91
|
+
if (typeId === INTEGER && type.bitWidth >= 64) {
|
|
92
|
+
const size = column.length;
|
|
93
|
+
const array = new Float64Array(size);
|
|
94
|
+
for (let row = 0; row < size; ++row) {
|
|
95
|
+
const v = column.get(row);
|
|
96
|
+
array[row] = v == null ? NaN : Number(v);
|
|
97
|
+
}
|
|
98
|
+
return array;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// map decimal to number
|
|
102
|
+
if (typeId === DECIMAL) {
|
|
103
|
+
const scale = 1 / Math.pow(10, type.scale);
|
|
104
|
+
const size = column.length;
|
|
105
|
+
const array = new Float64Array(size);
|
|
106
|
+
for (let row = 0; row < size; ++row) {
|
|
107
|
+
const v = column.get(row);
|
|
108
|
+
array[row] = v == null ? NaN : decimalToNumber(v, scale);
|
|
109
|
+
}
|
|
110
|
+
return array;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// otherwise use Arrow JS defaults
|
|
114
|
+
return column.toArray();
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// generate base values for big integers
|
|
118
|
+
// represented within a Uint32Array
|
|
119
|
+
const BASE32 = Array.from(
|
|
120
|
+
{ length: 8 },
|
|
121
|
+
(_, i) => Math.pow(2, i * 32)
|
|
122
|
+
);
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Convert a fixed point decimal value to a double precision number.
|
|
126
|
+
* Note: if the value is sufficiently large the conversion may be lossy!
|
|
127
|
+
* @param {Uint32Array} v a fixed decimal value
|
|
128
|
+
* @param {number} scale a scale factor, corresponding to the
|
|
129
|
+
* number of fractional decimal digits in the fixed point value
|
|
130
|
+
* @returns the resulting number
|
|
131
|
+
*/
|
|
132
|
+
function decimalToNumber(v, scale) {
|
|
133
|
+
const n = v.length;
|
|
134
|
+
let x = 0;
|
|
135
|
+
|
|
136
|
+
if (v.signed && (v[n-1]|0) < 0) {
|
|
137
|
+
for (let i = 0; i < n; ++i) {
|
|
138
|
+
x += ~(v[i] | 0) * BASE32[i];
|
|
139
|
+
}
|
|
140
|
+
x = -(x + 1);
|
|
141
|
+
} else {
|
|
142
|
+
for (let i = 0; i < n; ++i) {
|
|
143
|
+
x += v[i] * BASE32[i];
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return x * scale;
|
|
148
|
+
}
|
package/src/util/field-info.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { Query, asRelation, count, isNull, max, min, sql } from '@uwdata/mosaic-sql';
|
|
2
|
+
import { jsType } from './js-type.js';
|
|
3
|
+
import { convertArrowValue } from './convert-arrow.js';
|
|
4
|
+
|
|
5
|
+
export const Count = 'count';
|
|
6
|
+
export const Nulls = 'nulls';
|
|
7
|
+
export const Max = 'max';
|
|
8
|
+
export const Min = 'min';
|
|
9
|
+
export const Distinct = 'distinct';
|
|
10
|
+
export const Stats = { Count, Nulls, Max, Min, Distinct };
|
|
11
|
+
|
|
12
|
+
const statMap = {
|
|
13
|
+
[Count]: count,
|
|
14
|
+
[Distinct]: column => count(column).distinct(),
|
|
15
|
+
[Max]: max,
|
|
16
|
+
[Min]: min,
|
|
17
|
+
[Nulls]: column => count().where(isNull(column))
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
function summarize(table, column, stats) {
|
|
21
|
+
return Query
|
|
22
|
+
.from(table)
|
|
23
|
+
.select(Array.from(stats, s => [s, statMap[s](column)]));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export async function queryFieldInfo(mc, fields) {
|
|
27
|
+
if (fields.length === 1 && `${fields[0].column}` === '*') {
|
|
28
|
+
return getTableInfo(mc, fields[0].table);
|
|
29
|
+
} else {
|
|
30
|
+
return (await Promise
|
|
31
|
+
.all(fields.map(f => getFieldInfo(mc, f))))
|
|
32
|
+
.filter(x => x);
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
async function getFieldInfo(mc, { table, column, stats }) {
|
|
37
|
+
// generate and issue a query for field metadata info
|
|
38
|
+
// use GROUP BY ALL to differentiate & consolidate aggregates
|
|
39
|
+
const q = Query.from({ source: table })
|
|
40
|
+
.select({ column })
|
|
41
|
+
.groupby(column.aggregate ? sql`ALL` : []);
|
|
42
|
+
const [desc] = Array.from(await mc.query(Query.describe(q)));
|
|
43
|
+
const info = {
|
|
44
|
+
table,
|
|
45
|
+
column: `${column}`,
|
|
46
|
+
sqlType: desc.column_type,
|
|
47
|
+
type: jsType(desc.column_type),
|
|
48
|
+
nullable: desc.null === 'YES'
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
// no need for summary statistics
|
|
52
|
+
if (!(stats?.length || stats?.size)) return info;
|
|
53
|
+
|
|
54
|
+
// query for summary stats
|
|
55
|
+
const result = await mc.query(
|
|
56
|
+
summarize(table, column, stats),
|
|
57
|
+
{ persist: true }
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
// extract summary stats, copy to field info
|
|
61
|
+
for (let i = 0; i < result.numCols; ++i) {
|
|
62
|
+
const { name } = result.schema.fields[i];
|
|
63
|
+
const child = result.getChildAt(i);
|
|
64
|
+
const convert = convertArrowValue(child.type);
|
|
65
|
+
info[name] = convert(child.get(0));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return info;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
async function getTableInfo(mc, table) {
|
|
72
|
+
const result = await mc.query(`DESCRIBE ${asRelation(table)}`);
|
|
73
|
+
return Array.from(result).map(desc => ({
|
|
74
|
+
table,
|
|
75
|
+
column: desc.column_name,
|
|
76
|
+
sqlType: desc.column_type,
|
|
77
|
+
type: jsType(desc.column_type),
|
|
78
|
+
nullable: desc.null === 'YES'
|
|
79
|
+
}));
|
|
80
|
+
}
|
package/src/util/js-type.js
CHANGED
|
@@ -12,7 +12,6 @@ export function jsType(type) {
|
|
|
12
12
|
case 'DOUBLE':
|
|
13
13
|
case 'FLOAT':
|
|
14
14
|
case 'REAL':
|
|
15
|
-
case 'DECIMAL':
|
|
16
15
|
return 'number';
|
|
17
16
|
case 'DATE':
|
|
18
17
|
case 'TIMESTAMP':
|
|
@@ -26,13 +25,22 @@ export function jsType(type) {
|
|
|
26
25
|
case 'VARCHAR':
|
|
27
26
|
case 'UUID':
|
|
28
27
|
return 'string';
|
|
28
|
+
case 'ARRAY':
|
|
29
29
|
case 'LIST':
|
|
30
30
|
return 'array';
|
|
31
31
|
case 'BLOB':
|
|
32
32
|
case 'STRUCT':
|
|
33
33
|
case 'MAP':
|
|
34
|
+
case 'GEOMETRY':
|
|
34
35
|
return 'object';
|
|
35
36
|
default:
|
|
37
|
+
if (type.startsWith('DECIMAL')) {
|
|
38
|
+
return 'number';
|
|
39
|
+
} else if (type.startsWith('STRUCT') || type.startsWith('MAP')) {
|
|
40
|
+
return 'object';
|
|
41
|
+
} else if (type.endsWith(']')) {
|
|
42
|
+
return 'array';
|
|
43
|
+
}
|
|
36
44
|
throw new Error(`Unsupported type: ${type}`);
|
|
37
45
|
}
|
|
38
46
|
}
|
package/src/Catalog.js
DELETED
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import { asRelation } from '@uwdata/mosaic-sql';
|
|
2
|
-
import { jsType } from './util/js-type.js';
|
|
3
|
-
import { summarize } from './util/summarize.js';
|
|
4
|
-
|
|
5
|
-
const object = () => Object.create(null);
|
|
6
|
-
|
|
7
|
-
export class Catalog {
|
|
8
|
-
constructor(coordinator) {
|
|
9
|
-
/** @type {import('@uwdata/mosaic-core').Coordinator} */
|
|
10
|
-
this.mc = coordinator;
|
|
11
|
-
this.clear();
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
clear() {
|
|
15
|
-
this.tables = object();
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
tableInfo(table) {
|
|
19
|
-
const cache = this.tables;
|
|
20
|
-
if (cache[table]) {
|
|
21
|
-
return cache[table];
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
const infoPromise = getTableInfo(this.mc, table)
|
|
25
|
-
.catch(err => { cache[table] = null; throw err; });
|
|
26
|
-
|
|
27
|
-
return (cache[table] = infoPromise);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
async fieldInfo({ table, column, stats }) {
|
|
31
|
-
const tableInfo = await this.tableInfo(table);
|
|
32
|
-
const colInfo = tableInfo[column];
|
|
33
|
-
|
|
34
|
-
// column does not exist
|
|
35
|
-
if (colInfo == null) return;
|
|
36
|
-
|
|
37
|
-
// no need for summary statistics
|
|
38
|
-
if (!stats?.length) return colInfo;
|
|
39
|
-
|
|
40
|
-
const result = await this.mc.query(
|
|
41
|
-
summarize(colInfo, stats),
|
|
42
|
-
{ persist: true }
|
|
43
|
-
);
|
|
44
|
-
const info = { ...colInfo, ...(Array.from(result)[0]) };
|
|
45
|
-
|
|
46
|
-
// coerce bigint to number
|
|
47
|
-
for (const key in info) {
|
|
48
|
-
const value = info[key];
|
|
49
|
-
if (typeof value === 'bigint') {
|
|
50
|
-
info[key] = Number(value);
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return info;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
async queryFields(fields) {
|
|
58
|
-
const list = await resolveFields(this, fields);
|
|
59
|
-
const data = await Promise.all(list.map(f => this.fieldInfo(f)));
|
|
60
|
-
return data.filter(x => x);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
async function getTableInfo(mc, table) {
|
|
65
|
-
const result = await mc.query(
|
|
66
|
-
`DESCRIBE ${asRelation(table)}`,
|
|
67
|
-
{ type: 'json', cache: false }
|
|
68
|
-
);
|
|
69
|
-
|
|
70
|
-
const columns = object();
|
|
71
|
-
for (const entry of result) {
|
|
72
|
-
columns[entry.column_name] = {
|
|
73
|
-
table,
|
|
74
|
-
column: entry.column_name,
|
|
75
|
-
sqlType: entry.column_type,
|
|
76
|
-
type: jsType(entry.column_type),
|
|
77
|
-
nullable: entry.null === 'YES'
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
return columns;
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
async function resolveFields(catalog, list) {
|
|
85
|
-
return list.length === 1 && list[0].column === '*'
|
|
86
|
-
? Object.values(await catalog.tableInfo(list[0].table))
|
|
87
|
-
: list;
|
|
88
|
-
}
|
package/src/util/summarize.js
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { Query, count, isNull, max, min } from '@uwdata/mosaic-sql';
|
|
2
|
-
|
|
3
|
-
export const Count = 'count';
|
|
4
|
-
export const Nulls = 'nulls';
|
|
5
|
-
export const Max = 'max';
|
|
6
|
-
export const Min = 'min';
|
|
7
|
-
export const Distinct = 'distinct';
|
|
8
|
-
|
|
9
|
-
export const Stats = { Count, Nulls, Max, Min, Distinct };
|
|
10
|
-
|
|
11
|
-
export const statMap = {
|
|
12
|
-
[Count]: count,
|
|
13
|
-
[Distinct]: column => count(column).distinct(),
|
|
14
|
-
[Max]: max,
|
|
15
|
-
[Min]: min,
|
|
16
|
-
[Nulls]: column => count().where(isNull(column))
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
export function summarize({ table, column }, stats) {
|
|
20
|
-
return Query
|
|
21
|
-
.from(table)
|
|
22
|
-
.select(stats.map(s => [s, statMap[s](column)]));
|
|
23
|
-
}
|