@uwdata/mosaic-core 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uwdata/mosaic-core",
3
- "version": "0.6.1",
3
+ "version": "0.7.0",
4
4
  "description": "Scalable and extensible linked data views.",
5
5
  "keywords": [
6
6
  "mosaic",
@@ -28,9 +28,9 @@
28
28
  "prepublishOnly": "npm run test && npm run lint && npm run build"
29
29
  },
30
30
  "dependencies": {
31
- "@duckdb/duckdb-wasm": "^1.28.1-dev106.0",
32
- "@uwdata/mosaic-sql": "^0.6.0",
31
+ "@duckdb/duckdb-wasm": "^1.28.1-dev109.0",
32
+ "@uwdata/mosaic-sql": "^0.7.0",
33
33
  "apache-arrow": "^15.0.0"
34
34
  },
35
- "gitHead": "9e788e6dc5241fa1c54967a25fd9599f97da1a41"
35
+ "gitHead": "4680b922f15579b7b527f31507ed71a12230ec35"
36
36
  }
@@ -1,7 +1,7 @@
1
1
  import { socketConnector } from './connectors/socket.js';
2
- import { Catalog } from './Catalog.js';
3
2
  import { FilterGroup } from './FilterGroup.js';
4
3
  import { QueryManager, Priority } from './QueryManager.js';
4
+ import { queryFieldInfo } from './util/field-info.js';
5
5
  import { voidLogger } from './util/void-logger.js';
6
6
 
7
7
  let _instance;
@@ -27,7 +27,6 @@ export class Coordinator {
27
27
  logger = console,
28
28
  manager = QueryManager()
29
29
  } = options;
30
- this.catalog = new Catalog(this);
31
30
  this.manager = manager;
32
31
  this.logger(logger);
33
32
  this.configure(options);
@@ -49,7 +48,7 @@ export class Coordinator {
49
48
  this.indexes = indexes;
50
49
  }
51
50
 
52
- clear({ clients = true, cache = true, catalog = false } = {}) {
51
+ clear({ clients = true, cache = true } = {}) {
53
52
  this.manager.clear();
54
53
  if (clients) {
55
54
  this.clients?.forEach(client => this.disconnect(client));
@@ -58,7 +57,6 @@ export class Coordinator {
58
57
  this.filterGroups = new Map;
59
58
  }
60
59
  if (cache) this.manager.cache().clear();
61
- if (catalog) this.catalog.clear();
62
60
  }
63
61
 
64
62
  databaseConnector(db) {
@@ -122,7 +120,7 @@ export class Coordinator {
122
120
  * @param {import('./MosaicClient.js').MosaicClient} client the client to connect
123
121
  */
124
122
  async connect(client) {
125
- const { catalog, clients, filterGroups, indexes } = this;
123
+ const { clients, filterGroups, indexes } = this;
126
124
 
127
125
  if (clients.has(client)) {
128
126
  throw new Error('Client already connected.');
@@ -133,7 +131,7 @@ export class Coordinator {
133
131
  // retrieve field statistics
134
132
  const fields = client.fields();
135
133
  if (fields?.length) {
136
- client.fieldInfo(await catalog.queryFields(fields));
134
+ client.fieldInfo(await queryFieldInfo(this, fields));
137
135
  }
138
136
 
139
137
  // connect filters
@@ -1,4 +1,4 @@
1
- import { Query, Ref } from '@uwdata/mosaic-sql';
1
+ import { Query, Ref, isDescribeQuery } from '@uwdata/mosaic-sql';
2
2
  import { queryResult } from './util/query-result.js';
3
3
 
4
4
  function wait(callback) {
@@ -125,7 +125,7 @@ function consolidate(group, enqueue, record) {
125
125
  type: 'arrow',
126
126
  cache: false,
127
127
  record: false,
128
- query: consolidatedQuery(group, record)
128
+ query: (group.query = consolidatedQuery(group, record))
129
129
  },
130
130
  result: (group.result = queryResult())
131
131
  });
@@ -192,7 +192,7 @@ function consolidatedQuery(group, record) {
192
192
  query.$groupby(groupby.map(e => (e instanceof Ref && map[e.column]) || e));
193
193
  }
194
194
 
195
- // update select statemenet and return
195
+ // update select statement and return
196
196
  return query.$select(Array.from(fields.values()));
197
197
  }
198
198
 
@@ -202,9 +202,13 @@ function consolidatedQuery(group, record) {
202
202
  * @param {*} cache Client-side query cache (sql -> data)
203
203
  */
204
204
  async function processResults(group, cache) {
205
- const { maps, result } = group;
206
- if (!maps) return; // no consolidation performed
205
+ const { maps, query, result } = group;
207
206
 
207
+ // exit early if no consolidation performed
208
+ // in this case results are passed directly
209
+ if (!maps) return;
210
+
211
+ // await consolidated query result, pass errors if needed
208
212
  let data;
209
213
  try {
210
214
  data = await result;
@@ -216,13 +220,19 @@ async function processResults(group, cache) {
216
220
  return;
217
221
  }
218
222
 
223
+ // extract result for each query in the consolidation group
224
+ // update cache and pass extract to original issuer
225
+ const describe = isDescribeQuery(query);
219
226
  group.forEach(({ entry }, index) => {
220
227
  const { request, result } = entry;
221
- const projected = projectResult(data, maps[index]);
228
+ const map = maps[index];
229
+ const extract = describe && map ? filterResult(data, map)
230
+ : map ? projectResult(data, map)
231
+ : data;
222
232
  if (request.cache) {
223
- cache.set(String(request.query), projected);
233
+ cache.set(String(request.query), extract);
224
234
  }
225
- result.fulfill(projected);
235
+ result.fulfill(extract);
226
236
  });
227
237
  }
228
238
 
@@ -233,13 +243,26 @@ async function processResults(group, cache) {
233
243
  * @returns the projected Apache Arrow table
234
244
  */
235
245
  function projectResult(data, map) {
236
- if (map) {
237
- const cols = {};
238
- for (const [name, as] of map) {
239
- cols[as] = data.getChild(name);
246
+ const cols = {};
247
+ for (const [name, as] of map) {
248
+ cols[as] = data.getChild(name);
249
+ }
250
+ return new data.constructor(cols);
251
+ }
252
+
253
+ /**
254
+ * Filter a consolidated describe query result to a client result
255
+ * @param {*} data Consolidated query result
256
+ * @param {*} map Column name map as [source, target] pairs
257
+ * @returns the filtered table data
258
+ */
259
+ function filterResult(data, map) {
260
+ const lookup = new Map(map);
261
+ const result = [];
262
+ for (const d of data) {
263
+ if (lookup.has(d.column_name)) {
264
+ result.push({ ...d, column_name: lookup.get(d.column_name) })
240
265
  }
241
- return new data.constructor(cols);
242
- } else {
243
- return data;
244
266
  }
267
+ return result;
245
268
  }
package/src/index.js CHANGED
@@ -8,6 +8,12 @@ export { restConnector } from './connectors/rest.js';
8
8
  export { socketConnector } from './connectors/socket.js';
9
9
  export { wasmConnector } from './connectors/wasm.js';
10
10
 
11
+ export {
12
+ isArrowTable,
13
+ convertArrowArrayType,
14
+ convertArrowValue,
15
+ convertArrowColumn
16
+ } from './util/convert-arrow.js'
11
17
  export { distinct } from './util/distinct.js';
12
18
  export { synchronizer } from './util/synchronizer.js';
13
19
  export { throttle } from './util/throttle.js';
@@ -0,0 +1,148 @@
1
+ // arrow type ids
2
+ const INTEGER = 2;
3
+ const FLOAT = 3;
4
+ const DECIMAL = 7;
5
+ const TIMESTAMP = 10;
6
+
7
+ /**
8
+ * Test if a value is an Apache Arrow table.
9
+ * As sometimes multiple Arrow versions may be used simultaneously,
10
+ * we use a "duck typing" approach and check for a getChild function.
11
+ * @param {*} values The value to test
12
+ * @returns true if the value duck types as Apache Arrow data
13
+ */
14
+ export function isArrowTable(values) {
15
+ return typeof values?.getChild === 'function';
16
+ }
17
+
18
+ /**
19
+ * Return a JavaScript array type for an Apache Arrow column type.
20
+ * @param {*} type an Apache Arrow column type
21
+ * @returns a JavaScript array constructor
22
+ */
23
+ export function convertArrowArrayType(type) {
24
+ switch (type.typeId) {
25
+ case INTEGER:
26
+ case FLOAT:
27
+ case DECIMAL:
28
+ return Float64Array;
29
+ default:
30
+ return Array;
31
+ }
32
+ }
33
+
34
+ /**
35
+ * Return a function that converts Apache Arrow values to JavaScript values.
36
+ * Timestamps are converted to Date values.
37
+ * Large integers (BigInt) are converted to Float64 numbers.
38
+ * Fixed-point decimal values are converted to Float64 numbers.
39
+ * Otherwise, the default Arrow values are used.
40
+ * @param {*} type an Apache Arrow column type
41
+ * @returns a value conversion function
42
+ */
43
+ export function convertArrowValue(type) {
44
+ const { typeId } = type;
45
+
46
+ // map timestamp numbers to date objects
47
+ if (typeId === TIMESTAMP) {
48
+ return v => v == null ? v : new Date(v);
49
+ }
50
+
51
+ // map bigint to number
52
+ if (typeId === INTEGER && type.bitWidth >= 64) {
53
+ return v => v == null ? v : Number(v);
54
+ }
55
+
56
+ // map decimal to number
57
+ if (typeId === DECIMAL) {
58
+ const scale = 1 / Math.pow(10, type.scale);
59
+ return v => v == null ? v : decimalToNumber(v, scale);
60
+ }
61
+
62
+ // otherwise use Arrow JS defaults
63
+ return v => v;
64
+ }
65
+
66
+ /**
67
+ * Convert an Apache Arrow column to a JavaScript array.
68
+ * Timestamps are converted to Date values.
69
+ * Large integers (BigInt) are converted to Float64 numbers.
70
+ * Fixed-point decimal values are converted to Float64 numbers.
71
+ * Otherwise, the default Arrow values are used.
72
+ * @param {*} column An Apache Arrow column
73
+ * @returns an array of values
74
+ */
75
+ export function convertArrowColumn(column) {
76
+ const { type } = column;
77
+ const { typeId } = type;
78
+
79
+ // map timestamp numbers to date objects
80
+ if (typeId === TIMESTAMP) {
81
+ const size = column.length;
82
+ const array = new Array(size);
83
+ for (let row = 0; row < size; ++row) {
84
+ const v = column.get(row);
85
+ array[row] = v == null ? null : new Date(v);
86
+ }
87
+ return array;
88
+ }
89
+
90
+ // map bigint to number
91
+ if (typeId === INTEGER && type.bitWidth >= 64) {
92
+ const size = column.length;
93
+ const array = new Float64Array(size);
94
+ for (let row = 0; row < size; ++row) {
95
+ const v = column.get(row);
96
+ array[row] = v == null ? NaN : Number(v);
97
+ }
98
+ return array;
99
+ }
100
+
101
+ // map decimal to number
102
+ if (typeId === DECIMAL) {
103
+ const scale = 1 / Math.pow(10, type.scale);
104
+ const size = column.length;
105
+ const array = new Float64Array(size);
106
+ for (let row = 0; row < size; ++row) {
107
+ const v = column.get(row);
108
+ array[row] = v == null ? NaN : decimalToNumber(v, scale);
109
+ }
110
+ return array;
111
+ }
112
+
113
+ // otherwise use Arrow JS defaults
114
+ return column.toArray();
115
+ }
116
+
117
+ // generate base values for big integers
118
+ // represented within a Uint32Array
119
+ const BASE32 = Array.from(
120
+ { length: 8 },
121
+ (_, i) => Math.pow(2, i * 32)
122
+ );
123
+
124
+ /**
125
+ * Convert a fixed point decimal value to a double precision number.
126
+ * Note: if the value is sufficiently large the conversion may be lossy!
127
+ * @param {Uint32Array} v a fixed decimal value
128
+ * @param {number} scale a scale factor, corresponding to the
129
+ * number of fractional decimal digits in the fixed point value
130
+ * @returns the resulting number
131
+ */
132
+ function decimalToNumber(v, scale) {
133
+ const n = v.length;
134
+ let x = 0;
135
+
136
+ if (v.signed && (v[n-1]|0) < 0) {
137
+ for (let i = 0; i < n; ++i) {
138
+ x += ~(v[i] | 0) * BASE32[i];
139
+ }
140
+ x = -(x + 1);
141
+ } else {
142
+ for (let i = 0; i < n; ++i) {
143
+ x += v[i] * BASE32[i];
144
+ }
145
+ }
146
+
147
+ return x * scale;
148
+ }
@@ -0,0 +1,80 @@
1
+ import { Query, asRelation, count, isNull, max, min, sql } from '@uwdata/mosaic-sql';
2
+ import { jsType } from './js-type.js';
3
+ import { convertArrowValue } from './convert-arrow.js';
4
+
5
+ export const Count = 'count';
6
+ export const Nulls = 'nulls';
7
+ export const Max = 'max';
8
+ export const Min = 'min';
9
+ export const Distinct = 'distinct';
10
+ export const Stats = { Count, Nulls, Max, Min, Distinct };
11
+
12
+ const statMap = {
13
+ [Count]: count,
14
+ [Distinct]: column => count(column).distinct(),
15
+ [Max]: max,
16
+ [Min]: min,
17
+ [Nulls]: column => count().where(isNull(column))
18
+ };
19
+
20
+ function summarize(table, column, stats) {
21
+ return Query
22
+ .from(table)
23
+ .select(Array.from(stats, s => [s, statMap[s](column)]));
24
+ }
25
+
26
+ export async function queryFieldInfo(mc, fields) {
27
+ if (fields.length === 1 && `${fields[0].column}` === '*') {
28
+ return getTableInfo(mc, fields[0].table);
29
+ } else {
30
+ return (await Promise
31
+ .all(fields.map(f => getFieldInfo(mc, f))))
32
+ .filter(x => x);
33
+ }
34
+ }
35
+
36
+ async function getFieldInfo(mc, { table, column, stats }) {
37
+ // generate and issue a query for field metadata info
38
+ // use GROUP BY ALL to differentiate & consolidate aggregates
39
+ const q = Query.from({ source: table })
40
+ .select({ column })
41
+ .groupby(column.aggregate ? sql`ALL` : []);
42
+ const [desc] = Array.from(await mc.query(Query.describe(q)));
43
+ const info = {
44
+ table,
45
+ column: `${column}`,
46
+ sqlType: desc.column_type,
47
+ type: jsType(desc.column_type),
48
+ nullable: desc.null === 'YES'
49
+ };
50
+
51
+ // no need for summary statistics
52
+ if (!(stats?.length || stats?.size)) return info;
53
+
54
+ // query for summary stats
55
+ const result = await mc.query(
56
+ summarize(table, column, stats),
57
+ { persist: true }
58
+ );
59
+
60
+ // extract summary stats, copy to field info
61
+ for (let i = 0; i < result.numCols; ++i) {
62
+ const { name } = result.schema.fields[i];
63
+ const child = result.getChildAt(i);
64
+ const convert = convertArrowValue(child.type);
65
+ info[name] = convert(child.get(0));
66
+ }
67
+
68
+ return info;
69
+ }
70
+
71
+ async function getTableInfo(mc, table) {
72
+ const result = await mc.query(`DESCRIBE ${asRelation(table)}`);
73
+ return Array.from(result).map(desc => ({
74
+ table,
75
+ column: desc.column_name,
76
+ sqlType: desc.column_type,
77
+ type: jsType(desc.column_type),
78
+ nullable: desc.null === 'YES'
79
+ }));
80
+ }
@@ -12,7 +12,6 @@ export function jsType(type) {
12
12
  case 'DOUBLE':
13
13
  case 'FLOAT':
14
14
  case 'REAL':
15
- case 'DECIMAL':
16
15
  return 'number';
17
16
  case 'DATE':
18
17
  case 'TIMESTAMP':
@@ -26,13 +25,22 @@ export function jsType(type) {
26
25
  case 'VARCHAR':
27
26
  case 'UUID':
28
27
  return 'string';
28
+ case 'ARRAY':
29
29
  case 'LIST':
30
30
  return 'array';
31
31
  case 'BLOB':
32
32
  case 'STRUCT':
33
33
  case 'MAP':
34
+ case 'GEOMETRY':
34
35
  return 'object';
35
36
  default:
37
+ if (type.startsWith('DECIMAL')) {
38
+ return 'number';
39
+ } else if (type.startsWith('STRUCT') || type.startsWith('MAP')) {
40
+ return 'object';
41
+ } else if (type.endsWith(']')) {
42
+ return 'array';
43
+ }
36
44
  throw new Error(`Unsupported type: ${type}`);
37
45
  }
38
46
  }
package/src/Catalog.js DELETED
@@ -1,88 +0,0 @@
1
- import { asRelation } from '@uwdata/mosaic-sql';
2
- import { jsType } from './util/js-type.js';
3
- import { summarize } from './util/summarize.js';
4
-
5
- const object = () => Object.create(null);
6
-
7
- export class Catalog {
8
- constructor(coordinator) {
9
- /** @type {import('@uwdata/mosaic-core').Coordinator} */
10
- this.mc = coordinator;
11
- this.clear();
12
- }
13
-
14
- clear() {
15
- this.tables = object();
16
- }
17
-
18
- tableInfo(table) {
19
- const cache = this.tables;
20
- if (cache[table]) {
21
- return cache[table];
22
- }
23
-
24
- const infoPromise = getTableInfo(this.mc, table)
25
- .catch(err => { cache[table] = null; throw err; });
26
-
27
- return (cache[table] = infoPromise);
28
- }
29
-
30
- async fieldInfo({ table, column, stats }) {
31
- const tableInfo = await this.tableInfo(table);
32
- const colInfo = tableInfo[column];
33
-
34
- // column does not exist
35
- if (colInfo == null) return;
36
-
37
- // no need for summary statistics
38
- if (!stats?.length) return colInfo;
39
-
40
- const result = await this.mc.query(
41
- summarize(colInfo, stats),
42
- { persist: true }
43
- );
44
- const info = { ...colInfo, ...(Array.from(result)[0]) };
45
-
46
- // coerce bigint to number
47
- for (const key in info) {
48
- const value = info[key];
49
- if (typeof value === 'bigint') {
50
- info[key] = Number(value);
51
- }
52
- }
53
-
54
- return info;
55
- }
56
-
57
- async queryFields(fields) {
58
- const list = await resolveFields(this, fields);
59
- const data = await Promise.all(list.map(f => this.fieldInfo(f)));
60
- return data.filter(x => x);
61
- }
62
- }
63
-
64
- async function getTableInfo(mc, table) {
65
- const result = await mc.query(
66
- `DESCRIBE ${asRelation(table)}`,
67
- { type: 'json', cache: false }
68
- );
69
-
70
- const columns = object();
71
- for (const entry of result) {
72
- columns[entry.column_name] = {
73
- table,
74
- column: entry.column_name,
75
- sqlType: entry.column_type,
76
- type: jsType(entry.column_type),
77
- nullable: entry.null === 'YES'
78
- };
79
- }
80
-
81
- return columns;
82
- }
83
-
84
- async function resolveFields(catalog, list) {
85
- return list.length === 1 && list[0].column === '*'
86
- ? Object.values(await catalog.tableInfo(list[0].table))
87
- : list;
88
- }
@@ -1,23 +0,0 @@
1
- import { Query, count, isNull, max, min } from '@uwdata/mosaic-sql';
2
-
3
- export const Count = 'count';
4
- export const Nulls = 'nulls';
5
- export const Max = 'max';
6
- export const Min = 'min';
7
- export const Distinct = 'distinct';
8
-
9
- export const Stats = { Count, Nulls, Max, Min, Distinct };
10
-
11
- export const statMap = {
12
- [Count]: count,
13
- [Distinct]: column => count(column).distinct(),
14
- [Max]: max,
15
- [Min]: min,
16
- [Nulls]: column => count().where(isNull(column))
17
- };
18
-
19
- export function summarize({ table, column }, stats) {
20
- return Query
21
- .from(table)
22
- .select(stats.map(s => [s, statMap[s](column)]));
23
- }