@uwdata/mosaic-core 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uwdata/mosaic-core",
3
- "version": "0.1.0",
3
+ "version": "0.3.0",
4
4
  "description": "Scalable and extensible linked data views.",
5
5
  "keywords": [
6
6
  "mosaic",
@@ -28,9 +28,9 @@
28
28
  "prepublishOnly": "npm run test && npm run lint && npm run build"
29
29
  },
30
30
  "dependencies": {
31
- "@duckdb/duckdb-wasm": "^1.20.0",
32
- "@uwdata/mosaic-sql": "^0.1.0",
31
+ "@duckdb/duckdb-wasm": "^1.27.0",
32
+ "@uwdata/mosaic-sql": "^0.3.0",
33
33
  "apache-arrow": "^11.0.0"
34
34
  },
35
- "gitHead": "a7967c35349bdf7f00abb113ce1dd9abb233cd62"
35
+ "gitHead": "a8dd23fed4c7a24c0a2ee5261d1aabe4239ce574"
36
36
  }
package/src/Catalog.js CHANGED
@@ -13,30 +13,16 @@ export class Catalog {
13
13
  this.tables = object();
14
14
  }
15
15
 
16
- async tableInfo(table) {
16
+ tableInfo(table) {
17
17
  const cache = this.tables;
18
18
  if (cache[table]) {
19
19
  return cache[table];
20
20
  }
21
21
 
22
- const q = this.mc.query(
23
- `DESCRIBE "${table}"`,
24
- { type: 'json', cache: false }
25
- );
22
+ const infoPromise = getTableInfo(this.mc, table)
23
+ .catch(err => { cache[table] = null; throw err; });
26
24
 
27
- return (cache[table] = q.then(result => {
28
- const columns = object();
29
- for (const entry of result) {
30
- columns[entry.column_name] = {
31
- table,
32
- column: entry.column_name,
33
- sqlType: entry.column_type,
34
- type: jsType(entry.column_type),
35
- nullable: entry.null === 'YES'
36
- };
37
- }
38
- return columns;
39
- }));
25
+ return (cache[table] = infoPromise);
40
26
  }
41
27
 
42
28
  async fieldInfo({ table, column, stats }) {
@@ -49,8 +35,20 @@ export class Catalog {
49
35
  // no need for summary statistics
50
36
  if (!stats?.length) return colInfo;
51
37
 
52
- const result = await this.mc.query(summarize(colInfo, stats));
38
+ const result = await this.mc.query(
39
+ summarize(colInfo, stats),
40
+ { persist: true }
41
+ );
53
42
  const info = { ...colInfo, ...(Array.from(result)[0]) };
43
+
44
+ // coerce bigint to number
45
+ for (const key in info) {
46
+ const value = info[key];
47
+ if (typeof value === 'bigint') {
48
+ info[key] = Number(value);
49
+ }
50
+ }
51
+
54
52
  return info;
55
53
  }
56
54
 
@@ -61,6 +59,26 @@ export class Catalog {
61
59
  }
62
60
  }
63
61
 
62
+ async function getTableInfo(mc, table) {
63
+ const result = await mc.query(
64
+ `DESCRIBE "${table}"`,
65
+ { type: 'json', cache: false }
66
+ );
67
+
68
+ const columns = object();
69
+ for (const entry of result) {
70
+ columns[entry.column_name] = {
71
+ table,
72
+ column: entry.column_name,
73
+ sqlType: entry.column_type,
74
+ type: jsType(entry.column_type),
75
+ nullable: entry.null === 'YES'
76
+ };
77
+ }
78
+
79
+ return columns;
80
+ }
81
+
64
82
  async function resolveFields(catalog, list) {
65
83
  return list.length === 1 && list[0].column === '*'
66
84
  ? Object.values(await catalog.tableInfo(list[0].table))
@@ -1,11 +1,17 @@
1
- import { socketClient } from './clients/socket.js';
1
+ import { socketConnector } from './connectors/socket.js';
2
2
  import { Catalog } from './Catalog.js';
3
3
  import { FilterGroup } from './FilterGroup.js';
4
- import { QueryCache, voidCache } from './QueryCache.js';
4
+ import { QueryManager, Priority } from './QueryManager.js';
5
5
  import { voidLogger } from './util/void-logger.js';
6
6
 
7
7
  let _instance;
8
8
 
9
+ /**
10
+ * Set or retrieve the coordinator instance.
11
+ *
12
+ * @param {Coordinator} coordinator the coordinator instance to set
13
+ * @returns {Coordinator} the coordinator instance
14
+ */
9
15
  export function coordinator(instance) {
10
16
  if (instance) {
11
17
  _instance = instance;
@@ -16,90 +22,100 @@ export function coordinator(instance) {
16
22
  }
17
23
 
18
24
  export class Coordinator {
19
- constructor(db = socketClient(), options = {}) {
25
+ constructor(db = socketConnector(), options = {}) {
20
26
  this.catalog = new Catalog(this);
27
+ this.manager = options.manager || QueryManager();
21
28
  this.logger(options.logger || console);
22
29
  this.configure(options);
23
- this.databaseClient(db);
30
+ this.databaseConnector(db);
24
31
  this.clear();
25
32
  }
26
33
 
27
34
  logger(logger) {
28
- return arguments.length
29
- ? (this._logger = logger || voidLogger())
30
- : this._logger;
35
+ if (arguments.length) {
36
+ this._logger = logger || voidLogger();
37
+ this.manager.logger(this._logger);
38
+ }
39
+ return this._logger;
31
40
  }
32
41
 
33
- configure({ cache = true, indexes = true }) {
34
- this.cache = cache ? new QueryCache() : voidCache();
42
+ configure({ cache = true, consolidate = true, indexes = true }) {
43
+ this.manager.cache(cache);
44
+ this.manager.consolidate(consolidate);
35
45
  this.indexes = indexes;
36
46
  }
37
47
 
38
48
  clear({ clients = true, cache = true, catalog = false } = {}) {
49
+ this.manager.clear();
39
50
  if (clients) {
40
51
  this.clients?.forEach(client => this.disconnect(client));
41
52
  this.filterGroups?.forEach(group => group.finalize());
42
53
  this.clients = new Set;
43
54
  this.filterGroups = new Map;
44
55
  }
45
- if (cache) this.cache.clear();
56
+ if (cache) this.manager.cache().clear();
46
57
  if (catalog) this.catalog.clear();
47
58
  }
48
59
 
49
- databaseClient(db) {
50
- if (arguments.length > 0) {
51
- this.db = db;
52
- }
53
- return this.db;
60
+ databaseConnector(db) {
61
+ return this.manager.connector(db);
54
62
  }
55
63
 
56
- async exec(sql) {
57
- try {
58
- await this.db.query({ type: 'exec', sql });
59
- } catch (err) {
60
- this._logger.error(err);
61
- }
64
+ // -- Query Management ----
65
+
66
+ cancel(requests) {
67
+ this.manager.cancel(requests);
62
68
  }
63
69
 
64
- query(query, { type = 'arrow', cache = true } = {}) {
65
- const sql = String(query);
66
- const t0 = performance.now();
67
- const cached = this.cache.get(sql);
68
- if (cached) {
69
- this._logger.debug('Cache');
70
- return cached;
71
- } else {
72
- const request = this.db.query({ type, sql });
73
- const result = cache ? this.cache.set(sql, request) : request;
74
- result.then(() => this._logger.debug(`Query: ${performance.now() - t0}`));
75
- return result;
76
- }
70
+ exec(query, { priority = Priority.Normal } = {}) {
71
+ return this.manager.request({ type: 'exec', query }, priority);
77
72
  }
78
73
 
79
- async updateClient(client, query) {
80
- let result;
81
- try {
82
- client.queryPending();
83
- result = await this.query(query);
84
- } catch (err) {
85
- this._logger.error(err);
86
- client.queryError(err);
87
- return;
88
- }
89
- try {
90
- client.queryResult(result).update();
91
- } catch (err) {
92
- this._logger.error(err);
93
- }
74
+ query(query, {
75
+ type = 'arrow',
76
+ cache = true,
77
+ priority = Priority.Normal,
78
+ ...options
79
+ } = {}) {
80
+ return this.manager.request({ type, query, cache, options }, priority);
81
+ }
82
+
83
+ prefetch(query, options = {}) {
84
+ return this.query(query, { ...options, cache: true, priority: Priority.Low });
85
+ }
86
+
87
+ createBundle(name, queries, priority = Priority.Low) {
88
+ const options = { name, queries };
89
+ return this.manager.request({ type: 'create-bundle', options }, priority);
90
+ }
91
+
92
+ loadBundle(name, priority = Priority.High) {
93
+ const options = { name };
94
+ return this.manager.request({ type: 'load-bundle', options }, priority);
95
+ }
96
+
97
+ // -- Client Management ----
98
+
99
+ updateClient(client, query, priority = Priority.Normal) {
100
+ client.queryPending();
101
+ return this.query(query, { priority }).then(
102
+ data => client.queryResult(data).update(),
103
+ err => { client.queryError(err); this._logger.error(err); }
104
+ );
94
105
  }
95
106
 
96
- async requestQuery(client, query) {
107
+ requestQuery(client, query) {
97
108
  this.filterGroups.get(client.filterBy)?.reset();
98
109
  return query
99
110
  ? this.updateClient(client, query)
100
111
  : client.update();
101
112
  }
102
113
 
114
+ /**
115
+ * Connect a client to the coordinator.
116
+ *
117
+ * @param {import('./MosaicClient.js').MosaicClient} client the client to disconnect
118
+ */
103
119
  async connect(client) {
104
120
  const { catalog, clients, filterGroups, indexes } = this;
105
121
 
@@ -111,7 +127,7 @@ export class Coordinator {
111
127
  // retrieve field statistics
112
128
  const fields = client.fields();
113
129
  if (fields?.length) {
114
- client.fieldStats(await catalog.queryFields(fields));
130
+ client.fieldInfo(await catalog.queryFields(fields));
115
131
  }
116
132
 
117
133
  // connect filters
@@ -128,6 +144,11 @@ export class Coordinator {
128
144
  client.requestQuery();
129
145
  }
130
146
 
147
+ /**
148
+ * Disconnect a client from the coordinator.
149
+ *
150
+ * @param {import('./MosaicClient.js').MosaicClient} client the client to disconnect
151
+ */
131
152
  disconnect(client) {
132
153
  const { clients, filterGroups } = this;
133
154
  if (!clients.has(client)) return;
@@ -1,6 +1,5 @@
1
- import { Query, expr, and, isBetween, asColumn, epoch_ms } from '@uwdata/mosaic-sql';
1
+ import { Query, and, asColumn, epoch_ms, isBetween, sql } from '@uwdata/mosaic-sql';
2
2
  import { fnv_hash } from './util/hash.js';
3
- import { skipClient } from './util/skip-client.js';
4
3
 
5
4
  const identity = x => x;
6
5
 
@@ -30,35 +29,43 @@ export class DataTileIndexer {
30
29
  this.activeView = null;
31
30
  }
32
31
 
32
+ clear() {
33
+ if (this.indices) {
34
+ this.mc.cancel(Array.from(this.indices.values(), index => index.result));
35
+ this.indices = null;
36
+ }
37
+ }
38
+
33
39
  index(clients, active) {
34
40
  if (this.clients !== clients) {
35
41
  // test client views for compatibility
36
- const cols = Array.from(clients).map(getIndexColumns);
42
+ const cols = Array.from(clients, getIndexColumns);
37
43
  const from = cols[0]?.from;
38
44
  this.enabled = cols.every(c => c && c.from === from);
39
45
  this.clients = clients;
40
- this.indices = null;
41
46
  this.activeView = null;
47
+ this.clear();
42
48
  }
43
49
  if (!this.enabled) return false; // client views are not indexable
44
50
 
45
51
  active = active || this.selection.active;
46
52
  const { source } = active;
53
+ if (source && source === this.activeView?.source) return true; // we're good!
54
+
55
+ this.clear();
47
56
  if (!source) return false; // nothing to work with
48
- if (source === this.activeView?.source) return true; // we're good!
49
57
  const activeView = this.activeView = getActiveView(active);
50
58
  if (!activeView) return false; // active selection clause not compatible
51
59
 
52
60
  this.mc.logger().warn('DATA TILE INDEX CONSTRUCTION');
53
61
 
54
- // create a selection with the active client removed
55
- const sel = this.selection.clone().update({ source });
62
+ // create a selection with the active source removed
63
+ const sel = this.selection.remove(source);
56
64
 
57
65
  // generate data tile indices
58
66
  const indices = this.indices = new Map;
59
- const promises = [];
60
67
  for (const client of clients) {
61
- if (sel.cross && skipClient(client, active)) continue;
68
+ if (sel.skip(client, active)) continue;
62
69
  const index = getIndexColumns(client);
63
70
 
64
71
  // build index construction query
@@ -76,11 +83,9 @@ export class DataTileIndexer {
76
83
  const sql = query.toString();
77
84
  const id = (fnv_hash(sql) >>> 0).toString(16);
78
85
  const table = `tile_index_${id}`;
79
- indices.set(client, { table, ...index });
80
- promises.push(createIndex(this.mc, table, sql));
86
+ const result = createIndex(this.mc, table, sql);
87
+ indices.set(client, { table, result, ...index });
81
88
  }
82
-
83
- return promises;
84
89
  }
85
90
 
86
91
  async update() {
@@ -100,12 +105,12 @@ export class DataTileIndexer {
100
105
  }
101
106
 
102
107
  const { table, dims, aggr } = index;
103
- return this.mc.updateClient(client, Query
108
+ const query = Query
104
109
  .select(dims, aggr)
105
110
  .from(table)
106
111
  .groupby(dims)
107
- .where(filter)
108
- );
112
+ .where(filter);
113
+ return this.mc.updateClient(client, query);
109
114
  }
110
115
  }
111
116
 
@@ -113,22 +118,22 @@ function getActiveView(clause) {
113
118
  const { source, schema } = clause;
114
119
  let columns = clause.predicate?.columns;
115
120
  if (!schema || !columns) return null;
116
- const { type, scales } = schema;
121
+ const { type, scales, pixelSize = 1 } = schema;
117
122
  let predicate;
118
123
 
119
124
  if (type === 'interval' && scales) {
120
- const bins = scales.map(s => binInterval(s));
125
+ const bins = scales.map(s => binInterval(s, pixelSize));
121
126
  if (bins.some(b => b == null)) return null; // unsupported scale type
122
127
 
123
128
  if (bins.length === 1) {
124
- predicate = p => p ? isBetween('active0', p.value.map(bins[0])) : [];
125
- columns = { active0: bins[0](clause.predicate.expr) };
129
+ predicate = p => p ? isBetween('active0', p.range.map(bins[0])) : [];
130
+ columns = { active0: bins[0](clause.predicate.field) };
126
131
  } else {
127
132
  predicate = p => p
128
- ? and(p.value.map(({ value }, i) => isBetween(`active${i}`, value.map(bins[i]))))
133
+ ? and(p.children.map(({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))))
129
134
  : [];
130
135
  columns = Object.fromEntries(
131
- clause.predicate.value.map((p, i) => [`active${i}`, bins[i](p.expr)])
136
+ clause.predicate.children.map((p, i) => [`active${i}`, bins[i](p.field)])
132
137
  );
133
138
  }
134
139
  } else if (type === 'point') {
@@ -141,53 +146,47 @@ function getActiveView(clause) {
141
146
  return { source, columns, predicate };
142
147
  }
143
148
 
144
- function binInterval(scale) {
149
+ function binInterval(scale, pixelSize) {
145
150
  const { type, domain, range } = scale;
146
- let lift, sql;
151
+ let lift, toSql;
147
152
 
148
153
  switch (type) {
149
154
  case 'linear':
150
155
  lift = identity;
151
- sql = asColumn;
156
+ toSql = asColumn;
152
157
  break;
153
158
  case 'log':
154
159
  lift = Math.log;
155
- sql = c => `LN(${asColumn(c)})`;
160
+ toSql = c => sql`LN(${asColumn(c)})`;
156
161
  break;
157
162
  case 'symlog':
158
163
  // TODO: support log constants other than 1?
159
164
  lift = x => Math.sign(x) * Math.log1p(Math.abs(x));
160
- sql = c => (c = asColumn(c), `SIGN(${c}) * LN(1 + ABS(${c}))`);
165
+ toSql = c => (c = asColumn(c), sql`SIGN(${c}) * LN(1 + ABS(${c}))`);
161
166
  break;
162
167
  case 'sqrt':
163
168
  lift = Math.sqrt;
164
- sql = c => `SQRT(${asColumn(c)})`;
169
+ toSql = c => sql`SQRT(${asColumn(c)})`;
165
170
  break;
166
171
  case 'utc':
167
172
  case 'time':
168
173
  lift = x => +x;
169
- sql = c => c instanceof Date ? +c : epoch_ms(asColumn(c));
174
+ toSql = c => c instanceof Date ? +c : epoch_ms(asColumn(c));
170
175
  break;
171
176
  }
172
- return lift ? binFunction(domain, range, lift, sql) : null;
177
+ return lift ? binFunction(domain, range, pixelSize, lift, toSql) : null;
173
178
  }
174
179
 
175
- function binFunction(domain, range, lift, sql) {
180
+ function binFunction(domain, range, pixelSize, lift, toSql) {
176
181
  const lo = lift(Math.min(domain[0], domain[1]));
177
182
  const hi = lift(Math.max(domain[0], domain[1]));
178
- const a = Math.abs(lift(range[1]) - lift(range[0])) / (hi - lo);
179
- return value => expr(
180
- `FLOOR(${a}::DOUBLE * (${sql(value)} - ${lo}::DOUBLE))`,
181
- asColumn(value).columns
182
- );
183
+ const a = (Math.abs(lift(range[1]) - lift(range[0])) / (hi - lo)) / pixelSize;
184
+ const s = pixelSize === 1 ? '' : `${pixelSize}::INTEGER * `;
185
+ return value => sql`${s}FLOOR(${a}::DOUBLE * (${toSql(value)} - ${lo}::DOUBLE))::INTEGER`;
183
186
  }
184
187
 
185
- async function createIndex(mc, table, query) {
186
- try {
187
- await mc.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS ${query}`);
188
- } catch (err) {
189
- mc.logger().error(err);
190
- }
188
+ function createIndex(mc, table, query) {
189
+ return mc.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS ${query}`);
191
190
  }
192
191
 
193
192
  const NO_INDEX = { from: NaN };
@@ -199,25 +198,25 @@ function getIndexColumns(client) {
199
198
  if (!from || !q.groupby) return NO_INDEX;
200
199
  const g = new Set(q.groupby().map(c => c.column));
201
200
 
202
- let aggr = [];
203
- let dims = [];
201
+ const aggr = [];
202
+ const dims = [];
204
203
  let count;
205
204
 
206
205
  for (const { as, expr: { aggregate } } of q.select()) {
207
206
  switch (aggregate?.toUpperCase()) {
208
207
  case 'COUNT':
209
208
  case 'SUM':
210
- aggr.push({ [as]: expr(`SUM("${as}")::DOUBLE`) });
209
+ aggr.push({ [as]: sql`SUM("${as}")::DOUBLE` });
211
210
  break;
212
211
  case 'AVG':
213
212
  count = '_count_';
214
- aggr.push({ [as]: expr(`(SUM("${as}" * ${count}) / SUM(${count}))::DOUBLE`) });
213
+ aggr.push({ [as]: sql`(SUM("${as}" * ${count}) / SUM(${count}))::DOUBLE` });
215
214
  break;
216
215
  case 'MAX':
217
- aggr.push({ [as]: expr(`MAX("${as}")`) });
216
+ aggr.push({ [as]: sql`MAX("${as}")` });
218
217
  break;
219
218
  case 'MIN':
220
- aggr.push({ [as]: expr(`MIN("${as}")`) });
219
+ aggr.push({ [as]: sql`MIN("${as}")` });
221
220
  break;
222
221
  default:
223
222
  if (g.has(as)) dims.push(as);
@@ -228,7 +227,7 @@ function getIndexColumns(client) {
228
227
  return {
229
228
  aggr,
230
229
  dims,
231
- count: count ? { [count]: expr('COUNT(*)') } : {},
230
+ count: count ? { [count]: sql`COUNT(*)` } : {},
232
231
  from
233
232
  };
234
233
  }
@@ -244,7 +243,7 @@ function getBaseTable(query) {
244
243
  }
245
244
 
246
245
  // handle set operations / subqueries
247
- let base = getBaseTable(subq[0]);
246
+ const base = getBaseTable(subq[0]);
248
247
  for (let i = 1; i < subq.length; ++i) {
249
248
  const from = getBaseTable(subq[i]);
250
249
  if (from === undefined) continue;
@@ -1,5 +1,4 @@
1
1
  import { DataTileIndexer } from './DataTileIndexer.js';
2
- import { throttle } from './util/throttle.js';
3
2
 
4
3
  export class FilterGroup {
5
4
  constructor(coordinator, selection, index = true) {
@@ -9,10 +8,8 @@ export class FilterGroup {
9
8
  this.indexer = index ? new DataTileIndexer(this.mc, selection) : null;
10
9
 
11
10
  const { value, activate } = this.handlers = {
12
- value: throttle(() => this.update()),
13
- activate: clause => {
14
- this.indexer?.index(this.clients, clause);
15
- }
11
+ value: () => this.update(),
12
+ activate: clause => this.indexer?.index(this.clients, clause)
16
13
  };
17
14
  selection.addEventListener('value', value);
18
15
  selection.addEventListener('activate', activate);
@@ -40,7 +37,7 @@ export class FilterGroup {
40
37
  return this;
41
38
  }
42
39
 
43
- async update() {
40
+ update() {
44
41
  const { mc, indexer, clients, selection } = this;
45
42
  return indexer?.index(clients)
46
43
  ? indexer.update()
@@ -40,10 +40,10 @@ export class MosaicClient {
40
40
  }
41
41
 
42
42
  /**
43
- * Called by the coordinator to set the field statistics for this client.
43
+ * Called by the coordinator to set the field info for this client.
44
44
  * @returns {this}
45
45
  */
46
- fieldStats() {
46
+ fieldInfo() {
47
47
  return this;
48
48
  }
49
49
 
@@ -63,6 +63,9 @@ export class MosaicClient {
63
63
 
64
64
  /**
65
65
  * Called by the coordinator to return a query result.
66
+ *
67
+ * @param {*} data the query result
68
+ * @returns {this}
66
69
  */
67
70
  queryResult() {
68
71
  return this;