@uwdata/mosaic-core 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uwdata/mosaic-core",
3
- "version": "0.0.1",
3
+ "version": "0.2.0",
4
4
  "description": "Scalable and extensible linked data views.",
5
5
  "keywords": [
6
6
  "mosaic",
@@ -28,8 +28,9 @@
28
28
  "prepublishOnly": "npm run test && npm run lint && npm run build"
29
29
  },
30
30
  "dependencies": {
31
- "@duckdb/duckdb-wasm": "^1.20.0",
32
- "@uwdata/mosaic-sql": "^0.0.1",
33
- "apache-arrow": "^11.0.0"
34
- }
31
+ "@duckdb/duckdb-wasm": "^1.25.0",
32
+ "@uwdata/mosaic-sql": "^0.2.0",
33
+ "apache-arrow": "^12.0.0"
34
+ },
35
+ "gitHead": "e53cd914c807f99aabe78dcbe618dd9543e2f438"
35
36
  }
package/src/Catalog.js CHANGED
@@ -1,17 +1,16 @@
1
- import { Query, count, max, min, isNull } from '@uwdata/mosaic-sql';
2
1
  import { jsType } from './util/js-type.js';
2
+ import { summarize } from './util/summarize.js';
3
3
 
4
4
  const object = () => Object.create(null);
5
5
 
6
6
  export class Catalog {
7
- constructor(mc) {
8
- this.mc = mc;
7
+ constructor(coordinator) {
8
+ this.mc = coordinator;
9
9
  this.clear();
10
10
  }
11
11
 
12
12
  clear() {
13
13
  this.tables = object();
14
- this.fields = object();
15
14
  }
16
15
 
17
16
  async tableInfo(table) {
@@ -21,62 +20,61 @@ export class Catalog {
21
20
  }
22
21
 
23
22
  const q = this.mc.query(
24
- `PRAGMA table_info('${table}')`,
25
- { type: 'json' }
23
+ `DESCRIBE "${table}"`,
24
+ { type: 'json', cache: false }
26
25
  );
27
26
 
28
27
  return (cache[table] = q.then(result => {
29
28
  const columns = object();
30
29
  for (const entry of result) {
31
- columns[entry.name] = { ...entry, jstype: jsType(entry.type) };
30
+ columns[entry.column_name] = {
31
+ table,
32
+ column: entry.column_name,
33
+ sqlType: entry.column_type,
34
+ type: jsType(entry.column_type),
35
+ nullable: entry.null === 'YES'
36
+ };
32
37
  }
33
38
  return columns;
34
39
  }));
35
40
  }
36
41
 
37
- async fieldInfo(table, column) {
38
- const info = await this.tableInfo(table);
39
- const colInfo = info[column];
42
+ async fieldInfo({ table, column, stats }) {
43
+ const tableInfo = await this.tableInfo(table);
44
+ const colInfo = tableInfo[column];
40
45
 
41
46
  // column does not exist
42
47
  if (colInfo == null) return;
43
48
 
44
- const cache = this.fields;
45
- const key = `${table}.${column}`;
46
- if (cache[key]) {
47
- return cache[key];
48
- }
49
+ // no need for summary statistics
50
+ if (!stats?.length) return colInfo;
51
+
52
+ const result = await this.mc.query(
53
+ summarize(colInfo, stats),
54
+ { persist: true }
55
+ );
56
+ const info = { ...colInfo, ...(Array.from(result)[0]) };
49
57
 
50
- const promise = this.mc.query(
51
- Query.from(table).select({
52
- rows: count(),
53
- nulls: count().where(isNull(column)),
54
- min: min(column),
55
- max: max(column)
56
- }, { cache: false })
57
- ).then(result => {
58
- const [ stats ] = Array.from(result);
59
- return { table, column, type: colInfo.jstype, ...stats };
60
- });
58
+ // coerce bigint to number
59
+ for (const key in info) {
60
+ const value = info[key];
61
+ if (typeof value === 'bigint') {
62
+ info[key] = Number(value);
63
+ }
64
+ }
61
65
 
62
- return (cache[key] = promise);
66
+ return info;
63
67
  }
64
68
 
65
69
  async queryFields(fields) {
66
70
  const list = await resolveFields(this, fields);
67
- const data = await Promise.all(
68
- list.map(f => this.fieldInfo(f.table, f.column))
69
- )
71
+ const data = await Promise.all(list.map(f => this.fieldInfo(f)));
70
72
  return data.filter(x => x);
71
73
  }
72
74
  }
73
75
 
74
76
  async function resolveFields(catalog, list) {
75
- if (list.length === 1 && list[0].column === '*') {
76
- const table = list[0].table;
77
- const info = await catalog.tableInfo(table);
78
- return Object.keys(info).map(column => ({ table, column }));
79
- } else {
80
- return list;
81
- }
77
+ return list.length === 1 && list[0].column === '*'
78
+ ? Object.values(await catalog.tableInfo(list[0].table))
79
+ : list;
82
80
  }
package/src/Coordinator.js CHANGED
@@ -1,7 +1,8 @@
1
- import { socketClient } from './clients/socket.js';
1
+ import { socketConnector } from './connectors/socket.js';
2
2
  import { Catalog } from './Catalog.js';
3
3
  import { FilterGroup } from './FilterGroup.js';
4
- import { QueryCache } from './QueryCache.js';
4
+ import { QueryManager, Priority } from './QueryManager.js';
5
+ import { voidLogger } from './util/void-logger.js';
5
6
 
6
7
  let _instance;
7
8
 
@@ -15,75 +16,92 @@ export function coordinator(instance) {
15
16
  }
16
17
 
17
18
  export class Coordinator {
18
- constructor(db = socketClient()) {
19
- this.cache = new QueryCache();
19
+ constructor(db = socketConnector(), options = {}) {
20
20
  this.catalog = new Catalog(this);
21
- this.indexes = true;
22
- this.databaseClient(db);
21
+ this.manager = options.manager || QueryManager();
22
+ this.logger(options.logger || console);
23
+ this.configure(options);
24
+ this.databaseConnector(db);
23
25
  this.clear();
26
+ this._recorders = [];
27
+ }
28
+
29
+ logger(logger) {
30
+ if (arguments.length) {
31
+ this._logger = logger || voidLogger();
32
+ this.manager.logger(this._logger);
33
+ }
34
+ return this._logger;
24
35
  }
25
36
 
26
37
  configure({ cache = true, indexes = true }) {
27
- this.cache = cache ? new QueryCache() : {
28
- get: () => undefined,
29
- set: (key, result) => result,
30
- clear: () => {}
31
- };
38
+ this.manager.cache(cache);
32
39
  this.indexes = indexes;
33
40
  }
34
41
 
35
42
  clear({ clients = true, cache = true, catalog = false } = {}) {
36
43
  if (clients) {
37
- this.clients?.forEach((_, client) => this.disconnect(client));
44
+ this.clients?.forEach(client => this.disconnect(client));
38
45
  this.filterGroups?.forEach(group => group.finalize());
39
- this.clients = new Map;
46
+ this.clients = new Set;
40
47
  this.filterGroups = new Map;
41
48
  }
42
- if (cache) this.cache.clear();
49
+ if (cache) this.manager.cache().clear();
43
50
  if (catalog) this.catalog.clear();
44
51
  }
45
52
 
46
- databaseClient(db) {
47
- if (arguments.length > 0) {
48
- this.db = db;
49
- }
50
- return this.db;
53
+ databaseConnector(db) {
54
+ return this.manager.connector(db);
51
55
  }
52
56
 
53
- async exec(sql) {
54
- try {
55
- await this.db.query({ type: 'exec', sql });
56
- } catch (err) {
57
- console.error(err);
58
- }
57
+ // -- Query Management ----
58
+
59
+ cancel(requests) {
60
+ this.manager.cancel(requests);
59
61
  }
60
62
 
61
- async query(query, { type = 'arrow', cache = true } = {}) {
62
- const sql = String(query);
63
- const cached = this.cache.get(sql);
64
- if (cached) {
65
- return cached;
66
- } else {
67
- const request = this.db.query({ type, sql });
68
- return cache ? this.cache.set(sql, request) : request;
69
- }
63
+ exec(query, { priority = Priority.Normal } = {}) {
64
+ return this.manager.request({ type: 'exec', query }, priority);
70
65
  }
71
66
 
72
- async updateClient(client, query) {
73
- let result;
74
- try {
75
- client.queryPending();
76
- result = await this.query(query);
77
- } catch (err) {
78
- console.error(err);
79
- client.queryError(err);
80
- return;
81
- }
82
- try {
83
- client.queryResult(result).update();
84
- } catch (err) {
85
- console.error(err);
86
- }
67
+ query(query, {
68
+ type = 'arrow',
69
+ cache = true,
70
+ priority = Priority.Normal,
71
+ ...options
72
+ } = {}) {
73
+ return this.manager.request({ type, query, cache, options }, priority);
74
+ }
75
+
76
+ prefetch(query, options = {}) {
77
+ return this.query(query, { ...options, cache: true, priority: Priority.Low });
78
+ }
79
+
80
+ createBundle(name, queries, priority = Priority.Low) {
81
+ const options = { name, queries };
82
+ return this.manager.request({ type: 'create-bundle', options }, priority);
83
+ }
84
+
85
+ loadBundle(name, priority = Priority.High) {
86
+ const options = { name };
87
+ return this.manager.request({ type: 'load-bundle', options }, priority);
88
+ }
89
+
90
+ // -- Client Management ----
91
+
92
+ updateClient(client, query, priority = Priority.Normal) {
93
+ client.queryPending();
94
+ return this.query(query, { priority }).then(
95
+ data => client.queryResult(data).update(),
96
+ err => { client.queryError(err); this._logger.error(err); }
97
+ );
98
+ }
99
+
100
+ requestQuery(client, query) {
101
+ this.filterGroups.get(client.filterBy)?.reset();
102
+ return query
103
+ ? this.updateClient(client, query)
104
+ : client.update();
87
105
  }
88
106
 
89
107
  async connect(client) {
@@ -92,12 +110,12 @@ export class Coordinator {
92
110
  if (clients.has(client)) {
93
111
  throw new Error('Client already connected.');
94
112
  }
95
- clients.set(client, null); // mark as connected
113
+ clients.add(client); // mark as connected
96
114
 
97
115
  // retrieve field statistics
98
116
  const fields = client.fields();
99
117
  if (fields?.length) {
100
- client.fieldStats(await catalog.queryFields(fields));
118
+ client.fieldInfo(await catalog.queryFields(fields));
101
119
  }
102
120
 
103
121
  // connect filters
@@ -111,27 +129,13 @@ export class Coordinator {
111
129
  }
112
130
  }
113
131
 
114
- // query handler
115
- const handler = async (query) => {
116
- const q = query || client.query(filter?.predicate(client));
117
- filterGroups.get(filter)?.reset();
118
- if (q) this.updateClient(client, q);
119
- };
120
- clients.set(client, handler);
121
-
122
- // register request handler, if defined
123
- client.request?.addEventListener('value', handler);
124
-
125
- // TODO analyze / consolidate queries?
126
- handler();
132
+ client.requestQuery();
127
133
  }
128
134
 
129
135
  disconnect(client) {
130
136
  const { clients, filterGroups } = this;
131
137
  if (!clients.has(client)) return;
132
- const handler = clients.get(client);
133
138
  clients.delete(client);
134
139
  filterGroups.get(client.filterBy)?.remove(client);
135
- client.request?.removeEventListener(handler);
136
140
  }
137
141
  }
package/src/DataTileIndexer.js CHANGED
@@ -1,6 +1,5 @@
1
- import { Query, expr, and, isBetween, asColumn, epoch_ms } from '@uwdata/mosaic-sql';
1
+ import { Query, and, asColumn, epoch_ms, isBetween, sql } from '@uwdata/mosaic-sql';
2
2
  import { fnv_hash } from './util/hash.js';
3
- import { skipClient } from './util/skip-client.js';
4
3
 
5
4
  const identity = x => x;
6
5
 
@@ -30,34 +29,43 @@ export class DataTileIndexer {
30
29
  this.activeView = null;
31
30
  }
32
31
 
32
+ clear() {
33
+ if (this.indices) {
34
+ this.mc.cancel(Array.from(this.indices.values(), index => index.result));
35
+ this.indices = null;
36
+ }
37
+ }
38
+
33
39
  index(clients, active) {
34
40
  if (this.clients !== clients) {
35
41
  // test client views for compatibility
36
- const cols = Array.from(clients).map(getIndexColumns);
42
+ const cols = Array.from(clients, getIndexColumns);
37
43
  const from = cols[0]?.from;
38
44
  this.enabled = cols.every(c => c && c.from === from);
39
45
  this.clients = clients;
40
- this.indices = null;
41
46
  this.activeView = null;
47
+ this.clear();
42
48
  }
43
49
  if (!this.enabled) return false; // client views are not indexable
44
50
 
45
51
  active = active || this.selection.active;
46
52
  const { source } = active;
53
+ if (source && source === this.activeView?.source) return true; // we're good!
54
+
55
+ this.clear();
47
56
  if (!source) return false; // nothing to work with
48
- if (source === this.activeView?.source) return true; // we're good!
49
57
  const activeView = this.activeView = getActiveView(active);
50
58
  if (!activeView) return false; // active selection clause not compatible
51
59
 
52
- console.warn('DATA TILE INDEX CONSTRUCTION');
60
+ this.mc.logger().warn('DATA TILE INDEX CONSTRUCTION');
53
61
 
54
- // create a selection with the active client removed
55
- const sel = this.selection.clone().update({ source });
62
+ // create a selection with the active source removed
63
+ const sel = this.selection.remove(source);
56
64
 
57
65
  // generate data tile indices
58
66
  const indices = this.indices = new Map;
59
67
  for (const client of clients) {
60
- if (sel.cross && skipClient(client, active)) continue;
68
+ if (sel.skip(client, active)) continue;
61
69
  const index = getIndexColumns(client);
62
70
 
63
71
  // build index construction query
@@ -75,11 +83,9 @@ export class DataTileIndexer {
75
83
  const sql = query.toString();
76
84
  const id = (fnv_hash(sql) >>> 0).toString(16);
77
85
  const table = `tile_index_${id}`;
78
- indices.set(client, { table, ...index });
79
- createIndex(this.mc, table, sql);
86
+ const result = createIndex(this.mc, table, sql);
87
+ indices.set(client, { table, result, ...index });
80
88
  }
81
-
82
- return true;
83
89
  }
84
90
 
85
91
  async update() {
@@ -99,12 +105,12 @@ export class DataTileIndexer {
99
105
  }
100
106
 
101
107
  const { table, dims, aggr } = index;
102
- return this.mc.updateClient(client, Query
108
+ const query = Query
103
109
  .select(dims, aggr)
104
110
  .from(table)
105
111
  .groupby(dims)
106
- .where(filter)
107
- );
112
+ .where(filter);
113
+ return this.mc.updateClient(client, query);
108
114
  }
109
115
  }
110
116
 
@@ -112,22 +118,22 @@ function getActiveView(clause) {
112
118
  const { source, schema } = clause;
113
119
  let columns = clause.predicate?.columns;
114
120
  if (!schema || !columns) return null;
115
- const { type, scales } = schema;
121
+ const { type, scales, pixelSize = 1 } = schema;
116
122
  let predicate;
117
123
 
118
124
  if (type === 'interval' && scales) {
119
- const bins = scales.map(s => binInterval(s));
125
+ const bins = scales.map(s => binInterval(s, pixelSize));
120
126
  if (bins.some(b => b == null)) return null; // unsupported scale type
121
127
 
122
128
  if (bins.length === 1) {
123
- predicate = p => p ? isBetween('active0', p.value.map(bins[0])) : [];
124
- columns = { active0: bins[0](clause.predicate.expr) };
129
+ predicate = p => p ? isBetween('active0', p.range.map(bins[0])) : [];
130
+ columns = { active0: bins[0](clause.predicate.field) };
125
131
  } else {
126
132
  predicate = p => p
127
- ? and(p.value.map(({ value }, i) => isBetween(`active${i}`, value.map(bins[i]))))
133
+ ? and(p.children.map(({ range }, i) => isBetween(`active${i}`, range.map(bins[i]))))
128
134
  : [];
129
135
  columns = Object.fromEntries(
130
- clause.predicate.value.map((p, i) => [`active${i}`, bins[i](p.expr)])
136
+ clause.predicate.children.map((p, i) => [`active${i}`, bins[i](p.field)])
131
137
  );
132
138
  }
133
139
  } else if (type === 'point') {
@@ -140,82 +146,77 @@ function getActiveView(clause) {
140
146
  return { source, columns, predicate };
141
147
  }
142
148
 
143
- function binInterval(scale) {
149
+ function binInterval(scale, pixelSize) {
144
150
  const { type, domain, range } = scale;
145
- let lift, sql;
151
+ let lift, toSql;
146
152
 
147
153
  switch (type) {
148
154
  case 'linear':
149
155
  lift = identity;
150
- sql = asColumn;
156
+ toSql = asColumn;
151
157
  break;
152
158
  case 'log':
153
159
  lift = Math.log;
154
- sql = c => `LN(${asColumn(c)})`;
160
+ toSql = c => sql`LN(${asColumn(c)})`;
155
161
  break;
156
162
  case 'symlog':
157
163
  // TODO: support log constants other than 1?
158
164
  lift = x => Math.sign(x) * Math.log1p(Math.abs(x));
159
- sql = c => (c = asColumn(c), `SIGN(${c}) * LN(1 + ABS(${c}))`);
165
+ toSql = c => (c = asColumn(c), sql`SIGN(${c}) * LN(1 + ABS(${c}))`);
160
166
  break;
161
167
  case 'sqrt':
162
168
  lift = Math.sqrt;
163
- sql = c => `SQRT(${asColumn(c)})`;
169
+ toSql = c => sql`SQRT(${asColumn(c)})`;
164
170
  break;
165
171
  case 'utc':
166
172
  case 'time':
167
173
  lift = x => +x;
168
- sql = c => c instanceof Date ? +c : epoch_ms(asColumn(c));
174
+ toSql = c => c instanceof Date ? +c : epoch_ms(asColumn(c));
169
175
  break;
170
176
  }
171
- return lift ? binFunction(domain, range, lift, sql) : null;
177
+ return lift ? binFunction(domain, range, pixelSize, lift, toSql) : null;
172
178
  }
173
179
 
174
- function binFunction(domain, range, lift, sql) {
180
+ function binFunction(domain, range, pixelSize, lift, toSql) {
175
181
  const lo = lift(Math.min(domain[0], domain[1]));
176
182
  const hi = lift(Math.max(domain[0], domain[1]));
177
- const a = Math.abs(lift(range[1]) - lift(range[0])) / (hi - lo);
178
- return value => expr(
179
- `FLOOR(${a}::DOUBLE * (${sql(value)} - ${lo}::DOUBLE))`,
180
- asColumn(value).columns
181
- );
183
+ const a = (Math.abs(lift(range[1]) - lift(range[0])) / (hi - lo)) / pixelSize;
184
+ const s = pixelSize === 1 ? '' : `${pixelSize}::INTEGER * `;
185
+ return value => sql`${s}FLOOR(${a}::DOUBLE * (${toSql(value)} - ${lo}::DOUBLE))::INTEGER`;
182
186
  }
183
187
 
184
- async function createIndex(mc, table, query) {
185
- try {
186
- await mc.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS ${query}`);
187
- } catch (err) {
188
- console.error(err);
189
- }
188
+ function createIndex(mc, table, query) {
189
+ return mc.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS ${query}`);
190
190
  }
191
191
 
192
+ const NO_INDEX = { from: NaN };
193
+
192
194
  function getIndexColumns(client) {
195
+ if (!client.filterIndexable) return NO_INDEX;
193
196
  const q = client.query();
194
197
  const from = getBaseTable(q);
195
- if (!from || !q.groupby || !client.filterIndexable) {
196
- return { from: NaN }; // early exit
197
- }
198
+ if (!from || !q.groupby) return NO_INDEX;
198
199
  const g = new Set(q.groupby().map(c => c.column));
199
200
 
200
- let aggr = [];
201
- let dims = [];
201
+ const aggr = [];
202
+ const dims = [];
202
203
  let count;
203
204
 
204
205
  for (const { as, expr: { aggregate } } of q.select()) {
205
206
  switch (aggregate?.toUpperCase()) {
206
207
  case 'COUNT':
207
208
  case 'SUM':
208
- aggr.push({ [as]: expr(`SUM("${as}")::DOUBLE`) });
209
+ aggr.push({ [as]: sql`SUM("${as}")::DOUBLE` });
209
210
  break;
210
211
  case 'AVG':
211
212
  count = '_count_';
212
- aggr.push({ [as]: expr(`(SUM("${as}" * ${count}) / SUM(${count}))::DOUBLE`) });
213
+ aggr.push({ [as]: sql`(SUM("${as}" * ${count}) / SUM(${count}))::DOUBLE` });
213
214
  break;
214
215
  case 'MAX':
215
- aggr.push({ [as]: expr(`MAX("${as}")`) });
216
+ aggr.push({ [as]: sql`MAX("${as}")` });
216
217
  break;
217
218
  case 'MIN':
218
- aggr.push({ [as]: expr(`MIN("${as}")`) });
219
+ aggr.push({ [as]: sql`MIN("${as}")` });
219
220
  break;
220
221
  default:
221
222
  if (g.has(as)) dims.push(as);
@@ -226,7 +227,7 @@ function getIndexColumns(client) {
226
227
  return {
227
228
  aggr,
228
229
  dims,
229
- count: count ? { [count]: expr('COUNT(*)') } : {},
230
+ count: count ? { [count]: sql`COUNT(*)` } : {},
230
231
  from
231
232
  };
232
233
  }
@@ -242,7 +243,7 @@ function getBaseTable(query) {
242
243
  }
243
244
 
244
245
  // handle set operations / subqueries
245
- let base = getBaseTable(subq[0]);
246
+ const base = getBaseTable(subq[0]);
246
247
  for (let i = 1; i < subq.length; ++i) {
247
248
  const from = getBaseTable(subq[i]);
248
249
  if (from === undefined) continue;
package/src/FilterGroup.js CHANGED
@@ -1,18 +1,15 @@
1
1
  import { DataTileIndexer } from './DataTileIndexer.js';
2
- import { throttle } from './util/throttle.js';
3
2
 
4
3
  export class FilterGroup {
5
- constructor(mc, selection, index = true) {
6
- this.mc = mc;
4
+ constructor(coordinator, selection, index = true) {
5
+ this.mc = coordinator;
7
6
  this.selection = selection;
8
7
  this.clients = new Set();
9
- this.indexer = index ? new DataTileIndexer(mc, selection) : null;
8
+ this.indexer = index ? new DataTileIndexer(this.mc, selection) : null;
10
9
 
11
10
  const { value, activate } = this.handlers = {
12
- value: throttle(() => this.update()),
13
- activate: clause => {
14
- this.indexer?.index(this.clients, clause);
15
- }
11
+ value: () => this.update(),
12
+ activate: clause => this.indexer?.index(this.clients, clause)
16
13
  };
17
14
  selection.addEventListener('value', value);
18
15
  selection.addEventListener('activate', activate);
@@ -40,7 +37,7 @@ export class FilterGroup {
40
37
  return this;
41
38
  }
42
39
 
43
- async update() {
40
+ update() {
44
41
  const { mc, indexer, clients, selection } = this;
45
42
  return indexer?.index(clients)
46
43
  ? indexer.update()