@uwdata/mosaic-core 0.2.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uwdata/mosaic-core",
3
- "version": "0.2.0",
3
+ "version": "0.3.2",
4
4
  "description": "Scalable and extensible linked data views.",
5
5
  "keywords": [
6
6
  "mosaic",
@@ -28,9 +28,9 @@
28
28
  "prepublishOnly": "npm run test && npm run lint && npm run build"
29
29
  },
30
30
  "dependencies": {
31
- "@duckdb/duckdb-wasm": "^1.25.0",
32
- "@uwdata/mosaic-sql": "^0.2.0",
33
- "apache-arrow": "^12.0.0"
31
+ "@duckdb/duckdb-wasm": "^1.27.0",
32
+ "@uwdata/mosaic-sql": "^0.3.2",
33
+ "apache-arrow": "^11.0.0"
34
34
  },
35
- "gitHead": "e53cd914c807f99aabe78dcbe618dd9543e2f438"
35
+ "gitHead": "788bb137cc402b472fc7e4d84844c78151707c82"
36
36
  }
package/src/Catalog.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { asRelation } from '@uwdata/mosaic-sql';
1
2
  import { jsType } from './util/js-type.js';
2
3
  import { summarize } from './util/summarize.js';
3
4
 
@@ -13,30 +14,16 @@ export class Catalog {
13
14
  this.tables = object();
14
15
  }
15
16
 
16
- async tableInfo(table) {
17
+ tableInfo(table) {
17
18
  const cache = this.tables;
18
19
  if (cache[table]) {
19
20
  return cache[table];
20
21
  }
21
22
 
22
- const q = this.mc.query(
23
- `DESCRIBE "${table}"`,
24
- { type: 'json', cache: false }
25
- );
23
+ const infoPromise = getTableInfo(this.mc, table)
24
+ .catch(err => { cache[table] = null; throw err; });
26
25
 
27
- return (cache[table] = q.then(result => {
28
- const columns = object();
29
- for (const entry of result) {
30
- columns[entry.column_name] = {
31
- table,
32
- column: entry.column_name,
33
- sqlType: entry.column_type,
34
- type: jsType(entry.column_type),
35
- nullable: entry.null === 'YES'
36
- };
37
- }
38
- return columns;
39
- }));
26
+ return (cache[table] = infoPromise);
40
27
  }
41
28
 
42
29
  async fieldInfo({ table, column, stats }) {
@@ -73,6 +60,26 @@ export class Catalog {
73
60
  }
74
61
  }
75
62
 
63
+ async function getTableInfo(mc, table) {
64
+ const result = await mc.query(
65
+ `DESCRIBE ${asRelation(table)}`,
66
+ { type: 'json', cache: false }
67
+ );
68
+
69
+ const columns = object();
70
+ for (const entry of result) {
71
+ columns[entry.column_name] = {
72
+ table,
73
+ column: entry.column_name,
74
+ sqlType: entry.column_type,
75
+ type: jsType(entry.column_type),
76
+ nullable: entry.null === 'YES'
77
+ };
78
+ }
79
+
80
+ return columns;
81
+ }
82
+
76
83
  async function resolveFields(catalog, list) {
77
84
  return list.length === 1 && list[0].column === '*'
78
85
  ? Object.values(await catalog.tableInfo(list[0].table))
@@ -6,6 +6,12 @@ import { voidLogger } from './util/void-logger.js';
6
6
 
7
7
  let _instance;
8
8
 
9
+ /**
10
+ * Set or retrieve the coordinator instance.
11
+ *
12
+ * @param {Coordinator} instance the coordinator instance to set
13
+ * @returns {Coordinator} the coordinator instance
14
+ */
9
15
  export function coordinator(instance) {
10
16
  if (instance) {
11
17
  _instance = instance;
@@ -23,7 +29,6 @@ export class Coordinator {
23
29
  this.configure(options);
24
30
  this.databaseConnector(db);
25
31
  this.clear();
26
- this._recorders = [];
27
32
  }
28
33
 
29
34
  logger(logger) {
@@ -34,12 +39,14 @@ export class Coordinator {
34
39
  return this._logger;
35
40
  }
36
41
 
37
- configure({ cache = true, indexes = true }) {
42
+ configure({ cache = true, consolidate = true, indexes = true }) {
38
43
  this.manager.cache(cache);
44
+ this.manager.consolidate(consolidate);
39
45
  this.indexes = indexes;
40
46
  }
41
47
 
42
48
  clear({ clients = true, cache = true, catalog = false } = {}) {
49
+ this.manager.clear();
43
50
  if (clients) {
44
51
  this.clients?.forEach(client => this.disconnect(client));
45
52
  this.filterGroups?.forEach(group => group.finalize());
@@ -104,6 +111,11 @@ export class Coordinator {
104
111
  : client.update();
105
112
  }
106
113
 
114
+ /**
115
+ * Connect a client to the coordinator.
116
+ *
117
+ * @param {import('./MosaicClient.js').MosaicClient} client the client to connect
118
+ */
107
119
  async connect(client) {
108
120
  const { catalog, clients, filterGroups, indexes } = this;
109
121
 
@@ -132,6 +144,11 @@ export class Coordinator {
132
144
  client.requestQuery();
133
145
  }
134
146
 
147
+ /**
148
+ * Disconnect a client from the coordinator.
149
+ *
150
+ * @param {import('./MosaicClient.js').MosaicClient} client the client to disconnect
151
+ */
135
152
  disconnect(client) {
136
153
  const { clients, filterGroups } = this;
137
154
  if (!clients.has(client)) return;
@@ -1,24 +1,30 @@
1
- import { Query, and, asColumn, epoch_ms, isBetween, sql } from '@uwdata/mosaic-sql';
1
+ import { Query, and, asColumn, create, epoch_ms, isBetween, sql } from '@uwdata/mosaic-sql';
2
2
  import { fnv_hash } from './util/hash.js';
3
3
 
4
4
  const identity = x => x;
5
5
 
6
6
  /**
7
- * Build and query optimized indices ("data tiles") for fast computation of
7
+ * Build and query optimized indices ("data cubes") for fast computation of
8
8
  * groupby aggregate queries over compatible client queries and selections.
9
- * A data tile contains pre-aggregated data for a Mosaic client, subdivided
10
- * by possible query values from an active view. Index tiles are realized as
9
+ * A data cube contains pre-aggregated data for a Mosaic client, subdivided
10
+ * by possible query values from an active view. Indexes are realized
11
11
  * as temporary database tables that can be queried for rapid updates.
12
12
  * Compatible client queries must pull data from the same backing table and
13
13
  * must consist of only groupby dimensions and supported aggregates.
14
14
  * Compatible selections must contain an active clause that exposes a schema
15
15
  * for an interval or point value predicate.
16
16
  */
17
- export class DataTileIndexer {
18
-
19
- constructor(mc, selection) {
17
+ export class DataCubeIndexer {
18
+ /**
19
+ *
20
+ * @param {import('./Coordinator.js').Coordinator} mc a Mosaic coordinator
21
+ * @param {*} options Options hash to configure the data cube indexes and pass selections to the coordinator.
22
+ */
23
+ constructor(mc, { selection, temp = true }) {
24
+ /** @type import('./Coordinator.js').Coordinator */
20
25
  this.mc = mc;
21
26
  this.selection = selection;
27
+ this.temp = temp;
22
28
  this.reset();
23
29
  }
24
30
 
@@ -57,13 +63,14 @@ export class DataTileIndexer {
57
63
  const activeView = this.activeView = getActiveView(active);
58
64
  if (!activeView) return false; // active selection clause not compatible
59
65
 
60
- this.mc.logger().warn('DATA TILE INDEX CONSTRUCTION');
66
+ this.mc.logger().warn('DATA CUBE INDEX CONSTRUCTION');
61
67
 
62
68
  // create a selection with the active source removed
63
69
  const sel = this.selection.remove(source);
64
70
 
65
- // generate data tile indices
71
+ // generate data cube indices
66
72
  const indices = this.indices = new Map;
73
+ const { mc, temp } = this;
67
74
  for (const client of clients) {
68
75
  if (sel.skip(client, active)) continue;
69
76
  const index = getIndexColumns(client);
@@ -82,8 +89,8 @@ export class DataTileIndexer {
82
89
 
83
90
  const sql = query.toString();
84
91
  const id = (fnv_hash(sql) >>> 0).toString(16);
85
- const table = `tile_index_${id}`;
86
- const result = createIndex(this.mc, table, sql);
92
+ const table = `cube_index_${id}`;
93
+ const result = mc.exec(create(table, sql, { temp }));
87
94
  indices.set(client, { table, result, ...index });
88
95
  }
89
96
  }
@@ -185,10 +192,6 @@ function binFunction(domain, range, pixelSize, lift, toSql) {
185
192
  return value => sql`${s}FLOOR(${a}::DOUBLE * (${toSql(value)} - ${lo}::DOUBLE))::INTEGER`;
186
193
  }
187
194
 
188
- function createIndex(mc, table, query) {
189
- return mc.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS ${query}`);
190
- }
191
-
192
195
  const NO_INDEX = { from: NaN };
193
196
 
194
197
  function getIndexColumns(client) {
@@ -1,11 +1,20 @@
1
- import { DataTileIndexer } from './DataTileIndexer.js';
1
+ import { DataCubeIndexer } from './DataCubeIndexer.js';
2
2
 
3
3
  export class FilterGroup {
4
+ /**
5
+ * @param {import('./Coordinator.js').Coordinator} coordinator The Mosaic coordinator.
6
+ * @param {*} selection The shared filter selection.
7
+ * @param {*} index Boolean flag or options hash for data cube indexer.
8
+ * Falsy values disable indexing.
9
+ */
4
10
  constructor(coordinator, selection, index = true) {
11
+ /** @type import('./Coordinator.js').Coordinator */
5
12
  this.mc = coordinator;
6
13
  this.selection = selection;
7
14
  this.clients = new Set();
8
- this.indexer = index ? new DataTileIndexer(this.mc, selection) : null;
15
+ this.indexer = index
16
+ ? new DataCubeIndexer(this.mc, { ...index, selection })
17
+ : null;
9
18
 
10
19
  const { value, activate } = this.handlers = {
11
20
  value: () => this.update(),
@@ -40,10 +40,10 @@ export class MosaicClient {
40
40
  }
41
41
 
42
42
  /**
43
- * Called by the coordinator to set the field statistics for this client.
43
+ * Called by the coordinator to set the field info for this client.
44
44
  * @returns {this}
45
45
  */
46
- fieldStats() {
46
+ fieldInfo() {
47
47
  return this;
48
48
  }
49
49
 
@@ -63,6 +63,9 @@ export class MosaicClient {
63
63
 
64
64
  /**
65
65
  * Called by the coordinator to return a query result.
66
+ *
67
+ * @param {*} data the query result
68
+ * @returns {this}
66
69
  */
67
70
  queryResult() {
68
71
  return this;
@@ -0,0 +1,238 @@
1
+ import { Query, Ref } from '@uwdata/mosaic-sql';
2
+ import { queryResult } from './util/query-result.js';
3
+
4
+ /**
5
+ * Create a consolidator to combine structurally compatible queries.
6
+ * @param {*} enqueue Query manager enqueue method
7
+ * @param {*} cache Client-side query cache (sql -> data)
8
+ * @param {*} record Query recorder function
9
+ * @returns A consolidator object
10
+ */
11
+ export function consolidator(enqueue, cache, record) {
12
+ let pending = [];
13
+ let id = 0;
14
+
15
+ function run() {
16
+ // group queries into bundles that can be consolidated
17
+ const groups = entryGroups(pending, cache);
18
+ pending = [];
19
+ id = 0;
20
+
21
+ // build and issue consolidated queries
22
+ for (const group of groups) {
23
+ consolidate(group, enqueue, record);
24
+ processResults(group, cache);
25
+ }
26
+ }
27
+
28
+ return {
29
+ add(entry, priority) {
30
+ if (entry.request.type === 'arrow') {
31
+ // wait one frame, gather an ordered list of queries
32
+ // only Apache Arrow is supported, so we can project efficiently
33
+ id = id || requestAnimationFrame(() => run());
34
+ pending.push({ entry, priority, index: pending.length });
35
+ } else {
36
+ enqueue(entry, priority);
37
+ }
38
+ }
39
+ }
40
+ }
41
+
42
+ /**
43
+ * Segment query requests into consolidation-compatible groups.
44
+ * @param {*} entries Query request entries ({ request, result } objects)
45
+ * @returns An array of grouped entry arrays
46
+ */
47
+ function entryGroups(entries, cache) {
48
+ const groups = [];
49
+ const groupMap = new Map;
50
+
51
+ for (const query of entries) {
52
+ const { entry: { request } } = query;
53
+ const key = consolidationKey(request.query, cache);
54
+ if (!groupMap.has(key)) {
55
+ const list = [];
56
+ groups.push(list);
57
+ groupMap.set(key, list);
58
+ }
59
+ groupMap.get(key).push(query);
60
+ }
61
+
62
+ return groups;
63
+ }
64
+
65
+ /**
66
+ * Generate a key string for query consolidation.
67
+ * Queries with matching keys are consolidation-compatible.
68
+ * If a query is found in the cache, it is exempted from consolidation,
69
+ * which is indicated by returning the precise query SQL as the key.
70
+ * @param {*} query The input query.
71
+ * @param {*} cache The query cache (sql -> data).
72
+ * @returns a key string
73
+ */
74
+ function consolidationKey(query, cache) {
75
+ const sql = `${query}`;
76
+ if (query instanceof Query && !cache.get(sql)) {
77
+ if (
78
+ query.orderby().length || query.where().length ||
79
+ query.qualify().length || query.having().length
80
+ ) {
81
+ // do not try to analyze if query includes clauses
82
+ // that may refer to *derived* columns we can't resolve
83
+ return sql;
84
+ }
85
+
86
+ // create a derived query stripped of selections
87
+ const q = query.clone().$select('*');
88
+
89
+ // check group by criteria for compatibility
90
+ // queries may refer to *derived* columns as group by criteria
91
+ // we resolve these against the true grouping expressions
92
+ const groupby = query.groupby();
93
+ if (groupby.length) {
94
+ const map = {}; // expression map (as -> expr)
95
+ query.select().forEach(({ as, expr }) => map[as] = expr);
96
+ q.$groupby(groupby.map(e => (e instanceof Ref && map[e.column]) || e));
97
+ }
98
+
99
+ // key is just the transformed query as SQL
100
+ return `${q}`;
101
+ } else {
102
+ // can not analyze query, simply return as string
103
+ return sql;
104
+ }
105
+ }
106
+
107
+ /**
108
+ * Issue queries, consolidating where possible.
109
+ * @param {*} group Array of bundled query entries
110
+ * @param {*} enqueue Add entry to query queue
111
+ * @param {*} record Query recorder function
112
+ */
113
+ function consolidate(group, enqueue, record) {
114
+ if (shouldConsolidate(group)) {
115
+ // issue a single consolidated query
116
+ enqueue({
117
+ request: {
118
+ type: 'arrow',
119
+ cache: false,
120
+ record: false,
121
+ query: consolidatedQuery(group, record)
122
+ },
123
+ result: (group.result = queryResult())
124
+ });
125
+ } else {
126
+ // issue queries directly
127
+ for (const { entry, priority } of group) {
128
+ enqueue(entry, priority);
129
+ }
130
+ }
131
+ }
132
+
133
+ /**
134
+ * Check if a group contains multiple distinct queries.
135
+ * @param {*} group Array of bundled query entries
136
+ * @returns false if group contains a single (possibly repeated) query,
137
+ * otherwise true
138
+ */
139
+ function shouldConsolidate(group) {
140
+ if (group.length > 1) {
141
+ const sql = `${group[0].entry.request.query}`;
142
+ for (let i = 1; i < group.length; ++i) {
143
+ if (sql !== `${group[i].entry.request.query}`) {
144
+ return true;
145
+ }
146
+ }
147
+ }
148
+ return false;
149
+ }
150
+
151
+ /**
152
+ * Create a consolidated query for a group.
153
+ * @param {*} group Array of bundled query entries
154
+ * @param {*} record Query recorder function
155
+ * @returns A consolidated Query instance
156
+ */
157
+ function consolidatedQuery(group, record) {
158
+ const maps = group.maps = [];
159
+ const fields = new Map;
160
+
161
+ // gather select fields
162
+ for (const item of group) {
163
+ const { query } = item.entry.request;
164
+ const fieldMap = [];
165
+ maps.push(fieldMap);
166
+ for (const { as, expr } of query.select()) {
167
+ const e = `${expr}`;
168
+ if (!fields.has(e)) {
169
+ fields.set(e, [`col${fields.size}`, expr]);
170
+ }
171
+ const [name] = fields.get(e);
172
+ fieldMap.push([name, as]);
173
+ }
174
+ record(`${query}`);
175
+ }
176
+
177
+ // use a cloned query as a starting point
178
+ const query = group[0].entry.request.query.clone();
179
+
180
+ // update group by statement as needed
181
+ const groupby = query.groupby();
182
+ if (groupby.length) {
183
+ const map = {};
184
+ group.maps[0].forEach(([name, as]) => map[as] = name);
185
+ query.$groupby(groupby.map(e => (e instanceof Ref && map[e.column]) || e));
186
+ }
187
+
188
+ // update select statement and return
189
+ return query.$select(Array.from(fields.values()));
190
+ }
191
+
192
+ /**
193
+ * Process query results, dispatch results to original requests
194
+ * @param {*} group Array of query requests
195
+ * @param {*} cache Client-side query cache (sql -> data)
196
+ */
197
+ async function processResults(group, cache) {
198
+ const { maps, result } = group;
199
+ if (!maps) return; // no consolidation performed
200
+
201
+ let data;
202
+ try {
203
+ data = await result;
204
+ } catch (err) {
205
+ // pass error to consolidated queries
206
+ for (const { entry } of group) {
207
+ entry.result.reject(err);
208
+ }
209
+ return;
210
+ }
211
+
212
+ group.forEach(({ entry }, index) => {
213
+ const { request, result } = entry;
214
+ const projected = projectResult(data, maps[index]);
215
+ if (request.cache) {
216
+ cache.set(String(request.query), projected);
217
+ }
218
+ result.fulfill(projected);
219
+ });
220
+ }
221
+
222
+ /**
223
+ * Project a consolidated result to a client result
224
+ * @param {*} data Consolidated query result, as an Apache Arrow Table
225
+ * @param {*} map Column name map as [source, target] pairs
226
+ * @returns the projected Apache Arrow table
227
+ */
228
+ function projectResult(data, map) {
229
+ if (map) {
230
+ const cols = {};
231
+ for (const [name, as] of map) {
232
+ cols[as] = data.getChild(name);
233
+ }
234
+ return new data.constructor(cols);
235
+ } else {
236
+ return data;
237
+ }
238
+ }
@@ -1,5 +1,7 @@
1
+ import { consolidator } from './QueryConsolidator.js';
1
2
  import { lruCache, voidCache } from './util/cache.js';
2
3
  import { priorityQueue } from './util/priority-queue.js';
4
+ import { queryResult } from './util/query-result.js';
3
5
 
4
6
  export const Priority = { High: 0, Normal: 1, Low: 2 };
5
7
 
@@ -10,6 +12,7 @@ export function QueryManager() {
10
12
  let logger;
11
13
  let recorders = [];
12
14
  let pending = null;
15
+ let consolidate;
13
16
 
14
17
  function next() {
15
18
  if (pending || queue.isEmpty()) return;
@@ -18,14 +21,25 @@ export function QueryManager() {
18
21
  pending.finally(() => { pending = null; next(); });
19
22
  }
20
23
 
24
+ function enqueue(entry, priority = Priority.Normal) {
25
+ queue.insert(entry, priority);
26
+ next();
27
+ }
28
+
29
+ function recordQuery(sql) {
30
+ if (recorders.length && sql) {
31
+ recorders.forEach(rec => rec.add(sql));
32
+ }
33
+ }
34
+
21
35
  async function submit(request, result) {
22
36
  try {
23
- const { query, type, cache = false, options } = request;
24
- const sql = query ? String(query) : null;
37
+ const { query, type, cache = false, record = true, options } = request;
38
+ const sql = query ? `${query}` : null;
25
39
 
26
40
  // update recorders
27
- if (recorders.length && sql) {
28
- recorders.forEach(rec => rec.add(sql));
41
+ if (record) {
42
+ recordQuery(sql);
29
43
  }
30
44
 
31
45
  // check query cache
@@ -64,10 +78,22 @@ export function QueryManager() {
64
78
  return connector ? (db = connector) : db;
65
79
  },
66
80
 
81
+ consolidate(flag) {
82
+ if (flag && !consolidate) {
83
+ consolidate = consolidator(enqueue, clientCache, recordQuery);
84
+ } else if (!flag && consolidate) {
85
+ consolidate = null;
86
+ }
87
+ },
88
+
67
89
  request(request, priority = Priority.Normal) {
68
90
  const result = queryResult();
69
- queue.insert({ request, result }, priority);
70
- next();
91
+ const entry = { request, result };
92
+ if (consolidate) {
93
+ consolidate.add(entry, priority);
94
+ } else {
95
+ enqueue(entry, priority);
96
+ }
71
97
  return result;
72
98
  },
73
99
 
@@ -76,6 +102,13 @@ export function QueryManager() {
76
102
  queue.remove(({ result }) => set.has(result));
77
103
  },
78
104
 
105
+ clear() {
106
+ queue.remove(({ result }) => {
107
+ result.reject('Cleared');
108
+ return true;
109
+ });
110
+ },
111
+
79
112
  record() {
80
113
  let state = [];
81
114
  const recorder = {
@@ -98,12 +131,3 @@ export function QueryManager() {
98
131
  }
99
132
  };
100
133
  }
101
-
102
- function queryResult() {
103
- let resolve;
104
- let reject;
105
- const p = new Promise((r, e) => { resolve = r; reject = e; });
106
- p.fulfill = value => (resolve(value), p);
107
- p.reject = err => (reject(err), p);
108
- return p;
109
- }
package/src/Selection.js CHANGED
@@ -120,6 +120,13 @@ export class Selection extends Param {
120
120
  return super.value;
121
121
  }
122
122
 
123
+ /**
124
+ * Indicate if this selection has a single resolution strategy.
125
+ */
126
+ get single() {
127
+ return this._resolver.single;
128
+ }
129
+
123
130
  /**
124
131
  * Emit an activate event with the given selection clause.
125
132
  * @param {*} clause The clause representing the potential activation.
@@ -0,0 +1,8 @@
1
+ export function queryResult() {
2
+ let resolve;
3
+ let reject;
4
+ const p = new Promise((r, e) => { resolve = r; reject = e; });
5
+ p.fulfill = value => (resolve(value), p);
6
+ p.reject = err => (reject(err), p);
7
+ return p;
8
+ }