@uwdata/mosaic-duckdb 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +28 -0
- package/README.md +2 -2
- package/bin/csv2arrow.js +1 -1
- package/bin/run-server.js +3 -0
- package/package.json +7 -5
- package/src/Cache.js +113 -0
- package/src/DuckDB.js +3 -39
- package/src/data-server.js +62 -48
- package/src/index.js +4 -0
- package/src/load/arrow.js +14 -0
- package/src/load/bundle.js +75 -0
- package/src/load/create-table.js +6 -0
- package/src/load/csv.js +9 -0
- package/src/load/json.js +9 -0
- package/src/load/parameters.js +11 -0
- package/src/load/parquet.js +7 -0
- package/src/merge-buffers.js +5 -3
package/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023, UW Interactive Data Lab
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
package/README.md
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
#
|
|
1
|
+
# mosaic-duckdb
|
|
2
2
|
|
|
3
|
-
A Node.js
|
|
3
|
+
A Promise-based Node.js API to DuckDB, along with a data server that supports transfer of [Apache Arrow](https://arrow.apache.org/) and JSON data over either Web Sockets or HTTP.
|
package/bin/csv2arrow.js
CHANGED
|
@@ -8,7 +8,7 @@ const db = new DuckDB();
|
|
|
8
8
|
await db.csv('data', process.argv[2]);
|
|
9
9
|
|
|
10
10
|
// get output stream of arrow bytes
|
|
11
|
-
const stream = await db.
|
|
11
|
+
const stream = await db.arrowBuffer('SELECT * FROM data');
|
|
12
12
|
|
|
13
13
|
// determine the output stream
|
|
14
14
|
const output = process.argv[3]
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uwdata/mosaic-duckdb",
|
|
3
|
-
"version": "0.0
|
|
4
|
-
"description": "A
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "A Promise-based DuckDB API and Node.js data server.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"duckdb",
|
|
7
7
|
"server",
|
|
@@ -20,11 +20,13 @@
|
|
|
20
20
|
},
|
|
21
21
|
"scripts": {
|
|
22
22
|
"lint": "eslint src test --ext .js",
|
|
23
|
+
"server": "node bin/run-server.js",
|
|
23
24
|
"test": "mocha 'test/**/*-test.js'",
|
|
24
25
|
"prepublishOnly": "npm run test && npm run lint"
|
|
25
26
|
},
|
|
26
27
|
"dependencies": {
|
|
27
|
-
"duckdb": "~0.
|
|
28
|
-
"ws": "^8.
|
|
29
|
-
}
|
|
28
|
+
"duckdb": "~0.7.1",
|
|
29
|
+
"ws": "^8.13.0"
|
|
30
|
+
},
|
|
31
|
+
"gitHead": "e53cd914c807f99aabe78dcbe618dd9543e2f438"
|
|
30
32
|
}
|
package/src/Cache.js
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
|
|
5
|
+
const DEFAULT_CACHE_DIR = '.cache';
|
|
6
|
+
const DEFAULT_TTL = 1000 * 60 * 60 * 24 * 7; // 7 days
|
|
7
|
+
|
|
8
|
+
/**
 * Build the cache key for a hashable value (such as a SQL string) and a
 * result type.
 * @param {string|Buffer|TypedArray|DataView} hashable Content fed to the hash.
 * @param {string} type Result type, appended like a file extension.
 * @returns {string} Hex-encoded SHA-256 digest, a dot, then the type.
 */
export function cacheKey(hashable, type) {
  const hasher = createHash('sha256');
  hasher.update(hashable);
  const digest = hasher.digest('hex');
  return digest + '.' + type;
}
|
|
11
|
+
|
|
12
|
+
/**
 * A cached value paired with its expiration time.
 *
 * `last` is a `performance.now()`-based timestamp (rounded to whole
 * milliseconds) after which the entry counts as expired. A ttl of
 * `Infinity` yields `last === Infinity`, i.e. the entry never expires.
 */
class CacheEntry {
  /**
   * @param {*} data The cached value.
   * @param {number} [ttl] Time-to-live in milliseconds.
   */
  constructor(data, ttl = DEFAULT_TTL) {
    this.data = data;
    // Seed the expiry before the first touch(): Math.max(undefined, x) is
    // NaN, which would leave `last` stuck at NaN and make the entry
    // impossible to expire or rank for eviction.
    this.last = -Infinity;
    this.touch(ttl);
  }

  /**
   * Extend the expiration time to at least `ttl` milliseconds from now.
   * The expiry never moves backwards.
   * @param {number} [ttl] Time-to-live in milliseconds.
   * @returns {CacheEntry} This entry, for chaining.
   */
  touch(ttl = DEFAULT_TTL) {
    this.last = Math.round(Math.max(this.last, performance.now() + ttl));
    return this;
  }
}
|
|
22
|
+
|
|
23
|
+
/**
 * In-memory cache of query results with optional on-disk persistence.
 *
 * Entries are kept in a Map keyed by cache key. Persisted entries are also
 * written as files under `dir` and are given an infinite ttl, so they are
 * never removed by eviction.
 */
export class Cache {
  /**
   * @param {object} [options]
   * @param {number} [options.max] Maximum number of entries before
   *  eviction is scheduled.
   * @param {string} [options.dir] Directory for persisted entries.
   * @param {number} [options.ttl] Default time-to-live in milliseconds.
   */
  constructor({
    max = 10000, // max entries
    dir = DEFAULT_CACHE_DIR,
    ttl = DEFAULT_TTL
  } = {}) {
    this.cache = new Map;
    this.max = max;
    this.dir = dir;
    this.ttl = ttl;
    // Load previously persisted entries in the background; the constructor
    // does not wait for this to finish.
    readEntries(dir, this.cache).catch(err => console.error(err));
  }

  /**
   * @param {string} key Cache key.
   * @returns {boolean} True if an entry exists for the key.
   */
  has(key) {
    return this.cache.has(key);
  }

  /**
   * Remove an entry and, in the background, any file persisted for it.
   * @param {string} key Cache key.
   * @returns {boolean} True if an entry was removed from memory.
   */
  delete(key) {
    const deleted = this.cache.delete(key);
    if (deleted) {
      // Best-effort file removal: log failures instead of leaving an
      // unhandled promise rejection.
      fs.rm(path.resolve(this.dir, key), { force: true })
        .catch(err => console.error(err));
    }
    return deleted;
  }

  /**
   * Look up a value and refresh the entry's expiration time.
   * @param {string} key Cache key.
   * @returns {*} The cached data, or undefined if there is no entry.
   */
  get(key) {
    return this.cache.get(key)?.touch(this.ttl).data;
  }

  /**
   * Store a value, optionally persisting it to disk.
   * @param {string} key Cache key.
   * @param {*} data Value to cache.
   * @param {object} [options]
   * @param {boolean} [options.persist=false] Write the entry to disk and
   *  exempt it from eviction.
   * @param {number} [options.ttl] Time-to-live in milliseconds.
   * @returns {Cache} This cache, for chaining.
   */
  set(key, data, { persist = false, ttl = this.ttl } = {}) {
    const entry = new CacheEntry(data, persist ? Infinity : ttl);
    this.cache.set(key, entry);
    if (persist) {
      // Persist in the background; log failures rather than crash.
      writeEntry(this.dir, key, entry).catch(err => console.error(err));
    }
    if (this.shouldEvict()) setTimeout(() => this.evict());
    return this;
  }

  /**
   * @returns {boolean} True if the cache holds more than `max` entries.
   */
  shouldEvict() {
    return this.cache.size > this.max;
  }

  /**
   * Remove all expired entries; if the cache is still over capacity, also
   * remove the live entry with the earliest expiration time. Entries with
   * an infinite expiry are never removed.
   */
  evict() {
    const expire = performance.now();
    let lruKey = null;
    let lruLast = Infinity;

    for (const [key, entry] of this.cache) {
      const { last } = entry;
      if (last === Infinity) continue;

      // remove if the entry's expiration time has passed
      if (expire > last) {
        this.cache.delete(key);
        continue; // an expired entry must not become the LRU candidate
      }

      // least recently used live entry seen so far
      if (last < lruLast) {
        lruKey = key;
        lruLast = last;
      }
    }

    // remove lru entry
    if (this.cache.size > this.max && lruKey !== null) {
      this.cache.delete(lruKey);
    }
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Load persisted cache files from a directory into a cache map.
 *
 * Only files ending in `.arrow` or `.json` are loaded. Each is stored under
 * its full file name and wrapped in a CacheEntry with an infinite ttl.
 * Failures on individual files are ignored so one bad file does not block
 * the rest.
 *
 * @param {string} dir Directory containing persisted entries.
 * @param {Map} cache Map that receives the loaded entries.
 * @returns {Promise<void>}
 */
async function readEntries(dir, cache) {
  let files;
  try {
    files = await fs.readdir(dir);
  } catch (err) {
    return; // dir does not exist, nothing to do
  }
  await Promise.allSettled(files.map(async file => {
    // The whole file name is the key; the capture group is only the type.
    const type = file.match(/^.+\.(arrow|json)$/)?.[1];
    if (!type) return;
    const bytes = await fs.readFile(path.resolve(dir, file));
    // NOTE(review): .json payloads are parsed so cached values have the
    // same parsed form that loadBundle stores — confirm against callers.
    const data = type === 'json' ? JSON.parse(bytes.toString('utf8')) : bytes;
    cache.set(file, new CacheEntry(data, Infinity));
  }));
}
|
|
108
|
+
|
|
109
|
+
/**
 * Persist a cache entry as a file named after its key.
 *
 * Strings and binary views are written as-is. Any other value (such as an
 * array of row objects) is serialized as JSON first, because fs.writeFile
 * rejects plain objects and arrays.
 *
 * @param {string} dir Target directory; created if it does not exist.
 * @param {string} key Cache key, used as the file name.
 * @param {{ data: * }} entry Entry whose `data` is written.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
function writeEntry(dir, key, entry) {
  const { data } = entry;
  const payload = typeof data === 'string' || ArrayBuffer.isView(data)
    ? data
    : JSON.stringify(data);
  return fs.mkdir(dir, { recursive: true }).then(
    () => fs.writeFile(path.resolve(dir, key), payload)
  );
}
|
package/src/DuckDB.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import duckdb from 'duckdb';
|
|
2
|
-
import { readFile } from 'node:fs/promises';
|
|
3
2
|
import { mergeBuffers } from './merge-buffers.js';
|
|
4
3
|
|
|
4
|
+
const TEMP_DIR = '.duckdb';
|
|
5
|
+
|
|
5
6
|
const CONFIG = [
|
|
6
|
-
`PRAGMA temp_directory='
|
|
7
|
+
`PRAGMA temp_directory='${TEMP_DIR}'`,
|
|
7
8
|
`INSTALL arrow`,
|
|
8
9
|
`INSTALL httpfs`,
|
|
9
10
|
`LOAD arrow`,
|
|
@@ -29,35 +30,6 @@ export class DuckDB {
|
|
|
29
30
|
});
|
|
30
31
|
}
|
|
31
32
|
|
|
32
|
-
async csv(tableName, fileName, options = {}) {
|
|
33
|
-
const opt = Object.entries({ sample_size: -1, ...options })
|
|
34
|
-
.map(([key, value]) => {
|
|
35
|
-
const t = typeof value;
|
|
36
|
-
const v = t === 'boolean' ? String(value).toUpperCase()
|
|
37
|
-
: t === 'string' ? `'${value}'`
|
|
38
|
-
: value;
|
|
39
|
-
return `${key.toUpperCase()}=${v}`;
|
|
40
|
-
})
|
|
41
|
-
.join(', ');
|
|
42
|
-
return this.exec(`CREATE TABLE ${tableName} AS SELECT *
|
|
43
|
-
FROM read_csv_auto('${fileName}', ${opt});`);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
async parquet(tableName, fileName) {
|
|
47
|
-
return this.exec(`CREATE TABLE ${tableName} AS SELECT *
|
|
48
|
-
FROM read_parquet('${fileName}');`);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
async ipc(tableName, buffer) {
|
|
52
|
-
const bufName = `__ipc__${tableName}`;
|
|
53
|
-
const arrowData = ArrayBuffer.isView(buffer) ? buffer : await readFile(buffer);
|
|
54
|
-
this.con.register_buffer(bufName, [arrowData], true, err => {
|
|
55
|
-
if (err) console.error(err);
|
|
56
|
-
});
|
|
57
|
-
await this.exec(`CREATE TABLE ${tableName} AS SELECT * FROM ${bufName}`);
|
|
58
|
-
this.con.unregister_buffer(bufName);
|
|
59
|
-
}
|
|
60
|
-
|
|
61
33
|
prepare(sql) {
|
|
62
34
|
return new DuckDBStatement(this.con.prepare(sql));
|
|
63
35
|
}
|
|
@@ -97,10 +69,6 @@ export class DuckDB {
|
|
|
97
69
|
});
|
|
98
70
|
});
|
|
99
71
|
}
|
|
100
|
-
|
|
101
|
-
arrowStream(sql) {
|
|
102
|
-
return this.con.arrowIPCStream(sql);
|
|
103
|
-
}
|
|
104
72
|
}
|
|
105
73
|
|
|
106
74
|
export class DuckDBStatement {
|
|
@@ -151,8 +119,4 @@ export class DuckDBStatement {
|
|
|
151
119
|
});
|
|
152
120
|
});
|
|
153
121
|
}
|
|
154
|
-
|
|
155
|
-
arrowStream(params) {
|
|
156
|
-
return this.statement.arrowIPCStream(...params);
|
|
157
|
-
}
|
|
158
122
|
}
|
package/src/data-server.js
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
import http from 'node:http';
|
|
2
|
+
import path from 'node:path';
|
|
2
3
|
import url from 'node:url';
|
|
3
4
|
import { WebSocketServer } from 'ws';
|
|
5
|
+
import { Cache, cacheKey } from './Cache.js';
|
|
6
|
+
import { createBundle, loadBundle } from './load/bundle.js';
|
|
7
|
+
|
|
8
|
+
const CACHE_DIR = '.mosaic/cache';
|
|
9
|
+
const BUNDLE_DIR = '.mosaic/bundle';
|
|
4
10
|
|
|
5
11
|
export function dataServer(db, {
|
|
12
|
+
cache = true,
|
|
6
13
|
rest = true,
|
|
7
14
|
socket = true,
|
|
8
15
|
port = 3000
|
|
9
16
|
} = {}) {
|
|
10
|
-
const
|
|
17
|
+
const queryCache = cache ? new Cache({ dir: CACHE_DIR }) : null;
|
|
18
|
+
const handleQuery = queryHandler(db, queryCache);
|
|
11
19
|
const app = createHTTPServer(handleQuery, rest);
|
|
12
20
|
if (socket) createSocketServer(app, handleQuery);
|
|
13
21
|
|
|
@@ -25,7 +33,6 @@ function createHTTPServer(handleQuery, rest) {
|
|
|
25
33
|
return;
|
|
26
34
|
}
|
|
27
35
|
|
|
28
|
-
resp.setHeader('Content-Type', 'application/json');
|
|
29
36
|
resp.setHeader('Access-Control-Allow-Origin', '*');
|
|
30
37
|
resp.setHeader('Access-Control-Request-Method', '*');
|
|
31
38
|
resp.setHeader('Access-Control-Allow-Methods', 'OPTIONS, POST, GET');
|
|
@@ -61,7 +68,27 @@ function createSocketServer(server, handleQuery) {
|
|
|
61
68
|
});
|
|
62
69
|
}
|
|
63
70
|
|
|
64
|
-
function queryHandler(db) {
|
|
71
|
+
function queryHandler(db, queryCache) {
|
|
72
|
+
|
|
73
|
+
// retrieve query result
|
|
74
|
+
async function retrieve(query, get) {
|
|
75
|
+
const { sql, type, persist } = query;
|
|
76
|
+
const key = cacheKey(sql, type);
|
|
77
|
+
let result = queryCache?.get(key);
|
|
78
|
+
|
|
79
|
+
if (result) {
|
|
80
|
+
console.log('CACHE HIT');
|
|
81
|
+
} else {
|
|
82
|
+
result = await get(sql);
|
|
83
|
+
if (persist) {
|
|
84
|
+
queryCache?.set(key, result, { persist });
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return result;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// query request handler
|
|
65
92
|
return async (res, data) => {
|
|
66
93
|
const t0 = performance.now();
|
|
67
94
|
|
|
@@ -75,64 +102,56 @@ function queryHandler(db) {
|
|
|
75
102
|
}
|
|
76
103
|
|
|
77
104
|
try {
|
|
78
|
-
const { sql, type } = query;
|
|
79
|
-
console.log('
|
|
80
|
-
|
|
81
|
-
// request the lock to serialize requests
|
|
82
|
-
// we do this to avoid DuckDB + Arrow errors
|
|
83
|
-
await res.lock?.();
|
|
105
|
+
const { sql, type = 'json' } = query;
|
|
106
|
+
console.log(`> ${type.toUpperCase()}${sql ? ' ' + sql : ''}`);
|
|
84
107
|
|
|
85
108
|
// process query and return result
|
|
86
109
|
switch (type) {
|
|
87
|
-
case 'arrow':
|
|
88
|
-
// Apache Arrow response format
|
|
89
|
-
await res.stream(await db.arrowStream(sql));
|
|
90
|
-
break;
|
|
91
110
|
case 'exec':
|
|
92
111
|
// Execute query with no return value
|
|
93
112
|
await db.exec(sql);
|
|
94
113
|
res.done();
|
|
95
114
|
break;
|
|
96
|
-
|
|
115
|
+
case 'arrow':
|
|
116
|
+
// Apache Arrow response format
|
|
117
|
+
res.arrow(await retrieve(query, sql => db.arrowBuffer(sql)));
|
|
118
|
+
break;
|
|
119
|
+
case 'json':
|
|
97
120
|
// JSON response format
|
|
98
|
-
res.json(await db.query(sql));
|
|
121
|
+
res.json(await retrieve(query, sql => db.query(sql)));
|
|
122
|
+
break;
|
|
123
|
+
case 'create-bundle':
|
|
124
|
+
// Create a named bundle of precomputed resources
|
|
125
|
+
await createBundle(
|
|
126
|
+
db, queryCache, query.queries,
|
|
127
|
+
path.resolve(BUNDLE_DIR, query.name)
|
|
128
|
+
);
|
|
129
|
+
res.done();
|
|
130
|
+
break;
|
|
131
|
+
case 'load-bundle':
|
|
132
|
+
// Load a named bundle of precomputed resources
|
|
133
|
+
await loadBundle(db, queryCache, path.resolve(BUNDLE_DIR, query.name));
|
|
134
|
+
res.done();
|
|
135
|
+
break;
|
|
136
|
+
default:
|
|
137
|
+
res.error(`Unrecognized command: ${type}`, 400);
|
|
99
138
|
}
|
|
100
139
|
} catch (err) {
|
|
101
140
|
res.error(err, 500);
|
|
102
|
-
} finally {
|
|
103
|
-
res.unlock?.();
|
|
104
141
|
}
|
|
105
142
|
|
|
106
|
-
console.log('REQUEST',
|
|
143
|
+
console.log('REQUEST', (performance.now() - t0).toFixed(1));
|
|
107
144
|
};
|
|
108
145
|
}
|
|
109
146
|
|
|
110
|
-
let locked = false;
|
|
111
|
-
const queue = [];
|
|
112
|
-
|
|
113
147
|
function httpResponse(res) {
|
|
114
148
|
return {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return locked
|
|
119
|
-
? new Promise(resolve => queue.push(resolve))
|
|
120
|
-
: (locked = true);
|
|
121
|
-
},
|
|
122
|
-
unlock() {
|
|
123
|
-
locked = queue.length > 0;
|
|
124
|
-
if (locked) {
|
|
125
|
-
// resolve the next promise in the queue
|
|
126
|
-
queue.shift()();
|
|
127
|
-
}
|
|
128
|
-
},
|
|
129
|
-
async stream(iter) {
|
|
130
|
-
for await (const chunk of iter) {
|
|
131
|
-
res.write(chunk);
|
|
132
|
-
}
|
|
133
|
-
res.end();
|
|
149
|
+
arrow(data) {
|
|
150
|
+
res.setHeader('Content-Type', 'application/vnd.apache.arrow.stream');
|
|
151
|
+
res.end(data);
|
|
134
152
|
},
|
|
135
153
|
json(data) {
|
|
154
|
+
res.setHeader('Content-Type', 'application/json');
|
|
136
155
|
res.end(JSON.stringify(data));
|
|
137
156
|
},
|
|
138
157
|
done() {
|
|
@@ -149,16 +168,11 @@ function httpResponse(res) {
|
|
|
149
168
|
|
|
150
169
|
function socketResponse(ws) {
|
|
151
170
|
const STRING = { binary: false, fin: true };
|
|
152
|
-
const
|
|
153
|
-
const DONE = { binary: true, fin: true };
|
|
154
|
-
const NULL = new Uint8Array(0);
|
|
171
|
+
const BINARY = { binary: true, fin: true };
|
|
155
172
|
|
|
156
173
|
return {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
ws.send(chunk, FRAGMENT);
|
|
160
|
-
}
|
|
161
|
-
ws.send(NULL, DONE);
|
|
174
|
+
arrow(data) {
|
|
175
|
+
ws.send(data, BINARY);
|
|
162
176
|
},
|
|
163
177
|
json(data) {
|
|
164
178
|
ws.send(JSON.stringify(data), STRING);
|
package/src/index.js
CHANGED
|
@@ -1,2 +1,6 @@
|
|
|
1
1
|
export { DuckDB } from './DuckDB.js';
|
|
2
2
|
export { dataServer } from './data-server.js';
|
|
3
|
+
export { loadArrow } from './load/arrow.js';
|
|
4
|
+
export { loadCSV } from './load/csv.js';
|
|
5
|
+
export { loadJSON } from './load/json.js';
|
|
6
|
+
export { loadParquet } from './load/parquet.js';
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
|
|
3
|
+
/**
 * Register Arrow IPC data with the database connection under a name.
 *
 * @param {object} db Database wrapper exposing the connection as `db.con`.
 * @param {string} tableName Name under which the buffer is registered.
 * @param {ArrayBufferView|string} buffer Arrow bytes, or anything else
 *  accepted by fs.readFile (e.g. a file path) from which to read them.
 * @returns {Promise<void>} Resolves when registration succeeds; rejects
 *  with the error reported by the connection otherwise.
 */
export async function loadArrow(db, tableName, buffer) {
  const arrowData = ArrayBuffer.isView(buffer) ? buffer : await readFile(buffer);
  return new Promise((resolve, reject) => {
    db.con.register_buffer(tableName, [arrowData], true, err => {
      if (err) {
        console.error(err);
        reject(err);
      } else {
        // settle exactly once: only resolve when no error was reported
        resolve();
      }
    });
  });
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { cacheKey } from '../Cache.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Get the serialized result of a query, preferring a cached value.
 *
 * The return value is meant to be written to a file: Arrow results are
 * returned as produced by `db.arrowBuffer`, JSON results as a JSON string.
 *
 * @param {object} db Database exposing `arrowBuffer(sql)` and `query(sql)`.
 * @param {object|null} cache Cache exposing `get(key)`; may be null when
 *  caching is disabled.
 * @param {string} sql Query text.
 * @param {string} type Result type: 'arrow' or 'json'.
 * @returns {Promise<*>} The serialized query result.
 * @throws {Error} If there is no cached value and the type is unsupported.
 */
async function retrieve(db, cache, sql, type) {
  const key = cacheKey(sql, type);
  const cached = cache?.get(key);
  if (cached) {
    // Cached JSON results may be held as parsed values (loadBundle stores
    // them that way); serialize so the caller can write them to a file.
    const parsed = type === 'json'
      && typeof cached !== 'string'
      && !ArrayBuffer.isView(cached);
    return parsed ? JSON.stringify(cached) : cached;
  }
  switch (type) {
    case 'arrow':
      return db.arrowBuffer(sql);
    case 'json':
      return JSON.stringify(await db.query(sql));
    default:
      throw new Error(`Unsupported query type: ${type}`);
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Create a bundle directory of precomputed resources for a set of queries.
 *
 * Each distinct query is handled according to its form:
 * - a query object with an `alias`: its result is exported to
 *   `<alias>.parquet` and the alias is recorded as a table;
 * - a `CREATE ... VIEW` statement: ignored;
 * - any other `CREATE ` statement: executed, and if it names a table that
 *   table is exported to `<table>.parquet` and recorded;
 * - a `PRAGMA ` statement: ignored;
 * - anything else: treated as a select query whose result is written to a
 *   file named by its cache key (`DESCRIBE ` queries as JSON, others as
 *   Arrow) and recorded as a query.
 *
 * A `bundle.json` manifest listing the tables and query keys is written
 * last.
 *
 * @param {object} db Database exposing `exec(sql)`.
 * @param {object|null} cache Query cache handed through to `retrieve`.
 * @param {Iterable<string|{sql: string, alias?: string}>} queries Queries
 *  to bundle; duplicates (by identity) are processed once.
 * @param {string} dir Bundle directory; created if it does not exist.
 * @returns {Promise<{tables: string[], queries: string[]}>} The manifest.
 */
export async function createBundle(db, cache, queries, dir) {
  const describe_re = /^DESCRIBE /;
  const pragma_re = /^PRAGMA /;
  const view_re = /^CREATE(?: OR REPLACE)?(?: TEMP| TEMPORARY)? VIEW/;
  const table_re = /^CREATE(?: OR REPLACE)?(?: TEMP| TEMPORARY)? TABLE(?: IF NOT EXISTS)? ([^\s]+)/;

  // escape single quotes so a path is safe inside a SQL string literal
  const quote = file => file.replace(/'/g, "''");

  const manifest = { tables: [], queries: [] };

  await fs.mkdir(dir, { recursive: true });

  const querySet = new Set(queries);
  for (const query of querySet) {
    const sql = typeof query === 'string' ? query : query.sql;
    if (query.alias) {
      const table = query.alias;
      const file = path.resolve(dir, `${table}.parquet`);
      await db.exec(`COPY (${sql}) TO '${quote(file)}' (FORMAT PARQUET)`);
      manifest.tables.push(table);
    } else if (sql.startsWith('CREATE ')) {
      // table or view
      if (view_re.test(sql)) continue; // ignore views
      const table = sql.match(table_re)?.[1];
      await db.exec(sql);
      // Not a recognizable CREATE TABLE statement: nothing to export.
      if (!table) continue;
      const file = path.resolve(dir, `${table}.parquet`);
      await db.exec(`COPY ${table} TO '${quote(file)}' (FORMAT PARQUET)`);
      manifest.tables.push(table);
    } else if (!pragma_re.test(sql)) {
      // select query
      const type = describe_re.test(sql) ? 'json' : 'arrow';
      const key = cacheKey(sql, type);
      const result = await retrieve(db, cache, sql, type);
      await fs.writeFile(path.resolve(dir, key), result);
      manifest.queries.push(key);
    }
  }

  await fs.writeFile(path.resolve(dir, 'bundle.json'), JSON.stringify(manifest, null, 2));
  return manifest;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Load a bundle directory of precomputed resources.
 *
 * Reads the `bundle.json` manifest, places each listed query result in the
 * cache under its key (`.json` files parsed, all others as raw bytes), and
 * creates a temp table from each listed table's parquet file if a table of
 * that name does not already exist.
 *
 * @param {object} db Database exposing `exec(sql)`.
 * @param {object|null} cache Cache exposing `set(key, data)`; when null
 *  (caching disabled) the query results are skipped.
 * @param {string} dir Bundle directory.
 * @returns {Promise<void>}
 */
export async function loadBundle(db, cache, dir) {
  const manifest = JSON.parse(
    await fs.readFile(path.resolve(dir, 'bundle.json'), 'utf8')
  );

  // load precomputed query results into the cache
  if (cache) {
    for (const key of manifest.queries ?? []) {
      const file = path.resolve(dir, key);
      const json = path.extname(file) === '.json';
      const data = await fs.readFile(file);
      cache.set(key, json ? JSON.parse(data.toString('utf8')) : data);
    }
  }

  // load precomputed temp tables into the database
  for (const table of manifest.tables ?? []) {
    // escape single quotes so the path is safe inside a SQL string literal
    const file = path.resolve(dir, `${table}.parquet`).replace(/'/g, "''");
    await db.exec(`CREATE TEMP TABLE IF NOT EXISTS ${table} AS SELECT * FROM '${file}'`);
  }
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
 * Issue a CREATE TABLE ... AS statement.
 *
 * With `replace` the statement uses CREATE OR REPLACE; otherwise it uses
 * IF NOT EXISTS. With `temp` the table is created as a TEMP table.
 *
 * @param {object} db Database exposing `exec(sql)`.
 * @param {string} name Table name.
 * @param {string} as Query text that supplies the table contents.
 * @param {{temp?: boolean, replace?: boolean}} [options]
 * @returns {*} Whatever `db.exec` returns for the statement.
 */
export function createTable(db, name, as, options = {}) {
  const { temp, replace } = options;
  const words = ['CREATE'];
  if (replace) {
    words.push('OR REPLACE');
  }
  words.push(temp ? 'TEMP TABLE' : 'TABLE');
  if (!replace) {
    words.push('IF NOT EXISTS');
  }
  return db.exec(`${words.join(' ')} ${name} AS ${as}`);
}
|
package/src/load/csv.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createTable } from './create-table.js';
|
|
2
|
+
import { parameters } from './parameters.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Create a table from a CSV file via read_csv.
 *
 * @param {object} db Database passed through to `createTable`.
 * @param {string} tableName Name of the table to create.
 * @param {string} fileName CSV file to read.
 * @param {object} [options]
 * @param {string[]} [options.select=['*']] Select-list expressions.
 * @param {boolean} [options.temp] Create a TEMP table.
 * @param {boolean} [options.replace] Replace an existing table.
 *  All remaining options are passed to read_csv as named parameters, on
 *  top of the defaults `auto_detect: true` and `sample_size: -1`.
 * @returns {*} The result of `createTable`.
 */
export function loadCSV(db, tableName, fileName, options = {}) {
  const { select = ['*'], temp, replace, ...csvOptions } = options;
  const params = parameters({ auto_detect: true, sample_size: -1, ...csvOptions });
  // escape single quotes so the file name is a valid SQL string literal
  const file = String(fileName).replace(/'/g, "''");
  const query = `SELECT ${select.join(', ')} FROM read_csv('${file}', ${params})`;
  return createTable(db, tableName, query, { temp, replace });
}
|
package/src/load/json.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createTable } from './create-table.js';
|
|
2
|
+
import { parameters } from './parameters.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Create a table from a JSON file via read_json.
 *
 * @param {object} db Database passed through to `createTable`.
 * @param {string} tableName Name of the table to create.
 * @param {string} fileName JSON file to read.
 * @param {object} [options]
 * @param {string[]} [options.select=['*']] Select-list expressions.
 * @param {boolean} [options.temp] Create a TEMP table.
 * @param {boolean} [options.replace] Replace an existing table.
 *  All remaining options are passed to read_json as named parameters, on
 *  top of the defaults `auto_detect: true` and `json_format: 'auto'`.
 * @returns {*} The result of `createTable`.
 */
export function loadJSON(db, tableName, fileName, options = {}) {
  const { select = ['*'], temp, replace, ...jsonOptions } = options;
  const params = parameters({ auto_detect: true, json_format: 'auto', ...jsonOptions });
  // escape single quotes so the file name is a valid SQL string literal
  const file = String(fileName).replace(/'/g, "''");
  const query = `SELECT ${select.join(', ')} FROM read_json('${file}', ${params})`;
  return createTable(db, tableName, query, { temp, replace });
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Format an options object as a comma-separated list of `key=value` named
 * parameters for a SQL table function call.
 *
 * Strings are single-quoted (embedded quotes doubled), arrays become list
 * literals, plain objects become struct literals, and every other value is
 * converted with String().
 *
 * @param {object} options Parameter names mapped to values.
 * @returns {string} The formatted parameter list.
 */
export function parameters(options) {
  return Object.entries(options)
    .map(([key, value]) => `${key}=${toSQLLiteral(value)}`)
    .join(', ');
}

/**
 * Render a JavaScript value as a SQL literal.
 * @param {*} value Value to render.
 * @returns {string} The literal text.
 */
function toSQLLiteral(value) {
  if (typeof value === 'string') {
    return `'${value.replace(/'/g, "''")}'`;
  }
  if (Array.isArray(value)) {
    return `[${value.map(toSQLLiteral).join(', ')}]`;
  }
  if (value !== null && typeof value === 'object') {
    const proto = Object.getPrototypeOf(value);
    // Only plain objects become structs; other objects keep String().
    if (proto === Object.prototype || proto === null) {
      const fields = Object.entries(value)
        .map(([k, v]) => `${toSQLLiteral(k)}: ${toSQLLiteral(v)}`);
      return `{${fields.join(', ')}}`;
    }
  }
  return String(value);
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { createTable } from './create-table.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Create a table from a Parquet file via read_parquet.
 *
 * @param {object} db Database passed through to `createTable`.
 * @param {string} tableName Name of the table to create.
 * @param {string} fileName Parquet file to read.
 * @param {object} [options]
 * @param {string[]} [options.select=['*']] Select-list expressions.
 *  All remaining options are passed to `createTable` as table options.
 * @returns {*} The result of `createTable`.
 */
export function loadParquet(db, tableName, fileName, options = {}) {
  const { select = ['*'], ...tableOptions } = options;
  // escape single quotes so the file name is a valid SQL string literal
  const file = String(fileName).replace(/'/g, "''");
  const query = `SELECT ${select.join(', ')} FROM read_parquet('${file}')`;
  return createTable(db, tableName, query, tableOptions);
}
|
package/src/merge-buffers.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
export function mergeBuffers(buffers) {
|
|
2
|
-
const len = buffers.reduce((a, b) => a + b.length, 0);
|
|
2
|
+
const len = buffers.reduce((a, b) => a + (b ? b.length : 0), 0);
|
|
3
3
|
const buf = new Uint8Array(len);
|
|
4
4
|
|
|
5
5
|
for (let i = 0, offset = 0; i < buffers.length; ++i) {
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
if (buffers[i]) {
|
|
7
|
+
buf.set(buffers[i], offset);
|
|
8
|
+
offset += buffers[i].length;
|
|
9
|
+
}
|
|
8
10
|
}
|
|
9
11
|
|
|
10
12
|
return buf;
|