@uwdata/mosaic-duckdb 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/bin/csv2arrow.js +29 -0
- package/package.json +30 -0
- package/src/DuckDB.js +158 -0
- package/src/data-server.js +174 -0
- package/src/index.js +2 -0
- package/src/merge-buffers.js +11 -0
package/README.md
ADDED
package/bin/csv2arrow.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#! /usr/bin/env node
import { DuckDB } from '../src/index.js';
import { createWriteStream } from 'fs';

// Usage: csv2arrow <input.csv> [output-file]
// Loads a CSV file into DuckDB and writes the full table as an
// Arrow IPC stream to the given file, or to stdout when omitted.
const [, , csvFile, outFile] = process.argv;

// FIX: fail fast with a usage message instead of interpolating
// `undefined` into the read_csv_auto() SQL below.
if (!csvFile) {
  console.error('Usage: csv2arrow <input.csv> [output-file]');
  process.exit(1);
}

const db = new DuckDB();

// load CSV into duckdb
await db.csv('data', csvFile);

// get output stream of arrow bytes
const stream = await db.arrowStream('SELECT * FROM data');

// determine the output stream: file if provided, stdout otherwise
const output = outFile
  ? createWriteStream(outFile)
  : process.stdout;

// set up error handling
output.on('error', (error) => {
  console.error(`File write error: ${error.message}`);
});

// write arrow bytes to output
for await (const chunk of stream) {
  output.write(chunk);
}

// finish: four zero bytes — presumably the Arrow IPC end-of-stream
// marker (TODO confirm against the Arrow IPC format spec)
output.end(new Uint8Array(4));
|
package/package.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@uwdata/mosaic-duckdb",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "A Node-based DuckDB data server.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"duckdb",
|
|
7
|
+
"server",
|
|
8
|
+
"node",
|
|
9
|
+
"arrow",
|
|
10
|
+
"mosaic"
|
|
11
|
+
],
|
|
12
|
+
"license": "BSD-3-Clause",
|
|
13
|
+
"author": "Jeffrey Heer (http://idl.cs.washington.edu)",
|
|
14
|
+
"type": "module",
|
|
15
|
+
"main": "src/index.js",
|
|
16
|
+
"module": "src/index.js",
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "https://github.com/uwdata/mosaic.git"
|
|
20
|
+
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"lint": "eslint src test --ext .js",
|
|
23
|
+
"test": "mocha 'test/**/*-test.js'",
|
|
24
|
+
"prepublishOnly": "npm run test && npm run lint"
|
|
25
|
+
},
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"duckdb": "~0.6.1",
|
|
28
|
+
"ws": "^8.12.0"
|
|
29
|
+
}
|
|
30
|
+
}
|
package/src/DuckDB.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import duckdb from 'duckdb';
|
|
2
|
+
import { readFile } from 'node:fs/promises';
|
|
3
|
+
import { mergeBuffers } from './merge-buffers.js';
|
|
4
|
+
|
|
5
|
+
// Connection setup statements, run once when a DuckDB instance is
// created: spill temporary data to a local directory, then install and
// load the arrow (IPC serialization) and httpfs extensions.
const CONFIG = [
  `PRAGMA temp_directory='./duckdb.tmp'`,
  `INSTALL arrow`,
  `INSTALL httpfs`,
  `LOAD arrow`,
  `LOAD httpfs`
];
|
|
12
|
+
|
|
13
|
+
export class DuckDB {
  /**
   * Promise-based wrapper around a DuckDB database and connection.
   * @param {string} [path=':memory:'] - database file path, or ':memory:'
   *   for a transient in-memory database.
   */
  constructor(path = ':memory:') {
    this.db = new duckdb.Database(path);
    this.con = this.db.connect();
    // apply CONFIG statements (temp directory, extensions).
    // NOTE(review): fire-and-forget — constructors can not await; a
    // configuration failure surfaces as an unhandled rejection.
    this.exec(CONFIG.join(';\n'));
  }

  /**
   * Close the underlying database.
   * @returns {Promise<DuckDB>} resolves to this instance once closed.
   */
  close() {
    return new Promise((resolve, reject) => {
      this.db.close((err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Create a table from a CSV file using read_csv_auto.
   * @param {string} tableName - name of the table to create.
   * @param {string} fileName - path to the CSV file.
   * @param {object} [options] - read_csv_auto options; keys are
   *   upper-cased, booleans become TRUE/FALSE, strings are quoted.
   *   sample_size defaults to -1 (scan all rows for type inference).
   * @returns {Promise<DuckDB>}
   */
  async csv(tableName, fileName, options = {}) {
    const opt = Object.entries({ sample_size: -1, ...options })
      .map(([key, value]) => {
        const t = typeof value;
        const v = t === 'boolean' ? String(value).toUpperCase()
          : t === 'string' ? `'${value}'`
          : value;
        return `${key.toUpperCase()}=${v}`;
      })
      .join(', ');
    // NOTE(review): table/file names are interpolated into SQL
    // unescaped — do not pass untrusted input.
    return this.exec(`CREATE TABLE ${tableName} AS SELECT *
      FROM read_csv_auto('${fileName}', ${opt});`);
  }

  /**
   * Create a table from a Parquet file.
   * @param {string} tableName - name of the table to create.
   * @param {string} fileName - path to the Parquet file.
   * @returns {Promise<DuckDB>}
   */
  async parquet(tableName, fileName) {
    return this.exec(`CREATE TABLE ${tableName} AS SELECT *
      FROM read_parquet('${fileName}');`);
  }

  /**
   * Create a table from Arrow IPC bytes.
   * @param {string} tableName - name of the table to create.
   * @param {Uint8Array|string} buffer - Arrow IPC bytes, or a file path
   *   from which to read them.
   * @throws if buffer registration or table creation fails.
   */
  async ipc(tableName, buffer) {
    const bufName = `__ipc__${tableName}`;
    const arrowData = ArrayBuffer.isView(buffer) ? buffer : await readFile(buffer);
    // FIX: await registration and propagate failures; previously errors
    // were only logged and the CREATE TABLE below could race the
    // asynchronous registration callback.
    await new Promise((resolve, reject) => {
      this.con.register_buffer(bufName, [arrowData], true, err => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });
    try {
      await this.exec(`CREATE TABLE ${tableName} AS SELECT * FROM ${bufName}`);
    } finally {
      // always release the registered buffer, even if the query fails
      this.con.unregister_buffer(bufName);
    }
  }

  /**
   * Prepare a SQL statement for repeated execution.
   * @param {string} sql
   * @returns {DuckDBStatement}
   */
  prepare(sql) {
    return new DuckDBStatement(this.con.prepare(sql));
  }

  /**
   * Execute SQL with no result rows.
   * @param {string} sql
   * @returns {Promise<DuckDB>}
   */
  exec(sql) {
    return new Promise((resolve, reject) => {
      this.con.exec(sql, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Run a query and collect all result rows.
   * @param {string} sql
   * @returns {Promise<object[]>} the result rows.
   */
  query(sql) {
    return new Promise((resolve, reject) => {
      this.con.all(sql, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Run a query and return the result as a single Arrow IPC buffer.
   * @param {string} sql
   * @returns {Promise<Uint8Array>}
   */
  arrowBuffer(sql) {
    return new Promise((resolve, reject) => {
      this.con.arrowIPCAll(sql, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(mergeBuffers(result));
        }
      });
    });
  }

  /**
   * Run a query and return a stream of Arrow IPC buffers.
   * @param {string} sql
   * @returns {*} the arrowIPCStream result (async-iterable of chunks).
   */
  arrowStream(sql) {
    return this.con.arrowIPCStream(sql);
  }
}
|
|
105
|
+
|
|
106
|
+
export class DuckDBStatement {
  /**
   * Promise-based wrapper around a prepared DuckDB statement.
   * @param {object} statement - a duckdb prepared statement.
   */
  constructor(statement) {
    this.statement = statement;
  }

  /** Release the prepared statement's resources. */
  finalize() {
    this.statement.finalize();
  }

  /**
   * Run the statement without waiting for completion.
   * @param {Array} params - positional query parameters.
   */
  run(params) {
    this.statement.run(...params);
  }

  /**
   * Run the statement and resolve when execution completes.
   * @param {Array} params - positional query parameters.
   * @returns {Promise<DuckDBStatement>}
   */
  exec(params) {
    return new Promise((resolve, reject) => {
      this.statement.run(...params, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Run the statement and collect all result rows.
   * @param {Array} params - positional query parameters.
   * @returns {Promise<object[]>}
   */
  query(params) {
    return new Promise((resolve, reject) => {
      this.statement.all(...params, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Run the statement and return the result as one Arrow IPC buffer.
   * @param {Array} params - positional query parameters.
   * @returns {Promise<Uint8Array>}
   */
  arrowBuffer(params) {
    return new Promise((resolve, reject) => {
      // FIX: was `this.con.arrowIPCAll` — this class has no `con`
      // property (only `statement`), so the call always threw TypeError.
      this.statement.arrowIPCAll(...params, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(mergeBuffers(result));
        }
      });
    });
  }

  /**
   * Run the statement and return a stream of Arrow IPC buffers.
   * @param {Array} params - positional query parameters.
   * @returns {*} the arrowIPCStream result.
   */
  arrowStream(params) {
    return this.statement.arrowIPCStream(...params);
  }
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import http from 'node:http';
|
|
2
|
+
import url from 'node:url';
|
|
3
|
+
import { WebSocketServer } from 'ws';
|
|
4
|
+
|
|
5
|
+
/**
 * Launch a DuckDB-backed data server over HTTP and/or WebSockets.
 * @param {object} db - database instance used to service queries.
 * @param {object} [options]
 * @param {boolean} [options.rest=true] - answer HTTP (REST) requests.
 * @param {boolean} [options.socket=true] - answer WebSocket requests.
 * @param {number} [options.port=3000] - port to listen on.
 */
export function dataServer(db, {
  rest = true,
  socket = true,
  port = 3000
} = {}) {
  const handleQuery = queryHandler(db);
  const server = createHTTPServer(handleQuery, rest);
  if (socket) {
    createSocketServer(server, handleQuery);
  }
  server.listen(port);

  console.log(`Data server running on port ${port}`);
  if (rest) console.log(`  http://localhost:${port}/`);
  if (socket) console.log(`  ws://localhost:${port}/`);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Create an HTTP server that forwards query requests to handleQuery.
 * When `rest` is false every request is acknowledged with 200 and
 * otherwise ignored (the server may still host WebSocket upgrades).
 */
function createHTTPServer(handleQuery, rest) {
  return http.createServer((req, resp) => {
    const res = httpResponse(resp);
    if (!rest) {
      res.done();
      return;
    }

    // JSON + permissive CORS headers on every REST response
    resp.setHeader('Content-Type', 'application/json');
    resp.setHeader('Access-Control-Allow-Origin', '*');
    resp.setHeader('Access-Control-Request-Method', '*');
    resp.setHeader('Access-Control-Allow-Methods', 'OPTIONS, POST, GET');
    resp.setHeader('Access-Control-Allow-Headers', '*');
    resp.setHeader('Access-Control-Max-Age', 2592000);

    if (req.method === 'OPTIONS') {
      // CORS preflight: headers only
      res.done();
    } else if (req.method === 'GET') {
      // NOTE(review): passes the parsed query-string object to
      // handleQuery, which JSON.parse's its input — confirm the
      // intended GET request protocol.
      handleQuery(res, url.parse(req.url, true).query);
    } else if (req.method === 'POST') {
      // buffer the request body, then hand it off as one chunk
      const chunks = [];
      req.on('error', err => res.error(err, 500));
      req.on('data', chunk => chunks.push(chunk));
      req.on('end', () => handleQuery(res, Buffer.concat(chunks)));
    } else {
      res.error(`Unsupported HTTP method: ${req.method}`, 400);
    }
  });
}
|
|
54
|
+
|
|
55
|
+
/**
 * Attach a WebSocket endpoint to an existing HTTP server. Each
 * incoming message is treated as one query request.
 */
function createSocketServer(server, handleQuery) {
  const wss = new WebSocketServer({ server });

  wss.on('connection', socket => {
    const response = socketResponse(socket);
    socket.on('message', data => handleQuery(response, data));
  });
}
|
|
63
|
+
|
|
64
|
+
/**
 * Build an async request handler bound to the given database.
 * The returned handler parses a JSON request ({ sql, type }) and
 * replies through the provided response interface:
 *   type 'arrow' -> res.stream, type 'exec' -> res.done,
 *   anything else -> res.json with the query rows.
 */
function queryHandler(db) {
  return async (res, data) => {
    const start = performance.now();

    // parse incoming query
    let query;
    try {
      query = JSON.parse(data);
    } catch (err) {
      res.error(err, 400);
      return;
    }

    try {
      const { sql, type } = query;
      console.log('QUERY', sql);

      // request the lock to serialize requests
      // we do this to avoid DuckDB + Arrow errors
      await res.lock?.();

      // process query and return result
      if (type === 'arrow') {
        // Apache Arrow response format
        await res.stream(await db.arrowStream(sql));
      } else if (type === 'exec') {
        // Execute query with no return value
        await db.exec(sql);
        res.done();
      } else {
        // JSON response format
        res.json(await db.query(sql));
      }
    } catch (err) {
      res.error(err, 500);
    } finally {
      res.unlock?.();
    }

    console.log('REQUEST', Math.round(performance.now() - start));
  };
}
|
|
109
|
+
|
|
110
|
+
// Module-level lock state shared by all HTTP responses: `locked` marks
// a request in flight, `queue` holds resolvers for waiting requests.
let locked = false;
const queue = [];

/**
 * Wrap a Node HTTP ServerResponse in the response interface used by
 * queryHandler: lock/unlock serialization plus stream/json/done/error.
 */
function httpResponse(res) {
  return {
    lock() {
      // grab the lock if free; otherwise wait in line for a resolver
      if (locked) {
        return new Promise(resolve => queue.push(resolve));
      }
      locked = true;
      return locked;
    },
    unlock() {
      // hand the lock to the next waiter, if any; otherwise release it
      locked = queue.length > 0;
      if (locked) {
        queue.shift()();
      }
    },
    async stream(iter) {
      for await (const chunk of iter) {
        res.write(chunk);
      }
      res.end();
    },
    json(data) {
      res.end(JSON.stringify(data));
    },
    done() {
      res.writeHead(200);
      res.end();
    },
    error(err, code) {
      console.error(err);
      res.writeHead(code);
      res.end();
    }
  };
}
|
|
149
|
+
|
|
150
|
+
/**
 * Wrap a WebSocket in the response interface used by queryHandler.
 * Arrow results are sent as binary fragments terminated by an empty
 * final frame; JSON results are sent as a single text frame.
 */
function socketResponse(ws) {
  // framing options for ws.send
  const TEXT_FRAME = { binary: false, fin: true };
  const BINARY_FRAGMENT = { binary: true, fin: false };
  const FINAL_FRAME = { binary: true, fin: true };
  const EMPTY = new Uint8Array(0);

  return {
    async stream(iter) {
      for await (const chunk of iter) {
        ws.send(chunk, BINARY_FRAGMENT);
      }
      // empty final frame signals end of the fragmented message
      ws.send(EMPTY, FINAL_FRAME);
    },
    json(data) {
      ws.send(JSON.stringify(data), TEXT_FRAME);
    },
    done() {
      this.json({});
    },
    error(err) {
      console.error(err);
      this.json({ error: String(err) });
    }
  };
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Concatenate a list of byte buffers into one contiguous Uint8Array.
 * @param {Array<Uint8Array>} buffers - buffers to merge, in order.
 * @returns {Uint8Array} a new array containing all input bytes.
 */
export function mergeBuffers(buffers) {
  const total = buffers.reduce((sum, b) => sum + b.length, 0);
  const merged = new Uint8Array(total);

  let offset = 0;
  for (const b of buffers) {
    merged.set(b, offset);
    offset += b.length;
  }

  return merged;
}
|