@uwdata/mosaic-duckdb 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # @uwdata/mosaic-duckdb
2
+
3
+ A Node.js + DuckDB data server that supports transfer of [Apache Arrow](https://arrow.apache.org/) and JSON data over either Web Sockets or HTTP.
@@ -0,0 +1,29 @@
1
+ #! /usr/bin/env node
2
import { DuckDB } from '../src/index.js';
import { createWriteStream } from 'fs';
import { once } from 'node:events';

// Command-line tool: load a CSV file into DuckDB and emit the table as
// Arrow bytes, either to a file or to standard output.
//   argv[2] - path of the CSV file to load (required)
//   argv[3] - path of the output file (optional; defaults to stdout)
const [csvFile, outFile] = process.argv.slice(2);

// fail early with a usage hint instead of querying a file named 'undefined'
if (!csvFile) {
  console.error(`Usage: ${process.argv[1]} <csv-file> [output-file]`);
  process.exit(1);
}

const db = new DuckDB();

// load CSV into duckdb
await db.csv('data', csvFile);

// get output stream of arrow bytes
const stream = await db.arrowStream('SELECT * FROM data');

// determine the output stream
const output = outFile ? createWriteStream(outFile) : process.stdout;

// set up error handling; remember the failure so we stop writing
let failed = false;
output.on('error', (error) => {
  failed = true;
  process.exitCode = 1;
  console.error(`File write error: ${error.message}`);
});

// write arrow bytes to output
for await (const chunk of stream) {
  if (failed) break;
  // write() returns false once the internal buffer is full; wait for
  // 'drain' so a large table is not buffered entirely in memory
  if (!output.write(chunk)) {
    try {
      await once(output, 'drain');
    } catch {
      // once() rejects if 'error' fires first; the handler above reported it
      break;
    }
  }
}

// finish: append four zero bytes after the data
// NOTE(review): presumably an end-of-stream marker for readers -- confirm
if (!failed) {
  output.end(new Uint8Array(4));
}
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "@uwdata/mosaic-duckdb",
3
+ "version": "0.0.1",
4
+ "description": "A Node-based DuckDB data server.",
5
+ "keywords": [
6
+ "duckdb",
7
+ "server",
8
+ "node",
9
+ "arrow",
10
+ "mosaic"
11
+ ],
12
+ "license": "BSD-3-Clause",
13
+ "author": "Jeffrey Heer (http://idl.cs.washington.edu)",
14
+ "type": "module",
15
+ "main": "src/index.js",
16
+ "module": "src/index.js",
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "https://github.com/uwdata/mosaic.git"
20
+ },
21
+ "scripts": {
22
+ "lint": "eslint src test --ext .js",
23
+ "test": "mocha 'test/**/*-test.js'",
24
+ "prepublishOnly": "npm run test && npm run lint"
25
+ },
26
+ "dependencies": {
27
+ "duckdb": "~0.6.1",
28
+ "ws": "^8.12.0"
29
+ }
30
+ }
package/src/DuckDB.js ADDED
@@ -0,0 +1,158 @@
1
+ import duckdb from 'duckdb';
2
+ import { readFile } from 'node:fs/promises';
3
+ import { mergeBuffers } from './merge-buffers.js';
4
+
5
// Statements executed when a DuckDB wrapper is constructed: set the temp
// directory, then INSTALL and LOAD the arrow and httpfs extensions.
const CONFIG = [
  "PRAGMA temp_directory='./duckdb.tmp'",
  ...['INSTALL', 'LOAD'].flatMap(
    (verb) => ['arrow', 'httpfs'].map((extension) => `${verb} ${extension}`)
  )
];
12
+
13
/**
 * Promise-based wrapper around a DuckDB database and one connection to it.
 */
export class DuckDB {
  /**
   * Open a database, connect to it, and run the CONFIG statements.
   * @param {string} [path=':memory:'] Path passed to `duckdb.Database`.
   */
  constructor(path = ':memory:') {
    this.db = new duckdb.Database(path);
    this.con = this.db.connect();
    // A constructor cannot await; report a configuration failure rather
    // than leaving the promise floating as an unhandled rejection.
    this.exec(CONFIG.join(';\n')).catch((err) => console.error(err));
  }

  /**
   * Close the database.
   * @returns {Promise<DuckDB>} Resolves to this instance once closed.
   */
  close() {
    return new Promise((resolve, reject) => {
      this.db.close((err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Create a table from a CSV file using `read_csv_auto`.
   * @param {string} tableName Name of the table to create (inserted verbatim).
   * @param {string} fileName Path of the CSV file.
   * @param {object} [options] Extra `read_csv_auto` options. Booleans are
   *  upper-cased, strings become quoted SQL literals, other values are
   *  inserted as-is. `sample_size` defaults to -1.
   * @returns {Promise<DuckDB>} Resolves to this instance.
   */
  async csv(tableName, fileName, options = {}) {
    const opt = Object.entries({ sample_size: -1, ...options })
      .map(([key, value]) => {
        const t = typeof value;
        const v = t === 'boolean' ? String(value).toUpperCase()
          : t === 'string' ? sqlString(value)
          : value;
        return `${key.toUpperCase()}=${v}`;
      })
      .join(', ');
    return this.exec(`CREATE TABLE ${tableName} AS SELECT *
      FROM read_csv_auto(${sqlString(fileName)}, ${opt});`);
  }

  /**
   * Create a table from a Parquet file using `read_parquet`.
   * @param {string} tableName Name of the table to create (inserted verbatim).
   * @param {string} fileName Path of the Parquet file.
   * @returns {Promise<DuckDB>} Resolves to this instance.
   */
  async parquet(tableName, fileName) {
    return this.exec(`CREATE TABLE ${tableName} AS SELECT *
      FROM read_parquet(${sqlString(fileName)});`);
  }

  /**
   * Create a table from Arrow IPC data.
   * @param {string} tableName Name of the table to create.
   * @param {ArrayBufferView|string} buffer Arrow bytes, or anything else
   *  accepted by `readFile` (e.g. a file path) from which to read them.
   * @returns {Promise<void>}
   */
  async ipc(tableName, buffer) {
    const bufName = `__ipc__${tableName}`;
    const arrowData = ArrayBuffer.isView(buffer) ? buffer : await readFile(buffer);
    this.con.register_buffer(bufName, [arrowData], true, err => {
      if (err) console.error(err);
    });
    try {
      await this.exec(`CREATE TABLE ${tableName} AS SELECT * FROM ${bufName}`);
    } finally {
      // always release the registered buffer, even if table creation failed
      this.con.unregister_buffer(bufName);
    }
  }

  /**
   * Prepare a statement on this connection.
   * @param {string} sql SQL text to prepare.
   * @returns {DuckDBStatement} Wrapper around the prepared statement.
   */
  prepare(sql) {
    return new DuckDBStatement(this.con.prepare(sql));
  }

  /**
   * Execute SQL without collecting a result.
   * @param {string} sql SQL text to execute.
   * @returns {Promise<DuckDB>} Resolves to this instance; rejects on error.
   */
  exec(sql) {
    return new Promise((resolve, reject) => {
      this.con.exec(sql, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Run a query and collect the full result via the connection's `all`.
   * @param {string} sql SQL query text.
   * @returns {Promise<*>} Resolves to the result passed to the callback.
   */
  query(sql) {
    return new Promise((resolve, reject) => {
      this.con.all(sql, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Run a query and return its Arrow IPC result merged into one buffer.
   * @param {string} sql SQL query text.
   * @returns {Promise<Uint8Array>} Output of `mergeBuffers` on the result.
   */
  arrowBuffer(sql) {
    return new Promise((resolve, reject) => {
      this.con.arrowIPCAll(sql, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(mergeBuffers(result));
        }
      });
    });
  }

  /**
   * Run a query via the connection's `arrowIPCStream`.
   * @param {string} sql SQL query text.
   * @returns {*} Whatever `arrowIPCStream` returns; callers in this package
   *  await it and iterate the result with `for await`.
   */
  arrowStream(sql) {
    return this.con.arrowIPCStream(sql);
  }
}

// Render a value as a single-quoted SQL string literal, doubling any
// embedded single quotes so they cannot terminate the literal early.
function sqlString(value) {
  return `'${String(value).replace(/'/g, "''")}'`;
}
105
+
106
/**
 * Promise-based wrapper around a prepared DuckDB statement.
 * Every method takes an array of parameter values, which is spread as the
 * positional arguments of the underlying statement call.
 */
export class DuckDBStatement {
  /**
   * @param {object} statement The prepared statement to wrap.
   */
  constructor(statement) {
    this.statement = statement;
  }

  /** Finalize the underlying statement. */
  finalize() {
    this.statement.finalize();
  }

  /**
   * Run the statement without waiting for completion.
   * @param {Array} [params=[]] Parameter values.
   */
  run(params = []) {
    this.statement.run(...params);
  }

  /**
   * Run the statement and wait for it to complete.
   * @param {Array} [params=[]] Parameter values.
   * @returns {Promise<DuckDBStatement>} Resolves to this wrapper.
   */
  exec(params = []) {
    return new Promise((resolve, reject) => {
      this.statement.run(...params, (err) => {
        if (err) {
          reject(err);
        } else {
          resolve(this);
        }
      });
    });
  }

  /**
   * Run the statement and collect the full result via `all`.
   * @param {Array} [params=[]] Parameter values.
   * @returns {Promise<*>} Resolves to the result passed to the callback.
   */
  query(params = []) {
    return new Promise((resolve, reject) => {
      this.statement.all(...params, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Run the statement and return its Arrow IPC result as one buffer.
   * @param {Array} [params=[]] Parameter values.
   * @returns {Promise<Uint8Array>} Output of `mergeBuffers` on the result.
   */
  arrowBuffer(params = []) {
    return new Promise((resolve, reject) => {
      // call the wrapped statement; this class has no `con` property
      this.statement.arrowIPCAll(...params, (err, result) => {
        if (err) {
          reject(err);
        } else {
          resolve(mergeBuffers(result));
        }
      });
    });
  }

  /**
   * Run the statement via its `arrowIPCStream` method.
   * @param {Array} [params=[]] Parameter values.
   * @returns {*} Whatever the statement's `arrowIPCStream` returns.
   */
  arrowStream(params = []) {
    return this.statement.arrowIPCStream(...params);
  }
}
@@ -0,0 +1,174 @@
1
+ import http from 'node:http';
2
+ import url from 'node:url';
3
+ import { WebSocketServer } from 'ws';
4
+
5
/**
 * Create the query server for a database and start listening.
 * @param {object} db Database passed to the query handler.
 * @param {object} [options]
 * @param {boolean} [options.rest=true] Serve queries over HTTP.
 * @param {boolean} [options.socket=true] Attach a Web Socket server.
 * @param {number} [options.port=3000] Port to listen on.
 */
export function dataServer(db, options = {}) {
  const { rest = true, socket = true, port = 3000 } = options;

  const onQuery = queryHandler(db);
  const server = createHTTPServer(onQuery, rest);
  if (socket) {
    createSocketServer(server, onQuery);
  }
  server.listen(port);

  // announce the port and each enabled endpoint
  console.log(`Data server running on port ${port}`);
  const endpoints = [[rest, 'http'], [socket, 'ws']];
  for (const [enabled, scheme] of endpoints) {
    if (enabled) console.log(` ${scheme}://localhost:${port}/`);
  }
}
19
+
20
/**
 * Create the HTTP server that forwards requests to the query handler.
 * @param {Function} handleQuery Called as `handleQuery(res, data)`, where
 *  `data` is the JSON text of the query (a string or a Buffer).
 * @param {boolean} rest If false, every request is answered with an empty
 *  200 response and no query is run.
 * @returns {http.Server} The server; the caller starts it with `listen`.
 */
function createHTTPServer(handleQuery, rest) {
  return http.createServer((req, resp) => {
    const res = httpResponse(resp);
    if (!rest) {
      res.done();
      return;
    }

    resp.setHeader('Content-Type', 'application/json');
    resp.setHeader('Access-Control-Allow-Origin', '*');
    resp.setHeader('Access-Control-Request-Method', '*');
    resp.setHeader('Access-Control-Allow-Methods', 'OPTIONS, POST, GET');
    resp.setHeader('Access-Control-Allow-Headers', '*');
    resp.setHeader('Access-Control-Max-Age', 2592000);

    switch (req.method) {
      case 'OPTIONS':
        res.done();
        break;
      case 'GET':
        // The handler parses JSON text, so serialize the URL query
        // parameters instead of handing it the parsed object.
        handleQuery(res, JSON.stringify(url.parse(req.url, true).query));
        break;
      case 'POST': {
        // collect the request body, then hand the raw bytes to the handler
        const chunks = [];
        req.on('error', err => res.error(err, 500));
        req.on('data', chunk => chunks.push(chunk));
        req.on('end', () => handleQuery(res, Buffer.concat(chunks)));
        break;
      }
      default:
        res.error(`Unsupported HTTP method: ${req.method}`, 400);
    }
  });
}
54
+
55
/**
 * Attach a Web Socket server to an HTTP server. Each incoming message on a
 * connection is forwarded to the query handler together with a response
 * wrapper for that connection.
 * @param {object} server HTTP server to attach to.
 * @param {Function} handleQuery Called as `handleQuery(res, data)`.
 */
function createSocketServer(server, handleQuery) {
  const socketServer = new WebSocketServer({ server });

  const onConnection = (socket) => {
    // one response wrapper per connection, reused for all its messages
    const response = socketResponse(socket);
    const onMessage = (data) => handleQuery(response, data);
    socket.on('message', onMessage);
  };

  socketServer.on('connection', onConnection);
}
63
+
64
/**
 * Build the request handler shared by the HTTP and Web Socket servers.
 * @param {object} db Database exposing `arrowStream`, `exec` and `query`.
 * @returns {Function} Async handler `(res, data)`. `data` is JSON text
 *  for an object `{ sql, type }`; `res` is a response wrapper providing
 *  `stream`, `json`, `done`, `error` and, optionally, `lock`/`unlock`.
 */
function queryHandler(db) {
  return async (res, data) => {
    const t0 = performance.now();

    // parse incoming query
    let query;
    try {
      query = JSON.parse(data);
    } catch (err) {
      res.error(err, 400);
      return;
    }

    // valid JSON that is not an object (e.g. `null`) is a client error
    if (query === null || typeof query !== 'object') {
      res.error(new Error('Query must be a JSON object.'), 400);
      return;
    }

    const { sql, type } = query;
    console.log('QUERY', sql);

    // request the lock to serialize requests
    // we do this to avoid DuckDB + Arrow errors
    // Acquire it before entering the try block, so the `finally` below
    // only ever releases a lock this request actually holds.
    await res.lock?.();

    try {
      // process query and return result
      switch (type) {
        case 'arrow':
          // Apache Arrow response format
          await res.stream(await db.arrowStream(sql));
          break;
        case 'exec':
          // Execute query with no return value
          await db.exec(sql);
          res.done();
          break;
        default:
          // JSON response format
          res.json(await db.query(sql));
      }
    } catch (err) {
      res.error(err, 500);
    } finally {
      res.unlock?.();
    }

    console.log('REQUEST', Math.round(performance.now() - t0));
  };
}
109
+
110
// Lock state shared by all HTTP response wrappers: `locked` is true while
// a request holds the lock; `queue` holds the resolvers of waiting requests.
let locked = false;
const queue = [];

/**
 * Wrap an HTTP response in the interface used by the query handler.
 * @param {object} res The HTTP response to write to.
 * @returns {object} Wrapper with `lock`, `unlock`, `stream`, `json`,
 *  `done` and `error` methods.
 */
function httpResponse(res) {
  return {
    /**
     * Acquire the shared lock.
     * @returns {true|Promise} `true` if acquired immediately, otherwise a
     *  promise resolved when a later `unlock` hands the lock over.
     */
    lock() {
      // if locked, add a promise to the queue
      // otherwise, grab the lock and proceed
      return locked
        ? new Promise(resolve => queue.push(resolve))
        : (locked = true);
    },
    /** Release the lock, passing it to the next queued request if any. */
    unlock() {
      locked = queue.length > 0;
      if (locked) {
        // resolve the next promise in the queue
        queue.shift()();
      }
    },
    /** Write every chunk of an (async) iterable, then end the response. */
    async stream(iter) {
      for await (const chunk of iter) {
        res.write(chunk);
      }
      res.end();
    },
    /** End the response with `data` serialized as JSON. */
    json(data) {
      res.end(JSON.stringify(data));
    },
    /** End the response with status 200 and no body. */
    done() {
      res.writeHead(200);
      res.end();
    },
    /** Log `err` and end the response with status `code`. */
    error(err, code) {
      console.error(err);
      // If a stream already started writing, the headers are out and
      // writeHead would throw; in that case just terminate the response.
      if (!res.headersSent) {
        res.writeHead(code);
      }
      res.end();
    }
  };
}
149
+
150
/**
 * Wrap a Web Socket in the interface used by the query handler.
 * @param {object} ws Socket whose `send(data, options)` delivers messages.
 * @returns {object} Wrapper with `stream`, `json`, `done` and `error`.
 */
function socketResponse(ws) {
  // send options: a complete text message, a non-final binary fragment,
  // and the final binary fragment
  const TEXT_MESSAGE = { binary: false, fin: true };
  const BINARY_PART = { binary: true, fin: false };
  const BINARY_END = { binary: true, fin: true };
  const EMPTY = new Uint8Array(0);

  const response = {
    // send each chunk as a fragment, then an empty final fragment
    async stream(iter) {
      for await (const part of iter) {
        ws.send(part, BINARY_PART);
      }
      ws.send(EMPTY, BINARY_END);
    },

    // send data serialized as a JSON text message
    json(data) {
      const text = JSON.stringify(data);
      ws.send(text, TEXT_MESSAGE);
    },

    // acknowledge completion with an empty JSON object
    done() {
      this.json({});
    },

    // log the error and report it to the client as { error }
    error(err) {
      console.error(err);
      const message = String(err);
      this.json({ error: message });
    }
  };

  return response;
}
+ }
package/src/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export { DuckDB } from './DuckDB.js';
2
+ export { dataServer } from './data-server.js';
@@ -0,0 +1,11 @@
1
/**
 * Concatenate a list of byte buffers into one new Uint8Array.
 * @param {Array<Uint8Array>} buffers Buffers to join, in order.
 * @returns {Uint8Array} A new array holding the contents of every input.
 */
export function mergeBuffers(buffers) {
  // first pass: total size of the output
  let total = 0;
  for (const part of buffers) {
    total += part.length;
  }

  // second pass: copy each part in at a running offset
  const merged = new Uint8Array(total);
  let cursor = 0;
  for (const part of buffers) {
    merged.set(part, cursor);
    cursor += part.length;
  }

  return merged;
}