@sqlrooms/duckdb 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,231 @@
+ import * as duckdb from '@duckdb/duckdb-wasm';
+ const ENABLE_DUCK_LOGGING = false;
+ const SilentLogger = {
+   log: () => {
+     /* do nothing */
+   },
+ };
+ // TODO: shut DB down at some point
+ let duckConn;
+ let initialize;
+ export class DuckQueryError extends Error {
+   cause;
+   query;
+   queryCallStack;
+   constructor(err, query, stack) {
+     super(`DB query failed: ${err instanceof Error ? err.message : err}\n\nFull query:\n\n${query}\n\nQuery call stack:\n\n${stack}\n\n`);
+     this.cause = err;
+     this.query = query;
+     this.queryCallStack = stack;
+     Object.setPrototypeOf(this, DuckQueryError.prototype);
+   }
+   getMessageForUser() {
+     const msg = this.cause instanceof Error ? this.cause.message : this.message;
+     return msg;
+   }
+ }
+ /**
+  * @deprecated getDuckConn is deprecated, use getDuckDb instead
+  */
+ export const getDuckConn = getDuckDb;
+ export async function getDuckDb() {
+   if (!globalThis.Worker) {
+     return Promise.reject('No Worker support');
+   }
+   if (duckConn) {
+     return duckConn;
+   }
+   else if (initialize !== undefined) {
+     // The initialization has already been started, wait for it to finish
+     return initialize;
+   }
+   let resolve;
+   let reject;
+   initialize = new Promise((_resolve, _reject) => {
+     resolve = _resolve;
+     reject = _reject;
+   });
+   try {
+     // TODO: Consider loading locally https://github.com/duckdb/duckdb-wasm/issues/1425#issuecomment-1742156605
+     const allBundles = duckdb.getJsDelivrBundles();
+     const bestBundle = await duckdb.selectBundle(allBundles);
+     if (!bestBundle.mainWorker) {
+       throw new Error('No best bundle found for DuckDB worker');
+     }
+     const workerUrl = URL.createObjectURL(new Blob([`importScripts("${bestBundle.mainWorker}");`], {
+       type: 'text/javascript',
+     }));
+     // const worker = await duckdb.createWorker(bestBundle.mainWorker);
+     const worker = new window.Worker(workerUrl);
+     const logger = ENABLE_DUCK_LOGGING
+       ? new duckdb.ConsoleLogger()
+       : SilentLogger;
+     const db = new (class extends duckdb.AsyncDuckDB {
+       onError(event) {
+         super.onError(event);
+         console.error('onError', event);
+       }
+     })(logger, worker);
+     await db.instantiate(bestBundle.mainModule, bestBundle.pthreadWorker);
+     URL.revokeObjectURL(workerUrl);
+     await db.open({
+       path: ':memory:',
+       query: {
+         // castBigIntToDouble: true
+       },
+     });
+     const conn = await db.connect();
+     // Replace conn.query to include the full query in the error message
+     const connQuery = conn.query;
+     conn.query = (async (q) => {
+       const stack = new Error().stack;
+       try {
+         return await connQuery.call(conn, q);
+       }
+       catch (err) {
+         throw new DuckQueryError(err, q, stack);
+         // throw new Error(
+         //   `Query failed: ${err}\n\nFull query:\n\n${q}\n\nQuery call stack:\n\n${stack}\n\n`,
+         // );
+       }
+     });
+     await conn.query(`
+       SET max_expression_depth TO 100000;
+       SET memory_limit = '10GB';
+     `);
+     duckConn = { db, conn, worker };
+     resolve(duckConn);
+   }
+   catch (err) {
+     reject(err);
+     throw err;
+   }
+   return duckConn;
+ }
+ // Cache the promise to avoid multiple initialization attempts
+ let duckPromise = null;
+ /**
+  * @deprecated useDuckConn is deprecated, use useDuckDb instead
+  */
+ export const useDuckConn = useDuckDb;
+ export function useDuckDb() {
+   if (!duckPromise) {
+     duckPromise = getDuckDb();
+   }
+   // If we don't have a connection yet, throw the promise
+   // This will trigger Suspense
+   if (!duckConn) {
+     throw duckPromise;
+   }
+   return duckConn;
+ }
+ export const isNumericDuckType = (type) => type.indexOf('INT') >= 0 ||
+   type.indexOf('DECIMAL') >= 0 ||
+   type.indexOf('FLOAT') >= 0 ||
+   type.indexOf('REAL') >= 0 ||
+   type.indexOf('DOUBLE') >= 0;
+ export function getColValAsNumber(res, column = 0, index = 0) {
+   const v = (typeof column === 'number' ? res.getChildAt(column) : res.getChild(column))?.get(index);
+   if (v === undefined || v === null) {
+     return NaN;
+   }
+   // if it's an array (can be returned by duckdb as bigint)
+   return Number(v[0] ?? v);
+ }
+ export const escapeVal = (val) => {
+   return `'${String(val).replace(/'/g, "''")}'`;
+ };
+ export const escapeId = (id) => {
+   const str = String(id);
+   if (str.startsWith('"') && str.endsWith('"')) {
+     return str;
+   }
+   return `"${str.replace(/"/g, '""')}"`;
+ };
+ export async function getDuckTables(schema = 'main') {
+   const { conn } = await getDuckDb();
+   const tablesResults = await conn.query(`SELECT * FROM information_schema.tables
+     WHERE table_schema = '${schema}'
+     ORDER BY table_name`);
+   const tableNames = [];
+   for (let i = 0; i < tablesResults.numRows; i++) {
+     tableNames.push(tablesResults.getChild('table_name')?.get(i));
+   }
+   return tableNames;
+ }
+ export async function getDuckTableSchema(tableName, schema = 'main') {
+   const { conn } = await getDuckDb();
+   const describeResults = await conn.query(`DESCRIBE ${schema}.${tableName}`);
+   const columnNames = describeResults.getChild('column_name');
+   const columnTypes = describeResults.getChild('column_type');
+   const columns = [];
+   for (let di = 0; di < describeResults.numRows; di++) {
+     const columnName = columnNames?.get(di);
+     const columnType = columnTypes?.get(di);
+     columns.push({ name: columnName, type: columnType });
+   }
+   return {
+     tableName,
+     columns,
+     // Costly to get the row count for large tables
+     // rowCount: getColValAsNumber(
+     //   await conn.query(`SELECT COUNT(*) FROM ${schema}.${tableName}`),
+     // ),
+   };
+ }
+ export async function getDuckTableSchemas(schema = 'main') {
+   const tableNames = await getDuckTables(schema);
+   const tablesInfo = [];
+   for (const tableName of tableNames) {
+     tablesInfo.push(await getDuckTableSchema(tableName, schema));
+   }
+   return tablesInfo;
+ }
+ export async function checkTableExists(tableName, schema = 'main') {
+   const { conn } = await getDuckDb();
+   const res = await conn.query(`SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '${schema}' AND table_name = '${tableName}'`);
+   return getColValAsNumber(res) > 0;
+ }
+ export async function dropAllTables(schema) {
+   try {
+     const { conn } = await getDuckDb();
+     if (schema && schema !== 'main') {
+       await conn.query(`DROP SCHEMA IF EXISTS ${schema} CASCADE`);
+     }
+     else {
+       const res = await conn.query(`SELECT table_name, table_schema, table_type FROM information_schema.tables${schema ? ` WHERE table_schema = '${schema}'` : ''}`);
+       const schemasCol = res.getChild('table_schema');
+       const tableNamesCol = res.getChild('table_name');
+       const tableTypesCol = res.getChild('table_type');
+       for (let i = 0; i < res.numRows; i++) {
+         try {
+           const schemaName = schemasCol?.get(i);
+           const tableName = tableNamesCol?.get(i);
+           const tableType = tableTypesCol?.get(i);
+           if (tableName) {
+             const query = `DROP ${tableType === 'VIEW' ? 'VIEW' : 'TABLE'} IF EXISTS ${schemaName}.${tableName}`;
+             await conn.query(query);
+           }
+         }
+         catch (err) {
+           console.error(err);
+         }
+       }
+     }
+   }
+   catch (err) {
+     console.error(err);
+   }
+ }
+ export async function dropTable(tableName) {
+   const { conn } = await getDuckDb();
+   await conn.query(`DROP TABLE IF EXISTS ${tableName};`);
+ }
+ export async function dropFile(fname) {
+   const { db } = await getDuckDb();
+   await db.dropFile(fname);
+ }
+ export async function dropAllFiles() {
+   const { db } = await getDuckDb();
+   await db.dropFiles();
+ }
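
For reference, a minimal usage sketch of the exported helpers above (hypothetical calling code, not part of the package): getDuckDb() lazily boots DuckDB-WASM in a web worker and resolves to {db, conn, worker}, after which queries return Apache Arrow tables.

  import {getDuckDb, getDuckTableSchemas} from '@sqlrooms/duckdb';

  const {conn} = await getDuckDb();
  // conn.query returns an Apache Arrow table
  const result = await conn.query('SELECT 42 AS answer');
  console.log(result.getChild('answer')?.get(0)); // 42
  // List all tables in the main schema with their column names and types
  console.log(await getDuckTableSchemas());

useDuckDb() wraps the same initialization for React Suspense: it throws the pending promise until the connection is ready, so it is meant to be called from a component rendered inside a Suspense boundary.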
@@ -0,0 +1,4 @@
+ import {config} from '@sqlrooms/eslint-config/base';
+
+ /** @type {import("eslint").Linter.Config} */
+ export default config;
package/package.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "name": "@sqlrooms/duckdb",
+   "version": "0.0.0",
+   "main": "dist/index.js",
+   "types": "src/index.ts",
+   "module": "dist/index.js",
+   "type": "module",
+   "private": false,
+   "publishConfig": {
+     "access": "public"
+   },
+   "peerDependencies": {
+     "@duckdb/duckdb-wasm": "*",
+     "apache-arrow": "*"
+   },
+   "scripts": {
+     "dev": "tsc -w",
+     "build": "tsc",
+     "lint": "eslint ."
+   },
+   "gitHead": "4b0c709542475e4f95db0b2a8405ecadcf2ec186"
+ }
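
As the peerDependencies block above indicates, @duckdb/duckdb-wasm and apache-arrow are expected to be provided by the consuming app. A typical install (assuming npm as the package manager) would be:

  npm install @sqlrooms/duckdb @duckdb/duckdb-wasm apache-arrow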
package/src/duckdb.ts ADDED
@@ -0,0 +1,198 @@
+ import {DuckDBDataProtocol} from '@duckdb/duckdb-wasm';
+ import {escapeVal, getColValAsNumber, getDuckDb} from './useDuckDb';
+
+ // export function makeTableName(inputFileName: string): string {
+ //   return inputFileName.replace(/\.[^\.]*$/, '').replace(/\W/g, '_');
+ // }
+
+ export async function createTableFromQuery(tableName: string, query: string) {
+   const {conn} = await getDuckDb();
+   const rowCount = getColValAsNumber(
+     await conn.query(
+       `CREATE OR REPLACE TABLE main.${tableName} AS (
+         ${query}
+       )`,
+     ),
+   );
+   return {tableName, rowCount};
+ }
+
+ export async function createViewFromRegisteredFile(
+   filePath: string,
+   schema: string,
+   tableName: string,
+   opts?: {
+     // columnSpecs?: ColumnSpec[];
+     mode: 'table' | 'view';
+   },
+ ): Promise<{tableName: string; rowCount: number}> {
+   const {mode = 'table'} = opts ?? {};
+   const {conn} = await getDuckDb();
+   const fileNameLower = filePath.toLowerCase();
+   // let rowCount: number;
+   // if (fileNameLower.endsWith('.json')) {
+   //   await conn.insertJSONFromPath(filePath, {schema, name: tableName});
+   //   // TODO: for JSON we can use insertJSONFromPath https://github.com/duckdb/duckdb-wasm/issues/1262
+   //   // fileNameLower.endsWith('.json') || fileNameLower.endsWith('.ndjson')
+   //   //   ? `read_json_auto(${escapeVal(fileName)})`
+   //   rowCount = getColValAsNumber(
+   //     await conn.query(`SELECT COUNT(*) FROM ${schema}.${tableName}`),
+   //   );
+   // } else {
+   const quotedFileName = escapeVal(filePath);
+   const readFileQuery =
+     fileNameLower.endsWith('.json') ||
+     fileNameLower.endsWith('.geojson') ||
+     fileNameLower.endsWith('.ndjson')
+       ? `read_json_auto(${quotedFileName}, maximum_object_size=104857600)` // 100MB
+       : fileNameLower.endsWith('.parquet')
+         ? `parquet_scan(${quotedFileName})`
+         : fileNameLower.endsWith('.csv') || fileNameLower.endsWith('.tsv')
+           ? `read_csv(${quotedFileName}, SAMPLE_SIZE=-1, AUTO_DETECT=TRUE)`
+           : quotedFileName;
+   // const readFileQuery = fileNameLower.endsWith('.csv')
+   //   ? `read_csv(${quotedFileName}, SAMPLE_SIZE=-1, AUTO_DETECT=TRUE)`
+   //   : quotedFileName;
+
+   // TODO: tableName generate
+   const rowCount = getColValAsNumber(
+     await conn.query(
+       `CREATE OR REPLACE ${mode} ${schema}.${tableName} AS
+         SELECT * FROM ${readFileQuery}`,
+     ),
+   );
+   // }
+   return {tableName, rowCount};
+ }
+
+ export async function createViewFromFile(
+   filePath: string,
+   schema: string,
+   tableName: string,
+   file: File | Uint8Array,
+ ): Promise<{tableName: string; rowCount: number}> {
+   const duckConn = await getDuckDb();
+
+   // const fileName = file.name;
+   // await duckConn.db.dropFile(fileName);
+   // await duckConn.db.registerFileHandle(
+   //   fileName,
+   //   file,
+   //   DuckDBDataProtocol.BROWSER_FILEREADER,
+   //   true,
+   // );
+
+   // const tableName = makeTableName(fileName);
+   // await duckConn.conn.query(`
+   //   CREATE OR REPLACE VIEW ${tableName} AS SELECT * FROM '${fileName}'
+   // `);
+
+   //const fileName = file.name;
+   await duckConn.db.dropFile(filePath);
+   if (file instanceof File) {
+     await duckConn.db.registerFileHandle(
+       filePath,
+       file,
+       DuckDBDataProtocol.BROWSER_FILEREADER,
+       true,
+     );
+   } else {
+     await duckConn.db.registerFileBuffer(filePath, file);
+   }
+
+   return createViewFromRegisteredFile(filePath, schema, tableName);
+
+   // const res = await duckConn.conn.query(
+   //   `SELECT count(*) FROM ${inputTableName}`,
+   // );
+   // const inputRowCount = getColValAsNumber(res, 0);
+   // const tableMeta = await duckConn.conn.query(
+   //   `DESCRIBE TABLE ${inputTableName}`,
+   // );
+   // const inputTableFields = Array.from(tableMeta).map((row) => ({
+   //   name: String(row?.column_name),
+   //   type: String(row?.column_type),
+   // }));
+
+   // const nextResult: DataTable = {
+   //   inputFileName,
+   //   tableName: inputTableName,
+   //   rowCount: inputRowCount,
+   //   // outputRowCount: undefined,
+   //   columns: inputTableFields,
+   // };
+   // // setResult(nextResult);
+   // return nextResult;
+ }
+
+ // async function createViewFromFile2(
+ //   file: File,
+ //   duckConn: DuckDb,
+ //   onTableCreated: (
+ //     inputTableName: string,
+ //     result: CreateTableDropzoneResult,
+ //   ) => void,
+ //   onError: (status: 'error', message: string) => void,
+ // ) {
+ //   try {
+ //     const inputFileName = file.name;
+ //     await duckConn.db.dropFile(inputFileName);
+ //     await duckConn.db.registerFileHandle(
+ //       inputFileName,
+ //       file,
+ //       DuckDBDataProtocol.BROWSER_FILEREADER,
+ //       true,
+ //     );
+
+ //     const inputTableName = genRandomStr(10, inputFileName).toLowerCase();
+ //     await duckConn.conn.query(`DROP TABLE IF EXISTS ${inputTableName}`);
+ //     const readFileQuery = inputFileName.endsWith('.parquet')
+ //       ? `parquet_scan(${escapeVal(inputFileName)})`
+ //       : `read_csv(${escapeVal(
+ //           inputFileName,
+ //         )}, SAMPLE_SIZE=-1, AUTO_DETECT=TRUE)`;
+ //     await duckConn.conn.query(
+ //       `CREATE TABLE ${inputTableName} AS
+ //         SELECT * FROM ${readFileQuery}`,
+ //     );
+
+ //     const res = await duckConn.conn.query(
+ //       `SELECT count(*) FROM ${inputTableName}`,
+ //     );
+ //     const inputRowCount = getColValAsNumber(res, 0);
+ //     const tableMeta = await duckConn.conn.query(
+ //       `DESCRIBE TABLE ${inputTableName}`,
+ //     );
+ //     const inputTableFields = Array.from(tableMeta).map((row) => ({
+ //       name: String(row?.column_name),
+ //       type: String(row?.column_type),
+ //     }));
+
+ //     const nextResult: CreateTableDropzoneResult = {
+ //       inputFileName,
+ //       inputTableName,
+ //       inputRowCount,
+ //       // outputRowCount: undefined,
+ //       inputTableFields,
+ //       columns: {},
+ //     };
+ //     // setResult(nextResult);
+ //     onTableCreated(inputTableName, nextResult);
+ //   } catch (e) {
+ //     console.error(e);
+ //     onError(e instanceof Error ? e.message : String(e));
+ //   }
+ // }
+
+ // async function maybeDropTable(
+ //   value: CreateTableDropzoneResult,
+ //   duckConn: DuckDb,
+ // ) {
+ //   const {inputFileName, inputTableName} = value || {};
+ //   if (inputFileName) {
+ //     await duckConn.db.dropFile(inputFileName);
+ //   }
+ //   if (inputTableName) {
+ //     await duckConn.conn.query(`DROP TABLE IF EXISTS ${inputTableName};`);
+ //   }
+ // }
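
A hedged sketch of how the file-loading helpers above might be called from browser code (the event handler, schema and table names are illustrative, not part of the package): createViewFromFile registers a File or Uint8Array with DuckDB-WASM under filePath and then delegates to createViewFromRegisteredFile, which picks read_json_auto, parquet_scan or read_csv based on the file extension.

  import {createViewFromFile, createTableFromQuery} from '@sqlrooms/duckdb';

  async function onFileSelected(file: File) {
    // Register the browser File with DuckDB-WASM and expose it as main.uploaded
    const {tableName, rowCount} = await createViewFromFile(
      file.name, // used as the registered file path; the extension drives the reader choice
      'main',
      'uploaded',
      file,
    );
    console.log(`Created ${tableName} with ${rowCount} rows`);

    // Derive another table from it with plain SQL
    await createTableFromQuery(
      'uploaded_sample',
      'SELECT * FROM main.uploaded LIMIT 100',
    );
  }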
@@ -0,0 +1,101 @@
+ import * as arrow from 'apache-arrow';
+ import {getDuckDb} from './useDuckDb';
+
+ export async function exportToCsv(
+   query: string,
+   fileName: string,
+   pageSize = 100000,
+ ) {
+   const {conn} = await getDuckDb();
+
+   let offset = 0;
+   const blobs: Blob[] = [];
+   let headersAdded = false;
+
+   while (true) {
+     const currentQuery = `(
+       ${query}
+     ) LIMIT ${pageSize} OFFSET ${offset}`;
+     const results = await conn.query(currentQuery);
+
+     // Check if we received any results; if not, we are done.
+     if (results.numRows === 0) {
+       break;
+     }
+
+     const csvChunk = convertToCsv(results, !headersAdded);
+     blobs.push(new Blob([csvChunk], {type: 'text/csv'}));
+
+     // Ensure that headers are not added in subsequent iterations
+     headersAdded = true;
+
+     // Increment offset to fetch the next chunk
+     offset += pageSize;
+   }
+
+   const fullCsvBlob = new Blob(blobs, {type: 'text/csv'});
+   downloadBlob(fullCsvBlob, fileName);
+ }
+
+ function convertToCsv(
+   arrowTable: arrow.Table,
+   includeHeaders: boolean,
+ ): string {
+   // return includeHeaders
+   //   ? csvFormat(arrowTable.toArray())
+   //   : csvFormatBody(arrowTable.toArray());
+
+   const columnNames = arrowTable.schema.fields.map((field) => field.name);
+   const columnsByName = columnNames.reduce(
+     (acc, columnName) => {
+       const col = arrowTable.getChild(columnName);
+       if (col) acc[columnName] = col;
+       return acc;
+     },
+     {} as Record<string, arrow.Vector>,
+   );
+
+   // Add header
+   let csvContent = includeHeaders ? columnNames.join(',') + '\r\n' : '';
+
+   // Add data rows
+   for (let i = 0; i < arrowTable.numRows; i++) {
+     const csvRow = columnNames
+       .map((columnName) => {
+         const cellValue = columnsByName[columnName]?.get(i);
+
+         // If the cell value is null or undefined, set it to an empty string.
+         if (cellValue == null) return '';
+
+         // Convert cell value to string
+         let cellValueStr = String(cellValue);
+
+         // Escape double quotes and wrap cell value in double quotes if necessary
+         if (
+           cellValueStr.includes('"') ||
+           cellValueStr.includes(',') ||
+           cellValueStr.includes('\n')
+         ) {
+           cellValueStr = '"' + cellValueStr.replace(/"/g, '""') + '"';
+         }
+
+         return cellValueStr;
+       })
+       .join(',');
+
+     csvContent += csvRow + '\r\n';
+   }
+
+   return csvContent;
+ }
+
+ function downloadBlob(blob: Blob, filename: string) {
+   const url = URL.createObjectURL(blob);
+   const a = document.createElement('a');
+   a.href = url;
+   a.download = filename;
+   document.body.appendChild(a);
+   a.click();
+   URL.revokeObjectURL(url);
+   document.body.removeChild(a);
+ }
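
A small sketch of calling exportToCsv from UI code (the query and file name are illustrative): the helper pages through the query with LIMIT/OFFSET, converts each Arrow chunk to CSV, and triggers a browser download of the combined blob.

  import {exportToCsv} from '@sqlrooms/duckdb';

  // Page through the query in 50k-row chunks and download the combined CSV
  await exportToCsv(
    'SELECT * FROM main.earthquakes ORDER BY time',
    'earthquakes.csv',
    50000,
  );

Note that the paging relies on the query producing a stable row order between pages, so an explicit ORDER BY is advisable.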
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
+ export * from './duckdb';
+ export * from './types';
+ export * from './useDuckDb';
+ export * from './exportToCsv';
package/src/types.ts ADDED
@@ -0,0 +1,11 @@
+ export type TableColumn = {
+   name: string;
+   type: string;
+ };
+
+ export type DataTable = {
+   tableName: string;
+   columns: TableColumn[];
+   rowCount?: number;
+   inputFileName?: string;
+ };