@flowblade/sqlduck 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -30
- package/dist/index.d.mts +23 -79
- package/dist/index.mjs +176 -279
- package/dist/types-DCqYqEsa.d.mts +80 -0
- package/dist/validation/zod/index.d.mts +37 -0
- package/dist/validation/zod/index.mjs +2 -0
- package/dist/zod-CwR_oehs.mjs +207 -0
- package/package.json +18 -21
- package/dist/index.cjs +0 -770
- package/dist/index.d.cts +0 -345
package/dist/index.cjs
DELETED
|
@@ -1,770 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
-
//#region \0rolldown/runtime.js
|
|
3
|
-
// Cached references to the reflection primitives used by the interop helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
/**
 * Expose every own string-keyed property of `from` on `to` through a live getter.
 * Keys already owned by `to`, and the key equal to `except`, are left untouched.
 * Nothing is copied unless `from` is a non-null object or a function.
 * `desc` is only scratch storage for the source property descriptor.
 * Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) return to;
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) continue;
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Enumerable when the source has no descriptor or the source property is enumerable.
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
/**
 * Wrap a required module in a namespace-like object.
 * The wrapper inherits from the module's prototype (or is a plain object for null/undefined).
 * `default` is set to the module itself when `isNodeMode` is truthy, the module is falsy,
 * or the module is not flagged with `__esModule`.
 * `target` is scratch storage and is always overwritten.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefaultExport = isNodeMode || !mod || !mod.__esModule;
  if (needsDefaultExport) {
    __defProp(target, "default", {
      value: mod,
      enumerable: true
    });
  }
  return __copyProps(target, mod);
};
|
|
23
|
-
//#endregion
|
|
24
|
-
let _duckdb_node_api = require("@duckdb/node-api");
|
|
25
|
-
let _logtape_logtape = require("@logtape/logtape");
|
|
26
|
-
let zod = require("zod");
|
|
27
|
-
zod = __toESM(zod);
|
|
28
|
-
let _httpx_assert = require("@httpx/assert");
|
|
29
|
-
let _httpx_plain_object = require("@httpx/plain-object");
|
|
30
|
-
//#region src/helpers/duck-exec.ts
|
|
31
|
-
/**
 * Small query helper around a connection object exposing `run(sql)`.
 * Every method runs the SQL and reads the rows back from the returned result.
 */
var DuckExec = class {
  #conn;
  constructor(duckConn) {
    this.#conn = duckConn;
  }
  /** Run `sql` and return the result of `getRowObjectsJS()`. */
  getRowObjectJS = async (sql) => {
    const result = await this.#conn.run(sql);
    return result.getRowObjectsJS();
  };
  /** Run `sql` and return the result of `getRowObjectsJson()`. */
  getRowObjectJson = async (sql) => {
    const result = await this.#conn.run(sql);
    return result.getRowObjectsJson();
  };
  /** Like `getRowObjectJS`, but returns the single row, or `null` when there is none. */
  getOneRowObjectJS = async (sql) => {
    return this.#singleRowOrNull(await this.getRowObjectJS(sql));
  };
  /** Like `getRowObjectJson`, but returns the single row, or `null` when there is none. */
  getOneRowObjectJson = async (sql) => {
    return this.#singleRowOrNull(await this.getRowObjectJson(sql));
  };
  /**
   * Return `null` for an empty list and the first row for a one-element list.
   * @throws {Error} when more than one row is present
   */
  #singleRowOrNull(rows) {
    if (rows.length === 0) return null;
    if (rows.length > 1) throw new Error("Expected one row, but got multiple rows");
    return rows[0];
  }
};
|
|
58
|
-
//#endregion
|
|
59
|
-
//#region src/helpers/duck-memory.ts
|
|
60
|
-
/** Tags accepted by `DuckMemory.getByTag`; anything else is rejected before SQL is built. */
const duckMemoryTags = [
  "BASE_TABLE",
  "HASH_TABLE",
  "PARQUET_READER",
  "CSV_READER",
  "ORDER_BY",
  "ART_INDEX",
  "COLUMN_DATA",
  "METADATA",
  "OVERFLOW_STRINGS",
  "IN_MEMORY_TABLE",
  "ALLOCATOR",
  "EXTENSION",
  "TRANSACTION",
  "EXTERNAL_FILE_CACHE",
  "WINDOW",
  "OBJECT_CACHE"
];
/** Whitelist mapping the public `orderBy` parameter to the ORDER BY clause appended to the query. */
const orderByParams = {
  memory_usage_bytes_desc: "memory_usage_bytes DESC",
  tag_desc: "tag DESC",
  tag_asc: "tag ASC"
};
/**
 * Reads memory statistics from the `duckdb_memory()` table function.
 */
var DuckMemory = class {
  #exec;
  /**
   * @param duckdbConn connection object handed to `DuckExec` (must expose `run(sql)`)
   */
  constructor(duckdbConn) {
    this.#exec = new DuckExec(duckdbConn);
  }
  /**
   * Return one row per memory tag.
   * @param params optional `{ orderBy }`, where `orderBy` is a key of `orderByParams`
   * @throws {Error} when `orderBy` is not a known key
   */
  getAll = async (params) => {
    const { orderBy } = params ?? {};
    const query = this.#applyOrderBy(`SELECT tag, memory_usage_bytes, temporary_storage_bytes
FROM duckdb_memory() as m`, orderBy);
    return this.#exec.getRowObjectJS(query);
  };
  /**
   * Return the row for a single tag, or `null` when the query yields no row.
   * @throws {Error} when `tag` is not listed in `duckMemoryTags`
   */
  getByTag = async (tag) => {
    // The tag is interpolated into the SQL below, so only whitelisted values may pass.
    if (!duckMemoryTags.includes(tag)) throw new Error(`Invalid DuckDB memory tag: ${tag}`);
    const query = `SELECT tag, memory_usage_bytes, temporary_storage_bytes
FROM duckdb_memory() as m
WHERE tag = '${tag}'`;
    return this.#exec.getOneRowObjectJS(query);
  };
  /**
   * Sum memory and temporary storage over all tags.
   * @returns `{ totalMB, totalTempMB }`, each rounded to the nearest mebibyte
   */
  getSummary = async () => {
    const rows = await this.getAll();
    let total = 0n;
    let totalTemp = 0n;
    for (const row of rows) {
      // BigInt() is a no-op for bigint inputs and also tolerates integer numbers.
      total += BigInt(row.memory_usage_bytes);
      totalTemp += BigInt(row.temporary_storage_bytes);
    }
    return {
      totalMB: DuckMemory.#toRoundedMB(total),
      totalTempMB: DuckMemory.#toRoundedMB(totalTemp)
    };
  };
  /**
   * Convert a non-negative byte count to mebibytes, rounding half up.
   * Rounding is done in BigInt arithmetic because plain BigInt division truncates.
   */
  static #toRoundedMB(bytes) {
    const bytesPerMB = 1048576n;
    return Number((bytes + bytesPerMB / 2n) / bytesPerMB);
  }
  /** Append the whitelisted ORDER BY clause for `orderBy`, or return `query` unchanged when it is undefined. */
  #applyOrderBy(query, orderBy) {
    if (orderBy === void 0) return query;
    // Own-property check: inherited keys such as "toString" must not reach the SQL text.
    if (!Object.hasOwn(orderByParams, orderBy)) throw new Error(`Invalid orderBy parameter: ${orderBy}`);
    return `${query} ORDER BY ${orderByParams[orderBy]}`;
  }
};
|
|
125
|
-
//#endregion
|
|
126
|
-
//#region src/appender/data-appender-callback.ts
|
|
127
|
-
/**
 * Tell whether `v` was declared with `async` (its constructor is named "AsyncFunction").
 * A plain function that merely returns a promise is NOT detected.
 * Returns false instead of throwing for `null`, `undefined` and other non-functions.
 */
const isOnDataAppendedAsyncCb = (v) => {
  return typeof v === "function" && v.constructor?.name === "AsyncFunction";
};
|
|
130
|
-
/**
 * Create a stateful collector that turns a running row total into progress statistics.
 *
 * Each call of the returned function receives the total number of rows appended so far and
 * returns `{ totalRows, timeMs, rowsPerSecond }`. `timeMs` and `rowsPerSecond` describe
 * only the interval since the previous call (or since the collector was created).
 */
const createOnDataAppendedCollector = () => {
  let lastCallbackTimeStart = Date.now();
  let appendedTotalRows = 0;
  return (currentTotalRows) => {
    const now = Date.now();
    // Clamp at 0 in case the wall clock was adjusted backwards between calls.
    const cbTimeMs = Math.max(0, now - lastCallbackTimeStart);
    const cbTotalRows = currentTotalRows - appendedTotalRows;
    const stats = {
      totalRows: currentTotalRows,
      timeMs: cbTimeMs,
      // Intervals shorter than 1 ms are rated as 1 ms: dividing by 0 would give Infinity (or NaN for 0 rows).
      rowsPerSecond: Math.round(cbTotalRows / Math.max(cbTimeMs, 1) * 1e3)
    };
    appendedTotalRows = currentTotalRows;
    lastCallbackTimeStart = now;
    return stats;
  };
};
|
|
146
|
-
//#endregion
|
|
147
|
-
//#region src/config/flowblade-logtape-sqlduck.config.ts
|
|
148
|
-
/** Logger configuration for this package: the category path handed to the default logger. */
const flowbladeLogtapeSqlduckConfig = {
  categories: [
    "flowblade",
    "sqlduck"
  ]
};
|
|
149
|
-
//#endregion
|
|
150
|
-
//#region src/logger/sqlduck-default-logtape-logger.ts
|
|
151
|
-
// Default logger for the package, created from the configured categories.
// Used as the fallback wherever a caller does not supply its own `logger`.
// The `(0, fn)(...)` form is emitted by the bundler and calls `getLogger` without a `this` receiver.
const sqlduckDefaultLogtapeLogger = (0, _logtape_logtape.getLogger)(flowbladeLogtapeSqlduckConfig.categories);
|
|
152
|
-
//#endregion
|
|
153
|
-
//#region src/table/get-duckdb-number-column-type.ts
|
|
154
|
-
/**
 * A bound counts as "float" when it is not a finite number, lies outside the
 * safe-integer range, or has a fractional part.
 */
const isFloatValue = (value) =>
  !Number.isFinite(value) || Math.abs(value) > Number.MAX_SAFE_INTEGER || !Number.isInteger(value);
/**
 * Choose the narrowest DuckDB numeric type able to hold the `[minimum, maximum]` range.
 *
 * - a missing bound yields BIGINT;
 * - float bounds yield FLOAT when both fit the FLOAT limits, DOUBLE otherwise;
 * - a non-negative minimum walks the unsigned ladder (UTINYINT … UHUGEINT);
 * - anything else walks the signed ladder (TINYINT … HUGEINT).
 */
const getDuckdbNumberColumnType = (params) => {
  const { minimum: lowest, maximum: highest } = params;
  if (lowest === void 0 || highest === void 0) return _duckdb_node_api.BIGINT;
  if (isFloatValue(lowest) || isFloatValue(highest)) {
    const fitsFloat = lowest >= -34028235e31 && highest <= 34028235e31;
    return fitsFloat ? _duckdb_node_api.FLOAT : _duckdb_node_api.DOUBLE;
  }
  if (lowest >= 0) {
    // [inclusive upper bound, DuckDB type name], narrowest first.
    const unsignedLadder = [
      [255, "UTINYINT"],
      [65535, "USMALLINT"],
      [4294967295, "UINTEGER"],
      [18446744073709551615n, "UBIGINT"]
    ];
    for (const [upper, typeName] of unsignedLadder) {
      if (highest <= upper) return _duckdb_node_api[typeName];
    }
    return _duckdb_node_api.UHUGEINT;
  }
  // [inclusive lower bound, inclusive upper bound, DuckDB type name], narrowest first.
  const signedLadder = [
    [-128, 127, "TINYINT"],
    [-32768, 32767, "SMALLINT"],
    [-2147483648, 2147483647, "INTEGER"],
    [-9223372036854775808n, 9223372036854775807n, "BIGINT"]
  ];
  for (const [lower, upper, typeName] of signedLadder) {
    if (lowest >= lower && highest <= upper) return _duckdb_node_api[typeName];
  }
  return _duckdb_node_api.HUGEINT;
};
|
|
179
|
-
//#endregion
|
|
180
|
-
//#region src/table/get-table-create-from-zod.ts
|
|
181
|
-
/** Maps the `create` option to the DDL prefix written before the table name. */
const createOptions = {
  CREATE: "CREATE TABLE",
  CREATE_OR_REPLACE: "CREATE OR REPLACE TABLE",
  IF_NOT_EXISTS: "CREATE TABLE IF NOT EXISTS"
};
/** DuckDB types that can be selected explicitly through a property's `duckdbType` entry. */
const duckDbTypesMap = new Map([
  ["VARCHAR", _duckdb_node_api.VARCHAR],
  ["BIGINT", _duckdb_node_api.BIGINT],
  ["TIMESTAMP", _duckdb_node_api.TIMESTAMP],
  ["UUID", _duckdb_node_api.UUID],
  ["BOOLEAN", _duckdb_node_api.BOOLEAN],
  ["INTEGER", _duckdb_node_api.INTEGER],
  ["DOUBLE", _duckdb_node_api.DOUBLE],
  ["FLOAT", _duckdb_node_api.FLOAT]
]);
/** Pick the DuckDB type of a JSON-schema `string` property from its `format`; VARCHAR when unrecognised. */
const getDuckdbStringColumnType = (format) => {
  switch (format) {
    case "date-time":
      return _duckdb_node_api.TIMESTAMP;
    case "int64":
      return _duckdb_node_api.BIGINT;
    case "uuid":
      return _duckdb_node_api.UUID;
    default:
      return _duckdb_node_api.VARCHAR;
  }
};
/**
 * Resolve the DuckDB type of one column from its JSON-schema definition.
 * An explicit, supported `duckdbType` wins; otherwise the type is guessed from `type`/`format`.
 * @throws {Error} when the JSON-schema `type` is not string, number, integer or boolean
 */
const getDuckdbColumnType = (columnName, def) => {
  const { type, duckdbType, format, minimum, maximum } = def;
  if (duckdbType !== void 0 && duckDbTypesMap.has(duckdbType)) return duckDbTypesMap.get(duckdbType);
  switch (type) {
    case "string":
      return getDuckdbStringColumnType(format);
    case "number":
    case "integer":
      // NOTE(review): a `number` property without both bounds resolves to BIGINT here,
      // which looks lossy for fractional values — confirm whether DOUBLE is intended.
      return getDuckdbNumberColumnType({
        minimum,
        maximum
      });
    case "boolean":
      return _duckdb_node_api.BOOLEAN;
    default:
      throw new Error(`Cannot guess '${columnName}' type - ${JSON.stringify(def)}`);
  }
};
/**
 * Build the CREATE TABLE statement for `table` from a schema exposing `toJSONSchema()`.
 *
 * @param params `{ table, schema, options }`; `options.create` is a key of `createOptions`
 *               and defaults to "CREATE"
 * @returns `{ ddl, columnTypes }`, where `columnTypes` maps each column name to its DuckDB type
 *          in declaration order
 * @throws {TypeError} when `options.create` is unknown or the schema declares no property
 * @throws {Error} when a column type cannot be guessed
 */
const getTableCreateFromZod = (params) => {
  const { table, schema, options } = params;
  const { create = "CREATE" } = options ?? {};
  // Without this check an unknown option would silently produce DDL starting with "undefined".
  if (!Object.hasOwn(createOptions, create)) throw new TypeError(`Invalid create option: ${String(create)}`);
  const fqTable = table.getFullName();
  const json = schema.toJSONSchema({
    target: "openapi-3.0",
    unrepresentable: "throw"
  });
  const properties = Object.entries(json.properties ?? {});
  // An empty `properties` object would otherwise yield a column-less, invalid CREATE TABLE.
  if (properties.length === 0) throw new TypeError("Schema must have at least one property");
  const columnTypesMap = /* @__PURE__ */ new Map();
  const columnLines = [];
  for (const [columnName, def] of properties) {
    const { nullable, primaryKey } = def;
    const duckdbType = getDuckdbColumnType(columnName, def);
    let constraint;
    if (primaryKey === true) constraint = "PRIMARY KEY";
    else if (nullable !== true) constraint = "NOT NULL";
    columnTypesMap.set(columnName, duckdbType);
    columnLines.push(` ${[
      columnName,
      duckdbType.toString(),
      constraint
    ].filter(Boolean).join(" ")}`);
  }
  return {
    ddl: `${createOptions[create]} ${fqTable} (\n${columnLines.join(",\n")}\n)`,
    columnTypes: columnTypesMap
  };
};
|
|
264
|
-
//#endregion
|
|
265
|
-
//#region src/table/create-table-from-zod.ts
|
|
266
|
-
/**
 * Generate the CREATE TABLE statement for `table` from `schema` and run it on `conn`.
 *
 * @param params `{ conn, table, schema, options, logger }`; `logger` defaults to the package logger
 * @returns `{ ddl, columnTypes }` as produced by `getTableCreateFromZod`
 * @throws {Error} "Failed to create table …" with the original failure as `cause`
 */
const createTableFromZod = async (params) => {
  const { conn, table, schema, options, logger = sqlduckDefaultLogtapeLogger } = params;
  const { ddl, columnTypes } = getTableCreateFromZod({
    table,
    schema,
    options
  });
  const tableName = table.getFullName();
  logger.debug(`Generate DDL for table '${tableName}'`, { ddl });
  try {
    await conn.run(ddl);
  } catch (e) {
    // Non-Error throwables have no `.message`; fall back to their string form.
    const reason = e instanceof Error ? e.message : String(e);
    const msg = `Failed to create table '${tableName}': ${reason}`;
    logger.error(msg, { ddl });
    throw new Error(msg, { cause: e });
  }
  // Logged outside the try block so a logging failure is not reported as a failed CREATE.
  logger.info(`Table '${tableName}' successfully created`, { ddl });
  return {
    ddl,
    columnTypes
  };
};
|
|
286
|
-
//#endregion
|
|
287
|
-
//#region src/utils/rows-to-columns-chunks.ts
|
|
288
|
-
/**
 * Convert one JS cell value to the form pushed into a column:
 * `Date` → `DuckDBTimestampValue` (epoch microseconds), `bigint` → decimal string,
 * `undefined` → `null`; every other value is passed through unchanged.
 */
const toDuckValue = (value) => {
  if (value instanceof Date) return new _duckdb_node_api.DuckDBTimestampValue(BigInt(value.getTime() * 1e3));
  if (typeof value === "bigint") return value.toString(10);
  return value === void 0 ? null : value;
};
/**
 * Pivot a stream of row objects into column arrays, yielding them in chunks so the
 * whole dataset is never buffered. Each yielded item holds one array per column, with
 * at most `chunkSize` values each.
 *
 * The column order is taken from the keys of the first row; later rows are read with
 * those same keys (missing keys become `null`).
 *
 * `rows` may be any async or sync iterable of row objects (async generators included).
 *
 * Example for chunkSize = 2:
 *   input rows: [{id:'1',name:'A'}, {id:'2',name:'B'}, {id:'3',name:'C'}]
 *   yields: [[['1','2'], ['A','B']], [['3'], ['C']]]
 *
 * @throws {Error} when `chunkSize` is not a positive safe integer
 */
async function* rowsToColumnsChunks(params) {
  const { rows, chunkSize } = params;
  if (!Number.isSafeInteger(chunkSize) || chunkSize <= 0) throw new Error(`chunkSize must be a positive integer, got ${chunkSize}`);
  let keys;
  let columns;
  let rowsInChunk = 0;
  for await (const row of rows) {
    if (keys === void 0) {
      // The first row fixes the column layout for the whole stream.
      keys = Object.keys(row);
      columns = keys.map(() => []);
    }
    keys.forEach((k, i) => columns[i].push(toDuckValue(row[k])));
    rowsInChunk++;
    if (rowsInChunk >= chunkSize) {
      yield columns;
      // Start fresh arrays: the consumer owns the chunk that was just yielded.
      columns = keys.map(() => []);
      rowsInChunk = 0;
    }
  }
  if (rowsInChunk > 0) yield columns;
}
|
|
328
|
-
//#endregion
|
|
329
|
-
//#region src/sql-duck.ts
|
|
330
|
-
var SqlDuck = class {
  #conn;
  #logger;
  /**
   * @param params `{ conn, logger }`; `logger` defaults to the package logger
   */
  constructor(params) {
    this.#conn = params.conn;
    this.#logger = params.logger ?? sqlduckDefaultLogtapeLogger;
  }
  /**
   * Create a table from a Zod schema and fill it with data from a row stream.
   *
   * Rows are appended in chunks of `chunkSize` (1 to 2048, default 2048). After each
   * flushed chunk, `onDataAppended` (if given) receives `{ totalRows, timeMs, rowsPerSecond }`
   * and is awaited, so an async callback applies back-pressure.
   *
   * @example
   * ```typescript
   * import * as z from 'zod';
   *
   * const sqlDuck = new SqlDuck({ conn: duckDbConnection });
   *
   * // Schema of the table, note that you can use meta to add information
   * const userSchema = z.object({
   *   id: z.number().int().meta({ primaryKey: true }),
   *   name: z.string(),
   * });
   *
   * // Async generator function that yields rows to insert
   * async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
   *   // database or api call
   * }
   *
   * const result = await sqlDuck.toTable({
   *   table: new Table({ name: 'user', database: 'mydb' }),
   *   schema: userSchema,
   *   rowStream: getUserRows(),
   *   chunkSize: 2048,
   *   onDataAppended: ({ totalRows }) => {
   *     console.log(`Appended ${totalRows} rows so far`);
   *   },
   *   createOptions: {
   *     create: 'CREATE_OR_REPLACE',
   *   },
   * });
   *
   * console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
   * console.log(`Table created with DDL: ${result.createTableDDL}`);
   * ```
   *
   * @returns `{ timeMs, totalRows, createTableDDL }`
   * @throws {Error} when `chunkSize` is out of range, the table cannot be created,
   *                 or appending fails (original failure attached as `cause`)
   */
  toTable = async (params) => {
    const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended } = params;
    if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
    const timeStart = Date.now();
    const fullName = table.getFullName();
    const { columnTypes, ddl } = await createTableFromZod({
      conn: this.#conn,
      schema,
      table,
      options: createOptions,
      // Forward the instance logger so DDL logs go to the same place as append logs.
      logger: this.#logger
    });
    const appender = await this.#conn.createAppender(table.tableName, table.schemaName, table.databaseName);
    const chunkTypes = Array.from(columnTypes.values());
    const dataAppendedCollector = createOnDataAppendedCollector();
    const columnStream = rowsToColumnsChunks({
      rows: rowStream,
      chunkSize
    });
    let totalRows = 0;
    let appenderClosed = false;
    try {
      for await (const dataChunk of columnStream) {
        // `dataChunk` is an array of columns; the row count is the length of any one column.
        const rowsInChunk = dataChunk[0]?.length ?? 0;
        const chunk = _duckdb_node_api.DuckDBDataChunk.create(chunkTypes);
        this.#logger.debug(`Inserting chunk of ${rowsInChunk} rows`, { table: fullName });
        chunk.setColumns(dataChunk);
        appender.appendDataChunk(chunk);
        appender.flushSync();
        totalRows += rowsInChunk;
        // Awaiting unconditionally also covers plain functions that return a promise.
        if (onDataAppended !== void 0) await onDataAppended(dataAppendedCollector(totalRows));
      }
      appender.closeSync();
      appenderClosed = true;
    } catch (e) {
      if (!appenderClosed) {
        try {
          appender.closeSync();
        } catch (closeError) {
          // Best-effort cleanup: never let a close failure hide the original error.
          this.#logger.debug(`Failed to close appender for table '${fullName}' - ${closeError?.message ?? ""}`, { table: fullName });
        }
      }
      const msg = `Failed to append data into table '${fullName}' - ${e?.message ?? ""}`;
      this.#logger.error(msg, { table: fullName });
      throw new Error(msg, { cause: e });
    }
    const timeMs = Math.round(Date.now() - timeStart);
    this.#logger.info(`Successfully appended ${totalRows} rows into '${fullName}' in ${timeMs}ms`, {
      table: fullName,
      timeMs,
      totalRows
    });
    return {
      timeMs,
      totalRows,
      createTableDDL: ddl
    };
  };
};
|
|
426
|
-
//#endregion
|
|
427
|
-
//#region src/utils/zod-codecs.ts
|
|
428
|
-
// Conversion functions used by the codecs below, named for readability.
const dateToIsoString = (date) => date.toISOString();
const isoStringToDate = (isoString) => new Date(isoString);
const bigintToDecimalString = (bigint) => bigint.toString();
/**
 * Ready-made Zod codecs.
 * - `dateToString`: pairs `zod.date()` with `zod.iso.datetime()`; `decode` turns a Date into
 *   its ISO string and `encode` turns the ISO string back into a Date.
 * - `bigintToString`: pairs `zod.bigint()` with a string schema tagged `format: "int64"`;
 *   `decode` stringifies the bigint and `encode` parses it back with `BigInt`.
 */
const zodCodecs = {
  dateToString: zod.codec(zod.date(), zod.iso.datetime(), {
    decode: dateToIsoString,
    encode: isoStringToDate
  }),
  bigintToString: zod.codec(zod.bigint(), zod.string().meta({ format: "int64" }), {
    decode: bigintToDecimalString,
    encode: BigInt
  })
};
|
|
438
|
-
//#endregion
|
|
439
|
-
//#region src/objects/database.ts
|
|
440
|
-
/**
 * Handle describing an attached database, identified by its alias.
 */
var Database = class {
  #params;
  /** Alias the database was attached under. */
  get alias() {
    return this.#params.alias;
  }
  /**
   * @param params `{ alias }`
   */
  constructor(params) {
    this.#params = params;
  }
  /** Plain-object description: `{ type: "database", params: { alias } }`. */
  toJson() {
    return {
      type: "database",
      params: { alias: this.#params.alias }
    };
  }
  /**
   * Tag shown by `Object.prototype.toString` ("[object <alias>]").
   * It must be a string-valued property: a method is ignored by the runtime, hence the getter.
   * The brand check keeps reads on the bare prototype from throwing on the missing private field.
   */
  get [Symbol.toStringTag]() {
    return #params in this ? this.#params.alias : "Database";
  }
};
|
|
458
|
-
//#endregion
|
|
459
|
-
//#region src/objects/table.ts
|
|
460
|
-
/**
 * Immutable reference to a table: a name plus an optional schema and database.
 */
var Table = class Table {
  #fqTable;
  get tableName() {
    return this.#fqTable.name;
  }
  get schemaName() {
    return this.#fqTable.schema;
  }
  get databaseName() {
    return this.#fqTable.database;
  }
  /**
   * @param fqTableOrName either a bare table name or an object `{ name, schema?, database? }`
   */
  constructor(fqTableOrName) {
    const isBareName = typeof fqTableOrName === "string";
    this.#fqTable = isBareName ? { name: fqTableOrName } : fqTableOrName;
  }
  /**
   * Build the dot-separated name "database.schema.table".
   * `options.defaultDatabase` / `options.defaultSchema` fill in parts the table does not
   * define; parts that are still empty are left out.
   */
  getFullName = (options) => {
    const fallbacks = options ?? {};
    const {
      name,
      database = fallbacks.defaultDatabase,
      schema = fallbacks.defaultSchema
    } = this.#fqTable;
    const parts = [];
    for (const part of [database, schema, name]) {
      if (part) parts.push(part);
    }
    return parts.join(".");
  };
  /** Return a copy of this table bound to another database. */
  withDatabase = (database) => {
    const next = { ...this.#fqTable, database };
    return new Table(next);
  };
  /** Return a copy of this table bound to another schema. */
  withSchema = (schema) => {
    const next = { ...this.#fqTable, schema };
    return new Table(next);
  };
};
|
|
500
|
-
//#endregion
|
|
501
|
-
//#region src/validation/core/duckdb-reserved-keywords.ts
|
|
502
|
-
/**
|
|
503
|
-
* DuckDB reserved keywords that cannot be used as unquoted identifiers.
|
|
504
|
-
* @see https://duckdb.org/docs/sql/keywords-and-identifiers.html
|
|
505
|
-
*/
|
|
506
|
-
// Upper-case keyword list, kept in alphabetical order and grouped by initial letter.
const duckdbReservedKeywords = [
  "ALL", "ANALYSE", "ANALYZE", "AND", "ANY", "ARRAY", "AS", "ASC", "ASYMMETRIC",
  "BOTH",
  "CASE", "CAST", "CHECK", "COLLATE", "COLUMN", "CONSTRAINT", "CREATE", "CROSS",
  "CURRENT_CATALOG", "CURRENT_DATE", "CURRENT_ROLE", "CURRENT_SCHEMA",
  "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER",
  "DEFAULT", "DEFERRABLE", "DESC", "DISTINCT", "DO",
  "ELSE", "END", "EXCEPT", "EXISTS", "EXTRACT",
  "FALSE", "FETCH", "FOR", "FOREIGN", "FROM",
  "GRANT", "GROUP",
  "HAVING",
  "IF", "ILIKE", "IN", "INITIALLY", "INNER", "INTERSECT", "INTO", "IS", "ISNULL",
  "JOIN",
  "LATERAL", "LEADING", "LEFT", "LIKE", "LIMIT", "LOCALTIME", "LOCALTIMESTAMP",
  "NATURAL", "NOT", "NOTNULL", "NULL",
  "OFFSET", "ON", "ONLY", "OR", "ORDER", "OUTER", "OVERLAPS",
  "PLACING", "PRIMARY",
  "REFERENCES", "RETURNING", "RIGHT", "ROW",
  "SELECT", "SESSION_USER", "SIMILAR", "SOME", "SYMMETRIC",
  "TABLE", "THEN", "TO", "TRAILING", "TRUE",
  "UNION", "UNIQUE", "USING",
  "VARIADIC", "VERBOSE",
  "WHEN", "WHERE", "WINDOW", "WITH"
];
|
|
604
|
-
//#endregion
|
|
605
|
-
//#region src/validation/zod/duckdb-valid-names.schemas.ts
|
|
606
|
-
/** Longest object name accepted by the schemas below. */
const duckdbMaximumObjectNameLength = 120;
/** A letter or underscore followed by word characters (letters, digits, underscore). */
const duckDbObjectNameRegex = /^[a-z_]\w*$/i;
/** Upper-cased reserved keywords, for case-insensitive lookups. */
const duckdbReservedKeywordsSet = new Set(duckdbReservedKeywords.map((keyword) => keyword.toUpperCase()));
/**
 * Validates an unquoted table name: 1 to 120 characters, identifier-shaped,
 * and not a reserved keyword (compared case-insensitively).
 */
const duckTableNameSchema = zod
  .string()
  .min(1)
  .max(duckdbMaximumObjectNameLength)
  .regex(
    duckDbObjectNameRegex,
    "Table name must start with a letter or underscore, and contain only letters, numbers and underscores"
  )
  .refine(
    (value) => !duckdbReservedKeywordsSet.has(value.toUpperCase()),
    { error: "Value is a DuckDB reserved keyword and cannot be used as a table name" }
  );
/** Aliases follow exactly the same rules as table names. */
const duckTableAliasSchema = duckTableNameSchema;
|
|
611
|
-
//#endregion
|
|
612
|
-
//#region src/manager/database/duck-database-manager.schemas.ts
|
|
613
|
-
// Allowed literal values for the enum-typed attach options.
const duckdbAttachAccessModes = ["READ_ONLY", "READ_WRITE", "AUTOMATIC"];
const duckdbAttachEncryptionCiphers = ["CBC", "CTR", "GCM"];
/**
 * Options accepted for an ATTACH statement. Every key is optional and
 * unknown keys are rejected (strict object).
 */
const duckdbAttachOptionsSchema = zod.strictObject({
  ACCESS_MODE: zod.optional(zod.enum(duckdbAttachAccessModes)),
  COMPRESS: zod.optional(zod.enum(["true", "false"])),
  TYPE: zod.optional(zod.enum(["DUCKDB", "SQLITE"])),
  BLOCK_SIZE: zod.optional(zod.int32().min(16384).max(262144)),
  ROW_GROUP_SIZE: zod.optional(zod.int32().positive()),
  STORAGE_VERSION: zod.optional(zod.string().startsWith("v").regex(/^v?\d{1,4}\.\d{1,4}\.\d{1,4}$/)),
  ENCRYPTION_KEY: zod.optional(zod.string().min(8)),
  ENCRYPTION_CIPHER: zod.optional(zod.enum(duckdbAttachEncryptionCiphers))
});
/** Parameters for an in-memory database. */
const duckMemoryDbParamsSchema = zod.strictObject({
  type: zod.literal(":memory:"),
  alias: duckTableAliasSchema,
  options: zod.optional(duckdbAttachOptionsSchema)
});
/** Parameters for a file-backed database; the path must end with ".db". */
const duckFileDbParamsSchema = zod.strictObject({
  type: zod.literal("duckdb"),
  path: zod.string().min(4).endsWith(".db"),
  alias: duckTableAliasSchema,
  options: zod.optional(duckdbAttachOptionsSchema)
});
/** Database parameters accepted by the manager, discriminated on `type`. */
const duckDatabaseManagerDbParamsSchema = zod.discriminatedUnion("type", [
  duckMemoryDbParamsSchema,
  duckFileDbParamsSchema
]);
|
|
641
|
-
//#endregion
|
|
642
|
-
//#region src/manager/database/commands/duck-database-attach-command.ts
|
|
643
|
-
/**
 * Builds the raw ATTACH statement for a set of validated database parameters.
 */
var DuckDatabaseAttachCommand = class {
  options;
  dbParams;
  /**
   * @param dbParams `{ type, alias, path?, options? }`
   * @param options  `{ behaviour? }` — extra keywords placed right after ATTACH
   *                 (for example "OR REPLACE" or "IF NOT EXISTS")
   */
  constructor(dbParams, options) {
    this.dbParams = dbParams;
    this.options = options ?? {};
  }
  /**
   * Render the statement as
   * `ATTACH [behaviour] '<target>' [AS alias] [(option, ...)]`.
   * String literals are emitted with embedded single quotes doubled.
   */
  getRawSql = () => {
    // Double embedded single quotes so a path or option value cannot terminate the
    // SQL string literal early (e.g. "o'brien.db" or an encryption key containing ').
    const quote = (value) => `'${String(value).replace(/'/g, "''")}'`;
    const dbParams = this.dbParams;
    const parts = ["ATTACH", this.options.behaviour].filter(Boolean);
    const { type, alias } = dbParams;
    switch (type) {
      case ":memory:":
        parts.push("':memory:'");
        break;
      case "duckdb":
        parts.push(quote(dbParams.path));
        break;
      default:
        (0, _httpx_assert.assertNever)(type);
    }
    // `!= null` also skips an undefined alias, which would otherwise render "AS undefined".
    if (alias != null) parts.push("AS", `${alias}`);
    const options = (0, _httpx_plain_object.isPlainObject)(dbParams.options) ? Object.entries(dbParams.options).map(([key, value]) => {
      // ACCESS_MODE is emitted as a bare keyword; every other option as `KEY 'value'`.
      return key === "ACCESS_MODE" ? value : `${key} ${quote(value)}`;
    }) : [];
    if (options.length > 0) parts.push(`(${options.join(", ")})`);
    return parts.filter(Boolean).join(" ");
  };
};
|
|
671
|
-
//#endregion
|
|
672
|
-
//#region src/manager/database/duck-database-manager.ts
|
|
673
|
-
/**
 * Runs database-level commands (attach, detach, checkpoint, ...) on a connection,
 * logging the duration of every command.
 */
var DuckDatabaseManager = class {
  #conn;
  #logger;
  /**
   * @param conn   connection exposing `runAndReadAll(sql)`
   * @param params optional `{ logger }`; defaults to the package logger tagged with
   *               `source: "DuckDatabaseManager"`
   */
  constructor(conn, params) {
    this.#conn = conn;
    this.#logger = params?.logger ?? sqlduckDefaultLogtapeLogger.with({ source: "DuckDatabaseManager" });
  }
  /**
   * Attach a database to the current connection.
   * The parameters are validated before any SQL is built.
   *
   * @example
   * ```typescript
   * const dbManager = new DuckDatabaseManager(conn);
   * const database = await dbManager.attach({
   *   type: ':memory:', // or 'duckdb' together with a `path`
   *   alias: 'mydb',
   *   options: { COMPRESS: 'true' }
   * });
   *
   * console.log(database.alias); // 'mydb'
   * ```
   */
  attach = async (dbParams, options) => {
    const validated = zod.parse(duckDatabaseManagerDbParamsSchema, dbParams);
    const command = new DuckDatabaseAttachCommand(validated, options);
    const rawSql = command.getRawSql();
    await this.#executeRawSqlCommand(`attach(${validated.alias})`, rawSql);
    return new Database({ alias: validated.alias });
  };
  /** Same as `attach`, using ATTACH OR REPLACE. */
  attachOrReplace = async (dbParams) => {
    return this.attach(dbParams, { behaviour: "OR REPLACE" });
  };
  /** Same as `attach`, using ATTACH IF NOT EXISTS. */
  attachIfNotExists = async (dbParams) => {
    return this.attach(dbParams, { behaviour: "IF NOT EXISTS" });
  };
  /** Run SHOW DATABASES and return its rows. */
  showDatabases = async () => {
    const rows = await this.#executeRawSqlCommand("showDatabases()", `SHOW DATABASES`);
    return rows;
  };
  /** Detach the database registered under `dbAlias` (validated first). Resolves to `true`. */
  detach = async (dbAlias) => {
    return this.#runAliasCommand("detach", "DETACH", dbAlias);
  };
  /** Like `detach`, using DETACH IF EXISTS. Resolves to `true`. */
  detachIfExists = async (dbAlias) => {
    return this.#runAliasCommand("detachIfExists", "DETACH IF EXISTS", dbAlias);
  };
  /**
   * The statistics recomputed by the ANALYZE statement are only used for join order optimization.
   *
   * It is therefore recommended to recompute these statistics for improved join orders,
   * especially after performing large updates (inserts and/or deletes).
   *
   * @link https://duckdb.org/docs/stable/sql/statements/analyze
   */
  analyze = async () => {
    await this.#executeRawSqlCommand("analyze()", "ANALYZE");
    return true;
  };
  /** Run CHECKPOINT on the database registered under `dbAlias` (validated first). Resolves to `true`. */
  checkpoint = async (dbAlias) => {
    return this.#runAliasCommand("checkpoint", "CHECKPOINT", dbAlias);
  };
  /**
   * Validate `dbAlias`, then run `<statement> <alias>` and resolve to `true`.
   * The alias goes into the SQL unquoted, so it must pass the alias schema first.
   */
  async #runAliasCommand(commandName, statement, dbAlias) {
    const safeAlias = zod.parse(duckTableAliasSchema, dbAlias);
    await this.#executeRawSqlCommand(`${commandName}(${safeAlias})`, `${statement} ${safeAlias}`);
    return true;
  }
  /**
   * Execute `rawSql`, log how long it took and return the rows.
   * @throws {Error} wrapping the failure (as `cause`) after logging name, SQL and duration
   */
  async #executeRawSqlCommand(name, rawSql) {
    const startTime = Date.now();
    try {
      const result = await this.#conn.runAndReadAll(rawSql);
      const timeMs = Math.round(Date.now() - startTime);
      const data = result.getRowObjectsJS();
      this.#logger.info(`DuckDatabaseManager.${name} in ${timeMs}ms`, { timeMs });
      return data;
    } catch (e) {
      const msg = `DuckDatabaseManager: failed to run "${name}" - ${e?.message ?? ""}`;
      const timeMs = Math.round(Date.now() - startTime);
      this.#logger.error(msg, {
        name,
        sql: rawSql,
        timeMs
      });
      throw new Error(msg, { cause: e });
    }
  }
};
|
|
757
|
-
//#endregion
|
|
758
|
-
exports.Database = Database;
|
|
759
|
-
exports.DuckDatabaseManager = DuckDatabaseManager;
|
|
760
|
-
exports.DuckMemory = DuckMemory;
|
|
761
|
-
exports.SqlDuck = SqlDuck;
|
|
762
|
-
exports.Table = Table;
|
|
763
|
-
exports.duckDatabaseManagerDbParamsSchema = duckDatabaseManagerDbParamsSchema;
|
|
764
|
-
exports.duckTableAliasSchema = duckTableAliasSchema;
|
|
765
|
-
exports.duckTableNameSchema = duckTableNameSchema;
|
|
766
|
-
exports.duckdbReservedKeywords = duckdbReservedKeywords;
|
|
767
|
-
exports.flowbladeLogtapeSqlduckConfig = flowbladeLogtapeSqlduckConfig;
|
|
768
|
-
exports.getTableCreateFromZod = getTableCreateFromZod;
|
|
769
|
-
exports.sqlduckDefaultLogtapeLogger = sqlduckDefaultLogtapeLogger;
|
|
770
|
-
exports.zodCodecs = zodCodecs;
|