@flowblade/sqlduck 0.8.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,6 +54,72 @@ const queryResult = await dbDuckDbMemoryConn.query<{
54
54
  `);
55
55
  ```
56
56
 
57
+ ## Benchmarks
58
+
59
+ ### Node 24
60
+
61
+ ```
62
+ RUN v4.1.1 /home/sebastien/github/flowblade/packages/sqlduck
63
+
64
+
65
+ ✓ bench/appender.bench.ts > appender benches 66030ms
66
+ name hz min max mean p75 p99 p995 p999 rme samples
67
+ · duckdb appender, count: 1000000, chunk size 2048 0.0642 15,577.01 15,577.01 15,577.01 15,577.01 15,577.01 15,577.01 15,577.01 ±0.00% 1
68
+ · duckdb appender, count: 1000000, chunk size 1024 0.0579 17,263.44 17,263.44 17,263.44 17,263.44 17,263.44 17,263.44 17,263.44 ±0.00% 1
69
+
70
+ ✓ bench/stream.bench.ts > Bench stream 22923ms
71
+ name hz min max mean p75 p99 p995 p999 rme samples
72
+ · rowToColumnsChunk with chunkSize 2048 (count: 1000000) 1.2126 742.59 906.65 824.67 863.91 906.65 906.65 906.65 ±4.26% 10
73
+ · mapFakeRowStream with chunkSize 2048 (count: 1000000) 0.8049 1,123.18 1,498.68 1,242.43 1,257.91 1,498.68 1,498.68 1,498.68 ±6.40% 10
74
+
75
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 615ms
76
+ name hz min max mean p75 p99 p995 p999 rme samples
77
+ · getTableCreateFromZod 16,562.93 0.0242 2.5902 0.0604 0.0734 0.2555 0.3741 0.8135 ±2.32% 8282
78
+
79
+ BENCH Summary
80
+
81
+ duckdb appender, count: 1000000, chunk size 2048 - bench/appender.bench.ts > appender benches
82
+ 1.11x faster than duckdb appender, count: 1000000, chunk size 1024
83
+
84
+ rowToColumnsChunk with chunkSize 2048 (count: 1000000) - bench/stream.bench.ts > Bench stream
85
+ 1.51x faster than mapFakeRowStream with chunkSize 2048 (count: 1000000)
86
+
87
+ getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
88
+
89
+ ```
90
+
91
+ ### Bun 1.3.11
92
+
93
+ ```
94
+ RUN v4.1.1 /home/sebastien/github/flowblade/packages/sqlduck
95
+
96
+
97
+ ✓ bench/appender.bench.ts > appender benches 36627ms
98
+ name hz min max mean p75 p99 p995 p999 rme samples
99
+ · duckdb appender, count: 1000000, chunk size 2048 0.1177 8,495.41 8,495.41 8,495.41 8,495.41 8,495.41 8,495.41 8,495.41 ±0.00% 1
100
+ · duckdb appender, count: 1000000, chunk size 1024 0.1064 9,397.97 9,397.97 9,397.97 9,397.97 9,397.97 9,397.97 9,397.97 ±0.00% 1
101
+
102
+ ✓ bench/stream.bench.ts > Bench stream 23421ms
103
+ name hz min max mean p75 p99 p995 p999 rme samples
104
+ · rowToColumnsChunk with chunkSize 2048 (count: 1000000) 1.1378 801.60 1,080.22 878.91 910.91 1,080.22 1,080.22 1,080.22 ±6.85% 10
105
+ · mapFakeRowStream with chunkSize 2048 (count: 1000000) 0.8118 1,130.36 1,448.99 1,231.78 1,268.45 1,448.99 1,448.99 1,448.99 ±5.34% 10
106
+
107
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 622ms
108
+ name hz min max mean p75 p99 p995 p999 rme samples
109
+ · getTableCreateFromZod 22,447.94 0.0210 5.4621 0.0445 0.0442 0.1657 0.2167 2.5852 ±5.37% 11224
110
+
111
+ BENCH Summary
112
+
113
+ rowToColumnsChunk with chunkSize 2048 (count: 1000000) - bench/stream.bench.ts > Bench stream
114
+ 1.40x faster than mapFakeRowStream with chunkSize 2048 (count: 1000000)
115
+
116
+ getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
117
+
118
+ duckdb appender, count: 1000000, chunk size 2048 - bench/appender.bench.ts > appender benches
119
+ 1.11x faster than duckdb appender, count: 1000000, chunk size 1024
120
+
121
+ ```
122
+
57
123
  ### Local scripts
58
124
 
59
125
  | Name | Description |
package/dist/index.cjs CHANGED
@@ -21,9 +21,13 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
21
21
  enumerable: true
22
22
  }) : target, mod));
23
23
  //#endregion
24
+ let _logtape_logtape = require("@logtape/logtape");
24
25
  let _duckdb_node_api = require("@duckdb/node-api");
25
26
  let zod = require("zod");
26
27
  zod = __toESM(zod);
28
+ //#region src/config/flowblade-logtape-sqlduck.config.ts
29
+ const flowbladeLogtapeSqlduckConfig = { categories: ["flowblade", "sqlduck"] };
30
+ //#endregion
27
31
  //#region src/helpers/duck-exec.ts
28
32
  var DuckExec = class {
29
33
  #conn;
@@ -120,6 +124,9 @@ var DuckMemory = class {
120
124
  };
121
125
  };
122
126
  //#endregion
127
+ //#region src/logger/sqlduck-default-logtape-logger.ts
128
+ const sqlduckDefaultLogtapeLogger = (0, _logtape_logtape.getLogger)(flowbladeLogtapeSqlduckConfig.categories);
129
+ //#endregion
123
130
  //#region src/appender/data-appender-callback.ts
124
131
  const isOnDataAppendedAsyncCb = (v) => {
125
132
  return v.constructor.name === "AsyncFunction";
@@ -141,23 +148,66 @@ const createOnDataAppendedCollector = () => {
141
148
  };
142
149
  };
143
150
  //#endregion
151
+ //#region src/table/get-duckdb-number-column-type.ts
152
+ const isFloatValue = (value) => {
153
+ if (!Number.isFinite(value)) return true;
154
+ if (Math.abs(value) > Number.MAX_SAFE_INTEGER) return true;
155
+ return !Number.isInteger(value);
156
+ };
157
+ const getDuckdbNumberColumnType = (params) => {
158
+ const { minimum, maximum } = params;
159
+ if (minimum === void 0 || maximum === void 0) return _duckdb_node_api.BIGINT;
160
+ if (isFloatValue(minimum) || isFloatValue(maximum)) {
161
+ if (minimum >= -34028235e31 && maximum <= 34028235e31) return _duckdb_node_api.FLOAT;
162
+ return _duckdb_node_api.DOUBLE;
163
+ }
164
+ if (minimum >= 0) {
165
+ if (maximum <= 255) return _duckdb_node_api.UTINYINT;
166
+ if (maximum <= 65535) return _duckdb_node_api.USMALLINT;
167
+ if (maximum <= 4294967295) return _duckdb_node_api.UINTEGER;
168
+ if (maximum <= 18446744073709551615n) return _duckdb_node_api.UBIGINT;
169
+ return _duckdb_node_api.UHUGEINT;
170
+ }
171
+ if (minimum >= -128 && maximum <= 127) return _duckdb_node_api.TINYINT;
172
+ if (minimum >= -32768 && maximum <= 32767) return _duckdb_node_api.SMALLINT;
173
+ if (minimum >= -2147483648 && maximum <= 2147483647) return _duckdb_node_api.INTEGER;
174
+ if (minimum >= -9223372036854775808n && maximum <= 9223372036854775807n) return _duckdb_node_api.BIGINT;
175
+ return _duckdb_node_api.HUGEINT;
176
+ };
177
+ //#endregion
144
178
  //#region src/table/get-table-create-from-zod.ts
145
- const createMap = {
179
+ const createOptions = {
146
180
  CREATE: "CREATE TABLE",
147
181
  CREATE_OR_REPLACE: "CREATE OR REPLACE TABLE",
148
182
  IF_NOT_EXISTS: "CREATE TABLE IF NOT EXISTS"
149
183
  };
150
- const getTableCreateFromZod = (table, schema, options) => {
184
+ const duckDbTypes = [
185
+ ["VARCHAR", _duckdb_node_api.VARCHAR],
186
+ ["BIGINT", _duckdb_node_api.BIGINT],
187
+ ["TIMESTAMP", _duckdb_node_api.TIMESTAMP],
188
+ ["UUID", _duckdb_node_api.UUID],
189
+ ["BOOLEAN", _duckdb_node_api.BOOLEAN],
190
+ ["INTEGER", _duckdb_node_api.INTEGER],
191
+ ["DOUBLE", _duckdb_node_api.DOUBLE],
192
+ ["FLOAT", _duckdb_node_api.FLOAT]
193
+ ];
194
+ const duckDbTypesMap = new Map(duckDbTypes);
195
+ const getTableCreateFromZod = (params) => {
196
+ const { table, schema, options } = params;
151
197
  const { create = "CREATE" } = options ?? {};
152
198
  const fqTable = table.getFullName();
153
- const json = schema.toJSONSchema({ target: "openapi-3.0" });
199
+ const json = schema.toJSONSchema({
200
+ target: "openapi-3.0",
201
+ unrepresentable: "throw"
202
+ });
154
203
  const columns = [];
155
204
  if (json.properties === void 0) throw new TypeError("Schema must have at least one property");
156
- const columnTypes = [];
205
+ const columnTypesMap = /* @__PURE__ */ new Map();
157
206
  for (const [columnName, def] of Object.entries(json.properties)) {
158
- const { type, nullable, format, primaryKey } = def;
207
+ const { type, duckdbType, nullable, format, primaryKey, minimum, maximum } = def;
159
208
  const c = { name: columnName };
160
- switch (type) {
209
+ if (duckdbType !== void 0 && duckDbTypesMap.has(duckdbType)) c.duckdbType = duckDbTypesMap.get(duckdbType);
210
+ else switch (type) {
161
211
  case "string":
162
212
  switch (format) {
163
213
  case "date-time":
@@ -166,22 +216,37 @@ const getTableCreateFromZod = (table, schema, options) => {
166
216
  case "int64":
167
217
  c.duckdbType = _duckdb_node_api.BIGINT;
168
218
  break;
219
+ case "uuid":
220
+ c.duckdbType = _duckdb_node_api.UUID;
221
+ break;
169
222
  default: c.duckdbType = _duckdb_node_api.VARCHAR;
170
223
  }
171
224
  break;
172
225
  case "number":
173
- c.duckdbType = _duckdb_node_api.INTEGER;
226
+ c.duckdbType = getDuckdbNumberColumnType({
227
+ minimum,
228
+ maximum
229
+ });
174
230
  break;
175
- default: throw new Error("Not a supported type");
231
+ case "integer":
232
+ c.duckdbType = getDuckdbNumberColumnType({
233
+ minimum,
234
+ maximum
235
+ });
236
+ break;
237
+ case "boolean":
238
+ c.duckdbType = _duckdb_node_api.BOOLEAN;
239
+ break;
240
+ default: throw new Error(`Cannot guess '${columnName}' type - ${JSON.stringify(def)}`);
176
241
  }
177
242
  if (primaryKey === true) c.constraint = "PRIMARY KEY";
178
243
  else if (nullable !== true) c.constraint = "NOT NULL";
179
- columnTypes.push([columnName, c.duckdbType]);
244
+ columnTypesMap.set(columnName, c.duckdbType);
180
245
  columns.push(c);
181
246
  }
182
247
  return {
183
248
  ddl: [
184
- `${createMap[create]} ${fqTable} (\n`,
249
+ `${createOptions[create]} ${fqTable} (\n`,
185
250
  columns.map((colDDL) => {
186
251
  const { name, duckdbType, constraint } = colDDL;
187
252
  return ` ${[
@@ -192,17 +257,24 @@ const getTableCreateFromZod = (table, schema, options) => {
192
257
  }).join(",\n"),
193
258
  "\n)"
194
259
  ].join(""),
195
- columnTypes
260
+ columnTypes: columnTypesMap
196
261
  };
197
262
  };
198
263
  //#endregion
199
264
  //#region src/table/create-table-from-zod.ts
200
265
  const createTableFromZod = async (params) => {
201
- const { conn, table, schema, options } = params;
202
- const { ddl, columnTypes } = getTableCreateFromZod(table, schema, options);
266
+ const { conn, table, schema, options, logger = sqlduckDefaultLogtapeLogger } = params;
267
+ const { ddl, columnTypes } = getTableCreateFromZod({
268
+ table,
269
+ schema,
270
+ options
271
+ });
272
+ logger.debug(`Generate DDL for table '${table.getFullName()}'`, { ddl });
203
273
  try {
204
274
  await conn.run(ddl);
275
+ logger.info(`Table '${table.getFullName()}' successfully created`, { ddl });
205
276
  } catch (e) {
277
+ logger.error(`Failed to create table '${table.getFullName()}': ${e.message}`, { ddl });
206
278
  throw new Error(`Failed to create table '${table.getFullName()}': ${e.message}`, { cause: e });
207
279
  }
208
280
  return {
@@ -214,6 +286,7 @@ const createTableFromZod = async (params) => {
214
286
  //#region src/utils/rows-to-columns-chunks.ts
215
287
  const toDuckValue = (value) => {
216
288
  if (value instanceof Date) return new _duckdb_node_api.DuckDBTimestampValue(BigInt(value.getTime() * 1e3));
289
+ if (typeof value === "bigint") return value.toString(10);
217
290
  return value === void 0 ? null : value;
218
291
  };
219
292
  /**
@@ -225,7 +298,8 @@ const toDuckValue = (value) => {
225
298
  * input rows: [{id:'1',name:'A'}, {id:'2',name:'B'}, {id:'3',name:'C'}]
226
299
  * yields: [[['1','2'], ['A','B']], [['3'], ['C']]]
227
300
  */
228
- async function* rowsToColumnsChunks(rows, chunkSize) {
301
+ async function* rowsToColumnsChunks(params) {
302
+ const { rows, chunkSize } = params;
229
303
  if (!Number.isSafeInteger(chunkSize) || chunkSize <= 0) throw new Error(`chunkSize must be a positive integer, got ${chunkSize}`);
230
304
  const first = await rows.next();
231
305
  if (first.done) return;
@@ -257,12 +331,11 @@ var SqlDuck = class {
257
331
  #logger;
258
332
  constructor(params) {
259
333
  this.#duck = params.conn;
260
- this.#logger = params.logger;
334
+ this.#logger = params.logger ?? sqlduckDefaultLogtapeLogger;
261
335
  }
262
336
  /**
263
337
  * Create a table from a Zod schema and fill it with data from a row stream.
264
338
  *
265
- *
266
339
  * @example
267
340
  * ```typescript
268
341
  * import * as z from 'zod';
@@ -308,28 +381,45 @@ var SqlDuck = class {
308
381
  options: createOptions
309
382
  });
310
383
  const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
311
- const chunkTypes = columnTypes.map((v) => v[1]);
384
+ const chunkTypes = Array.from(columnTypes.values());
312
385
  let totalRows = 0;
313
386
  const dataAppendedCollector = createOnDataAppendedCollector();
314
- const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
315
- for await (const dataChunk of columnStream) {
316
- const chunk = _duckdb_node_api.DuckDBDataChunk.create(chunkTypes);
317
- if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
318
- totalRows += dataChunk?.[0]?.length ?? 0;
319
- chunk.setColumns(dataChunk);
320
- appender.appendDataChunk(chunk);
321
- appender.flushSync();
322
- if (onDataAppended !== void 0) {
323
- const payload = dataAppendedCollector(totalRows);
324
- if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
325
- else onDataAppended(payload);
387
+ const columnStream = rowsToColumnsChunks({
388
+ rows: rowStream,
389
+ chunkSize
390
+ });
391
+ try {
392
+ for await (const dataChunk of columnStream) {
393
+ const chunk = _duckdb_node_api.DuckDBDataChunk.create(chunkTypes);
394
+ this.#logger.debug(`Inserting chunk of ${dataChunk.length} rows`, { table: table.getFullName() });
395
+ totalRows += dataChunk?.[0]?.length ?? 0;
396
+ chunk.setColumns(dataChunk);
397
+ appender.appendDataChunk(chunk);
398
+ appender.flushSync();
399
+ if (onDataAppended !== void 0) {
400
+ const payload = dataAppendedCollector(totalRows);
401
+ if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
402
+ else onDataAppended(payload);
403
+ }
326
404
  }
405
+ appender.closeSync();
406
+ const timeMs = Math.round(Date.now() - timeStart);
407
+ this.#logger.info(`Successfully appended ${totalRows} rows into '${table.getFullName()}' in ${timeMs}ms`, {
408
+ table: table.getFullName(),
409
+ timeMs,
410
+ totalRows
411
+ });
412
+ return {
413
+ timeMs,
414
+ totalRows,
415
+ createTableDDL: ddl
416
+ };
417
+ } catch (e) {
418
+ appender.closeSync();
419
+ const msg = `Failed to append data into table '${table.getFullName()}' - ${e?.message ?? ""}`;
420
+ this.#logger.error(msg, { table: table.getFullName() });
421
+ throw new Error(msg, { cause: e });
327
422
  }
328
- return {
329
- timeMs: Math.round(Date.now() - timeStart),
330
- totalRows,
331
- createTableDDL: ddl
332
- };
333
423
  };
334
424
  };
335
425
  //#endregion
@@ -390,5 +480,7 @@ const zodCodecs = {
390
480
  exports.DuckMemory = DuckMemory;
391
481
  exports.SqlDuck = SqlDuck;
392
482
  exports.Table = Table;
483
+ exports.flowbladeLogtapeSqlduckConfig = flowbladeLogtapeSqlduckConfig;
393
484
  exports.getTableCreateFromZod = getTableCreateFromZod;
485
+ exports.sqlduckDefaultLogtapeLogger = sqlduckDefaultLogtapeLogger;
394
486
  exports.zodCodecs = zodCodecs;
package/dist/index.d.cts CHANGED
@@ -1,4 +1,6 @@
1
1
  import { DuckDBConnection, DuckDBType } from "@duckdb/node-api";
2
+ import * as _logtape_logtape0 from "@logtape/logtape";
3
+ import { Logger } from "@logtape/logtape";
2
4
  import * as z from "zod";
3
5
  import { ZodObject } from "zod";
4
6
 
@@ -21,6 +23,11 @@ type OnDataAppendedSyncCb = (stats: OnDataAppendedStats) => void;
21
23
  type OnDataAppendedAsyncCb = (stats: OnDataAppendedStats) => Promise<void>;
22
24
  type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
23
25
  //#endregion
26
+ //#region src/config/flowblade-logtape-sqlduck.config.d.ts
27
+ declare const flowbladeLogtapeSqlduckConfig: {
28
+ categories: string[];
29
+ };
30
+ //#endregion
24
31
  //#region src/helpers/duck-memory.d.ts
25
32
  declare const duckMemoryTags: readonly ["BASE_TABLE", "HASH_TABLE", "PARQUET_READER", "CSV_READER", "ORDER_BY", "ART_INDEX", "COLUMN_DATA", "METADATA", "OVERFLOW_STRINGS", "IN_MEMORY_TABLE", "ALLOCATOR", "EXTENSION", "TRANSACTION", "EXTERNAL_FILE_CACHE", "WINDOW", "OBJECT_CACHE"];
26
33
  type DuckMemoryTag = (typeof duckMemoryTags)[number];
@@ -49,6 +56,9 @@ declare class DuckMemory {
49
56
  getSummary: () => Promise<DuckMemorySummary>;
50
57
  }
51
58
  //#endregion
59
+ //#region src/logger/sqlduck-default-logtape-logger.d.ts
60
+ declare const sqlduckDefaultLogtapeLogger: _logtape_logtape0.Logger;
61
+ //#endregion
52
62
  //#region src/table/table.d.ts
53
63
  /**
54
64
  * Fully qualified table information
@@ -76,24 +86,37 @@ declare class Table {
76
86
  withSchema: (schema: string) => Table;
77
87
  }
78
88
  //#endregion
89
+ //#region src/table/table-schema-zod.type.d.ts
90
+ type ZodSchemaSupportedTypes = z.ZodString | z.ZodNumber | z.ZodInt | z.ZodInt32 | z.ZodUInt32 | z.ZodBigInt | z.ZodBoolean | z.ZodDate | z.ZodISODateTime | z.ZodISOTime | z.ZodISODate | z.ZodEmail | z.ZodURL | z.ZodUUID | z.ZodCUID | z.ZodCUID2 | z.ZodULID;
91
+ type TableSchemaZod = z.ZodObject<Record<string, ZodSchemaSupportedTypes | z.ZodNullable<ZodSchemaSupportedTypes> | z.ZodCodec | z.ZodNullable<z.ZodCodec>>>;
92
+ //#endregion
79
93
  //#region src/table/get-table-create-from-zod.d.ts
80
94
  type TableCreateOptions = {
81
95
  create?: 'CREATE' | 'CREATE_OR_REPLACE' | 'IF_NOT_EXISTS';
82
96
  };
83
- declare const getTableCreateFromZod: <T extends ZodObject>(table: Table, schema: T, options?: TableCreateOptions) => {
97
+ type DuckdbColumnTypeMap<TKeys extends string> = Map<TKeys, DuckDBType>;
98
+ type TableCreateFromZodResult<TSchema extends TableSchemaZod> = {
84
99
  ddl: string;
85
- columnTypes: [name: string, type: DuckDBType][];
100
+ columnTypes: DuckdbColumnTypeMap<Exclude<keyof TSchema['shape'], symbol | number>>;
86
101
  };
87
- //#endregion
88
- //#region src/table/table-schema-zod.type.d.ts
89
- type TableSchemaZod = ZodObject;
102
+ type GetTableCreateFromZodParams<TSchema extends TableSchemaZod> = {
103
+ table: Table;
104
+ schema: TSchema;
105
+ options?: TableCreateOptions;
106
+ };
107
+ declare const getTableCreateFromZod: <TSchema extends TableSchemaZod>(params: GetTableCreateFromZodParams<TSchema>) => TableCreateFromZodResult<TSchema>;
90
108
  //#endregion
91
109
  //#region src/sql-duck.d.ts
92
110
  type SqlDuckParams = {
93
111
  conn: DuckDBConnection;
94
- logger?: (msg: string) => void;
112
+ /**
113
+ * Optional logtape/logger to use for logging.
114
+ * If not provided, a default logger will be used.
115
+ * @see {@link https://github.com/logtape/logtape}
116
+ */
117
+ logger?: Logger;
95
118
  };
96
- type RowStream<T> = AsyncIterableIterator<T>;
119
+ type RowStream<T> = AsyncIterableIterator<T> | AsyncGenerator<T> | Generator<T>;
97
120
  type ToTableParams<TSchema extends TableSchemaZod> = {
98
121
  /**
99
122
  * Used to create and fill the data into the table
@@ -142,7 +165,6 @@ declare class SqlDuck {
142
165
  /**
143
166
  * Create a table from a Zod schema and fill it with data from a row stream.
144
167
  *
145
- *
146
168
  * @example
147
169
  * ```typescript
148
170
  * import * as z from 'zod';
@@ -186,4 +208,4 @@ declare const zodCodecs: {
186
208
  readonly bigintToString: z.ZodCodec<z.ZodBigInt, z.ZodString>;
187
209
  };
188
210
  //#endregion
189
- export { DuckMemory, DuckMemoryTag, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, getTableCreateFromZod, zodCodecs };
211
+ export { DuckMemory, DuckMemoryTag, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
package/dist/index.d.mts CHANGED
@@ -1,3 +1,5 @@
1
+ import * as _logtape_logtape0 from "@logtape/logtape";
2
+ import { Logger } from "@logtape/logtape";
1
3
  import { DuckDBConnection, DuckDBType } from "@duckdb/node-api";
2
4
  import * as z from "zod";
3
5
  import { ZodObject } from "zod";
@@ -21,6 +23,11 @@ type OnDataAppendedSyncCb = (stats: OnDataAppendedStats) => void;
21
23
  type OnDataAppendedAsyncCb = (stats: OnDataAppendedStats) => Promise<void>;
22
24
  type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
23
25
  //#endregion
26
+ //#region src/config/flowblade-logtape-sqlduck.config.d.ts
27
+ declare const flowbladeLogtapeSqlduckConfig: {
28
+ categories: string[];
29
+ };
30
+ //#endregion
24
31
  //#region src/helpers/duck-memory.d.ts
25
32
  declare const duckMemoryTags: readonly ["BASE_TABLE", "HASH_TABLE", "PARQUET_READER", "CSV_READER", "ORDER_BY", "ART_INDEX", "COLUMN_DATA", "METADATA", "OVERFLOW_STRINGS", "IN_MEMORY_TABLE", "ALLOCATOR", "EXTENSION", "TRANSACTION", "EXTERNAL_FILE_CACHE", "WINDOW", "OBJECT_CACHE"];
26
33
  type DuckMemoryTag = (typeof duckMemoryTags)[number];
@@ -49,6 +56,9 @@ declare class DuckMemory {
49
56
  getSummary: () => Promise<DuckMemorySummary>;
50
57
  }
51
58
  //#endregion
59
+ //#region src/logger/sqlduck-default-logtape-logger.d.ts
60
+ declare const sqlduckDefaultLogtapeLogger: _logtape_logtape0.Logger;
61
+ //#endregion
52
62
  //#region src/table/table.d.ts
53
63
  /**
54
64
  * Fully qualified table information
@@ -76,24 +86,37 @@ declare class Table {
76
86
  withSchema: (schema: string) => Table;
77
87
  }
78
88
  //#endregion
89
+ //#region src/table/table-schema-zod.type.d.ts
90
+ type ZodSchemaSupportedTypes = z.ZodString | z.ZodNumber | z.ZodInt | z.ZodInt32 | z.ZodUInt32 | z.ZodBigInt | z.ZodBoolean | z.ZodDate | z.ZodISODateTime | z.ZodISOTime | z.ZodISODate | z.ZodEmail | z.ZodURL | z.ZodUUID | z.ZodCUID | z.ZodCUID2 | z.ZodULID;
91
+ type TableSchemaZod = z.ZodObject<Record<string, ZodSchemaSupportedTypes | z.ZodNullable<ZodSchemaSupportedTypes> | z.ZodCodec | z.ZodNullable<z.ZodCodec>>>;
92
+ //#endregion
79
93
  //#region src/table/get-table-create-from-zod.d.ts
80
94
  type TableCreateOptions = {
81
95
  create?: 'CREATE' | 'CREATE_OR_REPLACE' | 'IF_NOT_EXISTS';
82
96
  };
83
- declare const getTableCreateFromZod: <T extends ZodObject>(table: Table, schema: T, options?: TableCreateOptions) => {
97
+ type DuckdbColumnTypeMap<TKeys extends string> = Map<TKeys, DuckDBType>;
98
+ type TableCreateFromZodResult<TSchema extends TableSchemaZod> = {
84
99
  ddl: string;
85
- columnTypes: [name: string, type: DuckDBType][];
100
+ columnTypes: DuckdbColumnTypeMap<Exclude<keyof TSchema['shape'], symbol | number>>;
86
101
  };
87
- //#endregion
88
- //#region src/table/table-schema-zod.type.d.ts
89
- type TableSchemaZod = ZodObject;
102
+ type GetTableCreateFromZodParams<TSchema extends TableSchemaZod> = {
103
+ table: Table;
104
+ schema: TSchema;
105
+ options?: TableCreateOptions;
106
+ };
107
+ declare const getTableCreateFromZod: <TSchema extends TableSchemaZod>(params: GetTableCreateFromZodParams<TSchema>) => TableCreateFromZodResult<TSchema>;
90
108
  //#endregion
91
109
  //#region src/sql-duck.d.ts
92
110
  type SqlDuckParams = {
93
111
  conn: DuckDBConnection;
94
- logger?: (msg: string) => void;
112
+ /**
113
+ * Optional logtape/logger to use for logging.
114
+ * If not provided, a default logger will be used.
115
+ * @see {@link https://github.com/logtape/logtape}
116
+ */
117
+ logger?: Logger;
95
118
  };
96
- type RowStream<T> = AsyncIterableIterator<T>;
119
+ type RowStream<T> = AsyncIterableIterator<T> | AsyncGenerator<T> | Generator<T>;
97
120
  type ToTableParams<TSchema extends TableSchemaZod> = {
98
121
  /**
99
122
  * Used to create and fill the data into the table
@@ -142,7 +165,6 @@ declare class SqlDuck {
142
165
  /**
143
166
  * Create a table from a Zod schema and fill it with data from a row stream.
144
167
  *
145
- *
146
168
  * @example
147
169
  * ```typescript
148
170
  * import * as z from 'zod';
@@ -186,4 +208,4 @@ declare const zodCodecs: {
186
208
  readonly bigintToString: z.ZodCodec<z.ZodBigInt, z.ZodString>;
187
209
  };
188
210
  //#endregion
189
- export { DuckMemory, type DuckMemoryTag, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, getTableCreateFromZod, zodCodecs };
211
+ export { DuckMemory, type DuckMemoryTag, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
package/dist/index.mjs CHANGED
@@ -1,5 +1,9 @@
1
- import { BIGINT, DuckDBDataChunk, DuckDBTimestampValue, INTEGER, TIMESTAMP, VARCHAR } from "@duckdb/node-api";
1
+ import { getLogger } from "@logtape/logtape";
2
+ import { BIGINT, BOOLEAN, DOUBLE, DuckDBDataChunk, DuckDBTimestampValue, FLOAT, HUGEINT, INTEGER, SMALLINT, TIMESTAMP, TINYINT, UBIGINT, UHUGEINT, UINTEGER, USMALLINT, UTINYINT, UUID, VARCHAR } from "@duckdb/node-api";
2
3
  import * as z from "zod";
4
+ //#region src/config/flowblade-logtape-sqlduck.config.ts
5
+ const flowbladeLogtapeSqlduckConfig = { categories: ["flowblade", "sqlduck"] };
6
+ //#endregion
3
7
  //#region src/helpers/duck-exec.ts
4
8
  var DuckExec = class {
5
9
  #conn;
@@ -96,6 +100,9 @@ var DuckMemory = class {
96
100
  };
97
101
  };
98
102
  //#endregion
103
+ //#region src/logger/sqlduck-default-logtape-logger.ts
104
+ const sqlduckDefaultLogtapeLogger = getLogger(flowbladeLogtapeSqlduckConfig.categories);
105
+ //#endregion
99
106
  //#region src/appender/data-appender-callback.ts
100
107
  const isOnDataAppendedAsyncCb = (v) => {
101
108
  return v.constructor.name === "AsyncFunction";
@@ -117,23 +124,66 @@ const createOnDataAppendedCollector = () => {
117
124
  };
118
125
  };
119
126
  //#endregion
127
+ //#region src/table/get-duckdb-number-column-type.ts
128
+ const isFloatValue = (value) => {
129
+ if (!Number.isFinite(value)) return true;
130
+ if (Math.abs(value) > Number.MAX_SAFE_INTEGER) return true;
131
+ return !Number.isInteger(value);
132
+ };
133
+ const getDuckdbNumberColumnType = (params) => {
134
+ const { minimum, maximum } = params;
135
+ if (minimum === void 0 || maximum === void 0) return BIGINT;
136
+ if (isFloatValue(minimum) || isFloatValue(maximum)) {
137
+ if (minimum >= -34028235e31 && maximum <= 34028235e31) return FLOAT;
138
+ return DOUBLE;
139
+ }
140
+ if (minimum >= 0) {
141
+ if (maximum <= 255) return UTINYINT;
142
+ if (maximum <= 65535) return USMALLINT;
143
+ if (maximum <= 4294967295) return UINTEGER;
144
+ if (maximum <= 18446744073709551615n) return UBIGINT;
145
+ return UHUGEINT;
146
+ }
147
+ if (minimum >= -128 && maximum <= 127) return TINYINT;
148
+ if (minimum >= -32768 && maximum <= 32767) return SMALLINT;
149
+ if (minimum >= -2147483648 && maximum <= 2147483647) return INTEGER;
150
+ if (minimum >= -9223372036854775808n && maximum <= 9223372036854775807n) return BIGINT;
151
+ return HUGEINT;
152
+ };
153
+ //#endregion
120
154
  //#region src/table/get-table-create-from-zod.ts
121
- const createMap = {
155
+ const createOptions = {
122
156
  CREATE: "CREATE TABLE",
123
157
  CREATE_OR_REPLACE: "CREATE OR REPLACE TABLE",
124
158
  IF_NOT_EXISTS: "CREATE TABLE IF NOT EXISTS"
125
159
  };
126
- const getTableCreateFromZod = (table, schema, options) => {
160
+ const duckDbTypes = [
161
+ ["VARCHAR", VARCHAR],
162
+ ["BIGINT", BIGINT],
163
+ ["TIMESTAMP", TIMESTAMP],
164
+ ["UUID", UUID],
165
+ ["BOOLEAN", BOOLEAN],
166
+ ["INTEGER", INTEGER],
167
+ ["DOUBLE", DOUBLE],
168
+ ["FLOAT", FLOAT]
169
+ ];
170
+ const duckDbTypesMap = new Map(duckDbTypes);
171
+ const getTableCreateFromZod = (params) => {
172
+ const { table, schema, options } = params;
127
173
  const { create = "CREATE" } = options ?? {};
128
174
  const fqTable = table.getFullName();
129
- const json = schema.toJSONSchema({ target: "openapi-3.0" });
175
+ const json = schema.toJSONSchema({
176
+ target: "openapi-3.0",
177
+ unrepresentable: "throw"
178
+ });
130
179
  const columns = [];
131
180
  if (json.properties === void 0) throw new TypeError("Schema must have at least one property");
132
- const columnTypes = [];
181
+ const columnTypesMap = /* @__PURE__ */ new Map();
133
182
  for (const [columnName, def] of Object.entries(json.properties)) {
134
- const { type, nullable, format, primaryKey } = def;
183
+ const { type, duckdbType, nullable, format, primaryKey, minimum, maximum } = def;
135
184
  const c = { name: columnName };
136
- switch (type) {
185
+ if (duckdbType !== void 0 && duckDbTypesMap.has(duckdbType)) c.duckdbType = duckDbTypesMap.get(duckdbType);
186
+ else switch (type) {
137
187
  case "string":
138
188
  switch (format) {
139
189
  case "date-time":
@@ -142,22 +192,37 @@ const getTableCreateFromZod = (table, schema, options) => {
142
192
  case "int64":
143
193
  c.duckdbType = BIGINT;
144
194
  break;
195
+ case "uuid":
196
+ c.duckdbType = UUID;
197
+ break;
145
198
  default: c.duckdbType = VARCHAR;
146
199
  }
147
200
  break;
148
201
  case "number":
149
- c.duckdbType = INTEGER;
202
+ c.duckdbType = getDuckdbNumberColumnType({
203
+ minimum,
204
+ maximum
205
+ });
150
206
  break;
151
- default: throw new Error("Not a supported type");
207
+ case "integer":
208
+ c.duckdbType = getDuckdbNumberColumnType({
209
+ minimum,
210
+ maximum
211
+ });
212
+ break;
213
+ case "boolean":
214
+ c.duckdbType = BOOLEAN;
215
+ break;
216
+ default: throw new Error(`Cannot guess '${columnName}' type - ${JSON.stringify(def)}`);
152
217
  }
153
218
  if (primaryKey === true) c.constraint = "PRIMARY KEY";
154
219
  else if (nullable !== true) c.constraint = "NOT NULL";
155
- columnTypes.push([columnName, c.duckdbType]);
220
+ columnTypesMap.set(columnName, c.duckdbType);
156
221
  columns.push(c);
157
222
  }
158
223
  return {
159
224
  ddl: [
160
- `${createMap[create]} ${fqTable} (\n`,
225
+ `${createOptions[create]} ${fqTable} (\n`,
161
226
  columns.map((colDDL) => {
162
227
  const { name, duckdbType, constraint } = colDDL;
163
228
  return ` ${[
@@ -168,17 +233,24 @@ const getTableCreateFromZod = (table, schema, options) => {
168
233
  }).join(",\n"),
169
234
  "\n)"
170
235
  ].join(""),
171
- columnTypes
236
+ columnTypes: columnTypesMap
172
237
  };
173
238
  };
174
239
  //#endregion
175
240
  //#region src/table/create-table-from-zod.ts
176
241
  const createTableFromZod = async (params) => {
177
- const { conn, table, schema, options } = params;
178
- const { ddl, columnTypes } = getTableCreateFromZod(table, schema, options);
242
+ const { conn, table, schema, options, logger = sqlduckDefaultLogtapeLogger } = params;
243
+ const { ddl, columnTypes } = getTableCreateFromZod({
244
+ table,
245
+ schema,
246
+ options
247
+ });
248
+ logger.debug(`Generate DDL for table '${table.getFullName()}'`, { ddl });
179
249
  try {
180
250
  await conn.run(ddl);
251
+ logger.info(`Table '${table.getFullName()}' successfully created`, { ddl });
181
252
  } catch (e) {
253
+ logger.error(`Failed to create table '${table.getFullName()}': ${e.message}`, { ddl });
182
254
  throw new Error(`Failed to create table '${table.getFullName()}': ${e.message}`, { cause: e });
183
255
  }
184
256
  return {
@@ -190,6 +262,7 @@ const createTableFromZod = async (params) => {
190
262
  //#region src/utils/rows-to-columns-chunks.ts
191
263
  const toDuckValue = (value) => {
192
264
  if (value instanceof Date) return new DuckDBTimestampValue(BigInt(value.getTime() * 1e3));
265
+ if (typeof value === "bigint") return value.toString(10);
193
266
  return value === void 0 ? null : value;
194
267
  };
195
268
  /**
@@ -201,7 +274,8 @@ const toDuckValue = (value) => {
201
274
  * input rows: [{id:'1',name:'A'}, {id:'2',name:'B'}, {id:'3',name:'C'}]
202
275
  * yields: [[['1','2'], ['A','B']], [['3'], ['C']]]
203
276
  */
204
- async function* rowsToColumnsChunks(rows, chunkSize) {
277
+ async function* rowsToColumnsChunks(params) {
278
+ const { rows, chunkSize } = params;
205
279
  if (!Number.isSafeInteger(chunkSize) || chunkSize <= 0) throw new Error(`chunkSize must be a positive integer, got ${chunkSize}`);
206
280
  const first = await rows.next();
207
281
  if (first.done) return;
@@ -233,12 +307,11 @@ var SqlDuck = class {
233
307
  #logger;
234
308
  constructor(params) {
235
309
  this.#duck = params.conn;
236
- this.#logger = params.logger;
310
+ this.#logger = params.logger ?? sqlduckDefaultLogtapeLogger;
237
311
  }
238
312
  /**
239
313
  * Create a table from a Zod schema and fill it with data from a row stream.
240
314
  *
241
- *
242
315
  * @example
243
316
  * ```typescript
244
317
  * import * as z from 'zod';
@@ -284,28 +357,45 @@ var SqlDuck = class {
284
357
  options: createOptions
285
358
  });
286
359
  const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
287
- const chunkTypes = columnTypes.map((v) => v[1]);
360
+ const chunkTypes = Array.from(columnTypes.values());
288
361
  let totalRows = 0;
289
362
  const dataAppendedCollector = createOnDataAppendedCollector();
290
- const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
291
- for await (const dataChunk of columnStream) {
292
- const chunk = DuckDBDataChunk.create(chunkTypes);
293
- if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
294
- totalRows += dataChunk?.[0]?.length ?? 0;
295
- chunk.setColumns(dataChunk);
296
- appender.appendDataChunk(chunk);
297
- appender.flushSync();
298
- if (onDataAppended !== void 0) {
299
- const payload = dataAppendedCollector(totalRows);
300
- if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
301
- else onDataAppended(payload);
363
+ const columnStream = rowsToColumnsChunks({
364
+ rows: rowStream,
365
+ chunkSize
366
+ });
367
+ try {
368
+ for await (const dataChunk of columnStream) {
369
+ const chunk = DuckDBDataChunk.create(chunkTypes);
370
+ this.#logger.debug(`Inserting chunk of ${dataChunk.length} rows`, { table: table.getFullName() });
371
+ totalRows += dataChunk?.[0]?.length ?? 0;
372
+ chunk.setColumns(dataChunk);
373
+ appender.appendDataChunk(chunk);
374
+ appender.flushSync();
375
+ if (onDataAppended !== void 0) {
376
+ const payload = dataAppendedCollector(totalRows);
377
+ if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
378
+ else onDataAppended(payload);
379
+ }
302
380
  }
381
+ appender.closeSync();
382
+ const timeMs = Math.round(Date.now() - timeStart);
383
+ this.#logger.info(`Successfully appended ${totalRows} rows into '${table.getFullName()}' in ${timeMs}ms`, {
384
+ table: table.getFullName(),
385
+ timeMs,
386
+ totalRows
387
+ });
388
+ return {
389
+ timeMs,
390
+ totalRows,
391
+ createTableDDL: ddl
392
+ };
393
+ } catch (e) {
394
+ appender.closeSync();
395
+ const msg = `Failed to append data into table '${table.getFullName()}' - ${e?.message ?? ""}`;
396
+ this.#logger.error(msg, { table: table.getFullName() });
397
+ throw new Error(msg, { cause: e });
303
398
  }
304
- return {
305
- timeMs: Math.round(Date.now() - timeStart),
306
- totalRows,
307
- createTableDDL: ddl
308
- };
309
399
  };
310
400
  };
311
401
  //#endregion
@@ -363,4 +453,4 @@ const zodCodecs = {
363
453
  })
364
454
  };
365
455
  //#endregion
366
- export { DuckMemory, SqlDuck, Table, getTableCreateFromZod, zodCodecs };
456
+ export { DuckMemory, SqlDuck, Table, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flowblade/sqlduck",
3
- "version": "0.8.3",
3
+ "version": "0.10.0",
4
4
  "type": "module",
5
5
  "sideEffects": false,
6
6
  "exports": {
@@ -35,8 +35,8 @@
35
35
  "docgen-typedoc": "rimraf ./docs/api && typedoc --plugin typedoc-plugin-markdown --out ./docs/api",
36
36
  "bench": "vitest bench --run",
37
37
  "bench-bun": "bun --bun run vitest bench --run",
38
- "bench-mitata": "node --experimental-strip-types --expose-gc ./bench/stream.mitata.ts ",
39
- "bench-mitata-bun": "bun --expose-gc ./bench/stream.mitata.ts ",
38
+ "bench-mitata": "node --experimental-strip-types --expose-gc bench/run_mitata.ts",
39
+ "bench-mitata-bun": "bun --expose-gc bench/run_mitata.ts",
40
40
  "bench-codspeed": "cross-env CODSPEED=1 vitest bench --run",
41
41
  "bench-watch": "vitest bench",
42
42
  "test": "vitest run",
@@ -57,6 +57,7 @@
57
57
  "@flowblade/core": "^0.2.26",
58
58
  "@flowblade/source-duckdb": "^0.20.1",
59
59
  "@flowblade/sql-tag": "^0.3.2",
60
+ "@logtape/logtape": "2.0.4",
60
61
  "@standard-schema/spec": "^1.1.0",
61
62
  "p-mutex": "^1.0.0",
62
63
  "valibot": "^1.3.1",
@@ -67,7 +68,7 @@
67
68
  },
68
69
  "devDependencies": {
69
70
  "@belgattitude/eslint-config-bases": "8.10.0",
70
- "@dotenvx/dotenvx": "1.55.1",
71
+ "@dotenvx/dotenvx": "1.57.2",
71
72
  "@duckdb/node-api": "1.5.1-r.1",
72
73
  "@faker-js/faker": "10.3.0",
73
74
  "@flowblade/source-kysely": "^1.3.0",