@flowblade/sqlduck 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -58,7 +58,7 @@ const result = await sqlDuck.toTable({
58
58
  rowStream: getUsers(), // The async iterable that yields rows
59
59
  // 👇Optional:
60
60
  chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
61
- onDataAppended: ({ timeMs, totalRows, rowsPerSecond }) => {
61
+ onChunkAppended: ({ timeMs, totalRows, rowsPerSecond }) => {
62
62
  console.log(
63
63
  `Appended ${totalRows} in time ${timeMs}ms, est: ${rowsPerSecond} rows/s`
64
64
  );
@@ -105,8 +105,8 @@ const result = sqlDuck.toTable({
105
105
  rowStream: getUserRows(), // The async iterable that yields rows
106
106
  // 👇Optional:
107
107
  chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
108
- onDataAppended: ({ total }) => {
109
- console.log(`Appended ${total} rows so far`);
108
+ onChunkAppended: ({ totalRows }) => {
109
+ console.log(`Appended ${totalRows} rows so far`);
110
110
  },
111
111
  // Optional table creation options
112
112
  createOptions: {
@@ -131,67 +131,72 @@ const queryResult = await dbDuckDbMemoryConn.query<{
131
131
  ### Node 24
132
132
 
133
133
  ```
134
- RUN v4.1.1 /home/sebastien/github/flowblade/packages/sqlduck
135
134
 
135
+ RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
136
136
 
137
- ✓ bench/appender.bench.ts > appender benches 4157ms
138
- name hz min max mean p75 p99 p995 p999 rme samples
139
- · duckdb appender memory, count: 100000, chunk size 2048 2.6950 357.08 385.05 371.06 385.05 385.05 385.05 385.05 ±47.90% 2
140
- · duckdb appender file, count: 100000, chunk size 2048 1.4218 703.35 703.35 703.35 703.35 703.35 703.35 703.35 ±0.00% 1
141
- · duckdb appender, count: 100000, chunk size 1024 2.5157 391.12 403.89 397.50 403.89 403.89 403.89 403.89 ±20.41% 2
142
137
 
143
- ✓ bench/stream.bench.ts > Bench stream 2809ms
144
- name hz min max mean p75 p99 p995 p999 rme samples
145
- · rowToColumnsChunk with chunkSize 2048 (count: 100000) 9.2627 87.7271 151.56 107.96 116.92 151.56 151.56 151.56 ±15.98% 10
146
- · mapFakeRowStream with chunkSize 2048 (count: 100000) 7.1479 125.04 168.13 139.90 152.97 168.13 168.13 168.13 ±7.78% 10
138
+ ✓ bench/appender.bench.ts > appender benches 2910ms
139
+ name hz min max mean p75 p99 p995 p999 rme samples
140
+ · duckdb appender memory, count: 100000, chunk size 2048 3.5446 265.91 298.32 282.12 298.32 298.32 298.32 298.32 ±72.99% 2
141
+ · duckdb appender file, count: 100000, chunk size 2048 2.6130 355.30 410.10 382.70 410.10 410.10 410.10 410.10 ±91.00% 2
142
+ · duckdb appender, count: 100000, chunk size 1024 3.8027 226.52 299.42 262.97 299.42 299.42 299.42 299.42 ±176.17% 2
143
+
144
+ ✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2998ms
145
+ name hz min max mean p75 p99 p995 p999 rme samples
146
+ · rowToColumnsChunk with chunkSize 2048 (count: 100000) 11.3182 60.6404 190.56 88.3532 79.4999 190.56 190.56 190.56 ±30.64% 10
147
+ · rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.4430 63.9716 102.73 74.3883 75.2151 102.73 102.73 102.73 ±10.40% 10
148
+ · mapFakeRowStream with chunkSize 2048 (count: 100000) 10.7785 84.3040 117.57 92.7773 97.1595 117.57 117.57 117.57 ±7.61% 10
147
149
 
148
- ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 614ms
150
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 615ms
149
151
  name hz min max mean p75 p99 p995 p999 rme samples
150
- · getTableCreateFromZod 18,899.24 0.0334 5.3351 0.0529 0.0546 0.1943 0.3087 0.7214 ±2.72% 9450
152
+ · getTableCreateFromZod 27,048.71 0.0239 3.2029 0.0370 0.0404 0.1310 0.1980 0.5674 ±2.08% 13525
151
153
 
152
154
  BENCH Summary
153
155
 
154
- duckdb appender memory, count: 100000, chunk size 2048 - bench/appender.bench.ts > appender benches
155
- 1.07x faster than duckdb appender, count: 100000, chunk size 1024
156
- 1.90x faster than duckdb appender file, count: 100000, chunk size 2048
156
+ duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
157
+ 1.07x faster than duckdb appender memory, count: 100000, chunk size 2048
158
+ 1.46x faster than duckdb appender file, count: 100000, chunk size 2048
157
159
 
158
- rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench stream
159
- 1.30x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
160
+ rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
161
+ 1.19x faster than rowToColumnsChunk with chunkSize 2048 (count: 100000)
162
+ 1.25x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
160
163
 
161
164
  getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
162
165
  ```
163
166
 
164
- ### Bun 1.3.11
167
+ ### Bun 1.3.12
165
168
 
166
169
  ```
167
- RUN v4.1.1 /home/sebastien/github/flowblade/packages/sqlduck
170
+ RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
168
171
 
169
172
 
170
- ✓ bench/appender.bench.ts > appender benches 4159ms
173
+ ✓ bench/appender.bench.ts > appender benches 2811ms
171
174
  name hz min max mean p75 p99 p995 p999 rme samples
172
- · duckdb appender memory, count: 100000, chunk size 2048 2.6465 375.34 380.38 377.86 380.38 380.38 380.38 380.38 ±8.48% 2
173
- · duckdb appender file, count: 100000, chunk size 2048 1.5016 665.98 665.98 665.98 665.98 665.98 665.98 665.98 ±0.00% 1
174
- · duckdb appender, count: 100000, chunk size 1024 2.2828 413.11 463.01 438.06 463.01 463.01 463.01 463.01 ±72.39% 2
175
+ · duckdb appender memory, count: 100000, chunk size 2048 3.9242 224.75 285.38 254.83 285.38 285.38 285.38 285.38 ±29.56% 3
176
+ · duckdb appender file, count: 100000, chunk size 2048 3.8209 256.09 267.34 261.72 267.34 267.34 267.34 267.34 ±27.31% 2
177
+ · duckdb appender, count: 100000, chunk size 1024 4.6118 196.77 234.22 216.84 234.22 234.22 234.22 234.22 ±21.62% 3
175
178
 
176
- ✓ bench/stream.bench.ts > Bench stream 2690ms
177
- name hz min max mean p75 p99 p995 p999 rme samples
178
- · rowToColumnsChunk with chunkSize 2048 (count: 100000) 9.5675 95.6610 114.11 104.52 107.75 114.11 114.11 114.11 ±3.46% 10
179
- · mapFakeRowStream with chunkSize 2048 (count: 100000) 7.6895 117.83 138.26 130.05 137.51 138.26 138.26 138.26 ±4.05% 10
179
+ ✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2667ms
180
+ name hz min max mean p75 p99 p995 p999 rme samples
181
+ · rowToColumnsChunk with chunkSize 2048 (count: 100000) 14.4994 57.7717 81.8690 68.9683 78.5234 81.8690 81.8690 81.8690 ±9.43% 10
182
+ · rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.2052 68.7139 96.8400 75.7275 79.4380 96.8400 96.8400 96.8400 ±8.22% 10
183
+ · mapFakeRowStream with chunkSize 2048 (count: 100000) 12.7827 73.9127 85.0696 78.2310 82.5510 85.0696 85.0696 85.0696 ±3.69% 10
180
184
 
181
- ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 629ms
185
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 624ms
182
186
  name hz min max mean p75 p99 p995 p999 rme samples
183
- · getTableCreateFromZod 18,892.06 0.0281 7.5844 0.0529 0.0516 0.1893 0.2477 3.2823 ±6.26% 9447
187
+ · getTableCreateFromZod 28,477.04 0.0191 6.4836 0.0351 0.0335 0.1071 0.1530 2.4823 ±6.21% 14239
184
188
 
185
189
  BENCH Summary
186
190
 
187
- rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench stream
188
- 1.24x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
191
+ rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
192
+ 1.10x faster than rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000)
193
+ 1.13x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
189
194
 
190
195
  getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
191
196
 
192
- duckdb appender memory, count: 100000, chunk size 2048 - bench/appender.bench.ts > appender benches
193
- 1.16x faster than duckdb appender, count: 100000, chunk size 1024
194
- 1.76x faster than duckdb appender file, count: 100000, chunk size 2048
197
+ duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
198
+ 1.18x faster than duckdb appender memory, count: 100000, chunk size 2048
199
+ 1.21x faster than duckdb appender file, count: 100000, chunk size 2048
195
200
 
196
201
  ```
197
202
 
package/dist/index.d.mts CHANGED
@@ -7,7 +7,7 @@ import * as z from "zod";
7
7
  import { ZodObject } from "zod";
8
8
 
9
9
  //#region src/appender/data-appender-callback.d.ts
10
- type OnDataAppendedStats = {
10
+ type OnChunkAppendedStats = {
11
11
  /**
12
12
  * Total number of rows appended so far (all batches included)
13
13
  */
@@ -21,9 +21,9 @@ type OnDataAppendedStats = {
21
21
  */
22
22
  rowsPerSecond: number;
23
23
  };
24
- type OnDataAppendedSyncCb = (stats: OnDataAppendedStats) => void;
25
- type OnDataAppendedAsyncCb = (stats: OnDataAppendedStats) => Promise<void>;
26
- type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
24
+ type OnChunkAppendedSyncCb = (stats: OnChunkAppendedStats) => void;
25
+ type OnChunkAppendedAsyncCb = (stats: OnChunkAppendedStats) => Promise<void>;
26
+ type OnChunkAppendedCb = OnChunkAppendedSyncCb | OnChunkAppendedAsyncCb;
27
27
  //#endregion
28
28
  //#region src/helpers/duck-memory.d.ts
29
29
  declare const duckMemoryTags: readonly ["BASE_TABLE", "HASH_TABLE", "PARQUET_READER", "CSV_READER", "ORDER_BY", "ART_INDEX", "COLUMN_DATA", "METADATA", "OVERFLOW_STRINGS", "IN_MEMORY_TABLE", "ALLOCATOR", "EXTENSION", "TRANSACTION", "EXTERNAL_FILE_CACHE", "WINDOW", "OBJECT_CACHE"];
@@ -81,7 +81,7 @@ declare class Table {
81
81
  }
82
82
  //#endregion
83
83
  //#region src/table/table-schema-zod.type.d.ts
84
- type ZodSchemaSupportedTypes = z.ZodString | z.ZodNumber | z.ZodInt | z.ZodInt32 | z.ZodUInt32 | z.ZodBigInt | z.ZodBoolean | z.ZodDate | z.ZodISODateTime | z.ZodISOTime | z.ZodISODate | z.ZodEmail | z.ZodURL | z.ZodUUID | z.ZodCUID | z.ZodCUID2 | z.ZodULID;
84
+ type ZodSchemaSupportedTypes = z.ZodString | z.ZodNumber | z.ZodInt | z.ZodInt32 | z.ZodUInt32 | z.ZodBigInt | z.ZodBoolean | z.ZodDate | z.ZodISODateTime | z.ZodISOTime | z.ZodISODate | z.ZodEmail | z.ZodURL | z.ZodUUID | z.ZodCUID | z.ZodCUID2 | z.ZodULID | z.ZodEnum;
85
85
  type TableSchemaZod = z.ZodObject<Record<string, ZodSchemaSupportedTypes | z.ZodNullable<ZodSchemaSupportedTypes> | z.ZodCodec | z.ZodNullable<z.ZodCodec>>>;
86
86
  //#endregion
87
87
  //#region src/table/get-table-create-from-zod.d.ts
@@ -113,41 +113,65 @@ type SqlDuckParams = {
113
113
  type RowStream<T> = AsyncIterableIterator<T> | AsyncGenerator<T> | Generator<T>;
114
114
  type ToTableParams<TSchema extends TableSchemaZod> = {
115
115
  /**
116
- * Used to create and fill the data into the table
116
+ * The target table where the data will be inserted.
117
+ * This object contains the table name and optionally the schema and database name.
117
118
  */
118
119
  table: Table;
119
120
  /**
120
- * Schema describing the table structure and rowStream content
121
+ * A Zod schema that defines the structure of the table and the expected format of the rows in the `rowStream`.
122
+ * The schema is used to generate the `CREATE TABLE` DDL and to convert row values to DuckDB types.
121
123
  */
122
124
  schema: TSchema;
123
125
  /**
124
- * Stream of rows to insert into the table
126
+ * An iterable (async or sync) or generator that yields rows to be inserted.
127
+ * Each row must match the structure defined in the `schema`.
125
128
  */
126
129
  rowStream: RowStream<z.infer<TSchema>>;
127
130
  /**
128
- * Chunk size when using appender to insert data.
129
- * Valid numbers between 1 and 2048.
131
+ * The number of rows to accumulate before appending them to the DuckDB table as a single data chunk.
132
+ * Tuning this value can impact memory usage and insertion performance.
133
+ * Valid values are between 1 and 2048.
130
134
  * @default 2048
131
135
  */
132
136
  chunkSize?: number;
133
137
  /**
134
- * Extra options when creating the table
138
+ * Configuration options for the `CREATE TABLE` statement (e.g., `IF NOT EXISTS`, `CREATE OR REPLACE`).
139
+ * If omitted, a standard `CREATE TABLE` statement is used.
135
140
  */
136
141
  createOptions?: TableCreateOptions;
137
142
  /**
138
- * Callback called each time a datachunk is appended to the table
143
+ * An optional callback invoked after each data chunk is successfully appended to the table.
144
+ * Useful for tracking progress, logging statistics, or implementing custom hooks during the insertion process.
139
145
  */
140
- onDataAppended?: OnDataAppendedCb;
146
+ onChunkAppended?: OnChunkAppendedCb;
141
147
  /**
142
- * Automatically checkpoint the table after all chunks have been appended.
148
+ * Specifies the frequency (in number of chunks) at which the `onChunkAppended` callback should be triggered.
149
+ *
150
+ * For example, if `chunkSize` is 2048 and `onChunkAppendedFrequency` is 5,
151
+ * the callback will be called every 10,240 rows (5 chunks * 2048 rows/chunk).
152
+ *
153
+ * @default 1
154
+ */
155
+ onChunkAppendedFrequency?: number;
156
+ /**
157
+ * Specifies the frequency (in number of chunks) at which the `appender.flushSync()` should be called.
158
+ * Calling `flushSync()` can help to clear internal buffers and make the data visible.
159
+ *
160
+ * For example, if `chunkSize` is 2048 and `flushSyncFrequency` is 5,
161
+ * the appender will be flushed every 10,240 rows (5 chunks * 2048 rows/chunk).
162
+ */
163
+ flushSyncFrequency?: number;
164
+ /**
165
+ * If set to `true`, a checkpoint is automatically performed after all rows from the `rowStream` have been processed.
166
+ * This ensures that all data is persisted and WAL is cleared.
143
167
  * @default true
144
168
  */
145
169
  autoCheckpoint?: boolean;
146
170
  /**
147
- * Checkpoint the table after 'n' chunks have been appended
171
+ * Specifies the frequency (in number of chunks) at which a checkpoint should be triggered.
148
172
  *
149
- * For example if the chunkSize is 2048, setting frequency to 2
150
- * will checkpoint the table every 4096 rows (2x chunksize)
173
+ * For example, if `chunkSize` is 2048 and `checkpointChunksFrequency` is 5,
174
+ * a checkpoint will occur every 10,240 rows (5 chunks * 2048 rows/chunk).
151
175
  */
152
176
  checkpointChunksFrequency?: number;
153
177
  };
@@ -193,9 +217,13 @@ declare class SqlDuck {
193
217
  * schema: userSchema,
194
218
  * rowStream: getUserRows(),
195
219
  * chunkSize: 2048,
196
- * onDataAppended: ({ total }) => {
197
- * console.log(`Appended ${total} rows so far`);
220
+ * flushSyncFrequency: 10, // flush after every 10 chunks
221
+ * onChunkAppendedFrequency: 1, // multiple of chunks
222
+ * onChunkAppended: ({ totalRows }) => {
223
+ * console.log(`Appended ${totalRows} rows so far`);
198
224
  * },
225
+ * autoCheckpoint: true,
226
+ * checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
199
227
  * createOptions: {
200
228
  * create: 'CREATE_OR_REPLACE',
201
229
  * },
@@ -302,4 +330,4 @@ declare const flowbladeLogtapeSqlduckConfig: {
302
330
  //#region src/logger/sqlduck-default-logtape-logger.d.ts
303
331
  declare const sqlduckDefaultLogtapeLogger: _$_logtape_logtape0.Logger;
304
332
  //#endregion
305
- export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, duckReservedKeywords, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
333
+ export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type OnChunkAppendedCb, type OnChunkAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, duckReservedKeywords, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
package/dist/index.mjs CHANGED
@@ -1,6 +1,6 @@
1
1
  import { t as duckReservedKeywords } from "./duck-reserved-keywords-B8XUjnaY.mjs";
2
2
  import { n as assertValidAliasName, o as duckConnectionParamsZodSchema, s as duckValidatorsZod } from "./zod-CuPjTLv8.mjs";
3
- import { BIGINT, BOOLEAN, DOUBLE, DuckDBDataChunk, DuckDBInstanceCache, DuckDBTimestampValue, FLOAT, HUGEINT, INTEGER, SMALLINT, TIMESTAMP, TINYINT, UBIGINT, UHUGEINT, UINTEGER, USMALLINT, UTINYINT, UUID, VARCHAR } from "@duckdb/node-api";
3
+ import { BIGINT, BOOLEAN, DOUBLE, DuckDBDataChunk, DuckDBInstanceCache, DuckDBTimestampMillisecondsValue, DuckDBTypeId, ENUM, FLOAT, HUGEINT, INTEGER, SMALLINT, TIMESTAMP, TIMESTAMP_MS, TINYINT, UBIGINT, UHUGEINT, UINTEGER, USMALLINT, UTINYINT, UUID, VARCHAR } from "@duckdb/node-api";
4
4
  import { getLogger } from "@logtape/logtape";
5
5
  import fs from "node:fs";
6
6
  import { basename, dirname } from "node:path";
@@ -104,10 +104,12 @@ var DuckMemory = class {
104
104
  };
105
105
  //#endregion
106
106
  //#region src/appender/data-appender-callback.ts
107
- const isOnDataAppendedAsyncCb = (v) => {
108
- return v.constructor.name === "AsyncFunction";
107
+ const isOnChunkAppendedAsyncCb = (v) => {
108
+ return v.constructor.name === "AsyncFunction" || v.constructor === (async () => {
109
+ await Promise.resolve();
110
+ }).constructor;
109
111
  };
110
- const createOnDataAppendedCollector = () => {
112
+ const createOnChunkAppendedCollector = () => {
111
113
  let lastCallbackTimeStart = Date.now();
112
114
  let appendedTotalRows = 0;
113
115
  return (currentTotalRows) => {
@@ -124,6 +126,110 @@ const createOnDataAppendedCollector = () => {
124
126
  };
125
127
  };
126
128
  //#endregion
129
+ //#region src/converter/duck-value-converter.ts
130
+ const stringTimestampRegexp = /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d{3,6})?Z?$/i;
131
+ const dateRegexp = /^\d{4}-\d{2}-\d{2}$/;
132
+ const createDuckValueConverterTypeError = (params) => {
133
+ let serializableValue;
134
+ try {
135
+ serializableValue = JSON.stringify(params.value);
136
+ } catch {
137
+ serializableValue = "<unserializable>";
138
+ }
139
+ return /* @__PURE__ */ new TypeError(`[DuckValueConverter.${params.method}]: Unsupported type ${typeof params.value} with value ${serializableValue}`);
140
+ };
141
+ var DuckValueConverter = class {
142
+ toUUID = (value) => {
143
+ if (typeof value === "bigint") return value;
144
+ else if (typeof value === "string") return BigInt("0x" + value.replaceAll("-", ""));
145
+ if (value === void 0 || value === null) return null;
146
+ throw createDuckValueConverterTypeError({
147
+ method: "toUUID",
148
+ value
149
+ });
150
+ };
151
+ toStringEnum = (value) => {
152
+ if (typeof value === "string") return value;
153
+ if (value === void 0 || value === null) return null;
154
+ throw createDuckValueConverterTypeError({
155
+ method: "toStringEnum",
156
+ value
157
+ });
158
+ };
159
+ toBigIntString = (value) => {
160
+ if (typeof value === "string") return value;
161
+ if (typeof value === "number" || typeof value === "bigint") return value.toString(10);
162
+ if (value === void 0 || value === null) return null;
163
+ throw createDuckValueConverterTypeError({
164
+ method: "toBigIntString",
165
+ value
166
+ });
167
+ };
168
+ toTimestampMs = (value) => {
169
+ if (value instanceof Date) return new DuckDBTimestampMillisecondsValue(BigInt(value.getTime()));
170
+ if (value === void 0 || value === null) return null;
171
+ if (typeof value === "string") {
172
+ const len = value.length;
173
+ if (len > 18 && len < 31 && stringTimestampRegexp.test(value)) {
174
+ const date = /* @__PURE__ */ new Date(value + (value.endsWith("Z") ? "" : "Z"));
175
+ return new DuckDBTimestampMillisecondsValue(BigInt(date.getTime()));
176
+ }
177
+ if (len === 10 && dateRegexp.test(value)) {
178
+ const date = /* @__PURE__ */ new Date(value + "T00:00:00Z");
179
+ return new DuckDBTimestampMillisecondsValue(BigInt(date.getTime()));
180
+ }
181
+ }
182
+ if (typeof value === "bigint") return new DuckDBTimestampMillisecondsValue(value);
183
+ if (typeof value === "number") return new DuckDBTimestampMillisecondsValue(BigInt(value));
184
+ throw createDuckValueConverterTypeError({
185
+ method: "toTimestampMs",
186
+ value
187
+ });
188
+ };
189
+ };
190
+ //#endregion
191
+ //#region src/converter/create-duck-column-converters.ts
192
+ const createDuckColumnConverters = (duckTypes) => {
193
+ const convMap = {};
194
+ const converter = new DuckValueConverter();
195
+ for (const [key, duckType] of Object.entries(duckTypes)) {
196
+ let conv;
197
+ const duckTypeId = duckType.typeId;
198
+ switch (duckTypeId) {
199
+ case DuckDBTypeId.TIMESTAMP_MS:
200
+ conv = converter.toTimestampMs;
201
+ break;
202
+ case DuckDBTypeId.BIGINT:
203
+ case DuckDBTypeId.UBIGINT:
204
+ case DuckDBTypeId.HUGEINT:
205
+ case DuckDBTypeId.UHUGEINT:
206
+ case DuckDBTypeId.INTEGER:
207
+ case DuckDBTypeId.UINTEGER:
208
+ case DuckDBTypeId.BIGNUM:
209
+ conv = converter.toBigIntString;
210
+ break;
211
+ case DuckDBTypeId.ENUM:
212
+ conv = converter.toStringEnum;
213
+ break;
214
+ case DuckDBTypeId.UUID:
215
+ conv = converter.toUUID;
216
+ break;
217
+ case DuckDBTypeId.BIT:
218
+ case DuckDBTypeId.BOOLEAN:
219
+ case DuckDBTypeId.TINYINT:
220
+ case DuckDBTypeId.USMALLINT:
221
+ case DuckDBTypeId.UTINYINT:
222
+ case DuckDBTypeId.VARCHAR:
223
+ case DuckDBTypeId.SMALLINT:
224
+ conv = false;
225
+ break;
226
+ default: throw new Error(`Unsupported duck type ${duckTypeId} / ${duckType.toString()} for column '${key}'`);
227
+ }
228
+ if (conv !== false) convMap[key] = conv;
229
+ }
230
+ return convMap;
231
+ };
232
+ //#endregion
127
233
  //#region src/config/flowblade-logtape-sqlduck.config.ts
128
234
  const flowbladeLogtapeSqlduckConfig = { categories: ["flowblade", "sqlduck"] };
129
235
  //#endregion
@@ -442,7 +548,10 @@ const createOptions = {
442
548
  const duckDbTypesMap = new Map([
443
549
  ["VARCHAR", VARCHAR],
444
550
  ["BIGINT", BIGINT],
551
+ ["UBIGINT", UBIGINT],
552
+ ["HUGEINT", HUGEINT],
445
553
  ["TIMESTAMP", TIMESTAMP],
554
+ ["TIMESTAMP_MS", TIMESTAMP_MS],
446
555
  ["UUID", UUID],
447
556
  ["BOOLEAN", BOOLEAN],
448
557
  ["INTEGER", INTEGER],
@@ -466,9 +575,10 @@ const getTableCreateFromZod = (params) => {
466
575
  if (duckdbType !== void 0 && duckDbTypesMap.has(duckdbType)) c.duckdbType = duckDbTypesMap.get(duckdbType);
467
576
  else switch (type) {
468
577
  case "string":
469
- switch (format) {
578
+ if (Array.isArray(def.enum)) c.duckdbType = ENUM(def.enum);
579
+ else switch (format) {
470
580
  case "date-time":
471
- c.duckdbType = TIMESTAMP;
581
+ c.duckdbType = TIMESTAMP_MS;
472
582
  break;
473
583
  case "int64":
474
584
  c.duckdbType = BIGINT;
@@ -541,41 +651,90 @@ const createTableFromZod = async (params) => {
541
651
  };
542
652
  //#endregion
543
653
  //#region src/utils/rows-to-columns-chunks.ts
544
- const toDuckValue = (value) => {
545
- if (value instanceof Date) return new DuckDBTimestampValue(BigInt(value.getTime() * 1e3));
546
- if (typeof value === "bigint") return value.toString(10);
547
- return value === void 0 ? null : value;
548
- };
549
654
  /**
550
- * Similar to `rowsToColumns` but yields results in chunks to avoid buffering
551
- * the entire dataset in memory. Each yielded item is a columns array for up to
552
- * `chunkSize` rows.
655
+ * Converts a stream of rows (row-oriented) into a stream of column-oriented chunks.
656
+ *
657
+ * This function processes row data incrementally using an async generator, which prevents
658
+ * loading the entire dataset into memory. Each yielded chunk is an object where keys are
659
+ * column names and values are arrays of up to `chunkSize` elements.
660
+ *
661
+ * This is particularly useful for DuckDB's Appender API or other columnar processing
662
+ * engines that expect data in chunks of columns.
663
+ *
664
+ * @param params - Configuration for the transformation.
665
+ * @param params.rows - An async or sync iterable of rows.
666
+ * @param params.chunkSize - The maximum number of rows per yielded chunk. Must be a positive integer.
667
+ * @param params.transformers - Optional mappers for specific columns to transform values before chunking.
553
668
  *
554
- * Example for chunkSize = 2:
555
- * input rows: [{id:'1',name:'A'}, {id:'2',name:'B'}, {id:'3',name:'C'}]
556
- * yields: [[['1','2'], ['A','B']], [['3'], ['C']]]
669
+ * @returns An async iterator yielding chunks of column-oriented data.
670
+ *
671
+ * @example
672
+ * ```typescript
673
+ * async function* generateRows() {
674
+ * yield { id: 1, name: 'A' };
675
+ * yield { id: 2, name: 'B' };
676
+ * yield { id: 3, name: 'C' };
677
+ * }
678
+ *
679
+ * const columnChunks = rowsToColumnsChunks({
680
+ * rows: generateRows(),
681
+ * chunkSize: 2,
682
+ * })
683
+ *
684
+ * for await (const chunk of columnChunks) {
685
+ * console.log(chunk);
686
+ * }
687
+ * // Output:
688
+ * // { id: [1, 2], name: ['A', 'B'] } // first chunk
689
+ * // { id: [3], name: ['C'] } // second chunk
690
+ * ```
557
691
  */
558
692
  async function* rowsToColumnsChunks(params) {
559
- const { rows, chunkSize } = params;
693
+ const { rows, chunkSize, transformers } = params;
560
694
  if (!Number.isSafeInteger(chunkSize) || chunkSize <= 0) throw new Error(`chunkSize must be a positive integer, got ${chunkSize}`);
561
695
  const first = await rows.next();
562
696
  if (first.done) return;
563
697
  const keys = Object.keys(first.value);
564
- let columns = keys.map(() => []);
698
+ const numKeys = keys.length;
699
+ const mappers = new Array(numKeys);
700
+ if (transformers !== void 0) {
701
+ const unknownKeys = Object.keys(transformers).filter((k) => !keys.includes(k));
702
+ if (unknownKeys.length > 0) throw new Error(`transformers parameter contains unknown row ids: ${unknownKeys.join(", ")}`);
703
+ for (let i = 0; i < numKeys; i++) mappers[i] = transformers[keys[i]];
704
+ }
705
+ function createColumns() {
706
+ const obj = {};
707
+ for (let i = 0; i < numKeys; i++) {
708
+ const k = keys[i];
709
+ obj[k] = [];
710
+ }
711
+ return obj;
712
+ }
713
+ let columns = createColumns();
565
714
  let rowsInChunk = 0;
566
- keys.forEach((k, i) => columns[i].push(toDuckValue(first.value[k])));
715
+ for (let i = 0; i < numKeys; i++) {
716
+ const k = keys[i];
717
+ const fn = mappers[i];
718
+ const val = first.value[k];
719
+ columns[k].push(fn === void 0 ? val : fn(val));
720
+ }
567
721
  rowsInChunk++;
568
722
  if (rowsInChunk >= chunkSize) {
569
723
  yield columns;
570
- columns = keys.map(() => []);
724
+ columns = createColumns();
571
725
  rowsInChunk = 0;
572
726
  }
573
727
  for await (const row of rows) {
574
- keys.forEach((k, i) => columns[i].push(toDuckValue(row[k])));
728
+ for (let i = 0; i < numKeys; i++) {
729
+ const k = keys[i];
730
+ const fn = mappers[i];
731
+ const val = row[k];
732
+ columns[k].push(fn === void 0 ? val : fn(val));
733
+ }
575
734
  rowsInChunk++;
576
735
  if (rowsInChunk >= chunkSize) {
577
736
  yield columns;
578
- columns = keys.map(() => []);
737
+ columns = createColumns();
579
738
  rowsInChunk = 0;
580
739
  }
581
740
  }
@@ -615,9 +774,13 @@ var SqlDuck = class {
615
774
  * schema: userSchema,
616
775
  * rowStream: getUserRows(),
617
776
  * chunkSize: 2048,
618
- * onDataAppended: ({ total }) => {
619
- * console.log(`Appended ${total} rows so far`);
777
+ * flushSyncFrequency: 10, // flush after every 10 chunks
778
+ * onChunkAppendedFrequency: 1, // multiple of chunks
779
+ * onChunkAppended: ({ totalRows }) => {
780
+ * console.log(`Appended ${totalRows} rows so far`);
620
781
  * },
782
+ * autoCheckpoint: true,
783
+ * checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
621
784
  * createOptions: {
622
785
  * create: 'CREATE_OR_REPLACE',
623
786
  * },
@@ -628,11 +791,13 @@ var SqlDuck = class {
628
791
  * ```
629
792
  */
630
793
  toTable = async (params) => {
631
- const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended, autoCheckpoint = true, checkpointChunksFrequency = 10 } = params;
794
+ const { table, schema, chunkSize = 2048, rowStream, createOptions, onChunkAppended, onChunkAppendedFrequency, flushSyncFrequency = 10, autoCheckpoint = true, checkpointChunksFrequency } = params;
632
795
  if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
633
796
  if (autoCheckpoint && typeof table.databaseName !== "string") throw new Error("autoCheckpoint requires table.databaseName to be provided.");
634
- if (checkpointChunksFrequency && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
635
- if (checkpointChunksFrequency !== void 0 && checkpointChunksFrequency < 1) throw new Error("checkpointChunksFrequency must be a positive number.");
797
+ if (checkpointChunksFrequency !== void 0 && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
798
+ if (checkpointChunksFrequency !== void 0 && (checkpointChunksFrequency < 1 || checkpointChunksFrequency > 1e5)) throw new Error("checkpointChunksFrequency must be a number between 1 and 100_000.");
799
+ if (onChunkAppendedFrequency !== void 0 && (onChunkAppendedFrequency < 1 || onChunkAppendedFrequency > 1e5)) throw new Error("onChunkAppendedFrequency must be a number between 1 and 100_000.");
800
+ if (flushSyncFrequency !== void 0 && (flushSyncFrequency < 1 || flushSyncFrequency > 1e5)) throw new Error("flushSyncFrequency must be a number between 1 and 100_000.");
636
801
  const dbManager = new DuckDatabaseManager(this.#conn);
637
802
  const timeStart = Date.now();
638
803
  const { columnTypes, ddl } = await createTableFromZod({
@@ -643,42 +808,56 @@ var SqlDuck = class {
643
808
  });
644
809
  const appender = await this.#conn.createAppender(table.tableName, table.schemaName, table.databaseName);
645
810
  const chunkTypes = Array.from(columnTypes.values());
811
+ const columnTypeIds = {};
812
+ const columnKeys = [];
813
+ for (const [key, duckType] of columnTypes) {
814
+ columnKeys.push(key);
815
+ columnTypeIds[key] = duckType;
816
+ }
817
+ const numColumns = columnKeys.length;
818
+ const transformers = createDuckColumnConverters(columnTypeIds);
646
819
  let totalRows = 0;
647
- const dataAppendedCollector = createOnDataAppendedCollector();
820
+ const chunkAppendedCollector = createOnChunkAppendedCollector();
648
821
  const columnStream = rowsToColumnsChunks({
649
822
  rows: rowStream,
650
- chunkSize
823
+ chunkSize,
824
+ transformers
651
825
  });
652
826
  let appendedChunkCount = 0;
827
+ const tableFullName = table.getFullName();
828
+ const tableName = table.tableName;
653
829
  try {
830
+ const isAsyncCb = onChunkAppended !== void 0 && isOnChunkAppendedAsyncCb(onChunkAppended);
654
831
  for await (const dataChunk of columnStream) {
655
832
  const chunk = DuckDBDataChunk.create(chunkTypes);
656
- this.#logger.debug(`Inserting chunk of ${dataChunk.length} rows`, { table: table.getFullName() });
657
- totalRows += dataChunk?.[0]?.length ?? 0;
658
- chunk.setColumns(dataChunk);
833
+ const columns = new Array(numColumns);
834
+ for (let i = 0; i < numColumns; i++) columns[i] = dataChunk[columnKeys[i]];
835
+ totalRows += columns[0]?.length ?? 0;
836
+ chunk.setColumns(columns);
659
837
  appender.appendDataChunk(chunk);
660
- appender.flushSync();
661
838
  appendedChunkCount += 1;
662
- if (onDataAppended !== void 0) {
663
- const payload = dataAppendedCollector(totalRows);
664
- if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
665
- else onDataAppended(payload);
839
+ if (onChunkAppended !== void 0 && (onChunkAppendedFrequency === void 0 || appendedChunkCount % onChunkAppendedFrequency === 0)) {
840
+ const payload = chunkAppendedCollector(totalRows);
841
+ if (isAsyncCb) await onChunkAppended(payload);
842
+ else onChunkAppended(payload);
666
843
  }
844
+ if (flushSyncFrequency !== void 0 && appendedChunkCount % flushSyncFrequency === 0) appender.flushSync();
667
845
  if (checkpointChunksFrequency !== void 0 && appendedChunkCount % checkpointChunksFrequency === 0 && typeof table.databaseName === "string") try {
668
846
  await dbManager.checkpoint(table.databaseName);
669
847
  } catch (e) {
670
- this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${table.getFullName()}' - ${e?.message ?? ""}`, { table: table.getFullName() });
848
+ this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
671
849
  }
672
850
  }
851
+ appender.flushSync();
673
852
  appender.closeSync();
674
853
  if (autoCheckpoint && typeof table.databaseName === "string") try {
675
854
  await dbManager.checkpoint(table.databaseName);
676
855
  } catch (e) {
677
- this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${table.getFullName()}' - ${e?.message ?? ""}`, { table: table.getFullName() });
856
+ this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
678
857
  }
679
858
  const timeMs = Math.round(Date.now() - timeStart);
680
859
  this.#logger.info(`Successfully appended ${totalRows} rows into '${table.getFullName()}' in ${timeMs}ms`, {
681
- table: table.getFullName(),
860
+ table: tableFullName,
682
861
  timeMs,
683
862
  totalRows
684
863
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flowblade/sqlduck",
3
- "version": "0.15.0",
3
+ "version": "0.17.0",
4
4
  "type": "module",
5
5
  "sideEffects": false,
6
6
  "exports": {
@@ -57,7 +57,7 @@
57
57
  },
58
58
  "dependencies": {
59
59
  "@flowblade/core": "^0.2.26",
60
- "@flowblade/source-duckdb": "^0.20.1",
60
+ "@flowblade/source-duckdb": "^0.20.2",
61
61
  "@flowblade/sql-tag": "^0.3.2",
62
62
  "@httpx/assert": "^0.16.9",
63
63
  "@httpx/dsn-parser": "^1.9.9",
@@ -65,11 +65,11 @@
65
65
  "@logtape/logtape": "^2.0.5",
66
66
  "@standard-schema/spec": "^1.1.0",
67
67
  "is-safe-filename": "0.1.1",
68
- "p-queue": "9.1.1",
68
+ "p-queue": "9.1.2",
69
69
  "zod": "^4.3.6"
70
70
  },
71
71
  "peerDependencies": {
72
- "@duckdb/node-api": "^1.5.0-r.1",
72
+ "@duckdb/node-api": "^1.5.1-r.2",
73
73
  "valibot": "^1.3.1"
74
74
  },
75
75
  "peerDependenciesMeta": {
@@ -79,22 +79,22 @@
79
79
  },
80
80
  "devDependencies": {
81
81
  "@belgattitude/eslint-config-bases": "8.12.0",
82
- "@dotenvx/dotenvx": "1.59.1",
83
- "@duckdb/node-api": "1.5.1-r.1",
82
+ "@dotenvx/dotenvx": "1.61.0",
83
+ "@duckdb/node-api": "1.5.1-r.2",
84
84
  "@faker-js/faker": "10.4.0",
85
85
  "@flowblade/source-kysely": "^1.3.0",
86
86
  "@httpx/assert": "0.16.9",
87
87
  "@mitata/counters": "0.0.8",
88
88
  "@size-limit/esbuild": "12.0.1",
89
89
  "@size-limit/file": "12.0.1",
90
- "@testcontainers/mssqlserver": "11.13.0",
90
+ "@testcontainers/mssqlserver": "11.14.0",
91
91
  "@total-typescript/ts-reset": "0.6.1",
92
92
  "@types/node": "25.5.2",
93
93
  "@typescript-eslint/eslint-plugin": "8.58.1",
94
94
  "@typescript-eslint/parser": "8.58.1",
95
- "@typescript/native-preview": "7.0.0-dev.20260406.1",
96
- "@vitest/coverage-v8": "4.1.3",
97
- "@vitest/ui": "4.1.3",
95
+ "@typescript/native-preview": "7.0.0-dev.20260410.1",
96
+ "@vitest/coverage-v8": "4.1.4",
97
+ "@vitest/ui": "4.1.4",
98
98
  "ansis": "4.2.0",
99
99
  "browserslist-to-esbuild": "2.1.1",
100
100
  "core-js": "3.49.0",
@@ -105,10 +105,10 @@
105
105
  "eslint": "8.57.1",
106
106
  "execa": "9.6.1",
107
107
  "is-in-ci": "2.0.0",
108
- "kysely": "0.28.15",
108
+ "kysely": "0.28.16",
109
109
  "mitata": "1.0.34",
110
110
  "npm-run-all2": "8.0.4",
111
- "prettier": "3.8.1",
111
+ "prettier": "3.8.2",
112
112
  "publint": "0.3.18",
113
113
  "regexp.escape": "2.0.1",
114
114
  "rimraf": "6.1.3",
@@ -116,14 +116,14 @@
116
116
  "sql-formatter": "15.7.3",
117
117
  "tarn": "3.0.2",
118
118
  "tedious": "19.2.1",
119
- "testcontainers": "11.13.0",
119
+ "testcontainers": "11.14.0",
120
120
  "tsdown": "0.21.7",
121
121
  "tsx": "4.21.0",
122
122
  "typedoc": "0.28.18",
123
123
  "typedoc-plugin-markdown": "4.11.0",
124
124
  "typescript": "6.0.2",
125
125
  "valibot": "1.3.1",
126
- "vitest": "4.1.3"
126
+ "vitest": "4.1.4"
127
127
  },
128
128
  "files": [
129
129
  "dist"