@flowblade/sqlduck 0.16.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -58,7 +58,7 @@ const result = await sqlDuck.toTable({
58
58
  rowStream: getUsers(), // The async iterable that yields rows
59
59
  // 👇Optional:
60
60
  chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
61
- onDataAppended: ({ timeMs, totalRows, rowsPerSecond }) => {
61
+ onChunkAppended: ({ timeMs, totalRows, rowsPerSecond }) => {
62
62
  console.log(
63
63
  `Appended ${totalRows} in time ${timeMs}ms, est: ${rowsPerSecond} rows/s`
64
64
  );
@@ -105,8 +105,8 @@ const result = sqlDuck.toTable({
105
105
  rowStream: getUserRows(), // The async iterable that yields rows
106
106
  // 👇Optional:
107
107
  chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
108
- onDataAppended: ({ total }) => {
109
- console.log(`Appended ${total} rows so far`);
108
+ onChunkAppended: ({ totalRows }) => {
109
+ console.log(`Appended ${totalRows} rows so far`);
110
110
  },
111
111
  // Optional table creation options
112
112
  createOptions: {
@@ -131,67 +131,72 @@ const queryResult = await dbDuckDbMemoryConn.query<{
131
131
  ### Node 24
132
132
 
133
133
  ```
134
+
134
135
  RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
135
136
 
136
137
 
137
- ✓ bench/appender.bench.ts > appender benches 3412ms
138
+ ✓ bench/appender.bench.ts > appender benches 2910ms
138
139
  name hz min max mean p75 p99 p995 p999 rme samples
139
- · duckdb appender memory, count: 100000, chunk size 2048 2.8872 295.84 396.87 346.35 396.87 396.87 396.87 396.87 ±185.36% 2
140
- · duckdb appender file, count: 100000, chunk size 2048 1.7908 558.40 558.40 558.40 558.40 558.40 558.40 558.40 ±0.00% 1
141
- · duckdb appender, count: 100000, chunk size 1024 1.9967 500.82 500.82 500.82 500.82 500.82 500.82 500.82 ±0.00% 1
140
+ · duckdb appender memory, count: 100000, chunk size 2048 3.5446 265.91 298.32 282.12 298.32 298.32 298.32 298.32 ±72.99% 2
141
+ · duckdb appender file, count: 100000, chunk size 2048 2.6130 355.30 410.10 382.70 410.10 410.10 410.10 410.10 ±91.00% 2
142
+ · duckdb appender, count: 100000, chunk size 1024 3.8027 226.52 299.42 262.97 299.42 299.42 299.42 299.42 ±176.17% 2
142
143
 
143
- ✓ bench/stream.bench.ts > Bench stream 2140ms
144
- name hz min max mean p75 p99 p995 p999 rme samples
145
- · rowToColumnsChunk with chunkSize 2048 (count: 100000) 11.8099 63.0430 172.82 84.6748 81.4757 172.82 172.82 172.82 ±27.12% 10
146
- · mapFakeRowStream with chunkSize 2048 (count: 100000) 10.0442 79.5805 125.81 99.5603 112.35 125.81 125.81 125.81 ±12.04% 10
144
+ ✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2998ms
145
+ name hz min max mean p75 p99 p995 p999 rme samples
146
+ · rowToColumnsChunk with chunkSize 2048 (count: 100000) 11.3182 60.6404 190.56 88.3532 79.4999 190.56 190.56 190.56 ±30.64% 10
147
+ · rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.4430 63.9716 102.73 74.3883 75.2151 102.73 102.73 102.73 ±10.40% 10
148
+ · mapFakeRowStream with chunkSize 2048 (count: 100000) 10.7785 84.3040 117.57 92.7773 97.1595 117.57 117.57 117.57 ±7.61% 10
147
149
 
148
- ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 617ms
150
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 615ms
149
151
  name hz min max mean p75 p99 p995 p999 rme samples
150
- · getTableCreateFromZod 27,583.03 0.0226 2.7818 0.0363 0.0403 0.1185 0.1583 0.4321 ±1.66% 13792
152
+ · getTableCreateFromZod 27,048.71 0.0239 3.2029 0.0370 0.0404 0.1310 0.1980 0.5674 ±2.08% 13525
151
153
 
152
154
  BENCH Summary
153
155
 
154
- duckdb appender memory, count: 100000, chunk size 2048 - bench/appender.bench.ts > appender benches
155
- 1.45x faster than duckdb appender, count: 100000, chunk size 1024
156
- 1.61x faster than duckdb appender file, count: 100000, chunk size 2048
156
+ duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
157
+ 1.07x faster than duckdb appender memory, count: 100000, chunk size 2048
158
+ 1.46x faster than duckdb appender file, count: 100000, chunk size 2048
157
159
 
158
- rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench stream
159
- 1.18x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
160
+ rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
161
+ 1.19x faster than rowToColumnsChunk with chunkSize 2048 (count: 100000)
162
+ 1.25x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
160
163
 
161
164
  getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
162
165
  ```
163
166
 
164
- ### Bun 1.3.11
167
+ ### Bun 1.3.12
165
168
 
166
169
  ```
167
170
  RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
168
171
 
169
172
 
170
- ✓ bench/appender.bench.ts > appender benches 3357ms
173
+ ✓ bench/appender.bench.ts > appender benches 2811ms
171
174
  name hz min max mean p75 p99 p995 p999 rme samples
172
- · duckdb appender memory, count: 100000, chunk size 2048 2.9741 315.71 356.77 336.24 356.77 356.77 356.77 356.77 ±77.59% 2
173
- · duckdb appender file, count: 100000, chunk size 2048 1.8953 527.62 527.62 527.62 527.62 527.62 527.62 527.62 ±0.00% 1
174
- · duckdb appender, count: 100000, chunk size 1024 1.7803 561.70 561.70 561.70 561.70 561.70 561.70 561.70 ±0.00% 1
175
+ · duckdb appender memory, count: 100000, chunk size 2048 3.9242 224.75 285.38 254.83 285.38 285.38 285.38 285.38 ±29.56% 3
176
+ · duckdb appender file, count: 100000, chunk size 2048 3.8209 256.09 267.34 261.72 267.34 267.34 267.34 267.34 ±27.31% 2
177
+ · duckdb appender, count: 100000, chunk size 1024 4.6118 196.77 234.22 216.84 234.22 234.22 234.22 234.22 ±21.62% 3
175
178
 
176
- ✓ bench/stream.bench.ts > Bench stream 2058ms
177
- name hz min max mean p75 p99 p995 p999 rme samples
178
- · rowToColumnsChunk with chunkSize 2048 (count: 100000) 12.0130 60.7081 111.32 83.2432 99.7116 111.32 111.32 111.32 ±14.15% 10
179
- · mapFakeRowStream with chunkSize 2048 (count: 100000) 10.9710 76.4253 145.14 91.1493 91.1424 145.14 145.14 145.14 ±16.04% 10
179
+ ✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2667ms
180
+ name hz min max mean p75 p99 p995 p999 rme samples
181
+ · rowToColumnsChunk with chunkSize 2048 (count: 100000) 14.4994 57.7717 81.8690 68.9683 78.5234 81.8690 81.8690 81.8690 ±9.43% 10
182
+ · rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.2052 68.7139 96.8400 75.7275 79.4380 96.8400 96.8400 96.8400 ±8.22% 10
183
+ · mapFakeRowStream with chunkSize 2048 (count: 100000) 12.7827 73.9127 85.0696 78.2310 82.5510 85.0696 85.0696 85.0696 ±3.69% 10
180
184
 
181
- ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 621ms
185
+ ✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 624ms
182
186
  name hz min max mean p75 p99 p995 p999 rme samples
183
- · getTableCreateFromZod 27,472.91 0.0185 4.6226 0.0364 0.0369 0.1137 0.1476 1.9138 ±5.20% 13737
187
+ · getTableCreateFromZod 28,477.04 0.0191 6.4836 0.0351 0.0335 0.1071 0.1530 2.4823 ±6.21% 14239
184
188
 
185
189
  BENCH Summary
186
190
 
187
- rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench stream
188
- 1.09x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
191
+ rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
192
+ 1.10x faster than rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000)
193
+ 1.13x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
189
194
 
190
195
  getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
191
196
 
192
- duckdb appender memory, count: 100000, chunk size 2048 - bench/appender.bench.ts > appender benches
193
- 1.57x faster than duckdb appender file, count: 100000, chunk size 2048
194
- 1.67x faster than duckdb appender, count: 100000, chunk size 1024
197
+ duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
198
+ 1.18x faster than duckdb appender memory, count: 100000, chunk size 2048
199
+ 1.21x faster than duckdb appender file, count: 100000, chunk size 2048
195
200
 
196
201
  ```
197
202
 
package/dist/index.d.mts CHANGED
@@ -7,7 +7,7 @@ import * as z from "zod";
7
7
  import { ZodObject } from "zod";
8
8
 
9
9
  //#region src/appender/data-appender-callback.d.ts
10
- type OnDataAppendedStats = {
10
+ type OnChunkAppendedStats = {
11
11
  /**
12
12
  * Total number of rows appended so far (all batches included)
13
13
  */
@@ -21,9 +21,9 @@ type OnDataAppendedStats = {
21
21
  */
22
22
  rowsPerSecond: number;
23
23
  };
24
- type OnDataAppendedSyncCb = (stats: OnDataAppendedStats) => void;
25
- type OnDataAppendedAsyncCb = (stats: OnDataAppendedStats) => Promise<void>;
26
- type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
24
+ type OnChunkAppendedSyncCb = (stats: OnChunkAppendedStats) => void;
25
+ type OnChunkAppendedAsyncCb = (stats: OnChunkAppendedStats) => Promise<void>;
26
+ type OnChunkAppendedCb = OnChunkAppendedSyncCb | OnChunkAppendedAsyncCb;
27
27
  //#endregion
28
28
  //#region src/helpers/duck-memory.d.ts
29
29
  declare const duckMemoryTags: readonly ["BASE_TABLE", "HASH_TABLE", "PARQUET_READER", "CSV_READER", "ORDER_BY", "ART_INDEX", "COLUMN_DATA", "METADATA", "OVERFLOW_STRINGS", "IN_MEMORY_TABLE", "ALLOCATOR", "EXTENSION", "TRANSACTION", "EXTERNAL_FILE_CACHE", "WINDOW", "OBJECT_CACHE"];
@@ -113,41 +113,65 @@ type SqlDuckParams = {
113
113
  type RowStream<T> = AsyncIterableIterator<T> | AsyncGenerator<T> | Generator<T>;
114
114
  type ToTableParams<TSchema extends TableSchemaZod> = {
115
115
  /**
116
- * Used to create and fill the data into the table
116
+ * The target table where the data will be inserted.
117
+ * This object contains the table name and optionally the schema and database name.
117
118
  */
118
119
  table: Table;
119
120
  /**
120
- * Schema describing the table structure and rowStream content
121
+ * A Zod schema that defines the structure of the table and the expected format of the rows in the `rowStream`.
122
+ * The schema is used to generate the `CREATE TABLE` DDL and to convert row values to DuckDB types.
121
123
  */
122
124
  schema: TSchema;
123
125
  /**
124
- * Stream of rows to insert into the table
126
+ * An iterable (async or sync) or generator that yields rows to be inserted.
127
+ * Each row must match the structure defined in the `schema`.
125
128
  */
126
129
  rowStream: RowStream<z.infer<TSchema>>;
127
130
  /**
128
- * Chunk size when using appender to insert data.
129
- * Valid numbers between 1 and 2048.
131
+ * The number of rows to accumulate before appending them to the DuckDB table as a single data chunk.
132
+ * Tuning this value can impact memory usage and insertion performance.
133
+ * Valid values are between 1 and 2048.
130
134
  * @default 2048
131
135
  */
132
136
  chunkSize?: number;
133
137
  /**
134
- * Extra options when creating the table
138
+ * Configuration options for the `CREATE TABLE` statement (e.g., `IF NOT EXISTS`, `CREATE OR REPLACE`).
139
+ * If omitted, a standard `CREATE TABLE` statement is used.
135
140
  */
136
141
  createOptions?: TableCreateOptions;
137
142
  /**
138
- * Callback called each time a datachunk is appended to the table
143
+ * An optional callback invoked after each data chunk is successfully appended to the table.
144
+ * Useful for tracking progress, logging statistics, or implementing custom hooks during the insertion process.
139
145
  */
140
- onDataAppended?: OnDataAppendedCb;
146
+ onChunkAppended?: OnChunkAppendedCb;
141
147
  /**
142
- * Automatically checkpoint the table after all chunks have been appended.
148
+ * Specifies the frequency (in number of chunks) at which the `onChunkAppended` callback should be triggered.
149
+ *
150
+ * For example, if `chunkSize` is 2048 and `onChunkAppendedFrequency` is 5,
151
+ * the callback will be called every 10,240 rows (5 chunks * 2048 rows/chunk).
152
+ *
153
+ * @default 1
154
+ */
155
+ onChunkAppendedFrequency?: number;
156
+ /**
157
+ * Specifies the frequency (in number of chunks) at which `appender.flushSync()` is called (defaults to 10 when omitted).
158
+ * Calling `flushSync()` can help to clear internal buffers and make the data visible.
159
+ *
160
+ * For example, if `chunkSize` is 2048 and `flushSyncFrequency` is 5,
161
+ * the appender will be flushed every 10,240 rows (5 chunks * 2048 rows/chunk).
162
+ */
163
+ flushSyncFrequency?: number;
164
+ /**
165
+ * If set to `true`, a checkpoint is automatically performed after all rows from the `rowStream` have been processed.
166
+ * This ensures that all data is persisted and the WAL is cleared.
143
167
  * @default true
144
168
  */
145
169
  autoCheckpoint?: boolean;
146
170
  /**
147
- * Checkpoint the table after 'n' chunks have been appended
171
+ * Specifies the frequency (in number of chunks) at which a checkpoint should be triggered.
148
172
  *
149
- * For example if the chunkSize is 2048, setting frequency to 2
150
- * will checkpoint the table every 4096 rows (2x chunksize)
173
+ * For example, if `chunkSize` is 2048 and `checkpointChunksFrequency` is 5,
174
+ * a checkpoint will occur every 10,240 rows (5 chunks * 2048 rows/chunk).
151
175
  */
152
176
  checkpointChunksFrequency?: number;
153
177
  };
@@ -193,9 +217,13 @@ declare class SqlDuck {
193
217
  * schema: userSchema,
194
218
  * rowStream: getUserRows(),
195
219
  * chunkSize: 2048,
196
- * onDataAppended: ({ total }) => {
197
- * console.log(`Appended ${total} rows so far`);
220
+ * flushSyncFrequency: 10, // flush after every 10 chunks
221
+ * onChunkAppendedFrequency: 1, // invoke onChunkAppended after every chunk
222
+ * onChunkAppended: ({ totalRows }) => {
223
+ * console.log(`Appended ${totalRows} rows so far`);
198
224
  * },
225
+ * autoCheckpoint: true,
226
+ * checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
199
227
  * createOptions: {
200
228
  * create: 'CREATE_OR_REPLACE',
201
229
  * },
@@ -302,4 +330,4 @@ declare const flowbladeLogtapeSqlduckConfig: {
302
330
  //#region src/logger/sqlduck-default-logtape-logger.d.ts
303
331
  declare const sqlduckDefaultLogtapeLogger: _$_logtape_logtape0.Logger;
304
332
  //#endregion
305
- export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type OnDataAppendedCb, type OnDataAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, duckReservedKeywords, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
333
+ export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type OnChunkAppendedCb, type OnChunkAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, duckReservedKeywords, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
package/dist/index.mjs CHANGED
@@ -104,10 +104,12 @@ var DuckMemory = class {
104
104
  };
105
105
  //#endregion
106
106
  //#region src/appender/data-appender-callback.ts
107
- const isOnDataAppendedAsyncCb = (v) => {
108
- return v.constructor.name === "AsyncFunction";
107
+ const isOnChunkAppendedAsyncCb = (v) => {
108
+ return v.constructor.name === "AsyncFunction" || v.constructor === (async () => {
109
+ await Promise.resolve();
110
+ }).constructor;
109
111
  };
110
- const createOnDataAppendedCollector = () => {
112
+ const createOnChunkAppendedCollector = () => {
111
113
  let lastCallbackTimeStart = Date.now();
112
114
  let appendedTotalRows = 0;
113
115
  return (currentTotalRows) => {
@@ -188,7 +190,7 @@ var DuckValueConverter = class {
188
190
  //#endregion
189
191
  //#region src/converter/create-duck-column-converters.ts
190
192
  const createDuckColumnConverters = (duckTypes) => {
191
- const convMap = /* @__PURE__ */ new Map();
193
+ const convMap = {};
192
194
  const converter = new DuckValueConverter();
193
195
  for (const [key, duckType] of Object.entries(duckTypes)) {
194
196
  let conv;
@@ -223,7 +225,7 @@ const createDuckColumnConverters = (duckTypes) => {
223
225
  break;
224
226
  default: throw new Error(`Unsupported duck type ${duckTypeId} / ${duckType.toString()} for column '${key}'`);
225
227
  }
226
- if (conv !== false) convMap.set(key, conv);
228
+ if (conv !== false) convMap[key] = conv;
227
229
  }
228
230
  return convMap;
229
231
  };
@@ -650,13 +652,42 @@ const createTableFromZod = async (params) => {
650
652
  //#endregion
651
653
  //#region src/utils/rows-to-columns-chunks.ts
652
654
  /**
653
- * Similar to `rowsToColumns` but yields results in chunks to avoid buffering
654
- * the entire dataset in memory. Each yielded item is a columns array for up to
655
- * `chunkSize` rows.
655
+ * Converts a stream of rows (row-oriented) into a stream of column-oriented chunks.
656
656
  *
657
- * Example for chunkSize = 2:
658
- * input rows: [{id:'1',name:'A'}, {id:'2',name:'B'}, {id:'3',name:'C'}]
659
- * yields: [[['1','2'], ['A','B']], [['3'], ['C']]]
657
+ * This function processes row data incrementally using an async generator, which prevents
658
+ * loading the entire dataset into memory. Each yielded chunk is an object where keys are
659
+ * column names and values are arrays of up to `chunkSize` elements.
660
+ *
661
+ * This is particularly useful for DuckDB's Appender API or other columnar processing
662
+ * engines that expect data in chunks of columns.
663
+ *
664
+ * @param params - Configuration for the transformation.
665
+ * @param params.rows - An async or sync iterable of rows.
666
+ * @param params.chunkSize - The maximum number of rows per yielded chunk. Must be a positive integer.
667
+ * @param params.transformers - Optional mappers for specific columns to transform values before chunking.
668
+ *
669
+ * @returns An async iterator yielding chunks of column-oriented data.
670
+ *
671
+ * @example
672
+ * ```typescript
673
+ * async function* generateRows() {
674
+ * yield { id: 1, name: 'A' };
675
+ * yield { id: 2, name: 'B' };
676
+ * yield { id: 3, name: 'C' };
677
+ * }
678
+ *
679
+ * const columnChunks = rowsToColumnsChunks({
680
+ * rows: generateRows(),
681
+ * chunkSize: 2,
682
+ * })
683
+ *
684
+ * for await (const chunk of columnChunks) {
685
+ * console.log(chunk);
686
+ * }
687
+ * // Output:
688
+ * // { id: [1, 2], name: ['A', 'B'] } // first chunk
689
+ * // { id: [3], name: ['C'] } // second chunk
690
+ * ```
660
691
  */
661
692
  async function* rowsToColumnsChunks(params) {
662
693
  const { rows, chunkSize, transformers } = params;
@@ -664,27 +695,46 @@ async function* rowsToColumnsChunks(params) {
664
695
  const first = await rows.next();
665
696
  if (first.done) return;
666
697
  const keys = Object.keys(first.value);
667
- let columns = keys.map(() => []);
698
+ const numKeys = keys.length;
699
+ const mappers = new Array(numKeys);
700
+ if (transformers !== void 0) {
701
+ const unknownKeys = Object.keys(transformers).filter((k) => !keys.includes(k));
702
+ if (unknownKeys.length > 0) throw new Error(`transformers parameter contains unknown row ids: ${unknownKeys.join(", ")}`);
703
+ for (let i = 0; i < numKeys; i++) mappers[i] = transformers[keys[i]];
704
+ }
705
+ function createColumns() {
706
+ const obj = {};
707
+ for (let i = 0; i < numKeys; i++) {
708
+ const k = keys[i];
709
+ obj[k] = [];
710
+ }
711
+ return obj;
712
+ }
713
+ let columns = createColumns();
668
714
  let rowsInChunk = 0;
669
- keys.forEach((k, i) => {
670
- const fn = transformers?.get(k);
671
- columns[i].push(fn === void 0 ? first.value[k] : fn(first.value[k]));
672
- });
715
+ for (let i = 0; i < numKeys; i++) {
716
+ const k = keys[i];
717
+ const fn = mappers[i];
718
+ const val = first.value[k];
719
+ columns[k].push(fn === void 0 ? val : fn(val));
720
+ }
673
721
  rowsInChunk++;
674
722
  if (rowsInChunk >= chunkSize) {
675
723
  yield columns;
676
- columns = keys.map(() => []);
724
+ columns = createColumns();
677
725
  rowsInChunk = 0;
678
726
  }
679
727
  for await (const row of rows) {
680
- keys.forEach((k, i) => {
681
- const fn = transformers?.get(k);
682
- columns[i].push(fn === void 0 ? row[k] : fn(row[k]));
683
- });
728
+ for (let i = 0; i < numKeys; i++) {
729
+ const k = keys[i];
730
+ const fn = mappers[i];
731
+ const val = row[k];
732
+ columns[k].push(fn === void 0 ? val : fn(val));
733
+ }
684
734
  rowsInChunk++;
685
735
  if (rowsInChunk >= chunkSize) {
686
736
  yield columns;
687
- columns = keys.map(() => []);
737
+ columns = createColumns();
688
738
  rowsInChunk = 0;
689
739
  }
690
740
  }
@@ -724,9 +774,13 @@ var SqlDuck = class {
724
774
  * schema: userSchema,
725
775
  * rowStream: getUserRows(),
726
776
  * chunkSize: 2048,
727
- * onDataAppended: ({ total }) => {
728
- * console.log(`Appended ${total} rows so far`);
777
+ * flushSyncFrequency: 10, // flush after every 10 chunks
778
+ * onChunkAppendedFrequency: 1, // invoke onChunkAppended after every chunk
779
+ * onChunkAppended: ({ totalRows }) => {
780
+ * console.log(`Appended ${totalRows} rows so far`);
729
781
  * },
782
+ * autoCheckpoint: true,
783
+ * checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
730
784
  * createOptions: {
731
785
  * create: 'CREATE_OR_REPLACE',
732
786
  * },
@@ -737,11 +791,13 @@ var SqlDuck = class {
737
791
  * ```
738
792
  */
739
793
  toTable = async (params) => {
740
- const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended, autoCheckpoint = true, checkpointChunksFrequency = 10 } = params;
794
+ const { table, schema, chunkSize = 2048, rowStream, createOptions, onChunkAppended, onChunkAppendedFrequency, flushSyncFrequency = 10, autoCheckpoint = true, checkpointChunksFrequency } = params;
741
795
  if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
742
796
  if (autoCheckpoint && typeof table.databaseName !== "string") throw new Error("autoCheckpoint requires table.databaseName to be provided.");
743
- if (checkpointChunksFrequency && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
744
- if (checkpointChunksFrequency !== void 0 && checkpointChunksFrequency < 1) throw new Error("checkpointChunksFrequency must be a positive number.");
797
+ if (checkpointChunksFrequency !== void 0 && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
798
+ if (checkpointChunksFrequency !== void 0 && (checkpointChunksFrequency < 1 || checkpointChunksFrequency > 1e5)) throw new Error("checkpointChunksFrequency must be a number between 1 and 100_000.");
799
+ if (onChunkAppendedFrequency !== void 0 && (onChunkAppendedFrequency < 1 || onChunkAppendedFrequency > 1e5)) throw new Error("onChunkAppendedFrequency must be a number between 1 and 100_000.");
800
+ if (flushSyncFrequency !== void 0 && (flushSyncFrequency < 1 || flushSyncFrequency > 1e5)) throw new Error("flushSyncFrequency must be a number between 1 and 100_000.");
745
801
  const dbManager = new DuckDatabaseManager(this.#conn);
746
802
  const timeStart = Date.now();
747
803
  const { columnTypes, ddl } = await createTableFromZod({
@@ -752,46 +808,56 @@ var SqlDuck = class {
752
808
  });
753
809
  const appender = await this.#conn.createAppender(table.tableName, table.schemaName, table.databaseName);
754
810
  const chunkTypes = Array.from(columnTypes.values());
755
- const transformers = createDuckColumnConverters(Object.fromEntries(Array.from(columnTypes).map(([key, duckType]) => {
756
- return [key, duckType];
757
- })));
811
+ const columnTypeIds = {};
812
+ const columnKeys = [];
813
+ for (const [key, duckType] of columnTypes) {
814
+ columnKeys.push(key);
815
+ columnTypeIds[key] = duckType;
816
+ }
817
+ const numColumns = columnKeys.length;
818
+ const transformers = createDuckColumnConverters(columnTypeIds);
758
819
  let totalRows = 0;
759
- const dataAppendedCollector = createOnDataAppendedCollector();
820
+ const chunkAppendedCollector = createOnChunkAppendedCollector();
760
821
  const columnStream = rowsToColumnsChunks({
761
822
  rows: rowStream,
762
823
  chunkSize,
763
824
  transformers
764
825
  });
765
826
  let appendedChunkCount = 0;
827
+ const tableFullName = table.getFullName();
828
+ const tableName = table.tableName;
766
829
  try {
830
+ const isAsyncCb = onChunkAppended !== void 0 && isOnChunkAppendedAsyncCb(onChunkAppended);
767
831
  for await (const dataChunk of columnStream) {
768
832
  const chunk = DuckDBDataChunk.create(chunkTypes);
769
- this.#logger.debug(`Inserting chunk of ${dataChunk.length} rows`, { table: table.getFullName() });
770
- totalRows += dataChunk?.[0]?.length ?? 0;
771
- chunk.setColumns(dataChunk);
833
+ const columns = new Array(numColumns);
834
+ for (let i = 0; i < numColumns; i++) columns[i] = dataChunk[columnKeys[i]];
835
+ totalRows += columns[0]?.length ?? 0;
836
+ chunk.setColumns(columns);
772
837
  appender.appendDataChunk(chunk);
773
- appender.flushSync();
774
838
  appendedChunkCount += 1;
775
- if (onDataAppended !== void 0) {
776
- const payload = dataAppendedCollector(totalRows);
777
- if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
778
- else onDataAppended(payload);
839
+ if (onChunkAppended !== void 0 && (onChunkAppendedFrequency === void 0 || appendedChunkCount % onChunkAppendedFrequency === 0)) {
840
+ const payload = chunkAppendedCollector(totalRows);
841
+ if (isAsyncCb) await onChunkAppended(payload);
842
+ else onChunkAppended(payload);
779
843
  }
844
+ if (flushSyncFrequency !== void 0 && appendedChunkCount % flushSyncFrequency === 0) appender.flushSync();
780
845
  if (checkpointChunksFrequency !== void 0 && appendedChunkCount % checkpointChunksFrequency === 0 && typeof table.databaseName === "string") try {
781
846
  await dbManager.checkpoint(table.databaseName);
782
847
  } catch (e) {
783
- this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${table.getFullName()}' - ${e?.message ?? ""}`, { table: table.getFullName() });
848
+ this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
784
849
  }
785
850
  }
851
+ appender.flushSync();
786
852
  appender.closeSync();
787
853
  if (autoCheckpoint && typeof table.databaseName === "string") try {
788
854
  await dbManager.checkpoint(table.databaseName);
789
855
  } catch (e) {
790
- this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${table.getFullName()}' - ${e?.message ?? ""}`, { table: table.getFullName() });
856
+ this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
791
857
  }
792
858
  const timeMs = Math.round(Date.now() - timeStart);
793
859
  this.#logger.info(`Successfully appended ${totalRows} rows into '${table.getFullName()}' in ${timeMs}ms`, {
794
- table: table.getFullName(),
860
+ table: tableFullName,
795
861
  timeMs,
796
862
  totalRows
797
863
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flowblade/sqlduck",
3
- "version": "0.16.0",
3
+ "version": "0.17.1",
4
4
  "type": "module",
5
5
  "sideEffects": false,
6
6
  "exports": {
@@ -56,12 +56,12 @@
56
56
  "check-size-disabled": "size-limit"
57
57
  },
58
58
  "dependencies": {
59
- "@flowblade/core": "^0.2.26",
60
- "@flowblade/source-duckdb": "^0.20.1",
61
- "@flowblade/sql-tag": "^0.3.2",
59
+ "@flowblade/core": "^0.2.27",
60
+ "@flowblade/source-duckdb": "^0.20.3",
61
+ "@flowblade/sql-tag": "^0.3.3",
62
62
  "@httpx/assert": "^0.16.9",
63
63
  "@httpx/dsn-parser": "^1.9.9",
64
- "@httpx/plain-object": "^2.1.8",
64
+ "@httpx/plain-object": "^2.1.9",
65
65
  "@logtape/logtape": "^2.0.5",
66
66
  "@standard-schema/spec": "^1.1.0",
67
67
  "is-safe-filename": "0.1.1",
@@ -69,7 +69,7 @@
69
69
  "zod": "^4.3.6"
70
70
  },
71
71
  "peerDependencies": {
72
- "@duckdb/node-api": "^1.5.0-r.1",
72
+ "@duckdb/node-api": "^1.5.2-r.1",
73
73
  "valibot": "^1.3.1"
74
74
  },
75
75
  "peerDependenciesMeta": {
@@ -79,20 +79,20 @@
79
79
  },
80
80
  "devDependencies": {
81
81
  "@belgattitude/eslint-config-bases": "8.12.0",
82
- "@dotenvx/dotenvx": "1.59.1",
83
- "@duckdb/node-api": "1.5.1-r.1",
82
+ "@dotenvx/dotenvx": "1.61.0",
83
+ "@duckdb/node-api": "1.5.2-r.1",
84
84
  "@faker-js/faker": "10.4.0",
85
- "@flowblade/source-kysely": "^1.3.0",
85
+ "@flowblade/source-kysely": "^1.3.1",
86
86
  "@httpx/assert": "0.16.9",
87
87
  "@mitata/counters": "0.0.8",
88
88
  "@size-limit/esbuild": "12.0.1",
89
89
  "@size-limit/file": "12.0.1",
90
- "@testcontainers/mssqlserver": "11.13.0",
90
+ "@testcontainers/mssqlserver": "11.14.0",
91
91
  "@total-typescript/ts-reset": "0.6.1",
92
- "@types/node": "25.5.2",
93
- "@typescript-eslint/eslint-plugin": "8.58.1",
94
- "@typescript-eslint/parser": "8.58.1",
95
- "@typescript/native-preview": "7.0.0-dev.20260406.1",
92
+ "@types/node": "25.6.0",
93
+ "@typescript-eslint/eslint-plugin": "8.58.2",
94
+ "@typescript-eslint/parser": "8.58.2",
95
+ "@typescript/native-preview": "7.0.0-dev.20260410.1",
96
96
  "@vitest/coverage-v8": "4.1.4",
97
97
  "@vitest/ui": "4.1.4",
98
98
  "ansis": "4.2.0",
@@ -105,10 +105,10 @@
105
105
  "eslint": "8.57.1",
106
106
  "execa": "9.6.1",
107
107
  "is-in-ci": "2.0.0",
108
- "kysely": "0.28.15",
108
+ "kysely": "0.28.16",
109
109
  "mitata": "1.0.34",
110
110
  "npm-run-all2": "8.0.4",
111
- "prettier": "3.8.1",
111
+ "prettier": "3.8.2",
112
112
  "publint": "0.3.18",
113
113
  "regexp.escape": "2.0.1",
114
114
  "rimraf": "6.1.3",
@@ -116,10 +116,10 @@
116
116
  "sql-formatter": "15.7.3",
117
117
  "tarn": "3.0.2",
118
118
  "tedious": "19.2.1",
119
- "testcontainers": "11.13.0",
120
- "tsdown": "0.21.7",
119
+ "testcontainers": "11.14.0",
120
+ "tsdown": "0.21.8",
121
121
  "tsx": "4.21.0",
122
- "typedoc": "0.28.18",
122
+ "typedoc": "0.28.19",
123
123
  "typedoc-plugin-markdown": "4.11.0",
124
124
  "typescript": "6.0.2",
125
125
  "valibot": "1.3.1",