@flowblade/sqlduck 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -34
- package/dist/index.d.mts +47 -19
- package/dist/index.mjs +108 -42
- package/package.json +10 -10
package/README.md
CHANGED
|
@@ -58,7 +58,7 @@ const result = await sqlDuck.toTable({
|
|
|
58
58
|
rowStream: getUsers(), // The async iterable that yields rows
|
|
59
59
|
// 👇Optional:
|
|
60
60
|
chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
|
|
61
|
-
|
|
61
|
+
onChunkAppended: ({ timeMs, totalRows, rowsPerSecond }) => {
|
|
62
62
|
console.log(
|
|
63
63
|
`Appended ${totalRows} in time ${timeMs}ms, est: ${rowsPerSecond} rows/s`
|
|
64
64
|
);
|
|
@@ -105,8 +105,8 @@ const result = sqlDuck.toTable({
|
|
|
105
105
|
rowStream: getUserRows(), // The async iterable that yields rows
|
|
106
106
|
// 👇Optional:
|
|
107
107
|
chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
|
|
108
|
-
|
|
109
|
-
console.log(`Appended ${
|
|
108
|
+
onChunkAppended: ({ totalRows }) => {
|
|
109
|
+
console.log(`Appended ${totalRows} rows so far`);
|
|
110
110
|
},
|
|
111
111
|
// Optional table creation options
|
|
112
112
|
createOptions: {
|
|
@@ -131,67 +131,72 @@ const queryResult = await dbDuckDbMemoryConn.query<{
|
|
|
131
131
|
### Node 24
|
|
132
132
|
|
|
133
133
|
```
|
|
134
|
+
|
|
134
135
|
RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
|
|
135
136
|
|
|
136
137
|
|
|
137
|
-
✓ bench/appender.bench.ts > appender benches
|
|
138
|
+
✓ bench/appender.bench.ts > appender benches 2910ms
|
|
138
139
|
name hz min max mean p75 p99 p995 p999 rme samples
|
|
139
|
-
· duckdb appender memory, count: 100000, chunk size 2048
|
|
140
|
-
· duckdb appender file, count: 100000, chunk size 2048
|
|
141
|
-
· duckdb appender, count: 100000, chunk size 1024
|
|
140
|
+
· duckdb appender memory, count: 100000, chunk size 2048 3.5446 265.91 298.32 282.12 298.32 298.32 298.32 298.32 ±72.99% 2
|
|
141
|
+
· duckdb appender file, count: 100000, chunk size 2048 2.6130 355.30 410.10 382.70 410.10 410.10 410.10 410.10 ±91.00% 2
|
|
142
|
+
· duckdb appender, count: 100000, chunk size 1024 3.8027 226.52 299.42 262.97 299.42 299.42 299.42 299.42 ±176.17% 2
|
|
142
143
|
|
|
143
|
-
✓ bench/stream.bench.ts > Bench
|
|
144
|
-
name
|
|
145
|
-
· rowToColumnsChunk with chunkSize 2048 (count: 100000)
|
|
146
|
-
·
|
|
144
|
+
✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2998ms
|
|
145
|
+
name hz min max mean p75 p99 p995 p999 rme samples
|
|
146
|
+
· rowToColumnsChunk with chunkSize 2048 (count: 100000) 11.3182 60.6404 190.56 88.3532 79.4999 190.56 190.56 190.56 ±30.64% 10
|
|
147
|
+
· rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.4430 63.9716 102.73 74.3883 75.2151 102.73 102.73 102.73 ±10.40% 10
|
|
148
|
+
· mapFakeRowStream with chunkSize 2048 (count: 100000) 10.7785 84.3040 117.57 92.7773 97.1595 117.57 117.57 117.57 ±7.61% 10
|
|
147
149
|
|
|
148
|
-
✓ bench/table-create.bench.ts > Bench getTableCreateFromZod
|
|
150
|
+
✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 615ms
|
|
149
151
|
name hz min max mean p75 p99 p995 p999 rme samples
|
|
150
|
-
· getTableCreateFromZod 27,
|
|
152
|
+
· getTableCreateFromZod 27,048.71 0.0239 3.2029 0.0370 0.0404 0.1310 0.1980 0.5674 ±2.08% 13525
|
|
151
153
|
|
|
152
154
|
BENCH Summary
|
|
153
155
|
|
|
154
|
-
duckdb appender
|
|
155
|
-
1.
|
|
156
|
-
1.
|
|
156
|
+
duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
|
|
157
|
+
1.07x faster than duckdb appender memory, count: 100000, chunk size 2048
|
|
158
|
+
1.46x faster than duckdb appender file, count: 100000, chunk size 2048
|
|
157
159
|
|
|
158
|
-
rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench
|
|
159
|
-
1.
|
|
160
|
+
rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
|
|
161
|
+
1.19x faster than rowToColumnsChunk with chunkSize 2048 (count: 100000)
|
|
162
|
+
1.25x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
|
|
160
163
|
|
|
161
164
|
getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
|
|
162
165
|
```
|
|
163
166
|
|
|
164
|
-
### Bun 1.3.
|
|
167
|
+
### Bun 1.3.12
|
|
165
168
|
|
|
166
169
|
```
|
|
167
170
|
RUN v4.1.4 /home/sebastien/github/flowblade/packages/sqlduck
|
|
168
171
|
|
|
169
172
|
|
|
170
|
-
✓ bench/appender.bench.ts > appender benches
|
|
173
|
+
✓ bench/appender.bench.ts > appender benches 2811ms
|
|
171
174
|
name hz min max mean p75 p99 p995 p999 rme samples
|
|
172
|
-
· duckdb appender memory, count: 100000, chunk size 2048
|
|
173
|
-
· duckdb appender file, count: 100000, chunk size 2048
|
|
174
|
-
· duckdb appender, count: 100000, chunk size 1024
|
|
175
|
+
· duckdb appender memory, count: 100000, chunk size 2048 3.9242 224.75 285.38 254.83 285.38 285.38 285.38 285.38 ±29.56% 3
|
|
176
|
+
· duckdb appender file, count: 100000, chunk size 2048 3.8209 256.09 267.34 261.72 267.34 267.34 267.34 267.34 ±27.31% 2
|
|
177
|
+
· duckdb appender, count: 100000, chunk size 1024 4.6118 196.77 234.22 216.84 234.22 234.22 234.22 234.22 ±21.62% 3
|
|
175
178
|
|
|
176
|
-
✓ bench/stream.bench.ts > Bench
|
|
177
|
-
name
|
|
178
|
-
· rowToColumnsChunk with chunkSize 2048 (count: 100000)
|
|
179
|
-
·
|
|
179
|
+
✓ bench/stream.bench.ts > Bench rowsToColumnsChunks 2667ms
|
|
180
|
+
name hz min max mean p75 p99 p995 p999 rme samples
|
|
181
|
+
· rowToColumnsChunk with chunkSize 2048 (count: 100000) 14.4994 57.7717 81.8690 68.9683 78.5234 81.8690 81.8690 81.8690 ±9.43% 10
|
|
182
|
+
· rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000) 13.2052 68.7139 96.8400 75.7275 79.4380 96.8400 96.8400 96.8400 ±8.22% 10
|
|
183
|
+
· mapFakeRowStream with chunkSize 2048 (count: 100000) 12.7827 73.9127 85.0696 78.2310 82.5510 85.0696 85.0696 85.0696 ±3.69% 10
|
|
180
184
|
|
|
181
|
-
✓ bench/table-create.bench.ts > Bench getTableCreateFromZod
|
|
185
|
+
✓ bench/table-create.bench.ts > Bench getTableCreateFromZod 624ms
|
|
182
186
|
name hz min max mean p75 p99 p995 p999 rme samples
|
|
183
|
-
· getTableCreateFromZod
|
|
187
|
+
· getTableCreateFromZod 28,477.04 0.0191 6.4836 0.0351 0.0335 0.1071 0.1530 2.4823 ±6.21% 14239
|
|
184
188
|
|
|
185
189
|
BENCH Summary
|
|
186
190
|
|
|
187
|
-
rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench
|
|
188
|
-
1.
|
|
191
|
+
rowToColumnsChunk with chunkSize 2048 (count: 100000) - bench/stream.bench.ts > Bench rowsToColumnsChunks
|
|
192
|
+
1.10x faster than rowToColumnsChunk with transformer with chunkSize 2048 (count: 100000)
|
|
193
|
+
1.13x faster than mapFakeRowStream with chunkSize 2048 (count: 100000)
|
|
189
194
|
|
|
190
195
|
getTableCreateFromZod - bench/table-create.bench.ts > Bench getTableCreateFromZod
|
|
191
196
|
|
|
192
|
-
duckdb appender
|
|
193
|
-
1.
|
|
194
|
-
1.
|
|
197
|
+
duckdb appender, count: 100000, chunk size 1024 - bench/appender.bench.ts > appender benches
|
|
198
|
+
1.18x faster than duckdb appender memory, count: 100000, chunk size 2048
|
|
199
|
+
1.21x faster than duckdb appender file, count: 100000, chunk size 2048
|
|
195
200
|
|
|
196
201
|
```
|
|
197
202
|
|
package/dist/index.d.mts
CHANGED
|
@@ -7,7 +7,7 @@ import * as z from "zod";
|
|
|
7
7
|
import { ZodObject } from "zod";
|
|
8
8
|
|
|
9
9
|
//#region src/appender/data-appender-callback.d.ts
|
|
10
|
-
type
|
|
10
|
+
type OnChunkAppendedStats = {
|
|
11
11
|
/**
|
|
12
12
|
* Total number of rows appended so far (all batches included)
|
|
13
13
|
*/
|
|
@@ -21,9 +21,9 @@ type OnDataAppendedStats = {
|
|
|
21
21
|
*/
|
|
22
22
|
rowsPerSecond: number;
|
|
23
23
|
};
|
|
24
|
-
type
|
|
25
|
-
type
|
|
26
|
-
type
|
|
24
|
+
type OnChunkAppendedSyncCb = (stats: OnChunkAppendedStats) => void;
|
|
25
|
+
type OnChunkAppendedAsyncCb = (stats: OnChunkAppendedStats) => Promise<void>;
|
|
26
|
+
type OnChunkAppendedCb = OnChunkAppendedSyncCb | OnChunkAppendedAsyncCb;
|
|
27
27
|
//#endregion
|
|
28
28
|
//#region src/helpers/duck-memory.d.ts
|
|
29
29
|
declare const duckMemoryTags: readonly ["BASE_TABLE", "HASH_TABLE", "PARQUET_READER", "CSV_READER", "ORDER_BY", "ART_INDEX", "COLUMN_DATA", "METADATA", "OVERFLOW_STRINGS", "IN_MEMORY_TABLE", "ALLOCATOR", "EXTENSION", "TRANSACTION", "EXTERNAL_FILE_CACHE", "WINDOW", "OBJECT_CACHE"];
|
|
@@ -113,41 +113,65 @@ type SqlDuckParams = {
|
|
|
113
113
|
type RowStream<T> = AsyncIterableIterator<T> | AsyncGenerator<T> | Generator<T>;
|
|
114
114
|
type ToTableParams<TSchema extends TableSchemaZod> = {
|
|
115
115
|
/**
|
|
116
|
-
*
|
|
116
|
+
* The target table where the data will be inserted.
|
|
117
|
+
* This object contains the table name and optionally the schema and database name.
|
|
117
118
|
*/
|
|
118
119
|
table: Table;
|
|
119
120
|
/**
|
|
120
|
-
*
|
|
121
|
+
* A Zod schema that defines the structure of the table and the expected format of the rows in the `rowStream`.
|
|
122
|
+
* The schema is used to generate the `CREATE TABLE` DDL and to convert row values to DuckDB types.
|
|
121
123
|
*/
|
|
122
124
|
schema: TSchema;
|
|
123
125
|
/**
|
|
124
|
-
*
|
|
126
|
+
* An async iterable iterator or a generator (async or sync) that yields the rows to be inserted.
|
|
127
|
+
* Each row must match the structure defined in the `schema`.
|
|
125
128
|
*/
|
|
126
129
|
rowStream: RowStream<z.infer<TSchema>>;
|
|
127
130
|
/**
|
|
128
|
-
*
|
|
129
|
-
*
|
|
131
|
+
* The number of rows to accumulate before appending them to the DuckDB table as a single data chunk.
|
|
132
|
+
* Tuning this value can impact memory usage and insertion performance.
|
|
133
|
+
* Valid values are between 1 and 2048.
|
|
130
134
|
* @default 2048
|
|
131
135
|
*/
|
|
132
136
|
chunkSize?: number;
|
|
133
137
|
/**
|
|
134
|
-
*
|
|
138
|
+
* Configuration options for the `CREATE TABLE` statement (e.g., `IF NOT EXISTS`, `CREATE OR REPLACE`).
|
|
139
|
+
* If omitted, a standard `CREATE TABLE` statement is used.
|
|
135
140
|
*/
|
|
136
141
|
createOptions?: TableCreateOptions;
|
|
137
142
|
/**
|
|
138
|
-
*
|
|
143
|
+
* An optional callback invoked after each data chunk is successfully appended to the table.
|
|
144
|
+
* Useful for tracking progress, logging statistics, or implementing custom hooks during the insertion process.
|
|
139
145
|
*/
|
|
140
|
-
|
|
146
|
+
onChunkAppended?: OnChunkAppendedCb;
|
|
141
147
|
/**
|
|
142
|
-
*
|
|
148
|
+
* Specifies the frequency (in number of chunks) at which the `onChunkAppended` callback should be triggered.
|
|
149
|
+
*
|
|
150
|
+
* For example, if `chunkSize` is 2048 and `onChunkAppendedFrequency` is 5,
|
|
151
|
+
* the callback will be called every 10,240 rows (5 chunks * 2048 rows/chunk).
|
|
152
|
+
*
|
|
153
|
+
* @default 1
|
|
154
|
+
*/
|
|
155
|
+
onChunkAppendedFrequency?: number;
|
|
156
|
+
/**
|
|
157
|
+
* Specifies the frequency (in number of chunks) at which the `appender.flushSync()` should be called.
|
|
158
|
+
* Calling `flushSync()` can help to clear internal buffers and make the data visible. Defaults to 10 when omitted.
|
|
159
|
+
*
|
|
160
|
+
* For example, if `chunkSize` is 2048 and `flushSyncFrequency` is 5,
|
|
161
|
+
* the appender will be flushed every 10,240 rows (5 chunks * 2048 rows/chunk).
|
|
162
|
+
*/
|
|
163
|
+
flushSyncFrequency?: number;
|
|
164
|
+
/**
|
|
165
|
+
* If set to `true`, a checkpoint is automatically performed after all rows from the `rowStream` have been processed.
|
|
166
|
+
* This ensures that all data is persisted and the write-ahead log (WAL) is cleared.
|
|
143
167
|
* @default true
|
|
144
168
|
*/
|
|
145
169
|
autoCheckpoint?: boolean;
|
|
146
170
|
/**
|
|
147
|
-
*
|
|
171
|
+
* Specifies the frequency (in number of chunks) at which a checkpoint should be triggered.
|
|
148
172
|
*
|
|
149
|
-
* For example if
|
|
150
|
-
*
|
|
173
|
+
* For example, if `chunkSize` is 2048 and `checkpointChunksFrequency` is 5,
|
|
174
|
+
* a checkpoint will occur every 10,240 rows (5 chunks * 2048 rows/chunk).
|
|
151
175
|
*/
|
|
152
176
|
checkpointChunksFrequency?: number;
|
|
153
177
|
};
|
|
@@ -193,9 +217,13 @@ declare class SqlDuck {
|
|
|
193
217
|
* schema: userSchema,
|
|
194
218
|
* rowStream: getUserRows(),
|
|
195
219
|
* chunkSize: 2048,
|
|
196
|
-
*
|
|
197
|
-
*
|
|
220
|
+
* flushSyncFrequency: 10, // flush after every 10 chunks
|
|
221
|
+
* onChunkAppendedFrequency: 1, // call onChunkAppended after every chunk
|
|
222
|
+
* onChunkAppended: ({ totalRows }) => {
|
|
223
|
+
* console.log(`Appended ${totalRows} rows so far`);
|
|
198
224
|
* },
|
|
225
|
+
* autoCheckpoint: true,
|
|
226
|
+
* checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
|
|
199
227
|
* createOptions: {
|
|
200
228
|
* create: 'CREATE_OR_REPLACE',
|
|
201
229
|
* },
|
|
@@ -302,4 +330,4 @@ declare const flowbladeLogtapeSqlduckConfig: {
|
|
|
302
330
|
//#region src/logger/sqlduck-default-logtape-logger.d.ts
|
|
303
331
|
declare const sqlduckDefaultLogtapeLogger: _$_logtape_logtape0.Logger;
|
|
304
332
|
//#endregion
|
|
305
|
-
export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type
|
|
333
|
+
export { Database, type DuckConnectionParams, DuckDatabaseManager, DuckMemory, type DuckMemoryTag, type DuckdbReservedKeywords, type OnChunkAppendedCb, type OnChunkAppendedStats, SqlDuck, type SqlDuckParams, Table, type ToTableParams, duckReservedKeywords, flowbladeLogtapeSqlduckConfig, getTableCreateFromZod, sqlduckDefaultLogtapeLogger, zodCodecs };
|
package/dist/index.mjs
CHANGED
|
@@ -104,10 +104,12 @@ var DuckMemory = class {
|
|
|
104
104
|
};
|
|
105
105
|
//#endregion
|
|
106
106
|
//#region src/appender/data-appender-callback.ts
|
|
107
|
-
const
|
|
108
|
-
return v.constructor.name === "AsyncFunction"
|
|
107
|
+
const isOnChunkAppendedAsyncCb = (v) => {
|
|
108
|
+
return v.constructor.name === "AsyncFunction" || v.constructor === (async () => {
|
|
109
|
+
await Promise.resolve();
|
|
110
|
+
}).constructor;
|
|
109
111
|
};
|
|
110
|
-
const
|
|
112
|
+
const createOnChunkAppendedCollector = () => {
|
|
111
113
|
let lastCallbackTimeStart = Date.now();
|
|
112
114
|
let appendedTotalRows = 0;
|
|
113
115
|
return (currentTotalRows) => {
|
|
@@ -188,7 +190,7 @@ var DuckValueConverter = class {
|
|
|
188
190
|
//#endregion
|
|
189
191
|
//#region src/converter/create-duck-column-converters.ts
|
|
190
192
|
const createDuckColumnConverters = (duckTypes) => {
|
|
191
|
-
const convMap =
|
|
193
|
+
const convMap = {};
|
|
192
194
|
const converter = new DuckValueConverter();
|
|
193
195
|
for (const [key, duckType] of Object.entries(duckTypes)) {
|
|
194
196
|
let conv;
|
|
@@ -223,7 +225,7 @@ const createDuckColumnConverters = (duckTypes) => {
|
|
|
223
225
|
break;
|
|
224
226
|
default: throw new Error(`Unsupported duck type ${duckTypeId} / ${duckType.toString()} for column '${key}'`);
|
|
225
227
|
}
|
|
226
|
-
if (conv !== false) convMap
|
|
228
|
+
if (conv !== false) convMap[key] = conv;
|
|
227
229
|
}
|
|
228
230
|
return convMap;
|
|
229
231
|
};
|
|
@@ -650,13 +652,42 @@ const createTableFromZod = async (params) => {
|
|
|
650
652
|
//#endregion
|
|
651
653
|
//#region src/utils/rows-to-columns-chunks.ts
|
|
652
654
|
/**
|
|
653
|
-
*
|
|
654
|
-
* the entire dataset in memory. Each yielded item is a columns array for up to
|
|
655
|
-
* `chunkSize` rows.
|
|
655
|
+
* Converts a stream of rows (row-oriented) into a stream of column-oriented chunks.
|
|
656
656
|
*
|
|
657
|
-
*
|
|
658
|
-
*
|
|
659
|
-
*
|
|
657
|
+
* This function processes row data incrementally using an async generator, which prevents
|
|
658
|
+
* loading the entire dataset into memory. Each yielded chunk is an object where keys are
|
|
659
|
+
* column names and values are arrays of up to `chunkSize` elements.
|
|
660
|
+
*
|
|
661
|
+
* This is particularly useful for DuckDB's Appender API or other columnar processing
|
|
662
|
+
* engines that expect data in chunks of columns.
|
|
663
|
+
*
|
|
664
|
+
* @param params - Configuration for the transformation.
|
|
665
|
+
* @param params.rows - An async or sync iterator of rows (e.g. a generator); it is consumed through `next()` and `for await`.
|
|
666
|
+
* @param params.chunkSize - The maximum number of rows per yielded chunk. Must be a positive integer.
|
|
667
|
+
* @param params.transformers - Optional mappers for specific columns to transform values before chunking.
|
|
668
|
+
*
|
|
669
|
+
* @returns An async iterator yielding chunks of column-oriented data.
|
|
670
|
+
*
|
|
671
|
+
* @example
|
|
672
|
+
* ```typescript
|
|
673
|
+
* async function* generateRows() {
|
|
674
|
+
* yield { id: 1, name: 'A' };
|
|
675
|
+
* yield { id: 2, name: 'B' };
|
|
676
|
+
* yield { id: 3, name: 'C' };
|
|
677
|
+
* }
|
|
678
|
+
*
|
|
679
|
+
* const columnChunks = rowsToColumnsChunks({
|
|
680
|
+
* rows: generateRows(),
|
|
681
|
+
* chunkSize: 2,
|
|
682
|
+
* })
|
|
683
|
+
*
|
|
684
|
+
* for await (const chunk of columnChunks) {
|
|
685
|
+
* console.log(chunk);
|
|
686
|
+
* }
|
|
687
|
+
* // Output:
|
|
688
|
+
* // { id: [1, 2], name: ['A', 'B'] } // first chunk
|
|
689
|
+
* // { id: [3], name: ['C'] } // second chunk
|
|
690
|
+
* ```
|
|
660
691
|
*/
|
|
661
692
|
async function* rowsToColumnsChunks(params) {
|
|
662
693
|
const { rows, chunkSize, transformers } = params;
|
|
@@ -664,27 +695,46 @@ async function* rowsToColumnsChunks(params) {
|
|
|
664
695
|
const first = await rows.next();
|
|
665
696
|
if (first.done) return;
|
|
666
697
|
const keys = Object.keys(first.value);
|
|
667
|
-
|
|
698
|
+
const numKeys = keys.length;
|
|
699
|
+
const mappers = new Array(numKeys);
|
|
700
|
+
if (transformers !== void 0) {
|
|
701
|
+
const unknownKeys = Object.keys(transformers).filter((k) => !keys.includes(k));
|
|
702
|
+
if (unknownKeys.length > 0) throw new Error(`transformers parameter contains unknown row ids: ${unknownKeys.join(", ")}`);
|
|
703
|
+
for (let i = 0; i < numKeys; i++) mappers[i] = transformers[keys[i]];
|
|
704
|
+
}
|
|
705
|
+
function createColumns() {
|
|
706
|
+
const obj = {};
|
|
707
|
+
for (let i = 0; i < numKeys; i++) {
|
|
708
|
+
const k = keys[i];
|
|
709
|
+
obj[k] = [];
|
|
710
|
+
}
|
|
711
|
+
return obj;
|
|
712
|
+
}
|
|
713
|
+
let columns = createColumns();
|
|
668
714
|
let rowsInChunk = 0;
|
|
669
|
-
|
|
670
|
-
const
|
|
671
|
-
|
|
672
|
-
|
|
715
|
+
for (let i = 0; i < numKeys; i++) {
|
|
716
|
+
const k = keys[i];
|
|
717
|
+
const fn = mappers[i];
|
|
718
|
+
const val = first.value[k];
|
|
719
|
+
columns[k].push(fn === void 0 ? val : fn(val));
|
|
720
|
+
}
|
|
673
721
|
rowsInChunk++;
|
|
674
722
|
if (rowsInChunk >= chunkSize) {
|
|
675
723
|
yield columns;
|
|
676
|
-
columns =
|
|
724
|
+
columns = createColumns();
|
|
677
725
|
rowsInChunk = 0;
|
|
678
726
|
}
|
|
679
727
|
for await (const row of rows) {
|
|
680
|
-
|
|
681
|
-
const
|
|
682
|
-
|
|
683
|
-
|
|
728
|
+
for (let i = 0; i < numKeys; i++) {
|
|
729
|
+
const k = keys[i];
|
|
730
|
+
const fn = mappers[i];
|
|
731
|
+
const val = row[k];
|
|
732
|
+
columns[k].push(fn === void 0 ? val : fn(val));
|
|
733
|
+
}
|
|
684
734
|
rowsInChunk++;
|
|
685
735
|
if (rowsInChunk >= chunkSize) {
|
|
686
736
|
yield columns;
|
|
687
|
-
columns =
|
|
737
|
+
columns = createColumns();
|
|
688
738
|
rowsInChunk = 0;
|
|
689
739
|
}
|
|
690
740
|
}
|
|
@@ -724,9 +774,13 @@ var SqlDuck = class {
|
|
|
724
774
|
* schema: userSchema,
|
|
725
775
|
* rowStream: getUserRows(),
|
|
726
776
|
* chunkSize: 2048,
|
|
727
|
-
*
|
|
728
|
-
*
|
|
777
|
+
* flushSyncFrequency: 10, // flush after every 10 chunks
|
|
778
|
+
* onChunkAppendedFrequency: 1, // call onChunkAppended after every chunk
|
|
779
|
+
* onChunkAppended: ({ totalRows }) => {
|
|
780
|
+
* console.log(`Appended ${totalRows} rows so far`);
|
|
729
781
|
* },
|
|
782
|
+
* autoCheckpoint: true,
|
|
783
|
+
* checkpointChunksFrequency: 100, // checkpoint after every 100 chunks
|
|
730
784
|
* createOptions: {
|
|
731
785
|
* create: 'CREATE_OR_REPLACE',
|
|
732
786
|
* },
|
|
@@ -737,11 +791,13 @@ var SqlDuck = class {
|
|
|
737
791
|
* ```
|
|
738
792
|
*/
|
|
739
793
|
toTable = async (params) => {
|
|
740
|
-
const { table, schema, chunkSize = 2048, rowStream, createOptions,
|
|
794
|
+
const { table, schema, chunkSize = 2048, rowStream, createOptions, onChunkAppended, onChunkAppendedFrequency, flushSyncFrequency = 10, autoCheckpoint = true, checkpointChunksFrequency } = params;
|
|
741
795
|
if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
|
|
742
796
|
if (autoCheckpoint && typeof table.databaseName !== "string") throw new Error("autoCheckpoint requires table.databaseName to be provided.");
|
|
743
|
-
if (checkpointChunksFrequency && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
|
|
744
|
-
if (checkpointChunksFrequency !== void 0 && checkpointChunksFrequency < 1) throw new Error("checkpointChunksFrequency must be a
|
|
797
|
+
if (checkpointChunksFrequency !== void 0 && typeof table.databaseName !== "string") throw new Error("checkpointChunksFrequency requires table.databaseName to be provided.");
|
|
798
|
+
if (checkpointChunksFrequency !== void 0 && (checkpointChunksFrequency < 1 || checkpointChunksFrequency > 1e5)) throw new Error("checkpointChunksFrequency must be a number between 1 and 100_000.");
|
|
799
|
+
if (onChunkAppendedFrequency !== void 0 && (onChunkAppendedFrequency < 1 || onChunkAppendedFrequency > 1e5)) throw new Error("onChunkAppendedFrequency must be a number between 1 and 100_000.");
|
|
800
|
+
if (flushSyncFrequency !== void 0 && (flushSyncFrequency < 1 || flushSyncFrequency > 1e5)) throw new Error("flushSyncFrequency must be a number between 1 and 100_000.");
|
|
745
801
|
const dbManager = new DuckDatabaseManager(this.#conn);
|
|
746
802
|
const timeStart = Date.now();
|
|
747
803
|
const { columnTypes, ddl } = await createTableFromZod({
|
|
@@ -752,46 +808,56 @@ var SqlDuck = class {
|
|
|
752
808
|
});
|
|
753
809
|
const appender = await this.#conn.createAppender(table.tableName, table.schemaName, table.databaseName);
|
|
754
810
|
const chunkTypes = Array.from(columnTypes.values());
|
|
755
|
-
const
|
|
756
|
-
|
|
757
|
-
|
|
811
|
+
const columnTypeIds = {};
|
|
812
|
+
const columnKeys = [];
|
|
813
|
+
for (const [key, duckType] of columnTypes) {
|
|
814
|
+
columnKeys.push(key);
|
|
815
|
+
columnTypeIds[key] = duckType;
|
|
816
|
+
}
|
|
817
|
+
const numColumns = columnKeys.length;
|
|
818
|
+
const transformers = createDuckColumnConverters(columnTypeIds);
|
|
758
819
|
let totalRows = 0;
|
|
759
|
-
const
|
|
820
|
+
const chunkAppendedCollector = createOnChunkAppendedCollector();
|
|
760
821
|
const columnStream = rowsToColumnsChunks({
|
|
761
822
|
rows: rowStream,
|
|
762
823
|
chunkSize,
|
|
763
824
|
transformers
|
|
764
825
|
});
|
|
765
826
|
let appendedChunkCount = 0;
|
|
827
|
+
const tableFullName = table.getFullName();
|
|
828
|
+
const tableName = table.tableName;
|
|
766
829
|
try {
|
|
830
|
+
const isAsyncCb = onChunkAppended !== void 0 && isOnChunkAppendedAsyncCb(onChunkAppended);
|
|
767
831
|
for await (const dataChunk of columnStream) {
|
|
768
832
|
const chunk = DuckDBDataChunk.create(chunkTypes);
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
833
|
+
const columns = new Array(numColumns);
|
|
834
|
+
for (let i = 0; i < numColumns; i++) columns[i] = dataChunk[columnKeys[i]];
|
|
835
|
+
totalRows += columns[0]?.length ?? 0;
|
|
836
|
+
chunk.setColumns(columns);
|
|
772
837
|
appender.appendDataChunk(chunk);
|
|
773
|
-
appender.flushSync();
|
|
774
838
|
appendedChunkCount += 1;
|
|
775
|
-
if (
|
|
776
|
-
const payload =
|
|
777
|
-
if (
|
|
778
|
-
else
|
|
839
|
+
if (onChunkAppended !== void 0 && (onChunkAppendedFrequency === void 0 || appendedChunkCount % onChunkAppendedFrequency === 0)) {
|
|
840
|
+
const payload = chunkAppendedCollector(totalRows);
|
|
841
|
+
if (isAsyncCb) await onChunkAppended(payload);
|
|
842
|
+
else onChunkAppended(payload);
|
|
779
843
|
}
|
|
844
|
+
if (flushSyncFrequency !== void 0 && appendedChunkCount % flushSyncFrequency === 0) appender.flushSync();
|
|
780
845
|
if (checkpointChunksFrequency !== void 0 && appendedChunkCount % checkpointChunksFrequency === 0 && typeof table.databaseName === "string") try {
|
|
781
846
|
await dbManager.checkpoint(table.databaseName);
|
|
782
847
|
} catch (e) {
|
|
783
|
-
this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${
|
|
848
|
+
this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending chunk into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
|
|
784
849
|
}
|
|
785
850
|
}
|
|
851
|
+
appender.flushSync();
|
|
786
852
|
appender.closeSync();
|
|
787
853
|
if (autoCheckpoint && typeof table.databaseName === "string") try {
|
|
788
854
|
await dbManager.checkpoint(table.databaseName);
|
|
789
855
|
} catch (e) {
|
|
790
|
-
this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${
|
|
856
|
+
this.#logger.warning(`Failed to checkpoint database '${table.databaseName}' after appending data into table '${tableName}' - ${e?.message ?? ""}`, { table: tableFullName });
|
|
791
857
|
}
|
|
792
858
|
const timeMs = Math.round(Date.now() - timeStart);
|
|
793
859
|
this.#logger.info(`Successfully appended ${totalRows} rows into '${table.getFullName()}' in ${timeMs}ms`, {
|
|
794
|
-
table:
|
|
860
|
+
table: tableFullName,
|
|
795
861
|
timeMs,
|
|
796
862
|
totalRows
|
|
797
863
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flowblade/sqlduck",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.17.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"sideEffects": false,
|
|
6
6
|
"exports": {
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
59
|
"@flowblade/core": "^0.2.26",
|
|
60
|
-
"@flowblade/source-duckdb": "^0.20.
|
|
60
|
+
"@flowblade/source-duckdb": "^0.20.2",
|
|
61
61
|
"@flowblade/sql-tag": "^0.3.2",
|
|
62
62
|
"@httpx/assert": "^0.16.9",
|
|
63
63
|
"@httpx/dsn-parser": "^1.9.9",
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"zod": "^4.3.6"
|
|
70
70
|
},
|
|
71
71
|
"peerDependencies": {
|
|
72
|
-
"@duckdb/node-api": "^1.5.
|
|
72
|
+
"@duckdb/node-api": "^1.5.1-r.2",
|
|
73
73
|
"valibot": "^1.3.1"
|
|
74
74
|
},
|
|
75
75
|
"peerDependenciesMeta": {
|
|
@@ -79,20 +79,20 @@
|
|
|
79
79
|
},
|
|
80
80
|
"devDependencies": {
|
|
81
81
|
"@belgattitude/eslint-config-bases": "8.12.0",
|
|
82
|
-
"@dotenvx/dotenvx": "1.
|
|
83
|
-
"@duckdb/node-api": "1.5.1-r.
|
|
82
|
+
"@dotenvx/dotenvx": "1.61.0",
|
|
83
|
+
"@duckdb/node-api": "1.5.1-r.2",
|
|
84
84
|
"@faker-js/faker": "10.4.0",
|
|
85
85
|
"@flowblade/source-kysely": "^1.3.0",
|
|
86
86
|
"@httpx/assert": "0.16.9",
|
|
87
87
|
"@mitata/counters": "0.0.8",
|
|
88
88
|
"@size-limit/esbuild": "12.0.1",
|
|
89
89
|
"@size-limit/file": "12.0.1",
|
|
90
|
-
"@testcontainers/mssqlserver": "11.
|
|
90
|
+
"@testcontainers/mssqlserver": "11.14.0",
|
|
91
91
|
"@total-typescript/ts-reset": "0.6.1",
|
|
92
92
|
"@types/node": "25.5.2",
|
|
93
93
|
"@typescript-eslint/eslint-plugin": "8.58.1",
|
|
94
94
|
"@typescript-eslint/parser": "8.58.1",
|
|
95
|
-
"@typescript/native-preview": "7.0.0-dev.
|
|
95
|
+
"@typescript/native-preview": "7.0.0-dev.20260410.1",
|
|
96
96
|
"@vitest/coverage-v8": "4.1.4",
|
|
97
97
|
"@vitest/ui": "4.1.4",
|
|
98
98
|
"ansis": "4.2.0",
|
|
@@ -105,10 +105,10 @@
|
|
|
105
105
|
"eslint": "8.57.1",
|
|
106
106
|
"execa": "9.6.1",
|
|
107
107
|
"is-in-ci": "2.0.0",
|
|
108
|
-
"kysely": "0.28.
|
|
108
|
+
"kysely": "0.28.16",
|
|
109
109
|
"mitata": "1.0.34",
|
|
110
110
|
"npm-run-all2": "8.0.4",
|
|
111
|
-
"prettier": "3.8.
|
|
111
|
+
"prettier": "3.8.2",
|
|
112
112
|
"publint": "0.3.18",
|
|
113
113
|
"regexp.escape": "2.0.1",
|
|
114
114
|
"rimraf": "6.1.3",
|
|
@@ -116,7 +116,7 @@
|
|
|
116
116
|
"sql-formatter": "15.7.3",
|
|
117
117
|
"tarn": "3.0.2",
|
|
118
118
|
"tedious": "19.2.1",
|
|
119
|
-
"testcontainers": "11.
|
|
119
|
+
"testcontainers": "11.14.0",
|
|
120
120
|
"tsdown": "0.21.7",
|
|
121
121
|
"tsx": "4.21.0",
|
|
122
122
|
"typedoc": "0.28.18",
|