@flowblade/sqlduck 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,11 +2,57 @@
 
 > Currently experimental
 
- ### Quick start
+ ## Quick start
 
- ### Environment variables
+ ### Create a memory table
 
- ### Schema
+ ```typescript
+ import { SqlDuck, Table } from "@flowblade/sqlduck";
+ import * as z from "zod";
+ import { dbDuckDbMemoryConn } from "./db.duckdb-memory.config";
+
+ const sqlDuck = new SqlDuck({ conn: duckDbConnection }); // conn expects a DuckDBConnection from @duckdb/node-api
+
+ // Schema of the table, note that you can use meta to add information
+ const userSchema = z.object({
+   id: z.number().int().meta({ primaryKey: true }),
+   name: z.string(),
+ });
+
+ // Async generator function that yields rows to insert
+ async function* getUserRows(): AsyncIterableIterator<
+   z.infer<typeof userSchema>
+ > {
+   // database or api call
+ }
+
+ const result = await sqlDuck.toTable({
+   table: new Table({ name: "user", database: "mydb" }), // Table definition
+   schema: userSchema, // Schema used to create the table
+   rowStream: getUserRows(), // The async iterable that yields rows
+   // 👇 Optional:
+   chunkSize: 2048, // Rows per appended chunk when using the duckdb appender (1-2048). Default is 2048
+   onDataAppended: ({ rowsCount }) => {
+     console.log(`Appended ${rowsCount} rows`);
+   },
+   // Optional table creation options
+   createOptions: {
+     create: "CREATE_OR_REPLACE",
+   },
+ });
+
+ console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
+ console.log(`Table created with DDL: ${result.createTableDDL}`);
+
+ // You can now use the table in your queries
+ const queryResult = await dbDuckDbMemoryConn.query<{
+   id: number;
+   name: string;
+ }>(`
+   SELECT id, name FROM mydb.user WHERE id < 1000
+ `);
+ ```
 
  ### Local scripts
 
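The `getUserRows` stub in the new Quick start leaves the row source as a comment. Below is a minimal sketch of what such a generator could look like, assuming a hypothetical paginated JSON endpoint; the URL, the `page`/`limit` parameters and the response shape are illustrative and not part of the package:

```typescript
import * as z from "zod";

const userSchema = z.object({
  id: z.number().int(),
  name: z.string(),
});

// Hypothetical paginated API: URL, page size and response shape are assumptions.
async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
  for (let page = 0; ; page += 1) {
    const res = await fetch(`https://example.com/api/users?page=${page}&limit=500`);
    const rows: unknown[] = await res.json();
    if (rows.length === 0) return; // no more pages
    for (const row of rows) {
      yield userSchema.parse(row); // validate each row before it reaches the appender
    }
  }
}
```

Because `toTable` consumes the iterator chunk by chunk, rows are fetched lazily instead of being buffered in memory up front.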
package/dist/index.cjs CHANGED
@@ -50,6 +50,25 @@ async function* rowsToColumnsChunks(rows, chunkSize) {
    }
    if (rowsInChunk > 0) yield columns;
  }
+ const isOnDataAppendedAsyncCb = (v) => {
+   return v.constructor.name === "AsyncFunction";
+ };
+ const createOnDataAppendedCollector = () => {
+   let lastCallbackTimeStart = Date.now();
+   let appendedTotalRows = 0;
+   return (currentTotalRows) => {
+     const cbTimeMs = Math.round(Date.now() - lastCallbackTimeStart);
+     const cbTotalRows = currentTotalRows - appendedTotalRows;
+     const payload = {
+       rowsCount: cbTotalRows,
+       timeMs: cbTimeMs,
+       rowsPerSecond: Math.round(cbTotalRows / cbTimeMs * 1e3)
+     };
+     appendedTotalRows = currentTotalRows;
+     lastCallbackTimeStart = Date.now();
+     return payload;
+   };
+ };
  const createMap = {
    CREATE: "CREATE TABLE",
    CREATE_OR_REPLACE: "CREATE OR REPLACE TABLE",
@@ -124,7 +143,8 @@ var SqlDuck = class {
      this.#logger = params.logger;
    }
    toTable = async (params) => {
-     const { table, schema, chunkSize, rowStream, createOptions } = params;
+     const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended } = params;
+     if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
      const timeStart = Date.now();
      const { columnTypes, ddl } = await createTableFromZod({
        conn: this.#duck,
@@ -134,9 +154,9 @@ var SqlDuck = class {
      });
      const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
      const chunkTypes = columnTypes.map((v) => v[1]);
-     const chunkLimit = chunkSize ?? 2048;
      let totalRows = 0;
-     const columnStream = rowsToColumnsChunks(rowStream, chunkLimit);
+     const dataAppendedCollector = createOnDataAppendedCollector();
+     const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
      for await (const dataChunk of columnStream) {
        const chunk = _duckdb_node_api.DuckDBDataChunk.create(chunkTypes);
        if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
@@ -144,6 +164,11 @@
        chunk.setColumns(dataChunk);
        appender.appendDataChunk(chunk);
        appender.flushSync();
+       if (onDataAppended !== void 0) {
+         const payload = dataAppendedCollector(totalRows);
+         if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
+         else onDataAppended(payload);
+       }
      }
      return {
        timeMs: Math.round(Date.now() - timeStart),
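One behavioural change in the hunks above is worth calling out: `chunkSize` now defaults to 2048 and is validated up front, whereas 0.3.0 forwarded whatever value was supplied (`chunkSize ?? 2048`) without checks. A small caller-side sketch of the new guard, reusing `sqlDuck`, `userSchema`, `getUserRows` and `Table` from the README example above:

```typescript
// Assumes sqlDuck, userSchema, getUserRows and Table from the README example above.
try {
  await sqlDuck.toTable({
    table: new Table({ name: "user", database: "mydb" }),
    schema: userSchema,
    rowStream: getUserRows(),
    chunkSize: 4096, // out of range: only integers from 1 to 2048 are accepted
  });
} catch (e) {
  // "chunkSize must be a number between 1 and 2048"
  console.error((e as Error).message);
}
```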
package/dist/index.d.cts CHANGED
@@ -2,6 +2,25 @@ import { DuckDBConnection, DuckDBType } from "@duckdb/node-api";
  import * as z from "zod";
  import { ZodObject } from "zod";
 
+ //#region src/appender/data-appender-callback.d.ts
+ type OnDataAppendedParams = {
+   /**
+    * Number of rows appended since the previous callback
+    */
+   rowsCount: number;
+   /**
+    * Time taken to append the last batch in milliseconds
+    */
+   timeMs: number;
+   /**
+    * Estimated rows per second
+    */
+   rowsPerSecond: number;
+ };
+ type OnDataAppendedSyncCb = (params: OnDataAppendedParams) => void;
+ type OnDataAppendedAsyncCb = (params: OnDataAppendedParams) => Promise<void>;
+ type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
+ //#endregion
  //#region src/table/table.d.ts
  /**
   * Fully qualified table information
@@ -46,15 +65,43 @@ type SqlDuckParams = {
    conn: DuckDBConnection;
    logger?: (msg: string) => void;
  };
+ type RowStream<T> = AsyncIterableIterator<T>;
  type ToTableParams<TSchema extends TableSchemaZod> = {
+   /**
+    * Target table to create and fill with data
+    */
    table: Table;
+   /**
+    * Schema describing the table structure and rowStream content
+    */
    schema: TSchema;
-   rowStream: AsyncIterableIterator<z.infer<TSchema>>;
+   /**
+    * Stream of rows to insert into the table
+    */
+   rowStream: RowStream<z.infer<TSchema>>;
+   /**
+    * Chunk size when using the appender to insert data.
+    * Valid values are between 1 and 2048.
+    * @default 2048
+    */
    chunkSize?: number;
+   /**
+    * Extra options when creating the table
+    */
    createOptions?: TableCreateOptions;
+   /**
+    * Callback invoked each time a data chunk is appended to the table
+    */
+   onDataAppended?: OnDataAppendedCb;
  };
  type ToTableResult = {
+   /**
+    * Total time taken to insert the data in milliseconds.
+    */
    timeMs: number;
+   /**
+    * Total number of rows inserted into the table.
+    */
    totalRows: number;
    /**
     * The DDL statement used to create the table.
@@ -64,6 +111,45 @@ type ToTableResult = {
  declare class SqlDuck {
    #private;
    constructor(params: SqlDuckParams);
+   /**
+    * Create a table from a Zod schema and fill it with data from a row stream.
+    *
+    * @example
+    * ```typescript
+    * import * as z from 'zod';
+    *
+    * const sqlDuck = new SqlDuck({ conn: duckDbConnection });
+    *
+    * // Schema of the table, note that you can use meta to add information
+    * const userSchema = z.object({
+    *   id: z.number().int().meta({ primaryKey: true }),
+    *   name: z.string(),
+    * });
+    *
+    * // Async generator function that yields rows to insert
+    * async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
+    *   // database or api call
+    * }
+    *
+    * const result = await sqlDuck.toTable({
+    *   table: new Table({ name: 'user', database: 'mydb' }),
+    *   schema: userSchema,
+    *   rowStream: getUserRows(),
+    *   chunkSize: 2048,
+    *   onDataAppended: ({ rowsCount }) => {
+    *     console.log(`Appended ${rowsCount} rows`);
+    *   },
+    *   createOptions: {
+    *     create: 'CREATE_OR_REPLACE',
+    *   },
+    * });
+    *
+    * console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
+    * console.log(`Table created with DDL: ${result.createTableDDL}`);
+    * ```
+    */
    toTable: <TSchema extends ZodObject>(params: ToTableParams<TSchema>) => Promise<ToTableResult>;
  }
  //#endregion
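The declarations above pin down the callback contract: `onDataAppended` may be synchronous or async, and its payload carries `rowsCount`, `timeMs` and `rowsPerSecond` for each flushed chunk. A sketch of a progress callback written against that shape; the type is restated locally because the diff does not show whether `OnDataAppendedCb` is re-exported from the package entry point:

```typescript
// Restated locally; mirrors OnDataAppendedParams from dist/index.d.cts.
type OnDataAppendedParams = {
  rowsCount: number;
  timeMs: number;
  rowsPerSecond: number;
};

// Running total kept by the caller; per the collector in dist/index.cjs,
// rowsCount reports the rows appended since the previous callback.
let insertedSoFar = 0;

const logProgress = async (p: OnDataAppendedParams): Promise<void> => {
  insertedSoFar += p.rowsCount;
  console.log(
    `+${p.rowsCount} rows in ${p.timeMs}ms (~${p.rowsPerSecond} rows/s), ${insertedSoFar} total`,
  );
};

// Pass it as `onDataAppended: logProgress`; async callbacks are awaited
// before the next chunk is appended.
```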
package/dist/index.d.mts CHANGED
@@ -2,6 +2,25 @@ import { DuckDBConnection, DuckDBType } from "@duckdb/node-api";
  import * as z from "zod";
  import { ZodObject } from "zod";
 
+ //#region src/appender/data-appender-callback.d.ts
+ type OnDataAppendedParams = {
+   /**
+    * Number of rows appended since the previous callback
+    */
+   rowsCount: number;
+   /**
+    * Time taken to append the last batch in milliseconds
+    */
+   timeMs: number;
+   /**
+    * Estimated rows per second
+    */
+   rowsPerSecond: number;
+ };
+ type OnDataAppendedSyncCb = (params: OnDataAppendedParams) => void;
+ type OnDataAppendedAsyncCb = (params: OnDataAppendedParams) => Promise<void>;
+ type OnDataAppendedCb = OnDataAppendedSyncCb | OnDataAppendedAsyncCb;
+ //#endregion
  //#region src/table/table.d.ts
  /**
   * Fully qualified table information
@@ -46,15 +65,43 @@ type SqlDuckParams = {
    conn: DuckDBConnection;
    logger?: (msg: string) => void;
  };
+ type RowStream<T> = AsyncIterableIterator<T>;
  type ToTableParams<TSchema extends TableSchemaZod> = {
+   /**
+    * Target table to create and fill with data
+    */
    table: Table;
+   /**
+    * Schema describing the table structure and rowStream content
+    */
    schema: TSchema;
-   rowStream: AsyncIterableIterator<z.infer<TSchema>>;
+   /**
+    * Stream of rows to insert into the table
+    */
+   rowStream: RowStream<z.infer<TSchema>>;
+   /**
+    * Chunk size when using the appender to insert data.
+    * Valid values are between 1 and 2048.
+    * @default 2048
+    */
    chunkSize?: number;
+   /**
+    * Extra options when creating the table
+    */
    createOptions?: TableCreateOptions;
+   /**
+    * Callback invoked each time a data chunk is appended to the table
+    */
+   onDataAppended?: OnDataAppendedCb;
  };
  type ToTableResult = {
+   /**
+    * Total time taken to insert the data in milliseconds.
+    */
    timeMs: number;
+   /**
+    * Total number of rows inserted into the table.
+    */
    totalRows: number;
    /**
     * The DDL statement used to create the table.
@@ -64,6 +111,45 @@ type ToTableResult = {
  declare class SqlDuck {
    #private;
    constructor(params: SqlDuckParams);
+   /**
+    * Create a table from a Zod schema and fill it with data from a row stream.
+    *
+    * @example
+    * ```typescript
+    * import * as z from 'zod';
+    *
+    * const sqlDuck = new SqlDuck({ conn: duckDbConnection });
+    *
+    * // Schema of the table, note that you can use meta to add information
+    * const userSchema = z.object({
+    *   id: z.number().int().meta({ primaryKey: true }),
+    *   name: z.string(),
+    * });
+    *
+    * // Async generator function that yields rows to insert
+    * async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
+    *   // database or api call
+    * }
+    *
+    * const result = await sqlDuck.toTable({
+    *   table: new Table({ name: 'user', database: 'mydb' }),
+    *   schema: userSchema,
+    *   rowStream: getUserRows(),
+    *   chunkSize: 2048,
+    *   onDataAppended: ({ rowsCount }) => {
+    *     console.log(`Appended ${rowsCount} rows`);
+    *   },
+    *   createOptions: {
+    *     create: 'CREATE_OR_REPLACE',
+    *   },
+    * });
+    *
+    * console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
+    * console.log(`Table created with DDL: ${result.createTableDDL}`);
+    * ```
+    */
    toTable: <TSchema extends ZodObject>(params: ToTableParams<TSchema>) => Promise<ToTableResult>;
  }
  //#endregion
package/dist/index.mjs CHANGED
@@ -29,6 +29,25 @@ async function* rowsToColumnsChunks(rows, chunkSize) {
    }
    if (rowsInChunk > 0) yield columns;
  }
+ const isOnDataAppendedAsyncCb = (v) => {
+   return v.constructor.name === "AsyncFunction";
+ };
+ const createOnDataAppendedCollector = () => {
+   let lastCallbackTimeStart = Date.now();
+   let appendedTotalRows = 0;
+   return (currentTotalRows) => {
+     const cbTimeMs = Math.round(Date.now() - lastCallbackTimeStart);
+     const cbTotalRows = currentTotalRows - appendedTotalRows;
+     const payload = {
+       rowsCount: cbTotalRows,
+       timeMs: cbTimeMs,
+       rowsPerSecond: Math.round(cbTotalRows / cbTimeMs * 1e3)
+     };
+     appendedTotalRows = currentTotalRows;
+     lastCallbackTimeStart = Date.now();
+     return payload;
+   };
+ };
  const createMap = {
    CREATE: "CREATE TABLE",
    CREATE_OR_REPLACE: "CREATE OR REPLACE TABLE",
@@ -103,7 +122,8 @@ var SqlDuck = class {
      this.#logger = params.logger;
    }
    toTable = async (params) => {
-     const { table, schema, chunkSize, rowStream, createOptions } = params;
+     const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended } = params;
+     if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
      const timeStart = Date.now();
      const { columnTypes, ddl } = await createTableFromZod({
        conn: this.#duck,
@@ -113,9 +133,9 @@ var SqlDuck = class {
      });
      const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
      const chunkTypes = columnTypes.map((v) => v[1]);
-     const chunkLimit = chunkSize ?? 2048;
      let totalRows = 0;
-     const columnStream = rowsToColumnsChunks(rowStream, chunkLimit);
+     const dataAppendedCollector = createOnDataAppendedCollector();
+     const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
      for await (const dataChunk of columnStream) {
        const chunk = DuckDBDataChunk.create(chunkTypes);
        if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
@@ -123,6 +143,11 @@
        chunk.setColumns(dataChunk);
        appender.appendDataChunk(chunk);
        appender.flushSync();
+       if (onDataAppended !== void 0) {
+         const payload = dataAppendedCollector(totalRows);
+         if (isOnDataAppendedAsyncCb(onDataAppended)) await onDataAppended(payload);
+         else onDataAppended(payload);
+       }
      }
      return {
        timeMs: Math.round(Date.now() - timeStart),
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@flowblade/sqlduck",
-   "version": "0.3.0",
+   "version": "0.5.0",
    "type": "module",
    "sideEffects": false,
    "exports": {
@@ -31,6 +31,8 @@
      "clean": "rimraf ./dist ./coverage ./tsconfig.tsbuildinfo",
      "build": "tsdown",
      "build-release": "yarn build && rimraf ./_release && yarn pack && mkdir ./_release && tar zxvf ./package.tgz --directory ./_release && rm ./package.tgz",
+     "docgen": "run-s docgen-typedoc",
+     "docgen-typedoc": "rimraf ./docs/api && typedoc --plugin typedoc-plugin-markdown --out ./docs/api",
      "test": "vitest run",
      "test-unit": "vitest run",
      "test-unit-bun": "bun --bun run vitest run",
@@ -61,7 +63,7 @@
      "@belgattitude/eslint-config-bases": "8.8.0",
      "@dotenvx/dotenvx": "1.51.4",
      "@duckdb/node-api": "1.4.3-r.3",
-     "@faker-js/faker": "10.1.0",
+     "@faker-js/faker": "10.2.0",
      "@flowblade/source-kysely": "^1.2.0",
      "@httpx/assert": "0.16.7",
      "@size-limit/esbuild": "12.0.0",
@@ -94,8 +96,10 @@
      "tarn": "3.0.2",
      "tedious": "19.2.0",
      "testcontainers": "11.11.0",
-     "tsdown": "0.18.3",
+     "tsdown": "0.18.4",
      "tsx": "4.21.0",
+     "typedoc": "0.28.15",
+     "typedoc-plugin-markdown": "4.9.0",
      "typescript": "5.9.3",
      "vite-tsconfig-paths": "6.0.3",
      "vitest": "4.0.16"