@flowblade/sqlduck 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,11 +2,57 @@
2
2
 
3
3
  > Currently experimental
4
4
 
5
- ### Quick start
5
+ ## Quick start
6
6
 
7
- ### Environment variables
7
+ ### Create a memory table
8
8
 
9
- ### Schema
9
+ ```typescript
10
+ import { SqlDuck, Table } from "@flowblade/sqlduck";
11
+ import * as z from "zod";
12
+ import { dbDuckDbMemoryConn } from "./db.duckdb-memory.config";
13
+
14
+ const sqlDuck = new SqlDuck({ conn: dbDuckDbMemoryConn });
15
+
16
+ // Schema of the table, note that you can use meta to add information
17
+ const userSchema = z.object({
18
+ id: z.number().int().meta({ primaryKey: true }),
19
+ name: z.string(),
20
+ });
21
+
22
+ // Async generator function that yields rows to insert
23
+ async function* getUserRows(): AsyncIterableIterator<
24
+ z.infer<typeof userSchema>
25
+ > {
26
+ // database or api call
27
+ }
28
+
29
+ const result = await sqlDuck.toTable({
30
+ table: new Table({ name: "user", database: "mydb" }), // Table definition
31
+ schema: userSchema, // The schema to use to create the table
32
+ rowStream: getUserRows(), // The async iterable that yields rows
33
+ // 👇Optional:
34
+ chunkSize: 2048, // Number of rows to append when using duckdb appender. Default is 2048
35
+ onDataAppended: ({ total }) => {
36
+ console.log(`Appended ${total} rows so far`);
37
+ },
38
+ onDataAppendedBatchSize: 4096, // Call onDataAppended every 4096 rows
39
+ // Optional table creation options
40
+ createOptions: {
41
+ create: "CREATE_OR_REPLACE",
42
+ },
43
+ });
44
+
45
+ console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
46
+ console.log(`Table created with DDL: ${result.createTableDDL}`);
47
+
48
+ // You can now use the table in your queries
49
+ const queryResult = await dbDuckDbMemoryConn.query<{
50
+ id: number;
51
+ name: string;
52
+ }>(`
53
+ SELECT id, name FROM mydb.user WHERE id < 1000
54
+ `);
55
+ ```
10
56
 
11
57
  ### Local scripts
12
58
 
package/dist/index.cjs CHANGED
@@ -124,7 +124,10 @@ var SqlDuck = class {
124
124
  this.#logger = params.logger;
125
125
  }
126
126
  toTable = async (params) => {
127
- const { table, schema, chunkSize, rowStream, createOptions } = params;
127
+ const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended, onDataAppendedBatchSize } = params;
128
+ if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
129
+ const callbackBatchSize = onDataAppendedBatchSize ?? chunkSize;
130
+ if (!Number.isSafeInteger(callbackBatchSize) || callbackBatchSize < 1) throw new Error("onDataAppendedBatchSize must be a number greater than 0");
128
131
  const timeStart = Date.now();
129
132
  const { columnTypes, ddl } = await createTableFromZod({
130
133
  conn: this.#duck,
@@ -134,9 +137,8 @@ var SqlDuck = class {
134
137
  });
135
138
  const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
136
139
  const chunkTypes = columnTypes.map((v) => v[1]);
137
- const chunkLimit = chunkSize ?? 2048;
138
140
  let totalRows = 0;
139
- const columnStream = rowsToColumnsChunks(rowStream, chunkLimit);
141
+ const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
140
142
  for await (const dataChunk of columnStream) {
141
143
  const chunk = _duckdb_node_api.DuckDBDataChunk.create(chunkTypes);
142
144
  if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
@@ -144,7 +146,9 @@ var SqlDuck = class {
144
146
  chunk.setColumns(dataChunk);
145
147
  appender.appendDataChunk(chunk);
146
148
  appender.flushSync();
149
+ if (onDataAppended !== void 0 && totalRows % callbackBatchSize === 0) onDataAppended({ total: totalRows });
147
150
  }
151
+ if (onDataAppended !== void 0 && totalRows % callbackBatchSize !== 0) onDataAppended({ total: totalRows });
148
152
  return {
149
153
  timeMs: Math.round(Date.now() - timeStart),
150
154
  totalRows,
package/dist/index.d.cts CHANGED
@@ -46,15 +46,55 @@ type SqlDuckParams = {
46
46
  conn: DuckDBConnection;
47
47
  logger?: (msg: string) => void;
48
48
  };
49
+ type RowStream<T> = AsyncIterableIterator<T>;
49
50
  type ToTableParams<TSchema extends TableSchemaZod> = {
51
+ /**
52
+ * Used to create and fill the data into the table
53
+ */
50
54
  table: Table;
55
+ /**
56
+ * Schema describing the table structure and rowStream content
57
+ */
51
58
  schema: TSchema;
52
- rowStream: AsyncIterableIterator<z.infer<TSchema>>;
59
+ /**
60
+ * Stream of rows to insert into the table
61
+ */
62
+ rowStream: RowStream<z.infer<TSchema>>;
63
+ /**
64
+ * Chunk size when using appender to insert data.
65
+ * Valid numbers between 1 and 2048.
66
+ * @default 2048
67
+ */
53
68
  chunkSize?: number;
69
+ /**
70
+ * Extra options when creating the table
71
+ */
54
72
  createOptions?: TableCreateOptions;
73
+ /**
74
+ * Callback called each time data is appended to the table
75
+ * See also `onDataAppendedBatchSize` to limit the number of calls
76
+ * when appending a lot of data
77
+ */
78
+ onDataAppended?: (params: {
79
+ /**
80
+ * Total number of rows appended so far
81
+ */
82
+ total: number;
83
+ }) => void;
84
+ /**
85
+ * Number of rows appended before calling `onDataAppended` callback
86
+ * @default chunkSize
87
+ */
88
+ onDataAppendedBatchSize?: number;
55
89
  };
56
90
  type ToTableResult = {
91
+ /**
92
+ * Total time taken to insert the data in milliseconds.
93
+ */
57
94
  timeMs: number;
95
+ /**
96
+ * Total number of rows inserted into the table.
97
+ */
58
98
  totalRows: number;
59
99
  /**
60
100
  * The DDL statement used to create the table.
@@ -64,6 +104,45 @@ type ToTableResult = {
64
104
  declare class SqlDuck {
65
105
  #private;
66
106
  constructor(params: SqlDuckParams);
107
+ /**
108
+ * Create a table from a Zod schema and fill it with data from a row stream.
109
+ *
110
+ *
111
+ * @example
112
+ * ```typescript
113
+ * import * as z from 'zod';
114
+ *
115
+ * const sqlDuck = new SqlDuck({ conn: duckDbConnection });
116
+ *
117
+ * // Schema of the table, note that you can use meta to add information
118
+ * const userSchema = z.object({
119
+ * id: z.number().int().meta({ primaryKey: true }),
120
+ * name: z.string(),
121
+ * });
122
+ *
123
+ * // Async generator function that yields rows to insert
124
+ * async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
125
+ * // database or api call
126
+ * }
127
+ *
128
+ * const result = await sqlDuck.toTable({
129
+ * table: new Table({ name: 'user', database: 'mydb' }),
130
+ * schema: userSchema,
131
+ * rowStream: getUserRows(),
132
+ * chunkSize: 2048,
133
+ * onDataAppended: ({ total }) => {
134
+ * console.log(`Appended ${total} rows so far`);
135
+ * },
136
+ * onDataAppendedBatchSize: 4096, // Call onDataAppended every 4096 rows
137
+ * createOptions: {
138
+ * create: 'CREATE_OR_REPLACE',
139
+ * },
140
+ * });
141
+ *
142
+ * console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
143
+ * console.log(`Table created with DDL: ${result.createTableDDL}`);
144
+ * ```
145
+ */
67
146
  toTable: <TSchema extends ZodObject>(params: ToTableParams<TSchema>) => Promise<ToTableResult>;
68
147
  }
69
148
  //#endregion
package/dist/index.d.mts CHANGED
@@ -46,15 +46,55 @@ type SqlDuckParams = {
46
46
  conn: DuckDBConnection;
47
47
  logger?: (msg: string) => void;
48
48
  };
49
+ type RowStream<T> = AsyncIterableIterator<T>;
49
50
  type ToTableParams<TSchema extends TableSchemaZod> = {
51
+ /**
52
+ * Used to create and fill the data into the table
53
+ */
50
54
  table: Table;
55
+ /**
56
+ * Schema describing the table structure and rowStream content
57
+ */
51
58
  schema: TSchema;
52
- rowStream: AsyncIterableIterator<z.infer<TSchema>>;
59
+ /**
60
+ * Stream of rows to insert into the table
61
+ */
62
+ rowStream: RowStream<z.infer<TSchema>>;
63
+ /**
64
+ * Chunk size when using appender to insert data.
65
+ * Valid numbers between 1 and 2048.
66
+ * @default 2048
67
+ */
53
68
  chunkSize?: number;
69
+ /**
70
+ * Extra options when creating the table
71
+ */
54
72
  createOptions?: TableCreateOptions;
73
+ /**
74
+ * Callback called each time data is appended to the table
75
+ * See also `onDataAppendedBatchSize` to limit the number of calls
76
+ * when appending a lot of data
77
+ */
78
+ onDataAppended?: (params: {
79
+ /**
80
+ * Total number of rows appended so far
81
+ */
82
+ total: number;
83
+ }) => void;
84
+ /**
85
+ * Number of rows appended before calling `onDataAppended` callback
86
+ * @default chunkSize
87
+ */
88
+ onDataAppendedBatchSize?: number;
55
89
  };
56
90
  type ToTableResult = {
91
+ /**
92
+ * Total time taken to insert the data in milliseconds.
93
+ */
57
94
  timeMs: number;
95
+ /**
96
+ * Total number of rows inserted into the table.
97
+ */
58
98
  totalRows: number;
59
99
  /**
60
100
  * The DDL statement used to create the table.
@@ -64,6 +104,45 @@ type ToTableResult = {
64
104
  declare class SqlDuck {
65
105
  #private;
66
106
  constructor(params: SqlDuckParams);
107
+ /**
108
+ * Create a table from a Zod schema and fill it with data from a row stream.
109
+ *
110
+ *
111
+ * @example
112
+ * ```typescript
113
+ * import * as z from 'zod';
114
+ *
115
+ * const sqlDuck = new SqlDuck({ conn: duckDbConnection });
116
+ *
117
+ * // Schema of the table, not that you can use meta to add information
118
+ * const userSchema = z.object({
119
+ * id: z.number().int().meta({ primaryKey: true }),
120
+ * name: z.string(),
121
+ * });
122
+ *
123
+ * // Async generator function that yields rows to insert
124
+ * async function* getUserRows(): AsyncIterableIterator<z.infer<typeof userSchema>> {
125
+ * // database or api call
126
+ * }
127
+ *
128
+ * const result = await sqlDuck.toTable({
129
+ * table: new Table({ name: 'user', database: 'mydb' }),
130
+ * schema: userSchema,
131
+ * rowStream: getUserRows(),
132
+ * chunkSize: 2048,
133
+ * onDataAppended: ({ total }) => {
134
+ * console.log(`Appended ${total} rows so far`);
135
+ * },
136
+ * onDataAppendedBatchSize: 4096, // Call onDataAppended every 4096 rows
137
+ * createOptions: {
138
+ * create: 'CREATE_OR_REPLACE',
139
+ * },
140
+ * });
141
+ *
142
+ * console.log(`Inserted ${result.totalRows} rows in ${result.timeMs}ms`);
143
+ * console.log(`Table created with DDL: ${result.createTableDDL}`);
144
+ * ```
145
+ */
67
146
  toTable: <TSchema extends ZodObject>(params: ToTableParams<TSchema>) => Promise<ToTableResult>;
68
147
  }
69
148
  //#endregion
package/dist/index.mjs CHANGED
@@ -103,7 +103,10 @@ var SqlDuck = class {
103
103
  this.#logger = params.logger;
104
104
  }
105
105
  toTable = async (params) => {
106
- const { table, schema, chunkSize, rowStream, createOptions } = params;
106
+ const { table, schema, chunkSize = 2048, rowStream, createOptions, onDataAppended, onDataAppendedBatchSize } = params;
107
+ if (!Number.isSafeInteger(chunkSize) || chunkSize < 1 || chunkSize > 2048) throw new Error("chunkSize must be a number between 1 and 2048");
108
+ const callbackBatchSize = onDataAppendedBatchSize ?? chunkSize;
109
+ if (!Number.isSafeInteger(callbackBatchSize) || callbackBatchSize < 1) throw new Error("onDataAppendedBatchSize must be a number greater than 0");
107
110
  const timeStart = Date.now();
108
111
  const { columnTypes, ddl } = await createTableFromZod({
109
112
  conn: this.#duck,
@@ -113,9 +116,8 @@ var SqlDuck = class {
113
116
  });
114
117
  const appender = await this.#duck.createAppender(table.tableName, table.schemaName, table.databaseName);
115
118
  const chunkTypes = columnTypes.map((v) => v[1]);
116
- const chunkLimit = chunkSize ?? 2048;
117
119
  let totalRows = 0;
118
- const columnStream = rowsToColumnsChunks(rowStream, chunkLimit);
120
+ const columnStream = rowsToColumnsChunks(rowStream, chunkSize);
119
121
  for await (const dataChunk of columnStream) {
120
122
  const chunk = DuckDBDataChunk.create(chunkTypes);
121
123
  if (this.#logger) this.#logger(`Inserting chunk of ${dataChunk.length} rows`);
@@ -123,7 +125,9 @@ var SqlDuck = class {
123
125
  chunk.setColumns(dataChunk);
124
126
  appender.appendDataChunk(chunk);
125
127
  appender.flushSync();
128
+ if (onDataAppended !== void 0 && totalRows % callbackBatchSize === 0) onDataAppended({ total: totalRows });
126
129
  }
130
+ if (onDataAppended !== void 0 && totalRows % callbackBatchSize !== 0) onDataAppended({ total: totalRows });
127
131
  return {
128
132
  timeMs: Math.round(Date.now() - timeStart),
129
133
  totalRows,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flowblade/sqlduck",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "type": "module",
5
5
  "sideEffects": false,
6
6
  "exports": {
@@ -31,6 +31,8 @@
31
31
  "clean": "rimraf ./dist ./coverage ./tsconfig.tsbuildinfo",
32
32
  "build": "tsdown",
33
33
  "build-release": "yarn build && rimraf ./_release && yarn pack && mkdir ./_release && tar zxvf ./package.tgz --directory ./_release && rm ./package.tgz",
34
+ "docgen": "run-s docgen-typedoc",
35
+ "docgen-typedoc": "rimraf ./docs/api && typedoc --plugin typedoc-plugin-markdown --out ./docs/api",
34
36
  "test": "vitest run",
35
37
  "test-unit": "vitest run",
36
38
  "test-unit-bun": "bun --bun run vitest run",
@@ -61,7 +63,7 @@
61
63
  "@belgattitude/eslint-config-bases": "8.8.0",
62
64
  "@dotenvx/dotenvx": "1.51.4",
63
65
  "@duckdb/node-api": "1.4.3-r.3",
64
- "@faker-js/faker": "10.1.0",
66
+ "@faker-js/faker": "10.2.0",
65
67
  "@flowblade/source-kysely": "^1.2.0",
66
68
  "@httpx/assert": "0.16.7",
67
69
  "@size-limit/esbuild": "12.0.0",
@@ -96,6 +98,8 @@
96
98
  "testcontainers": "11.11.0",
97
99
  "tsdown": "0.18.3",
98
100
  "tsx": "4.21.0",
101
+ "typedoc": "0.28.15",
102
+ "typedoc-plugin-markdown": "4.9.0",
99
103
  "typescript": "5.9.3",
100
104
  "vite-tsconfig-paths": "6.0.3",
101
105
  "vitest": "4.0.16"