@mkven/samples-generation 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +484 -0
  3. package/dist/generator/base-generator.d.ts +39 -0
  4. package/dist/generator/base-generator.d.ts.map +1 -0
  5. package/dist/generator/base-generator.js +72 -0
  6. package/dist/generator/base-generator.js.map +1 -0
  7. package/dist/generator/clickhouse-generator.d.ts +44 -0
  8. package/dist/generator/clickhouse-generator.d.ts.map +1 -0
  9. package/dist/generator/clickhouse-generator.js +452 -0
  10. package/dist/generator/clickhouse-generator.js.map +1 -0
  11. package/dist/generator/escape.d.ts +18 -0
  12. package/dist/generator/escape.d.ts.map +1 -0
  13. package/dist/generator/escape.js +25 -0
  14. package/dist/generator/escape.js.map +1 -0
  15. package/dist/generator/index.d.ts +9 -0
  16. package/dist/generator/index.d.ts.map +1 -0
  17. package/dist/generator/index.js +8 -0
  18. package/dist/generator/index.js.map +1 -0
  19. package/dist/generator/postgres-generator.d.ts +33 -0
  20. package/dist/generator/postgres-generator.d.ts.map +1 -0
  21. package/dist/generator/postgres-generator.js +317 -0
  22. package/dist/generator/postgres-generator.js.map +1 -0
  23. package/dist/generator/sqlite-generator.d.ts +30 -0
  24. package/dist/generator/sqlite-generator.d.ts.map +1 -0
  25. package/dist/generator/sqlite-generator.js +334 -0
  26. package/dist/generator/sqlite-generator.js.map +1 -0
  27. package/dist/generator/trino-generator.d.ts +38 -0
  28. package/dist/generator/trino-generator.d.ts.map +1 -0
  29. package/dist/generator/trino-generator.js +408 -0
  30. package/dist/generator/trino-generator.js.map +1 -0
  31. package/dist/generator/types.d.ts +230 -0
  32. package/dist/generator/types.d.ts.map +1 -0
  33. package/dist/generator/types.js +2 -0
  34. package/dist/generator/types.js.map +1 -0
  35. package/dist/generator/utils.d.ts +14 -0
  36. package/dist/generator/utils.d.ts.map +1 -0
  37. package/dist/generator/utils.js +48 -0
  38. package/dist/generator/utils.js.map +1 -0
  39. package/dist/index.d.ts +2 -0
  40. package/dist/index.d.ts.map +1 -0
  41. package/dist/index.js +2 -0
  42. package/dist/index.js.map +1 -0
  43. package/package.json +75 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Damir Manapov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,484 @@
1
+ # samples-generation
2
+
3
+ Generate sample data for multiple databases with a unified interface.
4
+
5
+ ## Objective
6
+
7
+ We often need to prefill tables during tests, checks, and measurements. These generators support filling tables with random data and applying transformations (template-based construction of values from other columns, lookups in another table, data corruption, etc.). They can be used for simple prefilling as well as for controlled data corruption when testing entity resolution.
8
+
9
+ ## Supported Databases
10
+
11
+ - **PostgreSQL** - via `postgres` package
12
+ - **ClickHouse** - via `@clickhouse/client`
13
+ - **SQLite** - via `better-sqlite3`
14
+ - **Trino** - via `trino-client` (writes to Iceberg tables)
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pnpm install
20
+ ```
21
+
22
+ ## Measurements of simple generations
23
+
24
+ Environment: local databases, simple setup, 1 billion rows, 5 columns (id, 10-char string, 0 - 1000 float, string choice out of 3 variants, datetime)
25
+
26
+ _ClickHouse:_ Generated in 11m 2s (generation: 6m 8s, optimisation: 4m 54s), table size: 23.81 GB
27
+
28
+ _Trino:_ Generated in 5m 35s (generation: 5m 34s, optimisation: 120ms), table size: 17.41 GB
29
+
30
+ _PostgreSQL:_ Generated in 54m 26s (generation: 35m 55s, optimisation: 18m 30s), table size: 63.60 GB
31
+
32
+ Same setup but 10 billion rows:
33
+
34
+ ## Quick Start
35
+
36
+ ### Using the Generator API
37
+
38
+ ```typescript
39
+ import {
40
+ PostgresDataGenerator,
41
+ ClickHouseDataGenerator,
42
+ SQLiteDataGenerator,
43
+ TrinoDataGenerator,
44
+ type TableConfig,
45
+ } from "./src/generator/index.js";
46
+
47
+ const table: TableConfig = {
48
+ name: "users",
49
+ columns: [
50
+ { name: "id", type: "integer", generator: { kind: "sequence", start: 1 } },
51
+ {
52
+ name: "name",
53
+ type: "string",
54
+ generator: { kind: "randomString", length: 10 },
55
+ },
56
+ {
57
+ name: "score",
58
+ type: "float",
59
+ generator: { kind: "randomFloat", min: 0, max: 100 },
60
+ },
61
+ {
62
+ name: "status",
63
+ type: "string",
64
+ generator: { kind: "choice", values: ["active", "inactive"] },
65
+ },
66
+ { name: "created_at", type: "datetime", generator: { kind: "datetime" } },
67
+ ],
68
+ };
69
+
70
+ // All generators have the same interface
71
+ const generator = new PostgresDataGenerator({
72
+ host: "localhost",
73
+ port: 5432,
74
+ database: "appdb",
75
+ username: "postgres",
76
+ password: "postgres",
77
+ });
78
+
79
+ await generator.connect();
80
+ const result = await generator.generate({
81
+ table,
82
+ rowCount: 1000,
83
+ truncateFirst: true,
84
+ resumeSequences: true, // Continue sequence from last max value
85
+ });
86
+ console.log(
87
+ `Inserted ${result.rowsInserted} rows in ${result.generateMs}ms (optimize: ${result.optimizeMs}ms)`
88
+ );
89
+ await generator.disconnect();
90
+ ```
91
+
92
+ ### Database Configurations
93
+
94
+ All databases use a consistent `host`/`port` configuration:
95
+
96
+ ```typescript
97
+ // PostgreSQL
98
+ new PostgresDataGenerator({
99
+ host: "localhost",
100
+ port: 5432,
101
+ database: "appdb",
102
+ username: "postgres",
103
+ password: "postgres",
104
+ });
105
+
106
+ // ClickHouse
107
+ new ClickHouseDataGenerator({
108
+ host: "localhost",
109
+ port: 8123,
110
+ database: "default",
111
+ username: "default",
112
+ password: "clickhouse",
113
+ });
114
+
115
+ // SQLite
116
+ new SQLiteDataGenerator({
117
+ path: "data/samples.db",
118
+ });
119
+
120
+ // Trino/Iceberg
121
+ new TrinoDataGenerator({
122
+ host: "localhost",
123
+ port: 8080,
124
+ catalog: "iceberg",
125
+ schema: "warehouse",
126
+ user: "trino",
127
+ });
128
+ ```
129
+
130
+ ### Column Types
131
+
132
+ | Type | PostgreSQL | ClickHouse | SQLite | Trino |
133
+ | ---------- | ---------------- | ---------- | ------- | --------- |
134
+ | `integer` | INTEGER | Int32 | INTEGER | INTEGER |
135
+ | `bigint` | BIGINT | Int64 | INTEGER | BIGINT |
136
+ | `float` | DOUBLE PRECISION | Float64 | REAL | DOUBLE |
137
+ | `string` | TEXT | String | TEXT | VARCHAR |
138
+ | `boolean` | BOOLEAN | Bool | INTEGER | BOOLEAN |
139
+ | `datetime` | TIMESTAMP | DateTime | TEXT | TIMESTAMP |
140
+ | `date` | DATE | Date | TEXT | DATE |
141
+
142
+ ### Column Options
143
+
144
+ Each column can have additional options:
145
+
146
+ | Option | Type | Default | Description |
147
+ | ----------------- | --------- | ------- | ------------------------------------------------ |
148
+ | `nullable` | `boolean` | `false` | If `true`, omits `NOT NULL` constraint on column |
149
+ | `nullProbability` | `number` | `0` | Probability of NULL values (0-1) |
150
+
151
+ Example with nullable column:
152
+
153
+ ```typescript
154
+ {
155
+ name: "middle_name",
156
+ type: "string",
157
+ generator: { kind: "randomString", length: 10 },
158
+ nullable: true,
159
+ nullProbability: 0.3 // 30% of rows will have NULL
160
+ }
161
+ ```
162
+
163
+ ### Value Generators
164
+
165
+ | Kind | Description | Options |
166
+ | ---------------- | ---------------- | -------------------------------------- |
167
+ | `sequence` | Auto-increment | `start`, `step` |
168
+ | `randomInt` | Random integer | `min`, `max` |
169
+ | `randomFloat` | Random float | `min`, `max`, `precision` (default: 2) |
170
+ | `randomString` | Random string | `length` |
171
+ | `choice` | Pick from list | `values` |
172
+ | `choiceByLookup` | Optimized choice | `values` (large arrays) |
173
+ | `constant` | Fixed value | `value` |
174
+ | `datetime` | Random datetime | `from`, `to` |
175
+ | `uuid` | UUID v4 | - |
176
+
177
+ #### `choiceByLookup` Generator
178
+
179
+ Use `choiceByLookup` instead of `choice` when selecting from thousands of values. It uses CTEs with arrays for O(1) random selection, making it efficient for billions of rows:
180
+
181
+ ```typescript
182
+ {
183
+ name: "last_name",
184
+ type: "string",
185
+ generator: {
186
+ kind: "choiceByLookup",
187
+ values: ["Smith", "Johnson", "Williams", ...] // thousands of values
188
+ }
189
+ }
190
+ ```
191
+
192
+ - PostgreSQL: CTE with `ARRAY[]` and `array_length()` indexing
193
+ - ClickHouse: `WITH` clause with array variable
194
+ - SQLite: CTE with JSON array and `json_extract()`
195
+ - Trino: CTE with `ARRAY[]` and `element_at()`
196
+
197
+ ### Generate Options
198
+
199
+ ```typescript
200
+ interface GenerateOptions {
201
+ table: TableConfig;
202
+ rowCount: number;
203
+ createTable?: boolean; // Default: true
204
+ dropFirst?: boolean; // Default: false - drop table before generating
205
+ truncateFirst?: boolean; // Default: false
206
+ resumeSequences?: boolean; // Default: true - continue from max value
207
+ optimize?: boolean; // Default: true - run VACUUM/OPTIMIZE after insert
208
+ }
209
+ ```
210
+
211
+ ### Transformations
212
+
213
+ Apply transformations to existing tables. Useful for creating derived columns (like email from first/last name) or introducing realistic data quality issues.
214
+
215
+ ```typescript
216
+ // Generate data first
217
+ await generator.generate({ table: usersTable, rowCount: 10000 });
218
+
219
+ // Then apply transformations
220
+ await generator.transform("users", [
221
+ {
222
+ description: "Generate email addresses",
223
+ transformations: [
224
+ {
225
+ kind: "template",
226
+ column: "email",
227
+ template: "{first_name}.{last_name}@example.com",
228
+ },
229
+ ],
230
+ },
231
+ ]);
232
+ ```
233
+
234
+ ```typescript
235
+ interface TransformResult {
236
+ durationMs: number;
237
+ batchesApplied: number;
238
+ }
239
+ ```
240
+
241
+ #### Transformation Types
242
+
243
+ **Template Transformation** - Build column values from other columns:
244
+
245
+ ```typescript
246
+ {
247
+ kind: "template",
248
+ column: "email",
249
+ template: "{first_name}.{last_name}@example.com",
250
+ lowercase: true // Optional: convert result to lowercase
251
+ }
252
+ ```
253
+
254
+ **Mutate Transformation** - Introduce random character mutations:
255
+
256
+ ```typescript
257
+ {
258
+ kind: "mutate",
259
+ column: "name",
260
+ probability: 0.1, // 10% of rows get mutated
261
+ operations: ["replace", "delete", "insert"] // Random operation selected per row
262
+ }
263
+ ```
264
+
265
+ **Lookup Transformation** - Assign values from another table via join:
266
+
267
+ ```typescript
268
+ {
269
+ kind: "lookup",
270
+ column: "category_name", // Column to update
271
+ fromTable: "categories", // Source table
272
+ fromColumn: "name", // Column to copy value from
273
+ joinOn: {
274
+ targetColumn: "category_id", // Column in target table
275
+ lookupColumn: "id" // Column in source table to match
276
+ }
277
+ }
278
+ ```
279
+
280
+ > **Note:** For ClickHouse, lookup transformation uses a table swap approach (CREATE → INSERT SELECT with JOIN → RENAME) since ClickHouse doesn't support correlated subqueries in `ALTER TABLE UPDATE`. This means lookups execute **before** other transformations in the same batch. If order matters, place lookups in a separate batch.
281
+
282
+ **Swap Transformation** - Swap values between two columns with probability:
283
+
284
+ ```typescript
285
+ {
286
+ kind: "swap",
287
+ column1: "first_name",
288
+ column2: "last_name",
289
+ probability: 0.1 // 10% of rows get swapped
290
+ }
291
+ ```
292
+
293
+ > **Note:** Both columns use the same random decision per row, ensuring atomic swaps (if column1 gets column2's value, column2 always gets column1's value). For ClickHouse, swap also uses the table swap approach (like lookup) since ClickHouse evaluates each `rand()` call separately. Multiple swaps in the same batch are combined into a single table swap operation for efficiency.
294
+
295
+ > **Design Note:** PostgreSQL, SQLite, and Trino execute each swap as a separate `UPDATE ... WHERE random() < probability` statement. This is intentionally not batched because UPDATE is a lightweight operation on these databases. ClickHouse batches swaps because each swap would otherwise require a full table copy (CREATE → INSERT → RENAME → DROP), making the overhead significant.
296
+
297
+ #### Batching Transformations
298
+
299
+ Transformations are organized in batches for efficiency:
300
+
301
+ - Each batch becomes a separate UPDATE statement (executed sequentially)
302
+ - Transformations within a batch are combined into a single UPDATE
303
+ - Batches support optional descriptions for logging and debugging
304
+
305
+ ```typescript
306
+ await generator.transform("users", [
307
+ {
308
+ description: "Generate email addresses",
309
+ transformations: [
310
+ {
311
+ kind: "template",
312
+ column: "email",
313
+ template: "{first_name}.{last_name}@example.com",
314
+ },
315
+ ],
316
+ },
317
+ {
318
+ description: "Introduce data quality issues",
319
+ transformations: [
320
+ {
321
+ kind: "mutate",
322
+ column: "email",
323
+ probability: 0.1,
324
+ operations: ["replace"],
325
+ },
326
+ ],
327
+ },
328
+ ]);
329
+ ```
330
+
331
+ With descriptions, you'll see helpful logs:
332
+
333
+ ```
334
+ [postgres] Applying transformations: Generate email addresses (1 transformation(s))
335
+ [postgres] Applying transformations: Introduce data quality issues (1 transformation(s))
336
+ ```
337
+
338
+ ### Escape Utilities
339
+
340
+ For custom queries, use the exported escape functions:
341
+
342
+ ```typescript
343
+ import {
344
+ escapePostgresIdentifier,
345
+ escapeClickHouseIdentifier,
346
+ escapeTrinoIdentifier,
347
+ } from "./src/generator/index.js";
348
+
349
+ escapePostgresIdentifier("my-table"); // "my-table"
350
+ escapePostgresIdentifier('table"name'); // "table""name"
351
+
352
+ escapeClickHouseIdentifier("my-table"); // `my-table`
353
+ escapeClickHouseIdentifier("table`name"); // `table``name`
354
+
355
+ escapeTrinoIdentifier("samples"); // "samples"
356
+ escapeTrinoIdentifier("samples$files"); // "samples$files" (for metadata tables)
357
+ ```
358
+
359
+ ### Table Size
360
+
361
+ Get the size of a table (including indexes):
362
+
363
+ ```typescript
364
+ // Get size in bytes
365
+ const bytes = await generator.getTableSize("users");
366
+ // 1234567
367
+
368
+ // Get human-readable size
369
+ const size = await generator.getTableSizeForHuman("users");
370
+ // "1.18 MB"
371
+ ```
372
+
373
+ You can also use the `formatBytes` utility directly:
374
+
375
+ ```typescript
376
+ import { formatBytes } from "./src/generator/index.js";
377
+
378
+ formatBytes(1024); // "1.00 KB"
379
+ formatBytes(1048576); // "1.00 MB"
380
+ ```
381
+
382
+ ### Optimization
383
+
384
+ By default, `generate()` runs database-specific optimization after inserting rows:
385
+
386
+ | Database | Optimization |
387
+ | ---------- | --------------------------------------------------------------------------- |
388
+ | PostgreSQL | `VACUUM ANALYZE` - reclaims storage and updates statistics |
389
+ | ClickHouse | `OPTIMIZE TABLE FINAL` - merges all parts for MergeTree engines |
390
+ | SQLite | `VACUUM` + `ANALYZE` - rebuilds file and gathers statistics |
391
+ | Trino | `rewrite_data_files` + `expire_snapshots` + `remove_orphan_files` - Iceberg |
392
+
393
+ Disable for quick tests:
394
+
395
+ ```typescript
396
+ await generator.generate({
397
+ table,
398
+ rowCount: 1000,
399
+ optimize: false, // Skip VACUUM/OPTIMIZE
400
+ });
401
+ ```
402
+
403
+ Or call manually:
404
+
405
+ ```typescript
406
+ await generator.optimize("users");
407
+ ```
408
+
409
+ ## Scripts
410
+
411
+ ### Generate Data
412
+
413
+ ```bash
414
+ # Generate 1000 rows in all databases (requires docker-compose up)
415
+ npx tsx scripts/generate-all.ts
416
+
417
+ # Specify row count
418
+ npx tsx scripts/generate-all.ts --rows 1000
419
+ npx tsx scripts/generate-all.ts -r 1_000_000
420
+
421
+ # Generate for specific databases only
422
+ npx tsx scripts/generate-all.ts --sqlite
423
+ npx tsx scripts/generate-all.ts --postgres
424
+ npx tsx scripts/generate-all.ts --clickhouse
425
+ npx tsx scripts/generate-all.ts --trino
426
+
427
+ # Combine options
428
+ npx tsx scripts/generate-all.ts -r 10000 --postgres --clickhouse
429
+
430
+ # Show help
431
+ npx tsx scripts/generate-all.ts --help
432
+ ```
433
+
434
+ ## Docker Compose
435
+
436
+ Start all databases:
437
+
438
+ ```bash
439
+ pnpm compose:up
440
+ ```
441
+
442
+ Services available:
443
+
444
+ | Service | Port(s) | Credentials |
445
+ | ---------- | -------------------------- | --------------------- |
446
+ | PostgreSQL | 5432 | postgres:postgres |
447
+ | ClickHouse | 8123 (HTTP), 9009 (native) | default:clickhouse |
448
+ | Trino | 8080 | trino (no password) |
449
+ | MinIO | 9000 (S3), 9001 (console) | minioadmin:minioadmin |
450
+ | Nessie | 19120 | - |
451
+
452
+ ## Testing
453
+
454
+ ```bash
455
+ # Run tests (SQLite only by default)
456
+ pnpm test
457
+
458
+ # Run tests with specific databases
459
+ TEST_POSTGRES=1 pnpm test
460
+ TEST_CLICKHOUSE=1 pnpm test
461
+ TEST_TRINO=1 pnpm test
462
+
463
+ # Run all database tests
464
+ TEST_POSTGRES=1 TEST_CLICKHOUSE=1 TEST_TRINO=1 pnpm test
465
+
466
+ # Or use the shortcut script
467
+ ./test-all-dbs.sh
468
+ ```
469
+
470
+ ## Quality Checks
471
+
472
+ ```bash
473
+ # Run formatting, linting, type checking, and tests
474
+ ./check.sh
475
+
476
+ # Check for security vulnerabilities and outdated dependencies
477
+ ./health.sh
478
+
479
+ # Check for dependency updates (requires npx renovate)
480
+ ./renovate-check.sh
481
+
482
+ # Run all checks
483
+ ./all-checks.sh
484
+ ```
@@ -0,0 +1,39 @@
1
+ import type { DataGenerator, TableConfig, GenerateOptions, GenerateResult, GeneratedRow, Transformation, TransformationBatch, TransformResult } from "./types.js";
2
+ export declare abstract class BaseDataGenerator implements DataGenerator { // Shared generate()/transform() orchestration; concrete generators supply the abstract members below.
3
+ abstract readonly name: string; // Used as the "[name]" prefix in the log lines printed by generate() and transform().
4
+ abstract connect(): Promise<void>;
5
+ abstract disconnect(): Promise<void>;
6
+ abstract createTable(table: TableConfig): Promise<void>; // Called by generate() unless `createTable` is set to false (it defaults to true).
7
+ abstract truncateTable(tableName: string): Promise<void>; // Called by generate() when `truncateFirst` is set and `dropFirst` is not.
8
+ abstract dropTable(tableName: string): Promise<void>; // Called by generate() when `dropFirst` is set.
9
+ abstract queryRows(tableName: string, limit?: number): Promise<GeneratedRow[]>;
10
+ abstract countRows(tableName: string): Promise<number>;
11
+ /**
12
+ * Get the maximum value of a column (for resuming sequences); generate() treats null as "nothing to resume from"
13
+ */
14
+ abstract getMaxValue(tableName: string, columnName: string): Promise<number | null>;
15
+ /**
16
+ * Get the total size of a table in bytes (including indexes if applicable)
17
+ */
18
+ abstract getTableSize(tableName: string): Promise<number | null>;
19
+ /**
20
+ * Run database-specific optimization after large inserts (generate() calls this unless `optimize` is false)
21
+ */
22
+ abstract optimize(tableName: string): Promise<void>;
23
+ /**
24
+ * Apply one batch of transformations (typically a single UPDATE; ClickHouse lookups and swaps are documented as using a table swap instead)
25
+ */
26
+ protected abstract applyTransformations(tableName: string, transformations: Transformation[]): Promise<void>;
27
+ /**
28
+ * Generate rows using database-native SQL functions.
29
+ * This is much faster than JavaScript-based generation.
30
+ */
31
+ protected abstract generateNative(table: TableConfig, rowCount: number, startSequence: number): Promise<void>;
32
+ /**
33
+ * Get the total size of a table as a human-readable string (null when getTableSize() resolves to null)
34
+ */
35
+ getTableSizeForHuman(tableName: string): Promise<string | null>;
36
+ generate(options: GenerateOptions): Promise<GenerateResult>; // Optional drop/create/truncate, then generateNative(), then optional optimize().
37
+ transform(tableName: string, batches: TransformationBatch[]): Promise<TransformResult>; // Applies non-empty batches in order via applyTransformations(); empty batches are skipped.
38
+ }
39
+ //# sourceMappingURL=base-generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-generator.d.ts","sourceRoot":"","sources":["../../src/generator/base-generator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,aAAa,EACb,WAAW,EACX,eAAe,EACf,cAAc,EACd,YAAY,EACZ,cAAc,EACd,mBAAmB,EACnB,eAAe,EAChB,MAAM,YAAY,CAAC;AAGpB,8BAAsB,iBAAkB,YAAW,aAAa;IAC9D,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAE/B,QAAQ,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IACjC,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IACpC,QAAQ,CAAC,WAAW,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IACvD,QAAQ,CAAC,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IACxD,QAAQ,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IACpD,QAAQ,CAAC,SAAS,CAChB,SAAS,EAAE,MAAM,EACjB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,EAAE,CAAC;IAC1B,QAAQ,CAAC,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAEtD;;OAEG;IACH,QAAQ,CAAC,WAAW,CAClB,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAEzB;;OAEG;IACH,QAAQ,CAAC,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAEhE;;OAEG;IACH,QAAQ,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAEnD;;OAEG;IACH,SAAS,CAAC,QAAQ,CAAC,oBAAoB,CACrC,SAAS,EAAE,MAAM,EACjB,eAAe,EAAE,cAAc,EAAE,GAChC,OAAO,CAAC,IAAI,CAAC;IAEhB;;;OAGG;IACH,SAAS,CAAC,QAAQ,CAAC,cAAc,CAC/B,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC;IAEhB;;OAEG;IACG,oBAAoB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAK/D,QAAQ,CAAC,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC;IA+D3D,SAAS,CACb,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,mBAAmB,EAAE,GAC7B,OAAO,CAAC,eAAe,CAAC;CAqB5B"}
@@ -0,0 +1,72 @@
1
+ import { formatBytes } from "./utils.js";
2
+ export class BaseDataGenerator { // Shared orchestration; the database-specific methods invoked through `this` are not defined in this class.
3
+ /**
4
+ * Get the total size of a table as a human-readable string
5
+ */
6
+ async getTableSizeForHuman(tableName) {
7
+ const size = await this.getTableSize(tableName);
8
+ return size === null ? null : formatBytes(size); // Pass null through rather than formatting it.
9
+ }
10
+ async generate(options) { // Optionally drops/creates/truncates the table, inserts rowCount rows, then optionally optimizes; resolves to timing info.
11
+ const { table, rowCount, createTable = true, dropFirst = false, truncateFirst = false, resumeSequences = true, optimize = true, } = options; // Defaults: create, resume sequences and optimize are on; drop and truncate are off.
12
+ const startTime = Date.now();
13
+ const tableLabel = table.description
14
+ ? `${table.name} (${table.description})`
15
+ : table.name; // Log label: the table name, plus the description in parentheses when one is set.
16
+ console.log(`[${this.name}] Generating: ${tableLabel} - ${rowCount.toLocaleString()} rows`);
17
+ if (dropFirst) {
18
+ await this.dropTable(table.name);
19
+ }
20
+ if (createTable) {
21
+ await this.createTable(table);
22
+ }
23
+ if (truncateFirst && !dropFirst) { // Truncation is skipped when the table was dropped in this same call.
24
+ await this.truncateTable(table.name);
25
+ }
26
+ let startSequence = 1; // Kept when resuming is off, no sequence column exists, or no max value is found. NOTE(review): generator.start is not read in this method.
27
+ if (resumeSequences) {
28
+ for (const column of table.columns) {
29
+ if (column.generator.kind === "sequence") {
30
+ const maxVal = await this.getMaxValue(table.name, column.name);
31
+ if (maxVal !== null) {
32
+ const step = column.generator.step ?? 1; // A missing step counts as 1.
33
+ startSequence = maxVal + step; // Continue one step past the current maximum.
34
+ }
35
+ break; // Only check first sequence column
36
+ }
37
+ }
38
+ }
39
+ await this.generateNative(table, rowCount, startSequence);
40
+ const generateMs = Date.now() - startTime; // Measured from startTime, so it also covers the drop/create/truncate and max-value lookup above.
41
+ let optimizeMs = 0; // Stays 0 when optimization is disabled.
42
+ if (optimize) {
43
+ const optimizeStart = Date.now();
44
+ await this.optimize(table.name);
45
+ optimizeMs = Date.now() - optimizeStart;
46
+ }
47
+ return {
48
+ rowsInserted: rowCount, // Echoes the requested count; the table is not re-counted here.
49
+ durationMs: Date.now() - startTime, // Total elapsed time, including the optimize step.
50
+ generateMs,
51
+ optimizeMs,
52
+ };
53
+ }
54
+ async transform(tableName, batches) { // Applies each non-empty batch sequentially and reports how many were applied.
55
+ const startTime = Date.now();
56
+ let batchesApplied = 0;
57
+ for (let i = 0; i < batches.length; i++) {
58
+ const batch = batches[i]; // eslint-disable-line @typescript-eslint/no-non-null-assertion
59
+ if (batch.transformations.length > 0) { // Empty batches are neither executed nor counted.
60
+ const batchLabel = batch.description ?? `batch ${String(i + 1)}`; // Fall back to a 1-based "batch N" label when no description is given.
61
+ console.log(`[${this.name}] Applying transformations: ${batchLabel} (${String(batch.transformations.length)} transformation(s))`);
62
+ await this.applyTransformations(tableName, batch.transformations);
63
+ batchesApplied++;
64
+ }
65
+ }
66
+ return {
67
+ durationMs: Date.now() - startTime,
68
+ batchesApplied,
69
+ };
70
+ }
71
+ }
72
+ //# sourceMappingURL=base-generator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-generator.js","sourceRoot":"","sources":["../../src/generator/base-generator.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAEzC,MAAM,OAAgB,iBAAiB;IAkDrC;;OAEG;IACH,KAAK,CAAC,oBAAoB,CAAC,SAAiB;QAC1C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAChD,OAAO,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAClD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,KAAK,EACL,QAAQ,EACR,WAAW,GAAG,IAAI,EAClB,SAAS,GAAG,KAAK,EACjB,aAAa,GAAG,KAAK,EACrB,eAAe,GAAG,IAAI,EACtB,QAAQ,GAAG,IAAI,GAChB,GAAG,OAAO,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW;YAClC,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,WAAW,GAAG;YACxC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;QACf,OAAO,CAAC,GAAG,CACT,IAAI,IAAI,CAAC,IAAI,iBAAiB,UAAU,MAAM,QAAQ,CAAC,cAAc,EAAE,OAAO,CAC/E,CAAC;QAEF,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;QAChC,CAAC;QAED,IAAI,aAAa,IAAI,CAAC,SAAS,EAAE,CAAC;YAChC,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;QAED,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,eAAe,EAAE,CAAC;YACpB,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;gBACnC,IAAI,MAAM,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;oBACzC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;oBAC/D,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;wBACpB,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,CAAC;wBACxC,aAAa,GAAG,MAAM,GAAG,IAAI,CAAC;oBAChC,CAAC;oBACD,MAAM,CAAC,mCAAmC;gBAC5C,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,QAAQ,EAAE,aAAa,CAAC,CAAC;QAC1D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAE1C,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACjC,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAChC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,aAAa,CAAC;QAC1C,CAAC;QAED,OAAO;YACL,YAAY,EAAE,QAAQ;YACtB,UAAU,EAAE,IAAI,CAAC,
GAAG,EAAE,GAAG,SAAS;YAClC,UAAU;YACV,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,SAAS,CACb,SAAiB,EACjB,OAA8B;QAE9B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,IAAI,cAAc,GAAG,CAAC,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC,+DAA+D;YAC1F,IAAI,KAAK,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrC,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,IAAI,SAAS,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;gBACjE,OAAO,CAAC,GAAG,CACT,IAAI,IAAI,CAAC,IAAI,+BAA+B,UAAU,KAAK,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,qBAAqB,CACrH,CAAC;gBACF,MAAM,IAAI,CAAC,oBAAoB,CAAC,SAAS,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;gBAClE,cAAc,EAAE,CAAC;YACnB,CAAC;QACH,CAAC;QAED,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,cAAc;SACf,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,44 @@
1
+ import type { TableConfig, GeneratedRow, GeneratorConfig, Transformation } from "./types.js";
2
+ import { BaseDataGenerator } from "./base-generator.js";
3
+ export interface ClickHouseConfig { // Connection settings accepted by the ClickHouseDataGenerator constructor.
4
+ host: string;
5
+ port: number; // The package README example uses 8123, which it lists as the HTTP port.
6
+ username: string;
7
+ password: string;
8
+ database: string;
9
+ }
10
+ /**
11
+ * Convert a generator config to a ClickHouse SQL expression
12
+ */
13
+ export declare function generatorToClickHouseExpr(gen: GeneratorConfig, seqExpr: string): string; // NOTE(review): seqExpr is presumably the SQL expression used for "sequence" generators; confirm against the implementation.
14
+ export declare class ClickHouseDataGenerator extends BaseDataGenerator { // ClickHouse implementation of the members BaseDataGenerator declares abstract.
15
+ private config;
16
+ readonly name = "clickhouse"; // Appears as the "[clickhouse]" prefix in the base class's log output.
17
+ private client;
18
+ constructor(config: ClickHouseConfig);
19
+ connect(): Promise<void>;
20
+ disconnect(): Promise<void>;
21
+ private getClient;
22
+ createTable(table: TableConfig): Promise<void>;
23
+ truncateTable(tableName: string): Promise<void>;
24
+ dropTable(tableName: string): Promise<void>;
25
+ protected generateNative(table: TableConfig, rowCount: number, startSequence: number): Promise<void>;
26
+ queryRows(tableName: string, limit?: number): Promise<GeneratedRow[]>;
27
+ countRows(tableName: string): Promise<number>;
28
+ getMaxValue(tableName: string, columnName: string): Promise<number | null>;
29
+ getTableSize(tableName: string): Promise<number | null>;
30
+ optimize(tableName: string): Promise<void>; // Per the package README, this runs `OPTIMIZE TABLE FINAL`.
31
+ protected applyTransformations(tableName: string, transformations: Transformation[]): Promise<void>; // Per the package README, lookups in a batch execute before that batch's other transformations.
32
+ /**
33
+ * Apply multiple swap transformations in a single table swap operation.
34
+ * Each swap gets its own random value to ensure independent swap decisions.
35
+ */
36
+ private applySwapTransformations;
37
+ /**
38
+ * Apply a lookup transformation using ClickHouse's table swap approach.
39
+ * Since ClickHouse doesn't support correlated subqueries in ALTER TABLE UPDATE,
40
+ * we use: CREATE TABLE new -> INSERT SELECT with JOIN -> RENAME swap
41
+ */
42
+ private applyLookupTransformation;
43
+ }
44
+ //# sourceMappingURL=clickhouse-generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"clickhouse-generator.d.ts","sourceRoot":"","sources":["../../src/generator/clickhouse-generator.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,WAAW,EACX,YAAY,EAEZ,eAAe,EACf,cAAc,EAIf,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAIxD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAkCD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,GAAG,EAAE,eAAe,EACpB,OAAO,EAAE,MAAM,GACd,MAAM,CA4CR;AAED,qBAAa,uBAAwB,SAAQ,iBAAiB;IAIhD,OAAO,CAAC,MAAM;IAH1B,QAAQ,CAAC,IAAI,gBAAgB;IAC7B,OAAO,CAAC,MAAM,CAAiC;gBAE3B,MAAM,EAAE,gBAAgB;IAI5C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAYlB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAOjC,OAAO,CAAC,SAAS;IAOX,WAAW,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAsB9C,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAO/C,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;cAOjC,cAAc,CAC5B,KAAK,EAAE,WAAW,EAClB,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC;IA4CV,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAUlE,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAc7C,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAcnB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAevD,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;cAWhC,oBAAoB,CAClC,SAAS,EAAE,MAAM,EACjB,eAAe,EAAE,cAAc,EAAE,GAChC,OAAO,CAAC,IAAI,CAAC;IAsHhB;;;OAGG;YACW,wBAAwB;IA0FtC;;;;OAIG;YACW,yBAAyB;CAoDxC"}