@tmlmobilidade/writers 20260121.1805.44 → 20260121.2317.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,39 @@
1
1
  import { type ClickHouseClientConfigOptions } from '@clickhouse/client';
2
+ /**
3
+ * Supported ClickHouse data types
4
+ */
5
+ export type ClickHouseType = 'Bool' | 'Boolean' | 'Date32' | 'Date' | 'DateTime' | 'Decimal' | 'Float32' | 'Float64' | 'Int8' | 'Int16' | 'Int32' | 'Int64' | 'Int128' | 'Int256' | 'String' | 'UInt8' | 'UInt16' | 'UInt32' | 'UInt64' | 'UInt128' | 'UInt256' | 'UUID' | `Array(${string})` | `DateTime64(${number})` | `Decimal(${number}, ${number})` | `Enum8(${string})` | `Enum16(${string})` | `FixedString(${number})` | `LowCardinality(${string})` | `Map(${string}, ${string})` | `Nullable(${string})`;
6
+ export interface ClickHouseColumn<T> {
7
+ /** Alias expression (computed on read) */
8
+ alias?: string;
9
+ /** Column codec for compression */
10
+ codec?: string;
11
+ /** Comment for the column */
12
+ comment?: string;
13
+ /** Default value expression */
14
+ default?: string;
15
+ /** Create a secondary index (skipping index) on this column */
16
+ indexed?: boolean;
17
+ /** Granularity for the index. Default: 4 */
18
+ indexGranularity?: number;
19
+ /** Type of skipping index. Default: 'minmax' */
20
+ indexType?: 'bloom_filter' | 'minmax' | 'ngrambf_v1' | 'set' | 'tokenbf_v1';
21
+ /** Use LowCardinality wrapper for low-cardinality strings */
22
+ lowCardinality?: boolean;
23
+ /** Materialized value expression (computed on insert) */
24
+ materialized?: string;
25
+ name: Extract<keyof T, string>;
26
+ /** Whether the column can be null (wraps type in Nullable) */
27
+ nullable?: boolean;
28
+ /** Include this column in the ORDER BY clause (ClickHouse's primary index) */
29
+ primaryKey?: boolean;
30
+ /** Order of this column in the primary key (lower = first). Default: 0 */
31
+ primaryKeyOrder?: number;
32
+ /** TTL expression for this column */
33
+ ttl?: string;
34
+ /** The ClickHouse data type */
35
+ type: ClickHouseType;
36
+ }
2
37
  interface ClickHouseWriterParams<T> {
3
38
  /**
4
39
  * The maximum number of items to hold in memory
@@ -32,35 +67,23 @@ interface ClickHouseWriterParams<T> {
32
67
  */
33
68
  table: string;
34
69
  /**
35
- * Optional SQL schema definition for auto-creating the table.
36
- * Should be the column definitions part of a CREATE TABLE statement.
37
- * Example: "_id String, name String, created_at Int64"
70
+ * ClickHouse column definitions for auto-creating the table.
38
71
  */
39
- tableSchema?: string;
72
+ tableSchema: ClickHouseColumn<T>[];
40
73
  /**
41
74
  * Optional transformation function to convert documents before writing to ClickHouse.
42
75
  * Use this to map MongoDB document fields to ClickHouse column names.
43
76
  */
44
77
  transformFn?: (data: T) => Record<string, unknown>;
45
78
  }
46
- export interface ClickHouseWriterWriteOps<T> {
47
- data: T;
48
- }
49
79
  export declare class ClickHouseWriter<T> {
50
- private BATCH_SIZE;
51
- private BATCH_TIMEOUT_ENABLED;
52
- private BATCH_TIMEOUT_TIMER;
53
- private BATCH_TIMEOUT_VALUE;
54
- private CLIENT;
55
- private DATA_BUCKET_ALWAYS_AVAILABLE;
56
- private DATA_BUCKET_FLUSH_OPS;
57
- private IDLE_TIMEOUT_ENABLED;
58
- private IDLE_TIMEOUT_TIMER;
59
- private IDLE_TIMEOUT_VALUE;
60
- private SESSION_TIMER;
61
- private TABLE;
62
- private TABLE_SCHEMA?;
63
- private TRANSFORM_FN?;
80
+ private params;
81
+ private client;
82
+ private dataBucketAlwaysAvailable;
83
+ private dataBucketFlushOps;
84
+ private batchTimeoutTimer;
85
+ private idleTimeoutTimer;
86
+ private sessionTimer;
64
87
  constructor(params: ClickHouseWriterParams<T>);
65
88
  close(): Promise<void>;
66
89
  /**
@@ -71,8 +94,8 @@ export declare class ClickHouseWriter<T> {
71
94
  * @param engine The ClickHouse table engine to use (default: MergeTree)
72
95
  * @param orderBy The ORDER BY clause for the table (default: tuple())
73
96
  */
74
- ensureTable(schema?: string, engine?: string, orderBy?: string): Promise<void>;
75
- flush(callback?: (data?: ClickHouseWriterWriteOps<T>[]) => Promise<void>): Promise<void>;
97
+ ensureTable(schema?: ClickHouseColumn<T>[], engine?: string, orderBy?: string): Promise<void>;
98
+ flush(callback?: (data?: T[]) => Promise<void>): Promise<void>;
76
99
  /**
77
100
  * Write data to the ClickHouse table.
78
101
  *
@@ -81,6 +104,6 @@ export declare class ClickHouseWriter<T> {
81
104
  * @param writeCallback Callback function to call after the write operation is complete
82
105
  * @param flushCallback Callback function to call after the flush operation is complete
83
106
  */
84
- write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?: ClickHouseWriterWriteOps<T>[]) => Promise<void>): Promise<void>;
107
+ write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?: T[]) => Promise<void>): Promise<void>;
85
108
  }
86
109
  export {};
@@ -1,3 +1,4 @@
1
+ /* eslint-disable perfectionist/sort-classes */
1
2
  /* * */
2
3
  import { createClient } from '@clickhouse/client';
3
4
  import { Logger } from '@tmlmobilidade/logger';
@@ -5,57 +6,33 @@ import { Timer } from '@tmlmobilidade/timer';
5
6
  /* * */
6
7
  export class ClickHouseWriter {
7
8
  //
8
- BATCH_SIZE = 10000;
9
- BATCH_TIMEOUT_ENABLED = false;
10
- BATCH_TIMEOUT_TIMER = null;
11
- BATCH_TIMEOUT_VALUE = -1;
12
- CLIENT;
13
- DATA_BUCKET_ALWAYS_AVAILABLE = [];
14
- DATA_BUCKET_FLUSH_OPS = [];
15
- IDLE_TIMEOUT_ENABLED = false;
16
- IDLE_TIMEOUT_TIMER = null;
17
- IDLE_TIMEOUT_VALUE = -1;
18
- SESSION_TIMER = new Timer();
19
- TABLE;
20
- TABLE_SCHEMA;
21
- TRANSFORM_FN;
9
+ //
10
+ params;
11
+ client;
12
+ //
13
+ dataBucketAlwaysAvailable = [];
14
+ dataBucketFlushOps = [];
15
+ //
16
+ batchTimeoutTimer = null;
17
+ idleTimeoutTimer = null;
18
+ sessionTimer = new Timer();
22
19
  /* * */
23
20
  constructor(params) {
24
21
  // Ensure that the table name is provided
25
22
  if (!params.table)
26
23
  throw new Error('CLICKHOUSEWRITER: Table name is required');
27
- this.TABLE = params.table;
28
24
  // Ensure that the client config is provided
29
25
  if (!params.clientConfig)
30
26
  throw new Error('CLICKHOUSEWRITER: Client configuration is required');
31
- this.CLIENT = createClient(params.clientConfig);
32
- // Setup the optional transformation function
33
- if (params.transformFn) {
34
- this.TRANSFORM_FN = params.transformFn;
35
- }
36
- // Setup the optional table schema for auto-creation
37
- if (params.tableSchema) {
38
- this.TABLE_SCHEMA = params.tableSchema;
39
- }
40
- // Setup the optional idle timeout functionality
41
- if (params.idle_timeout && params.idle_timeout > 0) {
42
- this.IDLE_TIMEOUT_ENABLED = true;
43
- this.IDLE_TIMEOUT_VALUE = params.idle_timeout;
44
- }
45
- // Override the default batch size
46
- if (params.batch_size && params.batch_size > 0) {
47
- this.BATCH_SIZE = params.batch_size;
48
- }
49
- // Setup the optional batch timeout functionality
50
- if (params.batch_timeout && params.batch_timeout > 0) {
51
- this.BATCH_TIMEOUT_ENABLED = true;
52
- this.BATCH_TIMEOUT_VALUE = params.batch_timeout;
53
- }
27
+ if (!params.clientConfig.database || !params.clientConfig.password || !params.clientConfig.url || !params.clientConfig.username)
28
+ throw new Error('CLICKHOUSEWRITER: Client configuration is invalid. Ensure database, password, url and username are provided.');
29
+ this.params = params;
30
+ this.client = createClient(params.clientConfig);
54
31
  }
55
32
  /* * */
56
33
  async close() {
57
- await this.CLIENT.close();
58
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Connection closed.`);
34
+ await this.client.close();
35
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Connection closed.`);
59
36
  }
60
37
  /* * */
61
38
  /**
@@ -67,22 +44,22 @@ export class ClickHouseWriter {
67
44
  * @param orderBy The ORDER BY clause for the table (default: tuple())
68
45
  */
69
46
  async ensureTable(schema, engine = 'MergeTree', orderBy = 'tuple()') {
70
- const tableSchema = schema || this.TABLE_SCHEMA;
47
+ const tableSchema = schema?.map(column => `${column.name} ${column.type}`).join(', ');
71
48
  if (!tableSchema) {
72
- throw new Error(`CLICKHOUSEWRITER [${this.TABLE}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
49
+ throw new Error(`CLICKHOUSEWRITER [${this.params.table}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
73
50
  }
74
51
  try {
75
52
  const createTableQuery = `
76
- CREATE TABLE IF NOT EXISTS ${this.TABLE} (
53
+ CREATE TABLE IF NOT EXISTS ${this.params.table} (
77
54
  ${tableSchema}
78
55
  ) ENGINE = ${engine}
79
56
  ORDER BY ${orderBy}
80
57
  `;
81
- await this.CLIENT.command({ query: createTableQuery });
82
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Table ensured.`);
58
+ await this.client.command({ query: createTableQuery });
59
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Table ensured.`);
83
60
  }
84
61
  catch (error) {
85
- Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ ensureTable(): ${error.message}`);
62
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ ensureTable(): ${error.message}`);
86
63
  throw error;
87
64
  }
88
65
  }
@@ -91,62 +68,62 @@ export class ClickHouseWriter {
91
68
  try {
92
69
  //
93
70
  const flushTimer = new Timer();
94
- const sessionTimerResult = this.SESSION_TIMER.get();
71
+ const sessionTimerResult = this.sessionTimer.get();
95
72
  //
96
73
  // Invalidate all timers since a flush operation is being performed
97
- if (this.IDLE_TIMEOUT_TIMER) {
98
- clearTimeout(this.IDLE_TIMEOUT_TIMER);
99
- this.IDLE_TIMEOUT_TIMER = null;
74
+ if (this.idleTimeoutTimer) {
75
+ clearTimeout(this.idleTimeoutTimer);
76
+ this.idleTimeoutTimer = null;
100
77
  }
101
- if (this.BATCH_TIMEOUT_TIMER) {
102
- clearTimeout(this.BATCH_TIMEOUT_TIMER);
103
- this.BATCH_TIMEOUT_TIMER = null;
78
+ if (this.batchTimeoutTimer) {
79
+ clearTimeout(this.batchTimeoutTimer);
80
+ this.batchTimeoutTimer = null;
104
81
  }
105
82
  //
106
83
  // Skip if there is no data to flush
107
- if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length === 0)
84
+ if (this.dataBucketAlwaysAvailable.length === 0)
108
85
  return;
109
86
  //
110
- // Copy everything in DATA_BUCKET_ALWAYS_AVAILABLE to DATA_BUCKET_FLUSH_OPS
87
+ // Copy everything in dataBucketAlwaysAvailable to dataBucketFlushOps
111
88
  // to prevent any new incoming data from being added to the batch. This is to ensure
112
89
  // that the batch is not modified while it is being processed.
113
- this.DATA_BUCKET_FLUSH_OPS = [...this.DATA_BUCKET_FLUSH_OPS, ...this.DATA_BUCKET_ALWAYS_AVAILABLE];
114
- this.DATA_BUCKET_ALWAYS_AVAILABLE = [];
90
+ this.dataBucketFlushOps = [...this.dataBucketFlushOps, ...this.dataBucketAlwaysAvailable];
91
+ this.dataBucketAlwaysAvailable = [];
115
92
  //
116
93
  // Process the data for ClickHouse insert
117
94
  try {
118
95
  // Transform data if a transformation function is provided
119
- const insertData = this.DATA_BUCKET_FLUSH_OPS.map((item) => {
120
- if (this.TRANSFORM_FN) {
121
- return this.TRANSFORM_FN(item.data);
96
+ const insertData = this.dataBucketFlushOps.map((item) => {
97
+ if (this.params.transformFn) {
98
+ return this.params.transformFn(item);
122
99
  }
123
- return item.data;
100
+ return item;
124
101
  });
125
102
  // Insert data using ClickHouse client
126
- await this.CLIENT.insert({
103
+ await this.client.insert({
127
104
  format: 'JSONEachRow',
128
- table: this.TABLE,
105
+ table: this.params.table,
129
106
  values: insertData,
130
107
  });
131
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Flush | Length: ${this.DATA_BUCKET_FLUSH_OPS.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
108
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Flush | Length: ${this.dataBucketFlushOps.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
132
109
  //
133
110
  // Call the flush callback, if provided
134
111
  if (callback) {
135
- await callback(this.DATA_BUCKET_FLUSH_OPS);
112
+ await callback(this.dataBucketFlushOps);
136
113
  }
137
114
  //
138
115
  // Reset the flush bucket
139
- this.DATA_BUCKET_FLUSH_OPS = [];
116
+ this.dataBucketFlushOps = [];
140
117
  //
141
118
  }
142
119
  catch (error) {
143
- Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ flush().insert(): ${error.message}`);
120
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush().insert(): ${error.message}`);
144
121
  throw error; // Re-throw to allow retry logic at higher level
145
122
  }
146
123
  //
147
124
  }
148
125
  catch (error) {
149
- Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ flush(): ${error.message}`);
126
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush(): ${error.message}`);
150
127
  throw error; // Re-throw to allow retry logic at higher level
151
128
  }
152
129
  }
@@ -164,29 +141,30 @@ export class ClickHouseWriter {
164
141
  //
165
142
  // Invalidate the previously set idle timeout timer
166
143
  // since we are performing a write operation again.
167
- if (this.IDLE_TIMEOUT_TIMER) {
168
- clearTimeout(this.IDLE_TIMEOUT_TIMER);
169
- this.IDLE_TIMEOUT_TIMER = null;
144
+ if (this.idleTimeoutTimer) {
145
+ clearTimeout(this.idleTimeoutTimer);
146
+ this.idleTimeoutTimer = null;
170
147
  }
171
148
  //
172
149
  // Check if the batch is full
173
- if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length >= this.BATCH_SIZE) {
174
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Batch full. Flushing data...`);
150
+ const batchSize = this.params.batch_size ?? 10_000;
151
+ if (this.dataBucketAlwaysAvailable.length >= batchSize) {
152
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch full. Flushing data...`);
175
153
  await this.flush(flushCallback);
176
154
  }
177
155
  //
178
156
  // Reset the session timer (for logging purposes)
179
- if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length === 0) {
180
- this.SESSION_TIMER.reset();
157
+ if (this.dataBucketAlwaysAvailable.length === 0) {
158
+ this.sessionTimer.reset();
181
159
  }
182
160
  //
183
161
  // Add the current data to the batch
184
162
  if (Array.isArray(data)) {
185
- const combinedDataWithOptions = data.map(item => ({ data: item }));
186
- this.DATA_BUCKET_ALWAYS_AVAILABLE = [...this.DATA_BUCKET_ALWAYS_AVAILABLE, ...combinedDataWithOptions];
163
+ const combinedDataWithOptions = data.map(item => item);
164
+ this.dataBucketAlwaysAvailable = [...this.dataBucketAlwaysAvailable, ...combinedDataWithOptions];
187
165
  }
188
166
  else {
189
- this.DATA_BUCKET_ALWAYS_AVAILABLE.push({ data: data });
167
+ this.dataBucketAlwaysAvailable.push(data);
190
168
  }
191
169
  //
192
170
  // Call the write callback, if provided
@@ -196,20 +174,20 @@ export class ClickHouseWriter {
196
174
  //
197
175
  // Setup the idle timeout timer to flush the data if too long has passed
198
176
  // since the last write operation. Check if this functionality is enabled.
199
- if (this.IDLE_TIMEOUT_ENABLED && !this.IDLE_TIMEOUT_TIMER) {
200
- this.IDLE_TIMEOUT_TIMER = setTimeout(async () => {
201
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Idle timeout reached. Flushing data...`);
177
+ if (this.params.idle_timeout && this.params.idle_timeout > 0 && !this.idleTimeoutTimer) {
178
+ this.idleTimeoutTimer = setTimeout(async () => {
179
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Idle timeout reached. Flushing data...`);
202
180
  await this.flush(flushCallback);
203
- }, this.IDLE_TIMEOUT_VALUE);
181
+ }, this.params.idle_timeout);
204
182
  }
205
183
  //
206
184
  // Setup the batch timeout timer to flush the data, if the timeout value is reached,
207
185
  // even if the batch is not full. Check if this functionality is enabled.
208
- if (this.BATCH_TIMEOUT_ENABLED && !this.BATCH_TIMEOUT_TIMER) {
209
- this.BATCH_TIMEOUT_TIMER = setTimeout(async () => {
210
- Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Batch timeout reached. Flushing data...`);
186
+ if (this.params.batch_timeout && this.params.batch_timeout > 0 && !this.batchTimeoutTimer) {
187
+ this.batchTimeoutTimer = setTimeout(async () => {
188
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch timeout reached. Flushing data...`);
211
189
  await this.flush(flushCallback);
212
- }, this.BATCH_TIMEOUT_VALUE);
190
+ }, this.params.batch_timeout);
213
191
  }
214
192
  //
215
193
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tmlmobilidade/writers",
3
- "version": "20260121.1805.44",
3
+ "version": "20260121.2317.50",
4
4
  "author": {
5
5
  "email": "iso@tmlmobilidade.pt",
6
6
  "name": "TML-ISO"