@tmlmobilidade/writers 20260113.1457.7 → 20260121.2317.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,109 @@
1
+ import { type ClickHouseClientConfigOptions } from '@clickhouse/client';
2
+ /**
3
+ * ClickHouse data types accepted for a column's `type`, including parameterized forms such as `Nullable(...)` and `Array(...)`.
4
+ */
5
+ export type ClickHouseType = 'Bool' | 'Boolean' | 'Date32' | 'Date' | 'DateTime' | 'Decimal' | 'Float32' | 'Float64' | 'Int8' | 'Int16' | 'Int32' | 'Int64' | 'Int128' | 'Int256' | 'String' | 'UInt8' | 'UInt16' | 'UInt32' | 'UInt64' | 'UInt128' | 'UInt256' | 'UUID' | `Array(${string})` | `DateTime64(${number})` | `Decimal(${number}, ${number})` | `Enum8(${string})` | `Enum16(${string})` | `FixedString(${number})` | `LowCardinality(${string})` | `Map(${string}, ${string})` | `Nullable(${string})`;
6
+ export interface ClickHouseColumn<T> { // NOTE(review): ensureTable() in this version only emits `name` and `type`; the other options look unused by the writer — confirm
7
+ /** Alias expression (computed on read) */
8
+ alias?: string;
9
+ /** Column codec for compression */
10
+ codec?: string;
11
+ /** Comment for the column */
12
+ comment?: string;
13
+ /** Default value expression */
14
+ default?: string;
15
+ /** Create a secondary index (skipping index) on this column */
16
+ indexed?: boolean;
17
+ /** Granularity for the index. Default: 4 */
18
+ indexGranularity?: number;
19
+ /** Type of skipping index. Default: 'minmax' */
20
+ indexType?: 'bloom_filter' | 'minmax' | 'ngrambf_v1' | 'set' | 'tokenbf_v1';
21
+ /** Use LowCardinality wrapper for low-cardinality strings */
22
+ lowCardinality?: boolean;
23
+ /** Materialized value expression (computed on insert) */
24
+ materialized?: string;
25
+ name: Extract<keyof T, string>; // Column name; restricted to the string keys of T
26
+ /** Whether the column can be null (wraps type in Nullable) */
27
+ nullable?: boolean;
28
+ /** Include this column in the ORDER BY clause (ClickHouse's primary index) */
29
+ primaryKey?: boolean;
30
+ /** Order of this column in the primary key (lower = first). Default: 0 */
31
+ primaryKeyOrder?: number;
32
+ /** TTL expression for this column */
33
+ ttl?: string;
34
+ /** The ClickHouse data type */
35
+ type: ClickHouseType;
36
+ }
37
+ interface ClickHouseWriterParams<T> { // Constructor options for ClickHouseWriter
38
+ /**
39
+ * The maximum number of items to hold in memory
40
+ * before flushing to the database.
41
+ * @default 10000
42
+ */
43
+ batch_size?: number;
44
+ /**
45
+ * How long, in milliseconds, data should be kept in memory before
46
+ * flushing to the database. If this feature is enabled, a flush will
47
+ * be triggered even if the batch is not full. Disabled by default.
48
+ * @default disabled
49
+ */
50
+ batch_timeout?: number;
51
+ /**
52
+ * ClickHouse client configuration options.
53
+ * @required
54
+ */
55
+ clientConfig: ClickHouseClientConfigOptions;
56
+ /**
57
+ * How long to wait, in milliseconds, after the last write operation
58
+ * before flushing the data to the database. This can be used to prevent
59
+ * items staying in memory for too long if the batch size is not reached
60
+ * frequently enough. Disabled by default.
61
+ * @default disabled
62
+ */
63
+ idle_timeout?: number;
64
+ /**
65
+ * The ClickHouse table name to write to.
66
+ * @required
67
+ */
68
+ table: string;
69
+ /**
70
+ * Optional ClickHouse column definitions for auto-creating the table.
71
+ */
72
+ tableSchema?: ClickHouseColumn<T>[]; // Optional, as documented: the constructor does not require it
73
+ /**
74
+ * Optional transformation function to convert documents before writing to ClickHouse.
75
+ * Use this to map MongoDB document fields to ClickHouse column names.
76
+ */
77
+ transformFn?: (data: T) => Record<string, unknown>;
78
+ }
79
+ export declare class ClickHouseWriter<T> {
80
+ private params;
81
+ private client;
82
+ private dataBucketAlwaysAvailable;
83
+ private dataBucketFlushOps;
84
+ private batchTimeoutTimer;
85
+ private idleTimeoutTimer;
86
+ private sessionTimer;
87
+ constructor(params: ClickHouseWriterParams<T>);
88
+ close(): Promise<void>;
89
+ /**
90
+ * Ensures the table exists in ClickHouse by creating it if it doesn't exist.
91
+ * Uses the tableSchema provided in the constructor, or an optional schema parameter.
92
+ *
93
+ * @param schema Optional schema to use instead of the constructor-provided tableSchema
94
+ * @param engine The ClickHouse table engine to use (default: MergeTree)
95
+ * @param orderBy The ORDER BY clause for the table (default: tuple())
96
+ */
97
+ ensureTable(schema?: ClickHouseColumn<T>[], engine?: string, orderBy?: string): Promise<void>;
98
+ flush(callback?: (data?: T[]) => Promise<void>): Promise<void>;
99
+ /**
100
+ * Buffer data to be written to the ClickHouse table.
101
+ * Items are held in memory and inserted by a later flush.
102
+ *
103
+ * @param data A single item or an array of items to add to the batch
104
+ * @param writeCallback Callback awaited after the data has been added to the batch
105
+ * @param flushCallback Callback forwarded to flush() for any flush triggered or scheduled by this call
106
+ */
107
+ write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?: T[]) => Promise<void>): Promise<void>;
108
+ }
109
+ export {};
@@ -0,0 +1,194 @@
1
+ /* eslint-disable perfectionist/sort-classes */
2
+ /* * */
3
+ import { createClient } from '@clickhouse/client';
4
+ import { Logger } from '@tmlmobilidade/logger';
5
+ import { Timer } from '@tmlmobilidade/timer';
6
+ /* * */
7
+ export class ClickHouseWriter {
8
+ //
9
+ //
10
+ params;
11
+ client;
12
+ //
13
+ dataBucketAlwaysAvailable = [];
14
+ dataBucketFlushOps = [];
15
+ //
16
+ batchTimeoutTimer = null;
17
+ idleTimeoutTimer = null;
18
+ sessionTimer = new Timer();
19
+ /* * */
20
+ constructor(params) {
21
+ // Ensure that the table name is provided
22
+ if (!params.table)
23
+ throw new Error('CLICKHOUSEWRITER: Table name is required');
24
+ // Ensure that the client config is provided
25
+ if (!params.clientConfig)
26
+ throw new Error('CLICKHOUSEWRITER: Client configuration is required');
27
+ if (!params.clientConfig.database || !params.clientConfig.password || !params.clientConfig.url || !params.clientConfig.username)
28
+ throw new Error('CLICKHOUSEWRITER: Client configuration is invalid. Ensure database, password, url and username are provided.');
29
+ this.params = params;
30
+ this.client = createClient(params.clientConfig);
31
+ }
32
+ /* * */
33
+ async close() {
34
+ await this.client.close();
35
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Connection closed.`);
36
+ }
37
+ /* * */
38
+ /**
39
+ * Ensures the table exists in ClickHouse by creating it if it doesn't exist.
40
+ * Uses the tableSchema provided in the constructor, or an optional schema parameter.
41
+ *
42
+ * @param schema Optional schema to use instead of the constructor-provided tableSchema
43
+ * @param engine The ClickHouse table engine to use (default: MergeTree)
44
+ * @param orderBy The ORDER BY clause for the table (default: tuple())
45
+ */
46
+ async ensureTable(schema, engine = 'MergeTree', orderBy = 'tuple()') {
47
+ const tableSchema = schema?.map(column => `${column.name} ${column.type}`).join(', ');
48
+ if (!tableSchema) {
49
+ throw new Error(`CLICKHOUSEWRITER [${this.params.table}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
50
+ }
51
+ try {
52
+ const createTableQuery = `
53
+ CREATE TABLE IF NOT EXISTS ${this.params.table} (
54
+ ${tableSchema}
55
+ ) ENGINE = ${engine}
56
+ ORDER BY ${orderBy}
57
+ `;
58
+ await this.client.command({ query: createTableQuery });
59
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Table ensured.`);
60
+ }
61
+ catch (error) {
62
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ ensureTable(): ${error.message}`);
63
+ throw error;
64
+ }
65
+ }
66
+ /* * */
67
+ async flush(callback) {
68
+ try {
69
+ //
70
+ const flushTimer = new Timer();
71
+ const sessionTimerResult = this.sessionTimer.get();
72
+ //
73
+ // Invalidate all timers since a flush operation is being performed
74
+ if (this.idleTimeoutTimer) {
75
+ clearTimeout(this.idleTimeoutTimer);
76
+ this.idleTimeoutTimer = null;
77
+ }
78
+ if (this.batchTimeoutTimer) {
79
+ clearTimeout(this.batchTimeoutTimer);
80
+ this.batchTimeoutTimer = null;
81
+ }
82
+ //
83
+ // Skip if there is no data to flush
84
+ if (this.dataBucketAlwaysAvailable.length === 0)
85
+ return;
86
+ //
87
+ // Copy everything in dataBucketAlwaysAvailable to dataBucketFlushOps
88
+ // to prevent any new incoming data to be added to the batch. This is to ensure
89
+ // that the batch is not modified while it is being processed.
90
+ this.dataBucketFlushOps = [...this.dataBucketFlushOps, ...this.dataBucketAlwaysAvailable];
91
+ this.dataBucketAlwaysAvailable = [];
92
+ //
93
+ // Process the data for ClickHouse insert
94
+ try {
95
+ // Transform data if a transformation function is provided
96
+ const insertData = this.dataBucketFlushOps.map((item) => {
97
+ if (this.params.transformFn) {
98
+ return this.params.transformFn(item);
99
+ }
100
+ return item;
101
+ });
102
+ // Insert data using ClickHouse client
103
+ await this.client.insert({
104
+ format: 'JSONEachRow',
105
+ table: this.params.table,
106
+ values: insertData,
107
+ });
108
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Flush | Length: ${this.dataBucketFlushOps.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
109
+ //
110
+ // Call the flush callback, if provided
111
+ if (callback) {
112
+ await callback(this.dataBucketFlushOps);
113
+ }
114
+ //
115
+ // Reset the flush bucket
116
+ this.dataBucketFlushOps = [];
117
+ //
118
+ }
119
+ catch (error) {
120
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush().insert(): ${error.message}`);
121
+ throw error; // Re-throw to allow retry logic at higher level
122
+ }
123
+ //
124
+ }
125
+ catch (error) {
126
+ Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush(): ${error.message}`);
127
+ throw error; // Re-throw to allow retry logic at higher level
128
+ }
129
+ }
130
+ /* * */
131
+ /**
132
+ * Write data to the ClickHouse table.
133
+ *
134
+ * @param data The data to write
135
+ * @param options Options for the write operation (reserved for future use)
136
+ * @param writeCallback Callback function to call after the write operation is complete
137
+ * @param flushCallback Callback function to call after the flush operation is complete
138
+ */
139
+ async write(data, writeCallback, flushCallback) {
140
+ //
141
+ //
142
+ // Invalidate the previously set idle timeout timer
143
+ // since we are performing a write operation again.
144
+ if (this.idleTimeoutTimer) {
145
+ clearTimeout(this.idleTimeoutTimer);
146
+ this.idleTimeoutTimer = null;
147
+ }
148
+ //
149
+ // Check if the batch is full
150
+ const batchSize = this.params.batch_size ?? 10_000;
151
+ if (this.dataBucketAlwaysAvailable.length >= batchSize) {
152
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch full. Flushing data...`);
153
+ await this.flush(flushCallback);
154
+ }
155
+ //
156
+ // Reset the session timer (for logging purposes)
157
+ if (this.dataBucketAlwaysAvailable.length === 0) {
158
+ this.sessionTimer.reset();
159
+ }
160
+ //
161
+ // Add the current data to the batch
162
+ if (Array.isArray(data)) {
163
+ const combinedDataWithOptions = data.map(item => item);
164
+ this.dataBucketAlwaysAvailable = [...this.dataBucketAlwaysAvailable, ...combinedDataWithOptions];
165
+ }
166
+ else {
167
+ this.dataBucketAlwaysAvailable.push(data);
168
+ }
169
+ //
170
+ // Call the write callback, if provided
171
+ if (writeCallback) {
172
+ await writeCallback();
173
+ }
174
+ //
175
+ // Setup the idle timeout timer to flush the data if too long has passed
176
+ // since the last write operation. Check if this functionality is enabled.
177
+ if (this.params.idle_timeout && this.params.idle_timeout > 0 && !this.idleTimeoutTimer) {
178
+ this.idleTimeoutTimer = setTimeout(async () => {
179
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Idle timeout reached. Flushing data...`);
180
+ await this.flush(flushCallback);
181
+ }, this.params.idle_timeout);
182
+ }
183
+ //
184
+ // Setup the batch timeout timer to flush the data, if the timeout value is reached,
185
+ // even if the batch is not full. Check if this functionality is enabled.
186
+ if (this.params.batch_timeout && this.params.batch_timeout > 0 && !this.batchTimeoutTimer) {
187
+ this.batchTimeoutTimer = setTimeout(async () => {
188
+ Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch timeout reached. Flushing data...`);
189
+ await this.flush(flushCallback);
190
+ }, this.params.batch_timeout);
191
+ }
192
+ //
193
+ }
194
+ }
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ export * from './clickhouse.js';
1
2
  export * from './csv.js';
2
3
  export * from './json.js';
3
4
  export * from './mongo.js';
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
+ export * from './clickhouse.js';
1
2
  export * from './csv.js';
2
3
  export * from './json.js';
3
4
  export * from './mongo.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tmlmobilidade/writers",
3
- "version": "20260113.1457.7",
3
+ "version": "20260121.2317.50",
4
4
  "author": {
5
5
  "email": "iso@tmlmobilidade.pt",
6
6
  "name": "TML-ISO"
@@ -36,6 +36,7 @@
36
36
  "watch": "tsc-watch --onSuccess 'resolve-tspaths'"
37
37
  },
38
38
  "dependencies": {
39
+ "@clickhouse/client": "1.12.1",
39
40
  "@tmlmobilidade/logger": "*",
40
41
  "@tmlmobilidade/timer": "*",
41
42
  "papaparse": "5.5.3"