@tmlmobilidade/writers 20260113.1457.7 → 20260121.1805.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clickhouse.d.ts +86 -0
- package/dist/clickhouse.js +216 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/package.json +2 -1
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { type ClickHouseClientConfigOptions } from '@clickhouse/client';
|
|
2
|
+
/**
 * Construction options for a ClickHouseWriter.
 *
 * `T` is the shape of the items handed to the writer before any
 * optional transformation is applied.
 */
interface ClickHouseWriterParams<T> {
    /**
     * Buffered item count at which the next write triggers a flush.
     * Only positive values override the default.
     * @default 10000
     */
    batch_size?: number;
    /**
     * Upper bound, in milliseconds, on how long buffered data may wait
     * before being flushed, regardless of how full the batch is.
     * The feature is only enabled when a positive value is given.
     * @default disabled
     */
    batch_timeout?: number;
    /**
     * Options forwarded to the ClickHouse client factory.
     * @required
     */
    clientConfig: ClickHouseClientConfigOptions;
    /**
     * Quiet period, in milliseconds, after the most recent write before
     * the buffer is flushed. Useful when writes arrive too rarely for the
     * batch size to be reached. The feature is only enabled when a
     * positive value is given.
     * @default disabled
     */
    idle_timeout?: number;
    /**
     * Name of the ClickHouse table that receives the data.
     * @required
     */
    table: string;
    /**
     * Column definitions used when the table has to be created, i.e. the
     * part that goes between the parentheses of a CREATE TABLE statement.
     * Example: "_id String, name String, created_at Int64"
     */
    tableSchema?: string;
    /**
     * Mapping applied to every item right before it is inserted, e.g. to
     * rename source document fields to ClickHouse column names.
     * When omitted, items are inserted as they were written.
     */
    transformFn?: (data: T) => Record<string, unknown>;
}
|
|
46
|
+
/**
 * Envelope around a single buffered item. Arrays of these envelopes are
 * what a flush callback receives.
 */
export interface ClickHouseWriterWriteOps<T> {
    /** The item exactly as it was passed to the writer. */
    data: T;
}
|
|
49
|
+
/**
 * Buffers items in memory and inserts them into a single ClickHouse table
 * in batches.
 */
export declare class ClickHouseWriter<T> {
    private BATCH_SIZE;
    private BATCH_TIMEOUT_ENABLED;
    private BATCH_TIMEOUT_TIMER;
    private BATCH_TIMEOUT_VALUE;
    private CLIENT;
    private DATA_BUCKET_ALWAYS_AVAILABLE;
    private DATA_BUCKET_FLUSH_OPS;
    private IDLE_TIMEOUT_ENABLED;
    private IDLE_TIMEOUT_TIMER;
    private IDLE_TIMEOUT_VALUE;
    private SESSION_TIMER;
    private TABLE;
    private TABLE_SCHEMA?;
    private TRANSFORM_FN?;
    /**
     * Creates the writer and its ClickHouse client.
     *
     * @param params Writer options; `table` and `clientConfig` are mandatory
     * @throws Error when `table` or `clientConfig` is missing
     */
    constructor(params: ClickHouseWriterParams<T>);
    /**
     * Closes the underlying ClickHouse client connection.
     */
    close(): Promise<void>;
    /**
     * Ensures the table exists in ClickHouse by creating it if it doesn't exist.
     * Uses the tableSchema provided in the constructor, or an optional schema parameter.
     *
     * @param schema Optional schema to use instead of the constructor-provided tableSchema
     * @param engine The ClickHouse table engine to use (default: MergeTree)
     * @param orderBy The ORDER BY clause for the table (default: tuple())
     * @throws Error when no schema is available, or when the CREATE TABLE command fails
     */
    ensureTable(schema?: string, engine?: string, orderBy?: string): Promise<void>;
    /**
     * Inserts the buffered items into the ClickHouse table and cancels any
     * pending idle/batch timers. The returned promise rejects when the
     * insert (or the callback) fails.
     *
     * @param callback Called with the flushed items after a successful insert
     */
    flush(callback?: (data?: ClickHouseWriterWriteOps<T>[]) => Promise<void>): Promise<void>;
    /**
     * Buffer data for the ClickHouse table. The buffer is flushed first
     * when it has already reached the configured batch size.
     *
     * @param data A single item or an array of items to buffer
     * @param writeCallback Callback function to call after the data has been buffered
     * @param flushCallback Callback function to call after a flush triggered by this write is complete
     */
    write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?: ClickHouseWriterWriteOps<T>[]) => Promise<void>): Promise<void>;
}
|
|
86
|
+
export {};
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/* * */
|
|
2
|
+
import { createClient } from '@clickhouse/client';
|
|
3
|
+
import { Logger } from '@tmlmobilidade/logger';
|
|
4
|
+
import { Timer } from '@tmlmobilidade/timer';
|
|
5
|
+
/* * */
|
|
6
|
+
/**
 * Buffers items in memory and inserts them into a single ClickHouse table
 * in batches.
 *
 * A flush happens when write() finds the buffer already at BATCH_SIZE, when
 * the optional idle or batch timer fires, or when flush() is called directly.
 */
export class ClickHouseWriter {
    // Buffered item count at which the next write() flushes before buffering.
    BATCH_SIZE = 10000;
    // Batch timeout: flush a fixed delay after the first write of a batch.
    BATCH_TIMEOUT_ENABLED = false;
    BATCH_TIMEOUT_TIMER = null;
    BATCH_TIMEOUT_VALUE = -1;
    // ClickHouse client created from params.clientConfig.
    CLIENT;
    // Items accepted by write() and not yet picked up by a flush.
    DATA_BUCKET_ALWAYS_AVAILABLE = [];
    // Items whose flush failed; they are sent first by the next flush.
    DATA_BUCKET_FLUSH_OPS = [];
    // Idle timeout: flush a fixed delay after the most recent write.
    IDLE_TIMEOUT_ENABLED = false;
    IDLE_TIMEOUT_TIMER = null;
    IDLE_TIMEOUT_VALUE = -1;
    // Measures how long the current batch has been accumulating (logging only).
    SESSION_TIMER = new Timer();
    TABLE;
    TABLE_SCHEMA;
    TRANSFORM_FN;

    /**
     * @param params Writer options; `table` and `clientConfig` are mandatory,
     * the timeouts and batch size only take effect when positive
     * @throws Error when `table` or `clientConfig` is missing
     */
    constructor(params) {
        // Ensure that the table name is provided
        if (!params.table)
            throw new Error('CLICKHOUSEWRITER: Table name is required');
        this.TABLE = params.table;
        // Ensure that the client config is provided
        if (!params.clientConfig)
            throw new Error('CLICKHOUSEWRITER: Client configuration is required');
        this.CLIENT = createClient(params.clientConfig);
        // Setup the optional transformation function
        if (params.transformFn) {
            this.TRANSFORM_FN = params.transformFn;
        }
        // Setup the optional table schema for auto-creation
        if (params.tableSchema) {
            this.TABLE_SCHEMA = params.tableSchema;
        }
        // Setup the optional idle timeout functionality
        if (params.idle_timeout && params.idle_timeout > 0) {
            this.IDLE_TIMEOUT_ENABLED = true;
            this.IDLE_TIMEOUT_VALUE = params.idle_timeout;
        }
        // Override the default batch size
        if (params.batch_size && params.batch_size > 0) {
            this.BATCH_SIZE = params.batch_size;
        }
        // Setup the optional batch timeout functionality
        if (params.batch_timeout && params.batch_timeout > 0) {
            this.BATCH_TIMEOUT_ENABLED = true;
            this.BATCH_TIMEOUT_VALUE = params.batch_timeout;
        }
    }

    /**
     * Closes the ClickHouse client connection.
     *
     * Buffered items are NOT flushed here; call flush() first if they must
     * be persisted.
     */
    async close() {
        // Cancel pending timers: otherwise they would fire a flush against
        // a client that has already been closed.
        if (this.IDLE_TIMEOUT_TIMER) {
            clearTimeout(this.IDLE_TIMEOUT_TIMER);
            this.IDLE_TIMEOUT_TIMER = null;
        }
        if (this.BATCH_TIMEOUT_TIMER) {
            clearTimeout(this.BATCH_TIMEOUT_TIMER);
            this.BATCH_TIMEOUT_TIMER = null;
        }
        await this.CLIENT.close();
        Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Connection closed.`);
    }

    /**
     * Ensures the table exists in ClickHouse by creating it if it doesn't exist.
     * Uses the tableSchema provided in the constructor, or an optional schema parameter.
     *
     * The table name, schema, engine and ORDER BY clause are interpolated
     * verbatim into the statement, so they must come from trusted input.
     *
     * @param schema Optional schema to use instead of the constructor-provided tableSchema
     * @param engine The ClickHouse table engine to use (default: MergeTree)
     * @param orderBy The ORDER BY clause for the table (default: tuple())
     * @throws Error when no schema is available, or when the command fails
     */
    async ensureTable(schema, engine = 'MergeTree', orderBy = 'tuple()') {
        const tableSchema = schema || this.TABLE_SCHEMA;
        if (!tableSchema) {
            throw new Error(`CLICKHOUSEWRITER [${this.TABLE}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
        }
        try {
            const createTableQuery = `
                CREATE TABLE IF NOT EXISTS ${this.TABLE} (
                    ${tableSchema}
                ) ENGINE = ${engine}
                ORDER BY ${orderBy}
            `;
            await this.CLIENT.command({ query: createTableQuery });
            Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Table ensured.`);
        }
        catch (error) {
            Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ ensureTable(): ${error.message}`);
            throw error;
        }
    }

    /**
     * Inserts everything that is buffered into the ClickHouse table.
     *
     * Items left over from a previously failed flush are sent first, so
     * calling flush() again after a rejection retries that batch even if no
     * new data has been written in the meantime.
     *
     * @param callback Called with the flushed items after a successful insert
     * @throws Rethrows any error raised by the transformation, the insert or the callback
     */
    async flush(callback) {
        try {
            const flushTimer = new Timer();
            const sessionTimerResult = this.SESSION_TIMER.get();
            // Invalidate all timers since a flush operation is being performed
            if (this.IDLE_TIMEOUT_TIMER) {
                clearTimeout(this.IDLE_TIMEOUT_TIMER);
                this.IDLE_TIMEOUT_TIMER = null;
            }
            if (this.BATCH_TIMEOUT_TIMER) {
                clearTimeout(this.BATCH_TIMEOUT_TIMER);
                this.BATCH_TIMEOUT_TIMER = null;
            }
            // Skip only when there is neither new data nor a batch left
            // over from a failed flush waiting to be retried.
            if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length === 0 && this.DATA_BUCKET_FLUSH_OPS.length === 0)
                return;
            // Take ownership of everything that is pending. Working on a
            // local snapshot (and emptying both buckets) means items written
            // while the insert is in flight land in the next batch, and an
            // overlapping flush cannot send this batch a second time.
            const batch = [...this.DATA_BUCKET_FLUSH_OPS, ...this.DATA_BUCKET_ALWAYS_AVAILABLE];
            this.DATA_BUCKET_FLUSH_OPS = [];
            this.DATA_BUCKET_ALWAYS_AVAILABLE = [];
            try {
                // Transform data if a transformation function is provided
                const insertData = batch.map((item) => {
                    if (this.TRANSFORM_FN) {
                        return this.TRANSFORM_FN(item.data);
                    }
                    return item.data;
                });
                // Insert data using ClickHouse client
                await this.CLIENT.insert({
                    format: 'JSONEachRow',
                    table: this.TABLE,
                    values: insertData,
                });
                Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Flush | Length: ${batch.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
                // Call the flush callback, if provided
                if (callback) {
                    await callback(batch);
                }
            }
            catch (error) {
                // Keep the batch (ahead of anything queued meanwhile) so the
                // next flush retries it.
                // NOTE(review): as before, a callback that throws after a
                // successful insert also keeps the batch, so those rows are
                // sent again on the next flush - confirm this is intended.
                this.DATA_BUCKET_FLUSH_OPS = [...batch, ...this.DATA_BUCKET_FLUSH_OPS];
                Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ flush().insert(): ${error.message}`);
                throw error; // Re-throw to allow retry logic at higher level
            }
        }
        catch (error) {
            Logger.error(`CLICKHOUSEWRITER [${this.TABLE}]: Error @ flush(): ${error.message}`);
            throw error; // Re-throw to allow retry logic at higher level
        }
    }

    /**
     * Buffer data for the ClickHouse table.
     *
     * If the buffer has already reached BATCH_SIZE it is flushed first, so a
     * failed insert makes this call reject before the new data is buffered.
     *
     * @param data A single item or an array of items to buffer
     * @param writeCallback Callback function to call after the data has been buffered
     * @param flushCallback Callback function to call after a flush triggered by this write is complete
     */
    async write(data, writeCallback, flushCallback) {
        // Invalidate the previously set idle timeout timer
        // since we are performing a write operation again.
        if (this.IDLE_TIMEOUT_TIMER) {
            clearTimeout(this.IDLE_TIMEOUT_TIMER);
            this.IDLE_TIMEOUT_TIMER = null;
        }
        // Check if the batch is full
        if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length >= this.BATCH_SIZE) {
            Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Batch full. Flushing data...`);
            await this.flush(flushCallback);
        }
        // Reset the session timer (for logging purposes)
        if (this.DATA_BUCKET_ALWAYS_AVAILABLE.length === 0) {
            this.SESSION_TIMER.reset();
        }
        // Add the current data to the batch. Appending in place avoids
        // copying the whole buffer on every array write.
        if (Array.isArray(data)) {
            for (const item of data) {
                this.DATA_BUCKET_ALWAYS_AVAILABLE.push({ data: item });
            }
        }
        else {
            this.DATA_BUCKET_ALWAYS_AVAILABLE.push({ data: data });
        }
        // Call the write callback, if provided
        if (writeCallback) {
            await writeCallback();
        }
        // Setup the idle timeout timer to flush the data if too long has passed
        // since the last write operation. Check if this functionality is enabled.
        if (this.IDLE_TIMEOUT_ENABLED && !this.IDLE_TIMEOUT_TIMER) {
            this.IDLE_TIMEOUT_TIMER = setTimeout(() => {
                Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Idle timeout reached. Flushing data...`);
                // Nothing awaits a timer callback, so a rejection here would be
                // unhandled. flush() has already logged the error and kept the
                // batch for the next attempt.
                this.flush(flushCallback).catch(() => undefined);
            }, this.IDLE_TIMEOUT_VALUE);
        }
        // Setup the batch timeout timer to flush the data, if the timeout value is reached,
        // even if the batch is not full. Check if this functionality is enabled.
        if (this.BATCH_TIMEOUT_ENABLED && !this.BATCH_TIMEOUT_TIMER) {
            this.BATCH_TIMEOUT_TIMER = setTimeout(() => {
                Logger.info(`CLICKHOUSEWRITER [${this.TABLE}]: Batch timeout reached. Flushing data...`);
                // Same reasoning as the idle timer: swallow the already-logged rejection.
                this.flush(flushCallback).catch(() => undefined);
            }, this.BATCH_TIMEOUT_VALUE);
        }
    }
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tmlmobilidade/writers",
|
|
3
|
-
"version": "20260113.1457.7",
|
|
3
|
+
"version": "20260121.1805.44",
|
|
4
4
|
"author": {
|
|
5
5
|
"email": "iso@tmlmobilidade.pt",
|
|
6
6
|
"name": "TML-ISO"
|
|
@@ -36,6 +36,7 @@
|
|
|
36
36
|
"watch": "tsc-watch --onSuccess 'resolve-tspaths'"
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
|
+
"@clickhouse/client": "1.12.1",
|
|
39
40
|
"@tmlmobilidade/logger": "*",
|
|
40
41
|
"@tmlmobilidade/timer": "*",
|
|
41
42
|
"papaparse": "5.5.3"
|