@tmlmobilidade/writers 20260320.1746.41 → 20260322.1655.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generic.d.ts +62 -0
- package/dist/{clickhouse.js → generic.js} +22 -100
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -2
- package/dist/clickhouse.d.ts +0 -96
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
interface BatchWriterParams<T> {
|
|
2
|
+
/**
|
|
3
|
+
* The maximum number of items to hold in memory
|
|
4
|
+
* before flushing to the database.
|
|
5
|
+
* @required
|
|
6
|
+
*/
|
|
7
|
+
batch_size: number;
|
|
8
|
+
/**
|
|
9
|
+
* How long, in milliseconds, data should be kept in memory before
|
|
10
|
+
* flushing to the database. If this feature is enabled, a flush will
|
|
11
|
+
* be triggered even if the batch is not full. Disabled by default.
|
|
12
|
+
* @default disabled
|
|
13
|
+
*/
|
|
14
|
+
batch_timeout?: number;
|
|
15
|
+
/**
|
|
16
|
+
* How long to wait, in milliseconds, after the last write operation
|
|
17
|
+
* before flushing the data to the database. This can be used to prevent
|
|
18
|
+
* items staying in memory for too long if the batch size is not reached
|
|
19
|
+
* frequently enough. Disabled by default.
|
|
20
|
+
* @default disabled
|
|
21
|
+
*/
|
|
22
|
+
idle_timeout?: number;
|
|
23
|
+
/**
|
|
24
|
+
* The insert function to use for inserting data into the batch.
|
|
25
|
+
* @required
|
|
26
|
+
*/
|
|
27
|
+
insertFn: (data: T[]) => Promise<void>;
|
|
28
|
+
/**
|
|
29
|
+
* The title of this BatchWriter instance,
|
|
30
|
+
* used to identify the source of the logs.
|
|
31
|
+
* @required
|
|
32
|
+
*/
|
|
33
|
+
title: string;
|
|
34
|
+
}
|
|
35
|
+
export declare class BatchWriter<T> {
|
|
36
|
+
private params;
|
|
37
|
+
private dataBucketAlwaysAvailable;
|
|
38
|
+
private dataBucketFlushOps;
|
|
39
|
+
private batchTimeoutTimer;
|
|
40
|
+
private idleTimeoutTimer;
|
|
41
|
+
private sessionTimer;
|
|
42
|
+
constructor(params: BatchWriterParams<T>);
|
|
43
|
+
/**
|
|
44
|
+
* Flushes the current batch of data.
|
|
45
|
+
* This method is called internally when the batch size or timeouts are reached,
|
|
46
|
+
* but can also be called manually if needed.
|
|
47
|
+
* @param callback Optional callback to execute after the flush is complete, receiving the flushed data as a parameter
|
|
48
|
+
*/
|
|
49
|
+
flush(callback?: (data?: T[]) => Promise<void>): Promise<void>;
|
|
50
|
+
/**
|
|
51
|
+
* Write data to the batch.
|
|
52
|
+
* @param data The data to write
|
|
53
|
+
* @param options Options for the write operation (reserved for future use)
|
|
54
|
+
* @param writeCallback Callback function to call after the write operation is complete
|
|
55
|
+
* @param flushCallback Callback function to call after the flush operation is complete
|
|
56
|
+
*/
|
|
57
|
+
write(data: T | T[], { flushCallback, writeCallback }?: {
|
|
58
|
+
flushCallback?: (data?: T[]) => Promise<void>;
|
|
59
|
+
writeCallback?: () => Promise<void>;
|
|
60
|
+
}): Promise<void>;
|
|
61
|
+
}
|
|
62
|
+
export {};
|
|
@@ -1,97 +1,33 @@
|
|
|
1
1
|
/* eslint-disable perfectionist/sort-classes */
|
|
2
2
|
/* * */
|
|
3
|
-
import { createClient } from '@clickhouse/client';
|
|
4
3
|
import { Logger } from '@tmlmobilidade/logger';
|
|
5
4
|
import { Timer } from '@tmlmobilidade/timer';
|
|
6
5
|
/* * */
|
|
7
|
-
export class
|
|
8
|
-
//
|
|
6
|
+
export class BatchWriter {
|
|
9
7
|
//
|
|
10
8
|
params;
|
|
11
|
-
client;
|
|
12
|
-
//
|
|
13
9
|
dataBucketAlwaysAvailable = [];
|
|
14
10
|
dataBucketFlushOps = [];
|
|
15
|
-
//
|
|
16
11
|
batchTimeoutTimer = null;
|
|
17
12
|
idleTimeoutTimer = null;
|
|
18
13
|
sessionTimer = new Timer();
|
|
19
|
-
isInitialized = false;
|
|
20
|
-
/* * */
|
|
21
14
|
constructor(params) {
|
|
22
|
-
if (!params.
|
|
23
|
-
throw new Error('
|
|
24
|
-
if (params.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
const { database, password, url, username } = params.clientConfig;
|
|
30
|
-
if (!database || !password || !url || !username) {
|
|
31
|
-
throw new Error('CLICKHOUSEWRITER: Client configuration is invalid. Ensure database, password, url and username are provided.');
|
|
32
|
-
}
|
|
33
|
-
this.params = params;
|
|
34
|
-
this.client = createClient(params.clientConfig);
|
|
35
|
-
}
|
|
36
|
-
else {
|
|
37
|
-
throw new Error('CLICKHOUSEWRITER: Either client or clientConfig is required.');
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
/*
|
|
41
|
-
* Closes the ClickHouse client connection
|
|
42
|
-
* and clears any active timers.
|
|
43
|
-
*/
|
|
44
|
-
async close() {
|
|
45
|
-
await this.client.close();
|
|
46
|
-
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Connection closed.`);
|
|
47
|
-
}
|
|
48
|
-
/**
|
|
49
|
-
* Initializes the writer by ensuring the table exists.
|
|
50
|
-
* Safe to call multiple times.
|
|
51
|
-
*/
|
|
52
|
-
async init() {
|
|
53
|
-
if (this.isInitialized)
|
|
54
|
-
return;
|
|
55
|
-
await this.ensureTable();
|
|
56
|
-
this.isInitialized = true;
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
* Ensures the table exists in ClickHouse by creating it if it doesn't exist.
|
|
60
|
-
* Uses the tableSchema provided in the constructor, or an optional schema parameter.
|
|
61
|
-
* @param schema Optional schema to use instead of the constructor-provided tableSchema
|
|
62
|
-
* @param engine The ClickHouse table engine to use (default: ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}'))
|
|
63
|
-
* @param orderBy The ORDER BY clause for the table (default: tuple())
|
|
64
|
-
*/
|
|
65
|
-
async ensureTable(schema, engine = 'ReplicatedMergeTree(\'/clickhouse/tables/{shard}/{table}\', \'{replica}\')', orderBy = 'tuple()') {
|
|
66
|
-
const tableSchemaToUse = schema ?? this.params.tableSchema;
|
|
67
|
-
const tableSchema = tableSchemaToUse?.map(column => `${column.name} ${column.type}`).join(', ');
|
|
68
|
-
if (!tableSchema) {
|
|
69
|
-
throw new Error(`CLICKHOUSEWRITER [${this.params.table}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
|
|
70
|
-
}
|
|
71
|
-
try {
|
|
72
|
-
const createTableQuery = `
|
|
73
|
-
CREATE TABLE IF NOT EXISTS ${this.params.table} ON CLUSTER 'clickhouse-replica' (
|
|
74
|
-
${tableSchema}
|
|
75
|
-
) ENGINE = ${engine}
|
|
76
|
-
ORDER BY ${orderBy}
|
|
77
|
-
`;
|
|
78
|
-
await this.client.command({ query: createTableQuery });
|
|
79
|
-
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Table ensured.`);
|
|
80
|
-
}
|
|
81
|
-
catch (error) {
|
|
82
|
-
Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ ensureTable(): ${error.message}`);
|
|
83
|
-
throw error;
|
|
84
|
-
}
|
|
15
|
+
if (!params.title)
|
|
16
|
+
throw new Error('BATCHWRITER: Title is required.');
|
|
17
|
+
if (!params.insertFn)
|
|
18
|
+
throw new Error('BATCHWRITER: Insert function is required.');
|
|
19
|
+
if (!params.batch_size)
|
|
20
|
+
throw new Error('BATCHWRITER: Batch size is required.');
|
|
21
|
+
this.params = params;
|
|
85
22
|
}
|
|
86
23
|
/**
|
|
87
|
-
* Flushes the current batch of data
|
|
24
|
+
* Flushes the current batch of data.
|
|
88
25
|
* This method is called internally when the batch size or timeouts are reached,
|
|
89
26
|
* but can also be called manually if needed.
|
|
90
27
|
* @param callback Optional callback to execute after the flush is complete, receiving the flushed data as a parameter
|
|
91
28
|
*/
|
|
92
29
|
async flush(callback) {
|
|
93
30
|
try {
|
|
94
|
-
await this.init();
|
|
95
31
|
//
|
|
96
32
|
const flushTimer = new Timer();
|
|
97
33
|
const sessionTimerResult = this.sessionTimer.get();
|
|
@@ -116,45 +52,32 @@ export class ClickHouseWriter {
|
|
|
116
52
|
this.dataBucketFlushOps = [...this.dataBucketFlushOps, ...this.dataBucketAlwaysAvailable];
|
|
117
53
|
this.dataBucketAlwaysAvailable = [];
|
|
118
54
|
//
|
|
119
|
-
// Process the data for
|
|
55
|
+
// Process the data for batch insert
|
|
120
56
|
try {
|
|
121
|
-
//
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
return item;
|
|
127
|
-
});
|
|
128
|
-
// Insert data using ClickHouse client
|
|
129
|
-
await this.client.insert({
|
|
130
|
-
format: 'JSONEachRow',
|
|
131
|
-
table: this.params.table,
|
|
132
|
-
values: insertData,
|
|
133
|
-
});
|
|
134
|
-
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Flush | Length: ${this.dataBucketFlushOps.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
|
|
135
|
-
//
|
|
57
|
+
// Call the insert function provided in the params to perform the actual database insertion.
|
|
58
|
+
if (!this.params.insertFn)
|
|
59
|
+
throw new Error('BATCHWRITER: No insert function provided in params');
|
|
60
|
+
await this.params.insertFn(this.dataBucketFlushOps);
|
|
61
|
+
Logger.info(`BATCHWRITER [${this.params.title}]: Flush | Length: ${this.dataBucketFlushOps.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
|
|
136
62
|
// Call the flush callback, if provided
|
|
137
|
-
if (callback)
|
|
63
|
+
if (callback)
|
|
138
64
|
await callback(this.dataBucketFlushOps);
|
|
139
|
-
}
|
|
140
|
-
//
|
|
141
65
|
// Reset the flush bucket
|
|
142
66
|
this.dataBucketFlushOps = [];
|
|
143
|
-
//
|
|
144
67
|
}
|
|
145
68
|
catch (error) {
|
|
146
|
-
Logger.error(`
|
|
69
|
+
Logger.error(`BATCHWRITER [${this.params.title}]: Error @ flush().insert(): ${error.message}`);
|
|
147
70
|
throw error; // Re-throw to allow retry logic at higher level
|
|
148
71
|
}
|
|
149
72
|
//
|
|
150
73
|
}
|
|
151
74
|
catch (error) {
|
|
152
|
-
Logger.error(`
|
|
75
|
+
Logger.error(`BATCHWRITER [${this.params.title}]: Error @ flush(): ${error.message}`);
|
|
153
76
|
throw error; // Re-throw to allow retry logic at higher level
|
|
154
77
|
}
|
|
155
78
|
}
|
|
156
79
|
/**
|
|
157
|
-
* Write data to the
|
|
80
|
+
* Write data to the batch.
|
|
158
81
|
* @param data The data to write
|
|
159
82
|
* @param options Options for the write operation (reserved for future use)
|
|
160
83
|
* @param writeCallback Callback function to call after the write operation is complete
|
|
@@ -162,7 +85,6 @@ export class ClickHouseWriter {
|
|
|
162
85
|
*/
|
|
163
86
|
async write(data, { flushCallback, writeCallback } = {}) {
|
|
164
87
|
//
|
|
165
|
-
await this.init();
|
|
166
88
|
//
|
|
167
89
|
// Invalidate the previously set idle timeout timer
|
|
168
90
|
// since we are performing a write operation again.
|
|
@@ -174,7 +96,7 @@ export class ClickHouseWriter {
|
|
|
174
96
|
// Check if the batch is full
|
|
175
97
|
const batchSize = this.params.batch_size ?? 10_000;
|
|
176
98
|
if (this.dataBucketAlwaysAvailable.length >= batchSize) {
|
|
177
|
-
Logger.info(`
|
|
99
|
+
Logger.info(`BATCHWRITER [${this.params.title}]: Batch full. Flushing data...`);
|
|
178
100
|
await this.flush(flushCallback);
|
|
179
101
|
}
|
|
180
102
|
//
|
|
@@ -201,7 +123,7 @@ export class ClickHouseWriter {
|
|
|
201
123
|
// since the last write operation. Check if this functionality is enabled.
|
|
202
124
|
if (this.params.idle_timeout && this.params.idle_timeout > 0 && !this.idleTimeoutTimer) {
|
|
203
125
|
this.idleTimeoutTimer = setTimeout(async () => {
|
|
204
|
-
Logger.info(`
|
|
126
|
+
Logger.info(`BATCHWRITER [${this.params.title}]: Idle timeout reached. Flushing data...`);
|
|
205
127
|
await this.flush(flushCallback);
|
|
206
128
|
}, this.params.idle_timeout);
|
|
207
129
|
}
|
|
@@ -210,7 +132,7 @@ export class ClickHouseWriter {
|
|
|
210
132
|
// even if the batch is not full. Check if this functionality is enabled.
|
|
211
133
|
if (this.params.batch_timeout && this.params.batch_timeout > 0 && !this.batchTimeoutTimer) {
|
|
212
134
|
this.batchTimeoutTimer = setTimeout(async () => {
|
|
213
|
-
Logger.info(`
|
|
135
|
+
Logger.info(`BATCHWRITER [${this.params.title}]: Batch timeout reached. Flushing data...`);
|
|
214
136
|
await this.flush(flushCallback);
|
|
215
137
|
}, this.params.batch_timeout);
|
|
216
138
|
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tmlmobilidade/writers",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "20260322.1655.45",
|
|
4
4
|
"author": {
|
|
5
5
|
"email": "iso@tmlmobilidade.pt",
|
|
6
6
|
"name": "TML-ISO"
|
|
@@ -36,7 +36,6 @@
|
|
|
36
36
|
"watch": "tsc-watch --onSuccess 'resolve-tspaths'"
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
|
-
"@clickhouse/client": "1.18.2",
|
|
40
39
|
"@tmlmobilidade/clickhouse": "*",
|
|
41
40
|
"@tmlmobilidade/logger": "*",
|
|
42
41
|
"@tmlmobilidade/timer": "*",
|
package/dist/clickhouse.d.ts
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
import { type ClickHouseClientConfigOptions } from '@clickhouse/client';
|
|
2
|
-
import { NodeClickHouseClient } from '@clickhouse/client/dist/client.js';
|
|
3
|
-
import { ClickHouseColumn } from '@tmlmobilidade/clickhouse';
|
|
4
|
-
type ClickHouseWriterParams<T> = {
|
|
5
|
-
/**
|
|
6
|
-
* The maximum number of items to hold in memory
|
|
7
|
-
* before flushing to the database.
|
|
8
|
-
* @default 10_000
|
|
9
|
-
*/
|
|
10
|
-
batch_size?: number;
|
|
11
|
-
/**
|
|
12
|
-
* How long, in milliseconds, data should be kept in memory before
|
|
13
|
-
* flushing to the database. If this feature is enabled, a flush will
|
|
14
|
-
* be triggered even if the batch is not full. Disabled by default.
|
|
15
|
-
* @default disabled
|
|
16
|
-
*/
|
|
17
|
-
batch_timeout?: number;
|
|
18
|
-
/**
|
|
19
|
-
* If enabled, starts an async one-time table ensure operation in the constructor.
|
|
20
|
-
* Use `await writer.init()` if you need to block startup until it is completed.
|
|
21
|
-
* @default false
|
|
22
|
-
*/
|
|
23
|
-
ensure_table_on_init?: boolean;
|
|
24
|
-
/**
|
|
25
|
-
* How long to wait, in milliseconds, after the last write operation
|
|
26
|
-
* before flushing the data to the database. This can be used to prevent
|
|
27
|
-
* items staying in memory for too long if the batch size is not reached
|
|
28
|
-
* frequently enough. Disabled by default.
|
|
29
|
-
* @default disabled
|
|
30
|
-
*/
|
|
31
|
-
idle_timeout?: number;
|
|
32
|
-
/**
|
|
33
|
-
* The ClickHouse table name to write to.
|
|
34
|
-
* @required
|
|
35
|
-
*/
|
|
36
|
-
table: string;
|
|
37
|
-
/**
|
|
38
|
-
* Optional ClickHouse column definitions for auto-creating the table.
|
|
39
|
-
*/
|
|
40
|
-
tableSchema: ClickHouseColumn<T>[];
|
|
41
|
-
/**
|
|
42
|
-
* Optional transformation function to convert documents before writing to ClickHouse.
|
|
43
|
-
* Use this to map MongoDB document fields to ClickHouse column names.
|
|
44
|
-
*/
|
|
45
|
-
transformFn?: (data: T) => Record<string, unknown>;
|
|
46
|
-
} & ({
|
|
47
|
-
client: NodeClickHouseClient;
|
|
48
|
-
clientConfig?: never;
|
|
49
|
-
} | {
|
|
50
|
-
client?: never;
|
|
51
|
-
clientConfig: ClickHouseClientConfigOptions;
|
|
52
|
-
});
|
|
53
|
-
export declare class ClickHouseWriter<T> {
|
|
54
|
-
private params;
|
|
55
|
-
private client;
|
|
56
|
-
private dataBucketAlwaysAvailable;
|
|
57
|
-
private dataBucketFlushOps;
|
|
58
|
-
private batchTimeoutTimer;
|
|
59
|
-
private idleTimeoutTimer;
|
|
60
|
-
private sessionTimer;
|
|
61
|
-
private isInitialized;
|
|
62
|
-
constructor(params: ClickHouseWriterParams<T>);
|
|
63
|
-
close(): Promise<void>;
|
|
64
|
-
/**
|
|
65
|
-
* Initializes the writer by ensuring the table exists.
|
|
66
|
-
* Safe to call multiple times.
|
|
67
|
-
*/
|
|
68
|
-
init(): Promise<void>;
|
|
69
|
-
/**
|
|
70
|
-
* Ensures the table exists in ClickHouse by creating it if it doesn't exist.
|
|
71
|
-
* Uses the tableSchema provided in the constructor, or an optional schema parameter.
|
|
72
|
-
* @param schema Optional schema to use instead of the constructor-provided tableSchema
|
|
73
|
-
* @param engine The ClickHouse table engine to use (default: ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}'))
|
|
74
|
-
* @param orderBy The ORDER BY clause for the table (default: tuple())
|
|
75
|
-
*/
|
|
76
|
-
ensureTable(schema?: ClickHouseColumn<T>[], engine?: string, orderBy?: string): Promise<void>;
|
|
77
|
-
/**
|
|
78
|
-
* Flushes the current batch of data to ClickHouse.
|
|
79
|
-
* This method is called internally when the batch size or timeouts are reached,
|
|
80
|
-
* but can also be called manually if needed.
|
|
81
|
-
* @param callback Optional callback to execute after the flush is complete, receiving the flushed data as a parameter
|
|
82
|
-
*/
|
|
83
|
-
flush(callback?: (data?: T[]) => Promise<void>): Promise<void>;
|
|
84
|
-
/**
|
|
85
|
-
* Write data to the ClickHouse table.
|
|
86
|
-
* @param data The data to write
|
|
87
|
-
* @param options Options for the write operation (reserved for future use)
|
|
88
|
-
* @param writeCallback Callback function to call after the write operation is complete
|
|
89
|
-
* @param flushCallback Callback function to call after the flush operation is complete
|
|
90
|
-
*/
|
|
91
|
-
write(data: T | T[], { flushCallback, writeCallback }?: {
|
|
92
|
-
flushCallback?: (data?: T[]) => Promise<void>;
|
|
93
|
-
writeCallback?: () => Promise<void>;
|
|
94
|
-
}): Promise<void>;
|
|
95
|
-
}
|
|
96
|
-
export {};
|