@tmlmobilidade/writers 20260121.1805.44 → 20260121.2332.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clickhouse.d.ts +47 -24
- package/dist/clickhouse.js +65 -86
- package/package.json +1 -1
package/dist/clickhouse.d.ts
CHANGED
|
@@ -1,4 +1,39 @@
|
|
|
1
1
|
import { type ClickHouseClientConfigOptions } from '@clickhouse/client';
|
|
2
|
+
/**
|
|
3
|
+
* Supported ClickHouse data types
|
|
4
|
+
*/
|
|
5
|
+
export type ClickHouseType = 'Bool' | 'Boolean' | 'Date32' | 'Date' | 'DateTime' | 'Decimal' | 'Float32' | 'Float64' | 'Int8' | 'Int16' | 'Int32' | 'Int64' | 'Int128' | 'Int256' | 'String' | 'UInt8' | 'UInt16' | 'UInt32' | 'UInt64' | 'UInt128' | 'UInt256' | 'UUID' | `Array(${string})` | `DateTime64(${number})` | `Decimal(${number}, ${number})` | `Enum8(${string})` | `Enum16(${string})` | `FixedString(${number})` | `LowCardinality(${string})` | `Map(${string}, ${string})` | `Nullable(${string})`;
|
|
6
|
+
export interface ClickHouseColumn<T> {
|
|
7
|
+
/** Alias expression (computed on read) */
|
|
8
|
+
alias?: string;
|
|
9
|
+
/** Column codec for compression */
|
|
10
|
+
codec?: string;
|
|
11
|
+
/** Comment for the column */
|
|
12
|
+
comment?: string;
|
|
13
|
+
/** Default value expression */
|
|
14
|
+
default?: string;
|
|
15
|
+
/** Create a secondary index (skipping index) on this column */
|
|
16
|
+
indexed?: boolean;
|
|
17
|
+
/** Granularity for the index. Default: 4 */
|
|
18
|
+
indexGranularity?: number;
|
|
19
|
+
/** Type of skipping index. Default: 'minmax' */
|
|
20
|
+
indexType?: 'bloom_filter' | 'minmax' | 'ngrambf_v1' | 'set' | 'tokenbf_v1';
|
|
21
|
+
/** Use LowCardinality wrapper for low-cardinality strings */
|
|
22
|
+
lowCardinality?: boolean;
|
|
23
|
+
/** Materialized value expression (computed on insert) */
|
|
24
|
+
materialized?: string;
|
|
25
|
+
name: Extract<keyof T, string>;
|
|
26
|
+
/** Whether the column can be null (wraps type in Nullable) */
|
|
27
|
+
nullable?: boolean;
|
|
28
|
+
/** Include this column in the ORDER BY clause (ClickHouse's primary index) */
|
|
29
|
+
primaryKey?: boolean;
|
|
30
|
+
/** Order of this column in the primary key (lower = first). Default: 0 */
|
|
31
|
+
primaryKeyOrder?: number;
|
|
32
|
+
/** TTL expression for this column */
|
|
33
|
+
ttl?: string;
|
|
34
|
+
/** The ClickHouse data type */
|
|
35
|
+
type: ClickHouseType;
|
|
36
|
+
}
|
|
2
37
|
interface ClickHouseWriterParams<T> {
|
|
3
38
|
/**
|
|
4
39
|
* The maximum number of items to hold in memory
|
|
@@ -32,35 +67,23 @@ interface ClickHouseWriterParams<T> {
|
|
|
32
67
|
*/
|
|
33
68
|
table: string;
|
|
34
69
|
/**
|
|
35
|
-
* Optional
|
|
36
|
-
* Should be the column definitions part of a CREATE TABLE statement.
|
|
37
|
-
* Example: "_id String, name String, created_at Int64"
|
|
70
|
+
* Optional ClickHouse column definitions for auto-creating the table.
|
|
38
71
|
*/
|
|
39
|
-
tableSchema
|
|
72
|
+
tableSchema: ClickHouseColumn<T>[];
|
|
40
73
|
/**
|
|
41
74
|
* Optional transformation function to convert documents before writing to ClickHouse.
|
|
42
75
|
* Use this to map MongoDB document fields to ClickHouse column names.
|
|
43
76
|
*/
|
|
44
77
|
transformFn?: (data: T) => Record<string, unknown>;
|
|
45
78
|
}
|
|
46
|
-
export interface ClickHouseWriterWriteOps<T> {
|
|
47
|
-
data: T;
|
|
48
|
-
}
|
|
49
79
|
export declare class ClickHouseWriter<T> {
|
|
50
|
-
private
|
|
51
|
-
private
|
|
52
|
-
private
|
|
53
|
-
private
|
|
54
|
-
private
|
|
55
|
-
private
|
|
56
|
-
private
|
|
57
|
-
private IDLE_TIMEOUT_ENABLED;
|
|
58
|
-
private IDLE_TIMEOUT_TIMER;
|
|
59
|
-
private IDLE_TIMEOUT_VALUE;
|
|
60
|
-
private SESSION_TIMER;
|
|
61
|
-
private TABLE;
|
|
62
|
-
private TABLE_SCHEMA?;
|
|
63
|
-
private TRANSFORM_FN?;
|
|
80
|
+
private params;
|
|
81
|
+
private client;
|
|
82
|
+
private dataBucketAlwaysAvailable;
|
|
83
|
+
private dataBucketFlushOps;
|
|
84
|
+
private batchTimeoutTimer;
|
|
85
|
+
private idleTimeoutTimer;
|
|
86
|
+
private sessionTimer;
|
|
64
87
|
constructor(params: ClickHouseWriterParams<T>);
|
|
65
88
|
close(): Promise<void>;
|
|
66
89
|
/**
|
|
@@ -71,8 +94,8 @@ export declare class ClickHouseWriter<T> {
|
|
|
71
94
|
* @param engine The ClickHouse table engine to use (default: MergeTree)
|
|
72
95
|
* @param orderBy The ORDER BY clause for the table (default: tuple())
|
|
73
96
|
*/
|
|
74
|
-
ensureTable(schema?:
|
|
75
|
-
flush(callback?: (data?:
|
|
97
|
+
ensureTable(schema?: ClickHouseColumn<T>[], engine?: string, orderBy?: string): Promise<void>;
|
|
98
|
+
flush(callback?: (data?: T[]) => Promise<void>): Promise<void>;
|
|
76
99
|
/**
|
|
77
100
|
* Write data to the ClickHouse table.
|
|
78
101
|
*
|
|
@@ -81,6 +104,6 @@ export declare class ClickHouseWriter<T> {
|
|
|
81
104
|
* @param writeCallback Callback function to call after the write operation is complete
|
|
82
105
|
* @param flushCallback Callback function to call after the flush operation is complete
|
|
83
106
|
*/
|
|
84
|
-
write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?:
|
|
107
|
+
write(data: T | T[], writeCallback?: () => Promise<void>, flushCallback?: (data?: T[]) => Promise<void>): Promise<void>;
|
|
85
108
|
}
|
|
86
109
|
export {};
|
package/dist/clickhouse.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable perfectionist/sort-classes */
|
|
1
2
|
/* * */
|
|
2
3
|
import { createClient } from '@clickhouse/client';
|
|
3
4
|
import { Logger } from '@tmlmobilidade/logger';
|
|
@@ -5,57 +6,33 @@ import { Timer } from '@tmlmobilidade/timer';
|
|
|
5
6
|
/* * */
|
|
6
7
|
export class ClickHouseWriter {
|
|
7
8
|
//
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
SESSION_TIMER = new Timer();
|
|
19
|
-
TABLE;
|
|
20
|
-
TABLE_SCHEMA;
|
|
21
|
-
TRANSFORM_FN;
|
|
9
|
+
//
|
|
10
|
+
params;
|
|
11
|
+
client;
|
|
12
|
+
//
|
|
13
|
+
dataBucketAlwaysAvailable = [];
|
|
14
|
+
dataBucketFlushOps = [];
|
|
15
|
+
//
|
|
16
|
+
batchTimeoutTimer = null;
|
|
17
|
+
idleTimeoutTimer = null;
|
|
18
|
+
sessionTimer = new Timer();
|
|
22
19
|
/* * */
|
|
23
20
|
constructor(params) {
|
|
24
21
|
// Ensure that the table name is provided
|
|
25
22
|
if (!params.table)
|
|
26
23
|
throw new Error('CLICKHOUSEWRITER: Table name is required');
|
|
27
|
-
this.TABLE = params.table;
|
|
28
24
|
// Ensure that the client config is provided
|
|
29
25
|
if (!params.clientConfig)
|
|
30
26
|
throw new Error('CLICKHOUSEWRITER: Client configuration is required');
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
}
|
|
36
|
-
// Setup the optional table schema for auto-creation
|
|
37
|
-
if (params.tableSchema) {
|
|
38
|
-
this.TABLE_SCHEMA = params.tableSchema;
|
|
39
|
-
}
|
|
40
|
-
// Setup the optional idle timeout functionality
|
|
41
|
-
if (params.idle_timeout && params.idle_timeout > 0) {
|
|
42
|
-
this.IDLE_TIMEOUT_ENABLED = true;
|
|
43
|
-
this.IDLE_TIMEOUT_VALUE = params.idle_timeout;
|
|
44
|
-
}
|
|
45
|
-
// Override the default batch size
|
|
46
|
-
if (params.batch_size && params.batch_size > 0) {
|
|
47
|
-
this.BATCH_SIZE = params.batch_size;
|
|
48
|
-
}
|
|
49
|
-
// Setup the optional batch timeout functionality
|
|
50
|
-
if (params.batch_timeout && params.batch_timeout > 0) {
|
|
51
|
-
this.BATCH_TIMEOUT_ENABLED = true;
|
|
52
|
-
this.BATCH_TIMEOUT_VALUE = params.batch_timeout;
|
|
53
|
-
}
|
|
27
|
+
if (!params.clientConfig.database || !params.clientConfig.password || !params.clientConfig.url || !params.clientConfig.username)
|
|
28
|
+
throw new Error('CLICKHOUSEWRITER: Client configuration is invalid. Ensure database, password, url and username are provided.');
|
|
29
|
+
this.params = params;
|
|
30
|
+
this.client = createClient(params.clientConfig);
|
|
54
31
|
}
|
|
55
32
|
/* * */
|
|
56
33
|
async close() {
|
|
57
|
-
await this.
|
|
58
|
-
Logger.info(`CLICKHOUSEWRITER [${this.
|
|
34
|
+
await this.client.close();
|
|
35
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Connection closed.`);
|
|
59
36
|
}
|
|
60
37
|
/* * */
|
|
61
38
|
/**
|
|
@@ -67,22 +44,23 @@ export class ClickHouseWriter {
|
|
|
67
44
|
* @param orderBy The ORDER BY clause for the table (default: tuple())
|
|
68
45
|
*/
|
|
69
46
|
async ensureTable(schema, engine = 'MergeTree', orderBy = 'tuple()') {
|
|
70
|
-
const
|
|
47
|
+
const tableSchemaToUse = schema ?? this.params.tableSchema;
|
|
48
|
+
const tableSchema = tableSchemaToUse?.map(column => `${column.name} ${column.type}`).join(', ');
|
|
71
49
|
if (!tableSchema) {
|
|
72
|
-
throw new Error(`CLICKHOUSEWRITER [${this.
|
|
50
|
+
throw new Error(`CLICKHOUSEWRITER [${this.params.table}]: Cannot ensure table without a schema. Provide tableSchema in constructor or as parameter.`);
|
|
73
51
|
}
|
|
74
52
|
try {
|
|
75
53
|
const createTableQuery = `
|
|
76
|
-
CREATE TABLE IF NOT EXISTS ${this.
|
|
54
|
+
CREATE TABLE IF NOT EXISTS ${this.params.table} (
|
|
77
55
|
${tableSchema}
|
|
78
56
|
) ENGINE = ${engine}
|
|
79
57
|
ORDER BY ${orderBy}
|
|
80
58
|
`;
|
|
81
|
-
await this.
|
|
82
|
-
Logger.info(`CLICKHOUSEWRITER [${this.
|
|
59
|
+
await this.client.command({ query: createTableQuery });
|
|
60
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Table ensured.`);
|
|
83
61
|
}
|
|
84
62
|
catch (error) {
|
|
85
|
-
Logger.error(`CLICKHOUSEWRITER [${this.
|
|
63
|
+
Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ ensureTable(): ${error.message}`);
|
|
86
64
|
throw error;
|
|
87
65
|
}
|
|
88
66
|
}
|
|
@@ -91,62 +69,62 @@ export class ClickHouseWriter {
|
|
|
91
69
|
try {
|
|
92
70
|
//
|
|
93
71
|
const flushTimer = new Timer();
|
|
94
|
-
const sessionTimerResult = this.
|
|
72
|
+
const sessionTimerResult = this.sessionTimer.get();
|
|
95
73
|
//
|
|
96
74
|
// Invalidate all timers since a flush operation is being performed
|
|
97
|
-
if (this.
|
|
98
|
-
clearTimeout(this.
|
|
99
|
-
this.
|
|
75
|
+
if (this.idleTimeoutTimer) {
|
|
76
|
+
clearTimeout(this.idleTimeoutTimer);
|
|
77
|
+
this.idleTimeoutTimer = null;
|
|
100
78
|
}
|
|
101
|
-
if (this.
|
|
102
|
-
clearTimeout(this.
|
|
103
|
-
this.
|
|
79
|
+
if (this.batchTimeoutTimer) {
|
|
80
|
+
clearTimeout(this.batchTimeoutTimer);
|
|
81
|
+
this.batchTimeoutTimer = null;
|
|
104
82
|
}
|
|
105
83
|
//
|
|
106
84
|
// Skip if there is no data to flush
|
|
107
|
-
if (this.
|
|
85
|
+
if (this.dataBucketAlwaysAvailable.length === 0)
|
|
108
86
|
return;
|
|
109
87
|
//
|
|
110
|
-
// Copy everything in
|
|
88
|
+
// Copy everything in dataBucketAlwaysAvailable to dataBucketFlushOps
|
|
111
89
|
// to prevent any new incoming data to be added to the batch. This is to ensure
|
|
112
90
|
// that the batch is not modified while it is being processed.
|
|
113
|
-
this.
|
|
114
|
-
this.
|
|
91
|
+
this.dataBucketFlushOps = [...this.dataBucketFlushOps, ...this.dataBucketAlwaysAvailable];
|
|
92
|
+
this.dataBucketAlwaysAvailable = [];
|
|
115
93
|
//
|
|
116
94
|
// Process the data for ClickHouse insert
|
|
117
95
|
try {
|
|
118
96
|
// Transform data if a transformation function is provided
|
|
119
|
-
const insertData = this.
|
|
120
|
-
if (this.
|
|
121
|
-
return this.
|
|
97
|
+
const insertData = this.dataBucketFlushOps.map((item) => {
|
|
98
|
+
if (this.params.transformFn) {
|
|
99
|
+
return this.params.transformFn(item);
|
|
122
100
|
}
|
|
123
|
-
return item
|
|
101
|
+
return item;
|
|
124
102
|
});
|
|
125
103
|
// Insert data using ClickHouse client
|
|
126
|
-
await this.
|
|
104
|
+
await this.client.insert({
|
|
127
105
|
format: 'JSONEachRow',
|
|
128
|
-
table: this.
|
|
106
|
+
table: this.params.table,
|
|
129
107
|
values: insertData,
|
|
130
108
|
});
|
|
131
|
-
Logger.info(`CLICKHOUSEWRITER [${this.
|
|
109
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Flush | Length: ${this.dataBucketFlushOps.length} (session: ${sessionTimerResult}) (flush: ${flushTimer.get()})`);
|
|
132
110
|
//
|
|
133
111
|
// Call the flush callback, if provided
|
|
134
112
|
if (callback) {
|
|
135
|
-
await callback(this.
|
|
113
|
+
await callback(this.dataBucketFlushOps);
|
|
136
114
|
}
|
|
137
115
|
//
|
|
138
116
|
// Reset the flush bucket
|
|
139
|
-
this.
|
|
117
|
+
this.dataBucketFlushOps = [];
|
|
140
118
|
//
|
|
141
119
|
}
|
|
142
120
|
catch (error) {
|
|
143
|
-
Logger.error(`CLICKHOUSEWRITER [${this.
|
|
121
|
+
Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush().insert(): ${error.message}`);
|
|
144
122
|
throw error; // Re-throw to allow retry logic at higher level
|
|
145
123
|
}
|
|
146
124
|
//
|
|
147
125
|
}
|
|
148
126
|
catch (error) {
|
|
149
|
-
Logger.error(`CLICKHOUSEWRITER [${this.
|
|
127
|
+
Logger.error(`CLICKHOUSEWRITER [${this.params.table}]: Error @ flush(): ${error.message}`);
|
|
150
128
|
throw error; // Re-throw to allow retry logic at higher level
|
|
151
129
|
}
|
|
152
130
|
}
|
|
@@ -164,29 +142,30 @@ export class ClickHouseWriter {
|
|
|
164
142
|
//
|
|
165
143
|
// Invalidate the previously set idle timeout timer
|
|
166
144
|
// since we are performing a write operation again.
|
|
167
|
-
if (this.
|
|
168
|
-
clearTimeout(this.
|
|
169
|
-
this.
|
|
145
|
+
if (this.idleTimeoutTimer) {
|
|
146
|
+
clearTimeout(this.idleTimeoutTimer);
|
|
147
|
+
this.idleTimeoutTimer = null;
|
|
170
148
|
}
|
|
171
149
|
//
|
|
172
150
|
// Check if the batch is full
|
|
173
|
-
|
|
174
|
-
|
|
151
|
+
const batchSize = this.params.batch_size ?? 10_000;
|
|
152
|
+
if (this.dataBucketAlwaysAvailable.length >= batchSize) {
|
|
153
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch full. Flushing data...`);
|
|
175
154
|
await this.flush(flushCallback);
|
|
176
155
|
}
|
|
177
156
|
//
|
|
178
157
|
// Reset the session timer (for logging purposes)
|
|
179
|
-
if (this.
|
|
180
|
-
this.
|
|
158
|
+
if (this.dataBucketAlwaysAvailable.length === 0) {
|
|
159
|
+
this.sessionTimer.reset();
|
|
181
160
|
}
|
|
182
161
|
//
|
|
183
162
|
// Add the current data to the batch
|
|
184
163
|
if (Array.isArray(data)) {
|
|
185
|
-
const combinedDataWithOptions = data.map(item =>
|
|
186
|
-
this.
|
|
164
|
+
const combinedDataWithOptions = data.map(item => item);
|
|
165
|
+
this.dataBucketAlwaysAvailable = [...this.dataBucketAlwaysAvailable, ...combinedDataWithOptions];
|
|
187
166
|
}
|
|
188
167
|
else {
|
|
189
|
-
this.
|
|
168
|
+
this.dataBucketAlwaysAvailable.push(data);
|
|
190
169
|
}
|
|
191
170
|
//
|
|
192
171
|
// Call the write callback, if provided
|
|
@@ -196,20 +175,20 @@ export class ClickHouseWriter {
|
|
|
196
175
|
//
|
|
197
176
|
// Setup the idle timeout timer to flush the data if too long has passed
|
|
198
177
|
// since the last write operation. Check if this functionality is enabled.
|
|
199
|
-
if (this.
|
|
200
|
-
this.
|
|
201
|
-
Logger.info(`CLICKHOUSEWRITER [${this.
|
|
178
|
+
if (this.params.idle_timeout && this.params.idle_timeout > 0 && !this.idleTimeoutTimer) {
|
|
179
|
+
this.idleTimeoutTimer = setTimeout(async () => {
|
|
180
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Idle timeout reached. Flushing data...`);
|
|
202
181
|
await this.flush(flushCallback);
|
|
203
|
-
}, this.
|
|
182
|
+
}, this.params.idle_timeout);
|
|
204
183
|
}
|
|
205
184
|
//
|
|
206
185
|
// Setup the batch timeout timer to flush the data, if the timeout value is reached,
|
|
207
186
|
// even if the batch is not full. Check if this functionality is enabled.
|
|
208
|
-
if (this.
|
|
209
|
-
this.
|
|
210
|
-
Logger.info(`CLICKHOUSEWRITER [${this.
|
|
187
|
+
if (this.params.batch_timeout && this.params.batch_timeout > 0 && !this.batchTimeoutTimer) {
|
|
188
|
+
this.batchTimeoutTimer = setTimeout(async () => {
|
|
189
|
+
Logger.info(`CLICKHOUSEWRITER [${this.params.table}]: Batch timeout reached. Flushing data...`);
|
|
211
190
|
await this.flush(flushCallback);
|
|
212
|
-
}, this.
|
|
191
|
+
}, this.params.batch_timeout);
|
|
213
192
|
}
|
|
214
193
|
//
|
|
215
194
|
}
|