bulltrackers-module 1.0.735 → 1.0.737
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/config/bulltrackers.config.js +80 -6
- package/functions/computation-system-v2/docs/architecture.md +59 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +107 -105
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +357 -150
- package/functions/computation-system-v2/framework/execution/RemoteTaskRunner.js +327 -0
- package/functions/computation-system-v2/framework/execution/middleware/LineageMiddleware.js +9 -4
- package/functions/computation-system-v2/framework/execution/middleware/ProfilerMiddleware.js +9 -21
- package/functions/computation-system-v2/framework/index.js +10 -3
- package/functions/computation-system-v2/framework/lineage/LineageTracker.js +53 -57
- package/functions/computation-system-v2/framework/monitoring/Profiler.js +54 -52
- package/functions/computation-system-v2/framework/resilience/Checkpointer.js +173 -27
- package/functions/computation-system-v2/framework/storage/StorageManager.js +419 -187
- package/functions/computation-system-v2/handlers/index.js +10 -1
- package/functions/computation-system-v2/handlers/scheduler.js +85 -193
- package/functions/computation-system-v2/handlers/worker.js +242 -0
- package/functions/computation-system-v2/index.js +2 -0
- package/functions/computation-system-v2/test/analyze-results.js +238 -0
- package/functions/computation-system-v2/test/{test-dispatcher.js → other/test-dispatcher.js} +6 -6
- package/functions/computation-system-v2/test/{test-framework.js → other/test-framework.js} +14 -14
- package/functions/computation-system-v2/test/{test-real-execution.js → other/test-real-execution.js} +1 -1
- package/functions/computation-system-v2/test/{test-real-integration.js → other/test-real-integration.js} +3 -3
- package/functions/computation-system-v2/test/{test-refactor-e2e.js → other/test-refactor-e2e.js} +3 -3
- package/functions/computation-system-v2/test/{test-risk-metrics-computation.js → other/test-risk-metrics-computation.js} +4 -4
- package/functions/computation-system-v2/test/{test-scheduler.js → other/test-scheduler.js} +1 -1
- package/functions/computation-system-v2/test/{test-storage.js → other/test-storage.js} +2 -2
- package/functions/computation-system-v2/test/run-pipeline-test.js +554 -0
- package/functions/computation-system-v2/test/test-full-pipeline.js +227 -0
- package/functions/computation-system-v2/test/test-worker-pool.js +266 -0
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/TestComputation.js +0 -46
- package/functions/computation-system-v2/test/{test-results.json → other/test-results.json} +0 -0
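The StorageManager changes below read a handful of config keys: the BigQuery project, dataset and location, a new gcs bucket and prefix for staging, and the result table name. A minimal sketch of the expected shape; key names are taken from the reads in the code below, all values are placeholders:

// Illustrative only; values are hypothetical, key names follow the reads in StorageManager.js below.
const storageConfig = {
  bigquery: {
    projectId: 'my-gcp-project',
    dataset: 'computation_system',
    location: 'EU'                              // constructor default when omitted
  },
  gcs: {
    bucket: 'my-staging-bucket',                // required for the new GCS staging path
    prefix: 'staging'                           // default when omitted
  },
  resultStore: { table: 'computation_results' } // default when omitted
};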
package/functions/computation-system-v2/framework/storage/StorageManager.js

@@ -1,11 +1,21 @@
 /**
  * @fileoverview StorageManager - Unified storage for computation results
- * Includes Optimized Checkpointing and
+ * Includes Optimized Checkpointing, Heartbeating, and Zombie Detection with Dead Letter Logic.
+ * * V2.1 UPDATE: Replaced Streaming/DML with Batch Load Jobs.
+ *   - commitResults: Now buffers data to GCS (NDJSON)
+ *   - finalizeResults: Loads all GCS files to a temp table and performs a single MERGE
+ * * V2.2 UPDATE: Added saveCheckpoint for Append-Only Checkpointer support.
+ * * V2.3 UPDATE: Parallelized GCS and Firestore writes in commitResults.
+ * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
+ * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
+ * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
  */
 
 const { Firestore } = require('@google-cloud/firestore');
 const { BigQuery } = require('@google-cloud/bigquery');
+const { Storage } = require('@google-cloud/storage');
 const pLimit = require('p-limit');
+const crypto = require('crypto');
 
 class StorageManager {
   constructor(config, logger = null) {
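The updated file header above describes a two-phase write path: commitResults stages each batch of results, and finalizeResults runs once per computation to bulk-load and merge them. A minimal caller sketch under that assumption (the surrounding function and variable names are hypothetical):

// Sketch only: stage every result batch, then finalize once at the end of the run.
async function persistRun(storageManager, dateStr, entry, batches) {
  for (const results of batches) {
    // Staging phase: NDJSON buffered to GCS, Firestore written in parallel when enabled.
    await storageManager.commitResults(dateStr, entry, results);
  }
  // Finalize phase: GCS files -> temp table load job -> single MERGE into the final table.
  await storageManager.finalizeResults(dateStr, entry);
}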
@@ -17,6 +27,10 @@ class StorageManager {
       location: config.bigquery?.location || 'EU'
     });
 
+    this.storage = new Storage({
+      projectId: config.bigquery?.projectId
+    });
+
     this._firestore = null;
     this.tableExists = new Map();
   }
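With the constructor change above, the class now holds a GCS client alongside BigQuery and Firestore, keyed off the same project id. Construction is unchanged from the caller's point of view; a hypothetical usage, assuming the config sketched earlier and a relative require path:

// Hypothetical usage; StorageManager is exported at the bottom of this file.
const { StorageManager } = require('./framework/storage/StorageManager');
const storageManager = new StorageManager(storageConfig, null); // null logger falls back to console output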
@@ -30,71 +44,208 @@ class StorageManager {
     return this._firestore;
   }
 
+  // =========================================================================
+  // RESULT COMMITTING (Batch -> GCS Buffer)
+  // =========================================================================
+
+  /**
+   * Staging Phase: Write results to GCS in NDJSON format.
+   * Replaces the old direct-to-BigQuery INSERT/MERGE.
+   * NOW PARALLELIZED: GCS and Firestore writes happen concurrently.
+   */
   async commitResults(dateStr, entry, results, depResultHashes = {}) {
     const storageConfig = this._resolveStorageConfig(entry);
-    const writeResults = { bigquery: null, firestore: null };
     const startTime = Date.now();
 
-
+    // Define GCS Task (Fatal on error)
+    const gcsTask = async () => {
+      if (storageConfig.bigquery === false) return null;
       try {
-
-
-        );
+        // Modified: Write to GCS instead of BQ directly
+        return await this._stageToGCS(dateStr, entry, results, depResultHashes);
       } catch (error) {
-        this.
+        this._logError(`GCS staging failed for ${entry.name}`, error);
         throw error;
       }
-    }
-
-
+    };
+
+    // Define Firestore Task (Non-fatal, logs warning)
+    const firestoreTask = async () => {
+      if (!storageConfig.firestore?.enabled) return null;
       try {
-
-          dateStr, entry, results, storageConfig.firestore
-        );
+        return await this._writeToFirestore(dateStr, entry, results, storageConfig.firestore);
       } catch (error) {
        this._log('WARN', `Firestore write failed for ${entry.name}: ${error.message}`);
-
+        return { error: error.message };
       }
-    }
+    };
+
+    // Execute IO operations in parallel
+    const [bigqueryResult, firestoreResult] = await Promise.all([
+      gcsTask(),
+      firestoreTask()
+    ]);
+
+    const writeResults = {
+      bigquery: bigqueryResult,
+      firestore: firestoreResult
+    };
 
     const duration = Date.now() - startTime;
-    this._log('INFO', `Committed ${entry.name} results in ${duration}ms`);
+    this._log('INFO', `Committed (Staged) ${entry.name} results in ${duration}ms`);
     return writeResults;
   }
 
   // =========================================================================
-  //
+  // FINALIZATION (GCS -> Load Job -> Merge)
   // =========================================================================
 
-
+  /**
+   * Finalize Phase: Bulk Load GCS files -> Temp Table -> Merge to Final.
+   * This is called ONCE per computation run.
+   */
+  async finalizeResults(dateStr, entry) {
+    const table = this.config.resultStore?.table || 'computation_results';
+    const bucketName = this.config.gcs?.bucket;
+    const prefix = this.config.gcs?.prefix || 'staging';
+
+    // 1. Define GCS path pattern: gs://bucket/prefix/date/computation/*.json
+    const filePrefix = `${prefix}/${dateStr}/${entry.name}/`;
+    const gcsPath = `gs://${bucketName}/${filePrefix}*.json`;
+
+    this._log('INFO', `Finalizing ${entry.name}...`);
+
+    try {
+      // FIX: Check if files actually exist before trying to load them
+      // If the computation produced 0 results, no files exist, and BQ will throw "Not Found".
+      const [files] = await this.storage.bucket(bucketName).getFiles({ prefix: filePrefix });
+
+      if (!files || files.length === 0) {
+        this._log('INFO', `No staged files found for ${entry.name}. Skipping finalization (Empty Result).`);
+        return;
+      }
+
+      const tempTableId = `${table}_temp_${crypto.randomUUID().replace(/-/g, '')}`;
+      this._log('INFO', `Loading ${files.length} files from ${gcsPath} into ${tempTableId}...`);
+
+      // 2. Load GCS files into a Temporary Table
+      // We create the temp table with the exact schema we expect first
+      await this._createTempTableForLoad(tempTableId);
+
+      // FIX: Use bigquery.createJob directly.
+      const [job] = await this.bigquery.createJob({
+        configuration: {
+          load: {
+            destinationTable: {
+              projectId: this.config.bigquery.projectId,
+              datasetId: this.config.bigquery.dataset,
+              tableId: tempTableId
+            },
+            sourceUris: [gcsPath],
+            sourceFormat: 'NEWLINE_DELIMITED_JSON',
+            writeDisposition: 'WRITE_TRUNCATE',
+            autodetect: false // Use schema from table creation
+          }
+        }
+      });
+
+      // Wait for job completion
+      await new Promise((resolve, reject) => {
+        job.on('complete', (metadata) => resolve(metadata));
+        job.on('error', (err) => reject(err));
+      });
+
+      this._log('INFO', `Load Job completed for ${entry.name}`);
+
+      // 3. Perform the bulk MERGE from Temp -> Final
+      await this._mergeStagedData(table, tempTableId);
+
+      // 4. Cleanup: Delete Temp Table & GCS Files
+      try {
+        await this.bigquery.dataset(this.config.bigquery.dataset).table(tempTableId).delete();
+        await this._cleanupGCSFiles(bucketName, filePrefix);
+      } catch (cleanupErr) {
+        this._log('WARN', `Cleanup warning for ${entry.name}: ${cleanupErr.message}`);
+      }
+
+      this._log('INFO', `Finalization complete for ${entry.name}`);
+    } catch (e) {
+      this._logError(`Finalization failed for ${entry.name}`, e);
+      throw e;
+    }
+  }
+
+  // =========================================================================
+  // CHECKPOINTING & RESILIENCE
+  // =========================================================================
+
+  async saveCheckpoint(state) {
+    const table = 'computation_checkpoints';
+    const dataset = this.config.bigquery.dataset;
+    await this._ensureCheckpointTable(table);
+
+    const row = {
+      date: state.date,
+      computation_name: state.computation_name,
+      checkpoint_id: state.checkpoint_id,
+      worker_instance_id: state.worker_instance_id,
+      code_hash: state.code_hash || null,
+      status: state.status,
+      processed_count: state.processed_count,
+      total_entities: state.total_entities,
+      last_entity_id: state.last_entity_id,
+      completed_batches: state.completed_batches || [],
+      attempts: state.attempts,
+      started_at: this.bigquery.timestamp(new Date(state.started_at)),
+      last_updated: this.bigquery.timestamp(new Date(state.last_updated))
+    };
+
+    try {
+      await this.bigquery.dataset(dataset).table(table).insert([row]);
+    } catch (error) {
+      if (error.name === 'PartialFailureError' || error.errors) {
+        this._log('ERROR', `Checkpoint insert failed: ${JSON.stringify(error.errors)}`);
+      } else {
+        this._log('ERROR', `Checkpoint insert failed: ${error.message}`);
+      }
+      throw error;
+    }
+  }
+
+  // Legacy/Alternative methods
+  async initCheckpoint(dateStr, computationName, checkpointId, totalEntities, workerId, codeHash) {
     const table = 'computation_checkpoints';
     const dataset = this.config.bigquery.dataset;
-
     await this._ensureCheckpointTable(table);
 
     const row = {
       date: dateStr,
       computation_name: computationName,
       checkpoint_id: checkpointId,
+      worker_instance_id: workerId,
+      code_hash: codeHash || null,
       status: 'running',
       processed_count: 0,
       total_entities: totalEntities,
       last_entity_id: null,
       completed_batches: [],
-
+      attempts: 1,
       started_at: this.bigquery.timestamp(new Date()),
       last_updated: this.bigquery.timestamp(new Date())
     };
 
-
-
+    try {
+      await this.bigquery.dataset(dataset).table(table).insert([row]);
+    } catch (e) {
+      this._logError('initCheckpoint failed', e);
+      throw e;
+    }
   }
 
   async updateCheckpoint(dateStr, computationName, checkpointId, state) {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
 
-    // Efficient UPDATE using structured fields
     const query = `
       UPDATE ${fullTable}
       SET
@@ -115,131 +266,131 @@ class StorageManager {
         checkpointId,
         processedCount: state.processedCount,
         lastEntityId: state.lastEntityId,
-        batchIndex: state.batchIndex
+        batchIndex: state.batchIndex
       },
       location: this.config.bigquery.location
     });
   }
 
-  async
+  async updateHeartbeat(checkpointId, workerId) {
+    const table = 'computation_checkpoints';
+    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+    const query = `
+      UPDATE ${fullTable}
+      SET last_updated = CURRENT_TIMESTAMP()
+      WHERE checkpoint_id = @checkpointId AND worker_instance_id = @workerId
+    `;
+    this.bigquery.query({ query, params: { checkpointId, workerId }, location: this.config.bigquery.location })
+      .catch(e => this._log('WARN', `Heartbeat failed: ${e.message}`));
+  }
+
+  async claimCheckpoint(checkpointId, newWorkerId, minutesThreshold = 15) {
+    const table = 'computation_checkpoints';
+    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+    const query = `
+      UPDATE ${fullTable}
+      SET worker_instance_id = @newWorkerId, attempts = IFNULL(attempts, 1) + 1, last_updated = CURRENT_TIMESTAMP()
+      WHERE checkpoint_id = @checkpointId AND status = 'running'
+        AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
+    `;
+    try {
+      await this.bigquery.query({
+        query,
+        params: { checkpointId, newWorkerId, minutes: minutesThreshold },
+        location: this.config.bigquery.location
+      });
+      const verify = await this.getLatestCheckpointById(checkpointId);
+      return verify && verify.worker_instance_id === newWorkerId;
+    } catch (e) {
+      this._log('ERROR', `Failed to claim checkpoint: ${e.message}`);
+      return false;
+    }
+  }
+
+  async failCheckpoint(checkpointId, reason) {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+    const query = `UPDATE ${fullTable} SET status = 'failed', last_updated = CURRENT_TIMESTAMP() WHERE checkpoint_id = @checkpointId`;
+    await this.bigquery.query({ query, params: { checkpointId }, location: this.config.bigquery.location });
+  }
 
+  async findZombies(minutesThreshold = 15) {
+    const table = 'computation_checkpoints';
+    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+    try {
+      const query = `
+        SELECT computation_name, date, checkpoint_id, last_updated
+        FROM ${fullTable}
+        WHERE status = 'running' AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
+        LIMIT 50
+      `;
+      const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
+      return rows.map(r => ({ name: r.computation_name, date: r.date.value || r.date, checkpointId: r.checkpoint_id }));
+    } catch (e) { return []; }
+  }
+
+  async completeCheckpoint(dateStr, computationName, checkpointId) {
+    const table = 'computation_checkpoints';
+    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     const query = `
       UPDATE ${fullTable}
       SET status = 'completed', last_updated = CURRENT_TIMESTAMP()
       WHERE date = @date AND computation_name = @computationName AND checkpoint_id = @checkpointId
     `;
-
-    await this.bigquery.query({
-      query,
-      params: { date: dateStr, computationName, checkpointId },
-      location: this.config.bigquery.location
-    });
+    await this.bigquery.query({ query, params: { date: dateStr, computationName, checkpointId }, location: this.config.bigquery.location });
   }
 
   async getLatestCheckpoint(dateStr, computationName) {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
-
     try {
-      // Get the most recent attempt
       const query = `
-        SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches
+        SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
        FROM ${fullTable}
        WHERE date = @date AND computation_name = @computationName
-        ORDER BY
-        LIMIT 1
+        ORDER BY last_updated DESC LIMIT 1
      `;
-
-      const [rows] = await this.bigquery.query({
-        query,
-        params: { date: dateStr, computationName },
-        location: this.config.bigquery.location
-      });
-
+      const [rows] = await this.bigquery.query({ query, params: { date: dateStr, computationName }, location: this.config.bigquery.location });
      if (rows.length === 0) return null;
-
-
-      return
-
+
+      const r = rows[0];
+      return {
+        ...r,
+        started_at: r.started_at?.value || r.started_at,
+        last_updated: r.last_updated?.value || r.last_updated
+      };
+    } catch (e) { return null; }
   }
 
-  async
-
-    const
-    const
-    const [
-
-    if (!exists) {
-      await dataset.createTable(tableName, {
-        schema: [
-          { name: 'date', type: 'DATE', mode: 'REQUIRED' },
-          { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
-          { name: 'checkpoint_id', type: 'STRING', mode: 'REQUIRED' },
-          { name: 'status', type: 'STRING', mode: 'REQUIRED' }, // running, completed, failed
-          { name: 'processed_count', type: 'INTEGER', mode: 'NULLABLE' },
-          { name: 'total_entities', type: 'INTEGER', mode: 'NULLABLE' },
-          { name: 'last_entity_id', type: 'STRING', mode: 'NULLABLE' },
-          { name: 'completed_batches', type: 'INTEGER', mode: 'REPEATED' }, // ARRAY<INT64>
-          { name: 'started_at', type: 'TIMESTAMP', mode: 'REQUIRED' },
-          { name: 'last_updated', type: 'TIMESTAMP', mode: 'REQUIRED' }
-        ],
-        timePartitioning: { type: 'DAY', field: 'date' },
-        clustering: { fields: ['computation_name', 'status'] }
-      });
-    }
-    this.tableExists.set(tableName, true);
+  async getLatestCheckpointById(checkpointId) {
+    const table = 'computation_checkpoints';
+    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+    const query = `SELECT worker_instance_id, status FROM ${fullTable} WHERE checkpoint_id = @id LIMIT 1`;
+    const [rows] = await this.bigquery.query({ query, params: { id: checkpointId }, location: this.config.bigquery.location });
+    return rows[0];
   }
 
-
-  // PERFORMANCE REPORTING
-  // =========================================================================
-
-  async savePerformanceReport(dateStr, report) {
+  async savePerformanceReport(report) {
     const table = 'computation_performance';
-
-    await this._ensurePerformanceTable(table);
-
-    const rows = report.computations.map(c => ({
-      date: dateStr,
-      computation_name: c.name,
-      entity_count: c.entityCount,
-      avg_duration_ms: c.avgDuration,
-      max_duration_ms: c.maxDuration,
-      p95_duration_ms: c.p95Duration,
-      avg_memory_delta: c.avgMemory,
-      total_duration_ms: c.totalDuration,
-      recorded_at: new Date().toISOString()
-    }));
-
-    if (rows.length === 0) return;
-    await this.bigquery.dataset(this.config.bigquery.dataset).table(table).insert(rows);
-  }
+    const dataset = this.config.bigquery.dataset;
+    await this._ensurePerformanceTable(table);
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      { name: 'avg_memory_delta', type: 'FLOAT', mode: 'NULLABLE' },
-      { name: 'total_duration_ms', type: 'FLOAT', mode: 'NULLABLE' },
-      { name: 'recorded_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
-    ],
-    timePartitioning: { type: 'DAY', field: 'date' }
-    });
+    const row = {
+      run_id: report.runId || 'unknown',
+      computation_name: report.computationName,
+      date: report.date,
+      duration_ms: report.durationMs,
+      metrics: JSON.stringify(report.metrics || {}),
+      entity_count: report.entityCount || 0,
+      status: report.status || 'completed',
+      created_at: this.bigquery.timestamp(new Date())
+    };
+    try {
+      await this.bigquery.dataset(dataset).table(table).insert([row]);
+      this._log('INFO', `Performance report saved for ${report.computationName}`);
+    } catch (e) {
+      this._logError('Failed to save performance report', e);
     }
-    this.tableExists.set(tableName, true);
   }
 
   // =========================================================================
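The heartbeat and zombie-claiming methods added in this hunk imply a recovery loop on the scheduler side: list checkpoints whose heartbeat has gone stale, then attempt an atomic takeover. A sketch under that assumption (the worker id and function name are hypothetical; 15 minutes is the methods' default threshold):

// Sketch: reclaim checkpoints whose workers stopped heartbeating.
async function reclaimZombies(storageManager, myWorkerId) {
  const zombies = await storageManager.findZombies(15); // still 'running' but stale for >= 15 minutes
  for (const z of zombies) {
    // claimCheckpoint only flips ownership if the row is still 'running' and still stale,
    // then verifies via getLatestCheckpointById that this worker actually won the claim.
    const claimed = await storageManager.claimCheckpoint(z.checkpointId, myWorkerId, 15);
    if (claimed) console.log(`Resuming ${z.name} (${z.date}) as ${myWorkerId}`);
  }
}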
@@ -259,21 +410,85 @@ class StorageManager {
     };
   }
 
-  async
-    const table = this.config.resultStore?.table || 'computation_results';
-    const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
-
+  async _stageToGCS(dateStr, entry, results, depResultHashes) {
     const rows = this._buildBigQueryRows(dateStr, entry, results, depResultHashes);
     if (rows.length === 0) return { rowCount: 0 };
 
-
+    const bucketName = this.config.gcs?.bucket;
+    const prefix = this.config.gcs?.prefix || 'staging';
+    const filename = `${prefix}/${dateStr}/${entry.name}/${crypto.randomUUID()}.json`;
+
+    const file = this.storage.bucket(bucketName).file(filename);
+
+    const ndjson = rows.map(r => JSON.stringify(r)).join('\n');
+
+    await file.save(ndjson, {
+      contentType: 'application/json',
+      resumable: false
+    });
+
+    return { rowCount: rows.length, gcsUri: `gs://${bucketName}/${filename}` };
+  }
+
+  async _createTempTableForLoad(tableName) {
+    const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
+    const table = dataset.table(tableName);
+
+    const schema = [
+      { name: 'date', type: 'DATE', mode: 'REQUIRED' },
+      { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
+      { name: 'category', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'entity_id', type: 'STRING', mode: 'REQUIRED' },
+      { name: 'code_hash', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'result_hash', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'dependency_result_hashes', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
+      { name: 'result_data', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'updated_at', type: 'TIMESTAMP', mode: 'NULLABLE' }
+    ];
+
+    await table.create({ schema, timePartitioning: { type: 'DAY', field: 'date' } });
+  }
+
+  async _mergeStagedData(targetTable, tempTable) {
+    const fullTarget = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${targetTable}\``;
+    const fullTemp = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${tempTable}\``;
+
+    await this._ensureBigQueryTable(targetTable);
+
+    const mergeQuery = `
+      MERGE INTO ${fullTarget} T
+      USING (
+        SELECT * FROM (
+          SELECT *, ROW_NUMBER() OVER(PARTITION BY date, computation_name, entity_id ORDER BY updated_at DESC) as rn
+          FROM ${fullTemp}
+        ) WHERE rn = 1
+      ) S
+      ON T.date = S.date AND T.computation_name = S.computation_name AND T.entity_id = S.entity_id
+      WHEN MATCHED THEN
+        UPDATE SET
+          code_hash = S.code_hash,
+          result_hash = S.result_hash,
+          dependency_result_hashes = S.dependency_result_hashes,
+          entity_count = S.entity_count,
+          result_data = S.result_data,
+          updated_at = S.updated_at
+      WHEN NOT MATCHED THEN
+        INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
+                dependency_result_hashes, entity_count, result_data, updated_at)
+        VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
+                S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
+    `;
 
-
-
-
-
+    await this.bigquery.query({ query: mergeQuery, location: this.config.bigquery.location });
+  }
+
+  async _cleanupGCSFiles(bucketName, prefix) {
+    const [files] = await this.storage.bucket(bucketName).getFiles({ prefix });
+    if (files.length > 0) {
+      const limit = pLimit(10);
+      await Promise.all(files.map(f => limit(() => f.delete())));
     }
-    return { rowCount: rows.length };
   }
 
   _buildBigQueryRows(dateStr, entry, results, depResultHashes) {
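Each line _stageToGCS writes is a single JSON object whose fields mirror the temp-table schema in _createTempTableForLoad above; the load job then ingests it as NEWLINE_DELIMITED_JSON. An illustrative row (all values made up, field names taken from the schema):

{"date":"2024-01-15","computation_name":"risk_metrics","category":"risk","entity_id":"AAPL","code_hash":"a1b2c3","result_hash":"d4e5f6","dependency_result_hashes":"{}","entity_count":1,"result_data":"{\"value\":0.42}","updated_at":"2024-01-15T00:00:00.000Z"}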
@@ -313,60 +528,6 @@ class StorageManager {
     return rows;
   }
 
-  async _upsertBatch(fullTable, rows) {
-    const dates = rows.map(r => r.date);
-    const names = rows.map(r => r.computation_name);
-    const entityIds = rows.map(r => r.entity_id);
-    const codeHashes = rows.map(r => r.code_hash);
-    const resultHashes = rows.map(r => r.result_hash);
-    const depHashes = rows.map(r => r.dependency_result_hashes);
-    const entityCounts = rows.map(r => r.entity_count);
-    const resultData = rows.map(r => r.result_data);
-    const updatedAts = rows.map(r => r.updated_at);
-
-    const mergeQuery = `
-      MERGE INTO ${fullTable} T
-      USING (
-        SELECT
-          PARSE_DATE('%Y-%m-%d', date) as date,
-          computation_name, entity_id, code_hash, result_hash,
-          dependency_result_hashes, entity_count, result_data,
-          PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*SZ', updated_at) as updated_at
-        FROM UNNEST(@dates) AS date WITH OFFSET pos
-        JOIN UNNEST(@names) AS computation_name WITH OFFSET USING(pos)
-        JOIN UNNEST(@entity_ids) AS entity_id WITH OFFSET USING(pos)
-        JOIN UNNEST(@code_hashes) AS code_hash WITH OFFSET USING(pos)
-        JOIN UNNEST(@result_hashes) AS result_hash WITH OFFSET USING(pos)
-        JOIN UNNEST(@dep_hashes) AS dependency_result_hashes WITH OFFSET USING(pos)
-        JOIN UNNEST(@entity_counts) AS entity_count WITH OFFSET USING(pos)
-        JOIN UNNEST(@result_data) AS result_data WITH OFFSET USING(pos)
-        JOIN UNNEST(@updated_ats) AS updated_at WITH OFFSET USING(pos)
-      ) S
-      ON T.date = S.date AND T.computation_name = S.computation_name AND T.entity_id = S.entity_id
-      WHEN MATCHED THEN
-        UPDATE SET code_hash = S.code_hash, result_hash = S.result_hash,
-          dependency_result_hashes = S.dependency_result_hashes,
-          entity_count = S.entity_count, result_data = S.result_data,
-          updated_at = S.updated_at
-      WHEN NOT MATCHED THEN
-        INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
-                dependency_result_hashes, entity_count, result_data, updated_at)
-        VALUES (S.date, S.computation_name, @category, S.entity_id, S.code_hash, S.result_hash,
-                S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
-    `;
-
-    await this.bigquery.query({
-      query: mergeQuery,
-      params: {
-        dates, names, entity_ids: entityIds, code_hashes: codeHashes,
-        result_hashes: resultHashes, dep_hashes: depHashes,
-        entity_counts: entityCounts, result_data: resultData,
-        updated_ats: updatedAts, category: rows[0].category
-      },
-      location: this.config.bigquery.location
-    });
-  }
-
   async _ensureBigQueryTable(tableName) {
     if (this.tableExists.get(tableName)) return;
     const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
@@ -393,7 +554,69 @@ class StorageManager {
     this.tableExists.set(tableName, true);
   }
 
-  async
+  async _ensurePerformanceTable(tableName) {
+    if (this.tableExists.get(tableName)) return;
+
+    const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
+    const table = dataset.table(tableName);
+    const [exists] = await table.exists();
+
+    if (!exists) {
+      const schema = [
+        { name: 'run_id', type: 'STRING', mode: 'REQUIRED' },
+        { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
+        { name: 'date', type: 'DATE', mode: 'REQUIRED' },
+        { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
+        { name: 'metrics', type: 'STRING', mode: 'NULLABLE' }, // JSON string
+        { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
+        { name: 'status', type: 'STRING', mode: 'NULLABLE' },
+        { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
+      ];
+
+      await dataset.createTable(tableName, {
+        schema,
+        timePartitioning: { type: 'DAY', field: 'date' },
+        clustering: { fields: ['computation_name', 'run_id'] }
+      });
+      this._log('INFO', `Created table ${tableName}`);
+    }
+
+    this.tableExists.set(tableName, true);
+  }
+
+  async _ensureCheckpointTable(tableName) {
+    if (this.tableExists.get(tableName)) return;
+    const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
+    const table = dataset.table(tableName);
+    const [exists] = await table.exists();
+
+    const schema = [
+      { name: 'date', type: 'DATE', mode: 'REQUIRED' },
+      { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
+      { name: 'checkpoint_id', type: 'STRING', mode: 'REQUIRED' },
+      { name: 'worker_instance_id', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'code_hash', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'status', type: 'STRING', mode: 'REQUIRED' },
+      { name: 'processed_count', type: 'INTEGER', mode: 'NULLABLE' },
+      { name: 'total_entities', type: 'INTEGER', mode: 'NULLABLE' },
+      { name: 'last_entity_id', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'completed_batches', type: 'INTEGER', mode: 'REPEATED' },
+      { name: 'attempts', type: 'INTEGER', mode: 'NULLABLE' },
+      { name: 'started_at', type: 'TIMESTAMP', mode: 'REQUIRED' },
+      { name: 'last_updated', type: 'TIMESTAMP', mode: 'REQUIRED' }
+    ];
+
+    if (!exists) {
+      await dataset.createTable(tableName, {
+        schema,
+        timePartitioning: { type: 'DAY', field: 'date' },
+        clustering: { fields: ['computation_name', 'status'] }
+      });
+    }
+    this.tableExists.set(tableName, true);
+  }
+
+  _writeToFirestore(dateStr, entry, results, firestoreConfig) {
     const { path, merge, includeMetadata } = firestoreConfig;
     if (!path) throw new Error(`Firestore path not configured for ${entry.name}`);
 
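savePerformanceReport (earlier in this diff) now takes a single report object, and the computation_performance schema above mirrors its fields one for one, with metrics serialized to a JSON string. A hedged example call (all values illustrative):

// Field names follow the row mapping in savePerformanceReport; values are made up.
await storageManager.savePerformanceReport({
  runId: 'run-2024-01-15-001',
  computationName: 'risk_metrics',
  date: '2024-01-15',
  durationMs: 48210,
  metrics: { avgDurationMs: 12, p95DurationMs: 35 }, // stored as a JSON string
  entityCount: 4200,
  status: 'completed'
});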
@@ -431,7 +654,7 @@ class StorageManager {
       if (batchCount > 0) batches.push(currentBatch);
 
       const limit = pLimit(10);
-
+      return Promise.all(batches.map(b => limit(() => b.commit()))).then(() => ({ docCount }));
 
     } else {
       const docPath = this._resolvePath(path, {
@@ -439,10 +662,8 @@ class StorageManager {
       });
       const docRef = this.firestore.doc(docPath);
       const docData = { ...results, ...metadata };
-      merge ?
-      docCount = 1;
+      return (merge ? docRef.set(docData, { merge: true }) : docRef.set(docData)).then(() => ({ docCount: 1 }));
     }
-    return { docCount };
   }
 
   _resolvePath(pathTemplate, values) {
@@ -456,7 +677,6 @@ class StorageManager {
   }
 
   _hashResult(data) {
-    const crypto = require('crypto');
     const str = typeof data === 'string' ? data : JSON.stringify(data);
     return crypto.createHash('md5').update(str).digest('hex').substring(0, 16);
   }
@@ -464,6 +684,18 @@ class StorageManager {
   _log(level, message) {
     this.logger?.log ? this.logger.log(level, `[StorageManager] ${message}`) : console.log(`[${level}] [StorageManager] ${message}`);
   }
+
+  _logError(context, error) {
+    // Safe logging for BigQuery PartialFailureError which hides details in .errors
+    let details = error.message;
+    if (error.errors && Array.isArray(error.errors)) {
+      details = JSON.stringify(error.errors, null, 2);
+    } else if (error.response && error.response.insertErrors) {
+      details = JSON.stringify(error.response.insertErrors, null, 2);
+    }
+
+    this._log('ERROR', `${context}: ${details}`);
+  }
 }
 
 module.exports = { StorageManager };