bulltrackers-module 1.0.256 → 1.0.257
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -1,12 +1,13 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes computation tasks from Pub/Sub and executes them.
- * UPDATED:
+ * UPDATED: Integrated Run Ledger for per-run/per-date success/failure tracking.
  */
 
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
 const { getManifest } = require('../topology/ManifestLoader');
 const { StructuredLogger } = require('../logger/logger');
+const { recordRunAttempt } = require('../persistence/RunRecorder'); // [NEW IMPORT]
 
 // 1. IMPORT CALCULATIONS
 let calculationPackage;
@@ -21,7 +22,6 @@ const calculations = calculationPackage.calculations;
 
 /**
  * Handles a single Pub/Sub message.
- * Assumes the message contains a VALID, RUNNABLE task from the Smart Dispatcher.
  */
 async function handleComputationTask(message, config, dependencies) {
 
@@ -33,7 +33,7 @@ async function handleComputationTask(message, config, dependencies) {
   });
 
   const runDependencies = { ...dependencies, logger: systemLogger };
-  const { logger } = runDependencies;
+  const { logger, db } = runDependencies;
 
   // 3. PARSE PAYLOAD
   let data;
@@ -67,14 +67,19 @@
     computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
   } catch (manifestError) {
     logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
+    // Record Fatal Manifest Error
+    await recordRunAttempt(db, { date, computation, pass }, 'CRASH', {
+      message: manifestError.message,
+      stage: 'MANIFEST_LOAD'
+    });
     return;
   }
 
-  // 5. EXECUTE (
-  // We do not check DB status or analyze feasibility. We assume Dispatcher did its job.
+  // 5. EXECUTE (With Run Ledger)
   try {
     logger.log('INFO', `[Worker] 📥 Received: ${computation} for ${date}`);
 
+    const startTime = Date.now();
     const result = await executeDispatchTask(
       date,
       pass,
@@ -83,13 +88,44 @@
       runDependencies,
       computationManifest
     );
-
-
-
+    const duration = Date.now() - startTime;
+
+    // CHECK FOR INTERNAL FAILURES (Trapped by ResultCommitter)
+    const failureReport = result?.updates?.failureReport || [];
+    const successUpdates = result?.updates?.successUpdates || {};
+
+    if (failureReport.length > 0) {
+      // Task ran, but logic or storage failed (e.g., Sharding Limit)
+      const failReason = failureReport[0]; // Assuming 1 calc per task
+
+      logger.log('ERROR', `[Worker] ❌ Failed logic/storage for ${computation}`, failReason.error);
+
+      await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, { durationMs: duration });
+
+      // Throw error to ensure Pub/Sub retry (if transient) or Visibility (if permanent)
+      throw new Error(failReason.error.message || 'Computation Logic Failed');
+    }
+    else if (Object.keys(successUpdates).length > 0) {
+      // Success
+      logger.log('INFO', `[Worker] ✅ Stored: ${computation} for ${date}`);
+      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, { durationMs: duration });
+    }
+    else {
+      // No updates, but no error (e.g. Empty Result) - Log as Success/Skipped
+      logger.log('WARN', `[Worker] ⚠️ No results produced for ${computation} (Empty?)`);
+      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration });
     }
 
   } catch (err) {
-
+    // Catch System Crashes (OOM, Timeout, Unhandled Exception)
+    logger.log('ERROR', `[Worker] ❌ Crash: ${computation} for ${date}: ${err.message}`);
+
+    await recordRunAttempt(db, { date, computation, pass }, 'CRASH', {
+      message: err.message,
+      stack: err.stack,
+      stage: 'SYSTEM_CRASH'
+    });
+
     throw err; // Trigger Pub/Sub retry
   }
 }
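
For context on the branching above: the worker now keys off an updates object returned by the result committer (second file below). A minimal sketch of that contract, where the field names come from this diff but the calculation names and values are purely illustrative:

    // Sketch of the result.updates contract the worker inspects.
    // Field names are from the diff; the data below is hypothetical.
    const result = {
      updates: {
        // One entry per calculation that committed cleanly
        // (hash: false marks an empty-but-not-failed result).
        successUpdates: {
          exampleCalcA: { hash: 'abc123', category: 'portfolio' }
        },
        // One entry per calculation whose logic or storage failed.
        failureReport: [
          {
            name: 'exampleCalcB',
            error: {
              message: 'Firestore Limit Exceeded: payload is too large',
              stage: 'SHARDING_LIMIT_EXCEEDED',
              stack: '(stack trace)'
            }
          }
        ]
      }
    };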

@@ -1,22 +1,20 @@
 /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED:
+ * UPDATED: Returns detailed failure reports for the Run Ledger.
  */
 const { commitBatchInChunks } = require('./FirestoreUtils');
 const { updateComputationStatus } = require('./StatusRepository');
 const { batchStoreSchemas } = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
-// Note: normalizeName is typically needed for doc IDs, but keys in stateObj are usually already normalized.
-// If not, ensure it is imported. Based on StandardExecutor, keys are normalized.
 
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
   const successUpdates = {};
+  const failureReport = []; // [NEW] Track failures per calculation
   const schemas = [];
-  const cleanupTasks = [];
+  const cleanupTasks = [];
   const { logger, db } = deps;
   const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
 
-  // [NEW] Extract numeric pass ID from string (e.g., "Pass 1" -> "1")
   const passNum = passName.replace(/[^0-9]/g, '');
 
   for (const name in stateObj) {
@@ -24,20 +22,18 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     try {
       const result = await calc.getResult();
 
-      // Validate Result
+      // Validate Result
       const isEmpty = !result ||
         (typeof result === 'object' && Object.keys(result).length === 0) ||
         (typeof result === 'number' && result === 0);
 
       if (isEmpty) {
-        // Mark status as FALSE (Failed/Empty) so it re-runs or is flagged
         if (calc.manifest.hash) {
           successUpdates[name] = {
             hash: false,
             category: calc.manifest.category
           };
         }
-        // Do not store empty results
        continue;
       }
 
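
One detail worth calling out from the hunk above: an empty result is not treated as a failure; it is flagged with hash: false and skipped. A standalone restatement of the isEmpty condition, with the values it catches:

    // Restatement of the isEmpty check from the diff, as a standalone helper.
    const isEmpty = (result) =>
      !result ||                                                          // null, undefined, other falsy values
      (typeof result === 'object' && Object.keys(result).length === 0) || // {} and []
      (typeof result === 'number' && result === 0);                       // numeric zero (already falsy, kept for clarity)

    isEmpty(null);     // true
    isEmpty({});       // true
    isEmpty([]);       // true
    isEmpty({ a: 1 }); // false
    isEmpty(42);       // false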

@@ -48,9 +44,16 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
         .collection(config.computationsSubcollection)
         .doc(name);
 
-
+      // [CRITICAL UPDATE] Catch errors specifically during Sharding/Prep
+      let updates;
+      try {
+        updates = await prepareAutoShardedWrites(result, mainDocRef, logger);
+      } catch (prepError) {
+        // If this fails, it's likely a memory or logic issue before DB commit
+        throw { message: prepError.message, stack: prepError.stack, stage: 'PREPARE_SHARDS' };
+      }
 
-      //
+      // Audit Ledger
       if (passNum && calc.manifest) {
         const ledgerRef = db.collection(`computation_audit_ledger/${dStr}/passes/${passNum}/tasks`).doc(name);
         updates.push({
@@ -64,7 +67,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
           options: { merge: true }
         });
       }
-      // --------------------------------------------------
 
       // Capture Schema
       if (calc.manifest.class.getSchema) {
@@ -81,14 +83,25 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       const totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
       const isSharded = updates.some(u => u.data._sharded === true);
 
-
+      try {
+        await commitBatchInChunks(config, deps, updates, `${name} Results`);
+      } catch (commitErr) {
+        // Check for Firestore specific limits
+        let stage = 'COMMIT_BATCH';
+        let msg = commitErr.message;
+        if (msg.includes('Transaction too big') || msg.includes('payload is too large')) {
+          stage = 'SHARDING_LIMIT_EXCEEDED';
+          msg = `Firestore Limit Exceeded: ${msg}`;
+        }
+        throw { message: msg, stack: commitErr.stack, stage };
+      }
 
-      //
+      // Log Storage
       if (logger && logger.logStorage) {
        logger.logStorage(pid, name, dStr, mainDocRef.path, totalSize, isSharded);
       }
 
-      //
+      // Mark Success
       if (calc.manifest.hash) {
         successUpdates[name] = {
           hash: calc.manifest.hash,
@@ -96,22 +109,29 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
         };
       }
 
-      //
+      // Cleanup Migration
       if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category) {
-        logger.log('INFO', `[Migration] Scheduled cleanup for ${name} from '${calc.manifest.previousCategory}'`);
         cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
       }
     }
    } catch (e) {
+      // [NEW] Intelligent Failure Reporting
+      const stage = e.stage || 'EXECUTION';
+      const msg = e.message || 'Unknown error';
+
       if (logger && logger.log) {
-        logger.log('ERROR', `Commit failed for ${name}`, { processId: pid, error:
+        logger.log('ERROR', `Commit failed for ${name} [${stage}]`, { processId: pid, error: msg });
       }
+
+      failureReport.push({
+        name,
+        error: { message: msg, stack: e.stack, stage }
+      });
    }
  }
 
  if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => {});
 
-  // Execute Cleanup Tasks (orphaned data from category changes)
  if (cleanupTasks.length > 0) {
    await Promise.allSettled(cleanupTasks);
  }
@@ -119,12 +139,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  if (!skipStatusWrite && Object.keys(successUpdates).length > 0) {
    await updateComputationStatus(dStr, successUpdates, config, deps);
  }
-
+
+  // [UPDATE] Return both success and failures so the Worker can log them
+  return { successUpdates, failureReport };
 }
 
 /**
  * Deletes result documents from a previous category location.
- * Must handle standard docs AND sharded docs (subcollections).
  */
 async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
  const { db, logger, calculationUtils } = deps;
@@ -138,20 +159,16 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,
    .collection(config.computationsSubcollection)
    .doc(calcName);
 
-  // 1. Check for Shards Subcollection
  const shardsCol = oldDocRef.collection('_shards');
  const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
 
  const batch = db.batch();
  let ops = 0;
 
-  // Delete shards
  for (const shardDoc of shardsSnap) {
    batch.delete(shardDoc);
    ops++;
  }
-
-  // Delete main doc
  batch.delete(oldDocRef);
  ops++;
 
@@ -188,7 +205,6 @@ async function prepareAutoShardedWrites(result, docRef, logger) {
  let currentChunkSize = 0;
  let shardIndex = 0;
 
-  // [UPDATE] Add _lastUpdated to non-sharded writes
  if ((totalSize + docPathSize) < CHUNK_LIMIT) {
    const data = {
      ...result,
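
Taken together, the committer's new error paths throw plain objects tagged with a stage field rather than Error instances, and the worker forwards those objects into the run ledger unchanged. A summary sketch of the stage values introduced in this diff and where each originates (my grouping, not an enum that exists in the package):

    // Stage tags appearing in this diff, mapped to their origin (summary only):
    const FAILURE_STAGES = {
      MANIFEST_LOAD:           'worker: manifest failed to load before execution',
      PREPARE_SHARDS:          'committer: prepareAutoShardedWrites threw before any DB commit',
      SHARDING_LIMIT_EXCEEDED: 'committer: Firestore rejected the batch as too large',
      COMMIT_BATCH:            'committer: any other commitBatchInChunks failure',
      EXECUTION:               'committer: default for caught errors with no stage tag',
      SYSTEM_CRASH:            'worker: unhandled exception around executeDispatchTask'
    };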

@@ -0,0 +1,54 @@
+/**
+ * @fileoverview Utility for recording computation run attempts (The Run Ledger).
+ * Tracks success, failure, and error contexts for every execution pass.
+ */
+const { generateProcessId } = require('../logger/logger');
+
+/**
+ * Records a run attempt to the computation_run_history collection.
+ * @param {Firestore} db - Firestore instance
+ * @param {Object} context - { date, computation, pass }
+ * @param {string} status - 'SUCCESS', 'FAILURE', or 'CRASH'
+ * @param {Object|null} error - Error object or null
+ * @param {Object} metrics - { durationMs, ... }
+ */
+async function recordRunAttempt(db, context, status, error = null, metrics = {}) {
+  if (!db || !context) return;
+
+  const { date, computation, pass } = context;
+  // Generate a unique ID for this specific run attempt
+  const runId = `${Date.now()}_${generateProcessId('run', computation, date)}`;
+
+  const docRef = db.collection('computation_run_history')
+    .doc(date)
+    .collection('runs')
+    .doc(runId);
+
+  const entry = {
+    computationName: computation,
+    date: date,
+    pass: String(pass),
+    timestamp: new Date().toISOString(),
+    status: status,
+    metrics: metrics
+  };
+
+  if (error) {
+    entry.error = {
+      message: error.message || 'Unknown Error',
+      // Capture specific sharding/firestore stages if available
+      stage: error.stage || 'UNKNOWN',
+      code: error.code || null,
+      stack: error.stack || null
+    };
+  }
+
+  // Await the write, but swallow errors so ledger logging never crashes the worker
+  try {
+    await docRef.set(entry);
+  } catch (e) {
+    console.error(`[RunRecorder] Failed to save history for ${computation}:`, e.message);
+  }
+}
+
+module.exports = { recordRunAttempt };
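
Finally, a usage sketch of the new module as the worker would call it. The require path is inferred from the worker's import ('../persistence/RunRecorder' relative to computation-system/helpers/), db is assumed to be an already-initialized Firestore instance, and the values are illustrative:

    // Hypothetical call mirroring the worker's FAILURE branch.
    const { recordRunAttempt } = require('./computation-system/persistence/RunRecorder');

    (async () => {
      await recordRunAttempt(
        db, // assumed: initialized Firestore instance
        { date: '2025-01-01', computation: 'exampleCalcB', pass: 1 },
        'FAILURE',
        { message: 'Computation Logic Failed', stage: 'EXECUTION' },
        { durationMs: 1234 }
      );
      // Writes one attempt document per call to:
      //   computation_run_history/2025-01-01/runs/<Date.now()>_<processId>
    })();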