bulltrackers-module 1.0.260 → 1.0.261
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,19 +81,28 @@ async function handleComputationTask(message, config, dependencies) {
     const successUpdates = result?.updates?.successUpdates || {};
 
     if (failureReport.length > 0) {
-        // Task ran, but logic or storage failed
+        // Task ran, but logic or storage failed
        const failReason = failureReport[0]; // Assuming 1 calc per task
        logger.log('ERROR', `[Worker] ❌ Failed logic/storage for ${computation}`, failReason.error);
-
+
+        // Extract any metrics gathered before failure (e.g., anomalies)
+        const metrics = failReason.metrics || {};
+        metrics.durationMs = duration;
+
+        await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, metrics);
        throw new Error(failReason.error.message || 'Computation Logic Failed');
     }
     else if (Object.keys(successUpdates).length > 0) {
        // Success
-
-
+        const successData = successUpdates[computation]; // Extract specific calc data
+        const metrics = successData.metrics || {};
+        metrics.durationMs = duration;
+
+        logger.log('INFO', `[Worker] ✅ Stored: ${computation} for ${date} (${metrics.storage?.sizeBytes} bytes)`);
+        await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics);
     }
     else {
-        // No updates, but no error (e.g. Empty Result)
+        // No updates, but no error (e.g. Empty Result)
        logger.log('WARN', `[Worker] ⚠️ No results produced for ${computation} (Empty?)`);
        await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration });
     }
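The worker now forwards per-run metrics into recordRunAttempt on both the failure and success paths. A minimal sketch of the failure-path payload, assuming the runMetrics container introduced later in this diff (names and values are hypothetical, not from a real run):

// Illustrative failureReport entry as built by the committer below:
const failReason = {
  name: 'priceMomentum',
  error: { message: 'Data Integrity: all values are 0', stack: '...', stage: 'QUALITY_CIRCUIT_BREAKER' },
  metrics: {
    storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
    validation: { isValid: false, anomalies: ['Data Integrity: all values are 0'] }
  }
};

// The handler stamps the duration (presumably measured earlier in the handler) and forwards everything:
const metrics = failReason.metrics || {};
metrics.durationMs = duration;
// await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, metrics);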
@@ -1,7 +1,7 @@
 /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Returns detailed failure reports for the
- * UPDATED: Stops retrying on non-transient errors
+ * UPDATED: Returns detailed failure reports AND metrics for the Audit Logger.
+ * UPDATED: Stops retrying on non-transient errors.
  */
 const { commitBatchInChunks } = require('./FirestoreUtils');
 const { updateComputationStatus } = require('./StatusRepository');
@@ -30,6 +30,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
   for (const name in stateObj) {
     const calc = stateObj[name];
+
+    // Prep metrics container
+    const runMetrics = {
+      storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
+      validation: { isValid: true, anomalies: [] }
+    };
+
     try {
       const result = await calc.getResult();
 
@@ -37,14 +44,30 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
 
       if (!healthCheck.valid) {
+        // If validation failed, we consider it an anomaly but we BLOCK the write (throw error)
+        runMetrics.validation.isValid = false;
+        runMetrics.validation.anomalies.push(healthCheck.reason);
        throw { message: healthCheck.reason, stage: 'QUALITY_CIRCUIT_BREAKER' };
       }
 
+      // Check for minor anomalies (validation warnings that didn't fail) - optional implementation
+      // For now, we assume if valid=true, anomalies are empty unless we add warning logic later.
+
      const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0) || (typeof result === 'number' && result === 0);
      if (isEmpty) {
-
+        // Log empty success
+        if (calc.manifest.hash) {
+          successUpdates[name] = {
+            hash: false,
+            category: calc.manifest.category,
+            metrics: runMetrics // Return empty metrics
+          };
+        }
        continue;
      }
+
+      // Calculate Key Count rough estimate
+      if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
 
      const mainDocRef = db.collection(config.resultsCollection)
        .doc(dStr)
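For reference, the emptiness predicate above treats null/undefined, key-less objects, and zero as "no output". Extracted into a standalone helper, it evaluates like this:

const isEmptyResult = (result) =>
  !result ||
  (typeof result === 'object' && Object.keys(result).length === 0) ||
  (typeof result === 'number' && result === 0);

isEmptyResult(null);     // true  (falsy)
isEmptyResult({});       // true  (object with no keys)
isEmptyResult([]);       // true  (arrays are objects; an empty array has no keys)
isEmptyResult(0);        // true  (0 is falsy, so the number branch is redundant but harmless)
isEmptyResult({ a: 1 }); // false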
@@ -71,7 +94,18 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
        try {
          // 1. Prepare Shards with current constraints
+          // This function now needs to help us determine sharding info
          const updates = await prepareAutoShardedWrites(result, mainDocRef, logger, constraints.bytes, constraints.keys);
+
+          // METRICS CALCULATION
+          const pointer = updates.find(u => u.data._completed === true);
+          const isSharded = pointer && pointer.data._sharded === true;
+          const shardCount = isSharded ? (pointer.data._shardCount || 1) : 1;
+          const totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
+
+          runMetrics.storage.sizeBytes = totalSize;
+          runMetrics.storage.isSharded = isSharded;
+          runMetrics.storage.shardCount = shardCount;
 
          // 2. Audit Ledger (Only add to the first update batch)
          if (passNum && calc.manifest) {
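The metrics block above reads `_completed`, `_sharded`, and `_shardCount` markers off the write payloads. The exact return shape of prepareAutoShardedWrites is not shown in this diff, so the sketch below is an assumption inferred from the fields being read:

// ASSUMED shape of the updates array (inferred, unverified):
const updates = [
  { ref: null /* shard doc ref */, data: { chunk: { /* ... */ }, _sharded: true } },
  { ref: null /* shard doc ref */, data: { chunk: { /* ... */ }, _sharded: true } },
  { ref: null /* main doc ref  */, data: { _completed: true, _sharded: true, _shardCount: 2 } }
];

const pointer = updates.find(u => u.data._completed === true); // the pointer doc
const isSharded = pointer && pointer.data._sharded === true;   // true in this sample
const shardCount = isSharded ? (pointer.data._shardCount || 1) : 1; // -> 2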
@@ -83,16 +117,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
              completedAt: new Date(),
              actualHash: calc.manifest.hash,
              _verified: true,
-              _shardingStrategy: attempt + 1
+              _shardingStrategy: attempt + 1
            },
            options: { merge: true }
          });
          }
 
          // 3. Attempt Commit
-          const totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
-          const isSharded = updates.some(u => u.data._sharded === true);
-
          await commitBatchInChunks(config, deps, updates, `${name} Results (Att ${attempt+1})`);
 
          // Log Success
@@ -106,23 +137,19 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
          lastError = commitErr;
          const msg = commitErr.message || '';
 
-          // [IMPROVED] Check for non-retryable errors
          const isNonRetryable = NON_RETRYABLE_ERRORS.some(code => msg.includes(code));
          if (isNonRetryable) {
            logger.log('ERROR', `[SelfHealing] ${name} encountered FATAL error (Attempt ${attempt + 1}): ${msg}. Aborting.`);
-            throw commitErr;
+            throw commitErr;
          }
 
-          // Check if error is related to size/indexes
          const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
-          const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
+          const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
 
          if (isSizeError || isIndexError) {
            logger.log('WARN', `[SelfHealing] ${name} failed write attempt ${attempt + 1}. Retrying with tighter constraints...`, { error: msg });
            continue; // Try next strategy
          } else {
-            // If it's a network error or unknown, re-throw or handle based on policy.
-            // For now, we allow retrying loop if it wasn't explicitly fatal.
            logger.log('WARN', `[SelfHealing] ${name} unknown error (Attempt ${attempt + 1}). Retrying...`, { error: msg });
          }
        }
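The retry loop now distinguishes three outcomes. NON_RETRYABLE_ERRORS is defined elsewhere in the file and its entries are not visible in this diff, so the codes below are placeholders; only the branch order matches the code above:

// Placeholder codes -- the real NON_RETRYABLE_ERRORS list is outside this diff.
const NON_RETRYABLE_ERRORS = ['PERMISSION_DENIED', 'UNAUTHENTICATED'];

function classifyCommitError(msg) {
  if (NON_RETRYABLE_ERRORS.some(code => msg.includes(code))) return 'FATAL'; // throw immediately
  if (msg.includes('Transaction too big') || msg.includes('payload is too large')) return 'RESHARD';
  if (msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT')) return 'RESHARD'; // retry with tighter constraints
  return 'RETRY'; // unknown errors re-enter the loop with the same constraints
}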
@@ -137,8 +164,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
      }
      // ----------------------------------
 
-      // Mark Success
-      if (calc.manifest.hash) {
+      // Mark Success & Pass Metrics
+      if (calc.manifest.hash) {
+        successUpdates[name] = {
+          hash: calc.manifest.hash,
+          category: calc.manifest.category,
+          metrics: runMetrics // Pass metrics up
+        };
+      }
 
      // Capture Schema
      if (calc.manifest.class.getSchema) {
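This is the producer side of the successUpdates map that handleComputationTask consumes in the first hunk of this diff. Illustratively (hypothetical names and values):

// successUpdates as returned to the worker:
const successUpdates = {
  priceMomentum: {
    hash: 'a1b2c3d4',          // calc.manifest.hash; `false` for empty results
    category: 'daily',
    metrics: {
      storage: { sizeBytes: 52431, isSharded: false, shardCount: 1, keys: 412 },
      validation: { isValid: true, anomalies: [] }
    }
  }
};
// handleComputationTask then reads successUpdates[computation].metrics,
// stamps durationMs, and logs storage.sizeBytes in its success message.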
@@ -164,7 +197,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
      failureReport.push({
        name,
-        error: { message: msg, stack: e.stack, stage }
+        error: { message: msg, stack: e.stack, stage },
+        metrics: runMetrics // Pass incomplete metrics for debugging
      });
    }
  }
@@ -180,7 +214,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  * Deletes result documents from a previous category location.
  */
 async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
-
   const { db, logger, calculationUtils } = deps;
   const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
 
@@ -227,8 +260,6 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
   const OVERHEAD_ALLOWANCE = 20 * 1024;
   const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
 
-  // We only calculate totalSize loosely here for the "skip sharding" check.
-  // The loop below enforces the real limits.
   const totalSize = calculateFirestoreBytes(result);
   const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
 
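A small worked example of the chunk budget, assuming the truncated default in the hunk header ("maxBytes = 900 *") is 900 * 1024 bytes:

const maxBytes = 900 * 1024;                       // 921,600 bytes (assumed default)
const OVERHEAD_ALLOWANCE = 20 * 1024;              // 20,480 bytes reserved for doc path / field overhead
const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE; // 901,120 bytes per chunk,
                                                   // safely under Firestore's 1 MiB (1,048,576 byte) document limit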
@@ -1,53 +1,148 @@
 /**
- * @fileoverview Utility for recording computation run attempts (The
- *
+ * @fileoverview Utility for recording computation run attempts (The Audit Logger).
+ * REFACTORED: Organizes logs by Computation Name -> History.
+ * Implements aggregated error stats and advanced performance metrics.
  */
-
+
+const { FieldValue } = require('../utils/utils');
+const os = require('os');
+
+// Root collection for the new audit system
+const AUDIT_COLLECTION = 'computation_audit_logs';
+
+/**
+ * Sanitizes error messages to be used as Firestore Map keys.
+ * Replaces invalid characters (. / [ ] *) with underscores.
+ */
+function sanitizeErrorKey(message) {
+  if (!message) return 'Unknown_Error';
+  // Take first 100 chars to avoid key limit issues
+  const shortMsg = message.toString().substring(0, 100);
+  return shortMsg.replace(/[./\[\]*`]/g, '_').trim();
+}
 
 /**
- * Records a run attempt
- *
- * @param {Object} context -
- * @param {string}
- * @param {
- * @param {
+ * Records a run attempt with detailed metrics and aggregated stats.
+ * @param {Firestore} db - Firestore instance
+ * @param {Object} context - Context object
+ * @param {string} context.date - The "Target Date" of the computation
+ * @param {string} context.computation - The name of the calculation
+ * @param {string} context.pass - The topology pass number
+ * @param {string} status - 'SUCCESS', 'FAILURE', 'CRASH', or 'SKIPPED'
+ * @param {Object|null} error - Error object if failed
+ * @param {Object} detailedMetrics - Expanded metrics object (Optional, defaults provided)
+ * @param {number} [detailedMetrics.durationMs] - Execution time
+ * @param {Object} [detailedMetrics.storage] - { sizeBytes, isSharded, shardCount }
+ * @param {Object} [detailedMetrics.validation] - { isValid, anomalies: [] }
  */
-async function recordRunAttempt(db, context, status, error = null,
+async function recordRunAttempt(db, context, status, error = null, detailedMetrics = { durationMs: 0 }) {
   if (!db || !context) return;
 
-  const { date, computation, pass } = context;
-
-  const
+  const { date: targetDate, computation, pass } = context;
+  const now = new Date();
+  const triggerTimestamp = now.getTime();
+
+  // 1. Construct Paths
+  // Parent Doc: Stores global aggregates for this computation
+  const computationDocRef = db.collection(AUDIT_COLLECTION).doc(computation);
 
-
-
-
-
+  // History Doc: Stores this specific run
+  // ID Format: targetDate_triggerTimestamp (Sortable by data date, then execution time)
+  const runId = `${targetDate}_${triggerTimestamp}`;
+  const runDocRef = computationDocRef.collection('history').doc(runId);
+
+  // 2. Prepare Metrics & Environment Info
+  const workerId = process.env.FUNCTION_TARGET || process.env.K_REVISION || os.hostname();
+
+  // Calculate size in MB
+  let sizeMB = 0;
+  if (detailedMetrics.storage && detailedMetrics.storage.sizeBytes) {
+    sizeMB = Number((detailedMetrics.storage.sizeBytes / (1024 * 1024)).toFixed(4));
+  }
 
-
+  // Extract Validation Anomalies (Unusual Keys/Values)
+  const anomalies = detailedMetrics.validation?.anomalies || [];
+  if (error && error.message && error.message.includes('Data Integrity')) {
+    // If the error itself was a validation failure, add it to anomalies
+    anomalies.push(error.message);
+  }
+
+  // 3. Construct the Run Log Entry
+  const runEntry = {
+    // Identity
+    runId: runId,
     computationName: computation,
-    date: date,
     pass: String(pass),
-
+    workerId: workerId,
+
+    // Timing
+    targetDate: targetDate, // The date the data belongs to
+    triggerTime: now.toISOString(), // The date the code ran
+    durationMs: detailedMetrics.durationMs || 0,
+
+    // Status
     status: status,
-
+
+    // Data Metrics
+    outputStats: {
+      sizeMB: sizeMB,
+      isSharded: !!detailedMetrics.storage?.isSharded,
+      shardCount: detailedMetrics.storage?.shardCount || 1,
+      keysWritten: detailedMetrics.storage?.keys || 0 // If available
+    },
+
+    // Health & Diagnostics
+    anomalies: anomalies, // Logs "Consistent 0s", "N/As" etc.
+
+    // Metadata
+    _schemaVersion: '2.0'
   };
 
+  // Attach Error Details if present
   if (error) {
-
+    runEntry.error = {
      message: error.message || 'Unknown Error',
-      // Capture specific sharding/firestore stages if available
      stage: error.stage || 'UNKNOWN',
-
-
+      stack: error.stack ? error.stack.substring(0, 1000) : null, // Truncate stack
+      code: error.code || null
    };
   }
 
-  //
+  // 4. Prepare Aggregation Update (Atomic Increments)
+  const statsUpdate = {
+    lastRunAt: now,
+    lastRunStatus: status,
+    totalRuns: FieldValue.increment(1)
+  };
+
+  if (status === 'SUCCESS') {
+    statsUpdate.successCount = FieldValue.increment(1);
+  } else {
+    statsUpdate.failureCount = FieldValue.increment(1);
+    // Increment specific error type counter
+    if (error) {
+      const safeKey = sanitizeErrorKey(error.message);
+      statsUpdate[`errorCounts.${safeKey}`] = FieldValue.increment(1);
+    }
+  }
+
+  // 5. Execute as Batch
   try {
-
+    const batch = db.batch();
+
+    // Set the specific run log
+    batch.set(runDocRef, runEntry);
+
+    // Merge updates into the parent computation document
+    // We use { merge: true } implicitly with set or explicit update.
+    // Using set({ merge: true }) ensures doc creation if it doesn't exist.
+    batch.set(computationDocRef, statsUpdate, { merge: true });
+
+    await batch.commit();
+
   } catch (e) {
-
+    // Fallback logging if Firestore fails (prevents infinite loop crashing)
+    console.error(`[RunRecorder] ❌ CRITICAL: Failed to write audit log for ${computation}`, e);
   }
 }
 
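Putting the new recorder together: the resulting Firestore layout (paths from the code above) and a hypothetical call from inside an async worker context; document IDs and values are illustrative.

// Layout:
//   computation_audit_logs/{computation}                 <- aggregates: lastRunAt, lastRunStatus,
//                                                           totalRuns, successCount/failureCount,
//                                                           per-message errorCounts
//   computation_audit_logs/{computation}/history/{runId} <- one entry per run,
//                                                           runId = `${targetDate}_${triggerTimestamp}`

await recordRunAttempt(
  db,
  { date: '2024-05-01', computation: 'priceMomentum', pass: 2 },
  'FAILURE',
  { message: 'Transaction too big.', stage: 'COMMIT' },
  { durationMs: 1820, storage: { sizeBytes: 1953000, isSharded: true, shardCount: 3 } }
);

// sanitizeErrorKey('Transaction too big.') -> 'Transaction too big_'
// so the parent doc increments the errorCounts counter keyed by that sanitized message.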