bulltrackers-module 1.0.291 → 1.0.293
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/context/ManifestBuilder.js +5 -5
- package/functions/computation-system/helpers/computation_dispatcher.js +91 -37
- package/functions/computation-system/helpers/computation_worker.js +73 -75
- package/functions/computation-system/tools/BuildReporter.js +24 -119
- package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +10 -8
- package/package.json +1 -1
package/functions/computation-system/context/ManifestBuilder.js

@@ -89,12 +89,12 @@ function getDependencySet(endpoints, adjacencyList) {
  * Returns a string description of the first cycle found.
  */
 function detectCircularDependencies(manifestMap) {
-  let index
-  const stack
-  const indices
+  let index = 0;
+  const stack = [];
+  const indices = new Map();
   const lowLinks = new Map();
-  const onStack
-  const cycles
+  const onStack = new Set();
+  const cycles = [];
 
   function strongconnect(v) {
     indices.set(v, index);
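The initializations above are the standard state for a Tarjan-style strongly-connected-components walk (`indices`, `lowLinks`, `onStack`). The rest of `detectCircularDependencies` is outside this hunk; purely as a hedged illustration, a minimal self-contained sketch of that pattern, assuming a `manifestMap` of `name -> { dependencies: [...] }` (a hypothetical shape, not the package's actual code):

```js
// Illustrative sketch only — not the package's implementation.
// Assumes manifestMap: Map<string, { dependencies: string[] }>.
function findFirstCycle(manifestMap) {
  let index = 0;
  const stack = [];
  const indices = new Map();
  const lowLinks = new Map();
  const onStack = new Set();
  const cycles = [];

  function strongconnect(v) {
    indices.set(v, index);
    lowLinks.set(v, index);
    index++;
    stack.push(v);
    onStack.add(v);

    const deps = manifestMap.get(v)?.dependencies || [];
    for (const w of deps) {
      if (!manifestMap.has(w)) continue; // unknown dependency: ignore here
      if (!indices.has(w)) {
        strongconnect(w);
        lowLinks.set(v, Math.min(lowLinks.get(v), lowLinks.get(w)));
      } else if (onStack.has(w)) {
        lowLinks.set(v, Math.min(lowLinks.get(v), indices.get(w)));
      }
    }

    // v is the root of a strongly connected component: pop it off the stack.
    if (lowLinks.get(v) === indices.get(v)) {
      const component = [];
      let w;
      do { w = stack.pop(); onStack.delete(w); component.push(w); } while (w !== v);
      if (component.length > 1) cycles.push(component.reverse().join(' -> '));
    }
  }

  for (const name of manifestMap.keys()) {
    if (!indices.has(name)) strongconnect(name);
  }
  return cycles[0] || null; // string description of the first cycle found
}
```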
package/functions/computation-system/helpers/computation_dispatcher.js

@@ -2,6 +2,7 @@
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
  * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
+ * UPDATED: Implements Forensic Crash Analysis & Intelligent Resource Routing.
  */
 
 const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');

@@ -13,12 +14,49 @@ const { generateCodeHash } = require('../topology/HashManag
 const pLimit = require('p-limit');
 const crypto = require('crypto');
 
-const
-
+const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
+
+// Threshold to trigger high-mem routing (e.g., 1.5 GB for a 2GB worker)
+const OOM_THRESHOLD_MB = 1500;
+
+/**
+ * [NEW] Forensics: Checks if the calculation crashed previously due to Memory.
+ * Reads the 'telemetry.lastMemory' from the audit ledger.
+ */
+async function checkCrashForensics(db, date, pass, computationName) {
+  try {
+    const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computationName}`;
+    const doc = await db.doc(ledgerPath).get();
+
+    if (!doc.exists) return 'standard';
+
+    const data = doc.data();
+
+    // Check if we have telemetry from a previous run
+    if (data.telemetry && data.telemetry.lastMemory) {
+      const lastRSS = data.telemetry.lastMemory.rssMB || 0;
+
+      if (lastRSS > OOM_THRESHOLD_MB) {
+        console.log(`[Dispatcher] 🕵️♀️ Forensics: ${computationName} likely OOM'd at ${lastRSS}MB. Routing to HIGH-MEM.`);
+        return 'high-mem';
+      }
+    }
+
+    // Also check if it's explicitly marked FAILED with 'Memory' in error
+    if (data.status === 'FAILED' && data.error && /memory/i.test(data.error)) {
+      return 'high-mem';
+    }
+
+  } catch (e) {
+    console.warn(`[Dispatcher] Forensics check failed for ${computationName}: ${e.message}`);
+  }
+
+  return 'standard';
+}
 
 /**
  * Dispatches computation tasks for a specific pass.
- * @param {Object} config - System config
+ * @param {Object} config - System config (Injected with topics)
  * @param {Object} dependencies - { db, logger, ... }
  * @param {Array} computationManifest - List of calculations
  * @param {Object} reqBody - (Optional) HTTP Body containing 'callbackUrl' and 'date'
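Because `checkCrashForensics` only reads one audit-ledger document, the routing decision can be sanity-checked in isolation. A hedged sketch with a stubbed `db` that mimics the small slice of the Firestore API the function touches (`doc(path).get()` returning `{ exists, data() }`); the stub, date, and values are illustrative only and assume `checkCrashForensics` is in scope:

```js
// Minimal stand-in for the Firestore client: db.doc(path).get().
function makeDbStub(docsByPath) {
  return {
    doc: (path) => ({
      get: async () => {
        const data = docsByPath[path];
        return { exists: !!data, data: () => data };
      }
    })
  };
}

(async () => {
  const path = 'computation_audit_ledger/2024-01-01/passes/1/tasks/myCalc';
  const db = makeDbStub({
    [path]: { status: 'FAILED', telemetry: { lastMemory: { rssMB: 1900 } } }
  });
  // Expected: 'high-mem', because 1900 MB exceeds OOM_THRESHOLD_MB (1500).
  console.log(await checkCrashForensics(db, '2024-01-01', '1', 'myCalc'));
})();
```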
@@ -28,7 +66,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   const pubsubUtils = new PubSubUtils(dependencies);
   const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
 
-  //
+  // Extract Date and Callback from request body (pushed by Workflow)
   const dateStr = reqBody.date || config.date;
   const callbackUrl = reqBody.callbackUrl || null;
 

@@ -44,18 +82,14 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
   if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
 
-  const calcNames = calcsInThisPass.map(c => c.name);
   logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} for ${dateStr}`);
 
-  // -- DATE ANALYSIS LOGIC
-  const passEarliestDate = Object.values(DEFINITIVE_EARLIEST_DATES).reduce((a, b) => a < b ? a : b);
-  const endDateUTC = new Date(Date.UTC(new Date().getUTCFullYear(), new Date().getUTCMonth(), new Date().getUTCDate() - 1));
-
-  // We only analyze the specific requested date to keep dispatch fast for the workflow
+  // -- DATE ANALYSIS LOGIC --
   const allExpectedDates = [dateStr];
-
   const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
   const tasksToDispatch = [];
+
+  // Concurrency limit for analysis & forensics
   const limit = pLimit(20);
 
   const analysisPromises = allExpectedDates.map(d => limit(async () => {

@@ -87,18 +121,16 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
       const report = analyzeDateExecution(d, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
 
+      // Handle Status Updates (Impossible / Blocked)
       const statusUpdates = {};
-
       report.impossible.forEach(item => {
         if (dailyStatus[item.name]?.hash !== STATUS_IMPOSSIBLE) {
           statusUpdates[item.name] = { hash: STATUS_IMPOSSIBLE, category: 'unknown', reason: item.reason };
         }
       });
-
       report.blocked.forEach(item => {
         statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
       });
-
       report.failedDependency.forEach(item => {
         const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
         statusUpdates[item.name] = { hash: false, category: 'unknown', reason: `Dependency Missing: ${missingStr}` };

@@ -109,21 +141,29 @@ async function dispatchComputationPass(config, dependencies, computationManifest
       }
 
       const validToRun = [...report.runnable, ...report.reRuns];
-
+
+      // [NEW] Parallel Forensics Check
+      await Promise.all(validToRun.map(item => limit(async () => {
+        const compName = normalizeName(item.name);
+
+        // 1. Determine Resource Requirements
+        const requiredResource = await checkCrashForensics(db, d, passToRun, compName);
+
         const uniqueDispatchId = crypto.randomUUID();
         tasksToDispatch.push({
           action: 'RUN_COMPUTATION_DATE',
           dispatchId: uniqueDispatchId,
           date: d,
           pass: passToRun,
-          computation:
+          computation: compName,
           hash: item.hash || item.newHash,
           previousCategory: item.previousCategory || null,
           triggerReason: item.reason || "Unknown",
           dependencyResultHashes: item.dependencyResultHashes || {},
-          timestamp: Date.now()
+          timestamp: Date.now(),
+          resources: requiredResource // 'standard' or 'high-mem'
         });
-      });
+      })));
 
     } catch (e) {
       logger.log('ERROR', `[Dispatcher] Failed analysis for ${d}: ${e.message}`);

@@ -132,10 +172,9 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
   await Promise.all(analysisPromises);
 
-  // --
+  // -- CALLBACK & COUNTER INITIALIZATION --
 
   if (tasksToDispatch.length > 0) {
-    logger.log('INFO', `[Dispatcher] 📝 Preparing ${tasksToDispatch.length} tasks for execution...`);
 
     // 1. Initialize Shared State Document (The Counter)
     const runId = crypto.randomUUID();

@@ -147,14 +186,14 @@ async function dispatchComputationPass(config, dependencies, computationManifest
       date: dateStr,
       pass: passToRun,
       totalTasks: tasksToDispatch.length,
-      remainingTasks: tasksToDispatch.length,
-      callbackUrl: callbackUrl,
+      remainingTasks: tasksToDispatch.length,
+      callbackUrl: callbackUrl,
       status: 'IN_PROGRESS'
     });
-    logger.log('INFO', `[Dispatcher] 🏁 Run State Initialized: ${runId}`);
+    logger.log('INFO', `[Dispatcher] 🏁 Run State Initialized: ${runId}. Tasks: ${tasksToDispatch.length}`);
   }
 
-  // 2. Attach Run Metadata
+  // 2. Attach Run Metadata
   tasksToDispatch.forEach(task => {
     task.runId = runId;
     task.metaStatePath = callbackUrl ? metaStatePath : null;

@@ -180,12 +219,13 @@ async function dispatchComputationPass(config, dependencies, computationManifest
       t.set(ledgerRef, {
         status: 'PENDING',
         dispatchId: task.dispatchId,
-        runId: task.runId,
+        runId: task.runId,
         computation: task.computation,
         expectedHash: task.hash || 'unknown',
         createdAt: new Date(),
         dispatcherHash: currentManifestHash,
         triggerReason: task.triggerReason,
+        resources: task.resources, // Log intended resource type
         retries: 0
       }, { merge: true });
 

@@ -201,22 +241,36 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 
   await Promise.all(txnPromises);
 
-  // 4. Publish to Pub/Sub
+  // 4. Publish to Pub/Sub (Segregated by Resources)
   if (finalDispatched.length > 0) {
-    logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} tasks to Pub/Sub...`);
 
-
-
-
-
-
+    const standardTasks = finalDispatched.filter(t => t.resources !== 'high-mem');
+    const highMemTasks = finalDispatched.filter(t => t.resources === 'high-mem');
+
+    // Publish Standard
+    if (standardTasks.length > 0) {
+      logger.log('INFO', `[Dispatcher] ✅ Publishing ${standardTasks.length} Standard tasks...`);
+      await pubsubUtils.batchPublishTasks(dependencies, {
+        topicName: config.computationTopicStandard || 'computation-tasks',
+        tasks: standardTasks,
+        taskType: `computation-pass-${passToRun}-std`,
+        maxPubsubBatchSize: 100
+      });
+    }
+
+    // Publish High-Mem
+    if (highMemTasks.length > 0) {
+      logger.log('INFO', `[Dispatcher] 🏋️♀️ Publishing ${highMemTasks.length} tasks to HIGH-MEM infrastructure.`);
+      await pubsubUtils.batchPublishTasks(dependencies, {
+        topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
+        tasks: highMemTasks,
+        taskType: `computation-pass-${passToRun}-highmem`,
+        maxPubsubBatchSize: 100
+      });
+    }
 
-    // Return count so workflow knows to wait
     return { dispatched: finalDispatched.length, runId };
   } else {
-    // Edge Case: Analysis said "Run", but Ledger said "Already Done"
-    // We must update the state doc to 0 or delete it, OR return 0 so workflow doesn't wait.
    logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED.`);
    return { dispatched: 0 };
  }
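The split above expects two topic names to be injected into `config`, falling back to the defaults shown when they are absent; the actual topic and subscriber provisioning is outside this diff. A hedged sketch of the config fragment the dispatcher reads and of how the per-task `resources` flag drives the routing (field names taken from the diff, values illustrative):

```js
// Illustrative config fragment consumed by dispatchComputationPass.
const config = {
  COMPUTATION_PASS_TO_RUN: '1',
  // Standard-memory workers subscribe to this topic.
  computationTopicStandard: 'computation-tasks',
  // High-memory workers subscribe here; tasks whose last run exceeded
  // OOM_THRESHOLD_MB (or failed with a memory error) are routed to it.
  computationTopicHighMem: 'computation-tasks-highmem'
};

// Routing is driven by the per-task 'resources' field set during forensics.
const tasks = [
  { computation: 'a', resources: 'standard' },
  { computation: 'b', resources: 'high-mem' }
];
const highMem = tasks.filter(t => t.resources === 'high-mem');  // -> [ 'b' ]
const standard = tasks.filter(t => t.resources !== 'high-mem'); // -> [ 'a' ]
```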
@@ -227,4 +281,4 @@ async function dispatchComputationPass(config, dependencies, computationManifest
   }
 }
 
-module.exports = { dispatchComputationPass };
+module.exports = { dispatchComputationPass };

package/functions/computation-system/helpers/computation_worker.js

@@ -2,6 +2,7 @@
  * FILENAME: computation-system/helpers/computation_worker.js
  * PURPOSE: Consumes tasks, executes logic, and signals Workflow upon Batch Completion.
  * UPDATED: Implements IAM Auth for Workflow Callbacks.
+ * UPDATED: Implements Memory Heartbeat (Flight Recorder) for OOM detection.
  */
 
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');

@@ -9,7 +10,7 @@ const { getManifest } = require('../topology/ManifestLoader');
 const { StructuredLogger } = require('../logger/logger');
 const { recordRunAttempt } = require('../persistence/RunRecorder');
 const https = require('https');
-const { GoogleAuth } = require('google-auth-library');
+const { GoogleAuth } = require('google-auth-library');
 
 let calculationPackage;
 try { calculationPackage = require('aiden-shared-calculations-unified');
@@ -19,51 +20,68 @@ const calculations = calculationPackage.calculations;
 const MAX_RETRIES = 3;
 
 /**
- * [NEW] Helper:
- *
+ * [NEW] Helper: Starts a background heartbeat to track memory usage.
+ * This acts as a "Black Box Recorder". If the worker crashes (OOM),
+ * the last written value will remain in Firestore for the Dispatcher to analyze.
+ */
+function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
+  const getMemStats = () => {
+    const mem = process.memoryUsage();
+    return {
+      rssMB: Math.round(mem.rss / 1024 / 1024), // Resident Set Size (OOM Killer Metric)
+      heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
+      timestamp: new Date()
+    };
+  };
+
+  const timer = setInterval(async () => {
+    try {
+      const stats = getMemStats();
+      // Use update() to minimize payload size and avoid overwriting status
+      await db.doc(ledgerPath).update({
+        'telemetry.lastMemory': stats,
+        'telemetry.lastHeartbeat': new Date()
+      }).catch(() => {}); // Ignore write errors to prevent crashing the worker
+    } catch (e) {
+      // Silently fail on telemetry errors
+    }
+  }, intervalMs);
+
+  // Unref so this timer doesn't prevent the process from exiting naturally
+  timer.unref();
+
+  return timer;
+}
+
+/**
+ * Helper: Fires the webhook back to Google Cloud Workflows.
  */
 async function triggerWorkflowCallback(url, status, logger) {
   if (!url) return;
   logger.log('INFO', `[Worker] 🔔 BATCH COMPLETE! Triggering Workflow Callback: ${status}`);
 
   try {
-
-    const auth = new GoogleAuth({
-      scopes: ['https://www.googleapis.com/auth/cloud-platform']
-    });
+    const auth = new GoogleAuth({ scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
     const client = await auth.getClient();
     const accessToken = await client.getAccessToken();
     const token = accessToken.token;
 
-    // 2. Send Authenticated Request
     return new Promise((resolve, reject) => {
-      const body = JSON.stringify({
-        status: status,
-        timestamp: new Date().toISOString()
-      });
+      const body = JSON.stringify({ status: status, timestamp: new Date().toISOString() });
 
       const req = https.request(url, {
         method: 'POST',
         headers: {
           'Content-Type': 'application/json',
           'Content-Length': Buffer.byteLength(body),
-          'Authorization': `Bearer ${token}`
+          'Authorization': `Bearer ${token}`
         }
       }, (res) => {
-        if (res.statusCode >= 200 && res.statusCode < 300) {
-
-        } else {
-          logger.log('WARN', `Callback responded with ${res.statusCode}`);
-          // We resolve anyway to avoid crashing the worker logic
-          resolve();
-        }
+        if (res.statusCode >= 200 && res.statusCode < 300) { resolve(); }
+        else { logger.log('WARN', `Callback responded with ${res.statusCode}`); resolve(); }
       });
 
-      req.on('error', (e) => {
-        logger.log('ERROR', `Failed to trigger callback: ${e.message}`);
-        resolve();
-      });
-
+      req.on('error', (e) => { logger.log('ERROR', `Failed to trigger callback: ${e.message}`); resolve(); });
       req.write(body);
       req.end();
     });
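`startMemoryHeartbeat` only starts the interval; the caller owns its lifetime and must `clearInterval` on every exit path, which is exactly what the worker changes further down do. A hedged sketch of wrapping a unit of work with the recorder, assuming the function above is in scope and `db` is a Firestore client (names are illustrative):

```js
async function runWithFlightRecorder(db, ledgerPath, work) {
  // Writes telemetry.lastMemory / lastHeartbeat every 2s while 'work' runs.
  const timer = startMemoryHeartbeat(db, ledgerPath, 2000);
  try {
    return await work();
  } finally {
    // Always stop the recorder on a normal exit. On an OOM kill this line
    // never runs, which is why the last heartbeat survives in Firestore
    // for the dispatcher's forensics check.
    clearInterval(timer);
  }
}
```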
@@ -73,37 +91,21 @@ async function triggerWorkflowCallback(url, status, logger) {
 }
 
 /**
- *
- * Returns the callbackUrl IF this was the last task.
+ * Helper: Decrements 'remainingTasks' in Firestore.
  */
 async function decrementAndCheck(db, metaStatePath, logger) {
   if (!metaStatePath) return null;
-
   try {
     const result = await db.runTransaction(async (t) => {
       const ref = db.doc(metaStatePath);
       const doc = await t.get(ref);
-
-      if (!doc.exists) return null; // State might have expired or been deleted
+      if (!doc.exists) return null;
       const data = doc.data();
-
       const newRemaining = (data.remainingTasks || 0) - 1;
-
-
-        remainingTasks: newRemaining,
-        lastUpdated: new Date()
-      });
-
-      // Return needed data only if we hit 0 (or lower, for safety)
-      return {
-        remaining: newRemaining,
-        callbackUrl: data.callbackUrl
-      };
+      t.update(ref, { remainingTasks: newRemaining, lastUpdated: new Date() });
+      return { remaining: newRemaining, callbackUrl: data.callbackUrl };
     });
-
-    if (result && result.remaining <= 0) {
-      return result.callbackUrl;
-    }
+    if (result && result.remaining <= 0) return result.callbackUrl;
   } catch (e) {
     logger.log('ERROR', `[Worker] Failed to decrement batch counter: ${e.message}`);
   }

@@ -125,12 +127,12 @@ async function handleComputationTask(message, config, dependencies) {
 
   if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
 
-  // Extract fields including new metaStatePath
   const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, metaStatePath } = data;
 
   if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
 
   const retryCount = message.deliveryAttempt || 1;
+  const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
 
   // --- POISON MESSAGE HANDLING (DLQ) ---
   if (retryCount > MAX_RETRIES) {

@@ -144,36 +146,38 @@ async function handleComputationTask(message, config, dependencies) {
         failureReason: 'MAX_RETRIES_EXCEEDED'
       });
 
-      await db.
+      await db.doc(ledgerPath).set({
         status: 'FAILED',
         error: 'Max Retries Exceeded (Poison Message)',
         failedAt: new Date()
       }, { merge: true });
 
       const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-      if (callbackUrl) {
-        await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
-      }
-
+      if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
       return;
     } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
   }
 
   logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`);
 
-  // Update Status to IN_PROGRESS
+  // 1. Update Status to IN_PROGRESS & Initialize Telemetry
   try {
-    await db.
+    await db.doc(ledgerPath).set({
       status: 'IN_PROGRESS',
       workerId: process.env.K_REVISION || 'unknown',
       startedAt: new Date(),
-      dispatchId: dispatchId
+      dispatchId: dispatchId,
+      telemetry: { startTime: new Date(), lastMemory: null } // Init for heartbeat
     }, { merge: true });
   } catch (leaseErr) {}
 
+  // 2. START HEARTBEAT (The Flight Recorder)
+  const heartbeatTimer = startMemoryHeartbeat(db, ledgerPath, 2000);
+
   let computationManifest;
   try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
   } catch (manifestError) {
+    clearInterval(heartbeatTimer); // Stop if we fail early
     logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
     return;
   }

@@ -186,6 +190,9 @@ async function handleComputationTask(message, config, dependencies) {
     );
     const duration = Date.now() - startTime;
 
+    // STOP HEARTBEAT ON SUCCESS
+    clearInterval(heartbeatTimer);
+
     const failureReport = result?.updates?.failureReport || [];
     const successUpdates = result?.updates?.successUpdates || {};
 

@@ -194,26 +201,23 @@ async function handleComputationTask(message, config, dependencies) {
       throw new Error(failReason.error.message || 'Computation Logic Failed');
     }
     else {
-      if (Object.keys(successUpdates).length > 0) {
-
-      } else {
-        logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`);
-      }
+      if (Object.keys(successUpdates).length > 0) { logger.log('INFO', `[Worker] ✅ Stored: ${computation}`); }
+      else { logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`); }
 
-      await db.
+      await db.doc(ledgerPath).update({
        status: 'COMPLETED',
        completedAt: new Date()
      }).catch(() => {});
 
      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, { durationMs: duration }, triggerReason);
 
-      // Decrement & Callback
      const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-      if (callbackUrl) {
-        await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
-      }
+      if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
    }
  } catch (err) {
+    // STOP HEARTBEAT ON ERROR
+    clearInterval(heartbeatTimer);
+
    // --- ERROR HANDLING ---
    const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
                                 err.stage === 'QUALITY_CIRCUIT_BREAKER' ||

@@ -231,7 +235,7 @@ async function handleComputationTask(message, config, dependencies) {
         failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
       });
 
-      await db.
+      await db.doc(ledgerPath).set({
         status: 'FAILED',
         error: err.message || 'Permanent Deterministic Error',
         failedAt: new Date()

@@ -240,23 +244,17 @@ async function handleComputationTask(message, config, dependencies) {
       await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
 
       const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-      if (callbackUrl) {
-        await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
-      }
-
+      if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
       return;
     } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
   }
 
-  if (retryCount >= MAX_RETRIES) {
-    throw err;
-  }
+  if (retryCount >= MAX_RETRIES) { throw err; }
 
   logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
   await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
-
   throw err;
 }
 }
 
-module.exports = { handleComputationTask };
+module.exports = { handleComputationTask };
package/functions/computation-system/tools/BuildReporter.js

@@ -6,41 +6,29 @@ const SimRunner = require('../simulation/SimRunner');
 const pLimit = require('p-limit');
 const path = require('path');
 const crypto = require('crypto');
-const fs = require('fs');
+const fs = require('fs');
 const packageJson = require(path.join(__dirname, '..', '..', '..', 'package.json'));
 const packageVersion = packageJson.version;
-const { generateCodeHash } = require('../utils/utils');
+const { generateCodeHash } = require('../utils/utils');
 
-// Persistent Registry for SimHashes
 const SIMHASH_REGISTRY_COLLECTION = 'system_simhash_registry';
 
-// ---
-
-// 1. define the Root of the system (one level up from 'tools')
+// --- RECURSIVE SYSTEM HASHING ---
 const SYSTEM_ROOT = path.resolve(__dirname, '..');
-
-// 2. Define what to ignore to prevent noise or infinite loops
 const IGNORED_DIRS = new Set(['node_modules', '.git', '.idea', 'coverage', 'logs', 'tests']);
 const IGNORED_FILES = new Set(['package-lock.json', '.DS_Store', '.env']);
 
-/**
- * Recursively walks a directory and returns a list of file paths.
- */
 function walkSync(dir, fileList = []) {
   const files = fs.readdirSync(dir);
-
   files.forEach(file => {
     if (IGNORED_FILES.has(file)) return;
-
     const filePath = path.join(dir, file);
     const stat = fs.statSync(filePath);
-
     if (stat.isDirectory()) {
       if (!IGNORED_DIRS.has(file)) {
         walkSync(filePath, fileList);
       }
     } else {
-      // Only hash code files (add .yaml if you want workflows included)
       if (file.endsWith('.js') || file.endsWith('.json') || file.endsWith('.yaml')) {
         fileList.push(filePath);
       }

@@ -49,69 +37,41 @@ function walkSync(dir, fileList = []) {
   return fileList;
 }
 
-/**
- * Generates a single hash representing the entire infrastructure code state.
- */
 function getInfrastructureHash() {
   try {
     const allFiles = walkSync(SYSTEM_ROOT);
-    allFiles.sort();
-
+    allFiles.sort();
     const bigHash = crypto.createHash('sha256');
-
     for (const filePath of allFiles) {
       const content = fs.readFileSync(filePath, 'utf8');
       const relativePath = path.relative(SYSTEM_ROOT, filePath);
-
-      // DECISION: How to clean?
       let cleanContent = content;
-
-      // 1. If it's JS, use your system standard for code hashing
       if (filePath.endsWith('.js')) {
-        // This strips comments and whitespace consistently with ManifestBuilder
-        // Note: generateCodeHash returns a hash, we can just use that hash
         cleanContent = generateCodeHash(content);
-      }
-      // 2. If it's JSON/YAML, just strip basic whitespace to ignore indent changes
-      else {
+      } else {
        cleanContent = content.replace(/\s+/g, '');
      }
-
-      // Feed the PATH and the CLEAN CONTENT into the master hash
      bigHash.update(`${relativePath}:${cleanContent}|`);
    }
-
    return bigHash.digest('hex');
  } catch (e) {
    console.warn(`[BuildReporter] ⚠️ Failed to generate infra hash: ${e.message}`);
-    return 'infra_hash_error';
+    return 'infra_hash_error';
  }
 }
 
-/**
- * UPDATED: System Fingerprint = Manifest Hash + Infrastructure Hash
- */
 function getSystemFingerprint(manifest) {
-  // 1. Business Logic Hash (The Calculations)
   const sortedManifestHashes = manifest.map(c => c.hash).sort().join('|');
-
-  // 2. Infrastructure Hash (The System Code)
   const infraHash = getInfrastructureHash();
-
-  // 3. Combine
   return crypto.createHash('sha256')
     .update(sortedManifestHashes + infraHash)
     .digest('hex');
 }
 
-/**
- * Helper: Determines if a calculation should be excluded from the report.
- */
 function isDateBeforeAvailability(dateStr, calcManifest) {
   const targetDate = new Date(dateStr + 'T00:00:00Z');
   const deps = calcManifest.rootDataDependencies || [];
   if (deps.length === 0) return false;
-
   for (const dep of deps) {
     let startDate = null;
     if (dep === 'portfolio') startDate = DEFINITIVE_EARLIEST_DATES.portfolio;
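`getSystemFingerprint` above hashes the sorted per-calculation hashes together with the recursive infrastructure hash, so a change to either business logic or system code yields a new fingerprint. A small worked illustration of that composition using Node's core `crypto` (the hash values below are made up):

```js
const crypto = require('crypto');

// Hypothetical per-calculation hashes from the manifest.
const manifestHashes = ['c0ffee', 'deadbeef', 'abc123'];
const sorted = manifestHashes.sort().join('|'); // 'abc123|c0ffee|deadbeef'

// Hypothetical infrastructure hash (in the real code: getInfrastructureHash()).
const infraHash = 'f00d1234';

const fingerprint = crypto.createHash('sha256')
  .update(sorted + infraHash)
  .digest('hex');
// Any change to a calculation hash OR to the infra hash changes 'fingerprint',
// which is what the SKIPPED_IDENTICAL optimisation below keys off.
console.log(fingerprint);
```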
@@ -119,19 +79,14 @@ function isDateBeforeAvailability(dateStr, calcManifest) {
     else if (dep === 'social') startDate = DEFINITIVE_EARLIEST_DATES.social;
     else if (dep === 'insights') startDate = DEFINITIVE_EARLIEST_DATES.insights;
     else if (dep === 'price') startDate = DEFINITIVE_EARLIEST_DATES.price;
-
     if (startDate && targetDate < startDate) { return true; }
   }
   return false;
 }
 
-/**
- * Helper: Calculates the transitive closure of dependents (Blast Radius).
- */
 function calculateBlastRadius(targetCalcName, reverseGraph) {
   const impactSet = new Set();
   const queue = [targetCalcName];
-
   while(queue.length > 0) {
     const current = queue.shift();
     const dependents = reverseGraph.get(current) || [];

@@ -142,7 +97,6 @@ function calculateBlastRadius(targetCalcName, reverseGraph) {
       }
     });
   }
-
   return {
     directDependents: (reverseGraph.get(targetCalcName) || []).length,
     totalCascadingDependents: impactSet.size,

@@ -150,14 +104,11 @@ function calculateBlastRadius(targetCalcName, reverseGraph) {
   };
 }
 
-/**
- * [NEW] Helper: Runs SimHash check with Caching and Registry Persistence.
- */
 async function verifyBehavioralStability(candidates, manifestMap, dailyStatus, logger, simHashCache, db) {
   const trueReRuns = [];
   const stableUpdates = [];
 
-  //
+  // Concurrency for simulations
   const limit = pLimit(10);
 
   const checks = candidates.map(item => limit(async () => {

@@ -170,16 +121,10 @@ async function verifyBehavioralStability(candidates, manifestMap, dailyStatus, l
       return;
     }
 
-    // 1. Check Cache first (Avoid re-simulating the same code for 100 different dates)
     let newSimHash = simHashCache.get(manifest.hash);
-
-    // 2. If Miss, Run Simulation & Persist to Registry
     if (!newSimHash) {
       newSimHash = await SimRunner.run(manifest, manifestMap);
       simHashCache.set(manifest.hash, newSimHash);
-
-      // Write to Registry so Production Workers can find it without running SimRunner
-      // Fire-and-forget write to reduce latency
       db.collection(SIMHASH_REGISTRY_COLLECTION).doc(manifest.hash).set({
         simHash: newSimHash,
         createdAt: new Date(),

@@ -187,17 +132,14 @@ async function verifyBehavioralStability(candidates, manifestMap, dailyStatus, l
       }).catch(err => logger.log('WARN', `Failed to write SimHash registry for ${manifest.name}: ${err.message}`));
     }
 
-    // 3. Compare
     if (newSimHash === stored.simHash) {
-      // BEHAVIORAL MATCH: Code changed, but output is identical.
       stableUpdates.push({
         ...item,
         reason: "Code Updated (Logic Stable)",
-        simHash: newSimHash,
-        newHash: manifest.hash
+        simHash: newSimHash,
+        newHash: manifest.hash
       });
     } else {
-      // BEHAVIORAL MISMATCH: Logic changed.
       trueReRuns.push({
         ...item,
         reason: item.reason + ` [SimHash Mismatch]`,

@@ -215,9 +157,6 @@ async function verifyBehavioralStability(candidates, manifestMap, dailyStatus, l
   return { trueReRuns, stableUpdates };
 }
 
-/**
- * AUTO-RUN ENTRY POINT
- */
 async function ensureBuildReport(config, dependencies, manifest) {
   const { db, logger } = dependencies;
   const now = new Date();

@@ -234,40 +173,28 @@ async function ensureBuildReport(config, dependencies, manifest) {
 
   if (!shouldRun) { logger.log('INFO', `[BuildReporter] 🔒 Report for v${packageVersion} locked. Skipping.`); return; }
 
-  // [NEW] 1. Calculate Current System Fingerprint
   const currentSystemHash = getSystemFingerprint(manifest);
-
-  // [NEW] 2. Fetch Last Build's Fingerprint
   const latestBuildDoc = await db.collection('computation_build_records').doc('latest').get();
 
   if (latestBuildDoc.exists) {
     const latestData = latestBuildDoc.data();
-
-    // [OPTIMIZATION] If signatures match, we can clone the report or just skip
     if (latestData.systemFingerprint === currentSystemHash) {
       logger.log('INFO', `[BuildReporter] ⚡ System Fingerprint (${currentSystemHash.substring(0,8)}) matches latest build. Skipping Report.`);
-
-      // Create a "Skipped" record for the new version so we know it deployed
       await db.collection('computation_build_records').doc(buildId).set({
         buildId,
         packageVersion,
         systemFingerprint: currentSystemHash,
         status: 'SKIPPED_IDENTICAL',
-        referenceBuild: latestData.buildId,
+        referenceBuild: latestData.buildId,
         generatedAt: new Date().toISOString()
       });
-
-      // Release lock and exit
       lockRef.update({ status: 'SKIPPED', completedAt: new Date() }).catch(() => {});
       return;
     }
   }
 
   logger.log('INFO', `[BuildReporter] 🚀 Change Detected. Running Pre-flight Report for v${packageVersion}...`);
-
-  // Pass the fingerprint to generateBuildReport so it can save it
   await generateBuildReport(config, dependencies, manifest, 90, buildId, currentSystemHash);
-
   lockRef.update({ status: 'COMPLETED', completedAt: new Date() }).catch(() => {});
 
 } catch (e) {

@@ -275,14 +202,9 @@ async function ensureBuildReport(config, dependencies, manifest) {
   }
 }
 
-/**
- * Generates the report, writes to Firestore, AND FIXES STABLE UPDATES.
- */
 async function generateBuildReport(config, dependencies, manifest, daysBack = 90, customBuildId = null, systemFingerprint = null) {
   const { db, logger } = dependencies;
   const buildId = customBuildId || `manual_${Date.now()}`;
-
-  // Calculate fingerprint if not provided (for manual runs)
   const finalFingerprint = systemFingerprint || getSystemFingerprint(manifest);
 
   logger.log('INFO', `[BuildReporter] Generating Build Report: ${buildId} (Scope: ${daysBack} days, Fingerprint: ${finalFingerprint.substring(0,8)})...`);

@@ -293,8 +215,6 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 
   const datesToCheck = getExpectedDateStrings(startDate, today);
   const manifestMap = new Map(manifest.map(c => [normalizeName(c.name), c]));
-
-  // [OPTIMIZATION] Cache SimHashes across dates so we only calculate once per code version
   const simHashCache = new Map();
 
   const reverseGraph = new Map();

@@ -312,16 +232,16 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
   const reportHeader = {
     buildId,
     packageVersion,
-    systemFingerprint: finalFingerprint,
+    systemFingerprint: finalFingerprint,
     generatedAt: new Date().toISOString(),
     summary: {},
     _sharded: true
   };
 
   let totalRun = 0, totalReRun = 0, totalStable = 0;
-  const detailWrites = [];
 
-
+  // [FIX] Reduced concurrency from 20 to 5 to avoid Firestore DEADLINE_EXCEEDED
+  const limit = pLimit(5);
 
   const processingPromises = datesToCheck.map(dateStr => limit(async () => {
     try {
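The `pLimit(5)` change above caps how many dates are analysed and written against Firestore at once. For reference, a minimal sketch of the `p-limit` pattern the reporter uses (the `processDate` name is illustrative):

```js
const pLimit = require('p-limit');

const limit = pLimit(5); // at most 5 jobs in flight at a time

async function processAll(dates, processDate) {
  // Each call is queued; only 5 'processDate' invocations run concurrently,
  // which keeps Firestore traffic below the level that previously produced
  // DEADLINE_EXCEEDED errors with a limit of 20.
  return Promise.all(dates.map(d => limit(() => processDate(d))));
}
```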
@@ -374,29 +294,22 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
       // 1. RUN
       analysis.runnable.forEach(item => pushIfValid(dateSummary.run, item, "New Calculation"));
 
-      // 2. RE-RUN & STABLE Analysis
+      // 2. RE-RUN & STABLE Analysis
       if (analysis.reRuns.length > 0) {
-        // Pass simHashCache and db for registry writes
         const { trueReRuns, stableUpdates } = await verifyBehavioralStability(analysis.reRuns, manifestMap, dailyStatus, logger, simHashCache, db);
 
         trueReRuns.forEach(item => pushIfValid(dateSummary.rerun, item, "Logic Changed"));
         stableUpdates.forEach(item => pushIfValid(dateSummary.stable, item, "Cosmetic Change"));
 
-        // [CRITICAL FIX] "Fix the Blast Radius"
-        // If updates are STABLE, we update the status NOW.
-        // This implies: Code Hash changes, but Sim Hash stays same.
-        // The Dispatcher will see the new Code Hash in status matches the Manifest, so it won't dispatch.
         if (stableUpdates.length > 0) {
           const updatesPayload = {};
           for (const stable of stableUpdates) {
             const m = manifestMap.get(stable.name);
-            // We preserve the *existing* resultHash because the logic is proven stable.
-            // We update the 'hash' to the NEW code hash.
             if (m && dailyStatus[stable.name]) {
               updatesPayload[stable.name] = {
-                hash: m.hash,
-                simHash: stable.simHash,
-                resultHash: dailyStatus[stable.name].resultHash,
+                hash: m.hash,
+                simHash: stable.simHash,
+                resultHash: dailyStatus[stable.name].resultHash,
                 dependencyResultHashes: dailyStatus[stable.name].dependencyResultHashes || {},
                 category: m.category,
                 composition: m.composition,

@@ -404,7 +317,6 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
               };
             }
           }
-          // Perform the "Fix"
           if (Object.keys(updatesPayload).length > 0) {
             await updateComputationStatus(dateStr, updatesPayload, config, dependencies);
             logger.log('INFO', `[BuildReporter] 🩹 Fixed ${Object.keys(updatesPayload).length} stable items for ${dateStr}. They will NOT re-run.`);

@@ -413,22 +325,19 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
       }
 
       // 3. BLOCKED / IMPOSSIBLE / UPTODATE
-      analysis.blocked.forEach(item => pushIfValid(dateSummary.blocked,
-      analysis.failedDependency.forEach(item => pushIfValid(dateSummary.blocked,
-      analysis.impossible.forEach(item => pushIfValid(dateSummary.impossible, item));
-      analysis.skipped.forEach(item => pushIfValid(dateSummary.uptodate,
+      analysis.blocked.forEach (item => pushIfValid(dateSummary.blocked, item));
+      analysis.failedDependency.forEach (item => pushIfValid(dateSummary.blocked, item, "Dependency Missing"));
+      analysis.impossible.forEach (item => pushIfValid(dateSummary.impossible, item));
+      analysis.skipped.forEach (item => pushIfValid(dateSummary.uptodate, item, "Up To Date"));
 
       // Meta stats
-      const includedCount = dateSummary.run.length
+      const includedCount = dateSummary.run.length + dateSummary.rerun.length + dateSummary.stable.length +
                             dateSummary.blocked.length + dateSummary.impossible.length + dateSummary.uptodate.length;
       dateSummary.meta.totalIncluded = includedCount;
       dateSummary.meta.match = (includedCount === expectedCount);
 
-      //
-
-
-      // ADD THIS (Write immediately):
-      await db.collection('computation_build_records')
+      // Write Immediately
+      await db.collection('computation_build_records')
         .doc(buildId)
         .collection('details')
         .doc(dateStr)

@@ -450,10 +359,6 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 
   await db.collection('computation_build_records').doc(buildId).set(reportHeader);
 
-  // Parallel write details ---> Now redundant.
-  //const writeLimit = pLimit(15);
-  //await Promise.all(detailWrites.map(w => writeLimit(() => w.ref.set(w.data))));
-
   await db.collection('computation_build_records').doc('latest').set({ ...reportHeader, note: "Latest build report pointer." });
 
   logger.log('SUCCESS', `[BuildReporter] Report ${buildId} saved. Re-runs: ${totalReRun}, Stable (Fixed): ${totalStable}, New: ${totalRun}.`);
package/functions/computation-system/workflows/bulltrackers_pipeline.yaml

@@ -1,7 +1,6 @@
 # Cloud Workflows Definition for BullTrackers Computation Pipeline
 # Orchestrates 5 sequential passes using Event-Driven Callbacks (Zero Polling).
-# FIXED:
-# FIXED: Proper extraction of 'callback_details.url' for the dispatcher.
+# FIXED: Restored 'passes' and 'max_retries' variables in init step.
 
 main:
   params: [input]

@@ -10,8 +9,14 @@ main:
       assign:
         - project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
         - location: "europe-west1"
-
-        -
+
+        # T-1 Date Logic (Process Yesterday)
+        - now: ${sys.now()}
+        - yesterday_timestamp: ${now - 86400}
+        - yesterday_str: ${text.substring(time.format(yesterday_timestamp), 0, 10)}
+        - date_to_run: ${default(map.get(input, "date"), yesterday_str)}
+
+        # Configuration Variables (Restored)
         - passes: ["1", "2", "3", "4", "5"]
         - max_retries: 3
 

@@ -42,7 +47,6 @@ main:
           - attempt_count: ${attempt_count + 1}
 
       # 1. GENERATE CALLBACK ENDPOINT
-      # We use the 'events' library. This returns an object containing the URL.
      - create_callback:
          call: events.create_callback_endpoint
          args:

@@ -60,7 +64,6 @@ main:
            severity: "INFO"
 
      # 2. TRIGGER DISPATCHER
-      # We pass the extracted 'callback_url' string to the dispatcher.
      - trigger_dispatcher:
          call: http.post
          args:

@@ -89,12 +92,11 @@ main:
          next: pass_retry_loop
 
      # 4. WAIT FOR WORKER SIGNAL
-      # We must pass the original 'callback_details' object here, not the URL string.
      - wait_for_completion:
          call: events.await_callback
          args:
            callback: ${callback_details}
-          timeout:
+          timeout: 10800 # UPDATED: Reduced from 86400 (24h) to 10800 (3h) to detect crashes faster
          result: callback_request
 
      # 5. PROCESS SIGNAL