bulltrackers-module 1.0.306 → 1.0.307

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,314 +1,102 @@
 /**
  * FILENAME: computation-system/helpers/computation_worker.js
- * PURPOSE: Consumes tasks, executes logic, and signals Workflow upon Batch Completion.
- * UPDATED: Added "Contention-Aware Retry" for the Batch Counter to fix ABORTED errors.
+ * UPDATED: Removed redundant Callback and Sentinel logic.
+ * The system now relies on Dispatcher cursor satiation.
  */
 
 const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
 const { getManifest } = require('../topology/ManifestLoader');
 const { StructuredLogger } = require('../logger/logger');
 const { recordRunAttempt } = require('../persistence/RunRecorder');
-const https = require('https');
-const { GoogleAuth } = require('google-auth-library');
 const { normalizeName } = require('../utils/utils');
+const os = require('os');
 
 let calculationPackage;
-try { calculationPackage = require('aiden-shared-calculations-unified');
-} catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
+try { calculationPackage = require('aiden-shared-calculations-unified'); } catch (e) { throw e; }
 const calculations = calculationPackage.calculations;
 
 const MAX_RETRIES = 3;
 
-/**
- * [UPDATED] Heartbeat now returns a closure to get the PEAK memory.
- * This acts as a "Black Box Recorder".
- */
+/** Black Box Recorder for Peak Memory. */
 function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
   let peakRss = 0;
-
-  const getMemStats = () => {
-    const mem = process.memoryUsage();
-    const rssMB = Math.round(mem.rss / 1024 / 1024);
-    if (rssMB > peakRss) peakRss = rssMB;
-
-    return {
-      rssMB: rssMB,
-      heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
-      timestamp: new Date()
-    };
-  };
-
   const timer = setInterval(async () => {
-    try {
-      const stats = getMemStats();
-      // Use update() to minimize payload size and avoid overwriting status
-      await db.doc(ledgerPath).update({
-        'telemetry.lastMemory': stats,
-        'telemetry.lastHeartbeat': new Date()
-      }).catch(() => {}); // Ignore write errors to prevent crashing the worker
-    } catch (e) {
-      // Silently fail on telemetry errors
-    }
+    const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
+    if (rssMB > peakRss) peakRss = rssMB;
+    await db.doc(ledgerPath).update({ 'telemetry.lastMemoryMB': rssMB, 'telemetry.lastHeartbeat': new Date() }).catch(() => {});
   }, intervalMs);
-
-  // Unref so this timer doesn't prevent the process from exiting naturally
   timer.unref();
-
-  return {
-    timer,
-    getPeak: () => peakRss
-  };
-}
-
-/**
- * Helper: Fires the webhook back to Google Cloud Workflows.
- */
-async function triggerWorkflowCallback(url, status, logger) {
-  if (!url) return;
-  logger.log('INFO', `[Worker] 🔔 BATCH COMPLETE! Triggering Workflow Callback: ${status}`);
-
-  try {
-    const auth = new GoogleAuth({ scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
-    const client = await auth.getClient();
-    const accessToken = await client.getAccessToken();
-    const token = accessToken.token;
-
-    return new Promise((resolve, reject) => {
-      const body = JSON.stringify({ status: status, timestamp: new Date().toISOString() });
-
-      const req = https.request(url, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          'Content-Length': Buffer.byteLength(body),
-          'Authorization': `Bearer ${token}`
-        }
-      }, (res) => {
-        if (res.statusCode >= 200 && res.statusCode < 300) { resolve(); }
-        else { logger.log('WARN', `Callback responded with ${res.statusCode}`); resolve(); }
-      });
-
-      req.on('error', (e) => { logger.log('ERROR', `Failed to trigger callback: ${e.message}`); resolve(); });
-      req.write(body);
-      req.end();
-    });
-  } catch (e) {
-    logger.log('ERROR', `Failed to generate auth token for callback: ${e.message}`);
-  }
-}
-
-/**
- * [UPDATED] Helper: Decrements 'remainingTasks' in Firestore.
- * NOW INCLUDES CONTENTION RETRY LOGIC (The "Sentinel" Fix)
- */
-async function decrementAndCheck(db, metaStatePath, logger) {
-  if (!metaStatePath) return null;
-
-  const MAX_CONTENTION_RETRIES = 10;
-  let attempt = 0;
-
-  while (attempt < MAX_CONTENTION_RETRIES) {
-    try {
-      const result = await db.runTransaction(async (t) => {
-        const ref = db.doc(metaStatePath);
-        const doc = await t.get(ref);
-        if (!doc.exists) return null;
-
-        const data = doc.data();
-        // Safety: Don't decrement below zero
-        const currentRemaining = data.remainingTasks || 0;
-        if (currentRemaining <= 0) return { remaining: 0, callbackUrl: data.callbackUrl };
-
-        const newRemaining = currentRemaining - 1;
-        t.update(ref, { remainingTasks: newRemaining, lastUpdated: new Date() });
-
-        return { remaining: newRemaining, callbackUrl: data.callbackUrl };
-      });
-
-      // Success! Check if we are the "Sentinel" (the last one)
-      if (result && result.remaining <= 0) return result.callbackUrl;
-      return null; // We decremented successfully, but weren't the last one.
-
-    } catch (e) {
-      // Check if it's a contention error (ABORTED/10 or DEADLINE_EXCEEDED/4)
-      const isContention = e.code === 10 || e.code === 4 || (e.message && e.message.includes('contention'));
-
-      if (isContention) {
-        attempt++;
-        // JITTER: Random delay between 50ms and 500ms to desynchronize the herd
-        const delay = Math.floor(Math.random() * 450) + 50;
-        logger.log('WARN', `[Worker] Batch counter contention (Attempt ${attempt}/${MAX_CONTENTION_RETRIES}). Retrying in ${delay}ms...`);
-        await new Promise(r => setTimeout(r, delay));
-      } else {
-        // Fatal error (permission, etc)
-        logger.log('ERROR', `[Worker] Fatal error decrementing batch counter: ${e.message}`);
-        return null;
-      }
-    }
-  }
-
-  logger.log('ERROR', `[Worker] Failed to decrement batch counter after ${MAX_CONTENTION_RETRIES} attempts. The count will be inaccurate.`);
-  return null;
+  return { timer, getPeak: () => peakRss };
 }
 
 async function handleComputationTask(message, config, dependencies) {
-  const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
-  const runDependencies = { ...dependencies, logger: systemLogger };
-  const { logger, db } = runDependencies;
+  const logger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
+  const runDeps = { ...dependencies, logger };
+  const db = dependencies.db;
   let data;
 
   try {
-    if (message.data && message.data.message && message.data.message.data) { data = JSON.parse(Buffer.from(message.data.message.data, 'base64').toString());
-    } else if (message.data && typeof message.data === 'string') { data = JSON.parse(Buffer.from(message.data, 'base64').toString());
-    } else if (message.json) { data = message.json;
-    } else { data = message; }
-  } catch (parseError) { logger.log('ERROR', `[Worker] Failed to parse Pub/Sub payload.`, { error: parseError.message }); return; }
+    const raw = message.data?.message?.data || message.data || message.json;
+    data = (typeof raw === 'string') ? JSON.parse(Buffer.from(raw, 'base64').toString()) : raw;
+  } catch (e) { return; }
 
-  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
+  if (!data || data.action !== 'RUN_COMPUTATION_DATE') return;
 
-  // [UPDATED] Extract 'resources' from payload (set by Dispatcher)
-  const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, metaStatePath, resources } = data;
-  const resourceTier = resources || 'standard'; // Default to standard
-
-  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
-
-  const retryCount = message.deliveryAttempt || 1;
+  const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, resources } = data;
+  const resourceTier = resources || 'standard';
   const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
 
-  // --- POISON MESSAGE HANDLING (DLQ) ---
-  if (retryCount > MAX_RETRIES) {
-    logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
-    try {
-      await db.collection('computation_dead_letter_queue').add({
-        originalData: data,
-        dispatchId: dispatchId,
-        error: { message: 'Max Retries Exceeded', stack: 'PubSub delivery limit reached' },
-        finalAttemptAt: new Date(),
-        failureReason: 'MAX_RETRIES_EXCEEDED'
-      });
-
-      await db.doc(ledgerPath).set({
-        status: 'FAILED',
-        error: 'Max Retries Exceeded (Poison Message)',
-        failedAt: new Date()
-      }, { merge: true });
+  logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}]`);
 
-      const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-      if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
-      return;
-    } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
-  }
+  // 1. Audit Lease
+  await db.doc(ledgerPath).set({
+    status: 'IN_PROGRESS',
+    workerId: process.env.K_REVISION || os.hostname(),
+    startedAt: new Date(),
+    dispatchId
+  }, { merge: true });
 
-  logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}] [Tier: ${resourceTier}]`);
-
-  // 1. Update Status to IN_PROGRESS & Initialize Telemetry
-  try {
-    await db.doc(ledgerPath).set({
-      status: 'IN_PROGRESS',
-      workerId: process.env.K_REVISION || 'unknown',
-      startedAt: new Date(),
-      dispatchId: dispatchId,
-      telemetry: { startTime: new Date(), lastMemory: null } // Init for heartbeat
-    }, { merge: true });
-  } catch (leaseErr) {}
-
-  // 2. START HEARTBEAT (The Flight Recorder)
-  // [UPDATED] Using new logic to track peak
-  const heartbeatControl = startMemoryHeartbeat(db, ledgerPath, 2000);
-
-  let computationManifest;
-  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
-  } catch (manifestError) {
-    clearInterval(heartbeatControl.timer); // Stop if we fail early
-    logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
-    return;
-  }
+  const heartbeat = startMemoryHeartbeat(db, ledgerPath);
 
   try {
+    const manifest = getManifest(config.activeProductLines || [], calculations, runDeps);
     const startTime = Date.now();
-    const result = await executeDispatchTask(
-      date, pass, computation, config, runDependencies,
-      computationManifest, previousCategory, dependencyResultHashes
+
+    const result = await executeDispatchTask(
+      date, pass, computation, config, runDeps,
+      manifest, previousCategory, dependencyResultHashes
     );
-    const duration = Date.now() - startTime;
-
-    // STOP HEARTBEAT ON SUCCESS
-    clearInterval(heartbeatControl.timer);
 
-    const failureReport = result?.updates?.failureReport || [];
+    clearInterval(heartbeat.timer);
+    const failureReport = result?.updates?.failureReport || [];
     const successUpdates = result?.updates?.successUpdates || {};
 
-    if (failureReport.length > 0) {
-      const failReason = failureReport[0];
-      throw new Error(failReason.error.message || 'Computation Logic Failed');
-    }
-    else {
-      if (Object.keys(successUpdates).length > 0) { logger.log('INFO', `[Worker] ✅ Stored: ${computation}`); }
-      else { logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`); }
-
-      // Extract the metrics from the success update for the recorder
-      const calcUpdate = successUpdates[normalizeName(computation)] || {};
-      const finalMetrics = {
-        durationMs: duration,
-        peakMemoryMB: heartbeatControl.getPeak(),
-        io: calcUpdate.metrics?.io,
-        storage: calcUpdate.metrics?.storage,
-        execution: calcUpdate.metrics?.execution,
-        validation: calcUpdate.metrics?.validation,
-        composition: calcUpdate.composition
-      };
-
-      await db.doc(ledgerPath).update({
-        status: 'COMPLETED',
-        completedAt: new Date()
-      }).catch(() => {});
+    if (failureReport.length > 0) throw new Error(failureReport[0].error.message);
+
+    const calcUpdate = successUpdates[normalizeName(computation)] || {};
+    const metrics = {
+      durationMs: Date.now() - startTime,
+      peakMemoryMB: heartbeat.getPeak(),
+      io: calcUpdate.metrics?.io,
+      storage: calcUpdate.metrics?.storage,
+      execution: calcUpdate.metrics?.execution,
+      validation: calcUpdate.metrics?.validation,
+      composition: calcUpdate.composition
+    };
 
-      // [UPDATED] Pass resourceTier and metrics to recordRunAttempt
-      await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, finalMetrics, triggerReason, resourceTier);
+    await db.doc(ledgerPath).update({ status: 'COMPLETED', completedAt: new Date() });
+    await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason, resourceTier);
 
-      const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-      if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
-    }
   } catch (err) {
-    // STOP HEARTBEAT ON ERROR
-    clearInterval(heartbeatControl.timer);
-
-    // --- ERROR HANDLING ---
-    const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
-      err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
-      err.stage === 'SEMANTIC_GATE' ||
-      (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
+    clearInterval(heartbeat.timer);
+    const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
 
-    if (isDeterministicError) {
-      logger.log('ERROR', `[Worker] 🛑 Permanent Failure: ${err.message}`);
-      try {
-        await db.collection('computation_dead_letter_queue').add({
-          originalData: data,
-          dispatchId: dispatchId,
-          error: { message: err.message, stack: err.stack, stage: err.stage || 'UNKNOWN' },
-          finalAttemptAt: new Date(),
-          failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
-        });
-
-        await db.doc(ledgerPath).set({
-          status: 'FAILED',
-          error: err.message || 'Permanent Deterministic Error',
-          failedAt: new Date()
-        }, { merge: true });
-
-        await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0, peakMemoryMB: heartbeatControl.getPeak() }, triggerReason, resourceTier);
-
-        const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
-        if (callbackUrl) { await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger); }
-        return;
-      } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
+    if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
+      await db.doc(ledgerPath).set({ status: 'FAILED', error: err.message, failedAt: new Date() }, { merge: true });
+      await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeat.getPeak() }, triggerReason, resourceTier);
      return; // Don't throw, we've handled the permanent failure
    }
-
-    if (retryCount >= MAX_RETRIES) { throw err; }
-
-    logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
-    await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0, peakMemoryMB: heartbeatControl.getPeak() }, triggerReason, resourceTier);
-    throw err;
+    throw err; // Trigger Pub/Sub retry
  }
 }
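For context on how the 1.0.307 worker is invoked: `handleComputationTask(message, config, dependencies)` accepts either a raw Pub/Sub push envelope under `message.data` or an already-decoded payload, and expects `dependencies.db` to be a Firestore client. The sketch below is a minimal, hypothetical wiring behind an HTTP push endpoint; the Express server, the `@google-cloud/firestore` client, the config values, the require path, and the assumption that the module exports `handleComputationTask` are all illustrative and not confirmed by this diff.

// Hypothetical wiring sketch (not part of the package).
// Assumes express, @google-cloud/firestore, and that the worker module
// exports handleComputationTask.
const express = require('express');
const { Firestore } = require('@google-cloud/firestore');
const { handleComputationTask } = require('./computation-system/helpers/computation_worker.js');

const app = express();
app.use(express.json());

const db = new Firestore();
const config = { minLevel: 'INFO', activeProductLines: [] }; // assumed config shape
const dependencies = { db };

app.post('/pubsub/computation', async (req, res) => {
  try {
    // req.body is the Pub/Sub push envelope: { message: { data: '<base64>' }, ... }.
    // Wrapping it as { data: req.body } matches the worker's first parse branch.
    await handleComputationTask({ data: req.body }, config, dependencies);
    res.status(204).end(); // 2xx acks the message
  } catch (err) {
    // A rethrown error surfaces as a non-2xx response, so Pub/Sub redelivers
    // the message, which is what the worker relies on for transient failures.
    res.status(500).send(err.message);
  }
});

app.listen(process.env.PORT || 8080);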