bulltrackers-module 1.0.288 → 1.0.289

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,8 +1,7 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
- * PURPOSE: "Smart Dispatcher" - Analyzes state and dispatches tasks.
4
- * UPDATED: Removed "Zombie" logic. Now forcefully dispatches any task
5
- * that is not explicitly COMPLETED, ensuring reliability for one-shot execution.
3
+ * PURPOSE: "Smart Dispatcher" - Analyzes state, initializes Run Counters, and dispatches tasks.
4
+ * UPDATED: Implements Callback Pattern. Initializes 'computation_runs' doc for worker coordination.
6
5
  */
7
6
 
8
7
  const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -19,13 +18,22 @@ const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
19
18
 
20
19
  /**
21
20
  * Dispatches computation tasks for a specific pass.
21
+ * @param {Object} config - System config
22
+ * @param {Object} dependencies - { db, logger, ... }
23
+ * @param {Array} computationManifest - List of calculations
24
+ * @param {Object} reqBody - (Optional) HTTP Body containing 'callbackUrl' and 'date'
22
25
  */
23
- async function dispatchComputationPass(config, dependencies, computationManifest) {
26
+ async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
24
27
  const { logger, db } = dependencies;
25
28
  const pubsubUtils = new PubSubUtils(dependencies);
26
29
  const passToRun = String(config.COMPUTATION_PASS_TO_RUN);
27
30
 
31
+ // [NEW] Extract Date and Callback from request body (pushed by Workflow)
32
+ const dateStr = reqBody.date || config.date;
33
+ const callbackUrl = reqBody.callbackUrl || null;
34
+
28
35
  if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
36
+ if (!dateStr) { return logger.log('ERROR', '[Dispatcher] No date defined. Aborting.'); }
29
37
 
30
38
  const currentManifestHash = generateCodeHash(
31
39
  computationManifest.map(c => c.hash).sort().join('|')
@@ -37,29 +45,29 @@ async function dispatchComputationPass(config, dependencies, computationManifest
37
45
  if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
38
46
 
39
47
  const calcNames = calcsInThisPass.map(c => c.name);
40
- logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun}`);
41
- logger.log('INFO', `[Dispatcher] Target Calculations: [${calcNames.join(', ')}]`);
42
-
48
+ logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun} for ${dateStr}`);
49
+
50
+ // -- DATE ANALYSIS LOGIC (Unchanged) --
43
51
  const passEarliestDate = Object.values(DEFINITIVE_EARLIEST_DATES).reduce((a, b) => a < b ? a : b);
44
52
  const endDateUTC = new Date(Date.UTC(new Date().getUTCFullYear(), new Date().getUTCMonth(), new Date().getUTCDate() - 1));
45
- const allExpectedDates = getExpectedDateStrings(passEarliestDate, endDateUTC);
53
+
54
+ // We only analyze the specific requested date to keep dispatch fast for the workflow
55
+ const allExpectedDates = [dateStr];
46
56
 
47
57
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
48
58
  const tasksToDispatch = [];
49
59
  const limit = pLimit(20);
50
60
 
51
- logger.log('INFO', `[Dispatcher] Analyzing ${allExpectedDates.length} dates for viability...`);
52
-
53
- const analysisPromises = allExpectedDates.map(dateStr => limit(async () => {
61
+ const analysisPromises = allExpectedDates.map(d => limit(async () => {
54
62
  try {
55
63
  const fetchPromises = [
56
- fetchComputationStatus(dateStr, config, dependencies),
57
- checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES)
64
+ fetchComputationStatus(d, config, dependencies),
65
+ checkRootDataAvailability(d, config, dependencies, DEFINITIVE_EARLIEST_DATES)
58
66
  ];
59
67
 
60
68
  let prevDateStr = null;
61
69
  if (calcsInThisPass.some(c => c.isHistorical)) {
62
- const prevDate = new Date(dateStr + 'T00:00:00Z');
70
+ const prevDate = new Date(d + 'T00:00:00Z');
63
71
  prevDate.setUTCDate(prevDate.getUTCDate() - 1);
64
72
  prevDateStr = prevDate.toISOString().slice(0, 10);
65
73
 
@@ -77,7 +85,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
77
85
  hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false
78
86
  };
79
87
 
80
- const report = analyzeDateExecution(dateStr, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
88
+ const report = analyzeDateExecution(d, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
81
89
 
82
90
  const statusUpdates = {};
83
91
 
@@ -93,25 +101,20 @@ async function dispatchComputationPass(config, dependencies, computationManifest
93
101
 
94
102
  report.failedDependency.forEach(item => {
95
103
  const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
96
- statusUpdates[item.name] = {
97
- hash: false,
98
- category: 'unknown',
99
- reason: `Dependency Missing: ${missingStr}`
100
- };
104
+ statusUpdates[item.name] = { hash: false, category: 'unknown', reason: `Dependency Missing: ${missingStr}` };
101
105
  });
102
106
 
103
107
  if (Object.keys(statusUpdates).length > 0) {
104
- await updateComputationStatus(dateStr, statusUpdates, config, dependencies);
108
+ await updateComputationStatus(d, statusUpdates, config, dependencies);
105
109
  }
106
110
 
107
111
  const validToRun = [...report.runnable, ...report.reRuns];
108
112
  validToRun.forEach(item => {
109
113
  const uniqueDispatchId = crypto.randomUUID();
110
-
111
114
  tasksToDispatch.push({
112
115
  action: 'RUN_COMPUTATION_DATE',
113
116
  dispatchId: uniqueDispatchId,
114
- date: dateStr,
117
+ date: d,
115
118
  pass: passToRun,
116
119
  computation: normalizeName(item.name),
117
120
  hash: item.hash || item.newHash,
@@ -123,15 +126,41 @@ async function dispatchComputationPass(config, dependencies, computationManifest
123
126
  });
124
127
 
125
128
  } catch (e) {
126
- logger.log('ERROR', `[Dispatcher] Failed analysis for ${dateStr}: ${e.message}`);
129
+ logger.log('ERROR', `[Dispatcher] Failed analysis for ${d}: ${e.message}`);
127
130
  }
128
131
  }));
129
132
 
130
133
  await Promise.all(analysisPromises);
131
134
 
135
+ // -- NEW: CALLBACK & COUNTER INITIALIZATION --
136
+
132
137
  if (tasksToDispatch.length > 0) {
133
- logger.log('INFO', `[Dispatcher] 📝 Creating Audit Ledger entries (Transactional) for ${tasksToDispatch.length} tasks...`);
138
+ logger.log('INFO', `[Dispatcher] 📝 Preparing ${tasksToDispatch.length} tasks for execution...`);
134
139
 
140
+ // 1. Initialize Shared State Document (The Counter)
141
+ const runId = crypto.randomUUID();
142
+ const metaStatePath = `computation_runs/${runId}`;
143
+
144
+ if (callbackUrl) {
145
+ await db.doc(metaStatePath).set({
146
+ createdAt: new Date(),
147
+ date: dateStr,
148
+ pass: passToRun,
149
+ totalTasks: tasksToDispatch.length,
150
+ remainingTasks: tasksToDispatch.length, // <--- The Countdown
151
+ callbackUrl: callbackUrl, // <--- The Workflow Hook
152
+ status: 'IN_PROGRESS'
153
+ });
154
+ logger.log('INFO', `[Dispatcher] 🏁 Run State Initialized: ${runId}`);
155
+ }
156
+
157
+ // 2. Attach Run Metadata to every task
158
+ tasksToDispatch.forEach(task => {
159
+ task.runId = runId;
160
+ task.metaStatePath = callbackUrl ? metaStatePath : null;
161
+ });
162
+
163
+ // 3. Create Audit Ledger Entries
135
164
  const finalDispatched = [];
136
165
  const txnLimit = pLimit(20);
137
166
 
@@ -142,34 +171,22 @@ async function dispatchComputationPass(config, dependencies, computationManifest
142
171
  await db.runTransaction(async (t) => {
143
172
  const doc = await t.get(ledgerRef);
144
173
 
145
- // [UPDATED] Robust One-Shot Dispatch Logic
146
- // We REMOVED the "Zombie Timeout" check.
147
- // If the Dispatcher is running, we assume the user intends to ensure these tasks are dispatched.
148
-
149
174
  if (doc.exists) {
150
175
  const data = doc.data();
151
-
152
- // 1. If it's already COMPLETED, do not re-run (Strict idempotency).
153
- if (data.status === 'COMPLETED') {
154
- return false;
155
- }
156
-
157
- // 2. If it is PENDING or IN_PROGRESS:
158
- // Since the Dispatcher runs ONCE per day, seeing PENDING here means
159
- // the *previous* run failed to complete, or the worker died.
160
- // We overwrite it to force a restart.
176
+ // Strict Idempotency: If completed, don't run again.
177
+ if (data.status === 'COMPLETED') return false;
161
178
  }
162
179
 
163
- // Create/Overwrite entry with PENDING to start the cycle
164
180
  t.set(ledgerRef, {
165
181
  status: 'PENDING',
166
182
  dispatchId: task.dispatchId,
183
+ runId: task.runId, // Track the batch ID
167
184
  computation: task.computation,
168
185
  expectedHash: task.hash || 'unknown',
169
186
  createdAt: new Date(),
170
187
  dispatcherHash: currentManifestHash,
171
188
  triggerReason: task.triggerReason,
172
- retries: 0 // Reset retries for the new attempt
189
+ retries: 0
173
190
  }, { merge: true });
174
191
 
175
192
  return true;
@@ -178,14 +195,15 @@ async function dispatchComputationPass(config, dependencies, computationManifest
178
195
  finalDispatched.push(task);
179
196
 
180
197
  } catch (txnErr) {
181
- logger.log('WARN', `[Dispatcher] Transaction failed for ${task.computation} on ${task.date}: ${txnErr.message}`);
198
+ logger.log('WARN', `[Dispatcher] Transaction failed for ${task.computation}: ${txnErr.message}`);
182
199
  }
183
200
  }));
184
201
 
185
202
  await Promise.all(txnPromises);
186
203
 
204
+ // 4. Publish to Pub/Sub
187
205
  if (finalDispatched.length > 0) {
188
- logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} unique tasks to Pub/Sub...`);
206
+ logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} tasks to Pub/Sub...`);
189
207
 
190
208
  await pubsubUtils.batchPublishTasks(dependencies, {
191
209
  topicName: TOPIC_NAME,
@@ -194,14 +212,17 @@ async function dispatchComputationPass(config, dependencies, computationManifest
194
212
  maxPubsubBatchSize: 100
195
213
  });
196
214
 
197
- return { dispatched: finalDispatched.length };
215
+ // Return count so workflow knows to wait
216
+ return { dispatched: finalDispatched.length, runId };
198
217
  } else {
199
- logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED (Double Dispatch avoided).`);
218
+ // Edge Case: Analysis said "Run", but Ledger said "Already Done"
219
+ // We must update the state doc to 0 or delete it, OR return 0 so workflow doesn't wait.
220
+ logger.log('INFO', `[Dispatcher] All tasks were already COMPLETED.`);
200
221
  return { dispatched: 0 };
201
222
  }
202
223
 
203
224
  } else {
204
- logger.log('INFO', `[Dispatcher] No valid tasks found. System is up to date.`);
225
+ logger.log('INFO', `[Dispatcher] No valid tasks found (Up to date).`);
205
226
  return { dispatched: 0 };
206
227
  }
207
228
  }
@@ -1,24 +1,98 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_worker.js
3
- * PURPOSE: Consumes computation tasks from Pub/Sub.
4
- * UPDATED: Fixed "Silent Failure" bug where tasks got stuck in PENDING.
5
- * Increased MAX_RETRIES and ensured Ledger is updated on poison messages.
3
+ * PURPOSE: Consumes tasks, executes logic, and signals Workflow upon Batch Completion.
4
+ * UPDATED: Implements "Last Worker" Callback Pattern.
6
5
  */
7
6
 
8
7
  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
9
8
  const { getManifest } = require('../topology/ManifestLoader');
10
9
  const { StructuredLogger } = require('../logger/logger');
11
10
  const { recordRunAttempt } = require('../persistence/RunRecorder');
11
+ const https = require('https'); // [NEW] Required for callback
12
12
 
13
13
  let calculationPackage;
14
14
  try { calculationPackage = require('aiden-shared-calculations-unified');
15
15
  } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
16
16
  const calculations = calculationPackage.calculations;
17
17
 
18
- // [FIX] Increased from 0 to 3.
19
- // 0 caused "retryCount >= MAX_RETRIES" to trigger immediately on the first run.
20
18
  const MAX_RETRIES = 3;
21
19
 
20
+ /**
21
+ * [NEW] Helper: Fires the webhook back to Google Cloud Workflows.
22
+ */
23
+ function triggerWorkflowCallback(url, status, logger) {
24
+ if (!url) return Promise.resolve();
25
+ logger.log('INFO', `[Worker] 🔔 BATCH COMPLETE! Triggering Workflow Callback: ${status}`);
26
+
27
+ return new Promise((resolve, reject) => {
28
+ const body = JSON.stringify({
29
+ status: status,
30
+ timestamp: new Date().toISOString()
31
+ });
32
+
33
+ const req = https.request(url, {
34
+ method: 'POST',
35
+ headers: {
36
+ 'Content-Type': 'application/json',
37
+ 'Content-Length': Buffer.byteLength(body)
38
+ }
39
+ }, (res) => {
40
+ if (res.statusCode >= 200 && res.statusCode < 300) {
41
+ resolve();
42
+ } else {
43
+ logger.log('WARN', `Callback responded with ${res.statusCode}`);
44
+ resolve(); // Don't crash the worker if callback fails, logic is done.
45
+ }
46
+ });
47
+
48
+ req.on('error', (e) => {
49
+ logger.log('ERROR', `Failed to trigger callback: ${e.message}`);
50
+ resolve();
51
+ });
52
+
53
+ req.write(body);
54
+ req.end();
55
+ });
56
+ }
57
+
58
+ /**
59
+ * [NEW] Helper: Decrements 'remainingTasks' in Firestore.
60
+ * Returns the callbackUrl IF this was the last task.
61
+ */
62
+ async function decrementAndCheck(db, metaStatePath, logger) {
63
+ if (!metaStatePath) return null;
64
+
65
+ try {
66
+ const result = await db.runTransaction(async (t) => {
67
+ const ref = db.doc(metaStatePath);
68
+ const doc = await t.get(ref);
69
+
70
+ if (!doc.exists) return null; // State might have expired or been deleted
71
+ const data = doc.data();
72
+
73
+ const newRemaining = (data.remainingTasks || 0) - 1;
74
+
75
+ t.update(ref, {
76
+ remainingTasks: newRemaining,
77
+ lastUpdated: new Date()
78
+ });
79
+
80
+ // Return needed data only if we hit 0 (or lower, for safety)
81
+ return {
82
+ remaining: newRemaining,
83
+ callbackUrl: data.callbackUrl
84
+ };
85
+ });
86
+
87
+ if (result && result.remaining <= 0) {
88
+ return result.callbackUrl;
89
+ }
90
+ } catch (e) {
91
+ logger.log('ERROR', `[Worker] Failed to decrement batch counter: ${e.message}`);
92
+ }
93
+ return null;
94
+ }
95
+
22
96
  async function handleComputationTask(message, config, dependencies) {
23
97
  const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
24
98
  const runDependencies = { ...dependencies, logger: systemLogger };
@@ -34,15 +108,14 @@ async function handleComputationTask(message, config, dependencies) {
34
108
 
35
109
  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
36
110
 
37
- // Extract fields including new dependencyResultHashes
38
- const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;
111
+ // Extract fields including new metaStatePath
112
+ const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes, metaStatePath } = data;
39
113
 
40
114
  if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
41
115
 
42
- // [FIX] Ensure retryCount defaults to 1 (PubSub usually sends 1 for the first attempt)
43
116
  const retryCount = message.deliveryAttempt || 1;
44
117
 
45
- // [FIX] Changed condition to '>' so attempts 1, 2, and 3 are allowed to run.
118
+ // --- POISON MESSAGE HANDLING (DLQ) ---
46
119
  if (retryCount > MAX_RETRIES) {
47
120
  logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
48
121
  try {
@@ -54,25 +127,28 @@ async function handleComputationTask(message, config, dependencies) {
54
127
  failureReason: 'MAX_RETRIES_EXCEEDED'
55
128
  });
56
129
 
57
- // [FIX] CRITICAL: Update Ledger to FAILED.
58
- // Previously, this returned without updating, leaving the Ledger stuck in 'PENDING'.
59
- // Now we explicitly mark it FAILED so the pipeline knows it's dead.
60
130
  await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
61
131
  status: 'FAILED',
62
132
  error: 'Max Retries Exceeded (Poison Message)',
63
133
  failedAt: new Date()
64
134
  }, { merge: true });
65
135
 
136
+ // [CRITICAL] Even if it failed, we MUST decrement the counter.
137
+ // Otherwise the workflow waits 24h for a task that will never finish.
138
+ const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
139
+ if (callbackUrl) {
140
+ // We signal SUCCESS to the workflow because the *Batch* is finished processing (even if this task failed).
141
+ // The "monitor" or next pass can handle data gaps.
142
+ await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
143
+ }
144
+
66
145
  return;
67
146
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
68
147
  }
69
148
 
70
- logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`, {
71
- dispatchId: dispatchId || 'legacy',
72
- reason: triggerReason
73
- });
149
+ logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date}) [Attempt ${retryCount}/${MAX_RETRIES}]`);
74
150
 
75
- // Mark task as IN_PROGRESS (Visual only, dispatcher does not use this for logic anymore)
151
+ // Update Status to IN_PROGRESS
76
152
  try {
77
153
  await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
78
154
  status: 'IN_PROGRESS',
@@ -80,29 +156,21 @@ async function handleComputationTask(message, config, dependencies) {
80
156
  startedAt: new Date(),
81
157
  dispatchId: dispatchId
82
158
  }, { merge: true });
83
- } catch (leaseErr) {
84
- logger.log('WARN', `[Worker] Failed to update status to IN_PROGRESS for ${computation}. Continuing...`, leaseErr);
85
- }
159
+ } catch (leaseErr) {}
86
160
 
87
161
  let computationManifest;
88
162
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
89
163
  } catch (manifestError) {
90
164
  logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
91
- await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: manifestError.message, stage: 'MANIFEST_LOAD' }, { durationMs: 0 }, triggerReason);
165
+ // Do NOT decrement here, let PubSub retry the task.
92
166
  return;
93
167
  }
94
168
 
95
169
  try {
96
170
  const startTime = Date.now();
97
171
  const result = await executeDispatchTask(
98
- date,
99
- pass,
100
- computation,
101
- config,
102
- runDependencies,
103
- computationManifest,
104
- previousCategory,
105
- dependencyResultHashes
172
+ date, pass, computation, config, runDependencies,
173
+ computationManifest, previousCategory, dependencyResultHashes
106
174
  );
107
175
  const duration = Date.now() - startTime;
108
176
 
@@ -110,38 +178,44 @@ async function handleComputationTask(message, config, dependencies) {
110
178
  const successUpdates = result?.updates?.successUpdates || {};
111
179
 
112
180
  if (failureReport.length > 0) {
181
+ // Logic/Storage failure (handled internally by executor)
113
182
  const failReason = failureReport[0];
114
- logger.log('ERROR', `[Worker] ❌ Failed logic/storage for ${computation}`, failReason.error);
115
- const metrics = failReason.metrics || {};
116
- metrics.durationMs = duration;
117
- await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, metrics, triggerReason);
118
183
  throw new Error(failReason.error.message || 'Computation Logic Failed');
119
184
  }
120
- else if (Object.keys(successUpdates).length > 0) {
121
- const successData = successUpdates[computation];
122
- const metrics = successData.metrics || {};
123
- metrics.durationMs = duration;
124
- logger.log('INFO', `[Worker] Stored: ${computation}. ID: ${dispatchId}`);
125
-
126
- // Mark Ledger as COMPLETED
185
+ else {
186
+ if (Object.keys(successUpdates).length > 0) {
187
+ logger.log('INFO', `[Worker] Stored: ${computation}`);
188
+ } else {
189
+ logger.log('WARN', `[Worker] ⚠️ Empty Result: ${computation}`);
190
+ }
191
+
192
+ // 1. Mark Ledger as COMPLETED
127
193
  await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
128
194
  status: 'COMPLETED',
129
195
  completedAt: new Date()
130
196
  }).catch(() => {});
131
197
 
132
- await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
133
- }
134
- else {
135
- logger.log('WARN', `[Worker] ⚠️ No results produced for ${computation} (Empty?)`);
136
- await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
198
+ await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, { durationMs: duration }, triggerReason);
199
+
200
+ // 2. [NEW] Decrement Batch Counter & Check for Callback
201
+ const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
202
+
203
+ // 3. [NEW] If last one, fire callback
204
+ if (callbackUrl) {
205
+ await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
206
+ }
137
207
  }
138
208
  } catch (err) {
209
+ // --- ERROR HANDLING ---
210
+
211
+ // Check for Permanent/Deterministic Errors
139
212
  const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
140
213
  err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
214
+ err.stage === 'SEMANTIC_GATE' ||
141
215
  (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
142
216
 
143
217
  if (isDeterministicError) {
144
- logger.log('ERROR', `[Worker] 🛑 Permanent Failure (Limit Issue). Sending to DLQ immediately: ${dispatchId}`);
218
+ logger.log('ERROR', `[Worker] 🛑 Permanent Failure: ${err.message}`);
145
219
  try {
146
220
  await db.collection('computation_dead_letter_queue').add({
147
221
  originalData: data,
@@ -151,7 +225,6 @@ async function handleComputationTask(message, config, dependencies) {
151
225
  failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
152
226
  });
153
227
 
154
- // [FIX] Update Ledger to FAILED immediately for deterministic errors
155
228
  await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
156
229
  status: 'FAILED',
157
230
  error: err.message || 'Permanent Deterministic Error',
@@ -159,22 +232,29 @@ async function handleComputationTask(message, config, dependencies) {
159
232
  }, { merge: true });
160
233
 
161
234
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
162
- return;
235
+
236
+ // [CRITICAL] Permanent failure -> Must decrement so workflow doesn't hang
237
+ const callbackUrl = await decrementAndCheck(db, metaStatePath, logger);
238
+ if (callbackUrl) {
239
+ await triggerWorkflowCallback(callbackUrl, 'SUCCESS', logger);
240
+ }
241
+
242
+ return; // Do NOT throw, consume the message
163
243
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
164
244
  }
165
245
 
166
- // Standard Retryable Error (Crash)
246
+ // Standard Retryable Error (Transient)
167
247
  if (retryCount >= MAX_RETRIES) {
168
- // We throw here, PubSub will retry, and the "Poison Logic" at the top
169
- // will catch it on the NEXT attempt to finalize the failure.
248
+ // Let the top-level poison check handle the decrement on the *next* delivery (or handle here if you prefer).
249
+ // Standard practice: throw so PubSub handles the backoff and redelivery.
250
+ // The poison logic at the top of this function will catch it on attempt N+1.
170
251
  throw err;
171
252
  }
172
253
 
173
254
  logger.log('ERROR', `[Worker] ❌ Crash: ${computation}: ${err.message}`);
174
-
175
255
  await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
176
- // Throwing triggers Pub/Sub retry
177
- throw err;
256
+
257
+ throw err; // Trigger Pub/Sub retry
178
258
  }
179
259
  }
180
260
 
@@ -1,5 +1,6 @@
1
1
  # Cloud Workflows Definition for BullTrackers Computation Pipeline
2
- # Orchestrates 5 sequential passes with Self-Healing (Retry) logic.
2
+ # Orchestrates 5 sequential passes using Event-Driven Callbacks (Zero Polling).
3
+ # UPDATED: Implements "Callback Pattern" to eliminate arbitrary sleeps.
3
4
 
4
5
  main:
5
6
  params: [input]
@@ -12,9 +13,6 @@ main:
12
13
  - date_to_run: ${default(map.get(input, "date"), text.substring(time.format(sys.now()), 0, 10))}
13
14
  - passes: ["1", "2", "3", "4", "5"]
14
15
  - max_retries: 3
15
- - propagation_wait_seconds: 300 # 5 Minutes
16
- # URL of the new Monitor Function
17
- - monitor_url: ${"https://europe-west1-" + project + ".cloudfunctions.net/computation-monitor"}
18
16
 
19
17
  # ======================================================
20
18
  # MAIN LOOP: Iterate through Passes 1 to 5
@@ -28,7 +26,7 @@ main:
28
26
  assign:
29
27
  - attempt_count: 0
30
28
  - pass_success: false
31
- # Construct URL for the specific pass function (e.g. computation-pass-1)
29
+ # URL of the Cloud Function acting as the Dispatcher
32
30
  - dispatcher_url: ${"https://europe-west1-" + project + ".cloudfunctions.net/computation-pass-" + pass_id}
33
31
 
34
32
  # -----------------------------------------------
@@ -42,85 +40,82 @@ main:
42
40
  assign:
43
41
  - attempt_count: ${attempt_count + 1}
44
42
 
43
+ # 1. GENERATE CALLBACK URL
44
+ # This creates a unique HTTP endpoint that points specifically to this step execution.
45
+ - create_callback:
46
+ call: sys.create_callback_url
47
+ result: callback_url
48
+
45
49
  - log_start:
46
50
  call: sys.log
47
51
  args:
48
- text: ${"Starting Pass " + pass_id + " (Attempt " + attempt_count + ") for " + date_to_run}
52
+ text: ${"Starting Pass " + pass_id + " (Attempt " + attempt_count + ") for " + date_to_run + ". Waiting for signal at " + callback_url}
49
53
  severity: "INFO"
50
54
 
51
- # 1. TRIGGER DISPATCHER (Fire and Forget mechanism via HTTP)
55
+ # 2. TRIGGER DISPATCHER
56
+ # We pass the 'callback_url' to the dispatcher so it can hand it to the workers.
52
57
  - trigger_dispatcher:
53
- call: http.get
58
+ call: http.post
54
59
  args:
55
60
  url: ${dispatcher_url}
56
- query:
61
+ body:
57
62
  date: ${date_to_run}
63
+ callbackUrl: ${callback_url} # <--- CRITICAL: Passing the token
58
64
  auth:
59
65
  type: OIDC
60
66
  timeout: 1800 # 30 mins max for dispatch analysis
61
67
  result: dispatch_response
62
68
 
63
- # 2. PROPAGATION WAIT
64
- - wait_for_propagation:
65
- call: sys.log
66
- args:
67
- text: ${"Pass " + pass_id + " dispatched. Waiting " + propagation_wait_seconds + "s for propagation..."}
68
- next: sleep_propagation
69
-
70
- - sleep_propagation:
71
- call: sys.sleep
72
- args:
73
- seconds: ${propagation_wait_seconds}
74
-
75
- # 3. MONITORING LOOP
76
- - monitor_loop:
77
- call: http.get
78
- args:
79
- url: ${monitor_url}
80
- query:
81
- date: ${date_to_run}
82
- pass: ${pass_id}
83
- auth:
84
- type: OIDC
85
- result: status_resp
86
-
87
- - evaluate_status:
69
+ # 3. CHECK FOR "NOTHING TO DO"
70
+ # If the dispatcher found 0 tasks, it returns immediate success. We skip waiting.
71
+ - check_immediate_completion:
88
72
  switch:
89
- # CASE A: Still Running -> Sleep and Poll Again
90
- - condition: ${status_resp.body.state == "RUNNING"}
73
+ - condition: ${dispatch_response.body.dispatched == 0}
91
74
  steps:
92
- - log_running:
75
+ - log_empty:
93
76
  call: sys.log
94
77
  args:
95
- text: ${"Pass " + pass_id + " is RUNNING (" + status_resp.body.activeCount + " active). Waiting..."}
96
- - sleep_polling:
97
- call: sys.sleep
98
- args:
99
- seconds: 60
100
- - continue_monitoring:
101
- next: monitor_loop
78
+ text: ${"Pass " + pass_id + " had 0 tasks. Moving to next pass immediately."}
79
+ - mark_success_empty:
80
+ assign:
81
+ - pass_success: true
82
+ - next_pass_empty:
83
+ next: pass_retry_loop
102
84
 
103
- # CASE B: Clean Success -> Mark done, Break Retry Loop
104
- - condition: ${status_resp.body.state == "SUCCESS"}
85
+ # 4. WAIT FOR WORKER SIGNAL
86
+ # The workflow freezes here (sleeps) until a worker hits the callback_url.
87
+ # This eliminates the need for polling logic.
88
+ - wait_for_completion:
89
+ call: sys.await_callback
90
+ args:
91
+ callback_url: ${callback_url}
92
+ timeout: 86400 # Wait up to 24 hours for the batch to finish
93
+ result: callback_request
94
+
95
+ # 5. PROCESS SIGNAL
96
+ - evaluate_signal:
97
+ assign:
98
+ - signal_data: ${callback_request.http_request.body}
99
+ switch:
100
+ - condition: ${signal_data.status == "SUCCESS"}
105
101
  steps:
106
102
  - log_success:
107
103
  call: sys.log
108
104
  args:
109
- text: ${"Pass " + pass_id + " COMPLETED successfully."}
105
+ text: ${"Pass " + pass_id + " signaled COMPLETION via Callback."}
110
106
  severity: "INFO"
111
107
  - mark_success:
112
108
  assign:
113
109
  - pass_success: true
114
- - proceed_to_next_pass:
115
- next: pass_retry_loop # Exits inner loop because pass_success is now true
110
+ - proceed:
111
+ next: pass_retry_loop
116
112
 
117
- # CASE C: Failures Found -> Continue Retry Loop (will trigger dispatcher again)
118
- - condition: ${status_resp.body.state == "HAS_FAILURES"}
113
+ - condition: ${signal_data.status == "FAILURE"}
119
114
  steps:
120
115
  - log_failure:
121
116
  call: sys.log
122
117
  args:
123
- text: ${"Pass " + pass_id + " has " + status_resp.body.failureCount + " FAILURES. Attempting Retry."}
118
+ text: ${"Pass " + pass_id + " signaled FAILURE (DLQ limit hit). Retrying pass."}
124
119
  severity: "WARNING"
125
120
  - retry_pass:
126
121
  next: pass_retry_loop
@@ -136,7 +131,7 @@ main:
136
131
  - log_giving_up:
137
132
  call: sys.log
138
133
  args:
139
- text: ${"Pass " + pass_id + " failed after " + max_retries + " attempts. Proceeding to next pass with potential gaps."}
134
+ text: ${"Pass " + pass_id + " failed after " + max_retries + " attempts. Proceeding with potential gaps."}
140
135
  severity: "ERROR"
141
136
 
142
137
  - finish:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.288",
3
+ "version": "1.0.289",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [