bulltrackers-module 1.0.338 → 1.0.339

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
3
  * PURPOSE: Sequential Cursor-Based Dispatcher.
4
- * BEHAVIOR: Dispatch -> Wait ETA -> Next Date.
5
- * UPDATED: Added "Sweep" Protocol for OOM recovery & High-Mem Verification.
6
- * UPDATED: Added Safety Checks to permanently skip Deterministic Failures.
4
+ * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
5
+ * UPDATED: Prevents infinite loops by permanently ignoring deterministic failures.
7
6
  */
8
7
 
9
8
  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -23,7 +22,7 @@ const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
23
22
  // =============================================================================
24
23
  async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
25
24
  if (!tasks || tasks.length === 0) return [];
26
- if (forceRun) return tasks; // Bypass check for Sweep Mode (Handled separately)
25
+ if (forceRun) return tasks;
27
26
 
28
27
  const checkPromises = tasks.map(async (t) => {
29
28
  const taskName = normalizeName(t.name);
@@ -49,21 +48,14 @@ async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false
49
48
  return null;
50
49
  }
51
50
 
52
- // 2. GHOST CHECK (Debounce immediate re-runs)
53
- const isJustFinished = data.status === 'COMPLETED' &&
54
- data.completedAt &&
55
- (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
56
-
57
- if (isJustFinished) return null;
51
+ // 2. COMPLETED CHECK (Ignore)
52
+ if (data.status === 'COMPLETED') return null;
58
53
 
59
- // 3. DETERMINISTIC FAILURE CHECK (Break Infinite Loops)
60
- // If the task failed due to Logic/Quality issues, never retry it automatically.
54
+ // 3. FAILED CHECK (Pass through to Route Splitter)
55
+ // We do NOT filter FAILED tasks here. We pass them to splitRoutes()
56
+ // which decides if they get promoted to High-Mem or dropped forever.
61
57
  if (data.status === 'FAILED') {
62
- const stage = data.error?.stage;
63
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
64
- if (logger) logger.log('WARN', `[Dispatcher] 🛑 Skipping deterministic failure for ${taskName} (${stage}).`);
65
- return null;
66
- }
58
+ return t;
67
59
  }
68
60
  }
69
61
  return t;
@@ -202,7 +194,6 @@ async function handlePassVerification(config, dependencies, computationManifest,
202
194
  const pending = [...report.runnable, ...report.reRuns];
203
195
 
204
196
  if (pending.length > 0) {
205
- // Calculate ETA
206
197
  const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
207
198
  const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
208
199
 
@@ -254,8 +245,8 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
254
245
  return { dispatched: 0 };
255
246
  }
256
247
 
257
- // [FIX] Filter out deterministic failures from Sweep.
258
- // If it failed due to 'QUALITY_CIRCUIT_BREAKER', High-Mem won't fix it.
248
+ // [CRITICAL] FILTER FOR SWEEP:
249
+ // Only dispatch if it hasn't failed High-Mem or Quality checks.
259
250
  const validTasks = [];
260
251
  for (const task of pending) {
261
252
  const name = normalizeName(task.name);
@@ -264,20 +255,28 @@ async function handleSweepDispatch(config, dependencies, computationManifest, re
264
255
  if (doc.exists) {
265
256
  const data = doc.data();
266
257
  const stage = data.error?.stage;
267
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
258
+
259
+ // A. QUALITY CHECK: If it failed logic, DO NOT RETRY.
260
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
268
261
  logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
269
262
  continue;
270
263
  }
264
+
265
+ // B. DEAD END CHECK: If it failed High-Mem already, DO NOT RETRY.
266
+ if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
267
+ logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
268
+ continue;
269
+ }
271
270
  }
272
271
  validTasks.push(task);
273
272
  }
274
273
 
275
274
  if (validTasks.length === 0) {
276
- logger.log('INFO', `[Sweep] ${date} only has deterministic failures. No dispatch.`);
275
+ logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
277
276
  return { dispatched: 0 };
278
277
  }
279
278
 
280
- // 2. FORCE High Mem & Skip Zombie Check
279
+ // 2. FORCE High Mem for remaining valid tasks
281
280
  const currentDispatchId = crypto.randomUUID();
282
281
 
283
282
  const tasksPayload = validTasks.map(t => ({
@@ -360,8 +359,9 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
360
359
  }
361
360
 
362
361
  if (selectedTasks.length > 0) {
363
- const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
364
- if (reroutes.length > 0) selectedTasks = reroutes;
362
+ // Split Logic: Moves OOMs to High-Mem, drops dead letters
363
+ const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, selectedTasks, logger);
364
+ selectedTasks = [...standard, ...highMem];
365
365
  }
366
366
  }
367
367
  }
@@ -421,21 +421,58 @@ async function handleStandardDispatch(config, dependencies, computationManifest,
421
421
  };
422
422
  }
423
423
 
424
// =============================================================================
// HELPER: Route Splitting (One-Shot Enforcement)
// =============================================================================
/**
 * Splits the selected tasks into a standard route and a high-memory route by
 * reading each task's audit-ledger entry, enforcing the one-shot policy:
 * Standard -> High-Mem -> Dead Letter.
 *
 * Routing rules, per task:
 *  - no ledger entry, or a status other than FAILED/CRASH -> `standard`
 *  - failed with a deterministic stage (quality / semantic / sharding) -> dropped
 *  - failed while already on the high-mem tier -> dropped
 *  - any other failure -> `highMem` (copy of the task with `resources: 'high-mem'`
 *    and a `reason` string derived from the recorded error message)
 *
 * @param {object} db - Store handle exposing `doc(path).get()`.
 * @param {string} date - Date segment of the ledger path.
 * @param {string|number} pass - Pass segment of the ledger path.
 * @param {Array<{name: string}>} tasks - Candidate tasks; input order is preserved in both outputs.
 * @param {{log: Function}} [logger] - Receives a WARN entry for every dropped task.
 * @returns {Promise<{standard: object[], highMem: object[]}>}
 */
async function splitRoutes(db, date, pass, tasks, logger) {
    // Stages that a bigger machine cannot fix. Kept identical to the lists used by
    // the Sweep filter and by the worker's error handler so all three agree.
    const deterministicStages = ['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'];
    // CRASH is treated like FAILED: the worker gate considers both terminal.
    const failureStatuses = ['FAILED', 'CRASH'];

    const standard = [];
    const highMem = [];
    if (!tasks || tasks.length === 0) return { standard, highMem };

    // The ledger reads are independent, so issue them together; Promise.all
    // keeps the results in input order.
    const entries = await Promise.all(tasks.map(async (task) => {
        const name = normalizeName(task.name);
        const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
        const doc = await db.doc(ledgerPath).get();
        return { task, name, data: doc.exists ? doc.data() : null };
    }));

    for (const { task, name, data } of entries) {
        // New task, or a non-failed state (e.g. a zombie that filterActiveTasks let through).
        if (!data || !failureStatuses.includes(data.status)) {
            standard.push(task);
            continue;
        }

        const stage = data.error?.stage;

        // 1. QUALITY / LOGIC FAIL: Dead Letter (Drop it)
        if (deterministicStages.includes(stage)) {
            logger?.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
            continue;
        }

        // 2. PREVIOUSLY HIGH MEM FAIL: Dead Letter (Drop it)
        if (data.resourceTier === 'high-mem') {
            logger?.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Failed on High-Mem already.`);
            continue;
        }

        // 3. STANDARD FAIL (Crash/OOM/unknown): promote for exactly one high-mem attempt.
        highMem.push({
            ...task,
            resources: 'high-mem',
            reason: `Retry: ${data.error?.message || 'Standard Failure'}`
        });
    }

    return { standard, highMem };
}
440
477
 
441
478
  module.exports = { dispatchComputationPass };
@@ -1,7 +1,10 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_worker.js
3
- * UPDATED: Fixed Error Propagation Bug. Preserves 'stage' property when re-throwing logic errors.
4
- * UPDATED: Fixed Firestore 'undefined' field error for dispatchId.
3
+ * UPDATED: Implemented Strict Idempotency Gate (The "One-Shot" Policy).
4
+ * BEHAVIOR:
5
+ * 1. Checks Ledger via Transaction before execution.
6
+ * 2. If Status is COMPLETED or FAILED, immediately ACKs (returns) to stop Pub/Sub loops.
7
+ * 3. Preserves Error 'stage' to ensure logic errors are not retried.
5
8
  */
6
9
 
7
10
  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -28,6 +31,62 @@ function startMemoryHeartbeat(db, ledgerPath, intervalMs = 2000) {
28
31
  return { timer, getPeak: () => peakRss };
29
32
  }
30
33
 
34
/**
 * STRICT IDEMPOTENCY GATE
 * Runs a transaction on the ledger document to decide whether this delivery
 * may execute the task, and claims the lease in the same transaction when it may.
 *
 * Outcomes:
 *  - status COMPLETED / FAILED / CRASH -> refused (task already finalized)
 *  - status IN_PROGRESS                -> refused (duplicate delivery or another worker)
 *  - no document, or any other status  -> lease written with merge, allowed
 *  - transaction throws                -> refused; the error is logged, never rethrown
 *
 * @param {object} db - Store handle exposing `doc(path)` and `runTransaction(fn)`.
 * @param {string} ledgerPath - Path of the task's ledger document.
 * @param {string} [dispatchId] - Dispatch identifier; stored as 'unknown' when absent.
 * @param {string} workerId - Identifier recorded as the lease owner.
 * @returns {Promise<{shouldRun: boolean, reason?: string, leaseData?: object}>}
 *   `leaseData` is present only when `shouldRun` is true; `reason` only when it is false.
 */
async function checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId) {
    const docRef = db.doc(ledgerPath);
    // Normalize once so the value compared below is the same value a previous
    // delivery of this message would have written into the lease.
    const leaseDispatchId = dispatchId || 'unknown';

    try {
        return await db.runTransaction(async (t) => {
            const doc = await t.get(docRef);

            if (doc.exists) {
                const data = doc.data();

                // 1. TERMINAL STATE CHECK
                // A finalized task must never run again; this is what stops a
                // redelivered message from re-executing a FAILED task.
                if (['COMPLETED', 'FAILED', 'CRASH'].includes(data.status)) {
                    return { shouldRun: false, reason: `Task already in terminal state: ${data.status}` };
                }

                // 2. ACTIVE LEASE CHECK
                // Any IN_PROGRESS lease blocks execution. No staleness test is done
                // here; only the reported reason differs between the two cases.
                if (data.status === 'IN_PROGRESS') {
                    if (data.dispatchId === leaseDispatchId) {
                        return { shouldRun: false, reason: 'Duplicate delivery: Task already IN_PROGRESS with same ID.' };
                    }
                    return { shouldRun: false, reason: 'Collision: Task currently IN_PROGRESS by another worker.' };
                }
            }

            // 3. CLAIM LEASE
            // Reached only when the document is missing or in a non-terminal,
            // non-running state.
            const lease = {
                status: 'IN_PROGRESS',
                workerId: workerId,
                dispatchId: leaseDispatchId,
                startedAt: new Date()
            };

            t.set(docRef, lease, { merge: true });
            return { shouldRun: true, leaseData: lease };
        });
    } catch (e) {
        console.error(`[Idempotency] Transaction failed: ${e.message}`);
        // Deliberate best-effort: if the transaction fails, assume we shouldn't run.
        return { shouldRun: false, reason: `Transaction Error: ${e.message}` };
    }
}
89
+
31
90
  async function handleComputationTask(message, config, dependencies) {
32
91
  const logger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
33
92
  const runDeps = { ...dependencies, logger };
@@ -45,17 +104,19 @@ async function handleComputationTask(message, config, dependencies) {
45
104
  const resourceTier = resources || 'standard';
46
105
  const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${computation}`;
47
106
 
48
- logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}]`);
107
+ const workerId = process.env.K_REVISION || os.hostname();
49
108
 
50
- // [FIX] Build document object and only add dispatchId if it is defined (prevents Firestore "undefined" error)
51
- const leaseData = {
52
- status: 'IN_PROGRESS',
53
- workerId: process.env.K_REVISION || os.hostname(),
54
- startedAt: new Date()
55
- };
56
- if (dispatchId) leaseData.dispatchId = dispatchId;
109
+ // --- STEP 1: IDEMPOTENCY CHECK ---
110
+ const gate = await checkIdempotencyAndClaimLease(db, ledgerPath, dispatchId, workerId);
111
+
112
+ if (!gate.shouldRun) {
113
+ // [CRITICAL] We return successfully (ACK) to remove the message from Pub/Sub.
114
+ // We do NOT throw an error, because that would cause a retry.
115
+ logger.log('WARN', `[Worker] 🛑 Idempotency Gate: Skipping ${computation}. Reason: ${gate.reason}`);
116
+ return;
117
+ }
57
118
 
58
- await db.doc(ledgerPath).set(leaseData, { merge: true });
119
+ logger.log('INFO', `[Worker] 📥 Task: ${computation} (${date}) [Tier: ${resourceTier}] [ID: ${dispatchId}]`);
59
120
 
60
121
  const heartbeat = startMemoryHeartbeat(db, ledgerPath);
61
122
 
@@ -72,14 +133,11 @@ async function handleComputationTask(message, config, dependencies) {
72
133
  const failureReport = result?.updates?.failureReport || [];
73
134
  const successUpdates = result?.updates?.successUpdates || {};
74
135
 
75
- // [CRITICAL FIX] Correctly propagate the Error Stage.
76
- // Previously, 'throw new Error(msg)' stripped the 'stage' property, causing the
77
- // catch block to treat Deterministic errors (Quality/Logic) as System errors (Transient),
78
- // triggering infinite Pub/Sub retries.
136
+ // [CRITICAL] Propagate Error Stage from inner logic
79
137
  if (failureReport.length > 0) {
80
138
  const reportedError = failureReport[0].error;
81
139
  const errorObj = new Error(reportedError.message);
82
- errorObj.stage = reportedError.stage; // Preserve stage (e.g. 'QUALITY_CIRCUIT_BREAKER')
140
+ errorObj.stage = reportedError.stage;
83
141
  throw errorObj;
84
142
  }
85
143
 
@@ -99,18 +157,19 @@ async function handleComputationTask(message, config, dependencies) {
99
157
 
100
158
  } catch (err) {
101
159
  clearInterval(heartbeat.timer);
160
+
102
161
  const isDeterministic = ['SHARDING_LIMIT_EXCEEDED', 'QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(err.stage);
103
-
104
- // If error is deterministic (Logic/Quality), we record FAILURE and RETURN.
105
- // This ACKs the message and stops the retry loop.
162
+
163
+ // --- STEP 2: ERROR HANDLING ---
164
+ // If Logic Error OR Max Retries reached, mark FAILED and ACK.
106
165
  if (isDeterministic || (message.deliveryAttempt || 1) >= MAX_RETRIES) {
107
166
 
108
- // Write structured error to Ledger so Dispatcher can see the 'stage' later
109
167
  const errorPayload = {
110
168
  message: err.message,
111
169
  stage: err.stage || 'FATAL'
112
170
  };
113
171
 
172
+ // This write ensures the Idempotency Gate blocks future retries
114
173
  await db.doc(ledgerPath).set({
115
174
  status: 'FAILED',
116
175
  error: errorPayload,
@@ -118,10 +177,10 @@ async function handleComputationTask(message, config, dependencies) {
118
177
  }, { merge: true });
119
178
 
120
179
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'FATAL' }, { peakMemoryMB: heartbeat.getPeak() }, triggerReason, resourceTier);
121
- return;
180
+ return; // ACK
122
181
  }
123
182
 
124
- // If non-deterministic (Network/System), throw to trigger Pub/Sub Retry
183
+ // Only throw (NACK) for transient system errors (Network, etc)
125
184
  throw err;
126
185
  }
127
186
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.338",
3
+ "version": "1.0.339",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [