bulltrackers-module 1.0.274 → 1.0.276

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
3
- * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
3
+ * UPDATED: Includes Content-Based Dependency Short-Circuiting.
4
4
  * UPDATED: Includes 'Audit Upgrade' check.
5
5
  * UPDATED: Detailed Dependency Reporting for Impossible Chains.
6
6
  */
@@ -18,21 +18,41 @@ function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[c
18
18
 
19
19
  /**
20
20
  * Analyzes whether calculations should run, be skipped, or are blocked.
21
+ * [NEW] Implements ResultHash short-circuit logic.
21
22
  */
22
23
  function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus = null) {
23
24
  const report = { runnable: [], blocked: [], impossible: [], failedDependency: [], reRuns: [], skipped: [] };
24
25
  const simulationStatus = { ...dailyStatus };
25
26
  const isTargetToday = (dateStr === new Date().toISOString().slice(0, 10));
26
27
 
27
- const isDepSatisfied = (depName, currentStatusMap, manifestMap) => {
28
+ // Helper: Validates whether a dependency is satisfied, either by Code Match OR by Content Match
29
+ const isDepSatisfied = (depName, currentStatusMap, manifestMap, dependentStoredStatus) => {
28
30
  const norm = normalizeName(depName);
29
- const stored = currentStatusMap[norm];
31
+ const storedDep = currentStatusMap[norm];
30
32
  const depManifest = manifestMap.get(norm);
31
- if (!stored) return false;
32
- if (typeof stored.hash === 'string' && stored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
33
- if (!depManifest) return false;
34
- if (stored.hash !== depManifest.hash) return false;
35
- return true;
33
+
34
+ // 1. Basic Existence Checks
35
+ if (!storedDep) return false;
36
+ if (typeof storedDep.hash === 'string' && storedDep.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) return false;
37
+ if (!depManifest) return false;
38
+
39
+ // 2. Code Hash Check (The Standard Check)
40
+ if (storedDep.hash === depManifest.hash) return true;
41
+
42
+ // 3. [NEW] Content-Based Short-Circuit Check
43
+ // If the Code Hash mismatches, check whether the *Result Hash* is identical to what we used last time.
44
+ // dependentStoredStatus = The status of the calculation (B) that depends on this (A).
45
+ // dependentStoredStatus.dependencyResultHashes[depName] = The ResultHash of A when B last ran.
46
+ // storedDep.resultHash = The current ResultHash of A.
47
+ if (dependentStoredStatus &&
48
+ dependentStoredStatus.dependencyResultHashes &&
49
+ dependentStoredStatus.dependencyResultHashes[depName] &&
50
+ storedDep.resultHash &&
51
+ storedDep.resultHash === dependentStoredStatus.dependencyResultHashes[depName]) {
52
+ return true; // Short-circuit: The output didn't change, so we are safe.
53
+ }
54
+
55
+ return false;
36
56
  };
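To make the new rule concrete, the following standalone sketch reproduces the same decision in isolation. It assumes plain objects for the status map and manifest entries; the names `depStatus`, `depManifest` and `dependentStatus` are illustrative stand-ins, and the bare 'IMPOSSIBLE' string mirrors the STATUS_IMPOSSIBLE_PREFIX used above.

```js
// Minimal sketch of the dependency-satisfaction rule, assuming plain objects.
function isSatisfied(depStatus, depManifest, dependentStatus, depName) {
  if (!depStatus || !depManifest) return false;                        // dependency unknown or never ran
  if (String(depStatus.hash).startsWith('IMPOSSIBLE')) return false;   // upstream permanently blocked
  if (depStatus.hash === depManifest.hash) return true;                // standard code-hash match

  // Content-based short-circuit: the dependency's code is stale, but its output is unchanged.
  const lastSeenHash = dependentStatus
    && dependentStatus.dependencyResultHashes
    && dependentStatus.dependencyResultHashes[depName];
  return Boolean(lastSeenHash && depStatus.resultHash === lastSeenHash);
}

// Code hash differs (v2 stored vs v3 in the manifest), but the dependent last consumed
// output 'out_abc', which is still the dependency's current resultHash -> still satisfied.
console.log(isSatisfied(
  { hash: 'code_v2', resultHash: 'out_abc' },
  { hash: 'code_v3' },
  { dependencyResultHashes: { priceIndex: 'out_abc' } },
  'priceIndex'
)); // true
```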
37
57
 
38
58
  for (const calc of calcsInPass) {
@@ -42,6 +62,17 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
42
62
  const storedCategory = stored ? stored.category : null;
43
63
  const currentHash = calc.hash;
44
64
 
65
+ // Collect current result hashes of dependencies for the next run
66
+ const currentDependencyResultHashes = {};
67
+ if (calc.dependencies) {
68
+ calc.dependencies.forEach(d => {
69
+ const normD = normalizeName(d);
70
+ if (simulationStatus[normD] && simulationStatus[normD].resultHash) {
71
+ currentDependencyResultHashes[d] = simulationStatus[normD].resultHash;
72
+ }
73
+ });
74
+ }
75
+
45
76
  const markImpossible = (reason, type = 'GENERIC') => {
46
77
  report.impossible.push({ name: cName, reason });
47
78
  const statusHash = `${STATUS_IMPOSSIBLE_PREFIX}:${type}`;
@@ -49,25 +80,31 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
49
80
  };
50
81
 
51
82
  const markRunnable = (isReRun = false, reRunDetails = null) => {
52
- if (isReRun) report.reRuns.push(reRunDetails);
53
- else report.runnable.push({ name: cName, ...reRunDetails });
83
+ const payload = {
84
+ name: cName,
85
+ ...reRunDetails,
86
+ dependencyResultHashes: currentDependencyResultHashes // Pass forward
87
+ };
88
+ if (isReRun) report.reRuns.push(payload);
89
+ else report.runnable.push(payload);
54
90
  // Simulate success so dependents can pass their check
55
- simulationStatus[cName] = { hash: currentHash, category: calc.category, composition: calc.composition };
91
+ simulationStatus[cName] = {
92
+ hash: currentHash,
93
+ resultHash: 'SIMULATED',
94
+ category: calc.category,
95
+ composition: calc.composition
96
+ };
56
97
  };
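The payload built above is what ultimately lands in the per-calculation status record. The object below is a hypothetical illustration of that record's shape after a successful run; the calculation names and hash values are invented, and the field set is inferred from this hunk together with the ResultCommitter changes further down.

```js
// Hypothetical status entry for one calculation after it has been committed.
const statusEntry = {
  hash: 'a1b2c3',                      // code hash from the manifest at run time
  resultHash: '9f8e7d',                // content hash of the stored output (generateDataHash)
  dependencyResultHashes: {            // output hashes of its inputs when this run happened
    portfoliovalue: '11aa22',
    priceindex: '33bb44',
  },
  category: 'portfolio',
  composition: { layers: { core: 'h1' }, deps: { portfoliovalue: 'h2' } },
  metrics: { durationMs: 1234 },
};
console.log(Object.keys(statusEntry));
```

Note that while a pass is being analyzed, markRunnable writes resultHash: 'SIMULATED' into simulationStatus as a placeholder, so calculations later in the same pass fall back to the code-hash comparison rather than short-circuiting on an output hash that does not exist yet.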
57
98
 
58
99
  let migrationOldCategory = null;
59
100
  if (storedCategory && storedCategory !== calc.category) { migrationOldCategory = storedCategory; }
60
101
 
61
- // [REMOVED] The "Permanently Impossible" optimization block was here.
62
- // Removal ensures we re-check Root Data every time, allowing for visibility and recovery.
63
-
64
- // 1. Check Root Data (The Primary Gate)
102
+ // 1. Check Root Data
65
103
  const rootCheck = checkRootDependencies(calc, rootDataStatus);
66
104
 
67
105
  if (!rootCheck.canRun) {
68
106
  const missingStr = rootCheck.missing.join(', ');
69
107
  if (!isTargetToday) {
70
- // If previously impossible, this confirms it. If previously run, this is a regression.
71
108
  markImpossible(`Missing Root Data: ${missingStr} (Historical)`, 'NO_DATA');
72
109
  } else {
73
110
  report.blocked.push({ name: cName, reason: `Missing Root Data: ${missingStr} (Waiting)` });
@@ -85,15 +122,15 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
85
122
  const depStored = simulationStatus[normDep];
86
123
  if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
87
124
  dependencyIsImpossible = true;
88
- impossibleDepCause = dep; // Capture the culprit
125
+ impossibleDepCause = dep;
89
126
  break;
90
127
  }
91
- if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
128
+ // Pass 'stored' (this calc's status) to check short-circuiting
129
+ if (!isDepSatisfied(dep, simulationStatus, manifestMap, stored)) { missingDeps.push(dep); }
92
130
  }
93
131
  }
94
132
 
95
133
  if (dependencyIsImpossible) {
96
- // [UPDATED] Include the name of the failing dependency in the reason string
97
134
  markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
98
135
  continue;
99
136
  }
@@ -117,7 +154,12 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
117
154
  markRunnable(false, { reason: "New Calculation" });
118
155
  }
119
156
  else if (storedHash !== currentHash) {
120
- // Smart Audit Logic
157
+ // [NEW] Check whether dependencies caused this mismatch, and whether their content is actually the same.
158
+ // Note: If we are here, it means code changed.
159
+ // Short-circuiting logic was handled in 'isDepSatisfied' for upstream checks.
160
+ // But if *this* calculation's own code changed, it must re-run; caching its previous output across a code change would be unsafe.
161
+ // So we strictly re-run if code changes.
162
+
121
163
  let changeReason = "Hash Mismatch (Unknown)";
122
164
  const oldComp = stored.composition;
123
165
  const newComp = calc.composition;
@@ -134,6 +176,11 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
134
176
  changeReason = `Layer Update: [${changedLayers.join(', ')}]`;
135
177
  }
136
178
  else if (JSON.stringify(oldComp.deps) !== JSON.stringify(newComp.deps)) {
179
+ // Dependency Hash Mismatch.
180
+ // This is where we COULD have short-circuited if we weren't enforcing code-hash strictness here.
181
+ // But typically, if the code hash mismatches, we re-run.
182
+ // The "Short-Circuit" benefit is mainly that *dependents* of this calculation
183
+ // won't need to re-run if *this* calculation produces the same output.
137
184
  const changedDeps = [];
138
185
  for(const dKey in newComp.deps) {
139
186
  if (newComp.deps[dKey] !== oldComp.deps[dKey]) changedDeps.push(dKey);
@@ -158,7 +205,6 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
158
205
  else if (migrationOldCategory) {
159
206
  markRunnable(true, { name: cName, reason: 'Category Migration', previousCategory: migrationOldCategory, newCategory: calc.category });
160
207
  }
161
- // Audit Upgrade Check
162
208
  else if (!stored.composition) {
163
209
  markRunnable(true, {
164
210
  name: cName,
@@ -177,8 +223,9 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
177
223
 
178
224
  /**
179
225
  * DIRECT EXECUTION PIPELINE (For Workers)
226
+ * [UPDATED] Accepts dependencyResultHashes
180
227
  */
181
- async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null) {
228
+ async function executeDispatchTask(dateStr, pass, targetComputation, config, dependencies, computationManifest, previousCategory = null, dependencyResultHashes = {}) {
182
229
  const { logger } = dependencies;
183
230
  const pid = generateProcessId(PROCESS_TYPES.EXECUTOR, targetComputation, dateStr);
184
231
 
@@ -187,6 +234,9 @@ async function executeDispatchTask(dateStr, pass, targetComputation, config, dep
187
234
 
188
235
  if (!calcManifest) { throw new Error(`Calculation '${targetComputation}' not found in manifest.`); }
189
236
 
237
+ // [NEW] Attach the dependency result hashes to the manifest so ResultCommitter can save them
238
+ calcManifest.dependencyResultHashes = dependencyResultHashes;
239
+
190
240
  if (previousCategory) {
191
241
  calcManifest.previousCategory = previousCategory;
192
242
  logger.log('INFO', `[Executor] Migration detected for ${calcManifest.name}. Old data will be cleaned from: ${previousCategory}`);
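The executor change above is the hand-off point: the hashes arrive in the task payload, are attached to the manifest entry, and ResultCommitter later copies them into the saved status. A condensed sketch of that flow, using invented stand-in objects rather than the real manifest and committer:

```js
// 1. Dispatcher payload (see computation_dispatcher.js below) carries the hashes.
const taskPayload = {
  computation: 'exposuresummary',
  dependencyResultHashes: { portfoliovalue: 'out_123' },
};

// 2. executeDispatchTask attaches them to the manifest entry for the target calculation.
const calcManifest = { name: 'exposuresummary', hash: 'code_abc', category: 'portfolio' };
calcManifest.dependencyResultHashes = taskPayload.dependencyResultHashes || {};

// 3. ResultCommitter persists them next to the code hash and the new result hash.
const statusUpdate = {
  hash: calcManifest.hash,
  resultHash: 'out_456', // generateDataHash(result) of this run's output
  dependencyResultHashes: calcManifest.dependencyResultHashes,
};
console.log(statusUpdate);
```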
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
3
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
4
- * UPDATED: Fixed "undefined" reason crash for failed dependencies.
4
+ * UPDATED: Implements Zombie Task Recovery & Dependency Result Hash Passing.
5
5
  */
6
6
 
7
7
  const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -11,13 +11,13 @@ const { fetchComputationStatus, updateComputationStatus } = require('../persiste
11
11
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
12
12
  const { generateCodeHash } = require('../topology/HashManager');
13
13
  const pLimit = require('p-limit');
14
+ const crypto = require('crypto');
14
15
 
15
16
  const TOPIC_NAME = 'computation-tasks';
16
17
  const STATUS_IMPOSSIBLE = 'IMPOSSIBLE';
17
18
 
18
19
  /**
19
20
  * Dispatches computation tasks for a specific pass.
20
- * Performs full pre-flight checks (Root Data, Dependencies, History) before emitting.
21
21
  */
22
22
  async function dispatchComputationPass(config, dependencies, computationManifest) {
23
23
  const { logger, db } = dependencies;
@@ -26,43 +26,36 @@ async function dispatchComputationPass(config, dependencies, computationManifest
26
26
 
27
27
  if (!passToRun) { return logger.log('ERROR', '[Dispatcher] No pass defined (COMPUTATION_PASS_TO_RUN). Aborting.'); }
28
28
 
29
- // 1. Get Calculations for this Pass
29
+ const currentManifestHash = generateCodeHash(
30
+ computationManifest.map(c => c.hash).sort().join('|')
31
+ );
32
+
30
33
  const passes = groupByPass(computationManifest);
31
34
  const calcsInThisPass = passes[passToRun] || [];
32
35
 
33
36
  if (!calcsInThisPass.length) { return logger.log('WARN', `[Dispatcher] No calcs for Pass ${passToRun}. Exiting.`); }
34
37
 
35
- // --- [NEW] OPTIMIZATION 1: PREEMPTIVE HASH CHECK ---
36
- const currentManifestHash = generateCodeHash(
37
- computationManifest.map(c => c.hash).sort().join('|')
38
- );
39
- // ---------------------------------------------------
40
-
41
38
  const calcNames = calcsInThisPass.map(c => c.name);
42
39
  logger.log('INFO', `🚀 [Dispatcher] Smart-Dispatching PASS ${passToRun}`);
43
40
  logger.log('INFO', `[Dispatcher] Target Calculations: [${calcNames.join(', ')}]`);
44
41
 
45
- // 2. Determine Date Range
46
42
  const passEarliestDate = Object.values(DEFINITIVE_EARLIEST_DATES).reduce((a, b) => a < b ? a : b);
47
43
  const endDateUTC = new Date(Date.UTC(new Date().getUTCFullYear(), new Date().getUTCMonth(), new Date().getUTCDate() - 1));
48
44
  const allExpectedDates = getExpectedDateStrings(passEarliestDate, endDateUTC);
49
45
 
50
46
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
51
47
  const tasksToDispatch = [];
52
- const limit = pLimit(20); // Process 20 days in parallel
48
+ const limit = pLimit(20);
53
49
 
54
50
  logger.log('INFO', `[Dispatcher] Analyzing ${allExpectedDates.length} dates for viability...`);
55
51
 
56
- // 3. Analyze Each Date (Concurrent)
57
52
  const analysisPromises = allExpectedDates.map(dateStr => limit(async () => {
58
53
  try {
59
- // [NEW] OPTIMIZATION 3: PARALLEL STATUS FETCH
60
54
  const fetchPromises = [
61
- fetchComputationStatus(dateStr, config, dependencies), // A. Current Status
62
- checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES) // C. Root Data
55
+ fetchComputationStatus(dateStr, config, dependencies),
56
+ checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES)
63
57
  ];
64
58
 
65
- // B. Fetch Status (Yesterday) - Only if historical continuity is needed
66
59
  let prevDateStr = null;
67
60
  if (calcsInThisPass.some(c => c.isHistorical)) {
68
61
  const prevDate = new Date(dateStr + 'T00:00:00Z');
@@ -83,25 +76,20 @@ async function dispatchComputationPass(config, dependencies, computationManifest
83
76
  hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false
84
77
  };
85
78
 
86
- // D. Run Core Analysis Logic
87
79
  const report = analyzeDateExecution(dateStr, calcsInThisPass, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
88
80
 
89
- // E. Handle Non-Runnable States (Write directly to DB, don't dispatch)
90
81
  const statusUpdates = {};
91
82
 
92
- // Mark Impossible (Permanent Failure)
93
83
  report.impossible.forEach(item => {
94
84
  if (dailyStatus[item.name]?.hash !== STATUS_IMPOSSIBLE) {
95
85
  statusUpdates[item.name] = { hash: STATUS_IMPOSSIBLE, category: 'unknown', reason: item.reason };
96
86
  }
97
87
  });
98
88
 
99
- // Mark Blocked (Explicit Block)
100
89
  report.blocked.forEach(item => {
101
90
  statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
102
91
  });
103
92
 
104
- // [FIX] Mark Failed Dependencies (Implicit Block) - Safely generate reason string
105
93
  report.failedDependency.forEach(item => {
106
94
  const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
107
95
  statusUpdates[item.name] = {
@@ -115,17 +103,21 @@ async function dispatchComputationPass(config, dependencies, computationManifest
115
103
  await updateComputationStatus(dateStr, statusUpdates, config, dependencies);
116
104
  }
117
105
 
118
- // F. Queue Runnables
119
106
  const validToRun = [...report.runnable, ...report.reRuns];
120
107
  validToRun.forEach(item => {
108
+ const uniqueDispatchId = crypto.randomUUID();
109
+
121
110
  tasksToDispatch.push({
122
111
  action: 'RUN_COMPUTATION_DATE',
112
+ dispatchId: uniqueDispatchId,
123
113
  date: dateStr,
124
114
  pass: passToRun,
125
115
  computation: normalizeName(item.name),
126
116
  hash: item.hash || item.newHash,
127
117
  previousCategory: item.previousCategory || null,
128
- triggerReason: item.reason || "Unknown",
118
+ triggerReason: item.reason || "Unknown",
119
+ // [NEW] Pass Content-Based hashes provided by analyzeDateExecution
120
+ dependencyResultHashes: item.dependencyResultHashes || {},
129
121
  timestamp: Date.now()
130
122
  });
131
123
  });
@@ -137,13 +129,11 @@ async function dispatchComputationPass(config, dependencies, computationManifest
137
129
 
138
130
  await Promise.all(analysisPromises);
139
131
 
140
- // 4. Dispatch Valid Tasks with Atomic Ledger Check
141
132
  if (tasksToDispatch.length > 0) {
142
133
  logger.log('INFO', `[Dispatcher] 📝 Creating Audit Ledger entries (Transactional) for ${tasksToDispatch.length} tasks...`);
143
134
 
144
- // --- [NEW] OPTIMIZATION 2: ATOMIC TRANSACTION FOR LEDGER ---
145
135
  const finalDispatched = [];
146
- const txnLimit = pLimit(20); // Limit concurrent transactions
136
+ const txnLimit = pLimit(20);
147
137
 
148
138
  const txnPromises = tasksToDispatch.map(task => txnLimit(async () => {
149
139
  const ledgerRef = db.collection(`computation_audit_ledger/${task.date}/passes/${task.pass}/tasks`).doc(task.computation);
@@ -151,23 +141,35 @@ async function dispatchComputationPass(config, dependencies, computationManifest
151
141
  try {
152
142
  await db.runTransaction(async (t) => {
153
143
  const doc = await t.get(ledgerRef);
154
- if (doc.exists && doc.data().status === 'PENDING') {
155
- // Task is already pending from another dispatcher, Skip.
156
- return false;
144
+
145
+ // [NEW] Zombie Task Recovery Check
146
+ if (doc.exists) {
147
+ const data = doc.data();
148
+ const now = Date.now();
149
+ const isPending = data.status === 'PENDING';
150
+ // A task is a zombie if it is PENDING and the lease has expired (or lease is missing but it's been > 1h)
151
+ const isLeaseExpired = data.leaseExpiresAt && data.leaseExpiresAt < now;
152
+ // Fallback: If no lease exists, assume 1 hour timeout for legacy zombie detection
153
+ const isLegacyZombie = !data.leaseExpiresAt && data.createdAt && (now - data.createdAt.toMillis() > 3600000);
154
+
155
+ if (isPending && !isLeaseExpired && !isLegacyZombie) {
156
+ return false; // Valid active pending task, do not double dispatch
157
+ }
157
158
  }
159
+
158
160
  t.set(ledgerRef, {
159
161
  status: 'PENDING',
162
+ dispatchId: task.dispatchId,
160
163
  computation: task.computation,
161
164
  expectedHash: task.hash || 'unknown',
162
165
  createdAt: new Date(),
163
- dispatcherHash: currentManifestHash, // Tracking source
164
- triggerReason: task.triggerReason, // Track trigger in ledger too
166
+ dispatcherHash: currentManifestHash,
167
+ triggerReason: task.triggerReason,
165
168
  retries: 0
166
169
  }, { merge: true });
167
170
  return true;
168
171
  });
169
172
 
170
- // Only dispatch if we successfully reserved the PENDING state
171
173
  finalDispatched.push(task);
172
174
 
173
175
  } catch (txnErr) {
@@ -176,7 +178,6 @@ async function dispatchComputationPass(config, dependencies, computationManifest
176
178
  }));
177
179
 
178
180
  await Promise.all(txnPromises);
179
- // ---------------------------------------------------
180
181
 
181
182
  if (finalDispatched.length > 0) {
182
183
  logger.log('INFO', `[Dispatcher] ✅ Publishing ${finalDispatched.length} unique tasks to Pub/Sub...`);
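The transactional check earlier in this file decides whether an existing ledger entry still protects a task or whether it is a zombie that should be re-dispatched. A standalone sketch of that predicate, with invented ledger documents (the Firestore createdAt.toMillis() call is replaced by a plain epoch-millisecond field for simplicity):

```js
// True when an existing PENDING entry should still block a new dispatch.
function isStillProtected(entry, now = Date.now()) {
  const isPending = entry.status === 'PENDING';
  const isLeaseExpired = Boolean(entry.leaseExpiresAt && entry.leaseExpiresAt < now);
  const isLegacyZombie = !entry.leaseExpiresAt && entry.createdAtMs
    && (now - entry.createdAtMs > 3600000); // no lease at all: assume a 1-hour timeout
  return isPending && !isLeaseExpired && !isLegacyZombie;
}

const now = Date.now();
console.log(isStillProtected({ status: 'PENDING', leaseExpiresAt: now + 10 * 60 * 1000 }, now)); // true  -> skip dispatch
console.log(isStillProtected({ status: 'PENDING', leaseExpiresAt: now - 1000 }, now));           // false -> zombie, re-dispatch
console.log(isStillProtected({ status: 'COMPLETED' }, now));                                     // false -> safe to reserve again
```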
@@ -1,8 +1,7 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_worker.js
3
- * PURPOSE: Consumes computation tasks from Pub/Sub and executes them.
4
- * UPDATED: Added Deterministic Error Short-Circuit to prevent infinite retry storms on data limits.
5
- * UPDATED: Integrated Run Ledger for per-run/per-date success/failure tracking.
3
+ * PURPOSE: Consumes computation tasks from Pub/Sub.
4
+ * UPDATED: Implements Lease Claiming and passes Dependency Hashes.
6
5
  */
7
6
 
8
7
  const { executeDispatchTask } = require('../WorkflowOrchestrator.js');
@@ -14,18 +13,14 @@ let calculationPackage;
14
13
  try { calculationPackage = require('aiden-shared-calculations-unified');
15
14
  } catch (e) {console.error("FATAL: Could not load 'aiden-shared-calculations-unified'."); throw e; }
16
15
  const calculations = calculationPackage.calculations;
17
- const MAX_RETRIES = 3;
16
+ const MAX_RETRIES = 0;
18
17
 
19
- /**
20
- * Handles a single Pub/Sub message.
21
- */
22
18
  async function handleComputationTask(message, config, dependencies) {
23
19
  const systemLogger = new StructuredLogger({ minLevel: config.minLevel || 'INFO', enableStructured: true, ...config });
24
20
  const runDependencies = { ...dependencies, logger: systemLogger };
25
21
  const { logger, db } = runDependencies;
26
22
  let data;
27
23
 
28
- // ----------------------------------- Parse message -----------------------------------
29
24
  try {
30
25
  if (message.data && message.data.message && message.data.message.data) { data = JSON.parse(Buffer.from(message.data.message.data, 'base64').toString());
31
26
  } else if (message.data && typeof message.data === 'string') { data = JSON.parse(Buffer.from(message.data, 'base64').toString());
@@ -33,24 +28,42 @@ async function handleComputationTask(message, config, dependencies) {
33
28
  } else { data = message; }
34
29
  } catch (parseError) { logger.log('ERROR', `[Worker] Failed to parse Pub/Sub payload.`, { error: parseError.message }); return; }
35
30
 
36
- // ----------------------------------- Validate & Execute -----------------------------------
37
31
  if (!data || data.action !== 'RUN_COMPUTATION_DATE') { return; }
38
32
 
39
- // Extract Trigger Reason
40
- const { date, pass, computation, previousCategory, triggerReason } = data;
33
+ // Extract fields including new dependencyResultHashes
34
+ const { date, pass, computation, previousCategory, triggerReason, dispatchId, dependencyResultHashes } = data;
41
35
 
42
- if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload: Missing date, pass, or computation.`, data); return; }
36
+ if (!date || !pass || !computation) { logger.log('ERROR', `[Worker] Invalid payload.`, data); return; }
37
+
38
+ logger.log('INFO', `[Worker] 📥 Received Task: ${computation} (${date})`, {
39
+ dispatchId: dispatchId || 'legacy',
40
+ reason: triggerReason
41
+ });
42
+
43
+ // [NEW] LEASE CLAIMING
44
+ // Mark task as IN_PROGRESS and set a lease timeout (e.g., 20 minutes) to prevent Zombies
45
+ try {
46
+ const leaseTimeMs = (config.workerLeaseMinutes || 20) * 60 * 1000;
47
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).set({
48
+ status: 'IN_PROGRESS',
49
+ workerId: process.env.K_REVISION || 'unknown',
50
+ startedAt: new Date(),
51
+ leaseExpiresAt: Date.now() + leaseTimeMs,
52
+ dispatchId: dispatchId
53
+ }, { merge: true });
54
+ } catch (leaseErr) {
55
+ logger.log('WARN', `[Worker] Failed to claim lease for ${computation}. Continuing anyway...`, leaseErr);
56
+ }
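Put together with the dispatcher changes, each audit-ledger document now moves through a small lifecycle. The snapshots below are hypothetical documents at each stage; field names follow the writes shown in this diff, and the values are illustrative.

```js
// 1. Dispatcher reserves the task inside a transaction.
const pending = { status: 'PENDING', dispatchId: 'uuid-1', computation: 'exposuresummary',
                  expectedHash: 'code_abc', createdAt: '2024-05-02T03:00:00Z', retries: 0 };

// 2. Worker claims it and takes a lease (20 minutes unless config.workerLeaseMinutes overrides it).
const inProgress = { ...pending, status: 'IN_PROGRESS', workerId: 'rev-42',
                     startedAt: '2024-05-02T03:01:00Z', leaseExpiresAt: Date.now() + 20 * 60 * 1000 };

// 3a. On success the worker marks it COMPLETED.
const completed = { ...inProgress, status: 'COMPLETED', completedAt: '2024-05-02T03:05:00Z' };

// 3b. A crashed worker never writes COMPLETED; that stale entry is exactly what the
//     dispatcher's zombie-recovery check is designed to detect and re-dispatch.
console.log(pending.status, inProgress.status, completed.status);
```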
57
+
43
58
  let computationManifest;
44
59
  try { computationManifest = getManifest(config.activeProductLines || [], calculations, runDependencies);
45
- } catch (manifestError) { logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
46
- // FIX: Passing { durationMs: 0 } instead of {} to satisfy type requirements
60
+ } catch (manifestError) {
61
+ logger.log('FATAL', `[Worker] Failed to load Manifest: ${manifestError.message}`);
47
62
  await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: manifestError.message, stage: 'MANIFEST_LOAD' }, { durationMs: 0 }, triggerReason);
48
63
  return;
49
64
  }
50
65
 
51
66
  try {
52
- logger.log('INFO', `[Worker] 📥 Received: ${computation} for ${date} [Reason: ${triggerReason || 'Unknown'}]`);
53
-
54
67
  const startTime = Date.now();
55
68
  const result = await executeDispatchTask(
56
69
  date,
@@ -59,7 +72,8 @@ async function handleComputationTask(message, config, dependencies) {
59
72
  config,
60
73
  runDependencies,
61
74
  computationManifest,
62
- previousCategory
75
+ previousCategory,
76
+ dependencyResultHashes // [NEW] Pass hashes to executor
63
77
  );
64
78
  const duration = Date.now() - startTime;
65
79
 
@@ -69,7 +83,7 @@ async function handleComputationTask(message, config, dependencies) {
69
83
  if (failureReport.length > 0) {
70
84
  const failReason = failureReport[0];
71
85
  logger.log('ERROR', `[Worker] ❌ Failed logic/storage for ${computation}`, failReason.error);
72
- const metrics = failReason.metrics || {};
86
+ const metrics = failReason.metrics || {};
73
87
  metrics.durationMs = duration;
74
88
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', failReason.error, metrics, triggerReason);
75
89
  throw new Error(failReason.error.message || 'Computation Logic Failed');
@@ -78,9 +92,14 @@ async function handleComputationTask(message, config, dependencies) {
78
92
  const successData = successUpdates[computation];
79
93
  const metrics = successData.metrics || {};
80
94
  metrics.durationMs = duration;
95
+ logger.log('INFO', `[Worker] ✅ Stored: ${computation}. ID: ${dispatchId}`);
81
96
 
82
- logger.log('INFO', `[Worker] Stored: ${computation}. Processed: ${metrics.execution?.processedUsers || metrics.execution?.processedItems || '?'} items.`);
83
-
97
+ // Mark Ledger as COMPLETED
98
+ await db.collection(`computation_audit_ledger/${date}/passes/${pass}/tasks`).doc(computation).update({
99
+ status: 'COMPLETED',
100
+ completedAt: new Date()
101
+ }).catch(() => {});
102
+
84
103
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', null, metrics, triggerReason);
85
104
  }
86
105
  else {
@@ -88,41 +107,32 @@ async function handleComputationTask(message, config, dependencies) {
88
107
  await recordRunAttempt(db, { date, computation, pass }, 'SUCCESS', { message: 'Empty Result' }, { durationMs: duration }, triggerReason);
89
108
  }
90
109
  } catch (err) {
91
- // ----------------------------------- ERROR HANDLING & RETRY LOGIC -----------------------------------
92
-
93
- // 1. DETERMINISTIC ERROR CHECK (Short-Circuit)
94
- // If the error is permanent (like "Too Big" or "Validation Failed"), DO NOT RETRY.
95
- // This stops the "Retry Storm" where we pay for 3-4 retries of a task that will never succeed.
96
110
  const isDeterministicError = err.stage === 'SHARDING_LIMIT_EXCEEDED' ||
97
111
  err.stage === 'QUALITY_CIRCUIT_BREAKER' ||
98
112
  (err.message && (err.message.includes('INVALID_ARGUMENT') || err.message.includes('Transaction too big')));
99
113
 
100
114
  if (isDeterministicError) {
101
- logger.log('ERROR', `[Worker] 🛑 Permanent Failure (Data/Limit Issue). Sending to DLQ immediately: ${computation} ${date}`);
115
+ logger.log('ERROR', `[Worker] 🛑 Permanent Failure (Limit Issue). Sending to DLQ immediately: ${dispatchId}`);
102
116
  try {
103
117
  await db.collection('computation_dead_letter_queue').add({
104
118
  originalData: data,
119
+ dispatchId: dispatchId,
105
120
  error: { message: err.message, stack: err.stack, stage: err.stage || 'UNKNOWN' },
106
121
  finalAttemptAt: new Date(),
107
122
  failureReason: 'PERMANENT_DETERMINISTIC_ERROR'
108
123
  });
109
-
110
- // CRITICAL: We record the failure but return successfully to Pub/Sub to ACK the message and stop retries.
111
- // This ensures the task is marked as Failed in run history, but does NOT block the queue.
112
124
  await recordRunAttempt(db, { date, computation, pass }, 'FAILURE', { message: err.message, stage: err.stage || 'PERMANENT_FAIL' }, { durationMs: 0 }, triggerReason);
113
125
  return;
114
- } catch (dlqErr) {
115
- logger.log('FATAL', `[Worker] Failed to write to DLQ for deterministic error`, dlqErr);
116
- }
126
+ } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
117
127
  }
118
128
 
119
- // 2. STANDARD RETRY LOGIC (Timeout / Crash)
120
129
  const retryCount = message.deliveryAttempt || 0;
121
130
  if (retryCount >= MAX_RETRIES) {
122
- logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation} ${date} (Attempt ${retryCount})`);
131
+ logger.log('ERROR', `[Worker] ☠️ Task POISONED. Moved to DLQ: ${computation}`);
123
132
  try {
124
133
  await db.collection('computation_dead_letter_queue').add({
125
134
  originalData: data,
135
+ dispatchId: dispatchId,
126
136
  error: { message: err.message, stack: err.stack },
127
137
  finalAttemptAt: new Date(),
128
138
  failureReason: 'MAX_RETRIES_EXCEEDED'
@@ -131,8 +141,7 @@ async function handleComputationTask(message, config, dependencies) {
131
141
  } catch (dlqErr) { logger.log('FATAL', `[Worker] Failed to write to DLQ`, dlqErr); }
132
142
  }
133
143
 
134
- // If it's not deterministic and not max retries, we throw to let Pub/Sub retry it.
135
- logger.log('ERROR', `[Worker] ❌ Crash: ${computation} for ${date}: ${err.message}`);
144
+ logger.log('ERROR', `[Worker] Crash: ${computation}: ${err.message}`);
136
145
  await recordRunAttempt(db, { date, computation, pass }, 'CRASH', { message: err.message, stack: err.stack, stage: 'SYSTEM_CRASH' }, { durationMs: 0 }, triggerReason);
137
146
  throw err;
138
147
  }
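The catch block above splits failures into two classes: deterministic data/limit errors that will never succeed (recorded, ACKed and sent straight to the DLQ) and everything else, which falls through to the poison/retry handling. A standalone sketch of that classification, using invented error objects:

```js
// Mirrors the isDeterministicError test above; the err objects here are invented examples.
function isDeterministic(err) {
  return err.stage === 'SHARDING_LIMIT_EXCEEDED'
    || err.stage === 'QUALITY_CIRCUIT_BREAKER'
    || Boolean(err.message && (err.message.includes('INVALID_ARGUMENT')
                            || err.message.includes('Transaction too big')));
}

console.log(isDeterministic({ message: 'Transaction too big to commit' }));              // true  -> record FAILURE, ACK, stop
console.log(isDeterministic({ stage: 'QUALITY_CIRCUIT_BREAKER', message: 'bad data' })); // true
console.log(isDeterministic({ message: 'ETIMEDOUT', stage: 'NETWORK' }));                // false -> poison/DLQ path, then rethrow
```

With MAX_RETRIES lowered to 0 in this version, the non-deterministic path also dead-letters on its first recorded delivery attempt before the error is rethrown.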
@@ -1,10 +1,8 @@
1
1
  /**
2
2
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
3
- * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
4
- * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
5
- * FIX: Throws proper Error objects.
3
+ * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
6
4
  */
7
- const { commitBatchInChunks } = require('./FirestoreUtils');
5
+ const { commitBatchInChunks, generateDataHash } = require('../utils/utils'); // [UPDATED] Import generateDataHash
8
6
  const { updateComputationStatus } = require('./StatusRepository');
9
7
  const { batchStoreSchemas } = require('../utils/schema_capture');
10
8
  const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
@@ -14,13 +12,10 @@ const pLimit = require('p-limit');
14
12
 
15
13
  const NON_RETRYABLE_ERRORS = [
16
14
  'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
17
- // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
18
15
  ];
19
16
 
20
17
  /**
21
18
  * Commits results to Firestore.
22
- * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
23
- * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
24
19
  */
25
20
  async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
26
21
  const successUpdates = {};
@@ -52,7 +47,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
52
47
  const result = await calc.getResult();
53
48
  const overrides = validationOverrides[calc.manifest.name] || {};
54
49
 
55
- // Only validate if we have data or if it's the final flush
50
+ // Validation
56
51
  if (result && Object.keys(result).length > 0) {
57
52
  const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
58
53
  if (!healthCheck.valid) {
@@ -66,16 +61,20 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
66
61
 
67
62
  const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
68
63
 
69
- // If empty and standard mode, record 0-byte success.
70
- // If empty and INTERMEDIATE flush, just skip this calc for this flush.
64
+ // [NEW] Calculate Result Hash (Content-Based)
65
+ const resultHash = isEmpty ? 'empty' : generateDataHash(result);
66
+
67
+ // Handle Empty Results
71
68
  if (isEmpty) {
72
69
  if (flushMode === 'INTERMEDIATE') {
73
- nextShardIndexes[name] = currentShardIndex; // No change
70
+ nextShardIndexes[name] = currentShardIndex;
74
71
  continue;
75
72
  }
76
73
  if (calc.manifest.hash) {
77
74
  successUpdates[name] = {
78
- hash: calc.manifest.hash,
75
+ hash: calc.manifest.hash,
76
+ resultHash: resultHash, // [NEW] Store result hash
77
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW] Capture dep context
79
78
  category: calc.manifest.category,
80
79
  composition: calc.manifest.composition,
81
80
  metrics: runMetrics
@@ -90,7 +89,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
90
89
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
91
90
 
92
91
  if (isMultiDate) {
93
- // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
94
92
  const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
95
93
  const dailyData = result[historicalDate];
96
94
  if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -102,7 +100,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
102
100
  .collection(config.computationsSubcollection)
103
101
  .doc(name);
104
102
 
105
- // For historical Fan-Out, we assume standard flush mode (not incremental) for now
106
103
  await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
107
104
  }));
108
105
  await Promise.all(datePromises);
@@ -110,6 +107,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
110
107
  if (calc.manifest.hash) {
111
108
  successUpdates[name] = {
112
109
  hash: calc.manifest.hash,
110
+ resultHash: resultHash, // [NEW]
111
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW]
113
112
  category: calc.manifest.category,
114
113
  composition: calc.manifest.composition,
115
114
  metrics: runMetrics
@@ -117,7 +116,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
117
116
  }
118
117
 
119
118
  } else {
120
- // --- STANDARD / INCREMENTAL MODE ---
121
119
  const mainDocRef = db.collection(config.resultsCollection)
122
120
  .doc(dStr)
123
121
  .collection(config.resultsSubcollection)
@@ -131,12 +129,13 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
131
129
  runMetrics.storage.isSharded = writeStats.isSharded;
132
130
  runMetrics.storage.shardCount = writeStats.shardCount;
133
131
 
134
- // Track next index for subsequent flushes
135
132
  nextShardIndexes[name] = writeStats.nextShardIndex;
136
133
 
137
134
  if (calc.manifest.hash) {
138
135
  successUpdates[name] = {
139
136
  hash: calc.manifest.hash,
137
+ resultHash: resultHash, // [NEW]
138
+ dependencyResultHashes: calc.manifest.dependencyResultHashes || {}, // [NEW]
140
139
  category: calc.manifest.category,
141
140
  composition: calc.manifest.composition,
142
141
  metrics: runMetrics
@@ -171,10 +170,6 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
171
170
  }
172
171
 
173
172
  async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
174
- // Strategy 1: Standard (900KB, no key limit)
175
- // Strategy 2: Aggressive Bytes (450KB, 10k keys)
176
- // Strategy 3: Very Aggressive (200KB, 2k keys)
177
- // Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
178
173
  const strategies = [
179
174
  { bytes: 900 * 1024, keys: null },
180
175
  { bytes: 450 * 1024, keys: 10000 },
@@ -190,14 +185,10 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
190
185
  const constraints = strategies[attempt];
191
186
  try {
192
187
  const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
188
+ const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
193
189
 
194
- // Analyze the update batch
195
- const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
196
-
197
- // Calculate stats
198
190
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
199
191
 
200
- // Logic to determine next shard index
201
192
  let maxIndex = startShardIndex;
202
193
  updates.forEach(u => {
203
194
  const segs = u.ref.path.split('/');
@@ -224,8 +215,6 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
224
215
  lastError = commitErr;
225
216
  const msg = commitErr.message || '';
226
217
  const code = commitErr.code || '';
227
-
228
- // Check for explicit "too many index entries" or transaction size issues
229
218
  const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
230
219
  const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
231
220
 
@@ -233,14 +222,11 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
233
222
  logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
234
223
  throw commitErr;
235
224
  }
236
-
237
225
  if (isIndexError || isSizeError) {
238
226
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
239
227
  continue;
240
- }
241
- else {
228
+ } else {
242
229
  logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
243
- // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
244
230
  continue;
245
231
  }
246
232
  }
@@ -261,32 +247,28 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
261
247
  let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
262
248
  let shardIndex = startShardIndex;
263
249
 
264
- // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
265
250
  if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
266
251
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
267
252
  return [{ ref: docRef, data, options: { merge: true } }];
268
253
  }
269
254
 
270
- // Sharding Logic
271
255
  for (const [key, value] of Object.entries(result)) {
272
256
  if (key.startsWith('_')) continue;
273
257
  const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
274
258
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
275
259
 
276
260
  if (byteLimitReached || keyLimitReached) {
277
- writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
261
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
278
262
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
279
263
  }
280
264
  currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
281
265
  }
282
266
 
283
- // Push remaining chunk
284
267
  if (Object.keys(currentChunk).length > 0) {
285
268
  writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
286
- shardIndex++; // Increment so count is correct (0-based index means count is index+1)
269
+ shardIndex++;
287
270
  }
288
271
 
289
- // Pointer Logic
290
272
  if (flushMode !== 'INTERMEDIATE') {
291
273
  const pointerData = {
292
274
  _completed: true,
@@ -294,13 +276,12 @@ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 *
294
276
  _shardCount: shardIndex,
295
277
  _lastUpdated: new Date().toISOString()
296
278
  };
297
- writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
279
+ writes.push({ ref: docRef, data: pointerData, options: { merge: true } });
298
280
  }
299
281
 
300
282
  return writes;
301
283
  }
302
284
 
303
- // ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
304
285
  async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
305
286
  const { db, logger, calculationUtils } = deps;
306
287
  const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
@@ -3,6 +3,7 @@
3
3
  * Generates a "Pre-Flight" report of what the computation system WILL do.
4
4
  * REFACTORED: Strict 5-category reporting with date-based exclusion logic.
5
5
  * UPDATED: Added transactional locking to prevent duplicate reports on concurrent cold starts.
6
+ * UPDATED: Adds 'pass' number to detail records for better waterfall visibility.
6
7
  */
7
8
 
8
9
  const { analyzeDateExecution } = require('../WorkflowOrchestrator');
@@ -164,12 +165,18 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
164
165
  dateSummary.meta.totalExpected = expectedCount;
165
166
 
166
167
  // Helper to push only if date is valid for this specific calc
168
+ // [UPDATED] Adds 'pass' number to the record
167
169
  const pushIfValid = (targetArray, item, extraReason = null) => {
168
170
  const calcManifest = manifestMap.get(item.name);
169
171
  if (calcManifest && isDateBeforeAvailability(dateStr, calcManifest)) {
170
172
  return; // EXCLUDED: Date is before data exists
171
173
  }
172
- targetArray.push({ name: item.name, reason: item.reason || extraReason });
174
+
175
+ targetArray.push({
176
+ name: item.name,
177
+ reason: item.reason || extraReason,
178
+ pass: calcManifest ? calcManifest.pass : '?'
179
+ });
173
180
  };
174
181
 
175
182
  // 1. RUN (New)
@@ -28,6 +28,34 @@ function generateCodeHash(codeString) {
28
28
  return crypto.createHash('sha256').update(clean).digest('hex');
29
29
  }
30
30
 
31
+ /**
32
+ * [NEW] Generates a stable SHA-256 hash of a data object.
33
+ * Keys are sorted to ensure determinism.
34
+ */
35
+ function generateDataHash(data) {
36
+ if (data === undefined) return 'undefined';
37
+
38
+ // Recursive stable stringify
39
+ const stableStringify = (obj) => {
40
+ if (typeof obj !== 'object' || obj === null) {
41
+ return JSON.stringify(obj);
42
+ }
43
+ if (Array.isArray(obj)) {
44
+ return '[' + obj.map(stableStringify).join(',') + ']';
45
+ }
46
+ return '{' + Object.keys(obj).sort().map(k =>
47
+ JSON.stringify(k) + ':' + stableStringify(obj[k])
48
+ ).join(',') + '}';
49
+ };
50
+
51
+ try {
52
+ const str = stableStringify(data);
53
+ return crypto.createHash('sha256').update(str).digest('hex');
54
+ } catch (e) {
55
+ return 'hash_error';
56
+ }
57
+ }
58
+
31
59
  /**
32
60
  * Executes a function with exponential backoff retry logic.
33
61
  * @param {Function} fn - Async function to execute
@@ -49,7 +77,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
49
77
  }
50
78
  }
51
79
 
52
- /** * Stage 2: Commit a batch of writes in chunks
80
+ /** Stage 2: Commit a batch of writes in chunks
53
81
  * FIXED: Now respects write.options (e.g. { merge: false }) to allow overwrites/deletes.
54
82
  */
55
83
  async function commitBatchInChunks(config, deps, writes, operationName) {
@@ -220,6 +248,7 @@ module.exports = {
220
248
  getExpectedDateStrings,
221
249
  getEarliestDataDates,
222
250
  generateCodeHash,
251
+ generateDataHash, // Exported
223
252
  withRetry,
224
253
  DEFINITIVE_EARLIEST_DATES
225
254
  };
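Because generateDataHash sorts object keys recursively, logically identical results hash to the same value regardless of key order, which is what makes the result hash usable as a change detector. A small usage sketch; the require path assumes the utils module above, and the result objects are invented:

```js
const { generateDataHash } = require('../utils/utils');

const runA = { userB: { pnl: 2 }, userA: { pnl: 1 } };
const runB = { userA: { pnl: 1 }, userB: { pnl: 2 } };   // same content, different key order

console.log(generateDataHash(runA) === generateDataHash(runB));                  // true  -> dependents can short-circuit
console.log(generateDataHash({ userA: { pnl: 3 } }) === generateDataHash(runA)); // false -> content changed, re-run dependents
```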
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.274",
3
+ "version": "1.0.276",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [