bulltrackers-module 1.0.322 → 1.0.324

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,8 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
121
121
  }
122
122
 
123
123
  if (missingDeps.length > 0) {
124
- const isImpossible = missingDeps.some(d => simulationStatus[normalizeName(d)]?.hash?.startsWith(STATUS_IMPOSSIBLE_PREFIX));
124
+ // Coerce hash to a string so .startsWith() is always called on a string, even when hash is missing or not a string
125
+ const isImpossible = missingDeps.some(d => String(simulationStatus[normalizeName(d)]?.hash).startsWith(STATUS_IMPOSSIBLE_PREFIX));
125
126
  if (isImpossible) {
126
127
  report.impossible.push({ name: cName, reason: 'Upstream Impossible' });
127
128
  simulationStatus[cName] = { hash: `${STATUS_IMPOSSIBLE_PREFIX}:UPSTREAM` };
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
- * PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
4
- * UPDATED: Sweep mode now auto-upgrades missed tasks to high-mem due to potential silent OOMs.
3
+ * PURPOSE: Sequential Cursor-Based Dispatcher with Ledger Awareness, SimHash Stability, and Session Caching.
4
+ * UPDATED: Fixed Ledger Blindness, Cursor Shifting, and Live Analysis Disconnect.
5
5
  */
6
6
 
7
7
  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -9,138 +9,269 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
9
9
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
10
10
  const { fetchComputationStatus } = require('../persistence/StatusRepository');
11
11
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
12
- const crypto = require('crypto');
12
+ const crypto = require('crypto');
13
13
 
14
- const OOM_THRESHOLD_MB = 1500;
14
+ const OOM_THRESHOLD_MB = 1500;
15
15
  const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
16
+ const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
16
17
 
17
- async function getHighMemReroutes(db, date, pass, tasks) {
18
- const reroutes = [];
19
- for (const task of tasks) {
20
- const name = normalizeName(task.name);
21
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
22
- const doc = await db.doc(ledgerPath).get();
18
+ // =============================================================================
19
+ // HELPER: Ledger Awareness (Prevents Race Conditions)
20
+ // =============================================================================
21
+ async function filterActiveTasks(db, date, pass, tasks) {
22
+ if (!tasks || tasks.length === 0) return [];
23
+
24
+ const checkPromises = tasks.map(async (t) => {
25
+ const taskName = normalizeName(t.name);
26
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
27
+ const snap = await db.doc(ledgerPath).get();
23
28
 
24
- if (doc.exists) {
25
- const data = doc.data();
26
- const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
27
- (data.resourceTier !== 'high-mem') &&
28
- ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
29
+ if (snap.exists) {
30
+ const data = snap.data();
31
+ // Check PENDING, IN_PROGRESS, or "Ghost" (Completed < 1 min ago)
32
+ const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
33
+ const isJustFinished = data.status === 'COMPLETED' &&
34
+ data.completedAt &&
35
+ (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
29
36
 
30
- if (isOOM) {
31
- reroutes.push({ ...task, resources: 'high-mem' });
37
+ if (isActive || isJustFinished) return null;
38
+ }
39
+ return t;
40
+ });
41
+
42
+ const results = await Promise.all(checkPromises);
43
+ return results.filter(t => t !== null);
44
+ }
45
+
46
+ // =============================================================================
47
+ // HELPER: SimHash Stability (Solves Live Analysis Disconnect)
48
+ // =============================================================================
49
+ async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
50
+ const { db, logger } = dependencies;
51
+ const resolvedTasks = [];
52
+ const remainingTasks = [];
53
+
54
+ // Cache for SimHashes to avoid redundant DB lookups in loop
55
+ const simHashCache = new Map();
56
+
57
+ for (const task of tasks) {
58
+ // Only apply to Re-Runs (Hash Mismatches), not fresh runs (Missing Data)
59
+ const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
60
+ const manifestItem = manifestMap.get(normalizeName(task.name));
61
+
62
+ if (currentStatus && currentStatus.simHash && manifestItem) {
63
+
64
+ // 1. Get the SimHash for the NEW code (from Registry)
65
+ let newSimHash = simHashCache.get(manifestItem.hash);
66
+ if (!newSimHash) {
67
+ const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
68
+ if (simDoc.exists) {
69
+ newSimHash = simDoc.data().simHash;
70
+ simHashCache.set(manifestItem.hash, newSimHash);
71
+ }
72
+ }
73
+
74
+ // 2. Compare
75
+ if (newSimHash && newSimHash === currentStatus.simHash) {
76
+ resolvedTasks.push({
77
+ name: task.name,
78
+ hash: manifestItem.hash,
79
+ simHash: newSimHash,
80
+ prevStatus: currentStatus // Pass previous status to preserve other fields
81
+ });
82
+ continue;
32
83
  }
33
84
  }
85
+ remainingTasks.push(task);
34
86
  }
35
- return reroutes;
87
+
88
+ // 3. Apply Updates for Stable Tasks
89
+ if (resolvedTasks.length > 0) {
90
+ const updatePayload = {};
91
+
92
+ resolvedTasks.forEach(t => {
93
+ // [FIXED] Construct full nested object to avoid dot-notation issues with .set()
94
+ // We merge existing data (like resultHash) so we don't lose the valid calculation output
95
+ updatePayload[t.name] = {
96
+ ...(t.prevStatus || {}), // Keep existing resultHash, output, etc.
97
+ hash: t.hash, // Update to new code hash
98
+ simHash: t.simHash, // Confirmed stable simHash
99
+ reason: 'SimHash Stable (Auto-Resolved)',
100
+ lastUpdated: new Date().toISOString()
101
+ };
102
+ });
103
+
104
+ // Use set with merge: true. Now that keys are "clean" (no dots),
105
+ // objects will merge correctly into the document structure.
106
+ await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
107
+ logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date} (Logic Unchanged).`);
108
+ }
109
+
110
+ return remainingTasks;
111
+ }
112
+ // =============================================================================
113
+ // HELPER: Stable Session Management (Solves Cursor Shifting)
114
+ // =============================================================================
115
+ async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
116
+ const { db, logger } = dependencies;
117
+ const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
118
+ const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);
119
+
120
+ // 1. Try to Load Session
121
+ if (!forceRebuild) {
122
+ const sessionSnap = await sessionRef.get();
123
+ if (sessionSnap.exists) {
124
+ const data = sessionSnap.data();
125
+ const age = Date.now() - new Date(data.createdAt).getTime();
126
+ if (age < SESSION_CACHE_DURATION_MS) {
127
+ logger.log('INFO', `[Session] 📂 Loaded stable session for Pass ${passToRun} (${data.dates.length} dates).`);
128
+ return data.dates;
129
+ }
130
+ }
131
+ }
132
+
133
+ // 2. Rebuild Session (Expensive Scan)
134
+ logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
135
+ const earliestDates = await getEarliestDataDates(config, dependencies);
136
+ const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
137
+
138
+ // Ideally the session would hold only dates that *might* be dirty.
139
+ // Instead we store ALL expected dates; the dispatcher checks each one individually.
140
+ // Why? Pre-filtering here would repeat the work the dispatcher already does.
141
+ // Ordering: descending (newest first) can suit backfills, ascending suits standard runs.
142
+ // We keep the list as returned by getExpectedDateStrings, without re-sorting (assumed ascending, oldest first).
143
+
144
+ await sessionRef.set({
145
+ dates: allDates,
146
+ createdAt: new Date().toISOString(),
147
+ configHash: dateLimitStr // Simple versioning
148
+ });
149
+
150
+ return allDates;
36
151
  }
37
152
 
153
+ // =============================================================================
154
+ // MAIN DISPATCHER
155
+ // =============================================================================
38
156
  async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
39
157
  const { logger, db } = dependencies;
40
158
  const pubsubUtils = new PubSubUtils(dependencies);
41
159
 
42
160
  const passToRun = String(reqBody.pass || config.COMPUTATION_PASS_TO_RUN || "1");
43
161
  const targetCursorN = parseInt(reqBody.cursorIndex || 1);
44
- const dateLimitStr = reqBody.date || config.date || "2025-01-01";
162
+ const dateLimitStr = reqBody.date || config.date || "2025-01-01";
163
+ const forceRebuild = reqBody.forceRebuild === true;
45
164
 
46
165
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
47
166
  const passes = groupByPass(computationManifest);
48
167
  const calcsInThisPass = passes[passToRun] || [];
49
-
50
168
  const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
51
169
 
52
170
  if (!calcsInThisPass.length) {
53
- logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}.`);
54
171
  return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
55
172
  }
56
173
 
57
- const earliestDates = await getEarliestDataDates(config, dependencies);
58
- const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
174
+ // 1. Get Stable Date List (Solves Shifting Cursor)
175
+ const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
59
176
 
60
- if (allDates.length === 0) {
61
- logger.log('ERROR', `[Dispatcher] ❌ Date range is empty.`);
177
+ if (!sessionDates || sessionDates.length === 0) {
62
178
  return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
63
179
  }
64
180
 
65
- // 1. Identify all "Dirty" dates (dates that actually have work to do)
66
- const dirtyDates = [];
67
- for (const d of allDates) {
68
- const dailyStatus = await fetchComputationStatus(d, config, dependencies);
69
- const availability = await checkRootDataAvailability(d, config, dependencies, DEFINITIVE_EARLIEST_DATES);
70
-
71
- if (!availability || !availability.status.hasPrices) continue;
181
+ // 2. Select Date based on Cursor
182
+ let selectedDate = null;
183
+ let selectedTasks = [];
184
+ let isReroute = false;
185
+ let isSweep = false;
72
186
 
73
- const report = analyzeDateExecution(d, calcsInThisPass, availability.status, dailyStatus, manifestMap, null);
74
- const tasks = [...report.runnable, ...report.reRuns];
75
-
76
- if (tasks.length > 0) {
77
- dirtyDates.push({ date: d, tasks });
78
- }
187
+ // Check bounds
188
+ if (targetCursorN <= sessionDates.length) {
189
+ // Normal Operation
190
+ selectedDate = sessionDates[targetCursorN - 1];
191
+ } else {
192
+ // End of list
193
+ return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
79
194
  }
80
195
 
81
- let selectedDate = null;
82
- let selectedTasks = [];
83
- let isReroute = false;
84
- let isSweep = false;
85
-
86
- // Logic for Reroutes (Known OOM handling)
87
- if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
88
- const prevEntry = dirtyDates[targetCursorN - 2];
89
- const reroutes = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
90
- if (reroutes.length > 0) {
91
- selectedDate = prevEntry.date;
92
- selectedTasks = reroutes;
93
- isReroute = true;
196
+ // 3. Analyze SPECIFIC Date (Live Analysis)
197
+ // We only fetch status for the ONE date we are looking at + context
198
+ if (selectedDate) {
199
+ // A. Fetch Context
200
+ const needsHistory = calcsInThisPass.some(c => c.isHistorical);
201
+ const earliestDates = await getEarliestDataDates(config, dependencies);
202
+
203
+ let prevDailyStatusPromise = Promise.resolve(null);
204
+ if (needsHistory) {
205
+ const prevD = new Date(selectedDate + 'T00:00:00Z');
206
+ prevD.setUTCDate(prevD.getUTCDate() - 1);
207
+ if (prevD >= earliestDates.absoluteEarliest) {
208
+ prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
209
+ }
94
210
  }
95
- }
96
211
 
97
- // Logic for standard cursor progression or Sweep (Recovery) mode
98
- if (!selectedDate) {
99
- if (targetCursorN <= dirtyDates.length) {
100
- const entry = dirtyDates[targetCursorN - 1];
101
- selectedDate = entry.date;
102
- selectedTasks = entry.tasks;
103
- } else if (dirtyDates.length > 0) {
104
- // RECOVERY/SWEEP MODE:
105
- // The cursor has passed the number of dirty dates, but tasks still remain.
106
- // These tasks are automatically routed to 'high-mem' with recovery reasoning.
107
- isSweep = true;
108
- selectedDate = dirtyDates[0].date;
109
- selectedTasks = dirtyDates[0].tasks.map(t => ({
110
- ...t,
111
- resources: 'high-mem',
112
- reason: `${t.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
113
- }));
212
+ const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
213
+ fetchComputationStatus(selectedDate, config, dependencies),
214
+ prevDailyStatusPromise,
215
+ checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
216
+ ]);
217
+
218
+ if (availability && availability.status.hasPrices) {
219
+ const report = analyzeDateExecution(selectedDate, calcsInThisPass, availability.status, dailyStatus, manifestMap, prevDailyStatus);
220
+ let rawTasks = [...report.runnable, ...report.reRuns];
221
+
222
+ // B. Apply SimHash Resolution (Problem #1)
223
+ if (rawTasks.length > 0) {
224
+ rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
225
+ }
226
+
227
+ // C. Apply Ledger Filter (Problem #2)
228
+ if (rawTasks.length > 0) {
229
+ selectedTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks);
230
+ }
231
+
232
+ // D. Check for High-Mem Reroutes (OOM handling)
233
+ if (selectedTasks.length > 0) {
234
+ const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
235
+ if (reroutes.length > 0) {
236
+ selectedTasks = reroutes;
237
+ isReroute = true;
238
+ }
239
+ }
240
+ } else {
241
+ logger.log('WARN', `[Dispatcher] Date ${selectedDate} skipped (Data Unavailable).`);
114
242
  }
115
243
  }
116
244
 
117
- if (!selectedDate) {
118
- logger.log('INFO', `[Dispatcher] 🏁 Pass ${passToRun} is fully satiated. No work remaining.`);
119
- return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
245
+ // 4. Dispatch Logic
246
+ if (selectedTasks.length === 0) {
247
+ // Nothing to do for this date.
248
+ // CRITICAL: We return dispatched: 0, but n_cursor_ignored: FALSE.
249
+ // This tells workflow to increment cursor and check the next date in the Stable Session.
250
+ return {
251
+ status: 'CONTINUE_PASS',
252
+ dateProcessed: selectedDate,
253
+ dispatched: 0,
254
+ n_cursor_ignored: false, // Proceed to next date
255
+ etaSeconds: 0,
256
+ remainingDates: sessionDates.length - targetCursorN
257
+ };
120
258
  }
121
259
 
122
- const totalweight = selectedTasks.reduce((sum, t) => {
123
- const weight = manifestWeightMap.get(normalizeName(t.name)) || 1.0;
124
- return sum + weight;
125
- }, 0);
126
-
127
- // 2. Prepare Payload and Telemetry
260
+ // 5. Publish Tasks
261
+ const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
128
262
  const currentDispatchId = crypto.randomUUID();
129
263
  const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
130
- const remainingDatesCount = Math.max(0, dirtyDates.length - targetCursorN);
131
264
 
132
- // [UPDATED] Capture both name and reason for transparency
133
265
  const taskDetails = selectedTasks.map(t => `${t.name} (${t.reason})`);
134
266
 
135
- logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s. [Mode: ${isSweep ? 'RECOVERY' : 'NORMAL'}]`, {
136
- date: selectedDate,
137
- pass: passToRun,
138
- dispatchedCount: selectedTasks.length,
139
- remainingCursorDates: remainingDatesCount,
140
- totalweight: totalweight,
141
- etaSeconds: etaSeconds,
142
- dispatchId: currentDispatchId,
143
- tasks: taskDetails // [UPDATED] Now logs "calc-name (Reason)"
267
+ logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`, {
268
+ date: selectedDate,
269
+ pass: passToRun,
270
+ dispatchedCount: selectedTasks.length,
271
+ cursor: targetCursorN,
272
+ etaSeconds: etaSeconds,
273
+ dispatchId: currentDispatchId,
274
+ tasks: taskDetails
144
275
  });
145
276
 
146
277
  const mapToTaskPayload = (t) => ({
@@ -150,7 +281,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
150
281
  date: selectedDate,
151
282
  pass: passToRun,
152
283
  dispatchId: currentDispatchId,
153
- triggerReason: t.reason, // Already passed to worker
284
+ triggerReason: t.reason,
154
285
  resources: t.resources || 'standard'
155
286
  });
156
287
 
@@ -161,27 +292,49 @@ async function dispatchComputationPass(config, dependencies, computationManifest
161
292
  if (standardTasks.length > 0) {
162
293
  pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
163
294
  topicName: config.computationTopicStandard || 'computation-tasks',
164
- tasks : standardTasks,
165
- taskType : `pass-${passToRun}-std`
295
+ tasks: standardTasks,
296
+ taskType: `pass-${passToRun}-std`
166
297
  }));
167
298
  }
168
299
  if (highMemTasks.length > 0) {
169
300
  pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
170
301
  topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
171
- tasks : highMemTasks,
172
- taskType : `pass-${passToRun}-high`
302
+ tasks: highMemTasks,
303
+ taskType: `pass-${passToRun}-high`
173
304
  }));
174
305
  }
175
306
  await Promise.all(pubPromises);
176
307
 
308
+ // CRITICAL: We dispatched work. We want to check THIS date again next time
309
+ // to ensure tasks completed. So we IGNORE cursor increment.
177
310
  return {
178
- status : isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
179
- dateProcessed : selectedDate,
180
- dispatched : selectedTasks.length,
181
- n_cursor_ignored: isReroute,
182
- etaSeconds : etaSeconds,
183
- remainingDates : remainingDatesCount
311
+ status: 'CONTINUE_PASS',
312
+ dateProcessed: selectedDate,
313
+ dispatched: selectedTasks.length,
314
+ n_cursor_ignored: true, // Hold cursor until this date is clean
315
+ etaSeconds: etaSeconds,
316
+ remainingDates: sessionDates.length - targetCursorN
184
317
  };
185
318
  }
186
319
 
320
+ async function getHighMemReroutes(db, date, pass, tasks) {
321
+ const reroutes = [];
322
+ for (const task of tasks) {
323
+ const name = normalizeName(task.name);
324
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
325
+ const doc = await db.doc(ledgerPath).get();
326
+
327
+ if (doc.exists) {
328
+ const data = doc.data();
329
+ const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
330
+ (data.resourceTier !== 'high-mem') &&
331
+ ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
332
+ if (isOOM) {
333
+ reroutes.push({ ...task, resources: 'high-mem' });
334
+ }
335
+ }
336
+ }
337
+ return reroutes;
338
+ }
339
+
187
340
  module.exports = { dispatchComputationPass };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.322",
3
+ "version": "1.0.324",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [