bulltrackers-module 1.0.323 → 1.0.325

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
- * PURPOSE: Sequential Cursor-Based Dispatcher with Hyper-Verbose Telemetry.
4
- * UPDATED: Sweep mode now auto-upgrades missed tasks to high-mem due to potential silent OOMs.
3
+ * PURPOSE: Sequential Cursor-Based Dispatcher.
4
+ * BEHAVIOR: Dispatch -> Wait ETA -> Next Date.
5
5
  */
6
6
 
7
7
  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -9,138 +9,222 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
9
9
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
10
10
  const { fetchComputationStatus } = require('../persistence/StatusRepository');
11
11
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
12
- const crypto = require('crypto');
12
+ const crypto = require('crypto');
13
13
 
14
- const OOM_THRESHOLD_MB = 1500;
14
+ const OOM_THRESHOLD_MB = 1500;
15
15
  const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
16
+ const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
16
17
 
17
- async function getHighMemReroutes(db, date, pass, tasks) {
18
- const reroutes = [];
19
- for (const task of tasks) {
20
- const name = normalizeName(task.name);
21
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
22
- const doc = await db.doc(ledgerPath).get();
18
+ // =============================================================================
19
+ // HELPER: Ledger Awareness (Prevents Race Conditions)
20
+ // =============================================================================
21
+ async function filterActiveTasks(db, date, pass, tasks) {
22
+ if (!tasks || tasks.length === 0) return [];
23
+
24
+ const checkPromises = tasks.map(async (t) => {
25
+ const taskName = normalizeName(t.name);
26
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
27
+ const snap = await db.doc(ledgerPath).get();
23
28
 
24
- if (doc.exists) {
25
- const data = doc.data();
26
- const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
27
- (data.resourceTier !== 'high-mem') &&
28
- ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
29
+ if (snap.exists) {
30
+ const data = snap.data();
31
+ // Check PENDING, IN_PROGRESS, or "Ghost" (Completed < 1 min ago)
32
+ const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
33
+ const isJustFinished = data.status === 'COMPLETED' &&
34
+ data.completedAt &&
35
+ (Date.now() - new Date(data.completedAt).getTime() < 60 * 1000);
29
36
 
30
- if (isOOM) {
31
- reroutes.push({ ...task, resources: 'high-mem' });
37
+ if (isActive || isJustFinished) return null; // Filter out
38
+ }
39
+ return t;
40
+ });
41
+
42
+ const results = await Promise.all(checkPromises);
43
+ return results.filter(t => t !== null);
44
+ }
45
+
46
+ // =============================================================================
47
+ // HELPER: SimHash Stability (Solves Live Analysis Disconnect)
48
+ // =============================================================================
49
+ async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
50
+ const { db, logger } = dependencies;
51
+ const resolvedTasks = [];
52
+ const remainingTasks = [];
53
+ const simHashCache = new Map();
54
+
55
+ for (const task of tasks) {
56
+ const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
57
+ const manifestItem = manifestMap.get(normalizeName(task.name));
58
+
59
+ if (currentStatus && currentStatus.simHash && manifestItem) {
60
+ let newSimHash = simHashCache.get(manifestItem.hash);
61
+ if (!newSimHash) {
62
+ const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
63
+ if (simDoc.exists) {
64
+ newSimHash = simDoc.data().simHash;
65
+ simHashCache.set(manifestItem.hash, newSimHash);
66
+ }
67
+ }
68
+
69
+ if (newSimHash && newSimHash === currentStatus.simHash) {
70
+ resolvedTasks.push({
71
+ name: task.name,
72
+ hash: manifestItem.hash,
73
+ simHash: newSimHash,
74
+ prevStatus: currentStatus
75
+ });
76
+ continue;
32
77
  }
33
78
  }
79
+ remainingTasks.push(task);
34
80
  }
35
- return reroutes;
81
+
82
+ if (resolvedTasks.length > 0) {
83
+ const updatePayload = {};
84
+ resolvedTasks.forEach(t => {
85
+ updatePayload[t.name] = {
86
+ ...(t.prevStatus || {}),
87
+ hash: t.hash,
88
+ simHash: t.simHash,
89
+ reason: 'SimHash Stable (Auto-Resolved)',
90
+ lastUpdated: new Date().toISOString()
91
+ };
92
+ });
93
+ await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
94
+ logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date} (Logic Unchanged).`);
95
+ }
96
+
97
+ return remainingTasks;
98
+ }
99
+
100
+ // =============================================================================
101
+ // HELPER: Stable Session Management (Solves Cursor Shifting)
102
+ // =============================================================================
103
+ async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
104
+ const { db, logger } = dependencies;
105
+ const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
106
+ const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);
107
+
108
+ if (!forceRebuild) {
109
+ const sessionSnap = await sessionRef.get();
110
+ if (sessionSnap.exists) {
111
+ const data = sessionSnap.data();
112
+ if ((Date.now() - new Date(data.createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
113
+ // logger.log('INFO', `[Session] 📂 Loaded stable session for Pass ${passToRun}.`);
114
+ return data.dates;
115
+ }
116
+ }
117
+ }
118
+
119
+ logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
120
+ const earliestDates = await getEarliestDataDates(config, dependencies);
121
+ const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
122
+
123
+ await sessionRef.set({ dates: allDates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
124
+ return allDates;
36
125
  }
37
126
 
127
+ // =============================================================================
128
+ // MAIN DISPATCHER
129
+ // =============================================================================
38
130
  async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
39
131
  const { logger, db } = dependencies;
40
132
  const pubsubUtils = new PubSubUtils(dependencies);
41
133
 
42
- const passToRun = String(reqBody.pass || config.COMPUTATION_PASS_TO_RUN || "1");
134
+ const passToRun = String(reqBody.pass || "1");
43
135
  const targetCursorN = parseInt(reqBody.cursorIndex || 1);
44
- const dateLimitStr = reqBody.date || config.date || "2025-01-01";
136
+ const dateLimitStr = reqBody.date || "2025-01-01";
137
+ const forceRebuild = reqBody.forceRebuild === true;
45
138
 
46
139
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
47
140
  const passes = groupByPass(computationManifest);
48
141
  const calcsInThisPass = passes[passToRun] || [];
49
-
50
142
  const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
51
143
 
52
- if (!calcsInThisPass.length) {
53
- logger.log('WARN', `[Dispatcher] 🛑 No calculations found for Pass ${passToRun}.`);
54
- return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
55
- }
144
+ if (!calcsInThisPass.length) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
56
145
 
57
- const earliestDates = await getEarliestDataDates(config, dependencies);
58
- const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
59
-
60
- if (allDates.length === 0) {
61
- logger.log('ERROR', `[Dispatcher] Date range is empty.`);
146
+ // 1. Get Stable Date List
147
+ const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
148
+ if (!sessionDates || sessionDates.length === 0) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
149
+
150
+ // 2. Select Date
151
+ let selectedDate = null;
152
+ let selectedTasks = [];
153
+
154
+ if (targetCursorN <= sessionDates.length) {
155
+ selectedDate = sessionDates[targetCursorN - 1];
156
+ } else {
62
157
  return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
63
158
  }
64
159
 
65
- // 1. Identify all "Dirty" dates (dates that actually have work to do)
66
- const dirtyDates = [];
67
- for (const d of allDates) {
68
- const dailyStatus = await fetchComputationStatus(d, config, dependencies);
69
- const availability = await checkRootDataAvailability(d, config, dependencies, DEFINITIVE_EARLIEST_DATES);
160
+ // 3. Analyze SPECIFIC Date
161
+ if (selectedDate) {
162
+ const earliestDates = await getEarliestDataDates(config, dependencies);
163
+ const needsHistory = calcsInThisPass.some(c => c.isHistorical);
70
164
 
71
- if (!availability || !availability.status.hasPrices) continue;
72
-
73
- const report = analyzeDateExecution(d, calcsInThisPass, availability.status, dailyStatus, manifestMap, null);
74
- const tasks = [...report.runnable, ...report.reRuns];
75
-
76
- if (tasks.length > 0) {
77
- dirtyDates.push({ date: d, tasks });
165
+ let prevDailyStatusPromise = Promise.resolve(null);
166
+ if (needsHistory) {
167
+ const prevD = new Date(selectedDate + 'T00:00:00Z');
168
+ prevD.setUTCDate(prevD.getUTCDate() - 1);
169
+ if (prevD >= earliestDates.absoluteEarliest) {
170
+ prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
171
+ }
78
172
  }
79
- }
80
173
 
81
- let selectedDate = null;
82
- let selectedTasks = [];
83
- let isReroute = false;
84
- let isSweep = false;
85
-
86
- // Logic for Reroutes (Known OOM handling)
87
- if (targetCursorN > 1 && (targetCursorN - 2) < dirtyDates.length) {
88
- const prevEntry = dirtyDates[targetCursorN - 2];
89
- const reroutes = await getHighMemReroutes(db, prevEntry.date, passToRun, prevEntry.tasks);
90
- if (reroutes.length > 0) {
91
- selectedDate = prevEntry.date;
92
- selectedTasks = reroutes;
93
- isReroute = true;
94
- }
95
- }
174
+ const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
175
+ fetchComputationStatus(selectedDate, config, dependencies),
176
+ prevDailyStatusPromise,
177
+ checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
178
+ ]);
179
+
180
+ if (availability && availability.status.hasPrices) {
181
+ const report = analyzeDateExecution(selectedDate, calcsInThisPass, availability.status, dailyStatus, manifestMap, prevDailyStatus);
182
+ let rawTasks = [...report.runnable, ...report.reRuns];
96
183
 
97
- // Logic for standard cursor progression or Sweep (Recovery) mode
98
- if (!selectedDate) {
99
- if (targetCursorN <= dirtyDates.length) {
100
- const entry = dirtyDates[targetCursorN - 1];
101
- selectedDate = entry.date;
102
- selectedTasks = entry.tasks;
103
- } else if (dirtyDates.length > 0) {
104
- // RECOVERY/SWEEP MODE:
105
- // The cursor has passed the number of dirty dates, but tasks still remain.
106
- // These tasks are automatically routed to 'high-mem' with recovery reasoning.
107
- isSweep = true;
108
- selectedDate = dirtyDates[0].date;
109
- selectedTasks = dirtyDates[0].tasks.map(t => ({
110
- ...t,
111
- resources: 'high-mem',
112
- reason: `${t.reason || 'Missed Computation'} [RECOVERY_AUTO_UPGRADE: Potential Silent OOM]`
113
- }));
184
+ if (rawTasks.length > 0) {
185
+ rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
186
+
187
+ // Ledger Filter: Removes tasks that are already running
188
+ selectedTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks);
189
+ }
190
+
191
+ // OOM / High-Mem Reroute Check
192
+ if (selectedTasks.length > 0) {
193
+ const reroutes = await getHighMemReroutes(db, selectedDate, passToRun, selectedTasks);
194
+ if (reroutes.length > 0) {
195
+ selectedTasks = reroutes;
196
+ }
197
+ }
114
198
  }
115
199
  }
116
200
 
117
- if (!selectedDate) {
118
- logger.log('INFO', `[Dispatcher] 🏁 Pass ${passToRun} is fully satiated. No work remaining.`);
119
- return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0, etaSeconds: 0 };
201
+ // 4. Dispatch Logic
202
+ if (selectedTasks.length === 0) {
203
+ // Return 0 dispatched, FALSE cursor ignored -> Move to NEXT date immediately.
204
+ return {
205
+ status: 'CONTINUE_PASS',
206
+ dateProcessed: selectedDate,
207
+ dispatched: 0,
208
+ n_cursor_ignored: false,
209
+ etaSeconds: 0,
210
+ remainingDates: sessionDates.length - targetCursorN
211
+ };
120
212
  }
121
213
 
122
- const totalweight = selectedTasks.reduce((sum, t) => {
123
- const weight = manifestWeightMap.get(normalizeName(t.name)) || 1.0;
124
- return sum + weight;
125
- }, 0);
126
-
127
- // 2. Prepare Payload and Telemetry
214
+ // 5. Send Tasks
215
+ const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
128
216
  const currentDispatchId = crypto.randomUUID();
129
217
  const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
130
- const remainingDatesCount = Math.max(0, dirtyDates.length - targetCursorN);
131
218
 
132
- // [UPDATED] Capture both name and reason for transparency
133
219
  const taskDetails = selectedTasks.map(t => `${t.name} (${t.reason})`);
134
-
135
- logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}. ETA: ${etaSeconds}s. [Mode: ${isSweep ? 'RECOVERY' : 'NORMAL'}]`, {
136
- date: selectedDate,
137
- pass: passToRun,
138
- dispatchedCount: selectedTasks.length,
139
- remainingCursorDates: remainingDatesCount,
140
- totalweight: totalweight,
141
- etaSeconds: etaSeconds,
142
- dispatchId: currentDispatchId,
143
- tasks: taskDetails // [UPDATED] Now logs "calc-name (Reason)"
220
+ logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`, {
221
+ date: selectedDate,
222
+ pass: passToRun,
223
+ dispatchedCount: selectedTasks.length,
224
+ cursor: targetCursorN,
225
+ etaSeconds: etaSeconds,
226
+ dispatchId: currentDispatchId,
227
+ tasks: taskDetails
144
228
  });
145
229
 
146
230
  const mapToTaskPayload = (t) => ({
@@ -150,7 +234,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
150
234
  date: selectedDate,
151
235
  pass: passToRun,
152
236
  dispatchId: currentDispatchId,
153
- triggerReason: t.reason, // Already passed to worker
237
+ triggerReason: t.reason,
154
238
  resources: t.resources || 'standard'
155
239
  });
156
240
 
@@ -161,27 +245,50 @@ async function dispatchComputationPass(config, dependencies, computationManifest
161
245
  if (standardTasks.length > 0) {
162
246
  pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
163
247
  topicName: config.computationTopicStandard || 'computation-tasks',
164
- tasks : standardTasks,
165
- taskType : `pass-${passToRun}-std`
248
+ tasks: standardTasks,
249
+ taskType: `pass-${passToRun}-std`
166
250
  }));
167
251
  }
168
252
  if (highMemTasks.length > 0) {
169
253
  pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
170
254
  topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
171
- tasks : highMemTasks,
172
- taskType : `pass-${passToRun}-high`
255
+ tasks: highMemTasks,
256
+ taskType: `pass-${passToRun}-high`
173
257
  }));
174
258
  }
175
259
  await Promise.all(pubPromises);
176
260
 
261
+ // CRITICAL: We dispatched work.
262
+ // We return n_cursor_ignored: FALSE.
263
+ // This tells the workflow to Wait ETA -> Increment Cursor -> Move to Next Date.
177
264
  return {
178
- status : isSweep ? 'RECOVERY' : 'CONTINUE_PASS',
179
- dateProcessed : selectedDate,
180
- dispatched : selectedTasks.length,
181
- n_cursor_ignored: isReroute,
182
- etaSeconds : etaSeconds,
183
- remainingDates : remainingDatesCount
265
+ status: 'CONTINUE_PASS',
266
+ dateProcessed: selectedDate,
267
+ dispatched: selectedTasks.length,
268
+ n_cursor_ignored: false, // FORCE NEXT DATE
269
+ etaSeconds: etaSeconds,
270
+ remainingDates: sessionDates.length - targetCursorN
184
271
  };
185
272
  }
186
273
 
274
+ async function getHighMemReroutes(db, date, pass, tasks) {
275
+ const reroutes = [];
276
+ for (const task of tasks) {
277
+ const name = normalizeName(task.name);
278
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
279
+ const doc = await db.doc(ledgerPath).get();
280
+
281
+ if (doc.exists) {
282
+ const data = doc.data();
283
+ const isOOM = (data.status === 'FAILED' || data.status === 'CRASH') &&
284
+ (data.resourceTier !== 'high-mem') &&
285
+ ((data.peakMemoryMB > OOM_THRESHOLD_MB) || (data.error && /memory/i.test(data.error.message)));
286
+ if (isOOM) {
287
+ reroutes.push({ ...task, resources: 'high-mem' });
288
+ }
289
+ }
290
+ }
291
+ return reroutes;
292
+ }
293
+
187
294
  module.exports = { dispatchComputationPass };
@@ -1,5 +1,5 @@
1
1
  # Cloud Workflows: Precision Cursor-Based Orchestrator
2
- # UPDATED: Added satiation detection to break early on 0 remaining dates.
2
+ # SIMPLE MODE: Dispatch -> Wait ETA -> Next Date
3
3
 
4
4
  main:
5
5
  params: [input]
@@ -20,7 +20,6 @@ main:
20
20
  assign:
21
21
  - n_cursor: 1
22
22
  - pass_complete: false
23
- - consecutive_empty_dispatches: 0
24
23
 
25
24
  - sequential_date_loop:
26
25
  switch:
@@ -39,26 +38,25 @@ main:
39
38
 
40
39
  - evaluate_dispatch:
41
40
  switch:
41
+ # 1. End of Session (Dispatcher reached end of date list)
42
42
  - condition: '${dispatch_res.body.status == "MOVE_TO_NEXT_PASS"}'
43
43
  assign:
44
44
  - pass_complete: true
45
45
 
46
- # NEW: Explicit Satiation Check
47
- - condition: '${dispatch_res.body.status == "CONTINUE_PASS" and dispatch_res.body.remainingDates == 0}'
46
+ # 2. Satiation Check (Specific to date/logic)
47
+ - condition: '${dispatch_res.body.status == "CONTINUE_PASS" and dispatch_res.body.remainingDates == 0 and dispatch_res.body.dispatched == 0}'
48
48
  steps:
49
49
  - log_satiation:
50
50
  call: sys.log
51
51
  args:
52
- text: '${"Pass " + pass_id + " - ✅ Pass satiated (0 remaining dates). Moving to next pass."}'
52
+ text: '${"Pass " + pass_id + " - ✅ Pass satiated (0 remaining). Next pass."}'
53
53
  - mark_complete:
54
54
  assign:
55
55
  - pass_complete: true
56
56
 
57
+ # 3. Work Dispatched: Wait ETA -> Move Next (Ignored flag is FALSE)
57
58
  - condition: '${dispatch_res.body.dispatched > 0}'
58
59
  steps:
59
- - reset_retry_counter:
60
- assign:
61
- - consecutive_empty_dispatches: 0
62
60
  - wait_for_completion:
63
61
  call: sys.sleep
64
62
  args:
@@ -69,26 +67,18 @@ main:
69
67
  - next_loop_work:
70
68
  next: sequential_date_loop
71
69
 
70
+ # 4. No Work (Clean or Busy): Move Next Immediately
72
71
  - condition: '${dispatch_res.body.dispatched == 0}'
73
72
  steps:
74
- - increment_retry:
73
+ - wait_short:
74
+ call: sys.sleep
75
+ args:
76
+ seconds: 2 # Tiny debounce
77
+ - update_cursor_retry:
75
78
  assign:
76
- - consecutive_empty_dispatches: '${consecutive_empty_dispatches + 1}'
77
- - check_break_condition:
78
- switch:
79
- - condition: '${consecutive_empty_dispatches >= 3}'
80
- assign:
81
- - pass_complete: true
82
- - condition: '${true}'
83
- steps:
84
- - wait_short:
85
- call: sys.sleep
86
- args:
87
- seconds: 5
88
- - update_cursor_retry:
89
- assign:
90
- - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
91
- - next_loop_retry:
92
- next: sequential_date_loop
79
+ # Dispatcher sends n_cursor_ignored=false, so we increment.
80
+ - n_cursor: '${if(dispatch_res.body.n_cursor_ignored, n_cursor, n_cursor + 1)}'
81
+ - next_loop_retry:
82
+ next: sequential_date_loop
93
83
  - finish:
94
84
  return: "Pipeline Execution Satiated and Complete"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.323",
3
+ "version": "1.0.325",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [