bulltrackers-module 1.0.657 → 1.0.659

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/functions/api-v2/routes/popular_investors.js +80 -0
  2. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  3. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  4. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  5. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  6. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  7. package/functions/computation-system/helpers/computation_dispatcher.js +294 -699
  8. package/functions/computation-system/helpers/computation_worker.js +3 -2
  9. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  10. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  11. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  12. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  13. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  14. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  15. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  16. package/functions/computation-system/services/SnapshotService.js +129 -0
  17. package/functions/computation-system/tools/BuildReporter.js +12 -7
  18. package/functions/computation-system/utils/data_loader.js +213 -238
  19. package/package.json +3 -2
  20. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  21. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  22. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  23. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,10 +1,8 @@
1
1
  /**
2
+ * V2 TO REPLACE THE OLD.
2
3
  * FILENAME: computation-system/helpers/computation_dispatcher.js
3
- * PURPOSE: Sequential Cursor-Based Dispatcher.
4
- * UPDATED: Implemented "Fast-Forward" Scanning Loop to skip empty dates efficiently.
5
- * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
6
- * UPDATED: Generates Google Cloud Trace Context (traceId/spanId) for end-to-end monitoring.
7
- * UPDATED: Added Schedule Awareness (Daily, Weekly, Monthly) to filter tasks by date.
4
+ * PURPOSE: Sequential Cursor-Based Dispatcher (Refactored & Condensed).
5
+ * UPDATED: Added SNAPSHOT handling.
8
6
  */
9
7
 
10
8
  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -12,272 +10,181 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
12
10
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
13
11
  const { fetchComputationStatus } = require('../persistence/StatusRepository');
14
12
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
15
- const { runFinalSweepCheck } = require('../tools/FinalSweepReporter'); // [NEW]
13
+ const { runFinalSweepCheck } = require('../tools/FinalSweepReporter');
14
+ // 1. IMPORT SNAPSHOT SERVICE
15
+ const { generateDailySnapshots } = require('../services/SnapshotService');
16
16
  const crypto = require('crypto');
17
17
 
18
- const OOM_THRESHOLD_MB = 1500; // Unused
19
18
  const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
20
19
  const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
21
20
  const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
22
21
 
23
- // =============================================================================
24
- // HELPER: Firestore Timestamp Conversion
25
- // =============================================================================
26
- /**
27
- * Converts a Firestore Timestamp or Date to milliseconds.
28
- * Firestore stores Date objects as Timestamp objects, which have a .toDate() method.
29
- * This function handles both cases correctly.
30
- * @param {any} field - Firestore Timestamp, Date object, or string
31
- * @returns {number} Milliseconds since epoch, or 0 if invalid
32
- */
22
+ // ... [SHARED UTILS and SHARED ASYNC HELPERS remain exactly the same] ...
23
+
33
24
  function getMillis(field) {
34
25
  if (!field) return 0;
35
- // Handle Firestore Timestamp (has .toDate() method)
36
- if (field.toDate && typeof field.toDate === 'function') {
37
- return field.toDate().getTime();
38
- }
39
- // Handle standard Date object or string
26
+ if (field.toDate && typeof field.toDate === 'function') return field.toDate().getTime();
40
27
  const date = new Date(field);
41
28
  return isNaN(date.getTime()) ? 0 : date.getTime();
42
- }
29
+ }
43
30
 
44
- // =============================================================================
45
- // HELPER: Schedule Logic
46
- // =============================================================================
47
- /**
48
- * Checks if a computation is scheduled to run on a specific date.
49
- * Defaults to DAILY if no schedule is present.
50
- * * @param {string} dateStr - YYYY-MM-DD string
51
- * @param {Object} scheduleConfig - { type: 'DAILY'|'WEEKLY'|'MONTHLY', days: [] }
52
- * @returns {boolean} True if the computation should run
53
- */
54
31
  function isComputationScheduled(dateStr, scheduleConfig) {
55
- // Default: Run every day if no schedule is provided or explicitly DAILY
56
- if (!scheduleConfig || !scheduleConfig.type || scheduleConfig.type === 'DAILY') {
57
- return true;
58
- }
59
-
60
- const date = new Date(dateStr + 'T00:00:00Z'); // Ensure UTC parsing
32
+ if (!scheduleConfig || !scheduleConfig.type || scheduleConfig.type === 'DAILY') return true;
33
+ const date = new Date(dateStr + 'T00:00:00Z');
61
34
 
62
- // Weekly Schedule: Check Day of Week (0=Sun, 1=Mon, ..., 6=Sat)
63
35
  if (scheduleConfig.type === 'WEEKLY') {
64
- const dayOfWeek = date.getUTCDay();
65
- const validDays = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
66
- return validDays.includes(dayOfWeek);
36
+ const days = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
37
+ return days.includes(date.getUTCDay());
67
38
  }
68
-
69
- // Monthly Schedule: Check Day of Month (1-31)
70
39
  if (scheduleConfig.type === 'MONTHLY') {
71
- const dayOfMonth = date.getUTCDate();
72
- const validDates = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
73
- return validDates.includes(dayOfMonth);
40
+ const days = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
41
+ return days.includes(date.getUTCDate());
74
42
  }
75
-
76
- // Fallback default
77
43
  return true;
78
44
  }
79
45
 
80
- // =============================================================================
81
- // HELPER: Ledger Awareness
82
- // =============================================================================
83
- async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
84
- if (!tasks || tasks.length === 0) return [];
85
- if (forceRun) return tasks;
86
-
87
- const checkPromises = tasks.map(async (t) => {
88
- const taskName = normalizeName(t.name);
89
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
90
- const snap = await db.doc(ledgerPath).get();
91
-
92
- if (snap.exists) {
93
- const data = snap.data();
94
- const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
95
-
96
- if (isActive) {
97
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
98
-
99
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
100
- if (logger) logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${taskName}.`);
101
- return t;
102
- }
103
- return null;
104
- }
105
- // Note: We do NOT filter COMPLETED here anymore for Sweep.
106
- // If the Orchestrator says it needs to run, we run it.
46
+ function createTaskPayload(task, date, pass, dispatchId, resources, triggerReason) {
47
+ return {
48
+ action: 'RUN_COMPUTATION_DATE',
49
+ computation: task.name || task.computation,
50
+ date: date,
51
+ pass: pass,
52
+ dispatchId: dispatchId,
53
+ triggerReason: triggerReason || task.reason || 'DISPATCH',
54
+ resources: resources || task.resources || 'standard',
55
+ hash: task.hash,
56
+ traceContext: {
57
+ traceId: crypto.randomBytes(16).toString('hex'),
58
+ spanId: crypto.randomBytes(8).toString('hex'),
59
+ sampled: true
107
60
  }
108
- return t;
109
- });
110
- const results = await Promise.all(checkPromises);
111
- return results.filter(t => t !== null);
61
+ };
112
62
  }
113
63
 
114
- // =============================================================================
115
- // HELPER: SimHash Stability
116
- // =============================================================================
117
- async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
118
- const { db, logger } = dependencies;
119
- const resolvedTasks = [];
120
- const remainingTasks = [];
121
- const simHashCache = new Map();
122
-
123
- for (const task of tasks) {
124
- const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
125
- const manifestItem = manifestMap.get(normalizeName(task.name));
126
-
127
- if (currentStatus && currentStatus.simHash && manifestItem) {
128
- let newSimHash = simHashCache.get(manifestItem.hash);
129
- if (!newSimHash) {
130
- const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
131
- if (simDoc.exists) {
132
- newSimHash = simDoc.data().simHash;
133
- simHashCache.set(manifestItem.hash, newSimHash);
134
- }
135
- }
64
+ async function assessDateRunnability(date, computations, config, dependencies, manifestMap) {
65
+ const { status: rootStatus } = await checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES) || {};
66
+ if (!rootStatus) return null;
136
67
 
137
- if (newSimHash && newSimHash === currentStatus.simHash) {
138
- resolvedTasks.push({
139
- name: task.name,
140
- hash: manifestItem.hash,
141
- simHash: newSimHash,
142
- prevStatus: currentStatus
143
- });
144
- continue;
145
- }
68
+ const dailyStatus = await fetchComputationStatus(date, config, dependencies);
69
+
70
+ let prevDailyStatus = null;
71
+ if (computations.some(c => c.isHistorical)) {
72
+ const prevD = new Date(date + 'T00:00:00Z');
73
+ prevD.setUTCDate(prevD.getUTCDate() - 1);
74
+ const earliest = (await getEarliestDataDates(config, dependencies)).absoluteEarliest;
75
+ if (prevD >= earliest) {
76
+ prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
146
77
  }
147
- remainingTasks.push(task);
148
78
  }
149
79
 
150
- if (resolvedTasks.length > 0) {
151
- const updatePayload = {};
152
- resolvedTasks.forEach(t => {
153
- updatePayload[t.name] = {
154
- ...(t.prevStatus || {}),
155
- hash: t.hash,
156
- simHash: t.simHash,
157
- reason: 'SimHash Stable (Auto-Resolved)',
158
- lastUpdated: new Date().toISOString()
159
- };
80
+ const report = analyzeDateExecution(date, computations, rootStatus, dailyStatus, manifestMap, prevDailyStatus);
81
+ return { report, dailyStatus };
82
+ }
83
+
84
+ async function publishTaskBatch(dependencies, config, tasks, pass, topicOverride = null) {
85
+ if (tasks.length === 0) return 0;
86
+
87
+ const { logger } = dependencies;
88
+ const pubsubUtils = new PubSubUtils(dependencies);
89
+ const isHighMem = topicOverride ? topicOverride.includes('highmem') : tasks[0].resources === 'high-mem';
90
+ const topic = topicOverride || (isHighMem ? config.computationTopicHighMem : config.computationTopicStandard)
91
+ || (isHighMem ? 'computation-tasks-highmem' : 'computation-tasks');
92
+
93
+ const names = tasks.map(t => t.computation).join(', ');
94
+ logger.log('INFO', `[Dispatcher] 📤 Dispatching ${tasks.length} tasks to ${topic}: ${names.slice(0, 100)}...`);
95
+
96
+ const CHUNK_SIZE = 250;
97
+ const chunks = [];
98
+ for (let i = 0; i < tasks.length; i += CHUNK_SIZE) chunks.push(tasks.slice(i, i + CHUNK_SIZE));
99
+
100
+ for (const chunk of chunks) {
101
+ await pubsubUtils.batchPublishTasks(dependencies, {
102
+ topicName: topic,
103
+ tasks: chunk,
104
+ taskType: `pass-${pass}-${isHighMem ? 'high' : 'std'}`
160
105
  });
161
- await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
162
- logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date}.`);
163
106
  }
164
-
165
- return remainingTasks;
107
+ return tasks.length;
166
108
  }
167
109
 
168
- async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
110
+ async function getStableDateSession(config, dependencies, pass, dateLimitStr, forceRebuild) {
169
111
  const { db, logger } = dependencies;
170
- const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
112
+ const sessionId = `pass_${pass}_${dateLimitStr.replace(/-/g, '')}`;
171
113
  const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);
172
114
 
173
115
  if (!forceRebuild) {
174
- const sessionSnap = await sessionRef.get();
175
- if (sessionSnap.exists) {
176
- const data = sessionSnap.data();
177
- if ((Date.now() - new Date(data.createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
178
- return data.dates;
179
- }
116
+ const snap = await sessionRef.get();
117
+ if (snap.exists && (Date.now() - new Date(snap.data().createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
118
+ return snap.data().dates;
180
119
  }
181
120
  }
182
121
 
183
- logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
184
- const earliestDates = await getEarliestDataDates(config, dependencies);
185
- const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
186
-
187
- await sessionRef.set({ dates: allDates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
188
- return allDates;
122
+ logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${pass}...`);
123
+ const earliest = await getEarliestDataDates(config, dependencies);
124
+ const dates = getExpectedDateStrings(earliest.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
125
+ await sessionRef.set({ dates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
126
+ return dates;
189
127
  }
190
128
 
191
- // MAIN ENTRY POINT
192
129
  // =============================================================================
193
- async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
194
- const action = reqBody.action || 'DISPATCH';
130
+ // HANDLERS
131
+ // =============================================================================
132
+
133
+ // 2. NEW SNAPSHOT HANDLER
134
+ async function handleSnapshot(config, dependencies, reqBody) {
135
+ const { logger } = dependencies;
136
+ const date = reqBody.date;
195
137
 
196
- if (action === 'VERIFY') {
197
- return handlePassVerification(config, dependencies, computationManifest, reqBody);
198
- }
199
- else if (action === 'SWEEP') {
200
- return handleSweepDispatch(config, dependencies, computationManifest, reqBody);
201
- }
202
- else if (action === 'REPORT') {
203
- return handleFinalSweepReporting(config, dependencies, computationManifest, reqBody);
204
- }
205
- // [NEW] FORCE RUN HANDLER
206
- else if (action === 'FORCE_RUN') {
207
- return handleForceRun(config, dependencies, computationManifest, reqBody);
138
+ if (!date) throw new Error('Snapshot action requires a "date"');
139
+
140
+ try {
141
+ logger.log('INFO', `[Dispatcher] 📸 Triggering Snapshot Service for ${date}`);
142
+ // Calls the service we created earlier
143
+ const result = await generateDailySnapshots(date, config, dependencies);
144
+ return result;
145
+ } catch (e) {
146
+ logger.log('ERROR', `[Dispatcher] Snapshot failed: ${e.message}`);
147
+ // Return error object so workflow can see failure
148
+ return { status: 'ERROR', error: e.message };
208
149
  }
209
-
210
- return handleStandardDispatch(config, dependencies, computationManifest, reqBody);
211
150
  }
212
151
 
213
- // =============================================================================
214
- // NEW: Force Run Handler (Bypasses Checks)
215
- // =============================================================================
216
152
  async function handleForceRun(config, dependencies, computationManifest, reqBody) {
217
153
  const { logger } = dependencies;
218
- const pubsubUtils = new PubSubUtils(dependencies);
219
- const computationName = reqBody.computation; // Required
220
- const dateInput = reqBody.date; // Optional (YYYY-MM-DD)
154
+ const computationName = reqBody.computation;
155
+ const dateInput = reqBody.date;
221
156
 
222
- if (!computationName) {
223
- throw new Error('Force Run requires "computation" name.');
224
- }
157
+ if (!computationName) throw new Error('Force Run requires "computation" name.');
225
158
 
226
- // 1. Verify Computation Exists
227
159
  const manifestItem = computationManifest.find(c => normalizeName(c.name) === normalizeName(computationName));
228
- if (!manifestItem) {
229
- throw new Error(`Computation '${computationName}' not found in manifest.`);
230
- }
160
+ if (!manifestItem) throw new Error(`Computation '${computationName}' not found.`);
231
161
 
232
- // 2. Determine Target Dates
233
162
  let candidateDates = [];
234
163
  if (dateInput) {
235
- // Single Date Mode
236
164
  candidateDates = [dateInput];
237
165
  } else {
238
- // All Dates Mode (Backfill)
239
166
  logger.log('INFO', `[ForceRun] No date provided. Calculating date range for ${computationName}...`);
240
- const earliestDates = await getEarliestDataDates(config, dependencies);
241
- // Calculate from system start until today
242
- candidateDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date());
167
+ const earliest = await getEarliestDataDates(config, dependencies);
168
+ candidateDates = getExpectedDateStrings(earliest.absoluteEarliest, new Date());
243
169
  }
244
170
 
245
171
  logger.log('INFO', `[ForceRun] Checking ${candidateDates.length} candidate dates for runnability...`);
246
172
 
247
- // 3. Filter to only runnable dates using analyzeDateExecution
248
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
249
- const calcsInPass = groupByPass(computationManifest, manifestItem.pass || "1");
250
- const targetComputationNormalized = normalizeName(computationName);
251
-
252
- // Filter to only the target computation
253
- const targetCalcs = calcsInPass.filter(c => normalizeName(c.name) === targetComputationNormalized);
254
-
255
- if (targetCalcs.length === 0) {
256
- throw new Error(`Computation '${computationName}' not found in pass ${manifestItem.pass || "1"}`);
257
- }
258
-
259
173
  const runnableDates = [];
260
174
  const skippedDates = [];
175
+ const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
261
176
 
262
- for (const dateStr of candidateDates) {
263
- // Check root data availability
264
- const rootDataStatus = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
265
-
266
- // Get computation status for this date
267
- const dailyStatus = await fetchComputationStatus(dateStr, config, dependencies);
268
-
269
- // Check previous day status if needed
270
- let prevDailyStatus = null;
271
- if (targetCalcs.some(c => c.isHistorical)) {
272
- const prevDate = new Date(dateStr + 'T00:00:00Z');
273
- prevDate.setUTCDate(prevDate.getUTCDate() - 1);
274
- prevDailyStatus = await fetchComputationStatus(prevDate.toISOString().slice(0, 10), config, dependencies);
177
+ const targetComp = { ...manifestItem, schedule: null };
178
+ const targetComputationNormalized = normalizeName(computationName);
179
+
180
+ for (const date of candidateDates) {
181
+ const result = await assessDateRunnability(date, [targetComp], config, dependencies, manifestMap);
182
+ if (!result) {
183
+ skippedDates.push({ date, reason: 'Root data unavailable' });
184
+ continue;
275
185
  }
276
-
277
- // Analyze if this computation can run on this date
278
- const report = analyzeDateExecution(dateStr, targetCalcs, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
279
-
280
- // Check if the target computation is runnable, needs re-run, or has failed dependencies
186
+
187
+ const { report } = result;
281
188
  const isRunnable = report.runnable.some(t => normalizeName(t.name) === targetComputationNormalized);
282
189
  const needsReRun = report.reRuns.some(t => normalizeName(t.name) === targetComputationNormalized);
283
190
  const hasFailedDep = report.failedDependency.some(t => normalizeName(t.name) === targetComputationNormalized);
@@ -285,21 +192,18 @@ async function handleForceRun(config, dependencies, computationManifest, reqBody
285
192
  const isBlocked = report.blocked.some(t => normalizeName(t.name) === targetComputationNormalized);
286
193
 
287
194
  if (isRunnable || needsReRun || hasFailedDep) {
288
- runnableDates.push(dateStr);
195
+ runnableDates.push(date);
289
196
  } else if (isImpossible) {
290
- skippedDates.push({ date: dateStr, reason: report.impossible.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Impossible' });
197
+ skippedDates.push({ date, reason: report.impossible.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Impossible' });
291
198
  } else if (isBlocked) {
292
- skippedDates.push({ date: dateStr, reason: report.blocked.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Blocked' });
199
+ skippedDates.push({ date, reason: report.blocked.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Blocked' });
293
200
  } else {
294
- skippedDates.push({ date: dateStr, reason: 'Not runnable (unknown reason)' });
201
+ skippedDates.push({ date, reason: 'Not runnable (unknown reason)' });
295
202
  }
296
203
  }
297
204
 
298
205
  logger.log('INFO', `[ForceRun] ✅ Found ${runnableDates.length} runnable dates out of ${candidateDates.length} candidates`);
299
- if (skippedDates.length > 0) {
300
- logger.log('INFO', `[ForceRun] ⏭️ Skipped ${skippedDates.length} dates: ${skippedDates.slice(0, 5).map(s => `${s.date} (${s.reason})`).join(', ')}${skippedDates.length > 5 ? '...' : ''}`);
301
- }
302
-
206
+
303
207
  if (runnableDates.length === 0) {
304
208
  return {
305
209
  status: 'NO_RUNNABLE_DATES',
@@ -313,48 +217,19 @@ async function handleForceRun(config, dependencies, computationManifest, reqBody
313
217
 
314
218
  logger.log('WARN', `[ForceRun] 🚨 MANUALLY Triggering ${computationName} for ${runnableDates.length} runnable dates. Pass: ${manifestItem.pass}`);
315
219
 
316
- // 4. Construct Tasks (only for runnable dates)
317
- const dispatchId = crypto.randomUUID();
318
- const tasks = runnableDates.map(date => {
319
- const traceId = crypto.randomBytes(16).toString('hex');
320
- const spanId = crypto.randomBytes(8).toString('hex');
321
- return {
322
- action: 'RUN_COMPUTATION_DATE',
323
- computation: manifestItem.name,
324
- date: date,
325
- pass: manifestItem.pass || "1",
326
- dispatchId: dispatchId,
327
- triggerReason: 'MANUAL_FORCE_API',
328
- resources: reqBody.resources || 'standard',
329
- // Trace context allows you to find these specific runs in Cloud Trace
330
- traceContext: { traceId, spanId, sampled: true }
331
- };
332
- });
333
-
334
- // 4. Batch Publish (Chunked to stay under Pub/Sub limits)
335
- const CHUNK_SIZE = 250; // Safe batch size
336
220
  const topic = (reqBody.resources === 'high-mem')
337
221
  ? (config.computationTopicHighMem || 'computation-tasks-highmem')
338
222
  : (config.computationTopicStandard || 'computation-tasks');
339
-
340
- let dispatchedCount = 0;
341
- const chunks = [];
342
- for (let i = 0; i < tasks.length; i += CHUNK_SIZE) {
343
- chunks.push(tasks.slice(i, i + CHUNK_SIZE));
344
- }
345
223
 
346
- // Publish chunks sequentially to avoid memory spikes
347
- for (const chunk of chunks) {
348
- await pubsubUtils.batchPublishTasks(dependencies, {
349
- topicName: topic,
350
- tasks: chunk,
351
- taskType: 'manual-force-run'
352
- });
353
- dispatchedCount += chunk.length;
354
- }
224
+ const dispatchId = crypto.randomUUID();
225
+ const tasks = runnableDates.map(date =>
226
+ createTaskPayload(manifestItem, date, manifestItem.pass || "1", dispatchId, reqBody.resources, 'MANUAL_FORCE_API')
227
+ );
228
+
229
+ const dispatchedCount = await publishTaskBatch(dependencies, config, tasks, manifestItem.pass || "1", topic);
355
230
 
356
231
  return {
357
- status: 'FORCED',
232
+ status: 'FORCED',
358
233
  computation: computationName,
359
234
  mode: dateInput ? 'SINGLE_DATE' : 'ALL_DATES',
360
235
  datesChecked: candidateDates.length,
@@ -365,89 +240,34 @@ async function handleForceRun(config, dependencies, computationManifest, reqBody
365
240
  };
366
241
  }
367
242
 
368
- // =============================================================================
369
- // NEW: Final Sweep Reporting Handler
370
- // =============================================================================
371
- async function handleFinalSweepReporting(config, dependencies, computationManifest, reqBody) {
372
- const { logger } = dependencies;
373
- const passToRun = String(reqBody.pass || "1");
374
- // Target date is required for detailed forensics
375
- const date = reqBody.date || new Date().toISOString().slice(0, 10);
376
-
377
- logger.log('INFO', `[Dispatcher] 📝 Triggering Final Sweep Forensics for Pass ${passToRun} on ${date}...`);
378
-
379
- try {
380
- const result = await runFinalSweepCheck(config, dependencies, date, passToRun, computationManifest);
381
- return {
382
- status: 'COMPLETED',
383
- date: date,
384
- pass: passToRun,
385
- issuesFound: result.issuesCount
386
- };
387
- } catch (e) {
388
- logger.log('ERROR', `[Dispatcher] Forensics failed: ${e.message}`);
389
- return { status: 'ERROR', error: e.message };
390
- }
391
- }
392
-
393
- // =============================================================================
394
- // LOGIC: Verify Pass Completion
395
- // =============================================================================
396
243
  async function handlePassVerification(config, dependencies, computationManifest, reqBody) {
397
244
  const { logger } = dependencies;
398
- const passToRun = String(reqBody.pass || "1");
399
- const dateLimitStr = reqBody.date || "2025-01-01";
245
+ const pass = String(reqBody.pass || "1");
246
+ const dateLimit = reqBody.date || "2025-01-01";
400
247
 
401
- logger.log('INFO', `[Verify] 🧹 Sweeping Pass ${passToRun} for unfinished work...`);
248
+ logger.log('INFO', `[Verify] 🧹 Sweeping Pass ${pass} for unfinished work...`);
402
249
 
403
- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, false);
404
- const passes = groupByPass(computationManifest);
405
- const calcsInPass = passes[passToRun] || [];
250
+ const sessionDates = await getStableDateSession(config, dependencies, pass, dateLimit, false);
251
+ const passComputations = groupByPass(computationManifest)[pass] || [];
406
252
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
407
253
  const weightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
408
254
 
409
255
  const missingTasks = [];
410
256
 
411
257
  for (const date of sessionDates) {
412
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
413
- const scheduledComputations = calcsInPass.filter(c =>
414
- isComputationScheduled(date, c.schedule)
415
- );
416
-
417
- if (scheduledComputations.length === 0) continue;
418
-
419
- const [dailyStatus, availability] = await Promise.all([
420
- fetchComputationStatus(date, config, dependencies),
421
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
422
- ]);
423
-
424
- let prevDailyStatus = null;
425
- if (scheduledComputations.some(c => c.isHistorical)) {
426
- const prevD = new Date(date + 'T00:00:00Z');
427
- prevD.setUTCDate(prevD.getUTCDate() - 1);
428
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
429
- }
258
+ const scheduled = passComputations.filter(c => isComputationScheduled(date, c.schedule));
259
+ if (scheduled.length === 0) continue;
430
260
 
431
- const report = analyzeDateExecution(
432
- date,
433
- scheduledComputations, // Use filtered list
434
- availability ? availability.status : {},
435
- dailyStatus,
436
- manifestMap,
437
- prevDailyStatus
438
- );
439
-
440
- const pending = [...report.runnable, ...report.reRuns];
261
+ const result = await assessDateRunnability(date, scheduled, config, dependencies, manifestMap);
262
+ if (!result) continue;
263
+
264
+ const pending = [...result.report.runnable, ...result.report.reRuns];
441
265
 
442
266
  if (pending.length > 0) {
443
267
  const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
444
268
  const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
445
269
 
446
- missingTasks.push({
447
- date: date,
448
- taskCount: pending.length,
449
- eta: eta
450
- });
270
+ missingTasks.push({ date, taskCount: pending.length, eta });
451
271
  }
452
272
  }
453
273
 
@@ -455,487 +275,262 @@ async function handlePassVerification(config, dependencies, computationManifest,
455
275
  return { missingTasks };
456
276
  }
457
277
 
458
- // =============================================================================
459
- // LOGIC: Sweep Dispatch (Forced High-Mem)
460
- // =============================================================================
461
278
  async function handleSweepDispatch(config, dependencies, computationManifest, reqBody) {
462
279
  const { logger, db } = dependencies;
463
- const pubsubUtils = new PubSubUtils(dependencies);
464
- const passToRun = String(reqBody.pass || "1");
280
+ const pass = String(reqBody.pass || "1");
465
281
  const date = reqBody.date;
466
-
467
282
  if (!date) throw new Error('Sweep dispatch requires date');
468
283
 
469
- const passes = groupByPass(computationManifest);
470
- const calcsInPass = passes[passToRun] || [];
471
-
472
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
473
- const scheduledComputations = calcsInPass.filter(c =>
474
- isComputationScheduled(date, c.schedule)
475
- );
476
-
477
- if (scheduledComputations.length === 0) {
478
- logger.log('INFO', `[Sweep] ${date} has no scheduled tasks for Pass ${passToRun}. Ignoring.`);
479
- return { dispatched: 0 };
480
- }
481
-
482
- // 1. Analyze specific date
483
- const [dailyStatus, availability] = await Promise.all([
484
- fetchComputationStatus(date, config, dependencies),
485
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
486
- ]);
487
-
488
- let prevDailyStatus = null;
489
- if (scheduledComputations.some(c => c.isHistorical)) {
490
- const prevD = new Date(date + 'T00:00:00Z');
491
- prevD.setUTCDate(prevD.getUTCDate() - 1);
492
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
493
- }
494
-
495
284
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
496
- const report = analyzeDateExecution(
497
- date,
498
- scheduledComputations, // Use filtered list
499
- availability ? availability.status : {},
500
- dailyStatus,
501
- manifestMap,
502
- prevDailyStatus
503
- );
504
- const pending = [...report.runnable, ...report.reRuns];
285
+ const calculations = groupByPass(computationManifest)[pass] || [];
286
+ const scheduled = calculations.filter(c => isComputationScheduled(date, c.schedule));
505
287
 
506
- if (pending.length === 0) {
507
- logger.log('INFO', `[Sweep] ${date} is clean. No dispatch.`);
508
- return { dispatched: 0 };
509
- }
288
+ if (!scheduled.length) return { dispatched: 0 };
510
289
 
290
+ const result = await assessDateRunnability(date, scheduled, config, dependencies, manifestMap);
291
+ if (!result) return { dispatched: 0 };
292
+
293
+ const pending = [...result.report.runnable, ...result.report.reRuns];
511
294
  const validTasks = [];
295
+
512
296
  for (const task of pending) {
513
- const name = normalizeName(task.name);
514
- const ledgerPath = `computation_audit_ledger/${date}/passes/${passToRun}/tasks/${name}`;
297
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${normalizeName(task.name)}`;
515
298
  const doc = await db.doc(ledgerPath).get();
516
299
 
517
300
  if (doc.exists) {
518
301
  const data = doc.data();
302
+ const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
519
303
 
520
- // 1. ACTIVE CHECK: Don't double-dispatch if already running... UNLESS IT'S A ZOMBIE
521
- if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
304
+ if (isActive) {
522
305
  const lastActivity = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
523
-
524
- // If it's been silent for > 15 mins, it's a Zombie. Kill it and Re-run.
525
- if ((Date.now() - lastActivity) > STALE_LOCK_THRESHOLD_MS) {
526
- logger.log('WARN', `[Sweep] 🧟 Found ZOMBIE lock for ${name}. Breaking lock and re-running.`);
527
- // Don't continue; let it fall through to dispatch
528
- } else {
529
- logger.log('INFO', `[Sweep] ⏳ Skipping ${name} - Valid IN_PROGRESS.`);
530
- continue;
531
- }
306
+ if ((Date.now() - lastActivity) < STALE_LOCK_THRESHOLD_MS) continue;
307
+ logger.log('WARN', `[Sweep] 🧟 Breaking ZOMBIE lock for ${task.name}`);
308
+ }
309
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(data.error?.stage) && data.hash === task.hash) {
310
+ continue;
532
311
  }
533
-
534
- // 2. COMPLETION CHECK (GHOST STATE FIX)
535
- // We REMOVED the check that skips if (status === 'COMPLETED' && hash === task.hash).
536
- // If we are here, 'analyzeDateExecution' (The Brain) decided this task is NOT done
537
- // (likely due to a missing or outdated entry in computation_status).
538
- // Even if the Ledger (The Log) says it finished, the system state is inconsistent.
539
- // We MUST re-run to repair the Status Index.
540
-
541
- const stage = data.error?.stage;
542
-
543
- // 3. DETERMINISTIC FAILURE CHECK
544
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
545
- // If hash matches, it's the exact same code that failed before. Don't retry in loop.
546
- if (data.hash === task.hash) {
547
- logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
548
- continue;
549
- }
550
- logger.log('INFO', `[Sweep] 🔄 Code Updated for ${name}. Retrying sweep despite previous ${stage}.`);
551
- }
552
-
553
- // 4. DEAD END CHECK (High Mem)
554
- if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
555
- // If code hasn't changed, don't hammer it.
556
- if (data.hash === task.hash) {
557
- logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
558
- continue;
559
- }
560
- }
561
312
  }
562
313
  validTasks.push(task);
563
314
  }
564
315
 
565
- if (validTasks.length === 0) {
566
- logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
567
- return { dispatched: 0 };
568
- }
569
-
570
- // 2. FORCE High Mem & INJECT TRACE
571
- const currentDispatchId = crypto.randomUUID();
572
-
573
- const tasksPayload = validTasks.map(t => {
574
- const traceId = crypto.randomBytes(16).toString('hex');
575
- const spanId = crypto.randomBytes(8).toString('hex');
576
-
577
- return {
578
- ...t,
579
- action: 'RUN_COMPUTATION_DATE',
580
- computation: t.name,
581
- date: date,
582
- pass: passToRun,
583
- dispatchId: currentDispatchId,
584
- triggerReason: 'SWEEP_RECOVERY',
585
- resources: 'high-mem', // FORCE
586
- traceContext: { traceId, spanId, sampled: true }
587
- };
588
- });
589
-
590
- const taskNames = tasksPayload.map(t => t.computation || t.name).join(', ');
591
- logger.log('WARN', `[Sweep] 🧹 Forcing ${tasksPayload.length} tasks to HIGH-MEM for ${date}.`, {
592
- date: date,
593
- pass: passToRun,
594
- tasks: tasksPayload.map(t => ({ name: t.computation || t.name, reason: 'sweep' })),
595
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
596
- });
597
-
598
- await pubsubUtils.batchPublishTasks(dependencies, {
599
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
600
- tasks: tasksPayload,
601
- taskType: `pass-${passToRun}-sweep`
602
- });
316
+ const dispatchId = crypto.randomUUID();
317
+ const tasksPayload = validTasks.map(t =>
318
+ createTaskPayload(t, date, pass, dispatchId, 'high-mem', 'SWEEP_RECOVERY')
319
+ );
603
320
 
604
- return { dispatched: tasksPayload.length };
321
+ return { dispatched: await publishTaskBatch(dependencies, config, tasksPayload, pass, config.computationTopicHighMem) };
605
322
  }
606
323
 
607
- // =============================================================================
608
- // LOGIC: Standard Dispatch (Fast-Forward Enabled)
609
- // =============================================================================
610
324
  async function handleStandardDispatch(config, dependencies, computationManifest, reqBody) {
611
325
  const { logger, db } = dependencies;
612
- const pubsubUtils = new PubSubUtils(dependencies);
613
-
614
- const passToRun = String(reqBody.pass || "1");
326
+ const pass = String(reqBody.pass || "1");
327
+ const dateLimit = reqBody.date || "2025-01-01";
615
328
  const targetCursorN = parseInt(reqBody.cursorIndex || 1);
616
- const dateLimitStr = reqBody.date || "2025-01-01";
617
- const forceRebuild = reqBody.forceRebuild === true;
618
329
 
619
330
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
620
- const passes = groupByPass(computationManifest);
621
- const calcsInThisPass = passes[passToRun] || [];
331
+ const passComputations = groupByPass(computationManifest)[pass] || [];
622
332
  const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
333
+ const sessionDates = await getStableDateSession(config, dependencies, pass, dateLimit, reqBody.forceRebuild);
623
334
 
624
- if (!calcsInThisPass.length) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
625
-
626
- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
627
- if (!sessionDates || sessionDates.length === 0) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
628
-
629
- // --- Fast-Forward Loop Configuration ---
630
- // Scans up to 50 dates or 40 seconds to find work, avoiding empty "wait loops"
631
- const MAX_SCAN_DEPTH = 50; // This is actually 50 + Today, so 51.
632
- const TIME_LIMIT_MS = 40000;
633
- const startTime = Date.now();
335
+ if (!passComputations.length || !sessionDates || targetCursorN > sessionDates.length) {
336
+ return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
337
+ }
634
338
 
339
+ const MAX_SCAN = 50;
340
+ const TIME_LIMIT = 40000;
341
+ const startT = Date.now();
342
+
635
343
  let currentCursor = targetCursorN;
636
- let selectedTasks = [];
637
- let selectedDate = null;
638
- let datesScanned = 0;
344
+ let tasksToDispatch = [];
345
+ let processedDate = null;
346
+ let scanned = 0;
639
347
 
640
- // Loop until work is found, end is reached, or safety limits hit
641
348
  while (currentCursor <= sessionDates.length) {
642
- datesScanned++;
643
- selectedDate = sessionDates[currentCursor - 1]; // 0-indexed array
349
+ scanned++;
350
+ processedDate = sessionDates[currentCursor - 1];
644
351
 
645
- // 1. Safety Break (Prevent Timeout)
646
- if ((Date.now() - startTime) > TIME_LIMIT_MS || datesScanned > MAX_SCAN_DEPTH) {
647
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forward paused at ${selectedDate} after scanning ${datesScanned} dates.`);
352
+ if ((Date.now() - startT) > TIME_LIMIT || scanned > MAX_SCAN) {
353
+ logger.log('INFO', `[Dispatcher] ⏩ Fast-forward paused at ${processedDate} after scanning ${scanned} dates.`);
648
354
  break;
649
355
  }
650
356
 
651
- // 2. [SCHEDULE CHECK] Filter computations scheduled for this specific date
652
- const scheduledComputations = calcsInThisPass.filter(c =>
653
- isComputationScheduled(selectedDate, c.schedule)
654
- );
655
-
656
- // Optimization: If nothing is scheduled for today, skip expensive DB checks
657
- if (scheduledComputations.length === 0) {
658
- // DEBUG: Log when schedule filtering removes all tasks
659
- if (calcsInThisPass.length > 0) {
660
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: ${calcsInThisPass.length} pass computations, but 0 scheduled for this date. Skipping.`);
661
- }
662
- currentCursor++;
357
+ const scheduled = passComputations.filter(c => isComputationScheduled(processedDate, c.schedule));
358
+ if (scheduled.length === 0) {
359
+ currentCursor++;
663
360
  continue;
664
361
  }
665
362
 
666
- // 3. Analyze Date
667
- const earliestDates = await getEarliestDataDates(config, dependencies);
668
- let prevDailyStatusPromise = Promise.resolve(null);
669
- if (scheduledComputations.some(c => c.isHistorical)) {
670
- const prevD = new Date(selectedDate + 'T00:00:00Z');
671
- prevD.setUTCDate(prevD.getUTCDate() - 1);
672
- if (prevD >= earliestDates.absoluteEarliest) {
673
- prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
674
- }
675
- }
363
+ const result = await assessDateRunnability(processedDate, scheduled, config, dependencies, manifestMap);
364
+ if (result && (result.report.runnable.length > 0 || result.report.reRuns.length > 0)) {
365
+ let candidates = await attemptSimHashResolution(dependencies, processedDate, [...result.report.runnable, ...result.report.reRuns], result.dailyStatus, manifestMap);
366
+ const { standard, highMem } = await resolveRoutes(db, processedDate, pass, candidates, logger);
367
+ tasksToDispatch = [...standard, ...highMem];
676
368
 
677
- const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
678
- fetchComputationStatus(selectedDate, config, dependencies),
679
- prevDailyStatusPromise,
680
- checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
681
- ]);
682
-
683
- // DEBUG: Log availability check
684
- if (!availability || !availability.status) {
685
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: Availability check failed or returned null. Skipping analysis.`);
369
+ if (tasksToDispatch.length > 0) break;
686
370
  }
687
-
688
- if (availability && availability.status) {
689
- const report = analyzeDateExecution(
690
- selectedDate,
691
- scheduledComputations, // Use filtered list
692
- availability.status,
693
- dailyStatus,
694
- manifestMap,
695
- prevDailyStatus
696
- );
697
- let rawTasks = [...report.runnable, ...report.reRuns];
698
-
699
- // DEBUG: Log analysis results
700
- if (rawTasks.length === 0 && (report.runnable.length > 0 || report.reRuns.length > 0)) {
701
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable + ${report.reRuns.length} reRuns, but rawTasks is empty!`);
702
- }
703
- if (rawTasks.length > 0) {
704
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable, ${report.reRuns.length} reRuns. Total: ${rawTasks.length}`);
705
- }
706
-
707
- if (rawTasks.length > 0) {
708
- rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
709
- const activeTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks, logger);
710
-
711
- if (activeTasks.length > 0) {
712
- // DEBUG: Log what we're about to route
713
- logger.log('INFO', `[Dispatcher] 🔍 Date ${selectedDate}: ${rawTasks.length} raw tasks → ${activeTasks.length} after filtering. Routing...`);
714
- const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, activeTasks, logger);
715
- selectedTasks = [...standard, ...highMem];
716
-
717
- // DEBUG: Log routing results
718
- if (selectedTasks.length === 0 && activeTasks.length > 0) {
719
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${activeTasks.length} tasks filtered out by splitRoutes! Tasks: ${activeTasks.map(t => t.name).join(', ')}`);
720
- }
721
-
722
- if (selectedTasks.length > 0) {
723
- // Found work! Break loop to dispatch.
724
- break;
725
- }
726
- } else if (rawTasks.length > 0) {
727
- // DEBUG: Log if filterActiveTasks removed all tasks
728
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${rawTasks.length} raw tasks all filtered out by filterActiveTasks! Tasks: ${rawTasks.map(t => t.name).join(', ')}`);
729
- }
730
- }
731
- }
732
-
733
- // No work found for this date. Fast-forward to next.
734
371
  currentCursor++;
735
372
  }
736
373
 
737
- // --- Result Handling ---
738
-
739
- // Case 1: Satiated (Scanned to end of session with no work)
740
- if (currentCursor > sessionDates.length && selectedTasks.length === 0) {
374
+ if (currentCursor > sessionDates.length && tasksToDispatch.length === 0) {
741
375
  return {
742
376
  status: 'CONTINUE_PASS',
743
- dateProcessed: selectedDate,
377
+ dateProcessed: processedDate,
744
378
  dispatched: 0,
745
379
  n_cursor_ignored: false,
746
380
  remainingDates: 0,
747
- nextCursor: currentCursor // Matches length + 1
381
+ nextCursor: currentCursor
748
382
  };
749
383
  }
750
384
 
751
- // Case 2: Paused by Limit (No work found yet, but more dates remain)
752
- if (selectedTasks.length === 0) {
385
+ if (tasksToDispatch.length === 0) {
753
386
  return {
754
387
  status: 'CONTINUE_PASS',
755
- dateProcessed: selectedDate,
388
+ dateProcessed: processedDate,
756
389
  dispatched: 0,
757
390
  n_cursor_ignored: false,
758
391
  remainingDates: sessionDates.length - currentCursor + 1,
759
- nextCursor: currentCursor // Resume from here
392
+ nextCursor: currentCursor
760
393
  };
761
394
  }
762
395
 
763
- // Case 3: Work Found (Dispatching)
764
- const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
765
- const currentDispatchId = crypto.randomUUID();
766
- const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
396
+ const totalWeight = tasksToDispatch.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
397
+ const etaSeconds = Math.max(20, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
767
398
 
768
- if (datesScanned > 1) {
769
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forwarded ${datesScanned - 1} empty dates. Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
399
+ if (scanned > 1) {
400
+ logger.log('INFO', `[Dispatcher] ⏩ Fast-forwarded ${scanned - 1} empty dates. Dispatching ${tasksToDispatch.length} tasks for ${processedDate}.`);
770
401
  } else {
771
- logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
402
+ logger.log('INFO', `[Dispatcher] ✅ Dispatching ${tasksToDispatch.length} tasks for ${processedDate}.`);
772
403
  }
773
404
 
774
- const mapToTaskPayload = (t) => {
775
- const traceId = crypto.randomBytes(16).toString('hex');
776
- const spanId = crypto.randomBytes(8).toString('hex');
777
- return {
778
- ...t,
779
- action: 'RUN_COMPUTATION_DATE',
780
- computation: t.name,
781
- date: selectedDate,
782
- pass: passToRun,
783
- dispatchId: currentDispatchId,
784
- triggerReason: t.reason,
785
- resources: t.resources || 'standard',
786
- traceContext: {
787
- traceId: traceId,
788
- spanId: spanId,
789
- sampled: true
790
- }
791
- };
792
- };
405
+ const dispatchId = crypto.randomUUID();
406
+ const standardPayload = tasksToDispatch.filter(t => t.resources !== 'high-mem').map(t => createTaskPayload(t, processedDate, pass, dispatchId, 'standard', t.reason));
407
+ const highMemPayload = tasksToDispatch.filter(t => t.resources === 'high-mem').map(t => createTaskPayload(t, processedDate, pass, dispatchId, 'high-mem', t.reason));
793
408
 
794
- const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
795
- const highMemTasks = selectedTasks.filter(t => t.resources === 'high-mem').map(mapToTaskPayload);
796
-
797
- const pubPromises = [];
798
- if (standardTasks.length > 0) {
799
- const taskNames = standardTasks.map(t => t.computation || t.name).join(', ');
800
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${standardTasks.length} standard tasks: ${taskNames}`, {
801
- date: selectedDate,
802
- pass: passToRun,
803
- tasks: standardTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'new' })),
804
- topic: config.computationTopicStandard || 'computation-tasks'
805
- });
806
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
807
- topicName: config.computationTopicStandard || 'computation-tasks',
808
- tasks: standardTasks,
809
- taskType: `pass-${passToRun}-std`
810
- }));
811
- }
812
- if (highMemTasks.length > 0) {
813
- const taskNames = highMemTasks.map(t => t.computation || t.name).join(', ');
814
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${highMemTasks.length} high-memory tasks: ${taskNames}`, {
815
- date: selectedDate,
816
- pass: passToRun,
817
- tasks: highMemTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'retry' })),
818
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
819
- });
820
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
821
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
822
- tasks: highMemTasks,
823
- taskType: `pass-${passToRun}-high`
824
- }));
825
- }
826
- await Promise.all(pubPromises);
409
+ await Promise.all([
410
+ publishTaskBatch(dependencies, config, standardPayload, pass),
411
+ publishTaskBatch(dependencies, config, highMemPayload, pass)
412
+ ]);
827
413
 
828
414
  return {
829
415
  status: 'CONTINUE_PASS',
830
- dateProcessed: selectedDate,
831
- dispatched: selectedTasks.length,
416
+ dateProcessed: processedDate,
417
+ dispatched: tasksToDispatch.length,
832
418
  n_cursor_ignored: false,
833
419
  etaSeconds: etaSeconds,
834
420
  remainingDates: sessionDates.length - targetCursorN,
835
- nextCursor: currentCursor + 1 // Start next scan AFTER this date
421
+ nextCursor: currentCursor + 1
836
422
  };
837
423
  }
838
424
 
839
425
  // =============================================================================
840
- // HELPER: Route Splitting (One-Shot Enforcement)
426
+ // LOGIC: Resolution & Routing
841
427
  // =============================================================================
842
- // [UPDATED] Route Splitting with Version-Aware Dead Lettering
843
- async function splitRoutes(db, date, pass, tasks, logger) {
428
+
429
+ async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
430
+ const { db, logger } = dependencies;
431
+ const resolved = [], remaining = [];
432
+ const hashCache = new Map();
433
+
434
+ for (const task of tasks) {
435
+ const status = dailyStatus ? dailyStatus[task.name] : null;
436
+ const manifest = manifestMap.get(normalizeName(task.name));
437
+
438
+ if (status?.simHash && manifest) {
439
+ let knownSimHash = hashCache.get(manifest.hash);
440
+ if (!knownSimHash) {
441
+ const doc = await db.collection('system_simhash_registry').doc(manifest.hash).get();
442
+ if (doc.exists) {
443
+ knownSimHash = doc.data().simHash;
444
+ hashCache.set(manifest.hash, knownSimHash);
445
+ }
446
+ }
447
+ if (knownSimHash === status.simHash) {
448
+ resolved.push({ name: task.name, hash: manifest.hash, simHash: knownSimHash, prevStatus: status });
449
+ continue;
450
+ }
451
+ }
452
+ remaining.push(task);
453
+ }
454
+
455
+ if (resolved.length) {
456
+ const updates = {};
457
+ resolved.forEach(t => updates[t.name] = { ...t.prevStatus, hash: t.hash, simHash: t.simHash, reason: 'SimHash Auto-Resolve', lastUpdated: new Date().toISOString() });
458
+ await db.collection('computation_status').doc(date).set(updates, { merge: true });
459
+ logger.log('INFO', `[SimHash] ⏩ Resolved ${resolved.length} tasks for ${date}.`);
460
+ }
461
+ return remaining;
462
+ }
463
+
464
+ async function resolveRoutes(db, date, pass, tasks, logger) {
844
465
  const standard = [];
845
466
  const highMem = [];
846
467
 
847
- for (const task of tasks) {
468
+ const checks = tasks.map(async (task) => {
848
469
  const name = normalizeName(task.name);
849
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
850
- const doc = await db.doc(ledgerPath).get();
470
+ const doc = await db.doc(`computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`).get();
851
471
 
852
- if (!doc.exists) {
853
- // No ledger entry - trust analyzeDateExecution, dispatch as standard
854
- standard.push(task);
855
- continue;
856
- }
472
+ if (!doc.exists) return { task, type: 'std' };
857
473
 
858
474
  const data = doc.data();
859
475
 
860
- // CRITICAL FIX: If analyzeDateExecution says this task should run, we MUST trust it.
861
- // The ledger might say COMPLETED, but if computation_status is missing/outdated,
862
- // we need to re-run to repair the state. Only skip if actively running.
863
- // Note: filterActiveTasks already filtered out non-stale PENDING/IN_PROGRESS,
864
- // but we double-check here in case of race conditions.
865
476
  if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
866
- // Check if it's stale (should have been caught by filterActiveTasks, but double-check)
867
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
868
-
869
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
870
- // Stale lock - break it and continue
871
- logger.log('WARN', `[Dispatcher] 🧟 splitRoutes: Breaking stale lock for ${name}.`);
872
- // Fall through to handle as if no active lock
873
- } else {
874
- // Valid active lock - skip (shouldn't happen if filterActiveTasks worked correctly)
875
- logger.log('TRACE', `[Dispatcher] splitRoutes: Skipping ${name} - Valid IN_PROGRESS (should have been filtered earlier).`);
876
- continue;
877
- }
477
+ const lastActive = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
478
+ if ((Date.now() - lastActive) < STALE_LOCK_THRESHOLD_MS) return null;
479
+ logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${name}`);
878
480
  }
879
-
481
+
880
482
  if (data.status === 'FAILED') {
881
483
  const stage = data.error?.stage;
484
+ const isCodeChanged = data.hash !== task.hash;
882
485
 
883
- // 1. DETERMINISTIC FAILURES (Never Retry UNLESS Code Updated)
884
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
885
- // [FIX] Check if the hash matches. If code changed, we MUST retry.
886
- if (data.hash === task.hash) {
887
- logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
888
- continue;
889
- }
890
-
891
- // If hashes differ, we reset to Standard execution to give the new code a chance
892
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Retrying despite previous ${stage}.`);
893
- standard.push({ ...task, reason: 'Retry: Code Version Changed' });
894
- continue;
486
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(stage)) {
487
+ if (!isCodeChanged) return null;
488
+ return { task: { ...task, reason: 'Retry: Code Version Changed' }, type: 'std' };
895
489
  }
896
490
 
897
- // 2. HIGH MEMORY FAILURE HANDLING (The New Logic)
898
491
  if (data.resourceTier === 'high-mem') {
899
- const failedHash = data.hash || data.composition?.code; // Support legacy or new structure
900
- const currentHash = task.hash;
901
-
902
- // A. EXACT CODE MATCH: It failed High-Mem with THIS code.
903
- if (failedHash === currentHash) {
904
- logger.log('WARN', `[Dispatcher] 💀 Dead End: ${name} failed High-Mem on this version (${currentHash.slice(0,6)}). Waiting for code fix.`);
905
- continue; // STOP. Do not retry.
906
- }
907
-
908
- // B. CODE MISMATCH: The code has changed since the High-Mem failure.
909
- // We reset it to 'standard' to see if the fix optimized memory usage.
910
- else {
911
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Resetting High-Mem failure to Standard retry.`);
912
- standard.push({
913
- ...task,
914
- reason: 'Retry: Code Version Changed'
915
- });
916
- continue;
917
- }
492
+ if (!isCodeChanged) return null;
493
+ return { task: { ...task, reason: 'Retry: Code Changed (HighMem Reset)' }, type: 'std' };
918
494
  }
919
495
 
920
- // 3. STANDARD FAILURE -> PROMOTE TO HIGH MEM
921
- highMem.push({
922
- ...task,
923
- resources: 'high-mem',
924
- reason: `Retry: ${data.error?.message || 'Standard Failure'}`
925
- });
926
-
927
- } else {
928
- // Status is likely COMPLETED or some other state.
929
- // CRITICAL: If analyzeDateExecution says this should run, we MUST trust it.
930
- // The ledger might show COMPLETED, but if computation_status is missing/outdated,
931
- // we need to re-run to repair the state. This is the "ghost state fix" logic.
932
- // Trust the Brain (analyzeDateExecution) over the Log (ledger).
933
- logger.log('INFO', `[Dispatcher] 🔄 splitRoutes: ${name} has ledger status '${data.status}', but analyzeDateExecution says it should run. Trusting analysis and dispatching.`);
934
- standard.push(task);
496
+ return { task: { ...task, reason: `Retry: ${data.error?.message}`, resources: 'high-mem' }, type: 'high' };
935
497
  }
936
- }
937
498
 
499
+ return { task, type: 'std' };
500
+ });
501
+
502
+ const results = (await Promise.all(checks)).filter(r => r !== null);
503
+ results.forEach(r => r.type === 'high' ? highMem.push(r.task) : standard.push(r.task));
504
+
938
505
  return { standard, highMem };
939
506
  }
940
507
 
508
+ // =============================================================================
509
+ // MAIN ENTRY
510
+ // =============================================================================
511
+
512
+ async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
513
+ switch (reqBody.action) {
514
+ case 'VERIFY': return handlePassVerification(config, dependencies, computationManifest, reqBody);
515
+ case 'SWEEP': return handleSweepDispatch(config, dependencies, computationManifest, reqBody);
516
+ case 'REPORT': return handleFinalSweepReporting(config, dependencies, computationManifest, reqBody);
517
+ case 'FORCE_RUN': return handleForceRun(config, dependencies, computationManifest, reqBody);
518
+ // 3. REGISTER SNAPSHOT ACTION
519
+ case 'SNAPSHOT': return handleSnapshot(config, dependencies, reqBody);
520
+ default: return handleStandardDispatch(config, dependencies, computationManifest, reqBody);
521
+ }
522
+ }
523
+
524
+ async function handleFinalSweepReporting(config, dependencies, computationManifest, reqBody) {
525
+ const { logger } = dependencies;
526
+ const date = reqBody.date || new Date().toISOString().slice(0, 10);
527
+ try {
528
+ const res = await runFinalSweepCheck(config, dependencies, date, String(reqBody.pass || "1"), computationManifest);
529
+ return { status: 'COMPLETED', date, issues: res.issuesCount };
530
+ } catch (e) {
531
+ logger.log('ERROR', `[Dispatcher] Report failed: ${e.message}`);
532
+ return { status: 'ERROR', error: e.message };
533
+ }
534
+ }
535
+
941
536
  module.exports = { dispatchComputationPass };