bulltrackers-module 1.0.658 → 1.0.660

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  2. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  3. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  4. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  5. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  6. package/functions/computation-system/helpers/computation_dispatcher.js +306 -701
  7. package/functions/computation-system/helpers/computation_worker.js +3 -2
  8. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  9. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  10. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  11. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  12. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  13. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  14. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  15. package/functions/computation-system/services/SnapshotService.js +129 -0
  16. package/functions/computation-system/tools/BuildReporter.js +12 -7
  17. package/functions/computation-system/utils/data_loader.js +213 -238
  18. package/package.json +3 -2
  19. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  20. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  21. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  22. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,10 +1,8 @@
  /**
+ * V2 TO REPLACE THE OLD.
  * FILENAME: computation-system/helpers/computation_dispatcher.js
- * PURPOSE: Sequential Cursor-Based Dispatcher.
- * UPDATED: Implemented "Fast-Forward" Scanning Loop to skip empty dates efficiently.
- * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
- * UPDATED: Generates Google Cloud Trace Context (traceId/spanId) for end-to-end monitoring.
- * UPDATED: Added Schedule Awareness (Daily, Weekly, Monthly) to filter tasks by date.
+ * PURPOSE: Sequential Cursor-Based Dispatcher (Refactored & Condensed).
+ * UPDATED: Added SNAPSHOT handling.
  */

  const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -12,294 +10,210 @@ const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.j
  const { PubSubUtils } = require('../../core/utils/pubsub_utils');
  const { fetchComputationStatus } = require('../persistence/StatusRepository');
  const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
- const { runFinalSweepCheck } = require('../tools/FinalSweepReporter'); // [NEW]
+ const { runFinalSweepCheck } = require('../tools/FinalSweepReporter');
+ // 1. IMPORT SNAPSHOT SERVICE
+ const { generateDailySnapshots } = require('../services/SnapshotService');
  const crypto = require('crypto');

- const OOM_THRESHOLD_MB = 1500; // Unused
  const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
  const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
  const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;

- // =============================================================================
- // HELPER: Firestore Timestamp Conversion
- // =============================================================================
- /**
- * Converts a Firestore Timestamp or Date to milliseconds.
- * Firestore stores Date objects as Timestamp objects, which have a .toDate() method.
- * This function handles both cases correctly.
- * @param {any} field - Firestore Timestamp, Date object, or string
- * @returns {number} Milliseconds since epoch, or 0 if invalid
- */
+ // ... [SHARED UTILS and SHARED ASYNC HELPERS remain exactly the same] ...
+
  function getMillis(field) {
  if (!field) return 0;
- // Handle Firestore Timestamp (has .toDate() method)
- if (field.toDate && typeof field.toDate === 'function') {
- return field.toDate().getTime();
- }
- // Handle standard Date object or string
+ if (field.toDate && typeof field.toDate === 'function') return field.toDate().getTime();
  const date = new Date(field);
  return isNaN(date.getTime()) ? 0 : date.getTime();
- }
+ }

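Note: getMillis is retained because Firestore returns Timestamp objects (which expose .toDate()) where plain Dates or ISO strings may also appear. A standalone sketch of the same contract, with a mocked Timestamp (illustrative, not the Firestore class):

function getMillisSketch(field) {
  if (!field) return 0;
  // Firestore Timestamp: has a .toDate() method
  if (field.toDate && typeof field.toDate === 'function') return field.toDate().getTime();
  // Date object or parseable string
  const date = new Date(field);
  return isNaN(date.getTime()) ? 0 : date.getTime();
}

console.log(getMillisSketch({ toDate: () => new Date('2025-01-01T00:00:00Z') })); // 1735689600000
console.log(getMillisSketch('2025-01-01T00:00:00Z'));                             // 1735689600000
console.log(getMillisSketch('not a date'));                                       // 0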
- // =============================================================================
- // HELPER: Schedule Logic
- // =============================================================================
- /**
- * Checks if a computation is scheduled to run on a specific date.
- * Defaults to DAILY if no schedule is present.
- * * @param {string} dateStr - YYYY-MM-DD string
- * @param {Object} scheduleConfig - { type: 'DAILY'|'WEEKLY'|'MONTHLY', days: [] }
- * @returns {boolean} True if the computation should run
- */
  function isComputationScheduled(dateStr, scheduleConfig) {
- // Default: Run every day if no schedule is provided or explicitly DAILY
- if (!scheduleConfig || !scheduleConfig.type || scheduleConfig.type === 'DAILY') {
- return true;
- }
-
- const date = new Date(dateStr + 'T00:00:00Z'); // Ensure UTC parsing
+ if (!scheduleConfig || !scheduleConfig.type || scheduleConfig.type === 'DAILY') return true;
+ const date = new Date(dateStr + 'T00:00:00Z');

- // Weekly Schedule: Check Day of Week (0=Sun, 1=Mon, ..., 6=Sat)
  if (scheduleConfig.type === 'WEEKLY') {
- const dayOfWeek = date.getUTCDay();
- const validDays = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
- return validDays.includes(dayOfWeek);
+ const days = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
+ return days.includes(date.getUTCDay());
  }
-
- // Monthly Schedule: Check Day of Month (1-31)
  if (scheduleConfig.type === 'MONTHLY') {
- const dayOfMonth = date.getUTCDate();
- const validDates = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
- return validDates.includes(dayOfMonth);
+ const days = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
+ return days.includes(date.getUTCDate());
  }
-
- // Fallback default
  return true;
  }

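Note: for illustration, the schedule shapes isComputationScheduled accepts (dates here are arbitrary; 2025-01-06 falls on a Monday):

// WEEKLY matches getUTCDay() (0=Sun..6=Sat); MONTHLY matches getUTCDate() (1..31).
isComputationScheduled('2025-01-06', { type: 'WEEKLY', days: [1] });   // true  (Monday)
isComputationScheduled('2025-01-07', { type: 'WEEKLY', days: [1] });   // false (Tuesday)
isComputationScheduled('2025-01-31', { type: 'MONTHLY', days: [31] }); // true
isComputationScheduled('2025-01-31', null);                            // true (no schedule defaults to DAILY)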
- // =============================================================================
- // HELPER: Ledger Awareness
- // =============================================================================
- async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
- if (!tasks || tasks.length === 0) return [];
- if (forceRun) return tasks;
-
- const checkPromises = tasks.map(async (t) => {
- const taskName = normalizeName(t.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
- const snap = await db.doc(ledgerPath).get();
-
- if (snap.exists) {
- const data = snap.data();
- const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
-
- if (isActive) {
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
- if (logger) logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${taskName}.`);
- return t;
- }
- return null;
- }
- // Note: We do NOT filter COMPLETED here anymore for Sweep.
- // If the Orchestrator says it needs to run, we run it.
+ function createTaskPayload(task, date, pass, dispatchId, resources, triggerReason) {
+ return {
+ action: 'RUN_COMPUTATION_DATE',
+ computation: task.name || task.computation,
+ date: date,
+ pass: pass,
+ dispatchId: dispatchId,
+ triggerReason: triggerReason || task.reason || 'DISPATCH',
+ resources: resources || task.resources || 'standard',
+ hash: task.hash,
+ traceContext: {
+ traceId: crypto.randomBytes(16).toString('hex'),
+ spanId: crypto.randomBytes(8).toString('hex'),
+ sampled: true
  }
- return t;
- });
- const results = await Promise.all(checkPromises);
- return results.filter(t => t !== null);
+ };
  }

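Note: createTaskPayload centralizes the message shape that the removed per-handler literals used to build inline. An illustrative call and its result (field values are placeholders):

const examplePayload = createTaskPayload(
  { name: 'myCalc', hash: 'abc123' },          // hypothetical manifest task
  '2025-01-06', '1', 'uuid-from-randomUUID', null, null
);
// examplePayload =>
// {
//   action: 'RUN_COMPUTATION_DATE',
//   computation: 'myCalc',
//   date: '2025-01-06',
//   pass: '1',
//   dispatchId: 'uuid-from-randomUUID',
//   triggerReason: 'DISPATCH',        // fallback when no reason/override is given
//   resources: 'standard',            // or 'high-mem'
//   hash: 'abc123',                   // used later for retry/dead-letter decisions
//   traceContext: { traceId: '<32 hex>', spanId: '<16 hex>', sampled: true }
// }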
- // =============================================================================
- // HELPER: SimHash Stability
- // =============================================================================
- async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
- const { db, logger } = dependencies;
- const resolvedTasks = [];
- const remainingTasks = [];
- const simHashCache = new Map();
-
- for (const task of tasks) {
- const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
- const manifestItem = manifestMap.get(normalizeName(task.name));
-
- if (currentStatus && currentStatus.simHash && manifestItem) {
- let newSimHash = simHashCache.get(manifestItem.hash);
- if (!newSimHash) {
- const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
- if (simDoc.exists) {
- newSimHash = simDoc.data().simHash;
- simHashCache.set(manifestItem.hash, newSimHash);
- }
- }
+ async function assessDateRunnability(date, computations, config, dependencies, manifestMap) {
+ const { status: rootStatus } = await checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES) || {};
+ if (!rootStatus) return null;

- if (newSimHash && newSimHash === currentStatus.simHash) {
- resolvedTasks.push({
- name: task.name,
- hash: manifestItem.hash,
- simHash: newSimHash,
- prevStatus: currentStatus
- });
- continue;
- }
+ const dailyStatus = await fetchComputationStatus(date, config, dependencies);
+
+ let prevDailyStatus = null;
+ if (computations.some(c => c.isHistorical)) {
+ const prevD = new Date(date + 'T00:00:00Z');
+ prevD.setUTCDate(prevD.getUTCDate() - 1);
+ const earliest = (await getEarliestDataDates(config, dependencies)).absoluteEarliest;
+ if (prevD >= earliest) {
+ prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
  }
- remainingTasks.push(task);
  }

- if (resolvedTasks.length > 0) {
- const updatePayload = {};
- resolvedTasks.forEach(t => {
- updatePayload[t.name] = {
- ...(t.prevStatus || {}),
- hash: t.hash,
- simHash: t.simHash,
- reason: 'SimHash Stable (Auto-Resolved)',
- lastUpdated: new Date().toISOString()
- };
+ const report = analyzeDateExecution(date, computations, rootStatus, dailyStatus, manifestMap, prevDailyStatus);
+ return { report, dailyStatus };
+ }
+
+ async function publishTaskBatch(dependencies, config, tasks, pass, topicOverride = null) {
+ if (tasks.length === 0) return 0;
+
+ const { logger } = dependencies;
+ const pubsubUtils = new PubSubUtils(dependencies);
+ const isHighMem = topicOverride ? topicOverride.includes('highmem') : tasks[0].resources === 'high-mem';
+ const topic = topicOverride || (isHighMem ? config.computationTopicHighMem : config.computationTopicStandard)
+ || (isHighMem ? 'computation-tasks-highmem' : 'computation-tasks');
+
+ const names = tasks.map(t => t.computation).join(', ');
+ logger.log('INFO', `[Dispatcher] 📤 Dispatching ${tasks.length} tasks to ${topic}: ${names.slice(0, 100)}...`);
+
+ const CHUNK_SIZE = 250;
+ const chunks = [];
+ for (let i = 0; i < tasks.length; i += CHUNK_SIZE) chunks.push(tasks.slice(i, i + CHUNK_SIZE));
+
+ for (const chunk of chunks) {
+ await pubsubUtils.batchPublishTasks(dependencies, {
+ topicName: topic,
+ tasks: chunk,
+ taskType: `pass-${pass}-${isHighMem ? 'high' : 'std'}`
  });
- await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
- logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date}.`);
  }
-
- return remainingTasks;
+ return tasks.length;
  }

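Note: publishTaskBatch chunks every task list into batches of at most 250 messages before publishing; the chunking arithmetic on its own:

const CHUNK_SIZE = 250;
const tasks = new Array(620).fill({ computation: 'x' }); // hypothetical workload
const chunks = [];
for (let i = 0; i < tasks.length; i += CHUNK_SIZE) chunks.push(tasks.slice(i, i + CHUNK_SIZE));
console.log(chunks.map(c => c.length)); // [ 250, 250, 120 ]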
- async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
+ async function getStableDateSession(config, dependencies, pass, dateLimitStr, forceRebuild) {
  const { db, logger } = dependencies;
- const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
+ const sessionId = `pass_${pass}_${dateLimitStr.replace(/-/g, '')}`;
  const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);

  if (!forceRebuild) {
- const sessionSnap = await sessionRef.get();
- if (sessionSnap.exists) {
- const data = sessionSnap.data();
- if ((Date.now() - new Date(data.createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
- return data.dates;
- }
+ const snap = await sessionRef.get();
+ if (snap.exists && (Date.now() - new Date(snap.data().createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
+ return snap.data().dates;
  }
  }

- logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
- const earliestDates = await getEarliestDataDates(config, dependencies);
- const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
-
- await sessionRef.set({ dates: allDates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
- return allDates;
+ logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${pass}...`);
+ const earliest = await getEarliestDataDates(config, dependencies);
+ const dates = getExpectedDateStrings(earliest.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
+ await sessionRef.set({ dates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
+ return dates;
  }

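Note: session documents are keyed by pass and date limit, so repeated dispatcher calls within the 30-minute cache window reuse one precomputed date list. The key derivation in isolation:

const pass = '2';
const dateLimitStr = '2025-01-15';
const sessionId = `pass_${pass}_${dateLimitStr.replace(/-/g, '')}`;
console.log(sessionId); // 'pass_2_20250115' -> dispatcher_sessions/pass_2_20250115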
- // MAIN ENTRY POINT
  // =============================================================================
- async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
- const action = reqBody.action || 'DISPATCH';
+ // HANDLERS
+ // =============================================================================
+
+ // 2. NEW SNAPSHOT HANDLER
+ async function handleSnapshot(config, dependencies, reqBody) {
+ const { logger } = dependencies;
+ const date = reqBody.date;

- if (action === 'VERIFY') {
- return handlePassVerification(config, dependencies, computationManifest, reqBody);
- }
- else if (action === 'SWEEP') {
- return handleSweepDispatch(config, dependencies, computationManifest, reqBody);
- }
- else if (action === 'REPORT') {
- return handleFinalSweepReporting(config, dependencies, computationManifest, reqBody);
- }
- // [NEW] FORCE RUN HANDLER
- else if (action === 'FORCE_RUN') {
- return handleForceRun(config, dependencies, computationManifest, reqBody);
+ if (!date) throw new Error('Snapshot action requires a "date"');
+
+ try {
+ logger.log('INFO', `[Dispatcher] 📸 Triggering Snapshot Service for ${date}`);
+ // Calls the service we created earlier
+ const result = await generateDailySnapshots(date, config, dependencies);
+ return result;
+ } catch (e) {
+ logger.log('ERROR', `[Dispatcher] Snapshot failed: ${e.message}`);
+ // Return error object so workflow can see failure
+ return { status: 'ERROR', error: e.message };
  }
-
- return handleStandardDispatch(config, dependencies, computationManifest, reqBody);
  }

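Note: a hypothetical request body that reaches this handler via the action switch at the bottom of the file:

const reqBody = { action: 'SNAPSHOT', date: '2025-01-06' };
// await dispatchComputationPass(config, dependencies, computationManifest, reqBody);
// -> generateDailySnapshots('2025-01-06', config, dependencies)
// -> resolves to the service result, or { status: 'ERROR', error: '...' } on failure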
- // =============================================================================
- // NEW: Force Run Handler (Bypasses Checks)
- // =============================================================================
  async function handleForceRun(config, dependencies, computationManifest, reqBody) {
  const { logger } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
- const computationName = reqBody.computation; // Required
- const dateInput = reqBody.date; // Optional (YYYY-MM-DD)
+ const computationName = reqBody.computation;
+ const dateInput = reqBody.date;

- if (!computationName) {
- throw new Error('Force Run requires "computation" name.');
- }
+ if (!computationName) throw new Error('Force Run requires "computation" name.');

- // 1. Verify Computation Exists
  const manifestItem = computationManifest.find(c => normalizeName(c.name) === normalizeName(computationName));
- if (!manifestItem) {
- throw new Error(`Computation '${computationName}' not found in manifest.`);
- }
+ if (!manifestItem) throw new Error(`Computation '${computationName}' not found.`);

- // 2. Determine Target Dates
  let candidateDates = [];
  if (dateInput) {
- // Single Date Mode
  candidateDates = [dateInput];
  } else {
- // All Dates Mode (Backfill)
  logger.log('INFO', `[ForceRun] No date provided. Calculating date range for ${computationName}...`);
- const earliestDates = await getEarliestDataDates(config, dependencies);
- // Calculate from system start until today
- candidateDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date());
+ const earliest = await getEarliestDataDates(config, dependencies);
+ candidateDates = getExpectedDateStrings(earliest.absoluteEarliest, new Date());
  }

  logger.log('INFO', `[ForceRun] Checking ${candidateDates.length} candidate dates for runnability...`);

- // 3. Filter to only runnable dates using analyzeDateExecution
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const calcsInPass = groupByPass(computationManifest, manifestItem.pass || "1");
- const targetComputationNormalized = normalizeName(computationName);
-
- // Filter to only the target computation
- const targetCalcs = calcsInPass.filter(c => normalizeName(c.name) === targetComputationNormalized);
-
- if (targetCalcs.length === 0) {
- throw new Error(`Computation '${computationName}' not found in pass ${manifestItem.pass || "1"}`);
- }
-
  const runnableDates = [];
  const skippedDates = [];
+ const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));

- for (const dateStr of candidateDates) {
- // Check root data availability
- const rootDataStatus = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
-
- // Get computation status for this date
- const dailyStatus = await fetchComputationStatus(dateStr, config, dependencies);
-
- // Check previous day status if needed
- let prevDailyStatus = null;
- if (targetCalcs.some(c => c.isHistorical)) {
- const prevDate = new Date(dateStr + 'T00:00:00Z');
- prevDate.setUTCDate(prevDate.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevDate.toISOString().slice(0, 10), config, dependencies);
+ const targetComp = { ...manifestItem, schedule: null };
+ const targetComputationNormalized = normalizeName(computationName);
+
+ for (const date of candidateDates) {
+ const result = await assessDateRunnability(date, [targetComp], config, dependencies, manifestMap);
+ if (!result) {
+ skippedDates.push({ date, reason: 'Root data unavailable' });
+ continue;
  }
-
- // Analyze if this computation can run on this date
- const report = analyzeDateExecution(dateStr, targetCalcs, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
-
- // Check if the target computation is runnable, needs re-run, or has failed dependencies
+
+ const { report } = result;
  const isRunnable = report.runnable.some(t => normalizeName(t.name) === targetComputationNormalized);
  const needsReRun = report.reRuns.some(t => normalizeName(t.name) === targetComputationNormalized);
  const hasFailedDep = report.failedDependency.some(t => normalizeName(t.name) === targetComputationNormalized);
  const isImpossible = report.impossible.some(t => normalizeName(t.name) === targetComputationNormalized);
  const isBlocked = report.blocked.some(t => normalizeName(t.name) === targetComputationNormalized);
+ const isSkipped = report.skipped.some(t => normalizeName(t.name) === targetComputationNormalized);

- if (isRunnable || needsReRun || hasFailedDep) {
- runnableDates.push(dateStr);
- } else if (isImpossible) {
- skippedDates.push({ date: dateStr, reason: report.impossible.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Impossible' });
+ // For force runs: treat skipped computations (already stored with valid hash) as runnable
+ // They will overwrite with the same result, which is fine for testing
+ // Only mark as impossible if root data or dependencies don't exist at all
+ if (isImpossible) {
+ skippedDates.push({ date, reason: report.impossible.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Impossible' });
+ } else if (isRunnable || needsReRun || hasFailedDep || isSkipped) {
+ // Runnable, needs re-run, has failed deps (but not impossible), or skipped (already stored)
+ // All of these are runnable for force runs - will overwrite existing results if needed
+ runnableDates.push(date);
  } else if (isBlocked) {
- skippedDates.push({ date: dateStr, reason: report.blocked.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Blocked' });
+ // Blocked usually means waiting for data - for force runs, if root data exists, still runnable
+ // Only skip if truly impossible (handled above)
+ runnableDates.push(date);
  } else {
- skippedDates.push({ date: dateStr, reason: 'Not runnable (unknown reason)' });
+ // Unknown state - for force runs, if root data exists (which it does, since result is not null), treat as runnable
+ logger.log('WARN', `[ForceRun] Computation ${computationName} in unknown state for ${date}, treating as runnable`);
+ runnableDates.push(date);
  }
  }

  logger.log('INFO', `[ForceRun] ✅ Found ${runnableDates.length} runnable dates out of ${candidateDates.length} candidates`);
- if (skippedDates.length > 0) {
- logger.log('INFO', `[ForceRun] ⏭️ Skipped ${skippedDates.length} dates: ${skippedDates.slice(0, 5).map(s => `${s.date} (${s.reason})`).join(', ')}${skippedDates.length > 5 ? '...' : ''}`);
- }
-
+
  if (runnableDates.length === 0) {
  return {
  status: 'NO_RUNNABLE_DATES',
@@ -313,48 +227,19 @@ async function handleForceRun(config, dependencies, computationManifest, reqBody

  logger.log('WARN', `[ForceRun] 🚨 MANUALLY Triggering ${computationName} for ${runnableDates.length} runnable dates. Pass: ${manifestItem.pass}`);

- // 4. Construct Tasks (only for runnable dates)
- const dispatchId = crypto.randomUUID();
- const tasks = runnableDates.map(date => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
- return {
- action: 'RUN_COMPUTATION_DATE',
- computation: manifestItem.name,
- date: date,
- pass: manifestItem.pass || "1",
- dispatchId: dispatchId,
- triggerReason: 'MANUAL_FORCE_API',
- resources: reqBody.resources || 'standard',
- // Trace context allows you to find these specific runs in Cloud Trace
- traceContext: { traceId, spanId, sampled: true }
- };
- });
-
- // 4. Batch Publish (Chunked to stay under Pub/Sub limits)
- const CHUNK_SIZE = 250; // Safe batch size
  const topic = (reqBody.resources === 'high-mem')
  ? (config.computationTopicHighMem || 'computation-tasks-highmem')
  : (config.computationTopicStandard || 'computation-tasks');
-
- let dispatchedCount = 0;
- const chunks = [];
- for (let i = 0; i < tasks.length; i += CHUNK_SIZE) {
- chunks.push(tasks.slice(i, i + CHUNK_SIZE));
- }

- // Publish chunks sequentially to avoid memory spikes
- for (const chunk of chunks) {
- await pubsubUtils.batchPublishTasks(dependencies, {
- topicName: topic,
- tasks: chunk,
- taskType: 'manual-force-run'
- });
- dispatchedCount += chunk.length;
- }
+ const dispatchId = crypto.randomUUID();
+ const tasks = runnableDates.map(date =>
+ createTaskPayload(manifestItem, date, manifestItem.pass || "1", dispatchId, reqBody.resources, 'MANUAL_FORCE_API')
+ );
+
+ const dispatchedCount = await publishTaskBatch(dependencies, config, tasks, manifestItem.pass || "1", topic);

  return {
- status: 'FORCED',
+ status: 'FORCED',
  computation: computationName,
  mode: dateInput ? 'SINGLE_DATE' : 'ALL_DATES',
  datesChecked: candidateDates.length,
@@ -365,89 +250,34 @@ async function handleForceRun(config, dependencies, computationManifest, reqBody
  };
  }

- // =============================================================================
- // NEW: Final Sweep Reporting Handler
- // =============================================================================
- async function handleFinalSweepReporting(config, dependencies, computationManifest, reqBody) {
- const { logger } = dependencies;
- const passToRun = String(reqBody.pass || "1");
- // Target date is required for detailed forensics
- const date = reqBody.date || new Date().toISOString().slice(0, 10);
-
- logger.log('INFO', `[Dispatcher] 📝 Triggering Final Sweep Forensics for Pass ${passToRun} on ${date}...`);
-
- try {
- const result = await runFinalSweepCheck(config, dependencies, date, passToRun, computationManifest);
- return {
- status: 'COMPLETED',
- date: date,
- pass: passToRun,
- issuesFound: result.issuesCount
- };
- } catch (e) {
- logger.log('ERROR', `[Dispatcher] Forensics failed: ${e.message}`);
- return { status: 'ERROR', error: e.message };
- }
- }
-
- // =============================================================================
- // LOGIC: Verify Pass Completion
- // =============================================================================
  async function handlePassVerification(config, dependencies, computationManifest, reqBody) {
  const { logger } = dependencies;
- const passToRun = String(reqBody.pass || "1");
- const dateLimitStr = reqBody.date || "2025-01-01";
+ const pass = String(reqBody.pass || "1");
+ const dateLimit = reqBody.date || "2025-01-01";

- logger.log('INFO', `[Verify] 🧹 Sweeping Pass ${passToRun} for unfinished work...`);
+ logger.log('INFO', `[Verify] 🧹 Sweeping Pass ${pass} for unfinished work...`);

- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, false);
- const passes = groupByPass(computationManifest);
- const calcsInPass = passes[passToRun] || [];
+ const sessionDates = await getStableDateSession(config, dependencies, pass, dateLimit, false);
+ const passComputations = groupByPass(computationManifest)[pass] || [];
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
  const weightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));

  const missingTasks = [];

  for (const date of sessionDates) {
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
- const scheduledComputations = calcsInPass.filter(c =>
- isComputationScheduled(date, c.schedule)
- );
-
- if (scheduledComputations.length === 0) continue;
-
- const [dailyStatus, availability] = await Promise.all([
- fetchComputationStatus(date, config, dependencies),
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- let prevDailyStatus = null;
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(date + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
+ const scheduled = passComputations.filter(c => isComputationScheduled(date, c.schedule));
+ if (scheduled.length === 0) continue;

- const report = analyzeDateExecution(
- date,
- scheduledComputations, // Use filtered list
- availability ? availability.status : {},
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
-
- const pending = [...report.runnable, ...report.reRuns];
+ const result = await assessDateRunnability(date, scheduled, config, dependencies, manifestMap);
+ if (!result) continue;
+
+ const pending = [...result.report.runnable, ...result.report.reRuns];

  if (pending.length > 0) {
  const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
  const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));

- missingTasks.push({
- date: date,
- taskCount: pending.length,
- eta: eta
- });
+ missingTasks.push({ date, taskCount: pending.length, eta });
  }
  }

@@ -455,487 +285,262 @@ async function handlePassVerification(config, dependencies, computationManifest,
  return { missingTasks };
  }

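Note: the per-date ETA above is plain arithmetic over manifest weights with a 30-second floor:

const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
const pendingWeights = [1.0, 2.5, 4.0]; // hypothetical weights of pending tasks
const totalWeight = pendingWeights.reduce((sum, w) => sum + w, 0); // 7.5
const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
console.log(eta); // 30 (7.5 * 3 = 22.5 rounds up to 23; the 30s floor wins)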
- // =============================================================================
- // LOGIC: Sweep Dispatch (Forced High-Mem)
- // =============================================================================
  async function handleSweepDispatch(config, dependencies, computationManifest, reqBody) {
  const { logger, db } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
- const passToRun = String(reqBody.pass || "1");
+ const pass = String(reqBody.pass || "1");
  const date = reqBody.date;
-
  if (!date) throw new Error('Sweep dispatch requires date');

- const passes = groupByPass(computationManifest);
- const calcsInPass = passes[passToRun] || [];
-
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
- const scheduledComputations = calcsInPass.filter(c =>
- isComputationScheduled(date, c.schedule)
- );
-
- if (scheduledComputations.length === 0) {
- logger.log('INFO', `[Sweep] ${date} has no scheduled tasks for Pass ${passToRun}. Ignoring.`);
- return { dispatched: 0 };
- }
-
- // 1. Analyze specific date
- const [dailyStatus, availability] = await Promise.all([
- fetchComputationStatus(date, config, dependencies),
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- let prevDailyStatus = null;
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(date + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
-
  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const report = analyzeDateExecution(
- date,
- scheduledComputations, // Use filtered list
- availability ? availability.status : {},
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
- const pending = [...report.runnable, ...report.reRuns];
+ const calculations = groupByPass(computationManifest)[pass] || [];
+ const scheduled = calculations.filter(c => isComputationScheduled(date, c.schedule));

- if (pending.length === 0) {
- logger.log('INFO', `[Sweep] ${date} is clean. No dispatch.`);
- return { dispatched: 0 };
- }
+ if (!scheduled.length) return { dispatched: 0 };

+ const result = await assessDateRunnability(date, scheduled, config, dependencies, manifestMap);
+ if (!result) return { dispatched: 0 };
+
+ const pending = [...result.report.runnable, ...result.report.reRuns];
  const validTasks = [];
+
  for (const task of pending) {
- const name = normalizeName(task.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${passToRun}/tasks/${name}`;
+ const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${normalizeName(task.name)}`;
  const doc = await db.doc(ledgerPath).get();

  if (doc.exists) {
  const data = doc.data();
+ const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);

- // 1. ACTIVE CHECK: Don't double-dispatch if already running... UNLESS IT'S A ZOMBIE
- if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
+ if (isActive) {
  const lastActivity = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- // If it's been silent for > 15 mins, it's a Zombie. Kill it and Re-run.
- if ((Date.now() - lastActivity) > STALE_LOCK_THRESHOLD_MS) {
- logger.log('WARN', `[Sweep] 🧟 Found ZOMBIE lock for ${name}. Breaking lock and re-running.`);
- // Don't continue; let it fall through to dispatch
- } else {
- logger.log('INFO', `[Sweep] ⏳ Skipping ${name} - Valid IN_PROGRESS.`);
- continue;
- }
+ if ((Date.now() - lastActivity) < STALE_LOCK_THRESHOLD_MS) continue;
+ logger.log('WARN', `[Sweep] 🧟 Breaking ZOMBIE lock for ${task.name}`);
+ }
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(data.error?.stage) && data.hash === task.hash) {
+ continue;
  }
-
- // 2. COMPLETION CHECK (GHOST STATE FIX)
- // We REMOVED the check that skips if (status === 'COMPLETED' && hash === task.hash).
- // If we are here, 'analyzeDateExecution' (The Brain) decided this task is NOT done
- // (likely due to a missing or outdated entry in computation_status).
- // Even if the Ledger (The Log) says it finished, the system state is inconsistent.
- // We MUST re-run to repair the Status Index.
-
- const stage = data.error?.stage;
-
- // 3. DETERMINISTIC FAILURE CHECK
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
- // If hash matches, it's the exact same code that failed before. Don't retry in loop.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
- continue;
- }
- logger.log('INFO', `[Sweep] 🔄 Code Updated for ${name}. Retrying sweep despite previous ${stage}.`);
- }
-
- // 4. DEAD END CHECK (High Mem)
- if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
- // If code hasn't changed, don't hammer it.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
- continue;
- }
- }
  }
  validTasks.push(task);
  }

- if (validTasks.length === 0) {
- logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
- return { dispatched: 0 };
- }
-
- // 2. FORCE High Mem & INJECT TRACE
- const currentDispatchId = crypto.randomUUID();
-
- const tasksPayload = validTasks.map(t => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
-
- return {
- ...t,
- action: 'RUN_COMPUTATION_DATE',
- computation: t.name,
- date: date,
- pass: passToRun,
- dispatchId: currentDispatchId,
- triggerReason: 'SWEEP_RECOVERY',
- resources: 'high-mem', // FORCE
- traceContext: { traceId, spanId, sampled: true }
- };
- });
-
- const taskNames = tasksPayload.map(t => t.computation || t.name).join(', ');
- logger.log('WARN', `[Sweep] 🧹 Forcing ${tasksPayload.length} tasks to HIGH-MEM for ${date}.`, {
- date: date,
- pass: passToRun,
- tasks: tasksPayload.map(t => ({ name: t.computation || t.name, reason: 'sweep' })),
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
- });
-
- await pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
- tasks: tasksPayload,
- taskType: `pass-${passToRun}-sweep`
- });
+ const dispatchId = crypto.randomUUID();
+ const tasksPayload = validTasks.map(t =>
+ createTaskPayload(t, date, pass, dispatchId, 'high-mem', 'SWEEP_RECOVERY')
+ );

- return { dispatched: tasksPayload.length };
+ return { dispatched: await publishTaskBatch(dependencies, config, tasksPayload, pass, config.computationTopicHighMem) };
  }

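Note: the zombie test used here (and again in resolveRoutes) is a heartbeat-age check against STALE_LOCK_THRESHOLD_MS (15 minutes):

const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
const lastActivity = Date.now() - 20 * 60 * 1000; // last heartbeat 20 minutes ago
const isZombie = (Date.now() - lastActivity) > STALE_LOCK_THRESHOLD_MS;
console.log(isZombie); // true -> lock is broken and the task re-dispatched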
- // =============================================================================
- // LOGIC: Standard Dispatch (Fast-Forward Enabled)
- // =============================================================================
  async function handleStandardDispatch(config, dependencies, computationManifest, reqBody) {
  const { logger, db } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
-
- const passToRun = String(reqBody.pass || "1");
+ const pass = String(reqBody.pass || "1");
+ const dateLimit = reqBody.date || "2025-01-01";
  const targetCursorN = parseInt(reqBody.cursorIndex || 1);
- const dateLimitStr = reqBody.date || "2025-01-01";
- const forceRebuild = reqBody.forceRebuild === true;

  const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const passes = groupByPass(computationManifest);
- const calcsInThisPass = passes[passToRun] || [];
+ const passComputations = groupByPass(computationManifest)[pass] || [];
  const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
+ const sessionDates = await getStableDateSession(config, dependencies, pass, dateLimit, reqBody.forceRebuild);

- if (!calcsInThisPass.length) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
-
- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
- if (!sessionDates || sessionDates.length === 0) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
-
- // --- Fast-Forward Loop Configuration ---
- // Scans up to 50 dates or 40 seconds to find work, avoiding empty "wait loops"
- const MAX_SCAN_DEPTH = 50; // This is actually 50 + Today, so 51.
- const TIME_LIMIT_MS = 40000;
- const startTime = Date.now();
+ if (!passComputations.length || !sessionDates || targetCursorN > sessionDates.length) {
+ return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
+ }

+ const MAX_SCAN = 50;
+ const TIME_LIMIT = 40000;
+ const startT = Date.now();
+
  let currentCursor = targetCursorN;
- let selectedTasks = [];
- let selectedDate = null;
- let datesScanned = 0;
+ let tasksToDispatch = [];
+ let processedDate = null;
+ let scanned = 0;

- // Loop until work is found, end is reached, or safety limits hit
  while (currentCursor <= sessionDates.length) {
- datesScanned++;
- selectedDate = sessionDates[currentCursor - 1]; // 0-indexed array
+ scanned++;
+ processedDate = sessionDates[currentCursor - 1];

- // 1. Safety Break (Prevent Timeout)
- if ((Date.now() - startTime) > TIME_LIMIT_MS || datesScanned > MAX_SCAN_DEPTH) {
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forward paused at ${selectedDate} after scanning ${datesScanned} dates.`);
+ if ((Date.now() - startT) > TIME_LIMIT || scanned > MAX_SCAN) {
+ logger.log('INFO', `[Dispatcher] ⏩ Fast-forward paused at ${processedDate} after scanning ${scanned} dates.`);
  break;
  }

- // 2. [SCHEDULE CHECK] Filter computations scheduled for this specific date
- const scheduledComputations = calcsInThisPass.filter(c =>
- isComputationScheduled(selectedDate, c.schedule)
- );
-
- // Optimization: If nothing is scheduled for today, skip expensive DB checks
- if (scheduledComputations.length === 0) {
- // DEBUG: Log when schedule filtering removes all tasks
- if (calcsInThisPass.length > 0) {
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: ${calcsInThisPass.length} pass computations, but 0 scheduled for this date. Skipping.`);
- }
- currentCursor++;
+ const scheduled = passComputations.filter(c => isComputationScheduled(processedDate, c.schedule));
+ if (scheduled.length === 0) {
+ currentCursor++;
  continue;
  }

- // 3. Analyze Date
- const earliestDates = await getEarliestDataDates(config, dependencies);
- let prevDailyStatusPromise = Promise.resolve(null);
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(selectedDate + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- if (prevD >= earliestDates.absoluteEarliest) {
- prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
- }
+ const result = await assessDateRunnability(processedDate, scheduled, config, dependencies, manifestMap);
+ if (result && (result.report.runnable.length > 0 || result.report.reRuns.length > 0)) {
+ let candidates = await attemptSimHashResolution(dependencies, processedDate, [...result.report.runnable, ...result.report.reRuns], result.dailyStatus, manifestMap);
+ const { standard, highMem } = await resolveRoutes(db, processedDate, pass, candidates, logger);
+ tasksToDispatch = [...standard, ...highMem];

- const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
- fetchComputationStatus(selectedDate, config, dependencies),
- prevDailyStatusPromise,
- checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- // DEBUG: Log availability check
- if (!availability || !availability.status) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: Availability check failed or returned null. Skipping analysis.`);
+ if (tasksToDispatch.length > 0) break;
  }
-
- if (availability && availability.status) {
- const report = analyzeDateExecution(
- selectedDate,
- scheduledComputations, // Use filtered list
- availability.status,
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
- let rawTasks = [...report.runnable, ...report.reRuns];
-
- // DEBUG: Log analysis results
- if (rawTasks.length === 0 && (report.runnable.length > 0 || report.reRuns.length > 0)) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable + ${report.reRuns.length} reRuns, but rawTasks is empty!`);
- }
- if (rawTasks.length > 0) {
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable, ${report.reRuns.length} reRuns. Total: ${rawTasks.length}`);
- }
-
- if (rawTasks.length > 0) {
- rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
- const activeTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks, logger);
-
- if (activeTasks.length > 0) {
- // DEBUG: Log what we're about to route
- logger.log('INFO', `[Dispatcher] 🔍 Date ${selectedDate}: ${rawTasks.length} raw tasks → ${activeTasks.length} after filtering. Routing...`);
- const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, activeTasks, logger);
- selectedTasks = [...standard, ...highMem];
-
- // DEBUG: Log routing results
- if (selectedTasks.length === 0 && activeTasks.length > 0) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${activeTasks.length} tasks filtered out by splitRoutes! Tasks: ${activeTasks.map(t => t.name).join(', ')}`);
- }
-
- if (selectedTasks.length > 0) {
- // Found work! Break loop to dispatch.
- break;
- }
- } else if (rawTasks.length > 0) {
- // DEBUG: Log if filterActiveTasks removed all tasks
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${rawTasks.length} raw tasks all filtered out by filterActiveTasks! Tasks: ${rawTasks.map(t => t.name).join(', ')}`);
- }
- }
- }
-
- // No work found for this date. Fast-forward to next.
  currentCursor++;
  }

- // --- Result Handling ---
-
- // Case 1: Satiated (Scanned to end of session with no work)
- if (currentCursor > sessionDates.length && selectedTasks.length === 0) {
+ if (currentCursor > sessionDates.length && tasksToDispatch.length === 0) {
  return {
  status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
+ dateProcessed: processedDate,
  dispatched: 0,
  n_cursor_ignored: false,
  remainingDates: 0,
- nextCursor: currentCursor // Matches length + 1
+ nextCursor: currentCursor
  };
  }

- // Case 2: Paused by Limit (No work found yet, but more dates remain)
- if (selectedTasks.length === 0) {
+ if (tasksToDispatch.length === 0) {
  return {
  status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
+ dateProcessed: processedDate,
  dispatched: 0,
  n_cursor_ignored: false,
  remainingDates: sessionDates.length - currentCursor + 1,
- nextCursor: currentCursor // Resume from here
+ nextCursor: currentCursor
  };
  }

- // Case 3: Work Found (Dispatching)
- const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
- const currentDispatchId = crypto.randomUUID();
- const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
+ const totalWeight = tasksToDispatch.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
+ const etaSeconds = Math.max(20, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));

- if (datesScanned > 1) {
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forwarded ${datesScanned - 1} empty dates. Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
+ if (scanned > 1) {
+ logger.log('INFO', `[Dispatcher] ⏩ Fast-forwarded ${scanned - 1} empty dates. Dispatching ${tasksToDispatch.length} tasks for ${processedDate}.`);
  } else {
- logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
+ logger.log('INFO', `[Dispatcher] ✅ Dispatching ${tasksToDispatch.length} tasks for ${processedDate}.`);
  }

- const mapToTaskPayload = (t) => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
- return {
- ...t,
- action: 'RUN_COMPUTATION_DATE',
- computation: t.name,
- date: selectedDate,
- pass: passToRun,
- dispatchId: currentDispatchId,
- triggerReason: t.reason,
- resources: t.resources || 'standard',
- traceContext: {
- traceId: traceId,
- spanId: spanId,
- sampled: true
- }
- };
- };
+ const dispatchId = crypto.randomUUID();
+ const standardPayload = tasksToDispatch.filter(t => t.resources !== 'high-mem').map(t => createTaskPayload(t, processedDate, pass, dispatchId, 'standard', t.reason));
+ const highMemPayload = tasksToDispatch.filter(t => t.resources === 'high-mem').map(t => createTaskPayload(t, processedDate, pass, dispatchId, 'high-mem', t.reason));

- const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
- const highMemTasks = selectedTasks.filter(t => t.resources === 'high-mem').map(mapToTaskPayload);
-
- const pubPromises = [];
- if (standardTasks.length > 0) {
- const taskNames = standardTasks.map(t => t.computation || t.name).join(', ');
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${standardTasks.length} standard tasks: ${taskNames}`, {
- date: selectedDate,
- pass: passToRun,
- tasks: standardTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'new' })),
- topic: config.computationTopicStandard || 'computation-tasks'
- });
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicStandard || 'computation-tasks',
- tasks: standardTasks,
- taskType: `pass-${passToRun}-std`
- }));
- }
- if (highMemTasks.length > 0) {
- const taskNames = highMemTasks.map(t => t.computation || t.name).join(', ');
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${highMemTasks.length} high-memory tasks: ${taskNames}`, {
- date: selectedDate,
- pass: passToRun,
- tasks: highMemTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'retry' })),
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
- });
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
- tasks: highMemTasks,
- taskType: `pass-${passToRun}-high`
- }));
- }
- await Promise.all(pubPromises);
+ await Promise.all([
+ publishTaskBatch(dependencies, config, standardPayload, pass),
+ publishTaskBatch(dependencies, config, highMemPayload, pass)
+ ]);

  return {
  status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
- dispatched: selectedTasks.length,
+ dateProcessed: processedDate,
+ dispatched: tasksToDispatch.length,
  n_cursor_ignored: false,
  etaSeconds: etaSeconds,
  remainingDates: sessionDates.length - targetCursorN,
- nextCursor: currentCursor + 1 // Start next scan AFTER this date
+ nextCursor: currentCursor + 1
  };
  }

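Note: a sketch of the workflow loop this cursor protocol implies (the call below is hypothetical; status values match the returns above):

// let cursor = 1;
// for (;;) {
//   const res = await dispatchComputationPass(config, dependencies, manifest,
//     { pass: '1', date: '2025-01-15', cursorIndex: cursor });
//   if (res.status === 'MOVE_TO_NEXT_PASS') break;           // nothing left in this pass
//   if (res.dispatched > 0) await waitSeconds(res.etaSeconds); // hypothetical helper
//   cursor = res.nextCursor;                                  // resume after the scanned dates
// }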
  // =============================================================================
- // HELPER: Route Splitting (One-Shot Enforcement)
+ // LOGIC: Resolution & Routing
  // =============================================================================
- // [UPDATED] Route Splitting with Version-Aware Dead Lettering
- async function splitRoutes(db, date, pass, tasks, logger) {
+
+ async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
+ const { db, logger } = dependencies;
+ const resolved = [], remaining = [];
+ const hashCache = new Map();
+
+ for (const task of tasks) {
+ const status = dailyStatus ? dailyStatus[task.name] : null;
+ const manifest = manifestMap.get(normalizeName(task.name));
+
+ if (status?.simHash && manifest) {
+ let knownSimHash = hashCache.get(manifest.hash);
+ if (!knownSimHash) {
+ const doc = await db.collection('system_simhash_registry').doc(manifest.hash).get();
+ if (doc.exists) {
+ knownSimHash = doc.data().simHash;
+ hashCache.set(manifest.hash, knownSimHash);
+ }
+ }
+ if (knownSimHash === status.simHash) {
+ resolved.push({ name: task.name, hash: manifest.hash, simHash: knownSimHash, prevStatus: status });
+ continue;
+ }
+ }
+ remaining.push(task);
+ }
+
+ if (resolved.length) {
+ const updates = {};
+ resolved.forEach(t => updates[t.name] = { ...t.prevStatus, hash: t.hash, simHash: t.simHash, reason: 'SimHash Auto-Resolve', lastUpdated: new Date().toISOString() });
+ await db.collection('computation_status').doc(date).set(updates, { merge: true });
+ logger.log('INFO', `[SimHash] ⏩ Resolved ${resolved.length} tasks for ${date}.`);
+ }
+ return remaining;
+ }
+
+ async function resolveRoutes(db, date, pass, tasks, logger) {
  const standard = [];
  const highMem = [];

- for (const task of tasks) {
+ const checks = tasks.map(async (task) => {
  const name = normalizeName(task.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
- const doc = await db.doc(ledgerPath).get();
+ const doc = await db.doc(`computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`).get();

- if (!doc.exists) {
- // No ledger entry - trust analyzeDateExecution, dispatch as standard
- standard.push(task);
- continue;
- }
+ if (!doc.exists) return { task, type: 'std' };

  const data = doc.data();

- // CRITICAL FIX: If analyzeDateExecution says this task should run, we MUST trust it.
- // The ledger might say COMPLETED, but if computation_status is missing/outdated,
- // we need to re-run to repair the state. Only skip if actively running.
- // Note: filterActiveTasks already filtered out non-stale PENDING/IN_PROGRESS,
- // but we double-check here in case of race conditions.
  if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
- // Check if it's stale (should have been caught by filterActiveTasks, but double-check)
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
- // Stale lock - break it and continue
- logger.log('WARN', `[Dispatcher] 🧟 splitRoutes: Breaking stale lock for ${name}.`);
- // Fall through to handle as if no active lock
- } else {
- // Valid active lock - skip (shouldn't happen if filterActiveTasks worked correctly)
- logger.log('TRACE', `[Dispatcher] splitRoutes: Skipping ${name} - Valid IN_PROGRESS (should have been filtered earlier).`);
- continue;
- }
+ const lastActive = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
+ if ((Date.now() - lastActive) < STALE_LOCK_THRESHOLD_MS) return null;
+ logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${name}`);
  }
-
+
  if (data.status === 'FAILED') {
  const stage = data.error?.stage;
+ const isCodeChanged = data.hash !== task.hash;

- // 1. DETERMINISTIC FAILURES (Never Retry UNLESS Code Updated)
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
- // [FIX] Check if the hash matches. If code changed, we MUST retry.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
- continue;
- }
-
- // If hashes differ, we reset to Standard execution to give the new code a chance
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Retrying despite previous ${stage}.`);
- standard.push({ ...task, reason: 'Retry: Code Version Changed' });
- continue;
+ if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE'].includes(stage)) {
+ if (!isCodeChanged) return null;
+ return { task: { ...task, reason: 'Retry: Code Version Changed' }, type: 'std' };
  }

- // 2. HIGH MEMORY FAILURE HANDLING (The New Logic)
  if (data.resourceTier === 'high-mem') {
- const failedHash = data.hash || data.composition?.code; // Support legacy or new structure
- const currentHash = task.hash;
-
- // A. EXACT CODE MATCH: It failed High-Mem with THIS code.
- if (failedHash === currentHash) {
- logger.log('WARN', `[Dispatcher] 💀 Dead End: ${name} failed High-Mem on this version (${currentHash.slice(0,6)}). Waiting for code fix.`);
- continue; // STOP. Do not retry.
- }
-
- // B. CODE MISMATCH: The code has changed since the High-Mem failure.
- // We reset it to 'standard' to see if the fix optimized memory usage.
- else {
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Resetting High-Mem failure to Standard retry.`);
- standard.push({
- ...task,
- reason: 'Retry: Code Version Changed'
- });
- continue;
- }
+ if (!isCodeChanged) return null;
+ return { task: { ...task, reason: 'Retry: Code Changed (HighMem Reset)' }, type: 'std' };
  }

- // 3. STANDARD FAILURE -> PROMOTE TO HIGH MEM
- highMem.push({
- ...task,
- resources: 'high-mem',
- reason: `Retry: ${data.error?.message || 'Standard Failure'}`
- });
-
- } else {
- // Status is likely COMPLETED or some other state.
- // CRITICAL: If analyzeDateExecution says this should run, we MUST trust it.
- // The ledger might show COMPLETED, but if computation_status is missing/outdated,
- // we need to re-run to repair the state. This is the "ghost state fix" logic.
- // Trust the Brain (analyzeDateExecution) over the Log (ledger).
- logger.log('INFO', `[Dispatcher] 🔄 splitRoutes: ${name} has ledger status '${data.status}', but analyzeDateExecution says it should run. Trusting analysis and dispatching.`);
- standard.push(task);
+ return { task: { ...task, reason: `Retry: ${data.error?.message}`, resources: 'high-mem' }, type: 'high' };
  }
- }

+ return { task, type: 'std' };
+ });
+
+ const results = (await Promise.all(checks)).filter(r => r !== null);
+ results.forEach(r => r.type === 'high' ? highMem.push(r.task) : standard.push(r.task));
+
  return { standard, highMem };
  }

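Note: the one-shot routing above collapses to this decision table (same hash means the code that failed is unchanged):

// no ledger document                                           -> standard
// PENDING/IN_PROGRESS, heartbeat younger than 15 min           -> dropped (still running)
// PENDING/IN_PROGRESS, heartbeat older than 15 min             -> stale lock broken, falls through
// FAILED at QUALITY_CIRCUIT_BREAKER / SEMANTIC_GATE, same hash -> dropped (deterministic failure)
// FAILED at QUALITY_CIRCUIT_BREAKER / SEMANTIC_GATE, new hash  -> standard retry
// FAILED on high-mem tier, same hash                           -> dropped (dead end until code changes)
// FAILED on high-mem tier, new hash                            -> standard retry
// FAILED on standard tier                                      -> promoted to high-mem
// any other ledger status                                      -> standard (trust the analysis)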
+ // =============================================================================
+ // MAIN ENTRY
+ // =============================================================================
+
+ async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
+ switch (reqBody.action) {
+ case 'VERIFY': return handlePassVerification(config, dependencies, computationManifest, reqBody);
+ case 'SWEEP': return handleSweepDispatch(config, dependencies, computationManifest, reqBody);
+ case 'REPORT': return handleFinalSweepReporting(config, dependencies, computationManifest, reqBody);
+ case 'FORCE_RUN': return handleForceRun(config, dependencies, computationManifest, reqBody);
+ // 3. REGISTER SNAPSHOT ACTION
+ case 'SNAPSHOT': return handleSnapshot(config, dependencies, reqBody);
+ default: return handleStandardDispatch(config, dependencies, computationManifest, reqBody);
+ }
+ }
+
+ async function handleFinalSweepReporting(config, dependencies, computationManifest, reqBody) {
+ const { logger } = dependencies;
+ const date = reqBody.date || new Date().toISOString().slice(0, 10);
+ try {
+ const res = await runFinalSweepCheck(config, dependencies, date, String(reqBody.pass || "1"), computationManifest);
+ return { status: 'COMPLETED', date, issues: res.issuesCount };
+ } catch (e) {
+ logger.log('ERROR', `[Dispatcher] Report failed: ${e.message}`);
+ return { status: 'ERROR', error: e.message };
+ }
+ }
+
  module.exports = { dispatchComputationPass };
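Note: for orientation, the exported entry point routes on reqBody.action; a hypothetical caller (argument shapes inferred from this diff — dependencies carries at least { db, logger }):

const { dispatchComputationPass } = require('./functions/computation-system/helpers/computation_dispatcher.js');

// await dispatchComputationPass(config, deps, manifest, {});                                        // cursor-based dispatch
// await dispatchComputationPass(config, deps, manifest, { action: 'VERIFY', pass: '1' });           // list unfinished dates
// await dispatchComputationPass(config, deps, manifest, { action: 'SWEEP', pass: '1', date: d });   // forced high-mem recovery
// await dispatchComputationPass(config, deps, manifest, { action: 'REPORT', pass: '1', date: d });  // final sweep forensics
// await dispatchComputationPass(config, deps, manifest, { action: 'FORCE_RUN', computation: 'myCalc' });
// await dispatchComputationPass(config, deps, manifest, { action: 'SNAPSHOT', date: d });           // new in this version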