bulltrackers-module 1.0.732 → 1.0.733

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/functions/orchestrator/index.js +19 -17
  2. package/index.js +8 -29
  3. package/package.json +1 -1
  4. package/functions/computation-system/WorkflowOrchestrator.js +0 -213
  5. package/functions/computation-system/config/monitoring_config.js +0 -31
  6. package/functions/computation-system/config/validation_overrides.js +0 -10
  7. package/functions/computation-system/context/ContextFactory.js +0 -143
  8. package/functions/computation-system/context/ManifestBuilder.js +0 -379
  9. package/functions/computation-system/data/AvailabilityChecker.js +0 -236
  10. package/functions/computation-system/data/CachedDataLoader.js +0 -325
  11. package/functions/computation-system/data/DependencyFetcher.js +0 -455
  12. package/functions/computation-system/executors/MetaExecutor.js +0 -279
  13. package/functions/computation-system/executors/PriceBatchExecutor.js +0 -108
  14. package/functions/computation-system/executors/StandardExecutor.js +0 -465
  15. package/functions/computation-system/helpers/computation_dispatcher.js +0 -750
  16. package/functions/computation-system/helpers/computation_worker.js +0 -375
  17. package/functions/computation-system/helpers/monitor.js +0 -64
  18. package/functions/computation-system/helpers/on_demand_helpers.js +0 -154
  19. package/functions/computation-system/layers/extractors.js +0 -1097
  20. package/functions/computation-system/layers/index.js +0 -40
  21. package/functions/computation-system/layers/mathematics.js +0 -522
  22. package/functions/computation-system/layers/profiling.js +0 -537
  23. package/functions/computation-system/layers/validators.js +0 -170
  24. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +0 -388
  25. package/functions/computation-system/legacy/CachedDataLoaderOld.js +0 -357
  26. package/functions/computation-system/legacy/DependencyFetcherOld.js +0 -478
  27. package/functions/computation-system/legacy/MetaExecutorold.js +0 -364
  28. package/functions/computation-system/legacy/StandardExecutorold.js +0 -476
  29. package/functions/computation-system/legacy/computation_dispatcherold.js +0 -944
  30. package/functions/computation-system/logger/logger.js +0 -297
  31. package/functions/computation-system/persistence/ContractValidator.js +0 -81
  32. package/functions/computation-system/persistence/FirestoreUtils.js +0 -56
  33. package/functions/computation-system/persistence/ResultCommitter.js +0 -283
  34. package/functions/computation-system/persistence/ResultsValidator.js +0 -130
  35. package/functions/computation-system/persistence/RunRecorder.js +0 -142
  36. package/functions/computation-system/persistence/StatusRepository.js +0 -52
  37. package/functions/computation-system/reporter_epoch.js +0 -6
  38. package/functions/computation-system/scripts/UpdateContracts.js +0 -128
  39. package/functions/computation-system/services/SnapshotService.js +0 -148
  40. package/functions/computation-system/simulation/Fabricator.js +0 -285
  41. package/functions/computation-system/simulation/SeededRandom.js +0 -41
  42. package/functions/computation-system/simulation/SimRunner.js +0 -51
  43. package/functions/computation-system/system_epoch.js +0 -2
  44. package/functions/computation-system/tools/BuildReporter.js +0 -531
  45. package/functions/computation-system/tools/ContractDiscoverer.js +0 -144
  46. package/functions/computation-system/tools/DeploymentValidator.js +0 -536
  47. package/functions/computation-system/tools/FinalSweepReporter.js +0 -322
  48. package/functions/computation-system/topology/HashManager.js +0 -55
  49. package/functions/computation-system/topology/ManifestLoader.js +0 -47
  50. package/functions/computation-system/utils/data_loader.js +0 -675
  51. package/functions/computation-system/utils/schema_capture.js +0 -121
  52. package/functions/computation-system/utils/utils.js +0 -188
@@ -1,944 +0,0 @@
- /**
- * OLD VERSION KEPT FOR REFERENCE, UNUSED IN PIPELINE
- * FILENAME: computation-system/helpers/computation_dispatcher.js
- * PURPOSE: Sequential Cursor-Based Dispatcher.
- * UPDATED: Implemented "Fast-Forward" Scanning Loop to skip empty dates efficiently.
- * UPDATED: Enforces Strict One-Shot Policy (Standard -> HighMem -> Dead Letter).
- * UPDATED: Generates Google Cloud Trace Context (traceId/spanId) for end-to-end monitoring.
- * UPDATED: Added Schedule Awareness (Daily, Weekly, Monthly) to filter tasks by date.
- *
- */
-
- const { getExpectedDateStrings, getEarliestDataDates, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
- const { groupByPass, analyzeDateExecution } = require('../WorkflowOrchestrator.js');
- const { PubSubUtils } = require('../../core/utils/pubsub_utils.js');
- const { fetchComputationStatus } = require('../persistence/StatusRepository.js');
- const { checkRootDataAvailability } = require('../data/AvailabilityChecker.js');
- const { runFinalSweepCheck } = require('../tools/FinalSweepReporter.js'); // [NEW]
- const crypto = require('crypto');
-
- const OOM_THRESHOLD_MB = 1500; // Unused
- const BASE_SECONDS_PER_WEIGHT_UNIT = 3;
- const SESSION_CACHE_DURATION_MS = 1000 * 60 * 30; // 30 Minutes
- const STALE_LOCK_THRESHOLD_MS = 1000 * 60 * 15;
-
- // =============================================================================
- // HELPER: Firestore Timestamp Conversion
- // =============================================================================
- /**
- * Converts a Firestore Timestamp or Date to milliseconds.
- * Firestore stores Date objects as Timestamp objects, which have a .toDate() method.
- * This function handles both cases correctly.
- * @param {any} field - Firestore Timestamp, Date object, or string
- * @returns {number} Milliseconds since epoch, or 0 if invalid
- */
- function getMillis(field) {
- if (!field) return 0;
- // Handle Firestore Timestamp (has .toDate() method)
- if (field.toDate && typeof field.toDate === 'function') {
- return field.toDate().getTime();
- }
- // Handle standard Date object or string
- const date = new Date(field);
- return isNaN(date.getTime()) ? 0 : date.getTime();
- }
-
- // =============================================================================
- // HELPER: Schedule Logic
- // =============================================================================
- /**
- * Checks if a computation is scheduled to run on a specific date.
- * Defaults to DAILY if no schedule is present.
- * * @param {string} dateStr - YYYY-MM-DD string
- * @param {Object} scheduleConfig - { type: 'DAILY'|'WEEKLY'|'MONTHLY', days: [] }
- * @returns {boolean} True if the computation should run
- */
- function isComputationScheduled(dateStr, scheduleConfig) {
- // Default: Run every day if no schedule is provided or explicitly DAILY
- if (!scheduleConfig || !scheduleConfig.type || scheduleConfig.type === 'DAILY') {
- return true;
- }
-
- const date = new Date(dateStr + 'T00:00:00Z'); // Ensure UTC parsing
-
- // Weekly Schedule: Check Day of Week (0=Sun, 1=Mon, ..., 6=Sat)
- if (scheduleConfig.type === 'WEEKLY') {
- const dayOfWeek = date.getUTCDay();
- const validDays = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
- return validDays.includes(dayOfWeek);
- }
-
- // Monthly Schedule: Check Day of Month (1-31)
- if (scheduleConfig.type === 'MONTHLY') {
- const dayOfMonth = date.getUTCDate();
- const validDates = Array.isArray(scheduleConfig.days) ? scheduleConfig.days : [scheduleConfig.day];
- return validDates.includes(dayOfMonth);
- }
-
- // Fallback default
- return true;
- }
-
- // =============================================================================
- // HELPER: Ledger Awareness
- // =============================================================================
- async function filterActiveTasks(db, date, pass, tasks, logger, forceRun = false) {
- if (!tasks || tasks.length === 0) return [];
- if (forceRun) return tasks;
-
- const checkPromises = tasks.map(async (t) => {
- const taskName = normalizeName(t.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${taskName}`;
- const snap = await db.doc(ledgerPath).get();
-
- if (snap.exists) {
- const data = snap.data();
- const isActive = ['PENDING', 'IN_PROGRESS'].includes(data.status);
-
- if (isActive) {
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
- if (logger) logger.log('WARN', `[Dispatcher] 🧟 Breaking stale lock for ${taskName}.`);
- return t;
- }
- return null;
- }
- // Note: We do NOT filter COMPLETED here anymore for Sweep.
- // If the Orchestrator says it needs to run, we run it.
- }
- return t;
- });
- const results = await Promise.all(checkPromises);
- return results.filter(t => t !== null);
- }
-
- // =============================================================================
- // HELPER: SimHash Stability
- // =============================================================================
- async function attemptSimHashResolution(dependencies, date, tasks, dailyStatus, manifestMap) {
- const { db, logger } = dependencies;
- const resolvedTasks = [];
- const remainingTasks = [];
- const simHashCache = new Map();
-
- for (const task of tasks) {
- const currentStatus = dailyStatus ? dailyStatus[task.name] : null;
- const manifestItem = manifestMap.get(normalizeName(task.name));
-
- if (currentStatus && currentStatus.simHash && manifestItem) {
- let newSimHash = simHashCache.get(manifestItem.hash);
- if (!newSimHash) {
- const simDoc = await db.collection('system_simhash_registry').doc(manifestItem.hash).get();
- if (simDoc.exists) {
- newSimHash = simDoc.data().simHash;
- simHashCache.set(manifestItem.hash, newSimHash);
- }
- }
-
- if (newSimHash && newSimHash === currentStatus.simHash) {
- resolvedTasks.push({
- name: task.name,
- hash: manifestItem.hash,
- simHash: newSimHash,
- prevStatus: currentStatus
- });
- continue;
- }
- }
- remainingTasks.push(task);
- }
-
- if (resolvedTasks.length > 0) {
- const updatePayload = {};
- resolvedTasks.forEach(t => {
- updatePayload[t.name] = {
- ...(t.prevStatus || {}),
- hash: t.hash,
- simHash: t.simHash,
- reason: 'SimHash Stable (Auto-Resolved)',
- lastUpdated: new Date().toISOString()
- };
- });
- await db.collection('computation_status').doc(date).set(updatePayload, { merge: true });
- logger.log('INFO', `[SimHash] ⏩ Fast-forwarded ${resolvedTasks.length} tasks for ${date}.`);
- }
-
- return remainingTasks;
- }
-
- async function getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild) {
- const { db, logger } = dependencies;
- const sessionId = `pass_${passToRun}_${dateLimitStr.replace(/-/g, '')}`;
- const sessionRef = db.collection('dispatcher_sessions').doc(sessionId);
-
- if (!forceRebuild) {
- const sessionSnap = await sessionRef.get();
- if (sessionSnap.exists) {
- const data = sessionSnap.data();
- if ((Date.now() - new Date(data.createdAt).getTime()) < SESSION_CACHE_DURATION_MS) {
- return data.dates;
- }
- }
- }
-
- logger.log('INFO', `[Session] 🔄 Rebuilding dispatch session for Pass ${passToRun}...`);
- const earliestDates = await getEarliestDataDates(config, dependencies);
- const allDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date(dateLimitStr + 'T00:00:00Z'));
-
- await sessionRef.set({ dates: allDates, createdAt: new Date().toISOString(), configHash: dateLimitStr });
- return allDates;
- }
-
- // MAIN ENTRY POINT
- // =============================================================================
- async function dispatchComputationPass(config, dependencies, computationManifest, reqBody = {}) {
- const action = reqBody.action || 'DISPATCH';
-
- if (action === 'VERIFY') {
- return handlePassVerification(config, dependencies, computationManifest, reqBody);
- }
- else if (action === 'SWEEP') {
- return handleSweepDispatch(config, dependencies, computationManifest, reqBody);
- }
- else if (action === 'REPORT') {
- return handleFinalSweepReporting(config, dependencies, computationManifest, reqBody);
- }
- // [NEW] FORCE RUN HANDLER
- else if (action === 'FORCE_RUN') {
- return handleForceRun(config, dependencies, computationManifest, reqBody);
- }
-
- return handleStandardDispatch(config, dependencies, computationManifest, reqBody);
- }
-
- // =============================================================================
- // NEW: Force Run Handler (Bypasses Checks)
- // =============================================================================
- async function handleForceRun(config, dependencies, computationManifest, reqBody) {
- const { logger } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
- const computationName = reqBody.computation; // Required
- const dateInput = reqBody.date; // Optional (YYYY-MM-DD)
-
- if (!computationName) {
- throw new Error('Force Run requires "computation" name.');
- }
-
- // 1. Verify Computation Exists
- const manifestItem = computationManifest.find(c => normalizeName(c.name) === normalizeName(computationName));
- if (!manifestItem) {
- throw new Error(`Computation '${computationName}' not found in manifest.`);
- }
-
- // 2. Determine Target Dates
- let candidateDates = [];
- if (dateInput) {
- // Single Date Mode
- candidateDates = [dateInput];
- } else {
- // All Dates Mode (Backfill)
- logger.log('INFO', `[ForceRun] No date provided. Calculating date range for ${computationName}...`);
- const earliestDates = await getEarliestDataDates(config, dependencies);
- // Calculate from system start until today
- candidateDates = getExpectedDateStrings(earliestDates.absoluteEarliest, new Date());
- }
-
- logger.log('INFO', `[ForceRun] Checking ${candidateDates.length} candidate dates for runnability...`);
-
- // 3. Filter to only runnable dates using analyzeDateExecution
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const passes = groupByPass(computationManifest);
- const calcsInPass = passes[manifestItem.pass || "1"] || [];
- const targetComputationNormalized = normalizeName(computationName);
-
- // Filter to only the target computation
- const targetCalcs = calcsInPass.filter(c => normalizeName(c.name) === targetComputationNormalized);
-
- if (targetCalcs.length === 0) {
- throw new Error(`Computation '${computationName}' not found in pass ${manifestItem.pass || "1"}`);
- }
-
- const runnableDates = [];
- const skippedDates = [];
-
- for (const dateStr of candidateDates) {
- // Check root data availability
- const rootDataStatus = await checkRootDataAvailability(dateStr, config, dependencies, DEFINITIVE_EARLIEST_DATES);
-
- // Get computation status for this date
- const dailyStatus = await fetchComputationStatus(dateStr, config, dependencies);
-
- // Check previous day status if needed
- let prevDailyStatus = null;
- if (targetCalcs.some(c => c.isHistorical)) {
- const prevDate = new Date(dateStr + 'T00:00:00Z');
- prevDate.setUTCDate(prevDate.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevDate.toISOString().slice(0, 10), config, dependencies);
- }
-
- // Analyze if this computation can run on this date
- const report = analyzeDateExecution(dateStr, targetCalcs, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
-
- // Check if the target computation is runnable, needs re-run, or has failed dependencies
- const isRunnable = report.runnable.some(t => normalizeName(t.name) === targetComputationNormalized);
- const needsReRun = report.reRuns.some(t => normalizeName(t.name) === targetComputationNormalized);
- const hasFailedDep = report.failedDependency.some(t => normalizeName(t.name) === targetComputationNormalized);
- const isImpossible = report.impossible.some(t => normalizeName(t.name) === targetComputationNormalized);
- const isBlocked = report.blocked.some(t => normalizeName(t.name) === targetComputationNormalized);
-
- if (isRunnable || needsReRun || hasFailedDep) {
- runnableDates.push(dateStr);
- } else if (isImpossible) {
- skippedDates.push({ date: dateStr, reason: report.impossible.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Impossible' });
- } else if (isBlocked) {
- skippedDates.push({ date: dateStr, reason: report.blocked.find(t => normalizeName(t.name) === targetComputationNormalized)?.reason || 'Blocked' });
- } else {
- skippedDates.push({ date: dateStr, reason: 'Not runnable (unknown reason)' });
- }
- }
-
- logger.log('INFO', `[ForceRun] ✅ Found ${runnableDates.length} runnable dates out of ${candidateDates.length} candidates`);
- if (skippedDates.length > 0) {
- logger.log('INFO', `[ForceRun] ⏭️ Skipped ${skippedDates.length} dates: ${skippedDates.slice(0, 5).map(s => `${s.date} (${s.reason})`).join(', ')}${skippedDates.length > 5 ? '...' : ''}`);
- }
-
- if (runnableDates.length === 0) {
- return {
- status: 'NO_RUNNABLE_DATES',
- computation: computationName,
- mode: dateInput ? 'SINGLE_DATE' : 'ALL_DATES',
- datesChecked: candidateDates.length,
- datesRunnable: 0,
- skippedReasons: skippedDates.slice(0, 10)
- };
- }
-
- logger.log('WARN', `[ForceRun] 🚨 MANUALLY Triggering ${computationName} for ${runnableDates.length} runnable dates. Pass: ${manifestItem.pass}`);
-
- // 4. Construct Tasks (only for runnable dates)
- const dispatchId = crypto.randomUUID();
- const tasks = runnableDates.map(date => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
- return {
- action: 'RUN_COMPUTATION_DATE',
- computation: manifestItem.name,
- date: date,
- pass: manifestItem.pass || "1",
- dispatchId: dispatchId,
- triggerReason: 'MANUAL_FORCE_API',
- resources: reqBody.resources || 'standard',
- // Trace context allows you to find these specific runs in Cloud Trace
- traceContext: { traceId, spanId, sampled: true }
- };
- });
-
- // 4. Batch Publish (Chunked to stay under Pub/Sub limits)
- const CHUNK_SIZE = 250; // Safe batch size
- const topic = (reqBody.resources === 'high-mem')
- ? (config.computationTopicHighMem || 'computation-tasks-highmem')
- : (config.computationTopicStandard || 'computation-tasks');
-
- let dispatchedCount = 0;
- const chunks = [];
- for (let i = 0; i < tasks.length; i += CHUNK_SIZE) {
- chunks.push(tasks.slice(i, i + CHUNK_SIZE));
- }
-
- // Publish chunks sequentially to avoid memory spikes
- for (const chunk of chunks) {
- await pubsubUtils.batchPublishTasks(dependencies, {
- topicName: topic,
- tasks: chunk,
- taskType: 'manual-force-run'
- });
- dispatchedCount += chunk.length;
- }
-
- return {
- status: 'FORCED',
- computation: computationName,
- mode: dateInput ? 'SINGLE_DATE' : 'ALL_DATES',
- datesChecked: candidateDates.length,
- datesRunnable: runnableDates.length,
- datesTriggered: dispatchedCount,
- skippedCount: skippedDates.length,
- targetTopic: topic
- };
- }
-
- // =============================================================================
- // NEW: Final Sweep Reporting Handler
- // =============================================================================
- async function handleFinalSweepReporting(config, dependencies, computationManifest, reqBody) {
- const { logger } = dependencies;
- const passToRun = String(reqBody.pass || "1");
- // Target date is required for detailed forensics
- const date = reqBody.date || new Date().toISOString().slice(0, 10);
-
- logger.log('INFO', `[Dispatcher] 📝 Triggering Final Sweep Forensics for Pass ${passToRun} on ${date}...`);
-
- try {
- const result = await runFinalSweepCheck(config, dependencies, date, passToRun, computationManifest);
- return {
- status: 'COMPLETED',
- date: date,
- pass: passToRun,
- issuesFound: result.issuesCount
- };
- } catch (e) {
- logger.log('ERROR', `[Dispatcher] Forensics failed: ${e.message}`);
- return { status: 'ERROR', error: e.message };
- }
- }
-
- // =============================================================================
- // LOGIC: Verify Pass Completion
- // =============================================================================
- async function handlePassVerification(config, dependencies, computationManifest, reqBody) {
- const { logger } = dependencies;
- const passToRun = String(reqBody.pass || "1");
- const dateLimitStr = reqBody.date || "2025-01-01";
-
- logger.log('INFO', `[Verify] 🧹 Sweeping Pass ${passToRun} for unfinished work...`);
-
- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, false);
- const passes = groupByPass(computationManifest);
- const calcsInPass = passes[passToRun] || [];
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const weightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
-
- const missingTasks = [];
-
- for (const date of sessionDates) {
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
- const scheduledComputations = calcsInPass.filter(c =>
- isComputationScheduled(date, c.schedule)
- );
-
- if (scheduledComputations.length === 0) continue;
-
- const [dailyStatus, availability] = await Promise.all([
- fetchComputationStatus(date, config, dependencies),
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- let prevDailyStatus = null;
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(date + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
-
- const report = analyzeDateExecution(
- date,
- scheduledComputations, // Use filtered list
- availability ? availability.status : {},
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
-
- const pending = [...report.runnable, ...report.reRuns];
-
- if (pending.length > 0) {
- const totalWeight = pending.reduce((sum, t) => sum + (weightMap.get(normalizeName(t.name)) || 1.0), 0);
- const eta = Math.max(30, Math.ceil(totalWeight * BASE_SECONDS_PER_WEIGHT_UNIT));
-
- missingTasks.push({
- date: date,
- taskCount: pending.length,
- eta: eta
- });
- }
- }
-
- logger.log('INFO', `[Verify] Found ${missingTasks.length} dates with pending work.`);
- return { missingTasks };
- }
-
- // =============================================================================
- // LOGIC: Sweep Dispatch (Forced High-Mem)
- // =============================================================================
- async function handleSweepDispatch(config, dependencies, computationManifest, reqBody) {
- const { logger, db } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
- const passToRun = String(reqBody.pass || "1");
- const date = reqBody.date;
-
- if (!date) throw new Error('Sweep dispatch requires date');
-
- const passes = groupByPass(computationManifest);
- const calcsInPass = passes[passToRun] || [];
-
- // [SCHEDULE CHECK] Filter tasks that are not scheduled for this date
- const scheduledComputations = calcsInPass.filter(c =>
- isComputationScheduled(date, c.schedule)
- );
-
- if (scheduledComputations.length === 0) {
- logger.log('INFO', `[Sweep] ${date} has no scheduled tasks for Pass ${passToRun}. Ignoring.`);
- return { dispatched: 0 };
- }
-
- // 1. Analyze specific date
- const [dailyStatus, availability] = await Promise.all([
- fetchComputationStatus(date, config, dependencies),
- checkRootDataAvailability(date, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- let prevDailyStatus = null;
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(date + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- prevDailyStatus = await fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
-
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const report = analyzeDateExecution(
- date,
- scheduledComputations, // Use filtered list
- availability ? availability.status : {},
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
- const pending = [...report.runnable, ...report.reRuns];
-
- if (pending.length === 0) {
- logger.log('INFO', `[Sweep] ${date} is clean. No dispatch.`);
- return { dispatched: 0 };
- }
-
- const validTasks = [];
- for (const task of pending) {
- const name = normalizeName(task.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${passToRun}/tasks/${name}`;
- const doc = await db.doc(ledgerPath).get();
-
- if (doc.exists) {
- const data = doc.data();
-
- // 1. ACTIVE CHECK: Don't double-dispatch if already running... UNLESS IT'S A ZOMBIE
- if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
- const lastActivity = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- // If it's been silent for > 15 mins, it's a Zombie. Kill it and Re-run.
- if ((Date.now() - lastActivity) > STALE_LOCK_THRESHOLD_MS) {
- logger.log('WARN', `[Sweep] 🧟 Found ZOMBIE lock for ${name}. Breaking lock and re-running.`);
- // Don't continue; let it fall through to dispatch
- } else {
- logger.log('INFO', `[Sweep] ⏳ Skipping ${name} - Valid IN_PROGRESS.`);
- continue;
- }
- }
-
- // 2. COMPLETION CHECK (GHOST STATE FIX)
- // We REMOVED the check that skips if (status === 'COMPLETED' && hash === task.hash).
- // If we are here, 'analyzeDateExecution' (The Brain) decided this task is NOT done
- // (likely due to a missing or outdated entry in computation_status).
- // Even if the Ledger (The Log) says it finished, the system state is inconsistent.
- // We MUST re-run to repair the Status Index.
-
- const stage = data.error?.stage;
-
- // 3. DETERMINISTIC FAILURE CHECK
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
- // If hash matches, it's the exact same code that failed before. Don't retry in loop.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Sweep] 🛑 Skipping deterministic failure for ${name} (${stage}).`);
- continue;
- }
- logger.log('INFO', `[Sweep] 🔄 Code Updated for ${name}. Retrying sweep despite previous ${stage}.`);
- }
-
- // 4. DEAD END CHECK (High Mem)
- if (data.resourceTier === 'high-mem' && data.status === 'FAILED') {
- // If code hasn't changed, don't hammer it.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Sweep] 🛑 Skipping ${name} - Already failed on High-Mem.`);
- continue;
- }
- }
- }
- validTasks.push(task);
- }
-
- if (validTasks.length === 0) {
- logger.log('INFO', `[Sweep] ${date} has no retryable tasks. Ignoring.`);
- return { dispatched: 0 };
- }
-
- // 2. FORCE High Mem & INJECT TRACE
- const currentDispatchId = crypto.randomUUID();
-
- const tasksPayload = validTasks.map(t => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
-
- return {
- ...t,
- action: 'RUN_COMPUTATION_DATE',
- computation: t.name,
- date: date,
- pass: passToRun,
- dispatchId: currentDispatchId,
- triggerReason: 'SWEEP_RECOVERY',
- resources: 'high-mem', // FORCE
- traceContext: { traceId, spanId, sampled: true }
- };
- });
-
- const taskNames = tasksPayload.map(t => t.computation || t.name).join(', ');
- logger.log('WARN', `[Sweep] 🧹 Forcing ${tasksPayload.length} tasks to HIGH-MEM for ${date}.`, {
- date: date,
- pass: passToRun,
- tasks: tasksPayload.map(t => ({ name: t.computation || t.name, reason: 'sweep' })),
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
- });
-
- await pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
- tasks: tasksPayload,
- taskType: `pass-${passToRun}-sweep`
- });
-
- return { dispatched: tasksPayload.length };
- }
-
- // =============================================================================
- // LOGIC: Standard Dispatch (Fast-Forward Enabled)
- // =============================================================================
- async function handleStandardDispatch(config, dependencies, computationManifest, reqBody) {
- const { logger, db } = dependencies;
- const pubsubUtils = new PubSubUtils(dependencies);
-
- const passToRun = String(reqBody.pass || "1");
- const targetCursorN = parseInt(reqBody.cursorIndex || 1);
- const dateLimitStr = reqBody.date || "2025-01-01";
- const forceRebuild = reqBody.forceRebuild === true;
-
- const manifestMap = new Map(computationManifest.map(c => [normalizeName(c.name), c]));
- const passes = groupByPass(computationManifest);
- const calcsInThisPass = passes[passToRun] || [];
- const manifestWeightMap = new Map(computationManifest.map(c => [normalizeName(c.name), c.weight || 1.0]));
-
- if (!calcsInThisPass.length) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
-
- const sessionDates = await getStableDateSession(config, dependencies, passToRun, dateLimitStr, forceRebuild);
- if (!sessionDates || sessionDates.length === 0) return { status: 'MOVE_TO_NEXT_PASS', dispatched: 0 };
-
- // --- Fast-Forward Loop Configuration ---
- // Scans up to 50 dates or 40 seconds to find work, avoiding empty "wait loops"
- const MAX_SCAN_DEPTH = 50; // This is actually 50 + Today, so 51.
- const TIME_LIMIT_MS = 40000;
- const startTime = Date.now();
-
- let currentCursor = targetCursorN;
- let selectedTasks = [];
- let selectedDate = null;
- let datesScanned = 0;
-
- // Loop until work is found, end is reached, or safety limits hit
- while (currentCursor <= sessionDates.length) {
- datesScanned++;
- selectedDate = sessionDates[currentCursor - 1]; // 0-indexed array
-
- // 1. Safety Break (Prevent Timeout)
- if ((Date.now() - startTime) > TIME_LIMIT_MS || datesScanned > MAX_SCAN_DEPTH) {
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forward paused at ${selectedDate} after scanning ${datesScanned} dates.`);
- break;
- }
-
- // 2. [SCHEDULE CHECK] Filter computations scheduled for this specific date
- const scheduledComputations = calcsInThisPass.filter(c =>
- isComputationScheduled(selectedDate, c.schedule)
- );
-
- // Optimization: If nothing is scheduled for today, skip expensive DB checks
- if (scheduledComputations.length === 0) {
- // DEBUG: Log when schedule filtering removes all tasks
- if (calcsInThisPass.length > 0) {
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: ${calcsInThisPass.length} pass computations, but 0 scheduled for this date. Skipping.`);
- }
- currentCursor++;
- continue;
- }
-
- // 3. Analyze Date
- const earliestDates = await getEarliestDataDates(config, dependencies);
- let prevDailyStatusPromise = Promise.resolve(null);
- if (scheduledComputations.some(c => c.isHistorical)) {
- const prevD = new Date(selectedDate + 'T00:00:00Z');
- prevD.setUTCDate(prevD.getUTCDate() - 1);
- if (prevD >= earliestDates.absoluteEarliest) {
- prevDailyStatusPromise = fetchComputationStatus(prevD.toISOString().slice(0, 10), config, dependencies);
- }
- }
-
- const [dailyStatus, prevDailyStatus, availability] = await Promise.all([
- fetchComputationStatus(selectedDate, config, dependencies),
- prevDailyStatusPromise,
- checkRootDataAvailability(selectedDate, config, dependencies, DEFINITIVE_EARLIEST_DATES)
- ]);
-
- // DEBUG: Log availability check
- if (!availability || !availability.status) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: Availability check failed or returned null. Skipping analysis.`);
- }
-
- if (availability && availability.status) {
- const report = analyzeDateExecution(
- selectedDate,
- scheduledComputations, // Use filtered list
- availability.status,
- dailyStatus,
- manifestMap,
- prevDailyStatus
- );
- let rawTasks = [...report.runnable, ...report.reRuns];
-
- // DEBUG: Log analysis results
- if (rawTasks.length === 0 && (report.runnable.length > 0 || report.reRuns.length > 0)) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable + ${report.reRuns.length} reRuns, but rawTasks is empty!`);
- }
- if (rawTasks.length > 0) {
- logger.log('TRACE', `[Dispatcher] Date ${selectedDate}: analyzeDateExecution found ${report.runnable.length} runnable, ${report.reRuns.length} reRuns. Total: ${rawTasks.length}`);
- }
-
- if (rawTasks.length > 0) {
- rawTasks = await attemptSimHashResolution(dependencies, selectedDate, rawTasks, dailyStatus, manifestMap);
- const activeTasks = await filterActiveTasks(db, selectedDate, passToRun, rawTasks, logger);
-
- if (activeTasks.length > 0) {
- // DEBUG: Log what we're about to route
- logger.log('INFO', `[Dispatcher] 🔍 Date ${selectedDate}: ${rawTasks.length} raw tasks → ${activeTasks.length} after filtering. Routing...`);
- const { standard, highMem } = await splitRoutes(db, selectedDate, passToRun, activeTasks, logger);
- selectedTasks = [...standard, ...highMem];
-
- // DEBUG: Log routing results
- if (selectedTasks.length === 0 && activeTasks.length > 0) {
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${activeTasks.length} tasks filtered out by splitRoutes! Tasks: ${activeTasks.map(t => t.name).join(', ')}`);
- }
-
- if (selectedTasks.length > 0) {
- // Found work! Break loop to dispatch.
- break;
- }
- } else if (rawTasks.length > 0) {
- // DEBUG: Log if filterActiveTasks removed all tasks
- logger.log('WARN', `[Dispatcher] ⚠️ Date ${selectedDate}: ${rawTasks.length} raw tasks all filtered out by filterActiveTasks! Tasks: ${rawTasks.map(t => t.name).join(', ')}`);
- }
- }
- }
-
- // No work found for this date. Fast-forward to next.
- currentCursor++;
- }
-
- // --- Result Handling ---
-
- // Case 1: Satiated (Scanned to end of session with no work)
- if (currentCursor > sessionDates.length && selectedTasks.length === 0) {
- return {
- status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
- dispatched: 0,
- n_cursor_ignored: false,
- remainingDates: 0,
- nextCursor: currentCursor // Matches length + 1
- };
- }
-
- // Case 2: Paused by Limit (No work found yet, but more dates remain)
- if (selectedTasks.length === 0) {
- return {
- status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
- dispatched: 0,
- n_cursor_ignored: false,
- remainingDates: sessionDates.length - currentCursor + 1,
- nextCursor: currentCursor // Resume from here
- };
- }
-
- // Case 3: Work Found (Dispatching)
- const totalweight = selectedTasks.reduce((sum, t) => sum + (manifestWeightMap.get(normalizeName(t.name)) || 1.0), 0);
- const currentDispatchId = crypto.randomUUID();
- const etaSeconds = Math.max(20, Math.ceil(totalweight * BASE_SECONDS_PER_WEIGHT_UNIT));
-
- if (datesScanned > 1) {
- logger.log('INFO', `[Dispatcher] ⏩ Fast-forwarded ${datesScanned - 1} empty dates. Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
- } else {
- logger.log('INFO', `[Dispatcher] ✅ Dispatching ${selectedTasks.length} tasks for ${selectedDate}.`);
- }
-
- const mapToTaskPayload = (t) => {
- const traceId = crypto.randomBytes(16).toString('hex');
- const spanId = crypto.randomBytes(8).toString('hex');
- return {
- ...t,
- action: 'RUN_COMPUTATION_DATE',
- computation: t.name,
- date: selectedDate,
- pass: passToRun,
- dispatchId: currentDispatchId,
- triggerReason: t.reason,
- resources: t.resources || 'standard',
- traceContext: {
- traceId: traceId,
- spanId: spanId,
- sampled: true
- }
- };
- };
-
- const standardTasks = selectedTasks.filter(t => t.resources !== 'high-mem').map(mapToTaskPayload);
- const highMemTasks = selectedTasks.filter(t => t.resources === 'high-mem').map(mapToTaskPayload);
-
- const pubPromises = [];
- if (standardTasks.length > 0) {
- const taskNames = standardTasks.map(t => t.computation || t.name).join(', ');
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${standardTasks.length} standard tasks: ${taskNames}`, {
- date: selectedDate,
- pass: passToRun,
- tasks: standardTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'new' })),
- topic: config.computationTopicStandard || 'computation-tasks'
- });
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicStandard || 'computation-tasks',
- tasks: standardTasks,
- taskType: `pass-${passToRun}-std`
- }));
- }
- if (highMemTasks.length > 0) {
- const taskNames = highMemTasks.map(t => t.computation || t.name).join(', ');
- logger.log('INFO', `[Dispatcher] 📤 Dispatching ${highMemTasks.length} high-memory tasks: ${taskNames}`, {
- date: selectedDate,
- pass: passToRun,
- tasks: highMemTasks.map(t => ({ name: t.computation || t.name, reason: t.triggerReason || 'retry' })),
- topic: config.computationTopicHighMem || 'computation-tasks-highmem'
- });
- pubPromises.push(pubsubUtils.batchPublishTasks(dependencies, {
- topicName: config.computationTopicHighMem || 'computation-tasks-highmem',
- tasks: highMemTasks,
- taskType: `pass-${passToRun}-high`
- }));
- }
- await Promise.all(pubPromises);
-
- return {
- status: 'CONTINUE_PASS',
- dateProcessed: selectedDate,
- dispatched: selectedTasks.length,
- n_cursor_ignored: false,
- etaSeconds: etaSeconds,
- remainingDates: sessionDates.length - targetCursorN,
- nextCursor: currentCursor + 1 // Start next scan AFTER this date
- };
- }
-
- // =============================================================================
- // HELPER: Route Splitting (One-Shot Enforcement)
- // =============================================================================
- // [UPDATED] Route Splitting with Version-Aware Dead Lettering
- async function splitRoutes(db, date, pass, tasks, logger) {
- const standard = [];
- const highMem = [];
-
- for (const task of tasks) {
- const name = normalizeName(task.name);
- const ledgerPath = `computation_audit_ledger/${date}/passes/${pass}/tasks/${name}`;
- const doc = await db.doc(ledgerPath).get();
-
- if (!doc.exists) {
- // No ledger entry - trust analyzeDateExecution, dispatch as standard
- standard.push(task);
- continue;
- }
-
- const data = doc.data();
-
- // CRITICAL FIX: If analyzeDateExecution says this task should run, we MUST trust it.
- // The ledger might say COMPLETED, but if computation_status is missing/outdated,
- // we need to re-run to repair the state. Only skip if actively running.
- // Note: filterActiveTasks already filtered out non-stale PENDING/IN_PROGRESS,
- // but we double-check here in case of race conditions.
- if (['PENDING', 'IN_PROGRESS'].includes(data.status)) {
- // Check if it's stale (should have been caught by filterActiveTasks, but double-check)
- const lastActivityTime = getMillis(data.telemetry?.lastHeartbeat) || getMillis(data.startedAt);
-
- if ((Date.now() - lastActivityTime) > STALE_LOCK_THRESHOLD_MS) {
- // Stale lock - break it and continue
- logger.log('WARN', `[Dispatcher] 🧟 splitRoutes: Breaking stale lock for ${name}.`);
- // Fall through to handle as if no active lock
- } else {
- // Valid active lock - skip (shouldn't happen if filterActiveTasks worked correctly)
- logger.log('TRACE', `[Dispatcher] splitRoutes: Skipping ${name} - Valid IN_PROGRESS (should have been filtered earlier).`);
- continue;
- }
- }
-
- if (data.status === 'FAILED') {
- const stage = data.error?.stage;
-
- // 1. DETERMINISTIC FAILURES (Never Retry UNLESS Code Updated)
- if (['QUALITY_CIRCUIT_BREAKER', 'SEMANTIC_GATE', 'SHARDING_LIMIT_EXCEEDED'].includes(stage)) {
- // [FIX] Check if the hash matches. If code changed, we MUST retry.
- if (data.hash === task.hash) {
- logger.log('WARN', `[Dispatcher] 🛑 Dropping ${name} - Deterministic Failure (${stage}).`);
- continue;
- }
-
- // If hashes differ, we reset to Standard execution to give the new code a chance
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Retrying despite previous ${stage}.`);
- standard.push({ ...task, reason: 'Retry: Code Version Changed' });
- continue;
- }
-
- // 2. HIGH MEMORY FAILURE HANDLING (The New Logic)
- if (data.resourceTier === 'high-mem') {
- const failedHash = data.hash || data.composition?.code; // Support legacy or new structure
- const currentHash = task.hash;
-
- // A. EXACT CODE MATCH: It failed High-Mem with THIS code.
- if (failedHash === currentHash) {
- logger.log('WARN', `[Dispatcher] 💀 Dead End: ${name} failed High-Mem on this version (${currentHash.slice(0,6)}). Waiting for code fix.`);
- continue; // STOP. Do not retry.
- }
-
- // B. CODE MISMATCH: The code has changed since the High-Mem failure.
- // We reset it to 'standard' to see if the fix optimized memory usage.
- else {
- logger.log('INFO', `[Dispatcher] 🔄 Code Updated for ${name}. Resetting High-Mem failure to Standard retry.`);
- standard.push({
- ...task,
- reason: 'Retry: Code Version Changed'
- });
- continue;
- }
- }
-
- // 3. STANDARD FAILURE -> PROMOTE TO HIGH MEM
- highMem.push({
- ...task,
- resources: 'high-mem',
- reason: `Retry: ${data.error?.message || 'Standard Failure'}`
- });
-
- } else {
- // Status is likely COMPLETED or some other state.
- // CRITICAL: If analyzeDateExecution says this should run, we MUST trust it.
- // The ledger might show COMPLETED, but if computation_status is missing/outdated,
- // we need to re-run to repair the state. This is the "ghost state fix" logic.
- // Trust the Brain (analyzeDateExecution) over the Log (ledger).
- logger.log('INFO', `[Dispatcher] 🔄 splitRoutes: ${name} has ledger status '${data.status}', but analyzeDateExecution says it should run. Trusting analysis and dispatching.`);
- standard.push(task);
- }
- }
-
- return { standard, highMem };
- }
-
- module.exports = { dispatchComputationPass };
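
For readers skimming what was removed: the legacy dispatcher's header describes a strict one-shot escalation policy (Standard -> HighMem -> Dead Letter) gated by a stale-lock check on the audit ledger. The sketch below is a minimal, illustrative reconstruction of that decision flow only; it is not part of the published package's API, and names such as `routeTask`, `ledgerEntry`, and `STALE_LOCK_THRESHOLD_MS` are assumptions mirroring the removed file rather than anything exported by 1.0.733.

```js
// Illustrative sketch only, assuming the shapes used in the removed
// computation_dispatcherold.js; not the package's exported behavior.
const STALE_LOCK_THRESHOLD_MS = 15 * 60 * 1000;

// Firestore Timestamps expose .toDate(); plain Dates/strings do not.
function toMillis(field) {
  if (!field) return 0;
  if (typeof field.toDate === 'function') return field.toDate().getTime();
  const d = new Date(field);
  return Number.isNaN(d.getTime()) ? 0 : d.getTime();
}

// One-shot policy: a task runs once on the standard tier; a standard
// failure promotes it to high-mem; a high-mem failure on the same code
// hash is a dead end until the hash changes.
function routeTask(task, ledgerEntry) {
  if (!ledgerEntry) return 'standard';

  if (['PENDING', 'IN_PROGRESS'].includes(ledgerEntry.status)) {
    const lastActivity =
      toMillis(ledgerEntry.telemetry?.lastHeartbeat) || toMillis(ledgerEntry.startedAt);
    // Fresh lock: leave it alone. Stale ("zombie") lock: fall through and re-route.
    if (Date.now() - lastActivity <= STALE_LOCK_THRESHOLD_MS) return 'skip';
  }

  if (ledgerEntry.status === 'FAILED') {
    const sameCode = ledgerEntry.hash === task.hash;
    if (ledgerEntry.resourceTier === 'high-mem') {
      return sameCode ? 'dead-letter' : 'standard'; // retry only after a code change
    }
    return 'high-mem'; // first failure escalates exactly once
  }

  return 'standard';
}

// Example: a standard-tier failure on the current code hash escalates.
// routeTask({ hash: 'abc123' }, { status: 'FAILED', hash: 'abc123' }) === 'high-mem'
```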