bulltrackers-module 1.0.808 → 1.0.809

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -31,7 +31,6 @@ class StateRepository {
31
31
  const fullResultsTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${this.resultsTable}\``;
32
32
  const query = `SELECT entity_id, result_data FROM ${fullResultsTable} WHERE date = CAST(@date AS DATE) AND computation_name = @comp AND entity_id IN UNNEST(@ids)`;
33
33
 
34
- // [FIX] Removed try/catch. If BQ fails, we must fail.
35
34
  const [rows] = await this.bigquery.query({ query, params: { date: dateStr, comp: computationName, ids: entityIds }, location: this.config.bigquery.location });
36
35
  const resultMap = {};
37
36
  for (const row of rows) {
@@ -46,7 +45,6 @@ class StateRepository {
46
45
  const fullResultsTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${this.resultsTable}\``;
47
46
  const query = `SELECT result_data FROM ${fullResultsTable} WHERE date = CAST(@date AS DATE) AND computation_name = @comp AND entity_id = '_global' LIMIT 1`;
48
47
 
49
- // [FIX] Removed try/catch
50
48
  const [rows] = await this.bigquery.query({ query, params: { date: dateStr, comp: computationName }, location: this.config.bigquery.location });
51
49
  if (rows.length === 0) return null;
52
50
  let data = rows[0].result_data;
@@ -55,7 +53,11 @@ class StateRepository {
55
53
  }
56
54
 
57
55
  async getDailyStatus(dateStr) {
58
- // [FIX] Removed try/catch. Do not return empty map on error.
56
+ // [FIX] REMOVED try/catch.
57
+ // If Firestore is down, we WANT this to throw.
58
+ // Returning an empty Map causes the Planner to believe ALL history is missing,
59
+ // triggering catastrophic backfills.
60
+
59
61
  const snapshot = await this.firestore.collectionGroup('runs')
60
62
  .where('date', '==', dateStr)
61
63
  .get();
@@ -123,7 +125,7 @@ class StateRepository {
123
125
  lastUpdated: new Date(),
124
126
  startedAt: metadata.startedAt ? new Date(metadata.startedAt) : new Date(),
125
127
  totalBatches: metadata.totalBatches || 0,
126
- finalized: false // [FIX] Initialize finalized flag for race-condition prevention
128
+ finalized: false
127
129
  };
128
130
 
129
131
  await docRef.set(data, { merge: true });
@@ -155,17 +157,11 @@ class StateRepository {
155
157
 
156
158
  return { completedBatches: snapshot.data().count };
157
159
  } catch (e) {
158
- // [FIX] Log the error!
159
160
  this.logger.error(`[StateRepo] getCheckpointProgress failed for ${checkpointId}: ${e.message}`);
160
161
  return { completedBatches: 0 };
161
162
  }
162
163
  }
163
164
 
164
- /**
165
- * [FIX] Atomic Transaction to Claim Finalization Rights.
166
- * Prevents multiple workers from triggering finalization simultaneously.
167
- * @returns {Promise<boolean>} true if claim successful, false otherwise.
168
- */
169
165
  async claimFinalization(checkpointId, requiredTotalBatches) {
170
166
  const checkpointRef = this.firestore.collection(this.collections.checkpoints).doc(checkpointId);
171
167
 
@@ -176,12 +172,10 @@ class StateRepository {
176
172
 
177
173
  const data = doc.data();
178
174
 
179
- // 1. Check if already finalized or finalizing
180
175
  if (data.finalized || data.status === 'finalizing') {
181
176
  return false;
182
177
  }
183
178
 
184
- // 2. Check if actually complete (Double check inside transaction)
185
179
  const batchesSnap = await t.get(
186
180
  this.firestore.collection(this.collections.checkpoints)
187
181
  .doc(checkpointId)
@@ -192,13 +186,12 @@ class StateRepository {
192
186
  const currentCount = batchesSnap.data().count;
193
187
 
194
188
  if (currentCount < requiredTotalBatches) {
195
- return false; // Still pending
189
+ return false;
196
190
  }
197
191
 
198
- // 3. Claim it
199
192
  t.update(checkpointRef, {
200
193
  status: 'finalizing',
201
- finalized: true, // Mark as claimed
194
+ finalized: true,
202
195
  finalizationStartedAt: new Date()
203
196
  });
204
197
 
@@ -303,7 +296,6 @@ class StateRepository {
303
296
  async findZombies(thresholdMinutes) {
304
297
  const thresholdDate = new Date(Date.now() - (thresholdMinutes * 60000));
305
298
  try {
306
- // NOTE: Requires Composite Index on (status, lastUpdated)
307
299
  const snapshot = await this.firestore.collection(this.collections.checkpoints)
308
300
  .where('status', '==', 'running')
309
301
  .where('lastUpdated', '<', thresholdDate)
@@ -322,7 +314,6 @@ class StateRepository {
322
314
  });
323
315
  return zombies;
324
316
  } catch (e) {
325
- // [FIX] Do NOT fail silently. If the DB is down, we must know.
326
317
  this.logger.error(`[StateRepo] CRITICAL: findZombies failed: ${e.message}`);
327
318
  throw e;
328
319
  }
@@ -333,7 +324,6 @@ class StateRepository {
333
324
  await docRef.set({
334
325
  status: 'recovering',
335
326
  lastUpdated: new Date(),
336
- // Uses atomic increment to track retry count
337
327
  attempts: FieldValue.increment(1)
338
328
  }, { merge: true });
339
329
  }
@@ -5,6 +5,7 @@
5
5
  * FIX: Passes computationName to BQ Adapter for better logging.
6
6
  * FIX: Solved Race Condition in Finalization.
7
7
  * FIX: Solved Hardcoded Failure Thresholds.
8
+ * FIX: Persists 'blocked' status to prevent planner loops.
8
9
  */
9
10
 
10
11
  const { RunAnalyzer } = require('./RunAnalyzer');
@@ -48,6 +49,18 @@ class Coordinator {
48
49
 
49
50
  if (analysis.status === 'blocked') {
50
51
  this.logger.log(`[Coordinator] Blocked: ${analysis.reason}`);
52
+
53
+ // [FIX] Update state to 'blocked' so Planner knows it's handled.
54
+ if (!dryRun) {
55
+ await this.stateRepo.updateStatus({
56
+ computation: entry.name,
57
+ date,
58
+ status: 'blocked',
59
+ hash: entry.hash,
60
+ metadata: { blockedReason: analysis.reason }
61
+ });
62
+ }
63
+
51
64
  return { status: 'blocked', reason: analysis.reason };
52
65
  }
53
66
 
@@ -122,9 +135,9 @@ class Coordinator {
122
135
 
123
136
  if (failures.length > 0 && !dryRun && this.stateRepo.logBatchErrors) { await this.stateRepo.logBatchErrors(checkpointId || 'unknown-checkpoint', failures); }
124
137
 
125
- // [FIX] Configurable Failure Thresholds (Instead of hardcoded 10%)
126
- const maxFailureRate = this.config.execution?.maxFailureRate ?? 0.15; // Default 15%
127
- const minFailuresForAbort = this.config.execution?.minFailuresForAbort ?? 5; // Minimum failures before aborting
138
+ // [FIX] Configurable Failure Thresholds
139
+ const maxFailureRate = this.config.execution?.maxFailureRate ?? 0.15;
140
+ const minFailuresForAbort = this.config.execution?.minFailuresForAbort ?? 5;
128
141
 
129
142
  const failureRate = failures.length / entityIds.length;
130
143
  if (entityIds.length > 0 && failures.length >= minFailuresForAbort && failureRate > maxFailureRate) {
@@ -162,14 +175,12 @@ class Coordinator {
162
175
 
163
176
  async tryFinalizeComputation({ computationName, date, checkpointId, totalBatches }) {
164
177
  // [FIX] Use Atomic Claim Transaction instead of Check-Then-Act
165
- // This prevents multiple workers from winning the race to finalize
166
178
  const claimed = await this.stateRepo.claimFinalization(checkpointId, totalBatches);
167
179
 
168
180
  if (claimed) {
169
181
  return await this.finalizeComputation({ computationName, date, checkpointId });
170
182
  }
171
183
 
172
- // If not claimed, check if it's because it's still pending or already done
173
184
  const progress = await this.stateRepo.getCheckpointProgress(checkpointId);
174
185
  return { status: 'pending_or_finalized', progress: `${progress.completedBatches}/${totalBatches}` };
175
186
  }
@@ -178,14 +189,11 @@ class Coordinator {
178
189
  const entry = this.manifestMap.get(computationName.toLowerCase());
179
190
  this.logger.log(`[Coordinator] Finalizing ${entry.name} for ${date}...`);
180
191
 
181
- const start = Date.now(); // Track finalization timing
182
192
  const dailyStatus = await this.stateRepo.getDailyStatus(date);
183
193
  const previousRun = dailyStatus.get(entry.name);
184
194
 
185
- // Finalize storage (merge results)
186
195
  const { resultHash } = await this.storage.finalizeResults(date, entry) || {};
187
196
 
188
- // Update State
189
197
  await this.stateRepo.updateStatus({
190
198
  computation: entry.name,
191
199
  date,
@@ -195,14 +203,11 @@ class Coordinator {
195
203
  metadata: { checkpointId, finalizedAt: new Date() }
196
204
  });
197
205
 
198
- // [ADD] Discord Alert
199
206
  if (this.notifier) {
200
- // Calculate total duration if we have metadata, otherwise just use current op
201
- const duration = 0; // Simple placeholder, or calculate from checkpoint start time
207
+ const duration = 0;
202
208
  this.notifier.reportSuccess(entry.name, date, duration, resultHash);
203
209
  }
204
210
 
205
- // Trigger Cascades
206
211
  if (resultHash && (!previousRun || resultHash !== previousRun.resultHash)) {
207
212
  await this._triggerCascading(entry, date);
208
213
  }
@@ -232,7 +237,6 @@ class Coordinator {
232
237
  const checkpointId = `${entry.name}-${date}-${Date.now()}`;
233
238
  let batchCounter = 0;
234
239
 
235
- // [FIX] Added hash to task name to prevent collision on re-runs
236
240
  const configHash = entry.hash ? entry.hash.substring(0, 8) : 'nohash';
237
241
 
238
242
  const createBatchTask = (batchIds) => {
@@ -365,7 +369,6 @@ class Coordinator {
365
369
  const totalBatches = batchCounter;
366
370
 
367
371
  if (remainingBatches.length === 0) {
368
- // [ADD] Explicit Diagnostic Log
369
372
  this.logger.log(`[Coordinator] [Recovery] Diagnosis for ${entry.name}: All ${totalBatches} batches complete. Missing Finalizer. Triggering immediately.`);
370
373
 
371
374
  await this._triggerTryFinalizeTask(entry, date, checkpointId, 0, totalBatches, true);
@@ -4,6 +4,8 @@
4
4
  * - Allows any pass to be scheduled if Hash Mismatch is detected.
5
5
  * - Enforces execution order via ETAs: Pass N runs 10 mins after Pass N-1.
6
6
  * - FIX: Enforces UTC date arithmetic.
7
+ * - FIX: T-1 Scheduling Logic.
8
+ * - FIX: Handles 'blocked' status properly.
7
9
  */
8
10
 
9
11
  const { ScheduleValidator } = require('./ScheduleValidator');
@@ -22,11 +24,9 @@ class TaskScheduler {
22
24
  this.LOOKBACK_DAYS = config.planningLookbackDays ?? 7;
23
25
  this.LOOKAHEAD_HOURS = config.planningLookaheadHours ?? 24;
24
26
  this.ZOMBIE_THRESHOLD_MINUTES = config.zombieThresholdMinutes ?? 15;
25
-
26
- // 10 Minute buffer per pass level to ensure upstream dependencies
27
- // have time to finish (or trigger cascades) before downstream runs.
28
27
  this.PASS_DELAY_SECONDS = config.passDelaySeconds ?? 600;
29
28
 
29
+ // [FIX] Default to -1 (Yesterday) to enforce T-1 rule
30
30
  this.MAX_DATE_OFFSET = config.scheduling?.maxSchedulingDateOffset ?? -1;
31
31
  }
32
32
 
@@ -43,6 +43,7 @@ class TaskScheduler {
43
43
 
44
44
  this.logger.log(`[Planner] Reconciling: ${windowStart.toISOString()} to ${windowEnd.toISOString()}`);
45
45
 
46
+ // [FIX] Use Date Generator that respects MAX_DATE_OFFSET
46
47
  const targetDates = this._generateDateRange(windowStart, windowEnd);
47
48
  const tasksToSchedule = new Map();
48
49
  const stats = { checked: 0, missing: 0, mismatched: 0, scheduled: 0, exists: 0, deleted: 0 };
@@ -52,20 +53,22 @@ class TaskScheduler {
52
53
  const dailyStatus = await this.stateRepo.getDailyStatus(dateStr);
53
54
 
54
55
  for (const entry of this.manifest) {
55
- // [UPDATE] Removed "entry.pass !== 1" check.
56
- // We now evaluate ALL passes for potential fixes/backfills.
57
-
58
56
  const effectiveSchedule = this.validator.parseSchedule(entry.schedule);
59
57
  if (!this.validator.shouldRunOnDate(effectiveSchedule, dateObj)) continue;
60
58
 
61
59
  stats.checked++;
62
60
  const statusEntry = dailyStatus instanceof Map ? dailyStatus.get(entry.name) : dailyStatus[entry.name];
61
+
63
62
  const lastRunHash = statusEntry?.hash;
64
63
  const rawStatus = statusEntry?.status;
65
64
  const status = rawStatus ? rawStatus.toLowerCase() : null;
66
65
 
67
66
  let reason = null;
68
67
 
68
+ // [FIX] Logic to handle 'blocked' status.
69
+ // If it is 'blocked', we generally treat it as handled (do nothing).
70
+ // It will only be retried if it is truly missing or failed.
71
+
69
72
  if (!statusEntry || status === 'pending') {
70
73
  reason = 'MISSING_RUN';
71
74
  stats.missing++;
@@ -74,7 +77,8 @@ class TaskScheduler {
74
77
  reason = 'RETRY_FAILED';
75
78
  stats.missing++;
76
79
  }
77
- else if (lastRunHash !== entry.hash && status !== 'running') {
80
+ // Check mismatch only if not running and not blocked (blocked means we are waiting for something else)
81
+ else if (lastRunHash !== entry.hash && status !== 'running' && status !== 'blocked') {
78
82
  reason = 'HASH_MISMATCH';
79
83
  stats.mismatched++;
80
84
  }
@@ -83,17 +87,13 @@ class TaskScheduler {
83
87
  const taskKey = `root-${entry.name}-${dateStr}-${entry.hash}`;
84
88
 
85
89
  if (!tasksToSchedule.has(taskKey)) {
86
- // 1. Calculate Base Window (When it SHOULD run according to schedule)
90
+ // 1. Calculate Base Window
87
91
  const baseRunAt = this._getNextRunWindow(effectiveSchedule, dateObj);
88
92
 
89
93
  // 2. Calculate Topological Delay
90
- // If Pass 1 runs at T, Pass 2 runs at T + 10m, Pass 3 at T + 20m.
91
- // This allows Pass 1 to finish and trigger a natural Cascade for Pass 2
92
- // BEFORE the scheduled Pass 2 task fires (avoiding redundancy/conflicts).
93
94
  const passDelay = (entry.pass - 1) * this.PASS_DELAY_SECONDS;
94
95
 
95
96
  // 3. Determine Final Execution Time
96
- // If baseRunAt is 0 (meaning "ASAP" / window passed), we base delay on NOW.
97
97
  const nowSeconds = Math.floor(Date.now() / 1000);
98
98
  const effectiveBase = baseRunAt === 0 ? nowSeconds : baseRunAt;
99
99
  const runAtSeconds = effectiveBase + passDelay;
@@ -140,7 +140,6 @@ class TaskScheduler {
140
140
 
141
141
  /**
142
142
  * Watchdog: Find and recover zombie tasks.
143
- * UPDATED: Now diagnoses the cause of the zombie state.
144
143
  */
145
144
  async runWatchdog() {
146
145
  if (!this.stateRepo.findZombies) {
@@ -155,17 +154,16 @@ class TaskScheduler {
155
154
 
156
155
  this.logger.log(`[Watchdog] Found ${recoverable.length} zombies.`);
157
156
 
158
- // [ADD] Diagnostic Phase: Inspect checkpoints to see WHY they are zombies
157
+ // [ADD] Diagnostic Phase
159
158
  const diagnostics = await Promise.all(recoverable.map(async z => {
160
159
  try {
161
- // Use existing repo methods to peek at state
162
160
  const progress = await this.stateRepo.getCheckpointProgress(z.checkpointId);
163
161
  const checkpoint = await this.stateRepo.loadCheckpoint(z.checkpointId);
164
162
 
165
163
  const total = checkpoint?.totalBatches || 0;
166
164
  const completed = progress.completedBatches || 0;
167
165
  let reason = 'Unknown Stuck State';
168
- let type = 'warn'; // warn vs success
166
+ let type = 'warn';
169
167
 
170
168
  if (total > 0 && completed >= total) {
171
169
  reason = '✅ All Batches Done (Finalizer Failed)';
@@ -185,16 +183,25 @@ class TaskScheduler {
185
183
  }
186
184
  }));
187
185
 
188
- // Notify Discord with enhanced info
189
186
  if (this.notifier) {
190
187
  await this.notifier.reportZombies(recoverable.length, diagnostics);
191
188
  }
192
189
 
193
- // Claim zombies
194
190
  await Promise.all(recoverable.map(z => this.stateRepo.claimZombie(z.checkpointId)));
195
191
 
196
- // Re-dispatch logic
197
192
  const recoveryTasks = recoverable.map(z => {
193
+ // [FIX] Enforce T-1 Rule on Zombies too to avoid resurrecting future tasks
194
+ const zDate = new Date(z.date);
195
+ const now = new Date();
196
+ const maxAllowed = new Date(now);
197
+ maxAllowed.setUTCDate(now.getUTCDate() + this.MAX_DATE_OFFSET);
198
+ maxAllowed.setUTCHours(23, 59, 59, 999);
199
+
200
+ if (zDate > maxAllowed) {
201
+ this.logger.warn(`[Watchdog] Skipping recovery for ${z.name}@${z.date} (Outside Scheduling Window)`);
202
+ return null;
203
+ }
204
+
198
205
  const entry = this.manifestMap.get(z.name);
199
206
  if (!entry) return null;
200
207
 
@@ -219,7 +226,7 @@ class TaskScheduler {
219
226
  const dates = [];
220
227
  let cur = new Date(start);
221
228
 
222
- // [FIX] Cap the scheduling window to T-1 (Yesterday)
229
+ // [FIX] Cap the scheduling window based on config (Default T-1)
223
230
  const now = new Date();
224
231
  const maxAllowed = new Date(now);
225
232
  maxAllowed.setUTCDate(now.getUTCDate() + this.MAX_DATE_OFFSET);
@@ -1,61 +1,73 @@
1
1
  /**
2
- * V3 Dispatcher Handler (FIXED)
3
- * Routes tasks to the correct controller method.
2
+ * V3 Dispatcher Handler (FIXED & LOGGING ENHANCED)
3
+ * Routes tasks and ensures errors are returned to Cloud Tasks logs.
4
4
  */
5
5
  exports.dispatcher = async (req, res) => {
6
+ // [DEBUG] Log the incoming trigger to match Cloud Tasks timestamps
7
+ const taskId = req.get('X-CloudTasks-TaskName') || 'unknown-task';
8
+ console.log(`[Dispatcher] Received task ${taskId} type=${req.body.type || 'unknown'}`);
9
+
6
10
  try {
7
11
  const system = require('../index');
8
12
  await system.initialize();
9
13
 
10
- const { type, computation, date, dryRun, ...params } = req.body;
14
+ const { type, computation, date, entityIds, ...params } = req.body;
11
15
 
12
- // 1. Worker Batch
16
+ // 1. Route Worker Batches
13
17
  if (type === 'worker-batch') {
14
- if (!params.entityIds) return res.status(400).json({ error: 'Missing entityIds' });
15
- return res.status(200).json(await system.coordinatorInstance.processBatch({
16
- computationName: computation,
17
- date,
18
- entityIds: params.entityIds,
19
- dryRun,
20
- checkpointId: params.checkpointId,
21
- batchNum: params.batchNum,
22
- totalBatches: params.totalBatches
23
- }));
18
+ if (!entityIds) throw new Error('Missing entityIds');
19
+ const result = await system.coordinatorInstance.processBatch({
20
+ computationName: computation, date, entityIds, ...params
21
+ });
22
+ return res.status(200).json(result);
24
23
  }
25
24
 
26
- // 2. Finalizer (Crucial Fix)
25
+ // 2. Route Finalizer
27
26
  if (type === 'try-finalize') {
28
- return res.status(200).json(await system.coordinatorInstance.tryFinalizeComputation({
29
- computationName: computation,
30
- date,
31
- checkpointId: params.checkpointId,
32
- totalBatches: params.totalBatches
33
- }));
27
+ const result = await system.coordinatorInstance.tryFinalizeComputation({
28
+ computationName: computation, date, ...params
29
+ });
30
+ return res.status(200).json(result);
34
31
  }
35
32
 
36
- // 3. Cascade Trigger
33
+ // 3. Route Cascade
37
34
  if (type === 'cascade-trigger') {
38
- return res.status(200).json(await system.runComputation({
39
- computationName: computation,
40
- date,
41
- force: params.force || false
42
- }));
35
+ const result = await system.runComputation({
36
+ computationName: computation, date, force: params.force || false
37
+ });
38
+ return res.status(200).json(result);
43
39
  }
44
40
 
45
- // 4. Coordinator (Default Entry Point)
41
+ // 4. Default: Coordinator (Root Triggers)
46
42
  const compName = computation || req.body.computationName;
47
- const targetDate = date || req.body.targetDate;
48
-
49
- if (!compName) return res.status(400).json({ error: 'Missing computation name' });
43
+ if (!compName) throw new Error('Missing computation name');
50
44
 
51
- return res.status(200).json(await system.runComputation({
45
+ const result = await system.runComputation({
52
46
  computationName: compName,
53
- date: targetDate || new Date().toISOString().split('T')[0],
54
- dryRun: dryRun || false
55
- }));
47
+ date: date || new Date().toISOString().split('T')[0],
48
+ dryRun: params.dryRun || false,
49
+ ...params
50
+ });
51
+
52
+ // [LOGIC FIX] If the result is 'blocked', return 200 (OK) so Cloud Tasks STOPS retrying.
53
+ // If we return 500, Cloud Tasks will retry forever (or until max attempts).
54
+ if (result.status === 'blocked') {
55
+ console.log(`[Dispatcher] Task Blocked (Handling as Success to stop retry): ${result.reason}`);
56
+ return res.status(200).json(result);
57
+ }
58
+
59
+ return res.status(200).json(result);
56
60
 
57
61
  } catch (e) {
58
- console.error('[V3-Dispatcher] Error:', e);
59
- return res.status(500).json({ error: e.message });
62
+ // [CRITICAL] Log error to Cloud Logging
63
+ console.error(`[V3-Dispatcher] CRITICAL ERROR on ${taskId}:`, e);
64
+
65
+ // Return 500 with the error message.
66
+ // Cloud Tasks 'attemptResponseLog' usually captures the first few bytes of the body.
67
+ return res.status(500).json({
68
+ error: e.message,
69
+ stack: e.stack ? e.stack.split('\n')[0] : 'no-stack',
70
+ taskId
71
+ });
60
72
  }
61
73
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.808",
3
+ "version": "1.0.809",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [