bulltrackers-module 1.0.765 → 1.0.766

@@ -287,116 +287,126 @@ class Orchestrator {
   async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
     const checkpointer = new Checkpointer(this.config, this.storageManager);
     let cp = null;
-    if (!options.dryRun) {
-      const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
-      cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
-
-      if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
-      if (cp && cp.skipped) {
-        this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
-        return { count: 0, hash: 'skipped_dead_letter', skipped: true };
-      }
-      if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
-      if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
-    }
-
-    // DECISION: Use remote workers or local execution?
-    const useRemote = this._shouldUseRemoteWorkers(entry, options);

-    if (useRemote) {
-      this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
-      return this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
-    }
+    try {
+      if (!options.dryRun) {
+        const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
+        cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);

-    // LOCAL EXECUTION PATH (Original Logic)
-    const driverTable = this._getDriverTable(entry.requires);
-    const driverEntityField = this.config.tables[driverTable]?.entityField;
-    const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
+        if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
+        if (cp && cp.skipped) {
+          this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
+          return { count: 0, hash: 'skipped_dead_letter', skipped: true };
+        }
+        if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
+        if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
+      }

-    let globalData = {};
-    if (Object.keys(globalRequires).length > 0) {
-      globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
-    }
+      // DECISION: Use remote workers or local execution?
+      const useRemote = this._shouldUseRemoteWorkers(entry, options);
+
+      if (useRemote) {
+        this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
+        return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
+      }

-    const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
-    const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
-
-    const rollingHash = crypto.createHash('sha256');
-    let totalCount = 0;
-    let batchIndex = 0;
-    const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
-    const limit = pLimit(concurrency);
+      // LOCAL EXECUTION PATH (Original Logic)
+      const driverTable = this._getDriverTable(entry.requires);
+      const driverEntityField = this.config.tables[driverTable]?.entityField;
+      const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);

-    for await (const batch of batchStream) {
-      if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
-        batchIndex++; continue;
+      let globalData = {};
+      if (Object.keys(globalRequires).length > 0) {
+        globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
       }

-      const { data: batchLocalData, entityIds } = batch;
-      const combinedData = { ...batchLocalData, ...globalData };
+      const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
+      const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);

-      // STRICT FIX: Prefetch dependencies for the batch.
-      const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
-
-      const { rules } = this.ruleInjector.createContext();
-      const batchResults = {};
-
-      await Promise.all(entityIds.map(entityId => limit(async () => {
-        const instance = new entry.class();
-        const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
+      const rollingHash = crypto.createHash('sha256');
+      let totalCount = 0;
+      let batchIndex = 0;
+      const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
+      const limit = pLimit(concurrency);
+
+      for await (const batch of batchStream) {
+        if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
+          batchIndex++; continue;
+        }
+
+        const { data: batchLocalData, entityIds } = batch;
+        const combinedData = { ...batchLocalData, ...globalData };
+
+        // STRICT FIX: Prefetch dependencies for the batch.
+        const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);

-        const context = {
-          computation: entry, date: dateStr, entityId, data: entityData,
+        const { rules } = this.ruleInjector.createContext();
+        const batchResults = {};
+
+        await Promise.all(entityIds.map(entityId => limit(async () => {
+          const instance = new entry.class();
+          const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);

-          // STRICT FIX: No fallback to _lazyLoadDependency.
-          getDependency: (depName, targetId) => {
-            const id = targetId || entityId;
-
-            // 1. Look in Batch-Prefetched Dependencies (Priority)
-            if (batchDeps[depName] && batchDeps[depName].has(id)) {
-              return batchDeps[depName].get(id);
-            }
+          const context = {
+            computation: entry, date: dateStr, entityId, data: entityData,

-            // 2. Look in Global/Preloaded Dependencies
-            if (depResults[depName]) {
-              if (depResults[depName][id] !== undefined) return depResults[depName][id];
-            }
+            // STRICT FIX: No fallback to _lazyLoadDependency.
+            getDependency: (depName, targetId) => {
+              const id = targetId || entityId;
+
+              // 1. Look in Batch-Prefetched Dependencies (Priority)
+              if (batchDeps[depName] && batchDeps[depName].has(id)) {
+                return batchDeps[depName].get(id);
+              }
+
+              // 2. Look in Global/Preloaded Dependencies
+              if (depResults[depName]) {
+                if (depResults[depName][id] !== undefined) return depResults[depName][id];
+              }
+
+              // 3. STRICT MODE: Throw Error
+              throw new Error(
+                `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
+                `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
+              );
+            },

-            // 3. STRICT MODE: Throw Error
-            throw new Error(
-              `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
-              `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
-            );
-          },
-
-          previousResult, rules, references: this.referenceDataCache,
-          config: this.config, dataFetcher: this.dataFetcher
-        };
+            previousResult, rules, references: this.referenceDataCache,
+            config: this.config, dataFetcher: this.dataFetcher
+          };
+
+          const result = await this.runner.run(instance, context);
+          if (result !== undefined) {
+            batchResults[entityId] = result;
+            this._updateRollingHash(rollingHash, result);
+          }
+        })));

-        const result = await this.runner.run(instance, context);
-        if (result !== undefined) {
-          batchResults[entityId] = result;
-          this._updateRollingHash(rollingHash, result);
+        if (!options.dryRun) {
+          await this.storageManager.commitResults(dateStr, entry, batchResults, {});
+          const lastId = entityIds[entityIds.length - 1];
+          await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
         }
-      })));

-      if (!options.dryRun) {
-        await this.storageManager.commitResults(dateStr, entry, batchResults, {});
-        const lastId = entityIds[entityIds.length - 1];
-        await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
+        totalCount += Object.keys(batchResults).length;
+        batchIndex++;
       }

-      totalCount += Object.keys(batchResults).length;
-      batchIndex++;
-    }
+      if (!options.dryRun) {
+        await this.storageManager.finalizeResults(dateStr, entry);
+        if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
+      }

-    if (!options.dryRun) {
-      await this.storageManager.finalizeResults(dateStr, entry);
-      if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
+      // FIX: Return valid object shape including skipped: false
+      return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
+
+    } catch (error) {
+      if (cp && cp.id) {
+        this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
+        await this.storageManager.failCheckpoint(cp.id, error.message);
+      }
+      throw error;
     }
-
-    // FIX: Return valid object shape including skipped: false
-    return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
   }

   /**
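The net effect of this hunk is a single failure path for `_executeStreaming`: the whole body now runs inside `try`/`catch`, any error marks the active checkpoint as failed via `storageManager.failCheckpoint` before rethrowing, and every successful exit returns the same `{ count, hash, skipped }` shape (the remote path now `await`s, so its failures are caught too). A minimal caller sketch against that contract — `orchestrator`, `entry`, and `opts` are illustrative stand-ins, not exports of this package:

```js
// Hypothetical caller; relies only on the result contract visible in the hunk.
async function runOnce(orchestrator, entry, dateStr, opts) {
  try {
    const result = await orchestrator._executeStreaming(entry, dateStr, {}, null, opts);
    if (result.skipped) {
      // hash is 'cached' or 'skipped_dead_letter' -- nothing ran this time.
      console.log(`[Run] ${entry.name} skipped (${result.hash})`);
    } else {
      console.log(`[Run] ${entry.name}: ${result.count} entities, hash=${result.hash}`);
    }
    return result;
  } catch (err) {
    // The checkpoint (if any) was already marked failed inside
    // _executeStreaming, so the watchdog can later recover the run.
    console.error(`[Run] ${entry.name} failed: ${err.message}`);
    throw err;
  }
}
```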
@@ -6,6 +6,7 @@
  * - finalizeResults: Loads all GCS files to a temp table and performs a single MERGE
  * * V2.2 UPDATE: Added saveCheckpoint for Append-Only Checkpointer support.
  * * V2.3 UPDATE: Parallelized GCS and Firestore writes in commitResults.
+ * * V2.4 UPDATE: Added Attempt Tracking for Zombie Detection.
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
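The V2.4 note ties this file to the watchdog change below: recovery attempts are now counted per checkpoint. The diff never shows how the counter is written, but since the checkpointer is append-only (V2.2), one plausible shape is that each recovery claim appends a fresh `running` row with an incremented counter. The following is a speculative sketch only — the shipped `claimZombie` takes just a `checkpointId`, and the table layout here is inferred from the `findZombies` query below:

```js
// SPECULATIVE sketch of an append-only claim that carries the attempt
// counter; not the package's actual implementation.
const { BigQuery } = require('@google-cloud/bigquery');

async function claimZombie(bigquery, table, zombie) {
  // Append a new 'running' row rather than updating in place, so the
  // ROW_NUMBER() dedup in findZombies sees it as the latest state.
  const query = `
    INSERT INTO \`${table}\`
      (computation_name, date, checkpoint_id, status, attempts, last_updated)
    VALUES
      (@name, @date, @checkpointId, 'running', @attempts, CURRENT_TIMESTAMP())`;
  await bigquery.query({
    query,
    params: {
      name: zombie.name,
      date: zombie.date,
      checkpointId: zombie.checkpointId,
      attempts: (zombie.attempts || 0) + 1, // one more recovery attempt
    },
  });
}
```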
@@ -351,8 +352,9 @@ class StorageManager {
       // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
       // We only count it as a zombie if the LATEST row is 'running'.
       // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
+      // UPDATE: Added attempts to the selection
       const query = `
-        SELECT computation_name, date, checkpoint_id, last_updated
+        SELECT computation_name, date, checkpoint_id, last_updated, attempts
         FROM (
           SELECT
             computation_name,
@@ -360,6 +362,7 @@ class StorageManager {
             checkpoint_id,
             last_updated,
             status,
+            attempts,
             ROW_NUMBER() OVER (
               PARTITION BY computation_name, date
               ORDER BY last_updated DESC,
@@ -377,7 +380,12 @@ class StorageManager {
         LIMIT 50
       `;
       const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
-      return rows.map(r => ({ name: r.computation_name, date: r.date.value || r.date, checkpointId: r.checkpoint_id }));
+      return rows.map(r => ({
+        name: r.computation_name,
+        date: r.date.value || r.date,
+        checkpointId: r.checkpoint_id,
+        attempts: r.attempts
+      }));
     } catch (e) {
       console.error(`[Storage] findZombies failed: ${e.message}`);
       return [];
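The `ROW_NUMBER()` subquery keeps only the newest row per `(computation_name, date)` pair and reports a zombie only when that newest row is still `running`; the `ORDER BY` tiebreaker is truncated in this hunk, so ties are not modeled below. The same selection restated over an in-memory array, for illustration only (the shipped code does this in BigQuery):

```js
// Illustrative restatement of the findZombies selection in plain JS.
// rows: [{ computation_name, date, checkpoint_id, last_updated, status, attempts }]
// last_updated is assumed to be epoch milliseconds here.
function selectZombies(rows, cutoffMs) {
  const latestByKey = new Map();
  for (const r of rows) {
    const key = `${r.computation_name}|${r.date}`;
    const prev = latestByKey.get(key);
    // Keep the most recently updated row per computation/date pair.
    if (!prev || r.last_updated > prev.last_updated) latestByKey.set(key, r);
  }
  return [...latestByKey.values()]
    // A stale 'running' row with a newer 'completed' sibling is NOT a
    // zombie, because the completed sibling wins the selection above.
    .filter(r => r.status === 'running' && r.last_updated < cutoffMs)
    .map(r => ({
      name: r.computation_name,
      date: r.date,
      checkpointId: r.checkpoint_id,
      attempts: r.attempts,
    }));
}
```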
@@ -121,17 +121,27 @@ async function runWatchdog(req, res) {
   // 1. Find Zombies
   const zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);

-  if (zombies.length === 0) {
-    return res.status(200).send('No zombies detected.');
+  // Filter out excessive attempts
+  const actionableZombies = [];
+  for (const z of zombies) {
+    if ((z.attempts || 0) >= 3) {
+      console.warn(`[Watchdog] Ignoring zombie ${z.name} (Checkpoint: ${z.checkpointId}) - Max attempts reached (${z.attempts})`);
+      continue;
+    }
+    actionableZombies.push(z);
+  }
+
+  if (actionableZombies.length === 0) {
+    return res.status(200).send('No recoverable zombies.');
   }

-  console.log(`[Watchdog] 🧟 Found ${zombies.length} zombies. Initiating recovery...`);
+  console.log(`[Watchdog] 🧟 Found ${actionableZombies.length} zombies. Initiating recovery...`);

   // 2. Claim & Recover
   // We claim them first so the next watchdog doesn't grab them while we are dispatching
-  await Promise.all(zombies.map(z => storageManager.claimZombie(z.checkpointId)));
+  await Promise.all(actionableZombies.map(z => storageManager.claimZombie(z.checkpointId)));

-  const recoveryTasks = zombies.map(z => {
+  const recoveryTasks = actionableZombies.map(z => {
     const entry = manifest.find(m => m.name === z.name);
     if (!entry) {
       console.error(`[Watchdog] Computation ${z.name} no longer exists in manifest. Cannot recover.`);
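With `attempts` flowing through `findZombies`, the watchdog now caps each zombie at three recovery attempts and claims the survivors before dispatching, guarding against both infinite restart loops and double-dispatch from overlapping watchdog runs. The gate extracted as a pure function — `MAX_ATTEMPTS` and `partitionZombies` are illustrative names, not package exports:

```js
// Hypothetical extraction of the attempts gate from runWatchdog.
const MAX_ATTEMPTS = 3; // mirrors the `>= 3` threshold in the diff

function partitionZombies(zombies) {
  const actionable = [];
  const exhausted = [];
  for (const z of zombies) {
    // attempts may be missing on checkpoints created before V2.4.
    ((z.attempts || 0) >= MAX_ATTEMPTS ? exhausted : actionable).push(z);
  }
  return { actionable, exhausted };
}

// Usage: recover `actionable`; alert or dead-letter on `exhausted`.
const { actionable, exhausted } = partitionZombies([
  { name: 'dailyPnl', checkpointId: 'cp1', attempts: 1 },
  { name: 'riskRollup', checkpointId: 'cp2', attempts: 3 },
]);
// actionable -> dailyPnl; exhausted -> riskRollup
```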
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bulltrackers-module",
-  "version": "1.0.765",
+  "version": "1.0.766",
   "description": "Helper Functions for Bulltrackers.",
   "main": "index.js",
   "files": [