bulltrackers-module 1.0.765 → 1.0.766
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
|
@@ -287,116 +287,126 @@ class Orchestrator {
|
|
|
287
287
|
async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
|
|
288
288
|
const checkpointer = new Checkpointer(this.config, this.storageManager);
|
|
289
289
|
let cp = null;
|
|
290
|
-
if (!options.dryRun) {
|
|
291
|
-
const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
|
|
292
|
-
cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
|
|
293
|
-
|
|
294
|
-
if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
|
|
295
|
-
if (cp && cp.skipped) {
|
|
296
|
-
this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
|
|
297
|
-
return { count: 0, hash: 'skipped_dead_letter', skipped: true };
|
|
298
|
-
}
|
|
299
|
-
if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
|
|
300
|
-
if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
// DECISION: Use remote workers or local execution?
|
|
304
|
-
const useRemote = this._shouldUseRemoteWorkers(entry, options);
|
|
305
290
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
291
|
+
try {
|
|
292
|
+
if (!options.dryRun) {
|
|
293
|
+
const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
|
|
294
|
+
cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
|
|
310
295
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
296
|
+
if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
|
|
297
|
+
if (cp && cp.skipped) {
|
|
298
|
+
this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
|
|
299
|
+
return { count: 0, hash: 'skipped_dead_letter', skipped: true };
|
|
300
|
+
}
|
|
301
|
+
if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
|
|
302
|
+
if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
|
|
303
|
+
}
|
|
315
304
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
305
|
+
// DECISION: Use remote workers or local execution?
|
|
306
|
+
const useRemote = this._shouldUseRemoteWorkers(entry, options);
|
|
307
|
+
|
|
308
|
+
if (useRemote) {
|
|
309
|
+
this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
|
|
310
|
+
return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
|
|
311
|
+
}
|
|
320
312
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
let totalCount = 0;
|
|
326
|
-
let batchIndex = 0;
|
|
327
|
-
const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
|
|
328
|
-
const limit = pLimit(concurrency);
|
|
313
|
+
// LOCAL EXECUTION PATH (Original Logic)
|
|
314
|
+
const driverTable = this._getDriverTable(entry.requires);
|
|
315
|
+
const driverEntityField = this.config.tables[driverTable]?.entityField;
|
|
316
|
+
const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
|
|
329
317
|
|
|
330
|
-
|
|
331
|
-
if (
|
|
332
|
-
|
|
318
|
+
let globalData = {};
|
|
319
|
+
if (Object.keys(globalRequires).length > 0) {
|
|
320
|
+
globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
|
|
333
321
|
}
|
|
334
322
|
|
|
335
|
-
const
|
|
336
|
-
const
|
|
323
|
+
const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
|
|
324
|
+
const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
|
|
337
325
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
const
|
|
342
|
-
const
|
|
343
|
-
|
|
344
|
-
await
|
|
345
|
-
|
|
346
|
-
|
|
326
|
+
const rollingHash = crypto.createHash('sha256');
|
|
327
|
+
let totalCount = 0;
|
|
328
|
+
let batchIndex = 0;
|
|
329
|
+
const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
|
|
330
|
+
const limit = pLimit(concurrency);
|
|
331
|
+
|
|
332
|
+
for await (const batch of batchStream) {
|
|
333
|
+
if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
|
|
334
|
+
batchIndex++; continue;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
const { data: batchLocalData, entityIds } = batch;
|
|
338
|
+
const combinedData = { ...batchLocalData, ...globalData };
|
|
339
|
+
|
|
340
|
+
// STRICT FIX: Prefetch dependencies for the batch.
|
|
341
|
+
const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
|
|
347
342
|
|
|
348
|
-
const
|
|
349
|
-
|
|
343
|
+
const { rules } = this.ruleInjector.createContext();
|
|
344
|
+
const batchResults = {};
|
|
345
|
+
|
|
346
|
+
await Promise.all(entityIds.map(entityId => limit(async () => {
|
|
347
|
+
const instance = new entry.class();
|
|
348
|
+
const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
|
|
350
349
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
const id = targetId || entityId;
|
|
354
|
-
|
|
355
|
-
// 1. Look in Batch-Prefetched Dependencies (Priority)
|
|
356
|
-
if (batchDeps[depName] && batchDeps[depName].has(id)) {
|
|
357
|
-
return batchDeps[depName].get(id);
|
|
358
|
-
}
|
|
350
|
+
const context = {
|
|
351
|
+
computation: entry, date: dateStr, entityId, data: entityData,
|
|
359
352
|
|
|
360
|
-
//
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
353
|
+
// STRICT FIX: No fallback to _lazyLoadDependency.
|
|
354
|
+
getDependency: (depName, targetId) => {
|
|
355
|
+
const id = targetId || entityId;
|
|
356
|
+
|
|
357
|
+
// 1. Look in Batch-Prefetched Dependencies (Priority)
|
|
358
|
+
if (batchDeps[depName] && batchDeps[depName].has(id)) {
|
|
359
|
+
return batchDeps[depName].get(id);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// 2. Look in Global/Preloaded Dependencies
|
|
363
|
+
if (depResults[depName]) {
|
|
364
|
+
if (depResults[depName][id] !== undefined) return depResults[depName][id];
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
// 3. STRICT MODE: Throw Error
|
|
368
|
+
throw new Error(
|
|
369
|
+
`[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
|
|
370
|
+
`Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
|
|
371
|
+
);
|
|
372
|
+
},
|
|
364
373
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
};
|
|
374
|
+
previousResult, rules, references: this.referenceDataCache,
|
|
375
|
+
config: this.config, dataFetcher: this.dataFetcher
|
|
376
|
+
};
|
|
377
|
+
|
|
378
|
+
const result = await this.runner.run(instance, context);
|
|
379
|
+
if (result !== undefined) {
|
|
380
|
+
batchResults[entityId] = result;
|
|
381
|
+
this._updateRollingHash(rollingHash, result);
|
|
382
|
+
}
|
|
383
|
+
})));
|
|
375
384
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
385
|
+
if (!options.dryRun) {
|
|
386
|
+
await this.storageManager.commitResults(dateStr, entry, batchResults, {});
|
|
387
|
+
const lastId = entityIds[entityIds.length - 1];
|
|
388
|
+
await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
|
|
380
389
|
}
|
|
381
|
-
})));
|
|
382
390
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
const lastId = entityIds[entityIds.length - 1];
|
|
386
|
-
await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
|
|
391
|
+
totalCount += Object.keys(batchResults).length;
|
|
392
|
+
batchIndex++;
|
|
387
393
|
}
|
|
388
394
|
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
395
|
+
if (!options.dryRun) {
|
|
396
|
+
await this.storageManager.finalizeResults(dateStr, entry);
|
|
397
|
+
if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
|
|
398
|
+
}
|
|
392
399
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
400
|
+
// FIX: Return valid object shape including skipped: false
|
|
401
|
+
return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
|
|
402
|
+
|
|
403
|
+
} catch (error) {
|
|
404
|
+
if (cp && cp.id) {
|
|
405
|
+
this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
|
|
406
|
+
await this.storageManager.failCheckpoint(cp.id, error.message);
|
|
407
|
+
}
|
|
408
|
+
throw error;
|
|
396
409
|
}
|
|
397
|
-
|
|
398
|
-
// FIX: Return valid object shape including skipped: false
|
|
399
|
-
return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
|
|
400
410
|
}
|
|
401
411
|
|
|
402
412
|
/**
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
* - finalizeResults: Loads all GCS files to a temp table and performs a single MERGE
|
|
7
7
|
* * V2.2 UPDATE: Added saveCheckpoint for Append-Only Checkpointer support.
|
|
8
8
|
* * V2.3 UPDATE: Parallelized GCS and Firestore writes in commitResults.
|
|
9
|
+
* * V2.4 UPDATE: Added Attempt Tracking for Zombie Detection.
|
|
9
10
|
* * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
|
|
10
11
|
* * FIX: Improved error logging to catch swallowed BigQuery insert errors.
|
|
11
12
|
* * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
|
|
@@ -351,8 +352,9 @@ class StorageManager {
|
|
|
351
352
|
// FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
|
|
352
353
|
// We only count it as a zombie if the LATEST row is 'running'.
|
|
353
354
|
// This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
|
|
355
|
+
// UPDATE: Added attempts to the selection
|
|
354
356
|
const query = `
|
|
355
|
-
SELECT computation_name, date, checkpoint_id, last_updated
|
|
357
|
+
SELECT computation_name, date, checkpoint_id, last_updated, attempts
|
|
356
358
|
FROM (
|
|
357
359
|
SELECT
|
|
358
360
|
computation_name,
|
|
@@ -360,6 +362,7 @@ class StorageManager {
|
|
|
360
362
|
checkpoint_id,
|
|
361
363
|
last_updated,
|
|
362
364
|
status,
|
|
365
|
+
attempts,
|
|
363
366
|
ROW_NUMBER() OVER (
|
|
364
367
|
PARTITION BY computation_name, date
|
|
365
368
|
ORDER BY last_updated DESC,
|
|
@@ -377,7 +380,12 @@ class StorageManager {
|
|
|
377
380
|
LIMIT 50
|
|
378
381
|
`;
|
|
379
382
|
const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
|
|
380
|
-
return rows.map(r => ({ name: r.computation_name, date: r.date.value || r.date, checkpointId: r.checkpoint_id }));
|
|
383
|
+
return rows.map(r => ({
|
|
384
|
+
name: r.computation_name,
|
|
385
|
+
date: r.date.value || r.date,
|
|
386
|
+
checkpointId: r.checkpoint_id,
|
|
387
|
+
attempts: r.attempts
|
|
388
|
+
}));
|
|
381
389
|
} catch (e) {
|
|
382
390
|
console.error(`[Storage] findZombies failed: ${e.message}`);
|
|
383
391
|
return [];
|
|
@@ -121,17 +121,27 @@ async function runWatchdog(req, res) {
|
|
|
121
121
|
// 1. Find Zombies
|
|
122
122
|
const zombies = await storageManager.findZombies(ZOMBIE_THRESHOLD_MINUTES);
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
// Filter out excessive attempts
|
|
125
|
+
const actionableZombies = [];
|
|
126
|
+
for (const z of zombies) {
|
|
127
|
+
if ((z.attempts || 0) >= 3) {
|
|
128
|
+
console.warn(`[Watchdog] Ignoring zombie ${z.name} (Checkpoint: ${z.checkpointId}) - Max attempts reached (${z.attempts})`);
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
actionableZombies.push(z);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if (actionableZombies.length === 0) {
|
|
135
|
+
return res.status(200).send('No recoverable zombies.');
|
|
126
136
|
}
|
|
127
137
|
|
|
128
|
-
console.log(`[Watchdog] 🧟 Found ${zombies.length} zombies. Initiating recovery...`);
|
|
138
|
+
console.log(`[Watchdog] 🧟 Found ${actionableZombies.length} zombies. Initiating recovery...`);
|
|
129
139
|
|
|
130
140
|
// 2. Claim & Recover
|
|
131
141
|
// We claim them first so the next watchdog doesn't grab them while we are dispatching
|
|
132
|
-
await Promise.all(zombies.map(z => storageManager.claimZombie(z.checkpointId)));
|
|
142
|
+
await Promise.all(actionableZombies.map(z => storageManager.claimZombie(z.checkpointId)));
|
|
133
143
|
|
|
134
|
-
const recoveryTasks = zombies.map(z => {
|
|
144
|
+
const recoveryTasks = actionableZombies.map(z => {
|
|
135
145
|
const entry = manifest.find(m => m.name === z.name);
|
|
136
146
|
if (!entry) {
|
|
137
147
|
console.error(`[Watchdog] Computation ${z.name} no longer exists in manifest. Cannot recover.`);
|