bulltrackers-module 1.0.765 → 1.0.768
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +298 -186
- package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
- package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
- package/functions/computation-system-v2/config/bulltrackers.config.js +26 -14
- package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
- package/functions/computation-system-v2/framework/data/DataFetcher.js +142 -4
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +119 -122
- package/functions/computation-system-v2/framework/storage/StorageManager.js +16 -18
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +155 -66
- package/functions/computation-system-v2/handlers/scheduler.js +15 -5
- package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
- package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
- package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
- package/functions/computation-system-v2/test/analyze-results.js +0 -238
- package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
- package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
- package/functions/computation-system-v2/test/other/test-framework.js +0 -500
- package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
- package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
- package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
- package/functions/computation-system-v2/test/other/test-results.json +0 -31
- package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
- package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
- package/functions/computation-system-v2/test/other/test-storage.js +0 -449
- package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
- package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
- package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
package/functions/computation-system-v2/framework/execution/Orchestrator.js

@@ -5,12 +5,7 @@
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
  * 3. Execution Strategy (Streaming vs. In-Memory)
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
- * * * UPDATE:
- * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
- * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
- * * * UPDATE: Aggregates performance reporting to prevent log spam.
- * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
- * * * FIX: Added missing 'skipped' property to return types for type safety.
+ * * * UPDATE: Removed SQL-based execution support (isSql flag ignored).
  */
 
 const crypto = require('crypto');
@@ -182,6 +177,7 @@ class Orchestrator {
     const { name } = entry;
     const forceEntities = options.entities;
 
+    // 1. Analyze Status (Skip if done/cached, unless forced)
     if (!forceEntities) {
       const decision = await this._analyzeEntry(entry, dateStr);
       const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
@@ -198,7 +194,12 @@ class Orchestrator {
     this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
     const startTime = Date.now();
 
+    // 2. Load Dependencies (Crucial for Lineage, Locking, and Upstream Checks)
     const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
+
+    // =====================================================================
+    // STANDARD JS COMPUTATION (ETL) ONLY
+    // =====================================================================
 
     let previousResult = null;
     if (entry.isHistorical) {
@@ -256,8 +257,6 @@
     await this.lineageMiddleware.flush();
 
     // Trigger dependency-driven cascading for downstream computations.
-    // This will enqueue Cloud Tasks for any dependents whose full
-    // dependency set has completed for the given date.
     try {
       await this._scheduleDependents(entry, dateStr);
     } catch (cascadeError) {
@@ -287,116 +286,137 @@ class Orchestrator {
   async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
     const checkpointer = new Checkpointer(this.config, this.storageManager);
     let cp = null;
-    if (!options.dryRun) {
-      const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
-      cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
-
-      if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
-      if (cp && cp.skipped) {
-        this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
-        return { count: 0, hash: 'skipped_dead_letter', skipped: true };
-      }
-      if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
-      if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
-    }
-
-    // DECISION: Use remote workers or local execution?
-    const useRemote = this._shouldUseRemoteWorkers(entry, options);
 
-
-
-
-
+    try {
+      if (!options.dryRun) {
+        const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
+        cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
 
-
-
-
-
+        if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
+        if (cp && cp.skipped) {
+          this._log('INFO', `⏭️ Skipping ${entry.name}: ${cp.reason}`);
+          return { count: 0, hash: 'skipped_dead_letter', skipped: true };
+        }
+        if (cp?.isCompleted) return { count: 0, hash: 'cached', skipped: true };
+        if (cp?.isResumed) this._log('INFO', `Resuming ${entry.name} from checkpoint...`);
+      }
 
-
-
-
-
+      // DECISION: Use remote workers or local execution?
+      const useRemote = this._shouldUseRemoteWorkers(entry, options);
+
+      if (useRemote) {
+        this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
+        return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
+      }
 
-
-
-
-
-    let totalCount = 0;
-    let batchIndex = 0;
-    const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
-    const limit = pLimit(concurrency);
+      // LOCAL EXECUTION PATH (Original Logic)
+      const driverTable = this._getDriverTable(entry.requires);
+      const driverEntityField = this.config.tables[driverTable]?.entityField;
+      const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
 
-
-    if (
-
+      let globalData = {};
+      if (Object.keys(globalRequires).length > 0) {
+        globalData = await this.dataFetcher.fetchForComputation(globalRequires, dateStr);
       }
 
-    const
-    const
+      const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
+      const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
 
-
-
-
-    const
-    const
-
-    await
-
-
+      const rollingHash = crypto.createHash('sha256');
+      let totalCount = 0;
+      let batchIndex = 0;
+      const concurrency = this.config.execution?.entityConcurrency || DEFAULT_CONCURRENCY;
+      const limit = pLimit(concurrency);
+
+      for await (const batch of batchStream) {
+        if (cp && cp.completedBatches && cp.completedBatches.has(batchIndex)) {
+          batchIndex++; continue;
+        }
+
+        const { data: batchLocalData, entityIds } = batch;
+        const combinedData = { ...batchLocalData, ...globalData };
+
+        // STRICT FIX: Prefetch dependencies for the batch.
+        const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
 
-      const
-
+        const { rules } = this.ruleInjector.createContext();
+        const batchResults = {};
+
+        await Promise.all(entityIds.map(entityId => limit(async () => {
+          const instance = new entry.class();
+          const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
 
-
-
-          const id = targetId || entityId;
-
-          // 1. Look in Batch-Prefetched Dependencies (Priority)
-          if (batchDeps[depName] && batchDeps[depName].has(id)) {
-            return batchDeps[depName].get(id);
-          }
+          const context = {
+            computation: entry, date: dateStr, entityId, data: entityData,
 
-          //
-
-
-
+            // STRICT FIX: No fallback to _lazyLoadDependency.
+            getDependency: (depName, targetId) => {
+              const id = targetId || entityId;
+
+              // 1. Look in Batch-Prefetched Dependencies (Priority)
+              if (batchDeps[depName] && batchDeps[depName].has(id)) {
+                return batchDeps[depName].get(id);
+              }
+
+              // 2. Look in Global/Preloaded Dependencies
+              if (depResults[depName]) {
+                if (depResults[depName][id] !== undefined) return depResults[depName][id];
+              }
+
+              // 3. STRICT MODE: Throw Error
+              throw new Error(
+                `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
+                `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
+              );
+            },
 
-
-
-
-
-
-
-
-
-
-          };
+            previousResult, rules, references: this.referenceDataCache,
+            config: this.config, dataFetcher: this.dataFetcher
+          };
+
+          const result = await this.runner.run(instance, context);
+          if (result !== undefined) {
+            batchResults[entityId] = result;
+            this._updateRollingHash(rollingHash, result);
+          }
+        })));
 
-
-
-
-
+        if (!options.dryRun) {
+          await this.storageManager.commitResults(dateStr, entry, batchResults, {});
+          const lastId = entityIds[entityIds.length - 1];
+          await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
         }
-        })));
 
-
-
-        const lastId = entityIds[entityIds.length - 1];
-        await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
+        totalCount += Object.keys(batchResults).length;
+        batchIndex++;
       }
 
-
-
-
+      if (!options.dryRun) {
+        await this.storageManager.finalizeResults(dateStr, entry);
+        if (cp) await checkpointer.complete(dateStr, entry.name, cp.id);
+      }
 
-
-
-
+      // FIX: Return valid object shape including skipped: false
+      return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
+
+    } catch (error) {
+      // === 🔍 INSERT THIS DEBUG BLOCK ===
+      console.error('________________________________________________________________');
+      console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
+      console.error(`📍 Computation: ${entry.name}`);
+      console.error(`💥 Error Message: ${error.message}`);
+      console.error(`📚 Stack Trace:\n${error.stack}`);
+      console.error('________________________________________________________________');
+      // ===================================
+
+      if (cp && cp.id) {
+        this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
+        // This next line is what causes the "Streaming Buffer" error
+        // if the row was just inserted. Now you will see the REAL error above.
+        await this.storageManager.failCheckpoint(cp.id, error.message);
+      }
+      throw error;
     }
-
-    // FIX: Return valid object shape including skipped: false
-    return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
   }
 
   /**
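The strict getDependency resolution added above replaces the old lazy-load fallback with a fixed lookup order: batch-prefetched dependencies first, then globally preloaded results, then a hard failure. A minimal standalone sketch of that lookup order (not taken from the package; the makeGetDependency helper and sample data are illustrative only):

// Illustrative sketch of the strict lookup order used by the new getDependency.
// batchDeps:  { [depName]: Map<entityId, value> }  - prefetched per batch
// depResults: { [depName]: { [entityId]: value } } - preloaded global results
function makeGetDependency(batchDeps, depResults, computationName, entityId) {
  return (depName, targetId) => {
    const id = targetId || entityId;

    // 1. Batch-prefetched dependencies take priority.
    if (batchDeps[depName] && batchDeps[depName].has(id)) {
      return batchDeps[depName].get(id);
    }

    // 2. Fall back to globally preloaded dependency results.
    if (depResults[depName] && depResults[depName][id] !== undefined) {
      return depResults[depName][id];
    }

    // 3. Strict mode: no lazy single-entity fetch, fail loudly instead.
    throw new Error(
      `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found for '${computationName}'.`
    );
  };
}

// Example usage with made-up data:
const getDependency = makeGetDependency(
  { PortfolioSummary: new Map([['investor-1', { value: 42 }]]) },
  { SectorExposure: { 'investor-1': { tech: 0.6 } } },
  'ExampleComputation',
  'investor-1'
);
console.log(getDependency('PortfolioSummary')); // { value: 42 }
console.log(getDependency('SectorExposure'));   // { tech: 0.6 }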
@@ -692,10 +712,6 @@ class Orchestrator {
     return prefetched;
   }
 
-  /**
-   * Build a reverse dependency index so that when a computation completes
-   * we can quickly find all computations that depend on it.
-   */
   _buildDependentsIndex() {
     this.dependentsByName = new Map();
     if (!this.manifest) return;
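The removed doc comment described what _buildDependentsIndex still does: invert the manifest's dependency lists into a name-to-dependents map so a completed computation can look up its dependents directly. A rough sketch of that inversion under an assumed manifest shape (entries with name and dependencies arrays; this is not the package's actual code):

// Assumed manifest shape: [{ name, dependencies: [namesItDependsOn] }, ...]
function buildDependentsIndex(manifest) {
  const dependentsByName = new Map();
  for (const entry of manifest) {
    for (const depName of entry.dependencies || []) {
      if (!dependentsByName.has(depName)) dependentsByName.set(depName, []);
      dependentsByName.get(depName).push(entry);
    }
  }
  return dependentsByName;
}

// When 'NewSocialPost' completes, its dependents can be looked up directly:
const index = buildDependentsIndex([
  { name: 'NewSocialPost', dependencies: [] },
  { name: 'BehavioralAnomaly', dependencies: ['NewSocialPost'] },
]);
console.log(index.get('NewSocialPost').map(e => e.name)); // ['BehavioralAnomaly']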
@@ -710,14 +726,6 @@ class Orchestrator {
     }
   }
 
-  /**
-   * Schedule dependent computations via Cloud Tasks after a computation
-   * has successfully completed for a given date.
-   *
-   * The scheduler is responsible only for root / pass-1 computations.
-   * All downstream work is triggered here with a configurable time gap
-   * once ALL dependencies of a computation have completed.
-   */
   async _scheduleDependents(entry, dateStr) {
     const dependents = this.dependentsByName.get(entry.name);
     if (!dependents || dependents.length === 0) return;
@@ -736,13 +744,9 @@
 
     const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
     const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
-
-    // Use the latest in-memory status for this date so we can see the
-    // just-updated computation plus any earlier ones.
     const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
 
     for (const depEntry of dependents) {
-      // Compute the latest completion time across all of this computation's dependencies.
       let latestDependencyTime = null;
       let missingDependency = false;
 
@@ -761,8 +765,6 @@
         }
       }
 
-      // If any dependency hasn't completed yet, we will schedule this
-      // dependent when that dependency finishes instead.
       if (missingDependency || !latestDependencyTime) {
         continue;
       }
@@ -804,12 +806,10 @@
 
         this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
       } catch (e) {
-        // Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
         if (e.code === 6) {
           this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
           continue;
         }
-
        this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
       }
     }
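The catch block above treats gRPC code 6 (ALREADY_EXISTS) as a benign duplicate, the usual pattern when Cloud Tasks are created with deterministic names so that retries stay idempotent. A hedged sketch of that pattern with @google-cloud/tasks; the queue path, task id scheme, payload, and URL below are invented for illustration and are not the package's exact values:

const { CloudTasksClient } = require('@google-cloud/tasks');

// Deterministic task names make createTask idempotent: re-creating the same
// task returns ALREADY_EXISTS (gRPC code 6) instead of enqueueing a duplicate.
async function scheduleDependentTask(client, queuePath, depName, dateStr, runAtMs, url) {
  const taskId = `${depName}-${dateStr}`.replace(/[^A-Za-z0-9_-]/g, '-');
  const task = {
    name: `${queuePath}/tasks/${taskId}`,
    scheduleTime: { seconds: Math.floor(runAtMs / 1000) },
    httpRequest: {
      httpMethod: 'POST',
      url,
      headers: { 'Content-Type': 'application/json' },
      body: Buffer.from(JSON.stringify({ computation: depName, date: dateStr })).toString('base64'),
    },
  };
  try {
    await client.createTask({ parent: queuePath, task });
    return 'scheduled';
  } catch (e) {
    if (e.code === 6) return 'already-scheduled'; // duplicate task, safe to ignore
    throw e;
  }
}

// Usage (illustrative project/queue values):
// const client = new CloudTasksClient();
// const queuePath = client.queuePath('my-project', 'us-central1', 'computations');
// await scheduleDependentTask(client, queuePath, 'BehavioralAnomaly', '2024-05-01', Date.now() + 5 * 60000, 'https://example.com/run');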
@@ -818,10 +818,7 @@
   async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
     if (preloaded[depName] && !entityId) return preloaded[depName];
     if (preloaded[depName] && entityId) return preloaded[depName][entityId];
-
-    // WARN: This is the slow path that we removed from Streaming
     this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
-
     if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
     return this.stateRepository.getResult(dateStr, depName);
   }
package/functions/computation-system-v2/framework/storage/StorageManager.js

@@ -6,10 +6,11 @@
  * - finalizeResults: Loads all GCS files to a temp table and performs a single MERGE
  * * V2.2 UPDATE: Added saveCheckpoint for Append-Only Checkpointer support.
  * * V2.3 UPDATE: Parallelized GCS and Firestore writes in commitResults.
+ * * V2.4 UPDATE: Added Attempt Tracking for Zombie Detection.
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
- * * FIX:
+ * * FIX: Removed SAFE.PARSE_JSON from MERGE to match STRING schema types.
  */
 
 const { Firestore } = require('@google-cloud/firestore');
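The header describes a two-phase write path: commitResults stages batch results in GCS, and finalizeResults loads those files into a temp table and performs a single MERGE via bigquery.createJob. A simplified sketch of that load-then-MERGE step; the bucket, project, dataset, and table names are placeholders and the MERGE condition is only an approximation of the package's query:

const { BigQuery } = require('@google-cloud/bigquery');

// Sketch: load NDJSON result files from GCS into a temp table, then MERGE once.
async function finalizeSketch(dateStr, computationName) {
  const bigquery = new BigQuery();

  // 1. Load all staged GCS files for this computation/date into a temp table.
  const [loadJob] = await bigquery.createJob({
    configuration: {
      load: {
        sourceUris: [`gs://my-results-bucket/${dateStr}/${computationName}/*.json`],
        destinationTable: { projectId: 'my-project', datasetId: 'my_dataset', tableId: 'results_tmp' },
        sourceFormat: 'NEWLINE_DELIMITED_JSON',
        writeDisposition: 'WRITE_TRUNCATE',
      },
    },
  });
  await loadJob.promise();

  // 2. Single MERGE from the temp table into the final results table.
  await bigquery.query(`
    MERGE INTO \`my-project.my_dataset.computation_results\` T
    USING \`my-project.my_dataset.results_tmp\` S
    ON T.date = S.date AND T.computation_name = S.computation_name AND T.entity_id = S.entity_id
    WHEN MATCHED THEN UPDATE SET result_data = S.result_data, updated_at = S.updated_at
    WHEN NOT MATCHED THEN INSERT ROW
  `);
}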
@@ -52,8 +53,7 @@ class StorageManager {
   async claimZombie(checkpointId) {
     if (!checkpointId) return;
 
-
-    const { projectId, dataset } = this.config.bigquery; //
+    const { projectId, dataset } = this.config.bigquery;
 
     const query = `
       UPDATE \`${projectId}.${dataset}.computation_checkpoints\`
@@ -348,11 +348,8 @@ class StorageManager {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
-      // We only count it as a zombie if the LATEST row is 'running'.
-      // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
       const query = `
-        SELECT computation_name, date, checkpoint_id, last_updated
+        SELECT computation_name, date, checkpoint_id, last_updated, attempts
         FROM (
           SELECT
             computation_name,
@@ -360,6 +357,7 @@ class StorageManager {
             checkpoint_id,
             last_updated,
             status,
+            attempts,
             ROW_NUMBER() OVER (
               PARTITION BY computation_name, date
               ORDER BY last_updated DESC,
@@ -377,7 +375,12 @@
         LIMIT 50
       `;
       const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
-      return rows.map(r => ({
+      return rows.map(r => ({
+        name: r.computation_name,
+        date: r.date.value || r.date,
+        checkpointId: r.checkpoint_id,
+        attempts: r.attempts
+      }));
     } catch (e) {
       console.error(`[Storage] findZombies failed: ${e.message}`);
       return [];
@@ -399,9 +402,6 @@
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Added Tie-Breaker logic to ORDER BY
-      // If timestamps are identical, 'completed' (1) comes before 'failed' (2) before 'running' (3).
-      // This ensures we never accidentally pick a "running" row when a "completed" one exists at the exact same ms.
       const query = `
         SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
         FROM ${fullTable}
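The comments removed in the two checkpoint-query hunks above described the intent of this SQL: partition checkpoint rows by (computation_name, date), order by last_updated, and only treat a checkpoint as a zombie when its latest row is still 'running'. A hedged sketch of that "latest row per key" shape as a plain BigQuery query string; the project/dataset names and threshold handling are assumptions, not the package's exact SQL (the package also adds a status tie-breaker to the ORDER BY):

// Sketch only: latest-status-per-(computation, date) zombie detection.
// Assumes a computation_checkpoints table with status / last_updated / attempts columns.
const zombieQuery = `
  SELECT computation_name, date, checkpoint_id, last_updated, attempts
  FROM (
    SELECT
      computation_name, date, checkpoint_id, last_updated, status, attempts,
      ROW_NUMBER() OVER (
        PARTITION BY computation_name, date
        ORDER BY last_updated DESC
      ) AS rn
    FROM \`my-project.my_dataset.computation_checkpoints\`
  )
  WHERE rn = 1                                            -- only the most recent row per computation/date
    AND status = 'running'                                -- ...and it is still marked running
    AND last_updated < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL @minutes MINUTE)
  LIMIT 50
`;

// Usage sketch with @google-cloud/bigquery:
// const [rows] = await bigquery.query({ query: zombieQuery, params: { minutes: 30 } });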
@@ -499,8 +499,6 @@
     const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
     const table = dataset.table(tableName);
 
-    // Note: result_data and dependency_result_hashes are loaded as STRING from the JSON file
-    // They will be parsed into JSON during the merge step.
     const schema = [
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
@@ -523,6 +521,7 @@
 
     await this._ensureBigQueryTable(targetTable);
 
+    // FIX: Removed SAFE.PARSE_JSON() because target columns are STRING.
     const mergeQuery = `
       MERGE INTO ${fullTarget} T
       USING (
@@ -536,15 +535,15 @@
       UPDATE SET
         code_hash = S.code_hash,
         result_hash = S.result_hash,
-        dependency_result_hashes =
+        dependency_result_hashes = S.dependency_result_hashes,
         entity_count = S.entity_count,
-        result_data =
+        result_data = S.result_data,
         updated_at = S.updated_at
       WHEN NOT MATCHED THEN
         INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
                 dependency_result_hashes, entity_count, result_data, updated_at)
         VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
-
+                S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
     `;
 
     // UPDATE: Use createQueryJob to capture DML statistics
@@ -651,7 +650,7 @@
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
-      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
       { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
       { name: 'status', type: 'STRING', mode: 'NULLABLE' },
       { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
@@ -770,7 +769,6 @@
     }
   }
   _logError(context, error) {
-    // Safe logging for BigQuery PartialFailureError which hides details in .errors
     let details = error.message;
     if (error.errors && Array.isArray(error.errors)) {
       details = JSON.stringify(error.errors, null, 2);