bulltrackers-module 1.0.768 → 1.0.769
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +557 -337
- package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
- package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
- package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
- package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
- package/functions/computation-system-v2/computations/SignedInUserList.js +51 -0
- package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
- package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
- package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +30 -128
- package/functions/computation-system-v2/core-api.js +17 -9
- package/functions/computation-system-v2/data_schema_reference.MD +108 -0
- package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
- package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
- package/functions/computation-system-v2/devtools/index.js +36 -0
- package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
- package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
- package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
- package/functions/computation-system-v2/devtools/shared/index.js +16 -0
- package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
- package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
- package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
- package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
- package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
- package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
- package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
- package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +250 -184
- package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +215 -129
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
- package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
- package/functions/computation-system-v2/framework/storage/StorageManager.js +105 -67
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +12 -6
- package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
- package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
- package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
- package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
- package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
- package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
- package/package.json +1 -1

package/functions/computation-system-v2/framework/execution/Orchestrator.js

@@ -5,7 +5,7 @@
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
  * 3. Execution Strategy (Streaming vs. In-Memory)
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
- * * * UPDATE:
+ * * * UPDATE: Added triggerDeploymentBackfill for automatic history correction.
  */

 const crypto = require('crypto');

@@ -40,14 +40,14 @@ class Orchestrator {
   constructor(config, logger = null) {
     this.config = config;
     this.logger = logger || console;
-
+
     // 1. Initialize Base Services
     this.schemaRegistry = new SchemaRegistry(config.bigquery, this.logger);
     this.queryBuilder = new QueryBuilder(config.bigquery, this.schemaRegistry, this.logger);
     this.dataFetcher = new DataFetcher({ ...config.bigquery, tables: config.tables }, this.queryBuilder, this.logger);
     this.storageManager = new StorageManager(config, this.logger);
     this.stateRepository = new StateRepository(config, this.logger);
-
+
     // 2. Initialize Logic & Rules
     this.manifestBuilder = new ManifestBuilder(config, this.logger);
     const rulesRegistry = new RulesRegistry(config, this.logger);

@@ -57,7 +57,7 @@
     // Keep reference to profiler middleware to access stats later
     this.profilerMiddleware = new ProfilerMiddleware(config);
     this.profilerMiddleware.setStorage(this.storageManager);
-
+
     this.lineageMiddleware = new LineageMiddleware(config);
     const costTracker = new CostTrackerMiddleware(config);

@@ -67,13 +67,13 @@
       this.lineageMiddleware,
       this.profilerMiddleware
     ]);
-
+
     // 4. Initialize Remote Task Runner (Worker Pool)
     // Only create if worker pool is enabled in config
-    this.remoteRunner = config.workerPool?.enabled
+    this.remoteRunner = config.workerPool?.enabled
       ? new RemoteTaskRunner(config, this.logger)
       : null;
-
+
     // State
     this.manifest = null;
     this.runAnalyzer = null;

@@ -94,6 +94,44 @@
     this._log('INFO', `Initialized with ${this.manifest.length} computations`);
   }

+  // =========================================================================
+  // NEW: AUTOMATIC DEPLOYMENT BACKFILL
+  // =========================================================================
+
+  /**
+   * Trigger a backfill for all historical dates of a computation.
+   * Called when code changes are detected.
+   */
+  async triggerDeploymentBackfill(computationName) {
+    if (!this.manifest) await this.initialize();
+
+    const normName = computationName.toLowerCase().replace(/[^a-z0-9]/g, '');
+    const entry = this.manifest.find(e => e.name === normName);
+
+    if (!entry) throw new Error(`Computation ${computationName} not found in manifest`);
+
+    // 1. Fetch all dates this computation has ever run
+    // Relies on StateRepository.getRunDates() which must be implemented
+    const dates = await this.stateRepository.getRunDates(computationName);
+    this._log('INFO', `[Deploy] Found ${dates.length} historical dates for ${computationName}`);
+
+    if (dates.length === 0) return { scheduled: 0 };
+
+    // 2. Schedule tasks for all of them
+    // We do NOT use 'force: true'. We rely on RunAnalyzer to see the hash mismatch.
+    let scheduledCount = 0;
+    for (const dateStr of dates) {
+      await this._scheduleCloudTask(entry.originalName, dateStr, 'deployment_backfill');
+      scheduledCount++;
+    }
+
+    return { scheduled: scheduledCount };
+  }
+
+  // =========================================================================
+  // EXECUTION LOGIC
+  // =========================================================================
+
   async analyze(options) {
     const { date } = options;
     if (!this.manifest) await this.initialize();
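
Note: the backfill path assumes StateRepository exposes getRunDates(); the diff comment itself marks it as "must be implemented". A minimal sketch of how a deploy hook might call the new entry point — the hook function and the changedComputations list are illustrative, not part of this package:

    // Hypothetical deploy hook (illustrative only).
    // `orchestrator` is an initialized Orchestrator; `changedComputations` comes from
    // whatever hash-comparison step detects code changes on deployment.
    async function onDeploy(orchestrator, changedComputations) {
      for (const name of changedComputations) {
        // One Cloud Task is scheduled per historical run date; RunAnalyzer later
        // decides per date whether the hash mismatch actually requires a re-run.
        const { scheduled } = await orchestrator.triggerDeploymentBackfill(name);
        console.log(`Backfill for ${name}: ${scheduled} date(s) scheduled`);
      }
    }
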
@@ -107,7 +145,7 @@
   async execute(options) {
     const { date, pass = null, computation = null, dryRun = false, entities = null } = options;
     if (!this.manifest) await this.initialize();
-
+
     this._log('INFO', `Starting execution for ${date}...`);

     let toRun = this.manifest;

@@ -118,7 +156,7 @@
     }

     const passes = this.manifestBuilder.groupByPass(toRun);
-    const passNumbers = Object.keys(passes).map(Number).sort((a,b) => a-b);
+    const passNumbers = Object.keys(passes).map(Number).sort((a, b) => a - b);
     const passesToRun = pass ? [parseInt(pass, 10)] : passNumbers;

     const summary = {

@@ -130,12 +168,12 @@
     for (const passNum of passesToRun) {
       const passComputations = passes[passNum] || [];
       this._log('INFO', `Executing Pass ${passNum}: ${passComputations.length} computations`);
-
+
       // Computations in the same pass run in parallel here
       await Promise.all(passComputations.map(async (entry) => {
         try {
           const res = await this._executeComputation(entry, date, { ...options, dryRun, entities });
-
+
           if (summary[res.status]) {
             summary[res.status].push(res);
             summary.summary[res.status]++;

@@ -155,17 +193,17 @@
         }
       }));
     }
-
+
     this._printExecutionSummary(summary);
     return summary;
   }

   async runSingle(entry, dateStr, options = {}) {
     if (!this.manifest) await this.initialize();
-    return this._executeComputation(entry, dateStr, {
+    return this._executeComputation(entry, dateStr, {
       ...options,
-      dryRun: options.dryRun || false,
-      entities: options.entityIds
+      dryRun: options.dryRun || false,
+      entities: options.entityIds
     });
   }

@@ -200,14 +238,14 @@
     // =====================================================================
     // STANDARD JS COMPUTATION (ETL) ONLY
     // =====================================================================
-
+
     let previousResult = null;
     if (entry.isHistorical) {
       previousResult = await this.stateRepository.getResult(this._subtractDay(dateStr), name);
     }

     let stats = { count: 0, hash: null, skipped: false };
-
+
     try {
       if (entry.type === 'per-entity' && !forceEntities) {
         stats = await this._executeStreaming(entry, dateStr, depResults, previousResult, options);

@@ -219,7 +257,7 @@
       if (!options.dryRun) {
         const wallClockDuration = Date.now() - startTime;
         const profilerStats = this.profilerMiddleware.profiler.getAndClearStats(entry.name);
-
+
         // If we have granular stats, include them. If not (e.g. skipped batches), use defaults.
         const report = {
           runId: this.config.testMode?.runId || 'production',

@@ -238,7 +276,7 @@

         // Async save (don't block pipeline)
         this.storageManager.savePerformanceReport(report).catch(e => {
-
+          this._log('WARN', `Failed to save perf report for ${name}: ${e.message}`);
         });
       }

@@ -277,8 +315,8 @@
         entityCount: stats.count || 0,
         status: 'failed',
         metrics: { error: e.message }
-      }).catch(() => {});
-
+      }).catch(() => { });
+
       throw e;
     }
   }

@@ -286,11 +324,11 @@
   async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
     const checkpointer = new Checkpointer(this.config, this.storageManager);
     let cp = null;
-
+
     try {
       if (!options.dryRun) {
         const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
-        cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
+        cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);

         if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
         if (cp && cp.skipped) {

@@ -303,7 +341,7 @@

       // DECISION: Use remote workers or local execution?
       const useRemote = this._shouldUseRemoteWorkers(entry, options);
-
+
       if (useRemote) {
         this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
         return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);

@@ -311,6 +349,7 @@

       // LOCAL EXECUTION PATH (Original Logic)
       const driverTable = this._getDriverTable(entry.requires);
+      this._log('INFO', `[DRIVER PROOF] Computation '${entry.name}' is being driven by table: '${driverTable}'`);
       const driverEntityField = this.config.tables[driverTable]?.entityField;
       const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);

@@ -321,7 +360,7 @@

       const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
       const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
-
+
       const rollingHash = crypto.createHash('sha256');
       let totalCount = 0;
       let batchIndex = 0;

@@ -335,41 +374,41 @@

         const { data: batchLocalData, entityIds } = batch;
         const combinedData = { ...batchLocalData, ...globalData };
-
+
         // STRICT FIX: Prefetch dependencies for the batch.
         const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
-
-        const { rules } = this.ruleInjector.createContext();
+
+        const { rules } = this.ruleInjector.createContext();
         const batchResults = {};
-
+
         await Promise.all(entityIds.map(entityId => limit(async () => {
           const instance = new entry.class();
           const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
-
+
           const context = {
             computation: entry, date: dateStr, entityId, data: entityData,
-
+
             // STRICT FIX: No fallback to _lazyLoadDependency.
             getDependency: (depName, targetId) => {
               const id = targetId || entityId;
-
+
               // 1. Look in Batch-Prefetched Dependencies (Priority)
               if (batchDeps[depName] && batchDeps[depName].has(id)) {
                 return batchDeps[depName].get(id);
               }
-
+
               // 2. Look in Global/Preloaded Dependencies
               if (depResults[depName]) {
                 if (depResults[depName][id] !== undefined) return depResults[depName][id];
               }
-
+
               // 3. STRICT MODE: Throw Error
               throw new Error(
                 `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
                 `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
               );
             },
-
+
             previousResult, rules, references: this.referenceDataCache,
             config: this.config, dataFetcher: this.dataFetcher
           };
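
The strict getDependency path above only resolves dependencies that were declared up front; anything else throws. A minimal sketch of a computation declaring its dependencies via getConfig(), assuming the class-based shape implied by `new entry.class()`; the class name, the compute() method name, and the field values are illustrative:

    // Illustrative sketch, not copied from the package.
    class ExampleSignedInUserScore {
      getConfig() {
        return {
          type: 'per-entity',
          // Must be listed here so the batch prefetch makes it available,
          // otherwise getDependency() throws the [Strict Dependency] error.
          dependencies: ['UserPortfolioMetrics']
        };
      }

      async compute(context) {
        const metrics = context.getDependency('UserPortfolioMetrics');
        return { score: metrics ? (metrics.totalValue || 0) * 0.1 : 0 };
      }
    }
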
@@ -382,7 +421,7 @@
         })));

         if (!options.dryRun) {
-          await this.storageManager.commitResults(dateStr, entry, batchResults, {});
+          await this.storageManager.commitResults(dateStr, entry, batchResults, {});
           const lastId = entityIds[entityIds.length - 1];
           await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
         }

@@ -398,21 +437,17 @@

       // FIX: Return valid object shape including skipped: false
       return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
-
+
     } catch (error) {
-      // === 🔍 INSERT THIS DEBUG BLOCK ===
       console.error('________________________________________________________________');
       console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
       console.error(`📍 Computation: ${entry.name}`);
       console.error(`💥 Error Message: ${error.message}`);
       console.error(`📚 Stack Trace:\n${error.stack}`);
       console.error('________________________________________________________________');
-      // ===================================

       if (cp && cp.id) {
         this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
-        // This next line is what causes the "Streaming Buffer" error
-        // if the row was just inserted. Now you will see the REAL error above.
         await this.storageManager.failCheckpoint(cp.id, error.message);
       }
       throw error;

@@ -437,22 +472,22 @@
     if (options.useWorkerPool === false) {
       return false;
     }
-
+
     if (!this.remoteRunner) return false;
     if (options.forceLocal) return false;
-
+
     const poolConfig = this.config.workerPool || {};
-
-    if (poolConfig.excludeComputations?.includes(entry.name) ||
+
+    if (poolConfig.excludeComputations?.includes(entry.name) ||
         poolConfig.excludeComputations?.includes(entry.originalName)) {
       return false;
     }
-
-    if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
+
+    if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
         poolConfig.forceOffloadComputations?.includes(entry.originalName)) {
       return true;
     }
-
+
     if (entry.type !== 'per-entity') return false;
     return true;
   }
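
_shouldUseRemoteWorkers reads only a handful of keys from config.workerPool. A minimal config sketch assuming exactly the keys checked above (enabled, excludeComputations, forceOffloadComputations); the computation names are example values:

    // Assumed shape, derived from the checks in _shouldUseRemoteWorkers.
    const config = {
      workerPool: {
        enabled: true,                                   // remoteRunner is only constructed when true
        excludeComputations: ['BehavioralAnomaly'],      // always run these locally
        forceOffloadComputations: ['PiFeatureVectors']   // always send these to the worker pool
      }
    };
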
@@ -484,7 +519,7 @@

     const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
     const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
-
+
     const rollingHash = crypto.createHash('sha256');
     let totalCount = 0;
     let totalErrors = 0;

@@ -499,10 +534,10 @@

       const { data: batchLocalData, entityIds } = batch;
       const combinedData = { ...batchLocalData, ...globalData };
-
+
       // Prefetch dependencies for this batch
       const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
-
+
       // Convert Map to Object for serialization
       const serializedDeps = {};
       for (const [depName, depMap] of Object.entries(batchDeps)) {

@@ -512,7 +547,7 @@
           serializedDeps[depName] = depMap;
         }
       }
-
+
       // Merge with preloaded deps
       const mergedDeps = { ...depResults, ...serializedDeps };

@@ -526,18 +561,18 @@
       // INVOKE REMOTE WORKERS
       this._log('INFO', `[Remote] Processing batch ${batchIndex}: ${entityIds.length} entities`);
       const { results: batchResults, errors } = await this.remoteRunner.runBatch(
-        entry,
-        dateStr,
-        baseContext,
-        entityIds,
-        entityDataMap,
+        entry,
+        dateStr,
+        baseContext,
+        entityIds,
+        entityDataMap,
         mergedDeps
       );

       if (errors.length > 0) {
         this._log('WARN', `[Remote] Batch ${batchIndex}: ${errors.length} entities failed`);
         totalErrors += errors.length;
-
+
         errors.slice(0, 3).forEach(e => {
           this._log('DEBUG', ` - ${e.entityId}: ${e.error}`);
         });

@@ -579,7 +614,7 @@
     const driverTable = entry.type === 'per-entity' ? this._getDriverTable(entry.requires) : null;
     const driverEntityField = driverTable ? this.config.tables[driverTable]?.entityField : null;
     const instance = new entry.class();
-
+
     const context = {
       computation: entry, date: dateStr, data,
       getDependency: (dep, ent) => this._lazyLoadDependency(dateStr, dep, ent, depResults),

@@ -588,7 +623,7 @@
     };

     let results = {};
-
+
     if (entry.type === 'per-entity') {
       const ids = forceEntities || this._extractEntityIds(data);
       const limit = pLimit(DEFAULT_CONCURRENCY);

@@ -602,17 +637,19 @@
     }

     const finalHash = this._hashResults(results);
-
-    if (!options.dryRun && !forceEntities) {
+
+    if (!options.dryRun && (!forceEntities || options.allowPartialCommit)) {
       const currentStatus = await this.stateRepository.getDailyStatus(dateStr);
       const status = currentStatus.get(entry.name.toLowerCase());
-
+
       if (!options.force && status && status.resultHash === finalHash) {
         return { count: Object.keys(results || {}).length, hash: finalHash, skipped: true };
       }
-
+
       await this.storageManager.commitResults(dateStr, entry, results, {});
-
+      if (!forceEntities) {
+        await this.storageManager.finalizeResults(dateStr, entry);
+      }
     }

     // FIX: Return valid object shape including skipped: false
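
With this change an entity-scoped run can still persist its output when options.allowPartialCommit is set, while finalizeResults remains reserved for full, non-filtered runs. A hedged usage sketch via runSingle; the manifest lookup, entity IDs, and date are illustrative only:

    // Illustrative only: re-run one computation for two specific entities and
    // commit those partial results without finalizing the day's output.
    const entry = orchestrator.manifest.find(e => e.originalName === 'UserPortfolioMetrics'); // assumed lookup
    await orchestrator.runSingle(entry, '2024-06-01', {
      entityIds: ['user-123', 'user-456'],  // forwarded as `entities` by runSingle
      allowPartialCommit: true              // new in this version: commit despite the entity filter
    });
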
@@ -627,7 +664,7 @@
     console.log(`✅ Completed: ${summary.summary.completed}`);
     console.log(`❌ Errors: ${summary.summary.errors}`);
     console.log(`⏭️ Skipped: ${summary.summary.skipped + summary.summary.blocked + summary.summary.impossible}`);
-
+
     const skippedItems = [...summary.skipped, ...summary.blocked, ...summary.impossible];
     if (skippedItems.length > 0) {
       console.log('\n--- Details (Why did it skip?) ---');

@@ -646,11 +683,27 @@
   }

   _getDriverTable(requires) {
+    // PASS 1: Prioritize tables with a DATE field (Time-Partitioned)
     for (const name of Object.keys(requires)) {
+      const spec = requires[name];
+      // Metrics cannot be drivers
+      if (spec.type === 'metric') continue;
+
+      const conf = this.config.tables[name];
+      // If it has BOTH, it's a perfect driver (e.g. portfolio_snapshots)
+      if (conf && conf.entityField && conf.dateField) return name;
+    }
+
+    // PASS 2: Fallback to any entity table (e.g. Static Master List)
+    for (const name of Object.keys(requires)) {
+      const spec = requires[name];
+      if (spec.type === 'metric') continue;
+
       const conf = this.config.tables[name];
       if (conf && conf.entityField) return name;
     }
-
+
+    return null;
   }

   _splitRequirements(requires, driverTable) {
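
In short, the driver is now the first non-metric requirement whose table config has both an entityField and a dateField, with any other non-metric entity table as the fallback. A small sketch under assumed table configs; the table and field names are placeholders:

    // Assumed inputs, for illustration only.
    const tables = {
      user_master:         { entityField: 'userId' },                            // pass-2 fallback only
      portfolio_snapshots: { entityField: 'userId', dateField: 'snapshotDate' }  // pass-1 match
    };
    const requires = {
      globalAum:           { type: 'metric' },  // skipped: metrics cannot be drivers
      user_master:         {},
      portfolio_snapshots: {}
    };
    // Under the two-pass rule, portfolio_snapshots is chosen even though
    // user_master appears first, because it carries both an entity and a date field.
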
@@ -683,7 +736,7 @@
     for (const dep of entry.dependencies) {
       const stat = dailyStatus.get(dep);
       if (stat?.resultHash) depResultHashes[dep] = stat.resultHash;
-      if (stat?.entityCount > 50000) depResults[dep] = null;
+      if (stat?.entityCount > 50000) depResults[dep] = null;
       else depResults[dep] = await this.stateRepository.getResult(dateStr, dep);
     }
     if (entry.conditionalDependencies) {

@@ -726,30 +779,91 @@
     }
   }

-
-
-
+  // =========================================================================
+  // TASK SCHEDULING & CASCADING
+  // =========================================================================

+  /**
+   * Schedules a task on Cloud Tasks (Wraps API call)
+   */
+  async _scheduleCloudTask(computationName, dateStr, source, delayMinutes = 0) {
     const cloudTasksConfig = this.config.cloudTasks;
-
-
+
+    // Mock fallback for local/test environments without cloud config
+    if (!cloudTasksConfig || !cloudTasksConfig.queueName) {
+      if (process.env.NODE_ENV !== 'production') {
+        this._log('INFO', `[Mock Task] Scheduled ${computationName} for ${dateStr} (Source: ${source})`);
+        return;
+      }
       return;
     }

     const { projectId, location, queueName, dispatcherUrl, serviceAccountEmail } = cloudTasksConfig;
+
     if (!projectId || !location || !queueName || !dispatcherUrl || !serviceAccountEmail) {
-      this._log('WARN', 'cloudTasks configuration incomplete; skipping
+      this._log('WARN', 'cloudTasks configuration incomplete; skipping scheduling');
       return;
     }

-    const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
     const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
+    const scheduleTime = new Date(Date.now() + delayMinutes * 60 * 1000);
+
+    // Unique ID prevents duplicate tasks for the same logical event
+    // Note: For backfills, we might want to allow overwrite, but unique IDs help tracing
+    const taskName = `${queuePath}/tasks/${computationName}-${dateStr}-${Date.now()}`;
+
+    const payload = {
+      computationName: computationName,
+      targetDate: dateStr,
+      source: source,
+      scheduledAt: scheduleTime.toISOString()
+    };
+
+    const task = {
+      httpRequest: {
+        httpMethod: 'POST',
+        url: dispatcherUrl,
+        headers: { 'Content-Type': 'application/json' },
+        body: Buffer.from(JSON.stringify(payload)).toString('base64'),
+        oidcToken: { serviceAccountEmail, audience: dispatcherUrl }
+      },
+      scheduleTime: {
+        seconds: Math.floor(scheduleTime.getTime() / 1000),
+        nanos: 0
+      },
+      name: taskName
+    };
+
+    try {
+      await this.cloudTasksClient.createTask({ parent: queuePath, task });
+      if (source === 'dependency') {
+        this._log('INFO', `Triggered dependent ${computationName} for ${dateStr}`);
+      }
+    } catch (e) {
+      if (e.code === 6) { // ALREADY_EXISTS
+        this._log('INFO', `Task for ${computationName} (${dateStr}) already exists.`);
+      } else {
+        this._log('WARN', `Failed to schedule task ${computationName}: ${e.message}`);
+      }
+    }
+  }
+
+  /**
+   * Checks all dependents of the just-completed computation.
+   * If they are ready (all other dependencies met), schedules them.
+   */
+  async _scheduleDependents(entry, dateStr) {
+    const dependents = this.dependentsByName.get(entry.name);
+    if (!dependents || dependents.length === 0) return;
+
+    const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
     const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);

     for (const depEntry of dependents) {
       let latestDependencyTime = null;
       let missingDependency = false;

+      // Check if *all* dependencies of this dependent are ready
       for (const depName of depEntry.dependencies || []) {
         const depStatus = dailyStatus.get(depName);
         if (!depStatus || !depStatus.updatedAt) {

@@ -765,53 +879,13 @@
         }
       }

-      if (missingDependency) {
+      if (missingDependency) {
+        // Dependent is not ready yet (waiting on other inputs)
         continue;
       }

-
-
-      const payload = {
-        computationName: depEntry.originalName,
-        targetDate: dateStr,
-        source: 'dependency',
-        scheduledAt: scheduleTime.toISOString()
-      };
-
-      const taskName = `${queuePath}/tasks/${depEntry.name}-${dateStr}`;
-
-      const task = {
-        httpRequest: {
-          httpMethod: 'POST',
-          url: dispatcherUrl,
-          headers: { 'Content-Type': 'application/json' },
-          body: Buffer.from(JSON.stringify(payload)).toString('base64'),
-          oidcToken: {
-            serviceAccountEmail,
-            audience: dispatcherUrl
-          }
-        },
-        scheduleTime: {
-          seconds: Math.floor(scheduleTime.getTime() / 1000),
-          nanos: 0
-        },
-        name: taskName
-      };
-
-      try {
-        await this.cloudTasksClient.createTask({
-          parent: queuePath,
-          task
-        });
-
-        this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
-      } catch (e) {
-        if (e.code === 6) {
-          this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
-          continue;
-        }
-        this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
-      }
+      // Schedule via the unified helper
+      await this._scheduleCloudTask(depEntry.originalName, dateStr, 'dependency', dependencyGapMinutes);
     }
   }
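
Both scheduling paths now funnel through _scheduleCloudTask, which requires a complete config.cloudTasks block, with an optional scheduling.dependencyGapMinutes delay for cascaded dependents. A minimal config sketch keyed off the fields the method destructures; all values are placeholders:

    // Keys match the checks in _scheduleCloudTask; values are placeholders.
    const config = {
      cloudTasks: {
        projectId: 'my-gcp-project',
        location: 'europe-west1',
        queueName: 'computation-dispatch',
        dispatcherUrl: 'https://dispatcher.example.com/run',   // POST target for the base64 payload
        serviceAccountEmail: 'tasks-invoker@my-gcp-project.iam.gserviceaccount.com'
      },
      scheduling: {
        dependencyGapMinutes: 5   // delay applied when a dependent is cascaded (defaults to 5)
      }
    };
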
@@ -825,11 +899,23 @@

   async _loadReferenceData() {
     if (!this.config.referenceData) return;
-
+
+    await Promise.all(this.config.referenceData.map(async (refEntry) => {
       try {
-
+        // FIX: Support both string (legacy) and object (with fields) formats
+        const table = typeof refEntry === 'string' ? refEntry : refEntry.table;
+        const fields = typeof refEntry === 'string' ? null : refEntry.fields;
+
+        const data = await this.dataFetcher.fetch({
+          table,
+          fields, // Pass specific fields to prevent "LAZY SELECT BLOCKED"
+          targetDate: new Date().toISOString().slice(0, 10),
+          mandatory: false
+        });
         this.referenceDataCache[table] = data || {};
-      } catch (e) {
+      } catch (e) {
+        this._log('WARN', `Failed to load Ref Data ${refEntry.table || refEntry}: ${e.message}`);
+      }
     }));
   }
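
config.referenceData entries may now be either a bare table name (legacy) or an object that also restricts the selected fields. A sketch of both forms; the table and field names are placeholders:

    // Both forms are accepted after this change; names below are examples only.
    const config = {
      referenceData: [
        'asset_metadata',   // legacy: fetch the whole table
        { table: 'instruments', fields: ['instrumentId', 'displayName', 'assetClass'] } // explicit fields avoid the lazy-select guard
      ]
    };
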
@@ -847,9 +933,9 @@
     Object.entries(data).forEach(([tbl, d]) => {
       const conf = this.config.tables[tbl] || {};
       if (conf.entityField === driverEntityField && d && !Array.isArray(d)) {
-
+        out[tbl] = d[id] || null;
       } else {
-
+        out[tbl] = d;
       }
     });
     return out;

@@ -865,12 +951,12 @@
     d.setUTCDate(d.getUTCDate() - 1);
     return d.toISOString().slice(0, 10);
   }
-  _getAllTables() {
-    const s = new Set();
-    if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
-    return Array.from(s);
+  _getAllTables() {
+    const s = new Set();
+    if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
+    return Array.from(s);
   }
   _log(l, m) { this.logger.log(l, `[Orchestrator] ${m}`); }
 }

-module.exports = { Orchestrator };
+module.exports = { Orchestrator };