bulltrackers-module 1.0.766 → 1.0.769

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (76)
  1. package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
  2. package/functions/computation-system-v2/computations/BehavioralAnomaly.js +559 -227
  3. package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
  4. package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
  5. package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
  6. package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
  7. package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
  8. package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
  9. package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
  10. package/functions/computation-system-v2/computations/SignedInUserList.js +51 -0
  11. package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
  12. package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
  13. package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
  14. package/functions/computation-system-v2/config/bulltrackers.config.js +40 -126
  15. package/functions/computation-system-v2/core-api.js +17 -9
  16. package/functions/computation-system-v2/data_schema_reference.MD +108 -0
  17. package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
  18. package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
  19. package/functions/computation-system-v2/devtools/index.js +36 -0
  20. package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
  21. package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
  22. package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
  23. package/functions/computation-system-v2/devtools/shared/index.js +16 -0
  24. package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
  25. package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
  26. package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
  27. package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
  28. package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
  29. package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
  30. package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
  31. package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
  32. package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
  33. package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
  34. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
  35. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
  36. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
  37. package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
  38. package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
  39. package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
  40. package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
  41. package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
  42. package/functions/computation-system-v2/framework/data/DataFetcher.js +330 -126
  43. package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
  44. package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
  45. package/functions/computation-system-v2/framework/execution/Orchestrator.js +226 -153
  46. package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
  47. package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
  48. package/functions/computation-system-v2/framework/storage/StorageManager.js +111 -83
  49. package/functions/computation-system-v2/framework/testing/ComputationTester.js +161 -66
  50. package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
  51. package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
  52. package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
  53. package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
  54. package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
  55. package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
  56. package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
  57. package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
  58. package/package.json +1 -1
  59. package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
  60. package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
  61. package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
  62. package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
  63. package/functions/computation-system-v2/test/analyze-results.js +0 -238
  64. package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
  65. package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
  66. package/functions/computation-system-v2/test/other/test-framework.js +0 -500
  67. package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
  68. package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
  69. package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
  70. package/functions/computation-system-v2/test/other/test-results.json +0 -31
  71. package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
  72. package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
  73. package/functions/computation-system-v2/test/other/test-storage.js +0 -449
  74. package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
  75. package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
  76. package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
@@ -5,12 +5,7 @@
5
5
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
6
6
  * 3. Execution Strategy (Streaming vs. In-Memory)
7
7
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
8
- * * * UPDATE: Added Execution Summary logging to debug Skipped/Blocked/Impossible tasks.
9
- * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
10
- * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
11
- * * * UPDATE: Aggregates performance reporting to prevent log spam.
12
- * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
13
- * * * FIX: Added missing 'skipped' property to return types for type safety.
8
+ * * * UPDATE: Added triggerDeploymentBackfill for automatic history correction.
14
9
  */
15
10
 
16
11
  const crypto = require('crypto');
@@ -45,14 +40,14 @@ class Orchestrator {
45
40
  constructor(config, logger = null) {
46
41
  this.config = config;
47
42
  this.logger = logger || console;
48
-
43
+
49
44
  // 1. Initialize Base Services
50
45
  this.schemaRegistry = new SchemaRegistry(config.bigquery, this.logger);
51
46
  this.queryBuilder = new QueryBuilder(config.bigquery, this.schemaRegistry, this.logger);
52
47
  this.dataFetcher = new DataFetcher({ ...config.bigquery, tables: config.tables }, this.queryBuilder, this.logger);
53
48
  this.storageManager = new StorageManager(config, this.logger);
54
49
  this.stateRepository = new StateRepository(config, this.logger);
55
-
50
+
56
51
  // 2. Initialize Logic & Rules
57
52
  this.manifestBuilder = new ManifestBuilder(config, this.logger);
58
53
  const rulesRegistry = new RulesRegistry(config, this.logger);
@@ -62,7 +57,7 @@ class Orchestrator {
62
57
  // Keep reference to profiler middleware to access stats later
63
58
  this.profilerMiddleware = new ProfilerMiddleware(config);
64
59
  this.profilerMiddleware.setStorage(this.storageManager);
65
-
60
+
66
61
  this.lineageMiddleware = new LineageMiddleware(config);
67
62
  const costTracker = new CostTrackerMiddleware(config);
68
63
 
@@ -72,13 +67,13 @@ class Orchestrator {
72
67
  this.lineageMiddleware,
73
68
  this.profilerMiddleware
74
69
  ]);
75
-
70
+
76
71
  // 4. Initialize Remote Task Runner (Worker Pool)
77
72
  // Only create if worker pool is enabled in config
78
- this.remoteRunner = config.workerPool?.enabled
73
+ this.remoteRunner = config.workerPool?.enabled
79
74
  ? new RemoteTaskRunner(config, this.logger)
80
75
  : null;
81
-
76
+
82
77
  // State
83
78
  this.manifest = null;
84
79
  this.runAnalyzer = null;
@@ -99,6 +94,44 @@ class Orchestrator {
99
94
  this._log('INFO', `Initialized with ${this.manifest.length} computations`);
100
95
  }
101
96
 
97
+ // =========================================================================
98
+ // NEW: AUTOMATIC DEPLOYMENT BACKFILL
99
+ // =========================================================================
100
+
101
+ /**
102
+ * Trigger a backfill for all historical dates of a computation.
103
+ * Called when code changes are detected.
104
+ */
105
+ async triggerDeploymentBackfill(computationName) {
106
+ if (!this.manifest) await this.initialize();
107
+
108
+ const normName = computationName.toLowerCase().replace(/[^a-z0-9]/g, '');
109
+ const entry = this.manifest.find(e => e.name === normName);
110
+
111
+ if (!entry) throw new Error(`Computation ${computationName} not found in manifest`);
112
+
113
+ // 1. Fetch all dates this computation has ever run
114
+ // Relies on StateRepository.getRunDates() which must be implemented
115
+ const dates = await this.stateRepository.getRunDates(computationName);
116
+ this._log('INFO', `[Deploy] Found ${dates.length} historical dates for ${computationName}`);
117
+
118
+ if (dates.length === 0) return { scheduled: 0 };
119
+
120
+ // 2. Schedule tasks for all of them
121
+ // We do NOT use 'force: true'. We rely on RunAnalyzer to see the hash mismatch.
122
+ let scheduledCount = 0;
123
+ for (const dateStr of dates) {
124
+ await this._scheduleCloudTask(entry.originalName, dateStr, 'deployment_backfill');
125
+ scheduledCount++;
126
+ }
127
+
128
+ return { scheduled: scheduledCount };
129
+ }
130
+
131
+ // =========================================================================
132
+ // EXECUTION LOGIC
133
+ // =========================================================================
134
+
102
135
  async analyze(options) {
103
136
  const { date } = options;
104
137
  if (!this.manifest) await this.initialize();
@@ -112,7 +145,7 @@ class Orchestrator {
112
145
  async execute(options) {
113
146
  const { date, pass = null, computation = null, dryRun = false, entities = null } = options;
114
147
  if (!this.manifest) await this.initialize();
115
-
148
+
116
149
  this._log('INFO', `Starting execution for ${date}...`);
117
150
 
118
151
  let toRun = this.manifest;
@@ -123,7 +156,7 @@ class Orchestrator {
123
156
  }
124
157
 
125
158
  const passes = this.manifestBuilder.groupByPass(toRun);
126
- const passNumbers = Object.keys(passes).map(Number).sort((a,b) => a-b);
159
+ const passNumbers = Object.keys(passes).map(Number).sort((a, b) => a - b);
127
160
  const passesToRun = pass ? [parseInt(pass, 10)] : passNumbers;
128
161
 
129
162
  const summary = {
@@ -135,12 +168,12 @@ class Orchestrator {
135
168
  for (const passNum of passesToRun) {
136
169
  const passComputations = passes[passNum] || [];
137
170
  this._log('INFO', `Executing Pass ${passNum}: ${passComputations.length} computations`);
138
-
171
+
139
172
  // Computations in the same pass run in parallel here
140
173
  await Promise.all(passComputations.map(async (entry) => {
141
174
  try {
142
175
  const res = await this._executeComputation(entry, date, { ...options, dryRun, entities });
143
-
176
+
144
177
  if (summary[res.status]) {
145
178
  summary[res.status].push(res);
146
179
  summary.summary[res.status]++;
@@ -160,17 +193,17 @@ class Orchestrator {
160
193
  }
161
194
  }));
162
195
  }
163
-
196
+
164
197
  this._printExecutionSummary(summary);
165
198
  return summary;
166
199
  }
167
200
 
168
201
  async runSingle(entry, dateStr, options = {}) {
169
202
  if (!this.manifest) await this.initialize();
170
- return this._executeComputation(entry, dateStr, {
203
+ return this._executeComputation(entry, dateStr, {
171
204
  ...options,
172
- dryRun: options.dryRun || false,
173
- entities: options.entityIds
205
+ dryRun: options.dryRun || false,
206
+ entities: options.entityIds
174
207
  });
175
208
  }
176
209
 
@@ -182,6 +215,7 @@ class Orchestrator {
182
215
  const { name } = entry;
183
216
  const forceEntities = options.entities;
184
217
 
218
+ // 1. Analyze Status (Skip if done/cached, unless forced)
185
219
  if (!forceEntities) {
186
220
  const decision = await this._analyzeEntry(entry, dateStr);
187
221
  const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
@@ -198,15 +232,20 @@ class Orchestrator {
198
232
  this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
199
233
  const startTime = Date.now();
200
234
 
235
+ // 2. Load Dependencies (Crucial for Lineage, Locking, and Upstream Checks)
201
236
  const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
202
-
237
+
238
+ // =====================================================================
239
+ // STANDARD JS COMPUTATION (ETL) ONLY
240
+ // =====================================================================
241
+
203
242
  let previousResult = null;
204
243
  if (entry.isHistorical) {
205
244
  previousResult = await this.stateRepository.getResult(this._subtractDay(dateStr), name);
206
245
  }
207
246
 
208
247
  let stats = { count: 0, hash: null, skipped: false };
209
-
248
+
210
249
  try {
211
250
  if (entry.type === 'per-entity' && !forceEntities) {
212
251
  stats = await this._executeStreaming(entry, dateStr, depResults, previousResult, options);
@@ -218,7 +257,7 @@ class Orchestrator {
218
257
  if (!options.dryRun) {
219
258
  const wallClockDuration = Date.now() - startTime;
220
259
  const profilerStats = this.profilerMiddleware.profiler.getAndClearStats(entry.name);
221
-
260
+
222
261
  // If we have granular stats, include them. If not (e.g. skipped batches), use defaults.
223
262
  const report = {
224
263
  runId: this.config.testMode?.runId || 'production',
@@ -237,7 +276,7 @@ class Orchestrator {
237
276
 
238
277
  // Async save (don't block pipeline)
239
278
  this.storageManager.savePerformanceReport(report).catch(e => {
240
- this._log('WARN', `Failed to save perf report for ${name}: ${e.message}`);
279
+ this._log('WARN', `Failed to save perf report for ${name}: ${e.message}`);
241
280
  });
242
281
  }
243
282
 
@@ -256,8 +295,6 @@ class Orchestrator {
256
295
  await this.lineageMiddleware.flush();
257
296
 
258
297
  // Trigger dependency-driven cascading for downstream computations.
259
- // This will enqueue Cloud Tasks for any dependents whose full
260
- // dependency set has completed for the given date.
261
298
  try {
262
299
  await this._scheduleDependents(entry, dateStr);
263
300
  } catch (cascadeError) {
@@ -278,8 +315,8 @@ class Orchestrator {
278
315
  entityCount: stats.count || 0,
279
316
  status: 'failed',
280
317
  metrics: { error: e.message }
281
- }).catch(() => {});
282
-
318
+ }).catch(() => { });
319
+
283
320
  throw e;
284
321
  }
285
322
  }
@@ -287,11 +324,11 @@ class Orchestrator {
287
324
  async _executeStreaming(entry, dateStr, depResults, previousResult, options) {
288
325
  const checkpointer = new Checkpointer(this.config, this.storageManager);
289
326
  let cp = null;
290
-
327
+
291
328
  try {
292
329
  if (!options.dryRun) {
293
330
  const forceLock = options.force || this.config.bypassLocks || process.env.NODE_ENV === 'test';
294
- cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
331
+ cp = await checkpointer.initCheckpoint(dateStr, entry.name, 0, entry.hash, forceLock);
295
332
 
296
333
  if (cp && cp.isLocked) throw new Error(`⚠️ Computation ${entry.name} is currently LOCKED.`);
297
334
  if (cp && cp.skipped) {
@@ -304,7 +341,7 @@ class Orchestrator {
304
341
 
305
342
  // DECISION: Use remote workers or local execution?
306
343
  const useRemote = this._shouldUseRemoteWorkers(entry, options);
307
-
344
+
308
345
  if (useRemote) {
309
346
  this._log('INFO', `Using REMOTE worker pool for ${entry.name}`);
310
347
  return await this._executeStreamingRemote(entry, dateStr, depResults, previousResult, options, checkpointer, cp);
@@ -312,6 +349,7 @@ class Orchestrator {
312
349
 
313
350
  // LOCAL EXECUTION PATH (Original Logic)
314
351
  const driverTable = this._getDriverTable(entry.requires);
352
+ this._log('INFO', `[DRIVER PROOF] Computation '${entry.name}' is being driven by table: '${driverTable}'`);
315
353
  const driverEntityField = this.config.tables[driverTable]?.entityField;
316
354
  const { batchRequires, globalRequires } = this._splitRequirements(entry.requires, driverTable);
317
355
 
@@ -322,7 +360,7 @@ class Orchestrator {
322
360
 
323
361
  const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
324
362
  const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
325
-
363
+
326
364
  const rollingHash = crypto.createHash('sha256');
327
365
  let totalCount = 0;
328
366
  let batchIndex = 0;
@@ -336,41 +374,41 @@ class Orchestrator {
336
374
 
337
375
  const { data: batchLocalData, entityIds } = batch;
338
376
  const combinedData = { ...batchLocalData, ...globalData };
339
-
377
+
340
378
  // STRICT FIX: Prefetch dependencies for the batch.
341
379
  const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
342
-
343
- const { rules } = this.ruleInjector.createContext();
380
+
381
+ const { rules } = this.ruleInjector.createContext();
344
382
  const batchResults = {};
345
-
383
+
346
384
  await Promise.all(entityIds.map(entityId => limit(async () => {
347
385
  const instance = new entry.class();
348
386
  const entityData = this._filterDataForEntity(combinedData, entityId, driverEntityField);
349
-
387
+
350
388
  const context = {
351
389
  computation: entry, date: dateStr, entityId, data: entityData,
352
-
390
+
353
391
  // STRICT FIX: No fallback to _lazyLoadDependency.
354
392
  getDependency: (depName, targetId) => {
355
393
  const id = targetId || entityId;
356
-
394
+
357
395
  // 1. Look in Batch-Prefetched Dependencies (Priority)
358
396
  if (batchDeps[depName] && batchDeps[depName].has(id)) {
359
397
  return batchDeps[depName].get(id);
360
398
  }
361
-
399
+
362
400
  // 2. Look in Global/Preloaded Dependencies
363
401
  if (depResults[depName]) {
364
402
  if (depResults[depName][id] !== undefined) return depResults[depName][id];
365
403
  }
366
-
404
+
367
405
  // 3. STRICT MODE: Throw Error
368
406
  throw new Error(
369
407
  `[Strict Dependency] Dependency '${depName}' (ID: ${id}) not found in batch context. ` +
370
408
  `Ensure '${depName}' is listed in ${entry.name}.getConfig().dependencies.`
371
409
  );
372
410
  },
373
-
411
+
374
412
  previousResult, rules, references: this.referenceDataCache,
375
413
  config: this.config, dataFetcher: this.dataFetcher
376
414
  };
@@ -383,7 +421,7 @@ class Orchestrator {
383
421
  })));
384
422
 
385
423
  if (!options.dryRun) {
386
- await this.storageManager.commitResults(dateStr, entry, batchResults, {});
424
+ await this.storageManager.commitResults(dateStr, entry, batchResults, {});
387
425
  const lastId = entityIds[entityIds.length - 1];
388
426
  await checkpointer.markBatchComplete(dateStr, entry.name, cp?.id, batchIndex, batchSize, lastId);
389
427
  }
@@ -399,8 +437,15 @@ class Orchestrator {
399
437
 
400
438
  // FIX: Return valid object shape including skipped: false
401
439
  return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
402
-
440
+
403
441
  } catch (error) {
442
+ console.error('________________________________________________________________');
443
+ console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
444
+ console.error(`📍 Computation: ${entry.name}`);
445
+ console.error(`💥 Error Message: ${error.message}`);
446
+ console.error(`📚 Stack Trace:\n${error.stack}`);
447
+ console.error('________________________________________________________________');
448
+
404
449
  if (cp && cp.id) {
405
450
  this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
406
451
  await this.storageManager.failCheckpoint(cp.id, error.message);
@@ -427,22 +472,22 @@ class Orchestrator {
427
472
  if (options.useWorkerPool === false) {
428
473
  return false;
429
474
  }
430
-
475
+
431
476
  if (!this.remoteRunner) return false;
432
477
  if (options.forceLocal) return false;
433
-
478
+
434
479
  const poolConfig = this.config.workerPool || {};
435
-
436
- if (poolConfig.excludeComputations?.includes(entry.name) ||
480
+
481
+ if (poolConfig.excludeComputations?.includes(entry.name) ||
437
482
  poolConfig.excludeComputations?.includes(entry.originalName)) {
438
483
  return false;
439
484
  }
440
-
441
- if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
485
+
486
+ if (poolConfig.forceOffloadComputations?.includes(entry.name) ||
442
487
  poolConfig.forceOffloadComputations?.includes(entry.originalName)) {
443
488
  return true;
444
489
  }
445
-
490
+
446
491
  if (entry.type !== 'per-entity') return false;
447
492
  return true;
448
493
  }
@@ -474,7 +519,7 @@ class Orchestrator {
474
519
 
475
520
  const batchSize = this.config.execution?.batchSize || BATCH_SIZE;
476
521
  const batchStream = this.dataFetcher.fetchComputationBatched(batchRequires, dateStr, batchSize);
477
-
522
+
478
523
  const rollingHash = crypto.createHash('sha256');
479
524
  let totalCount = 0;
480
525
  let totalErrors = 0;
@@ -489,10 +534,10 @@ class Orchestrator {
489
534
 
490
535
  const { data: batchLocalData, entityIds } = batch;
491
536
  const combinedData = { ...batchLocalData, ...globalData };
492
-
537
+
493
538
  // Prefetch dependencies for this batch
494
539
  const batchDeps = await this._prefetchBatchDependencies(entry, dateStr, depResults, entityIds);
495
-
540
+
496
541
  // Convert Map to Object for serialization
497
542
  const serializedDeps = {};
498
543
  for (const [depName, depMap] of Object.entries(batchDeps)) {
@@ -502,7 +547,7 @@ class Orchestrator {
502
547
  serializedDeps[depName] = depMap;
503
548
  }
504
549
  }
505
-
550
+
506
551
  // Merge with preloaded deps
507
552
  const mergedDeps = { ...depResults, ...serializedDeps };
508
553
 
@@ -516,18 +561,18 @@ class Orchestrator {
516
561
  // INVOKE REMOTE WORKERS
517
562
  this._log('INFO', `[Remote] Processing batch ${batchIndex}: ${entityIds.length} entities`);
518
563
  const { results: batchResults, errors } = await this.remoteRunner.runBatch(
519
- entry,
520
- dateStr,
521
- baseContext,
522
- entityIds,
523
- entityDataMap,
564
+ entry,
565
+ dateStr,
566
+ baseContext,
567
+ entityIds,
568
+ entityDataMap,
524
569
  mergedDeps
525
570
  );
526
571
 
527
572
  if (errors.length > 0) {
528
573
  this._log('WARN', `[Remote] Batch ${batchIndex}: ${errors.length} entities failed`);
529
574
  totalErrors += errors.length;
530
-
575
+
531
576
  errors.slice(0, 3).forEach(e => {
532
577
  this._log('DEBUG', ` - ${e.entityId}: ${e.error}`);
533
578
  });
@@ -569,7 +614,7 @@ class Orchestrator {
569
614
  const driverTable = entry.type === 'per-entity' ? this._getDriverTable(entry.requires) : null;
570
615
  const driverEntityField = driverTable ? this.config.tables[driverTable]?.entityField : null;
571
616
  const instance = new entry.class();
572
-
617
+
573
618
  const context = {
574
619
  computation: entry, date: dateStr, data,
575
620
  getDependency: (dep, ent) => this._lazyLoadDependency(dateStr, dep, ent, depResults),
@@ -578,7 +623,7 @@ class Orchestrator {
578
623
  };
579
624
 
580
625
  let results = {};
581
-
626
+
582
627
  if (entry.type === 'per-entity') {
583
628
  const ids = forceEntities || this._extractEntityIds(data);
584
629
  const limit = pLimit(DEFAULT_CONCURRENCY);
@@ -592,17 +637,19 @@ class Orchestrator {
592
637
  }
593
638
 
594
639
  const finalHash = this._hashResults(results);
595
-
596
- if (!options.dryRun && !forceEntities) {
640
+
641
+ if (!options.dryRun && (!forceEntities || options.allowPartialCommit)) {
597
642
  const currentStatus = await this.stateRepository.getDailyStatus(dateStr);
598
643
  const status = currentStatus.get(entry.name.toLowerCase());
599
-
644
+
600
645
  if (!options.force && status && status.resultHash === finalHash) {
601
646
  return { count: Object.keys(results || {}).length, hash: finalHash, skipped: true };
602
647
  }
603
-
648
+
604
649
  await this.storageManager.commitResults(dateStr, entry, results, {});
605
- await this.storageManager.finalizeResults(dateStr, entry);
650
+ if (!forceEntities) {
651
+ await this.storageManager.finalizeResults(dateStr, entry);
652
+ }
606
653
  }
607
654
 
608
655
  // FIX: Return valid object shape including skipped: false
@@ -617,7 +664,7 @@ class Orchestrator {
617
664
  console.log(`✅ Completed: ${summary.summary.completed}`);
618
665
  console.log(`❌ Errors: ${summary.summary.errors}`);
619
666
  console.log(`⏭️ Skipped: ${summary.summary.skipped + summary.summary.blocked + summary.summary.impossible}`);
620
-
667
+
621
668
  const skippedItems = [...summary.skipped, ...summary.blocked, ...summary.impossible];
622
669
  if (skippedItems.length > 0) {
623
670
  console.log('\n--- Details (Why did it skip?) ---');
@@ -636,11 +683,27 @@ class Orchestrator {
636
683
  }
637
684
 
638
685
  _getDriverTable(requires) {
686
+ // PASS 1: Prioritize tables with a DATE field (Time-Partitioned)
639
687
  for (const name of Object.keys(requires)) {
688
+ const spec = requires[name];
689
+ // Metrics cannot be drivers
690
+ if (spec.type === 'metric') continue;
691
+
692
+ const conf = this.config.tables[name];
693
+ // If it has BOTH, it's a perfect driver (e.g. portfolio_snapshots)
694
+ if (conf && conf.entityField && conf.dateField) return name;
695
+ }
696
+
697
+ // PASS 2: Fallback to any entity table (e.g. Static Master List)
698
+ for (const name of Object.keys(requires)) {
699
+ const spec = requires[name];
700
+ if (spec.type === 'metric') continue;
701
+
640
702
  const conf = this.config.tables[name];
641
703
  if (conf && conf.entityField) return name;
642
704
  }
643
- return null;
705
+
706
+ return null;
644
707
  }
645
708
 
646
709
  _splitRequirements(requires, driverTable) {
@@ -673,7 +736,7 @@ class Orchestrator {
673
736
  for (const dep of entry.dependencies) {
674
737
  const stat = dailyStatus.get(dep);
675
738
  if (stat?.resultHash) depResultHashes[dep] = stat.resultHash;
676
- if (stat?.entityCount > 50000) depResults[dep] = null;
739
+ if (stat?.entityCount > 50000) depResults[dep] = null;
677
740
  else depResults[dep] = await this.stateRepository.getResult(dateStr, dep);
678
741
  }
679
742
  if (entry.conditionalDependencies) {
@@ -702,10 +765,6 @@ class Orchestrator {
702
765
  return prefetched;
703
766
  }
704
767
 
705
- /**
706
- * Build a reverse dependency index so that when a computation completes
707
- * we can quickly find all computations that depend on it.
708
- */
709
768
  _buildDependentsIndex() {
710
769
  this.dependentsByName = new Map();
711
770
  if (!this.manifest) return;
@@ -720,42 +779,91 @@ class Orchestrator {
720
779
  }
721
780
  }
722
781
 
782
+ // =========================================================================
783
+ // TASK SCHEDULING & CASCADING
784
+ // =========================================================================
785
+
723
786
  /**
724
- * Schedule dependent computations via Cloud Tasks after a computation
725
- * has successfully completed for a given date.
726
- *
727
- * The scheduler is responsible only for root / pass-1 computations.
728
- * All downstream work is triggered here with a configurable time gap
729
- * once ALL dependencies of a computation have completed.
787
+ * Schedules a task on Cloud Tasks (Wraps API call)
730
788
  */
731
- async _scheduleDependents(entry, dateStr) {
732
- const dependents = this.dependentsByName.get(entry.name);
733
- if (!dependents || dependents.length === 0) return;
734
-
789
+ async _scheduleCloudTask(computationName, dateStr, source, delayMinutes = 0) {
735
790
  const cloudTasksConfig = this.config.cloudTasks;
736
- if (!cloudTasksConfig) {
737
- this._log('WARN', 'cloudTasks config missing; skipping dependent scheduling');
791
+
792
+ // Mock fallback for local/test environments without cloud config
793
+ if (!cloudTasksConfig || !cloudTasksConfig.queueName) {
794
+ if (process.env.NODE_ENV !== 'production') {
795
+ this._log('INFO', `[Mock Task] Scheduled ${computationName} for ${dateStr} (Source: ${source})`);
796
+ return;
797
+ }
738
798
  return;
739
799
  }
740
800
 
741
801
  const { projectId, location, queueName, dispatcherUrl, serviceAccountEmail } = cloudTasksConfig;
802
+
742
803
  if (!projectId || !location || !queueName || !dispatcherUrl || !serviceAccountEmail) {
743
- this._log('WARN', 'cloudTasks configuration incomplete; skipping dependent scheduling');
804
+ this._log('WARN', 'cloudTasks configuration incomplete; skipping scheduling');
744
805
  return;
745
806
  }
746
807
 
747
- const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
748
808
  const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
809
+ const scheduleTime = new Date(Date.now() + delayMinutes * 60 * 1000);
810
+
811
+ // Unique ID prevents duplicate tasks for the same logical event
812
+ // Note: For backfills, we might want to allow overwrite, but unique IDs help tracing
813
+ const taskName = `${queuePath}/tasks/${computationName}-${dateStr}-${Date.now()}`;
814
+
815
+ const payload = {
816
+ computationName: computationName,
817
+ targetDate: dateStr,
818
+ source: source,
819
+ scheduledAt: scheduleTime.toISOString()
820
+ };
821
+
822
+ const task = {
823
+ httpRequest: {
824
+ httpMethod: 'POST',
825
+ url: dispatcherUrl,
826
+ headers: { 'Content-Type': 'application/json' },
827
+ body: Buffer.from(JSON.stringify(payload)).toString('base64'),
828
+ oidcToken: { serviceAccountEmail, audience: dispatcherUrl }
829
+ },
830
+ scheduleTime: {
831
+ seconds: Math.floor(scheduleTime.getTime() / 1000),
832
+ nanos: 0
833
+ },
834
+ name: taskName
835
+ };
836
+
837
+ try {
838
+ await this.cloudTasksClient.createTask({ parent: queuePath, task });
839
+ if (source === 'dependency') {
840
+ this._log('INFO', `Triggered dependent ${computationName} for ${dateStr}`);
841
+ }
842
+ } catch (e) {
843
+ if (e.code === 6) { // ALREADY_EXISTS
844
+ this._log('INFO', `Task for ${computationName} (${dateStr}) already exists.`);
845
+ } else {
846
+ this._log('WARN', `Failed to schedule task ${computationName}: ${e.message}`);
847
+ }
848
+ }
849
+ }
749
850
 
750
- // Use the latest in-memory status for this date so we can see the
751
- // just-updated computation plus any earlier ones.
851
+ /**
852
+ * Checks all dependents of the just-completed computation.
853
+ * If they are ready (all other dependencies met), schedules them.
854
+ */
855
+ async _scheduleDependents(entry, dateStr) {
856
+ const dependents = this.dependentsByName.get(entry.name);
857
+ if (!dependents || dependents.length === 0) return;
858
+
859
+ const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
752
860
  const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
753
861
 
754
862
  for (const depEntry of dependents) {
755
- // Compute the latest completion time across all of this computation's dependencies.
756
863
  let latestDependencyTime = null;
757
864
  let missingDependency = false;
758
865
 
866
+ // Check if *all* dependencies of this dependent are ready
759
867
  for (const depName of depEntry.dependencies || []) {
760
868
  const depStatus = dailyStatus.get(depName);
761
869
  if (!depStatus || !depStatus.updatedAt) {
@@ -771,78 +879,43 @@ class Orchestrator {
771
879
  }
772
880
  }
773
881
 
774
- // If any dependency hasn't completed yet, we will schedule this
775
- // dependent when that dependency finishes instead.
776
- if (missingDependency || !latestDependencyTime) {
882
+ if (missingDependency) {
883
+ // Dependent is not ready yet (waiting on other inputs)
777
884
  continue;
778
885
  }
779
886
 
780
- const scheduleTime = new Date(latestDependencyTime.getTime() + dependencyGapMinutes * 60 * 1000);
781
-
782
- const payload = {
783
- computationName: depEntry.originalName,
784
- targetDate: dateStr,
785
- source: 'dependency',
786
- scheduledAt: scheduleTime.toISOString()
787
- };
788
-
789
- const taskName = `${queuePath}/tasks/${depEntry.name}-${dateStr}`;
790
-
791
- const task = {
792
- httpRequest: {
793
- httpMethod: 'POST',
794
- url: dispatcherUrl,
795
- headers: { 'Content-Type': 'application/json' },
796
- body: Buffer.from(JSON.stringify(payload)).toString('base64'),
797
- oidcToken: {
798
- serviceAccountEmail,
799
- audience: dispatcherUrl
800
- }
801
- },
802
- scheduleTime: {
803
- seconds: Math.floor(scheduleTime.getTime() / 1000),
804
- nanos: 0
805
- },
806
- name: taskName
807
- };
808
-
809
- try {
810
- await this.cloudTasksClient.createTask({
811
- parent: queuePath,
812
- task
813
- });
814
-
815
- this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
816
- } catch (e) {
817
- // Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
818
- if (e.code === 6) {
819
- this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
820
- continue;
821
- }
822
-
823
- this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
824
- }
887
+ // Schedule via the unified helper
888
+ await this._scheduleCloudTask(depEntry.originalName, dateStr, 'dependency', dependencyGapMinutes);
825
889
  }
826
890
  }
827
891
 
828
892
  async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
829
893
  if (preloaded[depName] && !entityId) return preloaded[depName];
830
894
  if (preloaded[depName] && entityId) return preloaded[depName][entityId];
831
-
832
- // WARN: This is the slow path that we removed from Streaming
833
895
  this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
834
-
835
896
  if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
836
897
  return this.stateRepository.getResult(dateStr, depName);
837
898
  }
838
899
 
839
900
  async _loadReferenceData() {
840
901
  if (!this.config.referenceData) return;
841
- await Promise.all(this.config.referenceData.map(async (table) => {
902
+
903
+ await Promise.all(this.config.referenceData.map(async (refEntry) => {
842
904
  try {
843
- const data = await this.dataFetcher.fetch({ table, targetDate: new Date().toISOString().slice(0, 10), mandatory: false });
905
+ // FIX: Support both string (legacy) and object (with fields) formats
906
+ const table = typeof refEntry === 'string' ? refEntry : refEntry.table;
907
+ const fields = typeof refEntry === 'string' ? null : refEntry.fields;
908
+
909
+ const data = await this.dataFetcher.fetch({
910
+ table,
911
+ fields, // Pass specific fields to prevent "LAZY SELECT BLOCKED"
912
+ targetDate: new Date().toISOString().slice(0, 10),
913
+ mandatory: false
914
+ });
844
915
  this.referenceDataCache[table] = data || {};
845
- } catch (e) { this._log('WARN', `Failed to load Ref Data ${table}: ${e.message}`); }
916
+ } catch (e) {
917
+ this._log('WARN', `Failed to load Ref Data ${refEntry.table || refEntry}: ${e.message}`);
918
+ }
846
919
  }));
847
920
  }
848
921
 
@@ -860,9 +933,9 @@ class Orchestrator {
860
933
  Object.entries(data).forEach(([tbl, d]) => {
861
934
  const conf = this.config.tables[tbl] || {};
862
935
  if (conf.entityField === driverEntityField && d && !Array.isArray(d)) {
863
- out[tbl] = d[id] || null;
936
+ out[tbl] = d[id] || null;
864
937
  } else {
865
- out[tbl] = d;
938
+ out[tbl] = d;
866
939
  }
867
940
  });
868
941
  return out;
@@ -878,12 +951,12 @@ class Orchestrator {
878
951
  d.setUTCDate(d.getUTCDate() - 1);
879
952
  return d.toISOString().slice(0, 10);
880
953
  }
881
- _getAllTables() {
882
- const s = new Set();
883
- if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
884
- return Array.from(s);
954
+ _getAllTables() {
955
+ const s = new Set();
956
+ if (this.manifest) this.manifest.forEach(e => Object.keys(e.requires).forEach(t => s.add(t)));
957
+ return Array.from(s);
885
958
  }
886
959
  _log(l, m) { this.logger.log(l, `[Orchestrator] ${m}`); }
887
960
  }
888
961
 
889
- module.exports = { Orchestrator };
962
+ module.exports = { Orchestrator };