bulltrackers-module 1.0.766 → 1.0.768

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/functions/computation-system-v2/computations/BehavioralAnomaly.js +298 -186
  2. package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
  3. package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
  4. package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
  5. package/functions/computation-system-v2/config/bulltrackers.config.js +26 -14
  6. package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
  7. package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
  8. package/functions/computation-system-v2/framework/data/DataFetcher.js +142 -4
  9. package/functions/computation-system-v2/framework/execution/Orchestrator.js +18 -31
  10. package/functions/computation-system-v2/framework/storage/StorageManager.js +7 -17
  11. package/functions/computation-system-v2/framework/testing/ComputationTester.js +155 -66
  12. package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
  13. package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
  14. package/package.json +1 -1
  15. package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
  16. package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
  17. package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
  18. package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
  19. package/functions/computation-system-v2/test/analyze-results.js +0 -238
  20. package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
  21. package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
  22. package/functions/computation-system-v2/test/other/test-framework.js +0 -500
  23. package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
  24. package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
  25. package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
  26. package/functions/computation-system-v2/test/other/test-results.json +0 -31
  27. package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
  28. package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
  29. package/functions/computation-system-v2/test/other/test-storage.js +0 -449
  30. package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
  31. package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
  32. package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
@@ -84,7 +84,7 @@ module.exports = {
  'portfolio_snapshots': {
  dateField: 'date',
  entityField: 'user_id',
- dataField: 'portfolio_data',
+ //dataField: 'portfolio_data',
  description: 'Daily portfolio snapshots for all users'
  },

@@ -92,7 +92,7 @@ module.exports = {
  'trade_history_snapshots': {
  dateField: 'date',
  entityField: 'user_id',
- dataField: 'history_data',
+ //dataField: 'history_data',
  description: 'Daily trade history snapshots'
  },

@@ -100,7 +100,7 @@ module.exports = {
  'social_post_snapshots': {
  dateField: 'date',
  entityField: 'user_id',
- dataField: 'posts_data',
+ //dataField: 'posts_data',
  description: 'Daily social post snapshots'
  },

@@ -108,7 +108,7 @@ module.exports = {
  'asset_prices': {
  dateField: 'date',
  entityField: 'instrument_id',
- dataField: null, // Flat table
+ //dataField: null, // Flat table
  description: 'Daily asset prices'
  },

@@ -116,7 +116,7 @@ module.exports = {
  'pi_rankings': {
  dateField: 'date',
  entityField: 'pi_id',
- dataField: 'rankings_data',
+ //dataField: 'rankings_data',
  description: 'Daily PI rankings snapshot'
  },

@@ -124,7 +124,7 @@ module.exports = {
  'pi_master_list': {
  dateField: null, // Not date-partitioned
  entityField: 'cid',
- dataField: null,
+ //dataField: null,
  description: 'Master list of all Popular Investors'
  },

@@ -132,7 +132,7 @@ module.exports = {
  'pi_ratings': {
  dateField: 'date',
  entityField: 'pi_id',
- dataField: null,
+ //dataField: null,
  description: 'Daily PI ratings'
  },

@@ -140,7 +140,7 @@ module.exports = {
  'pi_page_views': {
  dateField: 'date',
  entityField: 'pi_id',
- dataField: null,
+ //dataField: null,
  description: 'Daily PI page view metrics'
  },

@@ -148,7 +148,7 @@ module.exports = {
  'watchlist_membership': {
  dateField: 'date',
  entityField: 'pi_id',
- dataField: null,
+ //dataField: null,
  description: 'Daily watchlist membership counts'
  },

@@ -156,7 +156,7 @@ module.exports = {
  'pi_alert_history': {
  dateField: 'date',
  entityField: 'pi_id',
- dataField: 'metadata',
+ //dataField: 'metadata',
  description: 'Daily alert trigger history'
  },

@@ -164,7 +164,7 @@ module.exports = {
  'instrument_insights': {
  dateField: 'date',
  entityField: 'instrument_id',
- dataField: 'insights_data',
+ //dataField: 'insights_data',
  description: 'Daily instrument insights'
  },

@@ -172,7 +172,7 @@ module.exports = {
  'ticker_mappings': {
  dateField: null,
  entityField: 'instrument_id',
- dataField: null,
+ //dataField: null,
  description: 'Instrument ID to ticker symbol mappings'
  },

@@ -180,15 +180,27 @@ module.exports = {
  'computation_results': {
  dateField: 'date',
  entityField: null, // Keyed by computation_name
- dataField: 'result_data',
+ //dataField: 'result_data',
  description: 'Stored computation results'
  },
  // NEW: Sector Mappings Table
  'sector_mappings': {
  dateField: null, // Static data
  entityField: 'symbol', // Key the data by symbol for fast lookup
- dataField: null,
+ //dataField: null,
  description: 'Ticker to Sector mappings migrated from Firestore'
+ },
+
+ // NEW: Map the abstract requirement 'behavioral_features' to the actual BQ table
+ 'behavioral_features': {
+ tableName: 'daily_behavioral_features',
+ dateField: 'date',
+ entityField: 'user_id', // Important for per-entity fetching
+ schema: [
+ { name: 'user_id', type: 'STRING' },
+ { name: 'hhi_score', type: 'FLOAT' },
+ { name: 'martingale_events', type: 'INTEGER' }
+ ]
  }
  },

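Editorial note on the new entry above (not part of the published diff): the 'behavioral_features' key maps an abstract requirement name onto the physical daily_behavioral_features table, so a computation can declare what it needs without knowing the table. A minimal sketch of a consumer follows; the getMetadata/process method names and the requires shape are illustrative assumptions, not confirmed package API.

// Hypothetical sketch only - method names and shapes are assumed, not taken
// from the package. It shows the intent of the config entry above: the
// computation asks for 'behavioral_features', and the DataFetcher resolves
// that name to 'daily_behavioral_features' via tableName/dateField/entityField.
class BehavioralAnomalySketch {
  static getMetadata() {
    return {
      type: 'per-entity',
      requires: { behavioral_features: {} } // abstract name, resolved by config
    };
  }

  process({ behavioral_features }) {
    // Rows follow the declared schema, keyed by user_id (entityField), e.g.
    // { user_id: 'abc', hhi_score: 0.42, martingale_events: 3 }
    const flagged = behavioral_features.filter(r => (r.martingale_events || 0) > 0);
    return { anomalousUsers: flagged.map(r => r.user_id) };
  }
}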
@@ -39,12 +39,9 @@ class ManifestBuilder {
  if (entry) {
  manifestMap.set(entry.name, entry);

- // CRITICAL FIX: Include conditional dependencies in the DAG for cycle detection and topological sort.
- // Even if the dependency is conditional at runtime, the execution order (Pass) must respect it.
  const graphDeps = [...entry.dependencies];
  if (entry.conditionalDependencies) {
  entry.conditionalDependencies.forEach(cd => {
- // Ensure we use the normalized name for the graph
  graphDeps.push(cd.computation);
  });
  }
@@ -60,7 +57,7 @@ class ManifestBuilder {
  throw new Error(`[Manifest] Circular dependency detected: ${cycle}`);
  }

- // 3. Topological Sort (calculates passes)
+ // 3. Topological Sort
  const sortedItems = Graph.topologicalSort(nodes, adjacency);

  // 4. Hydrate Sorted List
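Graph.topologicalSort itself is not shown in this diff. For orientation, here is a minimal Kahn-style sketch of what a pass-assigning topological sort over the adjacency built above could look like; the function shape and the { name, pass } output are assumptions, not the package's actual Graph.js.

// Illustrative sketch, NOT the package's Graph.js. `adjacency` maps a
// computation name to the names it depends on (strict plus conditional,
// as merged in the manifest code above).
function topologicalSort(nodes, adjacency) {
  const indegree = new Map(nodes.map(n => [n, (adjacency.get(n) || []).length]));
  const dependents = new Map(nodes.map(n => [n, []]));
  for (const n of nodes) {
    for (const dep of adjacency.get(n) || []) {
      if (dependents.has(dep)) dependents.get(dep).push(n);
    }
  }
  const pass = new Map(nodes.map(n => [n, 0]));
  const queue = nodes.filter(n => indegree.get(n) === 0);
  const sorted = [];
  while (queue.length > 0) {
    const n = queue.shift();
    sorted.push(n);
    for (const m of dependents.get(n)) {
      pass.set(m, Math.max(pass.get(m), pass.get(n) + 1)); // dependents run one pass later
      indegree.set(m, indegree.get(m) - 1);
      if (indegree.get(m) === 0) queue.push(m);
    }
  }
  // sorted.length < nodes.length would indicate a cycle; the builder
  // detects cycles separately before this step.
  return sorted.map(name => ({ name, pass: pass.get(name) }));
}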
@@ -115,8 +112,6 @@ class ManifestBuilder {
  compositeHash += `|RULE:${mod}:${h}`;
  }

- // Normalize conditional dependencies if they exist
- // This ensures the Orchestrator can look them up by normalized name later
  const conditionalDependencies = (config.conditionalDependencies || []).map(cd => ({
  ...cd,
  computation: this._normalize(cd.computation)
@@ -128,16 +123,20 @@ class ManifestBuilder {
  class: ComputationClass,
  category: config.category || 'default',
  type: config.type || 'global',
+
+ outputTable: config.outputTable || null,
+ // --------------------------------------
+
  requires: config.requires || {},
  dependencies: (config.dependencies || []).map(d => this._normalize(d)),
- conditionalDependencies, // FIX: Pass this through to the manifest entry
+ conditionalDependencies,
  isHistorical: config.isHistorical || false,
  isTest: config.isTest || false,
  schedule: this.scheduleValidator.parseSchedule(config.schedule),
  storage: this._parseStorageConfig(config.storage),
  ttlDays: config.ttlDays,
- pass: 0, // Set later by Graph.js
- hash: this._hashCode(compositeHash), // Intrinsic hash
+ pass: 0,
+ hash: this._hashCode(compositeHash),
  weight: ComputationClass.getWeight ? ComputationClass.getWeight() : 1.0,
  composition: {
  epoch: this.epoch,
@@ -152,7 +151,6 @@ class ManifestBuilder {
  _computeFinalHashes(sorted, manifestMap) {
  for (const entry of sorted) {
  let hashInput = entry.hash;
- // Includes strict dependencies in the hash chain
  if (entry.dependencies.length > 0) {
  const depHashes = entry.dependencies.sort().map(d => {
  const h = manifestMap.get(d)?.hash;
@@ -161,10 +159,6 @@ class ManifestBuilder {
  });
  hashInput += `|DEPS:${depHashes.join('|')}`;
  }
- // Note: Conditional dependencies are currently excluded from the hash chain
- // because they might not be loaded. If strict versioning is required for them,
- // they should be added here too.
-
  entry.hash = this._hashCode(hashInput);
  }
  }
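In other words, each computation's final hash folds in the hashes of its strict dependencies, so a code change anywhere upstream yields new hashes (and therefore re-runs) all the way downstream. A compact illustration of the chaining idea; _hashCode's internals are not shown in the diff, so an md5 digest stands in.

const crypto = require('crypto');

// Illustration of the dependency hash chain (not the package's exact code).
function finalHash(intrinsicHash, depFinalHashes) {
  let input = intrinsicHash;
  if (depFinalHashes.length > 0) {
    input += `|DEPS:${depFinalHashes.join('|')}`;
  }
  return crypto.createHash('md5').update(input).digest('hex');
}

// If B depends on A: changing A's code changes hashA, which changes
// finalHash(hashB, [hashA]) - so B is re-versioned even though B's code
// is unchanged.
const hashA = finalHash('codeA-v2', []);
const hashB = finalHash('codeB-v1', [hashA]);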
@@ -203,7 +197,7 @@ class ManifestBuilder {
  const used = {};
  for (const [name, exports] of Object.entries(this.sharedLayers)) {
  const found = Object.keys(exports).filter(exp =>
- code.includes(exp) // Simple include check, similar to original regex
+ code.includes(exp)
  );
  if (found.length) used[name] = found;
  }
@@ -240,7 +234,6 @@ class ManifestBuilder {

  _log(l, m) { this.logger ? this.logger.log(l, `[Manifest] ${m}`) : console.log(`[Manifest] ${m}`); }

- // Public alias for groupByPass matching the Interface
  groupByPass(m) { return this._groupByPass(m); }
  }

@@ -2,6 +2,7 @@
  * @fileoverview Run Analyzer
  * * Pure logic component that determines which computations need to run.
  * Decouples decision-making from execution and storage.
+ * * * UPDATE: Removed SQL bypass. All computations are now checked for data availability.
  */

  class RunAnalyzer {
@@ -57,7 +58,7 @@ class RunAnalyzer {
  }

  // 2. Data Availability Check
- // Note: This is the only async IO part (calls DataFetcher)
+ // UPDATE: Removed isSql check. All computations must have their raw data available.
  const availability = await this.dataFetcher.checkAvailability(requires, dateStr);
  if (!availability.canRun) {
  if (!isToday) {
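For reference, checkAvailability's return shape is visible later in this diff ({ canRun, available, missing }). A minimal sketch of the decision this check feeds; the 'blocked'/'impossible' labels are borrowed from the Orchestrator's execution-summary terminology, and the exact decision shape is an assumption.

// Sketch only - decision labels and shapes are assumed for illustration.
async function decideRun(dataFetcher, requires, dateStr, isToday) {
  const availability = await dataFetcher.checkAvailability(requires, dateStr);
  if (!availability.canRun) {
    // A past date will never gain the missing raw data: give up ('impossible').
    // Today's data may still land: defer and retry later ('blocked').
    return isToday
      ? { type: 'blocked', missing: availability.missing }
      : { type: 'impossible', missing: availability.missing };
  }
  return { type: 'run' };
}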
@@ -9,9 +9,14 @@
  * * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
  * * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
  * - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
+ * * V2.6 UPDATE: Query Result Caching.
+ * - Implemented in-memory LRU cache to prevent redundant BigQuery costs for reference data.
+ * * V2.7 FIX: Double-Encoded JSON Normalization.
+ * - Automatically detects and recursively parses JSON strings (e.g. posts_data) to prevent downstream parsing errors.
  */

  const { BigQuery } = require('@google-cloud/bigquery');
+ const crypto = require('crypto');

  // FIX #3: Hard limit to prevent cost spirals
  const MAX_LOOKBACK_DAYS = 30;
@@ -29,11 +34,24 @@ class DataFetcher {

  this.client = new BigQuery({ projectId: this.projectId });

+ // Cache Configuration (V2.6)
+ this.cacheConfig = config.queryCache || {
+ enabled: true,
+ ttlMs: 300000, // 5 minutes default
+ maxSize: 1000 // Max unique queries to cache
+ };
+
+ // Use Map as LRU cache (insertion order preserved)
+ this.cache = new Map();
+
  this.stats = {
  queries: 0,
  rowsFetched: 0,
  errors: 0,
- bytesProcessed: 0
+ bytesProcessed: 0,
+ cacheHits: 0,
+ cacheMisses: 0,
+ cacheEvictions: 0
  };
  }
@@ -273,21 +291,74 @@ DataFetcher {

  return { canRun: missing.length === 0, available, missing };
  }
+

  getStats() { return { ...this.stats }; }
- resetStats() { this.stats = { queries: 0, rowsFetched: 0, errors: 0, bytesProcessed: 0 }; }
+
+ resetStats() {
+ this.stats = {
+ queries: 0,
+ rowsFetched: 0,
+ errors: 0,
+ bytesProcessed: 0,
+ cacheHits: 0,
+ cacheMisses: 0,
+ cacheEvictions: 0
+ };
+ this.cache.clear();
+ }
+
+ clearCache() {
+ this.cache.clear();
+ this._log('DEBUG', 'Query cache cleared');
+ }
+
+ // =========================================================================
+ // PRIVATE METHODS
+ // =========================================================================

  async _execute(query) {
+ // V2.6: Query Caching
+ if (this.cacheConfig.enabled) {
+ const cacheKey = this._generateCacheKey(query);
+ const cached = this.cache.get(cacheKey);
+
+ if (cached) {
+ if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
+ this.stats.cacheHits++;
+ // Refresh LRU position (delete and re-set moves to end)
+ this.cache.delete(cacheKey);
+ this.cache.set(cacheKey, cached);
+ // Return cached rows immediately - no BigQuery cost
+ return cached.rows;
+ } else {
+ this.cache.delete(cacheKey); // Expired
+ }
+ }
+ this.stats.cacheMisses++;
+ }
+
  this.stats.queries++;
+
  try {
  const [job] = await this.client.createQueryJob({
  query: query.sql, params: query.params, location: this.location
  });
  const [rows] = await job.getQueryResults();
  const [metadata] = await job.getMetadata();
+
  this.stats.rowsFetched += rows.length;
  this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
- return rows;
+
+ // FIX V2.7: Normalize Rows (Recursive JSON Parse) BEFORE caching
+ const normalizedRows = rows.map(r => this._normalizeRow(r));
+
+ // Store in cache if enabled
+ if (this.cacheConfig.enabled) {
+ this._addToCache(query, normalizedRows);
+ }
+
+ return normalizedRows;
  } catch (e) {
  this.stats.errors++;
  this._log('ERROR', `Query failed: ${e.message}`);
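The caching above treats a plain Map as an LRU by exploiting its guaranteed insertion order: a hit is deleted and re-inserted to move it to the end, and eviction removes the first (oldest) key. A self-contained sketch of that pattern with the same TTL semantics, for readers who want it outside the DataFetcher context:

// Standalone sketch of the Map-as-LRU pattern used by DataFetcher above.
class LruCache {
  constructor(maxSize = 1000, ttlMs = 300000) {
    this.maxSize = maxSize;
    this.ttlMs = ttlMs;
    this.map = new Map(); // Map preserves insertion order
  }

  get(key) {
    const hit = this.map.get(key);
    if (!hit) return undefined;
    if (Date.now() - hit.timestamp >= this.ttlMs) {
      this.map.delete(key); // expired
      return undefined;
    }
    // Delete + re-set moves the entry to the end (most recently used)
    this.map.delete(key);
    this.map.set(key, hit);
    return hit.value;
  }

  set(key, value) {
    if (this.map.size >= this.maxSize) {
      // First key in iteration order is the least recently used
      this.map.delete(this.map.keys().next().value);
    }
    this.map.set(key, { value, timestamp: Date.now() });
  }
}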
@@ -296,6 +367,8 @@ DataFetcher {
  }

  async *_executeStream(query) {
+ // NOTE: We do NOT cache streams. They are typically massive datasets (batch processing)
+ // and caching them in memory would cause OOM.
  this.stats.queries++;
  try {
  const [job] = await this.client.createQueryJob({
@@ -304,7 +377,8 @@ DataFetcher {
  const stream = job.getQueryResultsStream();
  for await (const row of stream) {
  this.stats.rowsFetched++;
- yield row;
+ // FIX V2.7: Normalize Rows (Recursive JSON Parse)
+ yield this._normalizeRow(row);
  }
  } catch (e) {
  this.stats.errors++;
@@ -312,6 +386,70 @@ DataFetcher {
  throw e;
  }
  }
+
+ /**
+ * V2.8 FIX: JSON Detection Logic
+ */
+ _normalizeRow(row) {
+ const normalized = { ...row };
+ for (const [key, value] of Object.entries(normalized)) {
+ if (typeof value === 'string') {
+ const trimmed = value.trim();
+ // FIX: Check for " (Double Encoded JSON) in addition to { and [
+ if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
+ normalized[key] = this._safeRecursiveParse(value);
+ }
+ }
+ }
+ return normalized;
+ }
+
+ /**
+ * V2.7 FIX: Helper to safely recursively parse JSON.
+ * Handles: Double-Encoded JSON Strings (parsed recursively)
+ */
+ _safeRecursiveParse(input) {
+ if (!input) return null;
+ if (typeof input === 'object') return input;
+ try {
+ const parsed = JSON.parse(input);
+ // Recursion for double-encoded strings
+ if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
+ return parsed;
+ } catch (e) {
+ return input; // Not JSON, return original
+ }
+ }
+
+ /**
+ * V2.6: Generate a unique cache key for a query
+ */
+ _generateCacheKey(query) {
+ // Hash the SQL + Params to ensure uniqueness
+ const str = query.sql + JSON.stringify(query.params || {});
+ return crypto.createHash('md5').update(str).digest('hex');
+ }
+
+ /**
+ * V2.6: Add to cache with LRU eviction
+ */
+ _addToCache(query, rows) {
+ // Generate key
+ const key = this._generateCacheKey(query);
+
+ // Eviction Logic
+ if (this.cache.size >= this.cacheConfig.maxSize) {
+ // Map iterator yields in insertion order. First item is oldest.
+ const oldestKey = this.cache.keys().next().value;
+ this.cache.delete(oldestKey);
+ this.stats.cacheEvictions++;
+ }
+
+ this.cache.set(key, {
+ rows: rows,
+ timestamp: Date.now()
+ });
+ }

  /**
  * Transforms raw rows into a structured object.
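To make the V2.7 fix concrete: a value such as posts_data that was JSON.stringify'd twice before landing in BigQuery arrives as a quoted string, which is why _normalizeRow also checks for a leading double quote. A standalone demonstration of the same parsing logic:

// Demonstration of the double-encoding problem _safeRecursiveParse solves.
function safeRecursiveParse(input) {
  if (!input) return null;
  if (typeof input === 'object') return input;
  try {
    const parsed = JSON.parse(input);
    return typeof parsed === 'string' ? safeRecursiveParse(parsed) : parsed;
  } catch (e) {
    return input; // not JSON, return original
  }
}

const once = JSON.stringify({ posts: [1, 2] }); // '{"posts":[1,2]}'
const twice = JSON.stringify(once);             // a string that starts with "
console.log(safeRecursiveParse(twice));         // { posts: [ 1, 2 ] }
console.log(safeRecursiveParse('plain text'));  // 'plain text' (unchanged)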
@@ -5,12 +5,7 @@
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
  * 3. Execution Strategy (Streaming vs. In-Memory)
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
- * * * UPDATE: Added Execution Summary logging to debug Skipped/Blocked/Impossible tasks.
- * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
- * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
- * * * UPDATE: Aggregates performance reporting to prevent log spam.
- * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
- * * * FIX: Added missing 'skipped' property to return types for type safety.
+ * * * UPDATE: Removed SQL-based execution support (isSql flag ignored).
  */

  const crypto = require('crypto');
@@ -182,6 +177,7 @@ class Orchestrator {
  const { name } = entry;
  const forceEntities = options.entities;

+ // 1. Analyze Status (Skip if done/cached, unless forced)
  if (!forceEntities) {
  const decision = await this._analyzeEntry(entry, dateStr);
  const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
@@ -198,7 +194,12 @@ class Orchestrator {
  this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
  const startTime = Date.now();

+ // 2. Load Dependencies (Crucial for Lineage, Locking, and Upstream Checks)
  const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
+
+ // =====================================================================
+ // STANDARD JS COMPUTATION (ETL) ONLY
+ // =====================================================================

  let previousResult = null;
  if (entry.isHistorical) {
@@ -256,8 +257,6 @@ class Orchestrator {
  await this.lineageMiddleware.flush();

  // Trigger dependency-driven cascading for downstream computations.
- // This will enqueue Cloud Tasks for any dependents whose full
- // dependency set has completed for the given date.
  try {
  await this._scheduleDependents(entry, dateStr);
  } catch (cascadeError) {
@@ -401,8 +400,19 @@ class Orchestrator {
  return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };

  } catch (error) {
+ // === 🔍 INSERT THIS DEBUG BLOCK ===
+ console.error('________________________________________________________________');
+ console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
+ console.error(`📍 Computation: ${entry.name}`);
+ console.error(`💥 Error Message: ${error.message}`);
+ console.error(`📚 Stack Trace:\n${error.stack}`);
+ console.error('________________________________________________________________');
+ // ===================================
+
  if (cp && cp.id) {
  this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
+ // This next line is what causes the "Streaming Buffer" error
+ // if the row was just inserted. Now you will see the REAL error above.
  await this.storageManager.failCheckpoint(cp.id, error.message);
  }
  throw error;
@@ -702,10 +712,6 @@ class Orchestrator {
  return prefetched;
  }

- /**
- * Build a reverse dependency index so that when a computation completes
- * we can quickly find all computations that depend on it.
- */
  _buildDependentsIndex() {
  this.dependentsByName = new Map();
  if (!this.manifest) return;
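The index built here inverts the manifest's dependency edges so that, when a computation completes, its dependents can be looked up directly. The diff shows only the first lines of the method; a minimal sketch of the inversion, assuming the manifest entry shape (`name`, `dependencies`) seen elsewhere in this diff:

// Sketch: invert dependency edges into a name -> [dependents] map.
// Conditional dependencies may also participate; that detail is not
// visible in this hunk, so only strict dependencies are shown here.
function buildDependentsIndex(manifest) {
  const dependentsByName = new Map();
  for (const entry of manifest) {
    for (const dep of entry.dependencies) {
      if (!dependentsByName.has(dep)) dependentsByName.set(dep, []);
      dependentsByName.get(dep).push(entry);
    }
  }
  return dependentsByName;
}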
@@ -720,14 +726,6 @@ class Orchestrator {
  }
  }

- /**
- * Schedule dependent computations via Cloud Tasks after a computation
- * has successfully completed for a given date.
- *
- * The scheduler is responsible only for root / pass-1 computations.
- * All downstream work is triggered here with a configurable time gap
- * once ALL dependencies of a computation have completed.
- */
  async _scheduleDependents(entry, dateStr) {
  const dependents = this.dependentsByName.get(entry.name);
  if (!dependents || dependents.length === 0) return;
@@ -746,13 +744,9 @@ class Orchestrator {

  const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
  const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
-
- // Use the latest in-memory status for this date so we can see the
- // just-updated computation plus any earlier ones.
  const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);

  for (const depEntry of dependents) {
- // Compute the latest completion time across all of this computation's dependencies.
  let latestDependencyTime = null;
  let missingDependency = false;

@@ -771,8 +765,6 @@ class Orchestrator {
  }
  }

- // If any dependency hasn't completed yet, we will schedule this
- // dependent when that dependency finishes instead.
  if (missingDependency || !latestDependencyTime) {
  continue;
  }
@@ -814,12 +806,10 @@ class Orchestrator {

  this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
  } catch (e) {
- // Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
  if (e.code === 6) {
  this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
  continue;
  }
-
  this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
  }
  }
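The e.code === 6 branch (gRPC ALREADY_EXISTS) implies tasks are created with deterministic names, which is how Cloud Tasks deduplicates repeated scheduling attempts. A hedged sketch of that pattern with @google-cloud/tasks; the task ID format and the target URL below are illustrative assumptions, not taken from this package:

const { CloudTasksClient } = require('@google-cloud/tasks');

// Sketch: a deterministic task ID makes createTask idempotent. Re-creating
// the same name raises gRPC code 6 (ALREADY_EXISTS), which the Orchestrator
// above treats as "already scheduled" and ignores.
async function scheduleOnce(client, queuePath, depName, dateStr, scheduleTime) {
  const taskId = `${depName}-${dateStr}`.replace(/[^A-Za-z0-9_-]/g, '-');
  try {
    await client.createTask({
      parent: queuePath,
      task: {
        name: `${queuePath}/tasks/${taskId}`, // deterministic -> idempotent
        scheduleTime: { seconds: Math.floor(scheduleTime.getTime() / 1000) },
        httpRequest: {
          httpMethod: 'POST',
          url: 'https://example.invalid/run', // placeholder endpoint
          body: Buffer.from(JSON.stringify({ name: depName, date: dateStr })).toString('base64')
        }
      }
    });
  } catch (e) {
    if (e.code !== 6) throw e; // 6 = ALREADY_EXISTS, safe to ignore
  }
}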
@@ -828,10 +818,7 @@ class Orchestrator {
  async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
  if (preloaded[depName] && !entityId) return preloaded[depName];
  if (preloaded[depName] && entityId) return preloaded[depName][entityId];
-
- // WARN: This is the slow path that we removed from Streaming
  this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
-
  if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
  return this.stateRepository.getResult(dateStr, depName);
  }
@@ -10,7 +10,7 @@
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
- * * FIX: Added SAFE.PARSE_JSON to MERGE statement for BOTH result_data and dependency_result_hashes.
+ * * FIX: Removed SAFE.PARSE_JSON from MERGE to match STRING schema types.
  */

  const { Firestore } = require('@google-cloud/firestore');
@@ -53,8 +53,7 @@ class StorageManager {
  async claimZombie(checkpointId) {
  if (!checkpointId) return;

- // FIX: Access projectId and dataset from the config object
- const { projectId, dataset } = this.config.bigquery; //
+ const { projectId, dataset } = this.config.bigquery;

  const query = `
  UPDATE \`${projectId}.${dataset}.computation_checkpoints\`
@@ -349,10 +348,6 @@ class StorageManager {
  const table = 'computation_checkpoints';
  const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
  try {
- // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
- // We only count it as a zombie if the LATEST row is 'running'.
- // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
- // UPDATE: Added attempts to the selection
  const query = `
  SELECT computation_name, date, checkpoint_id, last_updated, attempts
  FROM (
@@ -407,9 +402,6 @@ class StorageManager {
  const table = 'computation_checkpoints';
  const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
  try {
- // FIX: Added Tie-Breaker logic to ORDER BY
- // If timestamps are identical, 'completed' (1) comes before 'failed' (2) before 'running' (3).
- // This ensures we never accidentally pick a "running" row when a "completed" one exists at the exact same ms.
  const query = `
  SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
  FROM ${fullTable}
@@ -507,8 +499,6 @@ class StorageManager {
  const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
  const table = dataset.table(tableName);

- // Note: result_data and dependency_result_hashes are loaded as STRING from the JSON file
- // They will be parsed into JSON during the merge step.
  const schema = [
  { name: 'date', type: 'DATE', mode: 'REQUIRED' },
  { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
@@ -531,6 +521,7 @@

  await this._ensureBigQueryTable(targetTable);

+ // FIX: Removed SAFE.PARSE_JSON() because target columns are STRING.
  const mergeQuery = `
  MERGE INTO ${fullTarget} T
  USING (
@@ -544,15 +535,15 @@ class StorageManager {
  UPDATE SET
  code_hash = S.code_hash,
  result_hash = S.result_hash,
- dependency_result_hashes = SAFE.PARSE_JSON(S.dependency_result_hashes),
+ dependency_result_hashes = S.dependency_result_hashes,
  entity_count = S.entity_count,
- result_data = SAFE.PARSE_JSON(S.result_data),
+ result_data = S.result_data,
  updated_at = S.updated_at
  WHEN NOT MATCHED THEN
  INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
  dependency_result_hashes, entity_count, result_data, updated_at)
  VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
- SAFE.PARSE_JSON(S.dependency_result_hashes), S.entity_count, SAFE.PARSE_JSON(S.result_data), S.updated_at)
+ S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
  `;

  // UPDATE: Use createQueryJob to capture DML statistics
@@ -659,7 +650,7 @@ class StorageManager {
  { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
  { name: 'date', type: 'DATE', mode: 'REQUIRED' },
  { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
- { name: 'metrics', type: 'STRING', mode: 'NULLABLE' }, // JSON string
+ { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
  { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
  { name: 'status', type: 'STRING', mode: 'NULLABLE' },
  { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
@@ -778,7 +769,6 @@ class StorageManager {
  }

  _logError(context, error) {
- // Safe logging for BigQuery PartialFailureError which hides details in .errors
  let details = error.message;
  if (error.errors && Array.isArray(error.errors)) {
  details = JSON.stringify(error.errors, null, 2);