bulltrackers-module 1.0.766 → 1.0.768
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +298 -186
- package/functions/computation-system-v2/computations/NewSectorExposure.js +82 -35
- package/functions/computation-system-v2/computations/NewSocialPost.js +52 -24
- package/functions/computation-system-v2/computations/PopularInvestorProfileMetrics.js +354 -641
- package/functions/computation-system-v2/config/bulltrackers.config.js +26 -14
- package/functions/computation-system-v2/framework/core/Manifest.js +9 -16
- package/functions/computation-system-v2/framework/core/RunAnalyzer.js +2 -1
- package/functions/computation-system-v2/framework/data/DataFetcher.js +142 -4
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +18 -31
- package/functions/computation-system-v2/framework/storage/StorageManager.js +7 -17
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +155 -66
- package/functions/computation-system-v2/scripts/test-computation-dag.js +109 -0
- package/functions/task-engine/helpers/data_storage_helpers.js +6 -6
- package/package.json +1 -1
- package/functions/computation-system-v2/computations/PopularInvestorRiskAssessment.js +0 -176
- package/functions/computation-system-v2/computations/PopularInvestorRiskMetrics.js +0 -294
- package/functions/computation-system-v2/computations/UserPortfolioSummary.js +0 -172
- package/functions/computation-system-v2/scripts/migrate-sectors.js +0 -73
- package/functions/computation-system-v2/test/analyze-results.js +0 -238
- package/functions/computation-system-v2/test/other/test-dependency-cascade.js +0 -150
- package/functions/computation-system-v2/test/other/test-dispatcher.js +0 -317
- package/functions/computation-system-v2/test/other/test-framework.js +0 -500
- package/functions/computation-system-v2/test/other/test-real-execution.js +0 -166
- package/functions/computation-system-v2/test/other/test-real-integration.js +0 -194
- package/functions/computation-system-v2/test/other/test-refactor-e2e.js +0 -131
- package/functions/computation-system-v2/test/other/test-results.json +0 -31
- package/functions/computation-system-v2/test/other/test-risk-metrics-computation.js +0 -329
- package/functions/computation-system-v2/test/other/test-scheduler.js +0 -204
- package/functions/computation-system-v2/test/other/test-storage.js +0 -449
- package/functions/computation-system-v2/test/run-pipeline-test.js +0 -554
- package/functions/computation-system-v2/test/test-full-pipeline.js +0 -227
- package/functions/computation-system-v2/test/test-worker-pool.js +0 -266
package/functions/computation-system-v2/config/bulltrackers.config.js

@@ -84,7 +84,7 @@ module.exports = {
   'portfolio_snapshots': {
     dateField: 'date',
     entityField: 'user_id',
-    dataField: 'portfolio_data',
+    //dataField: 'portfolio_data',
     description: 'Daily portfolio snapshots for all users'
   },
 
@@ -92,7 +92,7 @@ module.exports = {
   'trade_history_snapshots': {
     dateField: 'date',
     entityField: 'user_id',
-    dataField: 'history_data',
+    //dataField: 'history_data',
     description: 'Daily trade history snapshots'
   },
 
@@ -100,7 +100,7 @@ module.exports = {
   'social_post_snapshots': {
     dateField: 'date',
     entityField: 'user_id',
-    dataField: 'posts_data',
+    //dataField: 'posts_data',
     description: 'Daily social post snapshots'
   },
 
@@ -108,7 +108,7 @@ module.exports = {
   'asset_prices': {
     dateField: 'date',
     entityField: 'instrument_id',
-    dataField: null, // Flat table
+    //dataField: null, // Flat table
     description: 'Daily asset prices'
   },
 
@@ -116,7 +116,7 @@ module.exports = {
   'pi_rankings': {
     dateField: 'date',
     entityField: 'pi_id',
-    dataField: 'rankings_data',
+    //dataField: 'rankings_data',
     description: 'Daily PI rankings snapshot'
   },
 
@@ -124,7 +124,7 @@ module.exports = {
   'pi_master_list': {
     dateField: null, // Not date-partitioned
     entityField: 'cid',
-    dataField: null,
+    //dataField: null,
     description: 'Master list of all Popular Investors'
   },
 
@@ -132,7 +132,7 @@ module.exports = {
   'pi_ratings': {
     dateField: 'date',
     entityField: 'pi_id',
-    dataField: null,
+    //dataField: null,
     description: 'Daily PI ratings'
   },
 
@@ -140,7 +140,7 @@ module.exports = {
   'pi_page_views': {
     dateField: 'date',
     entityField: 'pi_id',
-    dataField: null,
+    //dataField: null,
     description: 'Daily PI page view metrics'
   },
 
@@ -148,7 +148,7 @@ module.exports = {
   'watchlist_membership': {
     dateField: 'date',
     entityField: 'pi_id',
-    dataField: null,
+    //dataField: null,
     description: 'Daily watchlist membership counts'
   },
 
@@ -156,7 +156,7 @@ module.exports = {
   'pi_alert_history': {
     dateField: 'date',
     entityField: 'pi_id',
-    dataField: 'metadata',
+    //dataField: 'metadata',
     description: 'Daily alert trigger history'
   },
 
@@ -164,7 +164,7 @@ module.exports = {
   'instrument_insights': {
     dateField: 'date',
     entityField: 'instrument_id',
-    dataField: 'insights_data',
+    //dataField: 'insights_data',
     description: 'Daily instrument insights'
   },
 
@@ -172,7 +172,7 @@ module.exports = {
   'ticker_mappings': {
     dateField: null,
     entityField: 'instrument_id',
-    dataField: null,
+    //dataField: null,
     description: 'Instrument ID to ticker symbol mappings'
   },
 
@@ -180,15 +180,27 @@ module.exports = {
   'computation_results': {
     dateField: 'date',
     entityField: null, // Keyed by computation_name
-    dataField: 'result_data',
+    //dataField: 'result_data',
     description: 'Stored computation results'
   },
   // NEW: Sector Mappings Table
   'sector_mappings': {
     dateField: null, // Static data
     entityField: 'symbol', // Key the data by symbol for fast lookup
-    dataField: null,
+    //dataField: null,
     description: 'Ticker to Sector mappings migrated from Firestore'
+  },
+
+  // NEW: Map the abstract requirement 'behavioral_features' to the actual BQ table
+  'behavioral_features': {
+    tableName: 'daily_behavioral_features',
+    dateField: 'date',
+    entityField: 'user_id', // Important for per-entity fetching
+    schema: [
+      { name: 'user_id', type: 'STRING' },
+      { name: 'hhi_score', type: 'FLOAT' },
+      { name: 'martingale_events', type: 'INTEGER' }
+    ]
   }
 },
 
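For orientation, computations in this framework declare their inputs via a `requires` object (see `requires: config.requires || {}` in the Manifest diff below), and each key is resolved through this config map to a table name, date/entity fields, and schema. A minimal sketch of a consumer of the new `behavioral_features` mapping; the computation class, its `requires` options, and the `compute` signature are assumptions for illustration, not the package's confirmed API:

```js
// Hypothetical computation consuming the new 'behavioral_features' mapping.
// The key resolves via bulltrackers.config.js to the BQ table
// 'daily_behavioral_features', keyed per entity by 'user_id'.
class BehavioralSummary {
  static get requires() {
    return { behavioral_features: {} }; // shape of options is an assumption
  }

  compute({ behavioral_features }) {
    // Rows carry the declared schema: user_id, hhi_score, martingale_events
    const flagged = behavioral_features.filter(r => r.martingale_events > 0);
    return { flaggedUsers: flagged.map(r => r.user_id) };
  }
}

module.exports = BehavioralSummary;
```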
package/functions/computation-system-v2/framework/core/Manifest.js

@@ -39,12 +39,9 @@ class ManifestBuilder {
       if (entry) {
         manifestMap.set(entry.name, entry);
 
-        // CRITICAL FIX: Include conditional dependencies in the DAG for cycle detection and topological sort.
-        // Even if the dependency is conditional at runtime, the execution order (Pass) must respect it.
         const graphDeps = [...entry.dependencies];
         if (entry.conditionalDependencies) {
           entry.conditionalDependencies.forEach(cd => {
-            // Ensure we use the normalized name for the graph
             graphDeps.push(cd.computation);
           });
         }
@@ -60,7 +57,7 @@ class ManifestBuilder {
       throw new Error(`[Manifest] Circular dependency detected: ${cycle}`);
     }
 
-    // 3. Topological Sort
+    // 3. Topological Sort
     const sortedItems = Graph.topologicalSort(nodes, adjacency);
 
     // 4. Hydrate Sorted List
@@ -115,8 +112,6 @@ class ManifestBuilder {
       compositeHash += `|RULE:${mod}:${h}`;
     }
 
-    // Normalize conditional dependencies if they exist
-    // This ensures the Orchestrator can look them up by normalized name later
     const conditionalDependencies = (config.conditionalDependencies || []).map(cd => ({
       ...cd,
       computation: this._normalize(cd.computation)
@@ -128,16 +123,20 @@ class ManifestBuilder {
       class: ComputationClass,
       category: config.category || 'default',
       type: config.type || 'global',
+
+      outputTable: config.outputTable || null,
+      // --------------------------------------
+
       requires: config.requires || {},
       dependencies: (config.dependencies || []).map(d => this._normalize(d)),
-      conditionalDependencies,
+      conditionalDependencies,
       isHistorical: config.isHistorical || false,
       isTest: config.isTest || false,
       schedule: this.scheduleValidator.parseSchedule(config.schedule),
       storage: this._parseStorageConfig(config.storage),
       ttlDays: config.ttlDays,
-      pass: 0,
-      hash: this._hashCode(compositeHash),
+      pass: 0,
+      hash: this._hashCode(compositeHash),
       weight: ComputationClass.getWeight ? ComputationClass.getWeight() : 1.0,
       composition: {
         epoch: this.epoch,
@@ -152,7 +151,6 @@ class ManifestBuilder {
   _computeFinalHashes(sorted, manifestMap) {
     for (const entry of sorted) {
       let hashInput = entry.hash;
-      // Includes strict dependencies in the hash chain
       if (entry.dependencies.length > 0) {
         const depHashes = entry.dependencies.sort().map(d => {
           const h = manifestMap.get(d)?.hash;
@@ -161,10 +159,6 @@ class ManifestBuilder {
         });
         hashInput += `|DEPS:${depHashes.join('|')}`;
       }
-      // Note: Conditional dependencies are currently excluded from the hash chain
-      // because they might not be loaded. If strict versioning is required for them,
-      // they should be added here too.
-
       entry.hash = this._hashCode(hashInput);
     }
   }
@@ -203,7 +197,7 @@ class ManifestBuilder {
     const used = {};
     for (const [name, exports] of Object.entries(this.sharedLayers)) {
       const found = Object.keys(exports).filter(exp =>
-        code.includes(exp)
+        code.includes(exp)
       );
       if (found.length) used[name] = found;
     }
@@ -240,7 +234,6 @@ class ManifestBuilder {
 
   _log(l, m) { this.logger ? this.logger.log(l, `[Manifest] ${m}`) : console.log(`[Manifest] ${m}`); }
 
-  // Public alias for groupByPass matching the Interface
   groupByPass(m) { return this._groupByPass(m); }
 }
 
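As the hunks above show, conditional dependencies are folded into `graphDeps` before cycle detection and topological sort, so a conditional edge still constrains execution order even if it never fires at runtime. A standalone sketch of that ordering step using Kahn's algorithm; the real `Graph.topologicalSort(nodes, adjacency)` signature may differ:

```js
// Kahn's algorithm over strict + conditional dependency edges.
// Entries: { name, dependencies: [...], conditionalDependencies?: [{ computation }] }
function topoSort(entries) {
  const indegree = new Map(entries.map(e => [e.name, 0]));
  const dependents = new Map(entries.map(e => [e.name, []]));

  for (const e of entries) {
    // Conditional deps are edges too: they decide the Pass ordering.
    const deps = [
      ...e.dependencies,
      ...(e.conditionalDependencies || []).map(cd => cd.computation)
    ];
    for (const d of deps) {
      if (!dependents.has(d)) continue; // unknown dep: not part of this graph
      dependents.get(d).push(e.name);
      indegree.set(e.name, indegree.get(e.name) + 1);
    }
  }

  const queue = entries.filter(e => indegree.get(e.name) === 0).map(e => e.name);
  const sorted = [];
  while (queue.length) {
    const n = queue.shift();
    sorted.push(n);
    for (const m of dependents.get(n)) {
      indegree.set(m, indegree.get(m) - 1);
      if (indegree.get(m) === 0) queue.push(m);
    }
  }
  // Leftover nodes mean a cycle, mirroring the manifest's circular-dependency error.
  if (sorted.length !== entries.length) throw new Error('Circular dependency detected');
  return sorted;
}
```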
package/functions/computation-system-v2/framework/core/RunAnalyzer.js

@@ -2,6 +2,7 @@
  * @fileoverview Run Analyzer
  * * Pure logic component that determines which computations need to run.
  * Decouples decision-making from execution and storage.
+ * * * UPDATE: Removed SQL bypass. All computations are now checked for data availability.
  */
 
 class RunAnalyzer {
@@ -57,7 +58,7 @@ class RunAnalyzer {
     }
 
     // 2. Data Availability Check
-    //
+    // UPDATE: Removed isSql check. All computations must have their raw data available.
     const availability = await this.dataFetcher.checkAvailability(requires, dateStr);
     if (!availability.canRun) {
       if (!isToday) {
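The effect of removing the SQL bypass is that every computation now passes through the same availability gate. A minimal sketch of that gate; the `{ canRun, missing }` shape matches `DataFetcher.checkAvailability` as shown later in this diff, while the defer-vs-skip semantics of the `isToday` branch are an assumption from the visible context:

```js
// Uniform availability gate after the SQL bypass removal (sketch).
async function decide(dataFetcher, requires, dateStr, isToday) {
  const availability = await dataFetcher.checkAvailability(requires, dateStr);
  if (!availability.canRun) {
    // Assumption: past dates with missing raw data are skipped for good,
    // while today's data may still arrive, so the run is deferred.
    return isToday
      ? { type: 'deferred', reason: `Waiting on: ${availability.missing.join(', ')}` }
      : { type: 'skipped', reason: `Missing data: ${availability.missing.join(', ')}` };
  }
  return { type: 'run' };
}
```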
package/functions/computation-system-v2/framework/data/DataFetcher.js

@@ -9,9 +9,14 @@
  * * V2.4 FIX: Runaway Query Cost Prevention [Fix #3].
  * * V2.5 UPDATE: Super-Entity Monitoring [Safety Valve for Fix #6].
  * - Warns if a single entity exceeds reasonable batch limits (Memory Risk).
+ * * V2.6 UPDATE: Query Result Caching.
+ * - Implemented in-memory LRU cache to prevent redundant BigQuery costs for reference data.
+ * * V2.7 FIX: Double-Encoded JSON Normalization.
+ * - Automatically detects and recursively parses JSON strings (e.g. posts_data) to prevent downstream parsing errors.
  */
 
 const { BigQuery } = require('@google-cloud/bigquery');
+const crypto = require('crypto');
 
 // FIX #3: Hard limit to prevent cost spirals
 const MAX_LOOKBACK_DAYS = 30;
@@ -29,11 +34,24 @@ class DataFetcher {
 
     this.client = new BigQuery({ projectId: this.projectId });
 
+    // Cache Configuration (V2.6)
+    this.cacheConfig = config.queryCache || {
+      enabled: true,
+      ttlMs: 300000, // 5 minutes default
+      maxSize: 1000 // Max unique queries to cache
+    };
+
+    // Use Map as LRU cache (insertion order preserved)
+    this.cache = new Map();
+
     this.stats = {
       queries: 0,
       rowsFetched: 0,
       errors: 0,
-      bytesProcessed: 0
+      bytesProcessed: 0,
+      cacheHits: 0,
+      cacheMisses: 0,
+      cacheEvictions: 0
     };
   }
 
@@ -273,21 +291,74 @@ class DataFetcher {
 
     return { canRun: missing.length === 0, available, missing };
   }
+
 
   getStats() { return { ...this.stats }; }
-
+
+  resetStats() {
+    this.stats = {
+      queries: 0,
+      rowsFetched: 0,
+      errors: 0,
+      bytesProcessed: 0,
+      cacheHits: 0,
+      cacheMisses: 0,
+      cacheEvictions: 0
+    };
+    this.cache.clear();
+  }
+
+  clearCache() {
+    this.cache.clear();
+    this._log('DEBUG', 'Query cache cleared');
+  }
+
+  // =========================================================================
+  // PRIVATE METHODS
+  // =========================================================================
 
   async _execute(query) {
+    // V2.6: Query Caching
+    if (this.cacheConfig.enabled) {
+      const cacheKey = this._generateCacheKey(query);
+      const cached = this.cache.get(cacheKey);
+
+      if (cached) {
+        if (Date.now() - cached.timestamp < this.cacheConfig.ttlMs) {
+          this.stats.cacheHits++;
+          // Refresh LRU position (delete and re-set moves to end)
+          this.cache.delete(cacheKey);
+          this.cache.set(cacheKey, cached);
+          // Return cached rows immediately - no BigQuery cost
+          return cached.rows;
+        } else {
+          this.cache.delete(cacheKey); // Expired
+        }
+      }
+      this.stats.cacheMisses++;
+    }
+
     this.stats.queries++;
+
     try {
       const [job] = await this.client.createQueryJob({
         query: query.sql, params: query.params, location: this.location
       });
       const [rows] = await job.getQueryResults();
       const [metadata] = await job.getMetadata();
+
       this.stats.rowsFetched += rows.length;
       this.stats.bytesProcessed += parseInt(metadata.statistics?.totalBytesProcessed || 0, 10);
-
+
+      // FIX V2.7: Normalize Rows (Recursive JSON Parse) BEFORE caching
+      const normalizedRows = rows.map(r => this._normalizeRow(r));
+
+      // Store in cache if enabled
+      if (this.cacheConfig.enabled) {
+        this._addToCache(query, normalizedRows);
+      }
+
+      return normalizedRows;
     } catch (e) {
       this.stats.errors++;
       this._log('ERROR', `Query failed: ${e.message}`);
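The cache above leans on a property of JavaScript's `Map`: iteration follows insertion order, so re-inserting an entry on every hit makes insertion order double as recency order, and evicting the first key removes the least recently used entry. A standalone illustration of that mechanism (names hypothetical):

```js
// Map-as-LRU in isolation: insertion order doubles as recency order.
const lru = new Map();
const MAX = 2;

function put(key, value) {
  if (lru.has(key)) lru.delete(key);       // refresh position on overwrite
  else if (lru.size >= MAX) {
    lru.delete(lru.keys().next().value);   // first key = least recently used
  }
  lru.set(key, value);
}

function get(key) {
  if (!lru.has(key)) return undefined;
  const value = lru.get(key);
  lru.delete(key);
  lru.set(key, value);                     // move to end (most recent)
  return value;
}

put('a', 1); put('b', 2);
get('a');                                  // 'a' is now most recent
put('c', 3);                               // evicts 'b', not 'a'
console.log([...lru.keys()]);              // ['a', 'c']
```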
@@ -296,6 +367,8 @@ class DataFetcher {
   }
 
   async *_executeStream(query) {
+    // NOTE: We do NOT cache streams. They are typically massive datasets (batch processing)
+    // and caching them in memory would cause OOM.
     this.stats.queries++;
     try {
       const [job] = await this.client.createQueryJob({
@@ -304,7 +377,8 @@ class DataFetcher {
       const stream = job.getQueryResultsStream();
       for await (const row of stream) {
         this.stats.rowsFetched++;
-
+        // FIX V2.7: Normalize Rows (Recursive JSON Parse)
+        yield this._normalizeRow(row);
       }
     } catch (e) {
       this.stats.errors++;
@@ -312,6 +386,70 @@ class DataFetcher {
       throw e;
     }
   }
+
+  /**
+   * V2.8 FIX: JSON Detection Logic
+   */
+  _normalizeRow(row) {
+    const normalized = { ...row };
+    for (const [key, value] of Object.entries(normalized)) {
+      if (typeof value === 'string') {
+        const trimmed = value.trim();
+        // FIX: Check for " (Double Encoded JSON) in addition to { and [
+        if (trimmed.startsWith('{') || trimmed.startsWith('[') || trimmed.startsWith('"')) {
+          normalized[key] = this._safeRecursiveParse(value);
+        }
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * V2.7 FIX: Helper to safely recursively parse JSON.
+   * Handles: Double-Encoded JSON Strings (parsed recursively)
+   */
+  _safeRecursiveParse(input) {
+    if (!input) return null;
+    if (typeof input === 'object') return input;
+    try {
+      const parsed = JSON.parse(input);
+      // Recursion for double-encoded strings
+      if (typeof parsed === 'string') return this._safeRecursiveParse(parsed);
+      return parsed;
+    } catch (e) {
+      return input; // Not JSON, return original
+    }
+  }
+
+  /**
+   * V2.6: Generate a unique cache key for a query
+   */
+  _generateCacheKey(query) {
+    // Hash the SQL + Params to ensure uniqueness
+    const str = query.sql + JSON.stringify(query.params || {});
+    return crypto.createHash('md5').update(str).digest('hex');
+  }
+
+  /**
+   * V2.6: Add to cache with LRU eviction
+   */
+  _addToCache(query, rows) {
+    // Generate key
+    const key = this._generateCacheKey(query);
+
+    // Eviction Logic
+    if (this.cache.size >= this.cacheConfig.maxSize) {
+      // Map iterator yields in insertion order. First item is oldest.
+      const oldestKey = this.cache.keys().next().value;
+      this.cache.delete(oldestKey);
+      this.stats.cacheEvictions++;
+    }
+
+    this.cache.set(key, {
+      rows: rows,
+      timestamp: Date.now()
+    });
+  }
 
   /**
    * Transforms raw rows into a structured object.
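Double encoding happens when an object is stringified twice before it lands in a column (for example `posts_data`), so a single `JSON.parse` returns a string rather than an object; that is why `_safeRecursiveParse` recurses, and why `_normalizeRow` also treats a leading `"` as a JSON signal. A quick standalone illustration:

```js
// Why the recursion is needed: a doubly stringified object survives
// one JSON.parse as a *string*, not an object.
const obj = { posts: [1, 2] };
const doubleEncoded = JSON.stringify(JSON.stringify(obj));

console.log(doubleEncoded.trim().startsWith('"')); // true - the new '"' check catches this

const once = JSON.parse(doubleEncoded);
console.log(typeof once);                          // 'string' - still not usable

const twice = JSON.parse(once);
console.log(twice.posts);                          // [1, 2] - the recursive parse lands here
```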
package/functions/computation-system-v2/framework/execution/Orchestrator.js

@@ -5,12 +5,7 @@
  * 2. Data Provisioning (Fetching Data, Loading Dependencies, Reference Data)
  * 3. Execution Strategy (Streaming vs. In-Memory)
  * 4. Delegation (Hands off actual 'work' to TaskRunner + Middleware)
- * * * UPDATE:
- * * * UPDATE: Includes Global vs Batch Data Split to fix "Identity Crisis".
- * * * UPDATE: Implemented FORCE logic to bypass "up-to-date" checks for testing.
- * * * UPDATE: Aggregates performance reporting to prevent log spam.
- * * * FIX: Resolved N+1 Dependency Fetching (Strict Mode in Streaming).
- * * * FIX: Added missing 'skipped' property to return types for type safety.
+ * * * UPDATE: Removed SQL-based execution support (isSql flag ignored).
  */
 
 const crypto = require('crypto');
@@ -182,6 +177,7 @@ class Orchestrator {
     const { name } = entry;
     const forceEntities = options.entities;
 
+    // 1. Analyze Status (Skip if done/cached, unless forced)
     if (!forceEntities) {
       const decision = await this._analyzeEntry(entry, dateStr);
       const isSkippedOrCached = decision.type === 'skipped' || decision.type === 'cached';
@@ -198,7 +194,12 @@ class Orchestrator {
     this._log('INFO', `Running ${name} (Type: ${entry.type})...`);
     const startTime = Date.now();
 
+    // 2. Load Dependencies (Crucial for Lineage, Locking, and Upstream Checks)
     const { depResults, depResultHashes } = await this._loadDependencies(entry, dateStr);
+
+    // =====================================================================
+    // STANDARD JS COMPUTATION (ETL) ONLY
+    // =====================================================================
 
     let previousResult = null;
     if (entry.isHistorical) {
@@ -256,8 +257,6 @@ class Orchestrator {
     await this.lineageMiddleware.flush();
 
     // Trigger dependency-driven cascading for downstream computations.
-    // This will enqueue Cloud Tasks for any dependents whose full
-    // dependency set has completed for the given date.
     try {
       await this._scheduleDependents(entry, dateStr);
     } catch (cascadeError) {
@@ -401,8 +400,19 @@ class Orchestrator {
       return { count: totalCount, hash: rollingHash.digest('hex').substring(0, 16), skipped: false };
 
     } catch (error) {
+      // === 🔍 INSERT THIS DEBUG BLOCK ===
+      console.error('________________________________________________________________');
+      console.error('🛑 CRITICAL COMPUTATION CRASH DETECTED');
+      console.error(`📍 Computation: ${entry.name}`);
+      console.error(`💥 Error Message: ${error.message}`);
+      console.error(`📚 Stack Trace:\n${error.stack}`);
+      console.error('________________________________________________________________');
+      // ===================================
+
       if (cp && cp.id) {
         this._log('ERROR', `Streaming failed, marking checkpoint ${cp.id} as failed.`);
+        // This next line is what causes the "Streaming Buffer" error
+        // if the row was just inserted. Now you will see the REAL error above.
         await this.storageManager.failCheckpoint(cp.id, error.message);
       }
       throw error;
@@ -702,10 +712,6 @@ class Orchestrator {
     return prefetched;
   }
 
-  /**
-   * Build a reverse dependency index so that when a computation completes
-   * we can quickly find all computations that depend on it.
-   */
   _buildDependentsIndex() {
     this.dependentsByName = new Map();
     if (!this.manifest) return;
@@ -720,14 +726,6 @@ class Orchestrator {
     }
   }
 
-  /**
-   * Schedule dependent computations via Cloud Tasks after a computation
-   * has successfully completed for a given date.
-   *
-   * The scheduler is responsible only for root / pass-1 computations.
-   * All downstream work is triggered here with a configurable time gap
-   * once ALL dependencies of a computation have completed.
-   */
   async _scheduleDependents(entry, dateStr) {
     const dependents = this.dependentsByName.get(entry.name);
     if (!dependents || dependents.length === 0) return;
@@ -746,13 +744,9 @@ class Orchestrator {
 
     const dependencyGapMinutes = this.config.scheduling?.dependencyGapMinutes ?? 5;
     const queuePath = this.cloudTasksClient.queuePath(projectId, location, queueName);
-
-    // Use the latest in-memory status for this date so we can see the
-    // just-updated computation plus any earlier ones.
     const dailyStatus = await this.stateRepository.getDailyStatus(dateStr);
 
     for (const depEntry of dependents) {
-      // Compute the latest completion time across all of this computation's dependencies.
      let latestDependencyTime = null;
      let missingDependency = false;
 
@@ -771,8 +765,6 @@ class Orchestrator {
        }
      }
 
-      // If any dependency hasn't completed yet, we will schedule this
-      // dependent when that dependency finishes instead.
      if (missingDependency || !latestDependencyTime) {
        continue;
      }
@@ -814,12 +806,10 @@ class Orchestrator {
 
        this._log('INFO', `Scheduled dependent ${depEntry.name} for ${dateStr} at ${scheduleTime.toISOString()}`);
      } catch (e) {
-        // Code 6: ALREADY_EXISTS – task already scheduled, this is fine (idempotent)
        if (e.code === 6) {
          this._log('INFO', `Dependent ${depEntry.name} for ${dateStr} already scheduled (duplicate task ignored)`);
          continue;
        }
-
        this._log('WARN', `Failed to schedule dependent ${depEntry.name}: ${e.message}`);
      }
    }
@@ -828,10 +818,7 @@ class Orchestrator {
   async _lazyLoadDependency(dateStr, depName, entityId, preloaded) {
     if (preloaded[depName] && !entityId) return preloaded[depName];
     if (preloaded[depName] && entityId) return preloaded[depName][entityId];
-
-    // WARN: This is the slow path that we removed from Streaming
     this._log('WARN', `LAZY LOAD: Fetching single entity '${entityId}' for '${depName}'. This is slow.`);
-
     if (entityId) return this.stateRepository.getEntityResult(dateStr, depName, entityId);
     return this.stateRepository.getResult(dateStr, depName);
   }
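The cascade in `_scheduleDependents` relies on a reverse index built once from the manifest: "A depends on B" edges are inverted into "B's dependents include A", so a completed computation finds its downstream work with a single map lookup. A minimal standalone sketch of that inversion; the dependency edges in the usage example are hypothetical, borrowing computation names from this package's file list:

```js
// Invert dependency edges into a dependents-by-name index
// (standalone approximation of _buildDependentsIndex, which reads this.manifest).
function buildDependentsIndex(manifest) {
  const dependentsByName = new Map();
  for (const entry of manifest) {
    for (const dep of entry.dependencies) {
      if (!dependentsByName.has(dep)) dependentsByName.set(dep, []);
      dependentsByName.get(dep).push(entry);
    }
  }
  return dependentsByName;
}

// Hypothetical edges for illustration: when the upstream computation
// completes for a date, each dependent is considered for scheduling once
// ALL of its own dependencies have finished.
const index = buildDependentsIndex([
  { name: 'NewSectorExposure', dependencies: ['PopularInvestorProfileMetrics'] },
  { name: 'BehavioralAnomaly', dependencies: ['PopularInvestorProfileMetrics'] }
]);
console.log(index.get('PopularInvestorProfileMetrics').map(e => e.name));
// ['NewSectorExposure', 'BehavioralAnomaly']
```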
package/functions/computation-system-v2/framework/storage/StorageManager.js

@@ -10,7 +10,7 @@
  * * FIX: Switched to bigquery.createJob for GCS imports to prevent local file path interpretation errors.
  * * FIX: Improved error logging to catch swallowed BigQuery insert errors.
  * * FIX: finalizeResults now checks for file existence to prevent "Not found" errors on empty results.
- * * FIX:
+ * * FIX: Removed SAFE.PARSE_JSON from MERGE to match STRING schema types.
  */
 
 const { Firestore } = require('@google-cloud/firestore');
@@ -53,8 +53,7 @@ class StorageManager {
   async claimZombie(checkpointId) {
     if (!checkpointId) return;
 
-
-    const { projectId, dataset } = this.config.bigquery; //
+    const { projectId, dataset } = this.config.bigquery;
 
     const query = `
       UPDATE \`${projectId}.${dataset}.computation_checkpoints\`
@@ -349,10 +348,6 @@ class StorageManager {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Use subquery with ROW_NUMBER to find the TRUE latest state per computation.
-      // We only count it as a zombie if the LATEST row is 'running'.
-      // This ignores 'running' rows that have a newer (or same-time) 'completed' sibling.
-      // UPDATE: Added attempts to the selection
       const query = `
         SELECT computation_name, date, checkpoint_id, last_updated, attempts
         FROM (
@@ -407,9 +402,6 @@ class StorageManager {
     const table = 'computation_checkpoints';
     const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
     try {
-      // FIX: Added Tie-Breaker logic to ORDER BY
-      // If timestamps are identical, 'completed' (1) comes before 'failed' (2) before 'running' (3).
-      // This ensures we never accidentally pick a "running" row when a "completed" one exists at the exact same ms.
       const query = `
         SELECT checkpoint_id, status, processed_count, last_entity_id, completed_batches, worker_instance_id, last_updated, attempts, code_hash, started_at
         FROM ${fullTable}
@@ -507,8 +499,6 @@ class StorageManager {
     const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
     const table = dataset.table(tableName);
 
-    // Note: result_data and dependency_result_hashes are loaded as STRING from the JSON file
-    // They will be parsed into JSON during the merge step.
     const schema = [
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
@@ -531,6 +521,7 @@ class StorageManager {
 
     await this._ensureBigQueryTable(targetTable);
 
+    // FIX: Removed SAFE.PARSE_JSON() because target columns are STRING.
     const mergeQuery = `
       MERGE INTO ${fullTarget} T
       USING (
@@ -544,15 +535,15 @@ class StorageManager {
       UPDATE SET
         code_hash = S.code_hash,
         result_hash = S.result_hash,
-        dependency_result_hashes =
+        dependency_result_hashes = S.dependency_result_hashes,
         entity_count = S.entity_count,
-        result_data =
+        result_data = S.result_data,
        updated_at = S.updated_at
      WHEN NOT MATCHED THEN
        INSERT (date, computation_name, category, entity_id, code_hash, result_hash,
                dependency_result_hashes, entity_count, result_data, updated_at)
        VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
-
+                S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
     `;
 
     // UPDATE: Use createQueryJob to capture DML statistics
@@ -659,7 +650,7 @@ class StorageManager {
       { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
       { name: 'date', type: 'DATE', mode: 'REQUIRED' },
       { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
-      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
       { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
      { name: 'status', type: 'STRING', mode: 'NULLABLE' },
      { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
@@ -778,7 +769,6 @@ class StorageManager {
   }
 
   _logError(context, error) {
-    // Safe logging for BigQuery PartialFailureError which hides details in .errors
     let details = error.message;
     if (error.errors && Array.isArray(error.errors)) {
       details = JSON.stringify(error.errors, null, 2);