bulltrackers-module 1.0.768 → 1.0.770
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system-v2/UserPortfolioMetrics.js +50 -0
- package/functions/computation-system-v2/computations/BehavioralAnomaly.js +557 -337
- package/functions/computation-system-v2/computations/GlobalAumPerAsset30D.js +103 -0
- package/functions/computation-system-v2/computations/PIDailyAssetAUM.js +134 -0
- package/functions/computation-system-v2/computations/PiFeatureVectors.js +227 -0
- package/functions/computation-system-v2/computations/PiRecommender.js +359 -0
- package/functions/computation-system-v2/computations/RiskScoreIncrease.js +13 -13
- package/functions/computation-system-v2/computations/SignedInUserMirrorHistory.js +138 -0
- package/functions/computation-system-v2/computations/SignedInUserPIProfileMetrics.js +106 -0
- package/functions/computation-system-v2/computations/SignedInUserProfileMetrics.js +324 -0
- package/functions/computation-system-v2/config/bulltrackers.config.js +30 -128
- package/functions/computation-system-v2/core-api.js +17 -9
- package/functions/computation-system-v2/data_schema_reference.MD +108 -0
- package/functions/computation-system-v2/devtools/builder/builder.js +362 -0
- package/functions/computation-system-v2/devtools/builder/examples/user-metrics.yaml +26 -0
- package/functions/computation-system-v2/devtools/index.js +36 -0
- package/functions/computation-system-v2/devtools/shared/MockDataFactory.js +235 -0
- package/functions/computation-system-v2/devtools/shared/SchemaTemplates.js +475 -0
- package/functions/computation-system-v2/devtools/shared/SystemIntrospector.js +517 -0
- package/functions/computation-system-v2/devtools/shared/index.js +16 -0
- package/functions/computation-system-v2/devtools/simulation/DAGAnalyzer.js +243 -0
- package/functions/computation-system-v2/devtools/simulation/MockDataFetcher.js +306 -0
- package/functions/computation-system-v2/devtools/simulation/MockStorageManager.js +336 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationEngine.js +525 -0
- package/functions/computation-system-v2/devtools/simulation/SimulationServer.js +581 -0
- package/functions/computation-system-v2/devtools/simulation/index.js +17 -0
- package/functions/computation-system-v2/devtools/simulation/simulate.js +324 -0
- package/functions/computation-system-v2/devtools/vscode-computation/package.json +90 -0
- package/functions/computation-system-v2/devtools/vscode-computation/snippets/computation.json +128 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/extension.ts +401 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/codeActions.ts +152 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/completions.ts +207 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/diagnostics.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/src/providers/hover.ts +205 -0
- package/functions/computation-system-v2/devtools/vscode-computation/tsconfig.json +22 -0
- package/functions/computation-system-v2/docs/HowToCreateComputations.MD +602 -0
- package/functions/computation-system-v2/framework/data/DataFetcher.js +250 -184
- package/functions/computation-system-v2/framework/data/MaterializedViewManager.js +84 -0
- package/functions/computation-system-v2/framework/data/QueryBuilder.js +38 -38
- package/functions/computation-system-v2/framework/execution/Orchestrator.js +215 -129
- package/functions/computation-system-v2/framework/scheduling/ScheduleValidator.js +17 -19
- package/functions/computation-system-v2/framework/storage/StateRepository.js +32 -2
- package/functions/computation-system-v2/framework/storage/StorageManager.js +105 -67
- package/functions/computation-system-v2/framework/testing/ComputationTester.js +12 -6
- package/functions/computation-system-v2/handlers/dispatcher.js +57 -29
- package/functions/computation-system-v2/handlers/scheduler.js +172 -203
- package/functions/computation-system-v2/legacy/PiAssetRecommender.js.old +115 -0
- package/functions/computation-system-v2/legacy/PiSimilarityMatrix.js +104 -0
- package/functions/computation-system-v2/legacy/PiSimilarityVector.js +71 -0
- package/functions/computation-system-v2/scripts/debug_aggregation.js +25 -0
- package/functions/computation-system-v2/scripts/test-invalidation-scenarios.js +234 -0
- package/package.json +1 -1
@@ -1,11 +1,12 @@
 /**
  * @fileoverview Schedule Validator
- *
- * Validates computation schedules and enforces timing rules:
+ * * Validates computation schedules and enforces timing rules:
  * 1. Parses schedule declarations
  * 2. Validates schedule format
- * 3. Checks
- *
+ * 3. Checks logical ordering (Dependent must not be scheduled BEFORE Dependency)
+ * * * UPDATE: Removed "Gap Warning" (race condition check).
+ * * * The system's RunAnalyzer (Blocking) and Orchestrator (Cascading)
+ * * * handle overlapping schedules safely, so 0-minute gaps are valid.
  */

 /**
@@ -37,6 +38,8 @@ class ScheduleValidator {
       time: '02:00',
       timezone: 'UTC'
     };
+    // dependencyGapMinutes is no longer used for validation warnings,
+    // but kept if needed for other scheduling logic (e.g. cloud task delays)
     this.dependencyGapMinutes = config.scheduling?.dependencyGapMinutes || 15;
   }

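For orientation, the gap setting read in the constructor above comes from the scheduling block of the package config. A minimal sketch of such a fragment, assuming the validator is constructed directly with the config object (only scheduling.dependencyGapMinutes appears in the diff; the surrounding keys are illustrative):

// Hypothetical config fragment: only scheduling.dependencyGapMinutes is taken from the diff,
// everything else here is an illustrative placeholder.
const config = {
  scheduling: {
    dependencyGapMinutes: 15 // no longer triggers validation warnings; may still be used for task delays
  }
};

const { ScheduleValidator } = require('./framework/scheduling/ScheduleValidator');
const validator = new ScheduleValidator(config); // assumed constructor signature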
@@ -261,7 +264,7 @@ class ScheduleValidator {
       const gap = this.calculateGap(depSchedule, entrySchedule);

       if (gap === null) {
-        // Different frequencies - can't directly compare
+        // Different frequencies - can't directly compare (Warning is still useful here as a heads up)
         issues.push({
           severity: 'warning',
           computation: entry.name,
@@ -272,27 +275,22 @@ class ScheduleValidator {
         continue;
       }

+      // STRICT CHECK: Dependent CANNOT run BEFORE dependency
       if (gap < 0) {
-        // Dependent runs BEFORE its dependency
         issues.push({
           severity: 'error',
           computation: entry.name,
           dependency: depName,
           gap,
           message: `${entry.name} is scheduled BEFORE its dependency ${depName} (${Math.abs(gap)} minutes earlier)`,
-          suggestion: `Move ${entry.name} to
+          suggestion: `Move ${entry.name} to after ${depName} or use the default schedule.`
         });
-      }
-
-
-
-
-
-          gap,
-          message: `${entry.name} scheduled only ${gap} minutes after dependency ${depName}`,
-          suggestion: `Increase gap to at least ${this.dependencyGapMinutes} minutes`
-        });
-      }
+      }
+
+      // REMOVED: Warning for (0 <= gap < 15).
+      // Reason: In this architecture, dependents are triggered via Event Cascade (Pass 1 -> Pass 2).
+      // A 0-minute gap (or same default schedule) is safe because the RunAnalyzer will simply BLOCK
+      // the dependent until the dependency is ready, or the Orchestrator will trigger it automatically.
     }

     return issues;
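The strict check above relies on the sign of calculateGap: a negative value means the dependent computation is scheduled earlier in the day than its dependency. The real calculateGap is not part of this diff; a rough sketch of the assumed convention for same-frequency daily schedules:

// Illustrative only: minutes-since-midnight difference between the dependent's schedule
// and its dependency's schedule. Negative => dependent runs BEFORE the dependency (error).
// 0 or positive => fine, since the RunAnalyzer blocks until the dependency has finished.
function sketchCalculateGap(depSchedule, entrySchedule) {
  if (depSchedule.frequency !== entrySchedule.frequency) return null; // can't directly compare
  const toMinutes = (t) => {
    const [h, m] = t.split(':').map(Number);
    return h * 60 + m;
  };
  return toMinutes(entrySchedule.time) - toMinutes(depSchedule.time);
}

// e.g. dependency at '02:00', dependent at '01:30' => gap = -30 => severity 'error'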
@@ -324,4 +322,4 @@ class ScheduleValidator {
   }
 }

-module.exports = { ScheduleValidator };
+module.exports = { ScheduleValidator };
@@ -4,6 +4,7 @@
  * 1. Loading daily execution status (hashes, timestamps)
  * 2. Loading previous results (for dependencies and history)
  * 3. Caching results for performance
+ * * * UPDATE: Added getRunDates() to support automatic backfill fan-out.
  */

 const { BigQuery } = require('@google-cloud/bigquery');
@@ -87,6 +88,37 @@ class StateRepository {
     return statusMap;
   }

+  /**
+   * Fetch all dates where a computation has previously run.
+   * Used for fan-out / backfill operations on code deployment.
+   * @param {string} computationName
+   * @returns {Promise<string[]>} List of YYYY-MM-DD strings
+   */
+  async getRunDates(computationName) {
+    try {
+      const table = this.config.resultStore?.table || 'computation_results';
+      const fullTable = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${table}\``;
+
+      const query = `
+        SELECT DISTINCT date
+        FROM ${fullTable}
+        WHERE computation_name = @compName
+        ORDER BY date DESC
+      `;
+
+      const [rows] = await this.bigquery.query({
+        query,
+        params: { compName: computationName.toLowerCase() },
+        location: this.config.bigquery.location
+      });
+
+      return rows.map(r => r.date.value || r.date);
+    } catch (e) {
+      this._log('WARN', `Failed to fetch run dates for ${computationName}: ${e.message}`);
+      return [];
+    }
+  }
+
   /**
    * Update the local status cache after a write.
    * @param {string} dateStr
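getRunDates() is described as supporting automatic backfill fan-out on code deployment. A minimal sketch of how an orchestration layer might use it; the enqueueTask helper and the loop itself are hypothetical, not part of the package:

// Hypothetical fan-out: when a computation's code hash changes, re-enqueue one task per
// historical date so previously computed days get recomputed with the new code.
async function sketchBackfillFanOut(stateRepository, computationName, enqueueTask) {
  const dates = await stateRepository.getRunDates(computationName); // e.g. ['2024-06-02', '2024-06-01']
  for (const dateStr of dates) {
    await enqueueTask({ computation: computationName, date: dateStr, reason: 'code-change-backfill' });
  }
  return dates.length;
}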
@@ -208,8 +240,6 @@ class StateRepository {

   /**
    * Get multiple entity results in a single query (batch lazy load)
-   * FIXED: This solves the N+1 problem by allowing the Executor to fetch dependencies
-   * for an entire processing batch in one go.
    */
   async getBatchEntityResults(dateStr, computationName, entityIds) {
     if (!entityIds || entityIds.length === 0) return {};
@@ -23,20 +23,20 @@ class StorageManager {
   constructor(config, logger = null) {
     this.config = config;
     this.logger = logger;
-
+
     this.bigquery = new BigQuery({
       projectId: config.bigquery?.projectId,
       location: config.bigquery?.location || 'EU'
     });
-
+
     this.storage = new Storage({
       projectId: config.bigquery?.projectId
     });
-
+
     this._firestore = null;
     this.tableExists = new Map();
   }
-
+
   get firestore() {
     if (!this._firestore) {
       this._firestore = new Firestore({
@@ -52,7 +52,7 @@ class StorageManager {
   */
  async claimZombie(checkpointId) {
    if (!checkpointId) return;
-
+
    const { projectId, dataset } = this.config.bigquery;

    const query = `
@@ -60,7 +60,7 @@ class StorageManager {
      SET last_updated = CURRENT_TIMESTAMP()
      WHERE checkpoint_id = @checkpointId
    `;
-
+
    try {
      await this.bigquery.query({
        query,
@@ -72,8 +72,8 @@ class StorageManager {
    }
  }

-
-
+
+
  // =========================================================================
  // RESULT COMMITTING (Batch -> GCS Buffer)
  // =========================================================================
@@ -86,7 +86,7 @@ class StorageManager {
  async commitResults(dateStr, entry, results, depResultHashes = {}) {
    const storageConfig = this._resolveStorageConfig(entry);
    const startTime = Date.now();
-
+
    // Define GCS Task (Fatal on error)
    const gcsTask = async () => {
      if (storageConfig.bigquery === false) return null;
@@ -116,11 +116,11 @@ class StorageManager {
      firestoreTask()
    ]);

-    const writeResults = {
-      bigquery: bigqueryResult,
-      firestore: firestoreResult
+    const writeResults = {
+      bigquery: bigqueryResult,
+      firestore: firestoreResult
    };
-
+
    const duration = Date.now() - startTime;
    this._log('INFO', `Committed (Staged) ${entry.name} results in ${duration}ms`);
    return writeResults;
@@ -138,18 +138,18 @@ class StorageManager {
    const table = this.config.resultStore?.table || 'computation_results';
    const bucketName = this.config.gcs?.bucket;
    const prefix = this.config.gcs?.prefix || 'staging';
-
+
    // 1. Define GCS path pattern: gs://bucket/prefix/date/computation/*.json
    const filePrefix = `${prefix}/${dateStr}/${entry.name}/`;
    const gcsPath = `gs://${bucketName}/${filePrefix}*.json`;
-
+
    this._log('INFO', `Finalizing ${entry.name}...`);

    try {
      // FIX: Check if files actually exist before trying to load them
      // If the computation produced 0 results, no files exist, and BQ will throw "Not Found".
      const [files] = await this.storage.bucket(bucketName).getFiles({ prefix: filePrefix });
-
+
      if (!files || files.length === 0) {
        this._log('INFO', `No staged files found for ${entry.name}. Skipping finalization (Empty Result).`);
        return;
@@ -161,7 +161,7 @@ class StorageManager {
      // 2. Load GCS files into a Temporary Table
      // We create the temp table with the exact schema we expect first
      await this._createTempTableForLoad(tempTableId);
-
+
      // FIX: Use bigquery.createJob directly.
      const [job] = await this.bigquery.createJob({
        configuration: {
@@ -234,9 +234,9 @@ class StorageManager {
      await this.bigquery.dataset(dataset).table(table).insert([row]);
    } catch (error) {
      if (error.name === 'PartialFailureError' || error.errors) {
-
+        this._log('ERROR', `Checkpoint insert failed: ${JSON.stringify(error.errors)}`);
      } else {
-
+        this._log('ERROR', `Checkpoint insert failed: ${error.message}`);
      }
      throw error;
    }
@@ -375,15 +375,15 @@ class StorageManager {
        LIMIT 50
      `;
      const [rows] = await this.bigquery.query({ query, params: { minutes: minutesThreshold }, location: this.config.bigquery.location });
-      return rows.map(r => ({
-        name: r.computation_name,
-        date: r.date.value || r.date,
+      return rows.map(r => ({
+        name: r.computation_name,
+        date: r.date.value || r.date,
        checkpointId: r.checkpoint_id,
-        attempts: r.attempts
+        attempts: r.attempts
      }));
-    } catch (e) {
+    } catch (e) {
      console.error(`[Storage] findZombies failed: ${e.message}`);
-      return [];
+      return [];
    }
  }

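findZombies() surfaces checkpoints whose last_updated is older than the given threshold, and claimZombie() bumps last_updated so one worker can take the checkpoint over. A rough sketch of how a scheduler could combine the two; resumeComputation is a hypothetical placeholder, and the findZombies signature is assumed from the minutesThreshold parameter used in its query:

// Illustrative recovery pass, not part of the package.
async function sketchRecoverZombies(storageManager, resumeComputation, minutesThreshold = 30) {
  const zombies = await storageManager.findZombies(minutesThreshold);
  for (const z of zombies) {
    await storageManager.claimZombie(z.checkpointId); // bump last_updated so other workers skip it
    await resumeComputation(z.name, z.date, z.checkpointId);
  }
  return zombies.length;
}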
@@ -417,7 +417,7 @@ class StorageManager {
    `;
    const [rows] = await this.bigquery.query({ query, params: { date: dateStr, computationName }, location: this.config.bigquery.location });
    if (rows.length === 0) return null;
-
+
    const r = rows[0];
    return {
      ...r,
@@ -438,17 +438,17 @@ class StorageManager {
  async savePerformanceReport(report) {
    const table = 'computation_performance';
    const dataset = this.config.bigquery.dataset;
-    await this._ensurePerformanceTable(table);
+    await this._ensurePerformanceTable(table);

    const row = {
-      run_id
-      computation_name
+      run_id: report.runId || 'unknown',
+      computation_name: report.computationName,
      date: report.date,
      duration_ms: report.durationMs,
-      metrics: JSON.stringify
+      metrics: JSON.stringify(report.metrics || {}),
      entity_count: report.entityCount || 0,
-      status
-      created_at
+      status: report.status || 'completed',
+      created_at: this.bigquery.timestamp(new Date())
    };
    try {
      await this.bigquery.dataset(dataset).table(table).insert([row]);
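The report object consumed above maps camelCase fields onto the computation_performance columns (run_id, computation_name, date, duration_ms, metrics, entity_count, status, created_at). A usage sketch with invented values:

// Field names mirror what savePerformanceReport reads; the values are illustrative only.
await storageManager.savePerformanceReport({
  runId: 'run-2024-06-01-abc123',          // falls back to 'unknown'
  computationName: 'RiskScoreIncrease',
  date: '2024-06-01',
  durationMs: 4521,
  metrics: { batches: 3, cacheHits: 120 }, // stored as a JSON string
  entityCount: 1500,
  status: 'completed'                      // default if omitted
});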
@@ -478,27 +478,27 @@ class StorageManager {
  async _stageToGCS(dateStr, entry, results, depResultHashes) {
    const rows = this._buildBigQueryRows(dateStr, entry, results, depResultHashes);
    if (rows.length === 0) return { rowCount: 0 };
-
+
    const bucketName = this.config.gcs?.bucket;
    const prefix = this.config.gcs?.prefix || 'staging';
    const filename = `${prefix}/${dateStr}/${entry.name}/${crypto.randomUUID()}.json`;
-
+
    const file = this.storage.bucket(bucketName).file(filename);
-
+
    const ndjson = rows.map(r => JSON.stringify(r)).join('\n');
-
+
    await file.save(ndjson, {
      contentType: 'application/json',
-      resumable: false
+      resumable: false
    });
-
+
    return { rowCount: rows.length, gcsUri: `gs://${bucketName}/${filename}` };
  }

  async _createTempTableForLoad(tableName) {
    const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
    const table = dataset.table(tableName);
-
+
    const schema = [
      { name: 'date', type: 'DATE', mode: 'REQUIRED' },
      { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
@@ -518,7 +518,7 @@ class StorageManager {
  async _mergeStagedData(targetTable, tempTable) {
    const fullTarget = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${targetTable}\``;
    const fullTemp = `\`${this.config.bigquery.projectId}.${this.config.bigquery.dataset}.${tempTable}\``;
-
+
    await this._ensureBigQueryTable(targetTable);

    // FIX: Removed SAFE.PARSE_JSON() because target columns are STRING.
@@ -545,21 +545,59 @@ class StorageManager {
      VALUES (S.date, S.computation_name, S.category, S.entity_id, S.code_hash, S.result_hash,
              S.dependency_result_hashes, S.entity_count, S.result_data, S.updated_at)
    `;
-
+
    // UPDATE: Use createQueryJob to capture DML statistics
    try {
-      const [job] = await this.bigquery.createQueryJob({
-        query: mergeQuery,
-        location: this.config.bigquery.location
+      const [job] = await this.bigquery.createQueryJob({
+        query: mergeQuery,
+        location: this.config.bigquery.location
      });
-
+
      await job.getQueryResults(); // Wait for completion
-
+
      const metadata = await job.getMetadata();
      const stats = metadata[0]?.statistics?.query;
      const affectedRows = stats?.numDmlAffectedRows;

      this._log('INFO', `Merge complete on ${targetTable}. Rows affected (Inserted/Updated): ${affectedRows}`);
+
+      // =========================================================================
+      // CRITICAL FIX: Delete stale entity rows that were NOT in the new staging data.
+      // This prevents ghost entities from previous runs from polluting results.
+      // We identify the date and computation from the temp table and remove any
+      // rows in the target that don't have a matching entity_id in the new run.
+      // =========================================================================
+      const deleteQuery = `
+        DELETE FROM ${fullTarget} T
+        WHERE EXISTS (
+          -- Identify which (date, computation_name) pairs were just processed
+          SELECT 1 FROM ${fullTemp} S
+          WHERE S.date = T.date AND S.computation_name = T.computation_name
+        )
+        AND NOT EXISTS (
+          -- Keep only entity_ids that are in the new staging data
+          SELECT 1 FROM ${fullTemp} S
+          WHERE S.date = T.date
+            AND S.computation_name = T.computation_name
+            AND S.entity_id = T.entity_id
+        )
+      `;
+
+      const [deleteJob] = await this.bigquery.createQueryJob({
+        query: deleteQuery,
+        location: this.config.bigquery.location
+      });
+
+      await deleteJob.getQueryResults();
+
+      const deleteMeta = await deleteJob.getMetadata();
+      const deleteStats = deleteMeta[0]?.statistics?.query;
+      const deletedRows = deleteStats?.numDmlAffectedRows;
+
+      if (deletedRows && parseInt(deletedRows, 10) > 0) {
+        this._log('INFO', `Cleanup: Deleted ${deletedRows} stale entity rows from ${targetTable}`);
+      }
+
    } catch (e) {
      this._logError(`Merge Failed on ${targetTable}`, e);
      throw e;
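The new DELETE targets the "ghost entity" case: if a re-run produces fewer entities than a previous run for the same date and computation, the missing entities' old rows would otherwise survive the MERGE. A small plain-JavaScript illustration of which rows the cleanup removes (entity ids are invented):

// Previous run for (2024-06-01, 'SignedInUserProfileMetrics') wrote rows for these entities:
const existingRows = ['user-1', 'user-2', 'user-3'];
// The new staging data for the same date/computation only contains:
const stagedRows = ['user-1', 'user-3'];

// The MERGE inserts/updates the staged entities; the follow-up DELETE removes any target row
// whose entity_id is absent from the staging data for that (date, computation_name) pair.
const deleted = existingRows.filter(id => !stagedRows.includes(id));
console.log(deleted); // ['user-2'] <- the stale "ghost" row removed by the cleanup query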
@@ -578,7 +616,7 @@ class StorageManager {
    const rows = [];
    const timestamp = new Date().toISOString();
    const depResultHashesJson = JSON.stringify(depResultHashes);
-
+
    if (entry.type === 'per-entity' && typeof results === 'object') {
      for (const [entityId, data] of Object.entries(results)) {
        rows.push({
@@ -650,7 +688,7 @@ class StorageManager {
      { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
      { name: 'date', type: 'DATE', mode: 'REQUIRED' },
      { name: 'duration_ms', type: 'INTEGER', mode: 'NULLABLE' },
-      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'metrics', type: 'STRING', mode: 'NULLABLE' },
      { name: 'entity_count', type: 'INTEGER', mode: 'NULLABLE' },
      { name: 'status', type: 'STRING', mode: 'NULLABLE' },
      { name: 'created_at', type: 'TIMESTAMP', mode: 'REQUIRED' }
@@ -663,28 +701,28 @@ class StorageManager {
      });
      this._log('INFO', `Created table ${tableName}`);
    }
-
+
    this.tableExists.set(tableName, true);
  }
-
+
  async _ensureCheckpointTable(tableName) {
    if (this.tableExists.get(tableName)) return;
    const dataset = this.bigquery.dataset(this.config.bigquery.dataset);
    const table = dataset.table(tableName);
    const [exists] = await table.exists();
-
+
    const schema = [
      { name: 'date', type: 'DATE', mode: 'REQUIRED' },
      { name: 'computation_name', type: 'STRING', mode: 'REQUIRED' },
      { name: 'checkpoint_id', type: 'STRING', mode: 'REQUIRED' },
      { name: 'worker_instance_id', type: 'STRING', mode: 'NULLABLE' },
-      { name: 'code_hash', type: 'STRING', mode: 'NULLABLE' },
+      { name: 'code_hash', type: 'STRING', mode: 'NULLABLE' },
      { name: 'status', type: 'STRING', mode: 'REQUIRED' },
      { name: 'processed_count', type: 'INTEGER', mode: 'NULLABLE' },
      { name: 'total_entities', type: 'INTEGER', mode: 'NULLABLE' },
      { name: 'last_entity_id', type: 'STRING', mode: 'NULLABLE' },
      { name: 'completed_batches', type: 'INTEGER', mode: 'REPEATED' },
-      { name: 'attempts', type: 'INTEGER', mode: 'NULLABLE' },
+      { name: 'attempts', type: 'INTEGER', mode: 'NULLABLE' },
      { name: 'started_at', type: 'TIMESTAMP', mode: 'REQUIRED' },
      { name: 'last_updated', type: 'TIMESTAMP', mode: 'REQUIRED' }
    ];
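For orientation, a row shaped to the checkpoint schema above might look like the following; all values are invented for illustration:

// Example checkpoint row matching _ensureCheckpointTable's schema (illustrative values only).
const exampleCheckpoint = {
  date: '2024-06-01',
  computation_name: 'behavioralanomaly',
  checkpoint_id: 'ckpt-7f3a',
  worker_instance_id: 'worker-2',
  code_hash: 'a1b2c3d4e5f60718',
  status: 'in_progress',
  processed_count: 4200,
  total_entities: 10000,
  last_entity_id: 'user-4200',
  completed_batches: [0, 1, 2],            // REPEATED INTEGER
  attempts: 1,
  started_at: '2024-06-01T02:05:00Z',
  last_updated: '2024-06-01T02:35:00Z'
};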
@@ -695,38 +733,38 @@ class StorageManager {
      timePartitioning: { type: 'DAY', field: 'date' },
      clustering: { fields: ['computation_name', 'status'] }
    });
-    }
+    }
    this.tableExists.set(tableName, true);
  }

  _writeToFirestore(dateStr, entry, results, firestoreConfig) {
    const { path, merge, includeMetadata } = firestoreConfig;
    if (!path) throw new Error(`Firestore path not configured for ${entry.name}`);
-
+
    const timestamp = new Date();
    const metadata = includeMetadata ? {
-      _computedAt: timestamp, _computationDate: dateStr,
+      _computedAt: timestamp, _computationDate: dateStr,
      _computationName: entry.name, _codeHash: entry.hash
    } : {};
-
+
    let docCount = 0;
-
+
    if (entry.type === 'per-entity' && typeof results === 'object') {
      const batches = [];
      let currentBatch = this.firestore.batch();
      let batchCount = 0;
      const MAX_BATCH = 500;
-
+
      for (const [entityId, data] of Object.entries(results)) {
        const docPath = this._resolvePath(path, {
          entityId, date: dateStr, computationName: entry.name, category: entry.category || 'uncategorized'
        });
-
+
        const docRef = this.firestore.doc(docPath);
        const docData = { ...data, ...metadata };
-
+
        merge ? currentBatch.set(docRef, docData, { merge: true }) : currentBatch.set(docRef, docData);
-
+
        batchCount++; docCount++;
        if (batchCount >= MAX_BATCH) {
          batches.push(currentBatch);
@@ -735,10 +773,10 @@ class StorageManager {
        }
      }
      if (batchCount > 0) batches.push(currentBatch);
-
+
      const limit = pLimit(10);
      return Promise.all(batches.map(b => limit(() => b.commit()))).then(() => ({ docCount }));
-
+
    } else {
      const docPath = this._resolvePath(path, {
        entityId: '_global', date: dateStr, computationName: entry.name, category: entry.category || 'uncategorized'
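_writeToFirestore resolves a configurable document path per entity via _resolvePath, passing entityId, date, computationName and category. The template syntax is not shown in this diff; a plausible sketch assuming {placeholder} tokens:

// Assumed placeholder substitution; the real _resolvePath implementation is not part of this diff.
function sketchResolvePath(template, vars) {
  return template.replace(/\{(\w+)\}/g, (_, key) => vars[key] ?? key);
}

sketchResolvePath('computations/{computationName}/{date}/entities/{entityId}', {
  entityId: 'user-42',
  date: '2024-06-01',
  computationName: 'SignedInUserMirrorHistory',
  category: 'uncategorized'
});
// => 'computations/SignedInUserMirrorHistory/2024-06-01/entities/user-42'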
@@ -763,7 +801,7 @@ class StorageManager {
    const str = typeof data === 'string' ? data : JSON.stringify(data);
    return crypto.createHash('md5').update(str).digest('hex').substring(0, 16);
  }
-
+
  _log(level, message) {
    this.logger?.log ? this.logger.log(level, `[StorageManager] ${message}`) : console.log(`[${level}] [StorageManager] ${message}`);
  }
@@ -775,10 +813,10 @@ class StorageManager {
    } else if (error.response && error.response.insertErrors) {
      details = JSON.stringify(error.response.insertErrors, null, 2);
    }
-
+
    this._log('ERROR', `${context}: ${details}`);
  }
-
+
 }

 module.exports = { StorageManager };
@@ -4,7 +4,7 @@
  * - Redirects output to a test table.
  * - Resolves and executes dependency chains.
  * - Finds valid runnable dates based on raw data availability.
- * * * UPDATE:
+ * * * UPDATE: Fixed dependency reading to look at test table instead of production.
  */

 const fs = require('fs');
@@ -28,14 +28,21 @@ class IntegrationTester {
  _createTestConfig(config) {
    const testConfig = JSON.parse(JSON.stringify(config)); // Deep clone

-    // 1. Redirect Storage
+    // 1. Redirect Storage (Writes)
    if (!testConfig.resultStore) testConfig.resultStore = {};
    testConfig.resultStore.table = 'computation_results_test';
+
+    // 2. Redirect Reads for Intermediate Results (Crucial for Chained Computations)
+    // We must ensure that when a computation asks for 'computation_results',
+    // it reads from the TEST table where the upstream dependency just wrote.
+    if (testConfig.tables && testConfig.tables['computation_results']) {
+      testConfig.tables['computation_results'].tableName = 'computation_results_test';
+    }

-    //
+    // 3. Disable Side Effects
    testConfig.cloudTasks = null; // Don't trigger downstream

-    //
+    // 4. Force Local Execution for simplified debugging
    if (testConfig.workerPool) {
      testConfig.workerPool.enabled = false;
    }
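The effect of _createTestConfig is easiest to see on a concrete config: both the write target (resultStore.table) and the read mapping for intermediate results (tables['computation_results'].tableName) end up pointing at computation_results_test, while Cloud Tasks and the worker pool are switched off. A sketch, assuming tester is an IntegrationTester instance and that the input config shape beyond the keys touched above is as shown:

// Hypothetical production config; only the keys rewritten by _createTestConfig matter here.
const prodConfig = {
  resultStore: { table: 'computation_results' },
  tables: { computation_results: { tableName: 'computation_results' } },
  cloudTasks: { queue: 'computation-dispatch' },
  workerPool: { enabled: true }
};

const testConfig = tester._createTestConfig(prodConfig);
// testConfig.resultStore.table                       === 'computation_results_test'
// testConfig.tables['computation_results'].tableName === 'computation_results_test'
// testConfig.cloudTasks                              === null
// testConfig.workerPool.enabled                      === false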
@@ -122,8 +129,7 @@ class IntegrationTester {
      force: true // Force run even if "up to date" in test table
    });

-    //
-    // Always fetch from the standard State Repository (JSON result store).
+    // Always fetch from the standard State Repository (JSON result store)
    const result = await this.orchestrator.stateRepository.getResult(dateStr, targetName);

    return {