bulltrackers-module 1.0.262 → 1.0.264
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/StandardExecutor.js +42 -7
- package/functions/computation-system/layers/profiling.js +309 -149
- package/functions/computation-system/persistence/FirestoreUtils.js +2 -10
- package/functions/computation-system/persistence/ResultCommitter.js +139 -120
- package/functions/computation-system/persistence/ResultsValidator.js +7 -19
- package/functions/computation-system/persistence/RunRecorder.js +3 -8
- package/functions/computation-system/persistence/StatusRepository.js +7 -21
- package/functions/computation-system/tools/BuildReporter.js +22 -31
- package/functions/computation-system/utils/data_loader.js +1 -6
- package/functions/computation-system/utils/schema_capture.js +6 -11
- package/functions/computation-system/utils/utils.js +8 -8
- package/functions/root-data-indexer/index.js +34 -63
- package/package.json +1 -1
package/functions/computation-system/persistence/ResultCommitter.js
@@ -2,14 +2,15 @@
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
  * UPDATED: Returns detailed failure reports AND metrics for the Audit Logger.
  * UPDATED: Stops retrying on non-transient errors.
+ * UPDATED: Supports Multi-Date Fan-Out (Time Machine Mode) with CONCURRENCY THROTTLING.
  */
 const { commitBatchInChunks } = require('./FirestoreUtils');
 const { updateComputationStatus } = require('./StatusRepository');
 const { batchStoreSchemas } = require('../utils/schema_capture');
 const { generateProcessId, PROCESS_TYPES } = require('../logger/logger');
-
 const { HeuristicValidator } = require('./ResultsValidator');
 const validationOverrides = require('../config/validation_overrides');
+const pLimit = require('p-limit'); // <--- CRITICAL IMPORT
 
 const NON_RETRYABLE_ERRORS = [
   'INVALID_ARGUMENT', // Schema/Type mismatch
@@ -26,7 +27,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   const { logger, db } = deps;
   const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);
 
-
+  // SAFETY LIMIT: Only allow 10 concurrent daily writes to prevent network saturation during Fan-Out
+  const fanOutLimit = pLimit(10);
 
   for (const name in stateObj) {
     const calc = stateObj[name];
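
Note: the `fanOutLimit = pLimit(10)` guard above is what keeps the Time Machine fan-out from opening hundreds of parallel Firestore writes at once. A minimal sketch of the throttling pattern, assuming the CommonJS p-limit v3 API implied by the `require`; `writeOne` is an illustrative stand-in for the per-date commit:

```js
// Sketch: cap concurrent async writes with p-limit, as the fan-out above does.
const pLimit = require('p-limit');

const limit = pLimit(10); // at most 10 writes in flight at any moment

async function writeAllDates(dates, writeOne) {
  // Every task is wrapped; p-limit queues the rest until a slot frees up.
  const tasks = dates.map(date => limit(() => writeOne(date)));
  return Promise.all(tasks); // rejects on the first failed write
}
```
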
@@ -44,144 +46,88 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
 
     if (!healthCheck.valid) {
-      // If validation failed, we consider it an anomaly but we BLOCK the write (throw error)
       runMetrics.validation.isValid = false;
       runMetrics.validation.anomalies.push(healthCheck.reason);
       throw { message: healthCheck.reason, stage: 'QUALITY_CIRCUIT_BREAKER' };
     }
 
-    // Check for minor anomalies (validation warnings that didn't fail) - optional implementation
-    // For now, we assume if valid=true, anomalies are empty unless we add warning logic later.
-
     const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0) || (typeof result === 'number' && result === 0);
     if (isEmpty) {
-      // Log empty success
       if (calc.manifest.hash) {
-        successUpdates[name] = {
-          hash: false,
-          category: calc.manifest.category,
-          metrics: runMetrics // Return empty metrics
-        };
+        successUpdates[name] = { hash: false, category: calc.manifest.category, metrics: runMetrics };
       }
       continue;
     }
 
-    // Calculate Key Count rough estimate
     if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;
 
-
-
-
-
-
-
-
-    // --- REACTIVE SELF-HEALING LOOP ---
-    let committed = false;
-    // Strategy: 1=Normal, 2=Safe (Halved), 3=Aggressive (Quartered + Key Limit)
-    const strategies = [
-      { bytes: 900 * 1024, keys: null }, // Attempt 1: Standard
-      { bytes: 450 * 1024, keys: 10000 }, // Attempt 2: High Index usage
-      { bytes: 200 * 1024, keys: 2000 } // Attempt 3: Extreme fragmentation
-    ];
-
-    let lastError = null;
-
-    for (let attempt = 0; attempt < strategies.length; attempt++) {
-      if (committed) break;
+    // --- MULTI-DATE FAN-OUT DETECTION ---
+    // If the result keys are ALL date strings (YYYY-MM-DD), we split the writes.
+    const resultKeys = Object.keys(result || {});
+    const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
+
+    if (isMultiDate) {
+      logger.log('INFO', `[ResultCommitter] 🕰️ Multi-Date Output detected for ${name} (${resultKeys.length} days). Throttled Fan-Out...`);
 
-
+      // Group updates by DATE. result is { "2024-01-01": { user1: ... }, "2024-01-02": { user1: ... } }
+      // We execute a fan-out commit for each date using p-limit.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            _verified: true,
-            _shardingStrategy: attempt + 1
-          },
-          options: { merge: true }
-        });
-      }
-
-      // 3. Attempt Commit
-      await commitBatchInChunks(config, deps, updates, `${name} Results (Att ${attempt+1})`);
-
-      // Log Success
-      if (logger && logger.logStorage) {
-        logger.logStorage(pid, name, dStr, mainDocRef.path, totalSize, isSharded);
-      }
-
-      committed = true; // Exit loop
-
-    } catch (commitErr) {
-      lastError = commitErr;
-      const msg = commitErr.message || '';
-
-      const isNonRetryable = NON_RETRYABLE_ERRORS.includes(commitErr.code);
-      if (isNonRetryable) {
-        logger.log('ERROR', `[SelfHealing] ${name} encountered FATAL error (Attempt ${attempt + 1}): ${msg}. Aborting.`);
-        throw commitErr;
-      }
-
-      const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
-      const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
-
-      if (isSizeError || isIndexError) {
-        logger.log('WARN', `[SelfHealing] ${name} failed write attempt ${attempt + 1}. Retrying with tighter constraints...`, { error: msg });
-        continue; // Try next strategy
-      } else {
-        logger.log('WARN', `[SelfHealing] ${name} unknown error (Attempt ${attempt + 1}). Retrying...`, { error: msg });
-      }
+      const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
+        const dailyData = result[historicalDate];
+        if (!dailyData || Object.keys(dailyData).length === 0) return;
+
+        const historicalDocRef = db.collection(config.resultsCollection)
+          .doc(historicalDate) // Use the HISTORICAL date, not dStr
+          .collection(config.resultsSubcollection)
+          .doc(calc.manifest.category)
+          .collection(config.computationsSubcollection)
+          .doc(name);
+
+        // Re-use the existing sharding logic for this specific date payload
+        await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps);
+      }));
+
+      await Promise.all(datePromises);
+
+      // Mark success for the Target Date (dStr) so the workflow continues
+      if (calc.manifest.hash) {
+        successUpdates[name] = {
+          hash: calc.manifest.hash,
+          category: calc.manifest.category,
+          metrics: runMetrics // Pass metrics up
+        };
       }
-    }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    } else {
+      // --- STANDARD MODE (Single Date) ---
+      const mainDocRef = db.collection(config.resultsCollection)
+        .doc(dStr)
+        .collection(config.resultsSubcollection)
+        .doc(calc.manifest.category)
+        .collection(config.computationsSubcollection)
+        .doc(name);
+
+      // Use the encapsulated write function
+      const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps);
+
+      runMetrics.storage.sizeBytes = writeStats.totalSize;
+      runMetrics.storage.isSharded = writeStats.isSharded;
+      runMetrics.storage.shardCount = writeStats.shardCount;
+
+      // Mark Success & Pass Metrics
+      if (calc.manifest.hash) {
+        successUpdates[name] = {
+          hash: calc.manifest.hash,
+          category: calc.manifest.category,
+          metrics: runMetrics
+        };
+      }
     }
 
     // Capture Schema
     if (calc.manifest.class.getSchema) {
       const { class: _cls, ...safeMetadata } = calc.manifest;
-      schemas.push({
-        name,
-        category: calc.manifest.category,
-        schema: calc.manifest.class.getSchema(),
-        metadata: safeMetadata
-      });
+      schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
     }
 
     // Cleanup Migration
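
The branch above switches between fan-out and standard mode purely by inspecting the result's top-level keys. A self-contained sketch of that detection (illustrative helper, not exported by the package):

```js
// Sketch: a result is "multi-date" only when EVERY top-level key is a YYYY-MM-DD string.
function isMultiDateResult(result) {
  const keys = Object.keys(result || {});
  return keys.length > 0 && keys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
}

isMultiDateResult({ '2024-01-01': {}, '2024-01-02': {} }); // true  -> fan out one write per date
isMultiDateResult({ user1: 0.42, user2: 0.17 });           // false -> single write under dStr
```
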
@@ -210,6 +156,80 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   return { successUpdates, failureReport };
 }
 
+/**
+ * Encapsulated write logic for reuse in Fan-Out.
+ * Handles sharding strategy and retries.
+ */
+async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps) {
+  // Strategy: 1=Normal, 2=Safe (Halved), 3=Aggressive (Quartered + Key Limit)
+  const strategies = [
+    { bytes: 900 * 1024, keys: null }, // Attempt 1: Standard
+    { bytes: 450 * 1024, keys: 10000 }, // Attempt 2: High Index usage
+    { bytes: 200 * 1024, keys: 2000 } // Attempt 3: Extreme fragmentation
+  ];
+
+  let committed = false;
+  let lastError = null;
+  let finalStats = { totalSize: 0, isSharded: false, shardCount: 1 };
+
+  for (let attempt = 0; attempt < strategies.length; attempt++) {
+    if (committed) break;
+
+    const constraints = strategies[attempt];
+
+    try {
+      // 1. Prepare Shards with current constraints
+      const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys);
+
+      // Stats
+      const pointer = updates.find(u => u.data._completed === true);
+      finalStats.isSharded = pointer && pointer.data._sharded === true;
+      finalStats.shardCount = finalStats.isSharded ? (pointer.data._shardCount || 1) : 1;
+      finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
+
+      // 2. Attempt Commit
+      await commitBatchInChunks(config, deps, updates, `${name}::${dateContext} (Att ${attempt+1})`);
+
+      // Log Success
+      if (logger && logger.logStorage) {
+        logger.logStorage(null, name, dateContext, docRef.path, finalStats.totalSize, finalStats.isSharded);
+      }
+
+      committed = true; // Exit loop
+
+    } catch (commitErr) {
+      lastError = commitErr;
+      const msg = commitErr.message || '';
+
+      const isNonRetryable = NON_RETRYABLE_ERRORS.includes(commitErr.code);
+      if (isNonRetryable) {
+        logger.log('ERROR', `[SelfHealing] ${name} encountered FATAL error (Attempt ${attempt + 1}): ${msg}. Aborting.`);
+        throw commitErr;
+      }
+
+      const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
+      const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
+
+      if (isSizeError || isIndexError) {
+        logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed write attempt ${attempt + 1}. Retrying with tighter constraints...`, { error: msg });
+        continue; // Try next strategy
+      } else {
+        logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error (Attempt ${attempt + 1}). Retrying...`, { error: msg });
+      }
+    }
+  }
+
+  if (!committed) {
+    throw {
+      message: `Exhausted sharding strategies for ${name} on ${dateContext}. Last error: ${lastError?.message}`,
+      stack: lastError?.stack,
+      stage: 'SHARDING_LIMIT_EXCEEDED'
+    };
+  }
+
+  return finalStats;
+}
+
 /**
  * Deletes result documents from a previous category location.
  */
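
The new `writeSingleResult` retries the same payload under progressively tighter shard constraints rather than failing on the first oversized write. A stripped-down sketch of that escalation loop; `shardAndCommit` is a hypothetical stand-in for `prepareAutoShardedWrites` plus `commitBatchInChunks`:

```js
// Sketch: retry a write with smaller shard limits until one strategy fits Firestore's limits.
const STRATEGIES = [
  { bytes: 900 * 1024, keys: null },   // standard: one ~900KB document
  { bytes: 450 * 1024, keys: 10000 },  // safe: halved size, capped key count
  { bytes: 200 * 1024, keys: 2000 },   // aggressive: heavy fragmentation
];

async function writeWithEscalation(payload, shardAndCommit) {
  let lastError;
  for (const limits of STRATEGIES) {
    try {
      return await shardAndCommit(payload, limits); // hypothetical helper
    } catch (err) {
      lastError = err; // size/index errors fall through to the next, tighter strategy
    }
  }
  throw new Error(`Exhausted sharding strategies: ${lastError && lastError.message}`);
}
```
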
@@ -227,9 +247,8 @@ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config,
 
   const shardsCol = oldDocRef.collection('_shards');
   const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
-
-
-  let ops = 0;
+  const batch = db.batch();
+  let ops = 0;
 
   for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
   batch.delete(oldDocRef);
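
`deleteOldCalculationData` now creates the batch explicitly before deleting each `_shards` child and the parent pointer. A sketch of the same cleanup pattern with the Firestore Admin SDK (the function name here is illustrative):

```js
// Sketch: delete every shard document plus its parent pointer in one batched write.
async function deleteShardedDoc(db, docRef) {
  const shardRefs = await docRef.collection('_shards').listDocuments();
  const batch = db.batch();
  for (const shardRef of shardRefs) batch.delete(shardRef);
  batch.delete(docRef); // remove the pointer document as well
  await batch.commit(); // note: a single batch is limited to 500 operations
}
```
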
package/functions/computation-system/persistence/ResultsValidator.js
@@ -97,32 +97,20 @@ class HeuristicValidator {
     // If we found numeric values, check if they are all the same
     let isFlatline = false;
     if (numericValues.length > 5) {
-      const first
+      const first = numericValues[0];
       const identicalCount = numericValues.filter(v => Math.abs(v - first) < 0.000001).length;
-      const flatlinePct
+      const flatlinePct = (identicalCount / numericValues.length) * 100;
 
       // Only flag flatline if the value isn't 0 (0 is handled by maxZeroPct)
-      if (flatlinePct > thresholds.maxFlatlinePct && Math.abs(first) > 0.0001) {
-        isFlatline = true;
-      }
+      if (flatlinePct > thresholds.maxFlatlinePct && Math.abs(first) > 0.0001) { isFlatline = true; }
     }
 
     // 6. Evaluations
-    if (nanPct
-
-    }
-
-    if (zeroPct > thresholds.maxZeroPct) {
-      return { valid: false, reason: `Data Integrity: ${zeroPct.toFixed(1)}% of sampled results are 0. (Suspected Logic Failure)` };
-    }
+    if (nanPct > thresholds.maxNanPct) { return { valid: false, reason: `Mathematical Error: ${nanPct.toFixed(1)}% of sampled results contain NaN or Infinity.` }; }
+    if (zeroPct > thresholds.maxZeroPct) { return { valid: false, reason: `Data Integrity: ${zeroPct.toFixed(1)}% of sampled results are 0. (Suspected Logic Failure)` }; }
+    if (nullPct > thresholds.maxNullPct) { return { valid: false, reason: `Data Integrity: ${nullPct.toFixed(1)}% of sampled results are Empty/Null.` }; }
 
-    if (
-      return { valid: false, reason: `Data Integrity: ${nullPct.toFixed(1)}% of sampled results are Empty/Null.` };
-    }
-
-    if (isFlatline) {
-      return { valid: false, reason: `Anomaly: Detected Result Flatline. >${thresholds.maxFlatlinePct}% of outputs are identical (non-zero).` };
-    }
+    if (isFlatline) { return { valid: false, reason: `Anomaly: Detected Result Flatline. >${thresholds.maxFlatlinePct}% of outputs are identical (non-zero).` }; }
 
     // Special check for Distribution/Profile calculations
     if (calcName.includes('profile') || calcName.includes('distribution')) {
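
The flatline heuristic above flags results where almost every sampled numeric value is identical and non-zero. A self-contained sketch of that check; the 90% threshold is illustrative, as the real value comes from `thresholds.maxFlatlinePct`:

```js
// Sketch: flag a "flatline" when most sampled numeric values are (near-)identical and non-zero.
function isFlatline(numericValues, maxFlatlinePct = 90) {
  if (numericValues.length <= 5) return false; // too few samples to judge
  const first = numericValues[0];
  const identical = numericValues.filter(v => Math.abs(v - first) < 0.000001).length;
  const pct = (identical / numericValues.length) * 100;
  return pct > maxFlatlinePct && Math.abs(first) > 0.0001; // zeros are handled by maxZeroPct
}

isFlatline([3.2, 3.2, 3.2, 3.2, 3.2, 3.2]); // true
isFlatline([0, 0, 0, 0, 0, 0]);             // false (handled by the zero check instead)
```
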
package/functions/computation-system/persistence/RunRecorder.js
@@ -48,7 +48,7 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
 
   // History Doc: Stores this specific run
   // ID Format: targetDate_triggerTimestamp (Sortable by data date, then execution time)
-  const runId
+  const runId = `${targetDate}_${triggerTimestamp}`;
   const runDocRef = computationDocRef.collection('history').doc(runId);
 
   // 2. Prepare Metrics & Environment Info
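
History documents are keyed `targetDate_triggerTimestamp`, so a lexicographic listing sorts by data date first and execution time second. A tiny illustration (the timestamp format shown is assumed, not taken from the package):

```js
// Sketch: history doc IDs sort by target date first, then by trigger time.
const makeRunId = (targetDate, triggerTimestamp) => `${targetDate}_${triggerTimestamp}`;

makeRunId('2024-03-01', '2024-03-05T02-00-00Z'); // '2024-03-01_2024-03-05T02-00-00Z'
// Any '2024-03-01_*' ID sorts before any '2024-03-02_*' ID, regardless of when the runs executed.
```
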
@@ -56,16 +56,11 @@
 
   // Calculate size in MB
   let sizeMB = 0;
-  if (detailedMetrics.storage && detailedMetrics.storage.sizeBytes) {
-    sizeMB = Number((detailedMetrics.storage.sizeBytes / (1024 * 1024)).toFixed(4));
-  }
+  if (detailedMetrics.storage && detailedMetrics.storage.sizeBytes) { sizeMB = Number((detailedMetrics.storage.sizeBytes / (1024 * 1024)).toFixed(4)); }
 
   // Extract Validation Anomalies (Unusual Keys/Values)
   const anomalies = detailedMetrics.validation?.anomalies || [];
-  if (error && error.message && error.message.includes('Data Integrity')) {
-    // If the error itself was a validation failure, add it to anomalies
-    anomalies.push(error.message);
-  }
+  if (error && error.message && error.message.includes('Data Integrity')) { anomalies.push(error.message); }
 
   // 3. Construct the Run Log Entry
   const runEntry = {
package/functions/computation-system/persistence/StatusRepository.js
@@ -5,23 +5,17 @@
 
 async function fetchComputationStatus(dateStr, config, { db }) {
   if (!dateStr) throw new Error('fetchStatus requires a key');
-
   const collection = config.computationStatusCollection || 'computation_status';
-  const docRef
-
-  const snap = await docRef.get();
+  const docRef = db.collection(collection).doc(dateStr);
+  const snap = await docRef.get();
   if (!snap.exists) return {};
-
-  const rawData = snap.data();
+  const rawData = snap.data();
   const normalized = {};
 
   // Normalize V1 (String) to V2 (Object)
   for (const [name, value] of Object.entries(rawData)) {
-    if (typeof value === 'string') {
-
-    } else {
-      normalized[name] = value; // V2 entry { hash, category }
-    }
+    if (typeof value === 'string') { normalized[name] = { hash: value, category: null }; // Legacy entry
+    } else { normalized[name] = value; }
   }
 
   return normalized;
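
The fetch path upgrades legacy entries, where the stored value was a bare hash string, into the V2 `{ hash, category }` shape so callers only ever see objects. A minimal sketch of that normalization:

```js
// Sketch: upgrade V1 entries ("<hash>") to the V2 shape ({ hash, category }) on read.
function normalizeStatus(rawData) {
  const normalized = {};
  for (const [name, value] of Object.entries(rawData)) {
    normalized[name] = typeof value === 'string'
      ? { hash: value, category: null } // legacy V1 entry
      : value;                          // already a V2 object
  }
  return normalized;
}

normalizeStatus({ oldCalc: 'abc123', newCalc: { hash: 'def456', category: 'profiling' } });
// -> { oldCalc: { hash: 'abc123', category: null }, newCalc: { hash: 'def456', category: 'profiling' } }
```
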
@@ -34,18 +28,10 @@ async function updateComputationStatus(dateStr, updates, config, { db }) {
   const collection = config.computationStatusCollection || 'computation_status';
   const docRef = db.collection(collection).doc(dateStr);
 
-  // We expect updates to be an object: { "CalcName": { hash: "...", category: "..." } }
-  // But result committer might still pass strings if we don't update it.
-  // We will enforce the structure here just in case.
-
   const safeUpdates = {};
   for (const [key, val] of Object.entries(updates)) {
-    if (typeof val === 'string') {
-
-      safeUpdates[key] = { hash: val, category: 'unknown', lastUpdated: new Date() };
-    } else {
-      safeUpdates[key] = { ...val, lastUpdated: new Date() };
-    }
+    if (typeof val === 'string') { safeUpdates[key] = { hash: val, category: 'unknown', lastUpdated: new Date() };
+    } else { safeUpdates[key] = { ...val, lastUpdated: new Date() }; }
   }
 
   await docRef.set(safeUpdates, { merge: true });
package/functions/computation-system/tools/BuildReporter.js
@@ -1,17 +1,17 @@
 /**
  * @fileoverview Build Reporter & Auto-Runner.
  * Generates a "Pre-Flight" report of what the computation system WILL do.
- * UPDATED:
+ * UPDATED: Fixed 'latest' document overwrite bug.
  */
 
-const { analyzeDateExecution }
-const { fetchComputationStatus }
+const { analyzeDateExecution } = require('../WorkflowOrchestrator');
+const { fetchComputationStatus } = require('../persistence/StatusRepository');
 const { normalizeName, getExpectedDateStrings, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils');
-const { checkRootDataAvailability }
-const pLimit
-const path
-const packageJson
-const packageVersion
+const { checkRootDataAvailability } = require('../data/AvailabilityChecker');
+const pLimit = require('p-limit');
+const path = require('path');
+const packageJson = require(path.join(__dirname, '..', '..', '..', 'package.json'));
+const packageVersion = packageJson.version;
 
 /**
  * AUTO-RUN ENTRY POINT
@@ -20,11 +20,13 @@ const packageVersion = packageJson.version;
 async function ensureBuildReport(config, dependencies, manifest) {
   const { db, logger } = dependencies;
   const now = new Date();
+  // Create a standardized build ID
   const buildId = `v${packageVersion}_${now.getFullYear()}-${String(now.getMonth()+1).padStart(2,'0')}-${String(now.getDate()).padStart(2,'0')}_${String(now.getHours()).padStart(2,'0')}-${String(now.getMinutes()).padStart(2,'0')}-${String(now.getSeconds()).padStart(2,'0')}`;
   const latestRef = db.collection('computation_build_records').doc('latest');
 
   try {
     const latestDoc = await latestRef.get();
+    // Check using 'packageVersion' key to match what we store
     const priorVersion = latestDoc.exists ? latestDoc.data().packageVersion : null;
 
     if (priorVersion === packageVersion) {
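
The build ID combines the package version with a second-resolution timestamp; the version prefix is what the auto-run check compares, while the timestamp keeps each report document unique. A sketch of the same construction:

```js
// Sketch: build ID of the form v<version>_YYYY-MM-DD_HH-MM-SS, as assembled above.
function makeBuildId(packageVersion, now = new Date()) {
  const p = n => String(n).padStart(2, '0');
  return `v${packageVersion}_${now.getFullYear()}-${p(now.getMonth() + 1)}-${p(now.getDate())}` +
         `_${p(now.getHours())}-${p(now.getMinutes())}-${p(now.getSeconds())}`;
}

makeBuildId('1.0.264', new Date(2024, 0, 2, 3, 4, 5)); // 'v1.0.264_2024-01-02_03-04-05'
```
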
@@ -33,13 +35,9 @@ async function ensureBuildReport(config, dependencies, manifest) {
     }
 
     logger.log('INFO', `[BuildReporter] 🚀 New Version Detected (${packageVersion}). Auto-running Pre-flight Report...`);
-    await generateBuildReport(config, dependencies, manifest, 90, buildId);
 
-
-
-      buildId,
-      generatedAt: now.toISOString()
-    });
+    // Run generation. This function handles writing the 'latest' document with FULL data.
+    await generateBuildReport(config, dependencies, manifest, 90, buildId);
 
   } catch (e) {
     logger.log('ERROR', `[BuildReporter] Auto-run check failed: ${e.message}`);
@@ -56,23 +54,23 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
   logger.log('INFO', `[BuildReporter] Generating Build Report: ${buildId} (Scope: ${daysBack} days)...`);
 
   // 1. Determine Date Range
-  const today
+  const today = new Date();
   const startDate = new Date();
   startDate.setDate(today.getDate() - daysBack);
 
   const datesToCheck = getExpectedDateStrings(startDate, today);
-  const manifestMap
+  const manifestMap = new Map(manifest.map(c => [normalizeName(c.name), c]));
 
   const reportData = {
     buildId,
-
+    packageVersion: packageVersion,
     generatedAt: new Date().toISOString(),
     summary: {},
     dates: {}
   };
 
   let totalReRuns = 0;
-  let totalNew
+  let totalNew = 0;
 
   // 2. PARALLEL PROCESSING
   const limit = pLimit(20);
@@ -105,21 +103,17 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
       // If we fetched prevStatus, it's at index 2
       const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
 
-      const rootDataStatus = availability ? availability.status : {
-        hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false
-      };
+      const rootDataStatus = availability ? availability.status : { hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false };
 
       // D. Run Logic Analysis
       const analysis = analyzeDateExecution(dateStr, manifest, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);
 
       // E. Format Findings
-      const dateSummary = {
-        willRun: [], willReRun: [], blocked: [], impossible: []
-      };
+      const dateSummary = { willRun: [], willReRun: [], blocked: [], impossible: [] };
 
-      analysis.runnable.forEach(item => dateSummary.willRun.push({ name: item.name, reason: "New / No Previous Record" }));
-      analysis.reRuns.forEach(item => dateSummary.willReRun.push({ name: item.name, reason: item.previousCategory ? "Migration" : "Hash Mismatch" }));
-      analysis.impossible.forEach(item => dateSummary.impossible.push({ name: item.name, reason: item.reason }));
+      analysis.runnable.forEach (item => dateSummary.willRun.push ({ name: item.name, reason: "New / No Previous Record" }));
+      analysis.reRuns.forEach (item => dateSummary.willReRun.push ({ name: item.name, reason: item.previousCategory ? "Migration" : "Hash Mismatch" }));
+      analysis.impossible.forEach (item => dateSummary.impossible.push ({ name: item.name, reason: item.reason }));
       [...analysis.blocked, ...analysis.failedDependency].forEach(item => dateSummary.blocked.push({ name: item.name, reason: item.reason || 'Dependency' }));
 
       const hasUpdates = dateSummary.willRun.length || dateSummary.willReRun.length || dateSummary.blocked.length || dateSummary.impossible.length;
@@ -155,10 +149,7 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
   await reportRef.set(reportData);
 
   // 5. Update 'latest' pointer
-  await db.collection('computation_build_records').doc('latest').set({
-    ...reportData,
-    note: "Latest build report pointer."
-  });
+  await db.collection('computation_build_records').doc('latest').set({ ...reportData, note: "Latest build report pointer." });
 
   logger.log('SUCCESS', `[BuildReporter] Report ${buildId} saved. Re-runs: ${totalReRuns}, New: ${totalNew}.`);
 
package/functions/computation-system/utils/data_loader.js
@@ -187,15 +187,10 @@ async function ensurePriceShardIndex(config, deps) {
   const snap = await indexDocRef.get();
   if (snap.exists) {
     const data = snap.data();
-
-    // [FIX] Check TTL (24 hours)
     const lastUpdated = data.lastUpdated ? new Date(data.lastUpdated).getTime() : 0;
     const now = Date.now();
     const oneDayMs = 24 * 60 * 60 * 1000;
-
-    if ((now - lastUpdated) < oneDayMs) {
-      return data.index || {};
-    }
+    if ((now - lastUpdated) < oneDayMs) { return data.index || {}; }
     logger.log('INFO', '[ShardIndex] Index is stale (>24h). Rebuilding...');
   } else {
     logger.log('INFO', '[ShardIndex] Index not found. Building new Price Shard Index...');
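
The cached price shard index is reused only while it is younger than 24 hours; anything older triggers a rebuild. A small sketch of the TTL test:

```js
// Sketch: serve the cached index only if it was updated within the last 24 hours.
const ONE_DAY_MS = 24 * 60 * 60 * 1000;

function isIndexFresh(lastUpdated, now = Date.now()) {
  const updatedMs = lastUpdated ? new Date(lastUpdated).getTime() : 0;
  return (now - updatedMs) < ONE_DAY_MS;
}

isIndexFresh('2024-01-01T00:00:00Z', Date.parse('2024-01-01T12:00:00Z')); // true  -> reuse index
isIndexFresh('2024-01-01T00:00:00Z', Date.parse('2024-01-03T00:00:00Z')); // false -> rebuild
```
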
package/functions/computation-system/utils/schema_capture.js
@@ -17,14 +17,9 @@ function validateSchema(schema) {
 
     // 2. Ensure it's not too large (Firestore limit: 1MB, reserve 100KB for metadata)
     const size = Buffer.byteLength(jsonStr);
-    if (size > 900 * 1024) {
-      return { valid: false, reason: `Schema exceeds 900KB limit (${(size/1024).toFixed(2)} KB)` };
-    }
-
+    if (size > 900 * 1024) { return { valid: false, reason: `Schema exceeds 900KB limit (${(size/1024).toFixed(2)} KB)` }; }
     return { valid: true };
-  } catch (e) {
-    return { valid: false, reason: `Serialization failed: ${e.message}` };
-  }
+  } catch (e) { return { valid: false, reason: `Serialization failed: ${e.message}` }; }
 }
 
 /**
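
`validateSchema` keeps schema documents roughly 100KB under Firestore's 1MiB document ceiling by rejecting anything whose serialized form exceeds 900KB. A compact sketch of that guard:

```js
// Sketch: reject schemas whose serialized JSON would crowd Firestore's 1MiB document limit.
function checkSchemaSize(schema) {
  try {
    const jsonStr = JSON.stringify(schema);
    const size = Buffer.byteLength(jsonStr);
    if (size > 900 * 1024) {
      return { valid: false, reason: `Schema exceeds 900KB limit (${(size / 1024).toFixed(2)} KB)` };
    }
    return { valid: true };
  } catch (e) {
    return { valid: false, reason: `Serialization failed: ${e.message}` };
  }
}
```
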
@@ -65,10 +60,10 @@ async function batchStoreSchemas(dependencies, config, schemas) {
     // Critical: Always overwrite 'lastUpdated' to now
     batch.set(docRef, {
       computationName: item.name,
-      category:
-      schema:
-      metadata:
-      lastUpdated:
+      category: item.category,
+      schema: item.schema,
+      metadata: item.metadata || {},
+      lastUpdated: new Date()
     }, { merge: true });
 
     validCount++;
package/functions/computation-system/utils/utils.js
@@ -159,13 +159,13 @@ async function getFirstDateFromCollection(config, deps, collectionName) {
 async function getEarliestDataDates(config, deps) {
   const { logger } = deps;
   const [ investorDate, speculatorDate, investorHistoryDate, speculatorHistoryDate, insightsDate, socialDate, priceDate ] = await Promise.all([
-    getFirstDateFromCollection(config, deps, config.normalUserPortfolioCollection),
-    getFirstDateFromCollection(config, deps, config.speculatorPortfolioCollection),
-    getFirstDateFromCollection(config, deps, config.normalUserHistoryCollection),
-    getFirstDateFromCollection(config, deps, config.speculatorHistoryCollection),
-    getFirstDateFromSimpleCollection(config, deps, config.insightsCollectionName),
-    getFirstDateFromSimpleCollection(config, deps, config.socialInsightsCollectionName),
-    getFirstDateFromPriceCollection(config, deps)
+    getFirstDateFromCollection (config, deps, config.normalUserPortfolioCollection),
+    getFirstDateFromCollection (config, deps, config.speculatorPortfolioCollection),
+    getFirstDateFromCollection (config, deps, config.normalUserHistoryCollection),
+    getFirstDateFromCollection (config, deps, config.speculatorHistoryCollection),
+    getFirstDateFromSimpleCollection (config, deps, config.insightsCollectionName),
+    getFirstDateFromSimpleCollection (config, deps, config.socialInsightsCollectionName),
+    getFirstDateFromPriceCollection (config, deps)
   ]);
 
   const getMinDate = (...dates) => {
@@ -221,5 +221,5 @@ module.exports = {
   getEarliestDataDates,
   generateCodeHash,
   withRetry,
-  DEFINITIVE_EARLIEST_DATES
+  DEFINITIVE_EARLIEST_DATES
 };