bulltrackers-module 1.0.279 → 1.0.281
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/MetaExecutor.js +6 -8
- package/functions/computation-system/executors/StandardExecutor.js +47 -35
- package/functions/computation-system/persistence/ResultCommitter.js +67 -17
- package/functions/computation-system/persistence/RunRecorder.js +17 -3
- package/functions/computation-system/tools/BuildReporter.js +51 -2
- package/functions/computation-system/utils/utils.js +13 -2
- package/package.json +1 -1
package/functions/computation-system/executors/MetaExecutor.js

@@ -2,7 +2,7 @@
 * @fileoverview Executor for "Meta" (global) calculations.
 * UPDATED: Uses CachedDataLoader for all data access.
 * UPDATED: Tracks processed shard/item counts.
-* UPDATED:
+* UPDATED: Sends 'isInitialWrite: true' for robust cleanup.
 */
 const { normalizeName } = require('../utils/utils');
 const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -27,7 +27,9 @@ class MetaExecutor {
 deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`);
 }
 }
-
+
+// [UPDATED] Meta Calcs run once per day, so isInitialWrite is always true
+return await commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
 }
 
 static async executeOncePerDay(calcInstance, metadata, dateStr, computedDeps, prevDeps, config, deps, loader) {
@@ -35,7 +37,6 @@ class MetaExecutor {
 const { logger } = deps;
 const stats = { processedShards: 0, processedItems: 0 };
 
-// Lazy fetch insights/social using the loader
 const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
 const social = metadata.rootDataDependencies?.includes('social') ? { today: await loader.loadSocial(dateStr) } : null;
 
@@ -59,12 +60,9 @@
 
 stats.processedShards++;
 stats.processedItems += Object.keys(shardData).length;
-
-// Removed global.gc()
 }
 logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);
 
-// Attach stats
 calcInstance._executionStats = stats;
 return calcInstance.getResult ? await calcInstance.getResult() : {};
 } else {
@@ -75,7 +73,7 @@ class MetaExecutor {
 });
 const res = await calcInstance.process(context);
 
-stats.processedItems = 1;
+stats.processedItems = 1;
 calcInstance._executionStats = stats;
 
 return res;
@@ -83,4 +81,4 @@ class MetaExecutor {
 }
 }
 
-module.exports = { MetaExecutor };
+module.exports = { MetaExecutor };
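Note: As the hunk above shows, MetaExecutor now tags its single daily commit with isInitialWrite. A reduced sketch of the options object it forwards and of the defaults the committer applies to it (the stub below only mirrors the option handling visible in ResultCommitter.js further down; the pass name and date are made up):

    // Stand-in for ResultCommitter's commitResults, keeping only the options handling.
    async function commitResultsStub(state, dateStr, passName, config, deps, skipStatusWrite, options = {}) {
      const flushMode = options.flushMode || 'STANDARD';      // 'STANDARD' | 'INTERMEDIATE' | 'FINAL'
      const isInitialWrite = options.isInitialWrite === true; // true -> stale shards/fields are wiped
      const shardIndexes = options.shardIndexes || {};        // per-calculation shard cursors
      return { flushMode, isInitialWrite, shardIndexes };
    }

    // Meta calculations run once per day, so every commit is the first write of the run.
    commitResultsStub({}, '2024-01-01', 'meta-pass', {}, {}, false, { isInitialWrite: true })
      .then(opts => console.log(opts)); // { flushMode: 'STANDARD', isInitialWrite: true, shardIndexes: {} }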
package/functions/computation-system/executors/StandardExecutor.js

@@ -3,6 +3,7 @@
 * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
 * UPDATED: Removes manual global.gc() calls.
 * UPDATED: Manages incremental sharding states.
+* UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
 */
 const { normalizeName } = require('../utils/utils');
 const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -10,6 +11,7 @@ const { CachedDataLoader } = require
 const { ContextFactory } = require('../context/ContextFactory');
 const { commitResults } = require('../persistence/ResultCommitter');
 const mathLayer = require('../layers/index');
+const { performance } = require('perf_hooks');
 
 class StandardExecutor {
 static async run(date, calcs, passName, config, deps, rootData, fetchedDeps, previousFetchedDeps, skipStatusWrite = false) {
@@ -40,7 +42,6 @@ class StandardExecutor {
 }
 
 // 3. Stream, Process & Batch Flush
-// The return value contains the aggregated success/failure reports from all flushes
 return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
 }
 
@@ -53,20 +54,30 @@
 
 logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
 
-// Metrics & State Tracking
 const executionStats = {};
-const shardIndexMap = {};
+const shardIndexMap = {};
 const aggregatedSuccess = {};
 const aggregatedFailures = [];
 
 Object.keys(state).forEach(name => {
-executionStats[name] = {
+executionStats[name] = {
+processedUsers: 0,
+skippedUsers: 0,
+timings: { setup: 0, stream: 0, processing: 0 }
+};
 shardIndexMap[name] = 0;
 });
 
+// Track if we have performed a flush yet (for cleanup logic)
+let hasFlushed = false;
+
+const startSetup = performance.now();
 const cachedLoader = new CachedDataLoader(config, deps);
 await cachedLoader.loadMappings();
+const setupDuration = performance.now() - startSetup;
 
+Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);
+
 const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
 const prevDateStr = prevDate.toISOString().slice(0, 10);
 
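Note: The timing fields added above are filled with the standard perf_hooks pattern of measuring each phase and adding the duration to every calculation's bucket. A self-contained sketch of that pattern (the calculation name and the simulated work are illustrative):

    const { performance } = require('perf_hooks');

    // One timing bucket per calculation, mirroring the shape initialised above.
    const executionStats = {
      calcA: { processedUsers: 0, skippedUsers: 0, timings: { setup: 0, stream: 0, processing: 0 } }
    };

    // Run an async phase, then add its wall-clock duration to every calculation's bucket.
    async function timePhase(phase, fn) {
      const start = performance.now();
      const out = await fn();
      const duration = performance.now() - start;
      Object.keys(executionStats).forEach(name => { executionStats[name].timings[phase] += duration; });
      return out;
    }

    timePhase('setup', () => new Promise(resolve => setTimeout(resolve, 5)))
      .then(() => console.log(executionStats.calcA.timings)); // setup now holds a few milliseconds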
@@ -78,18 +89,20 @@
 
 let yP_chunk = {}, tH_chunk = {};
 
-
-const BATCH_SIZE = 5000; // Flush every 5000 users
+const BATCH_SIZE = 5000;
 let usersSinceLastFlush = 0;
 
 try {
 for await (const tP_chunk of tP_iter) {
+const startStream = performance.now();
 if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
 if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
-
+const streamDuration = performance.now() - startStream;
+Object.keys(executionStats).forEach(name => executionStats[name].timings.stream += streamDuration);
+
 const chunkSize = Object.keys(tP_chunk).length;
 
-
+const startProcessing = performance.now();
 const promises = streamingCalcs.map(calc =>
 StandardExecutor.executePerUser(
 calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk,
@@ -98,18 +111,20 @@
 )
 );
 await Promise.all(promises);
+const procDuration = performance.now() - startProcessing;
+
+Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
 
 usersSinceLastFlush += chunkSize;
 
-// --- BATCH FLUSH CHECK ---
 if (usersSinceLastFlush >= BATCH_SIZE) {
 logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
 
-
+// [UPDATED] Pass isInitialWrite: true only on the first flush
+const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
 
-
+hasFlushed = true;
 StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
-
 usersSinceLastFlush = 0;
 }
 }
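Note: The batch-flush hunks above hinge on one piece of bookkeeping: only the first flush of a run (whether it happens inside the loop or only at the final commit) may carry isInitialWrite = true. A reduced sketch of that control flow (flushBufferStub is a placeholder, not the real flushBuffer):

    async function runWithBatchedFlushes(chunks, batchSize, flushBufferStub) {
      let usersSinceLastFlush = 0;
      let hasFlushed = false; // has any flush happened yet?

      for (const chunk of chunks) {
        usersSinceLastFlush += chunk.length;
        if (usersSinceLastFlush >= batchSize) {
          await flushBufferStub('INTERMEDIATE', /* isInitialWrite */ !hasFlushed);
          hasFlushed = true;          // later flushes merge instead of wiping
          usersSinceLastFlush = 0;
        }
      }
      // The final flush is the initial write only if the loop never flushed.
      return flushBufferStub('FINAL', !hasFlushed);
    }

    // Two chunks of 3 users with a batch size of 5: one intermediate (initial) flush, then a final one.
    runWithBatchedFlushes([[1, 2, 3], [4, 5, 6]], 5,
      async (mode, isInitialWrite) => console.log(mode, 'isInitialWrite =', isInitialWrite));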
@@ -118,34 +133,27 @@
 if (tH_iter && tH_iter.return) await tH_iter.return();
 }
 
-// --- FINAL FLUSH ---
 logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
-
+// [UPDATED] If we never flushed in the loop, this is the initial write
+const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite, !hasFlushed);
 
 StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
 
 return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
 }
 
-static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
 const transformedState = {};
-
-
-// 1. Prepare and Clear Instances
+
 for (const [name, inst] of Object.entries(state)) {
-// Get data from the standard storage location
 const rawResult = inst.results || {};
 
-// Handle Multi-Date Fan-Out (Transposition)
-// Logic: Checks if result is { userId: { date: data } }
 const firstUser = Object.keys(rawResult)[0];
 let dataToCommit = rawResult;
-let isMultiDate = false;
 
 if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
 const innerKeys = Object.keys(rawResult[firstUser]);
 if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
-isMultiDate = true;
 const transposed = {};
 for (const [userId, dateMap] of Object.entries(rawResult)) {
 for (const [dateKey, dailyData] of Object.entries(dateMap)) {
@@ -157,24 +165,22 @@
 }
 }
 
-// Create a mock instance for the committer that returns just this batch
 transformedState[name] = {
 manifest: inst.manifest,
 getResult: async () => dataToCommit,
-_executionStats: executionStats[name]
+_executionStats: executionStats[name]
 };
 
-// ⚠️ CRITICAL: CLEAR MEMORY
 inst.results = {};
 }
 
-//
+// [UPDATED] Pass isInitialWrite to ResultCommitter
 const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
-flushMode: mode,
-shardIndexes: shardIndexMap
+flushMode: mode,
+shardIndexes: shardIndexMap,
+isInitialWrite: isInitialWrite
 });
 
-// 3. Update Shard Indexes from result
 if (result.shardIndexes) {
 Object.assign(shardIndexMap, result.shardIndexes);
 }
@@ -185,23 +191,29 @@
 static mergeReports(successAcc, failureAcc, newResult) {
 if (!newResult) return;
 
-// Merge Success Updates (Sums metrics)
 for (const [name, update] of Object.entries(newResult.successUpdates)) {
 if (!successAcc[name]) {
 successAcc[name] = update;
 } else {
-// Sum storage metrics
 if (update.metrics?.storage) {
 successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
 successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
 successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
 }
-
+
+if (update.metrics?.execution?.timings) {
+if (!successAcc[name].metrics.execution) successAcc[name].metrics.execution = { timings: { setup:0, stream:0, processing:0 }};
+const tDest = successAcc[name].metrics.execution.timings;
+const tSrc = update.metrics.execution.timings;
+
+tDest.setup += (tSrc.setup || 0);
+tDest.stream += (tSrc.stream || 0);
+tDest.processing += (tSrc.processing || 0);
+}
 successAcc[name].hash = update.hash;
 }
 }
 
-// Merge Failures
 if (newResult.failureReport) {
 failureAcc.push(...newResult.failureReport);
 }
@@ -243,4 +255,4 @@
 }
 }
 
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };
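Note: mergeReports now sums timing metrics as well as storage metrics across flushes. A trimmed sketch of just the timing part, with simplified report shapes (the calculation name and numbers are invented):

    // Each flush report is assumed to look like { [name]: { metrics: { execution: { timings } } } }.
    function mergeTimings(successAcc, successUpdates) {
      for (const [name, update] of Object.entries(successUpdates)) {
        if (!successAcc[name]) { successAcc[name] = update; continue; }
        const tDest = successAcc[name].metrics.execution.timings;
        const tSrc = update.metrics?.execution?.timings || {};
        tDest.setup += (tSrc.setup || 0);
        tDest.stream += (tSrc.stream || 0);
        tDest.processing += (tSrc.processing || 0);
      }
      return successAcc;
    }

    const acc = {};
    mergeTimings(acc, { calcA: { metrics: { execution: { timings: { setup: 10, stream: 20, processing: 30 } } } } });
    mergeTimings(acc, { calcA: { metrics: { execution: { timings: { setup: 1, stream: 2, processing: 3 } } } } });
    console.log(acc.calcA.metrics.execution.timings); // { setup: 11, stream: 22, processing: 33 }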
package/functions/computation-system/persistence/ResultCommitter.js

@@ -2,7 +2,8 @@
 * @fileoverview Handles saving computation results with observability and Smart Cleanup.
 * UPDATED: Implements GZIP Compression for efficient storage.
 * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
-* UPDATED: Auto-enforces Weekend Mode validation
+* UPDATED: Auto-enforces Weekend Mode validation.
+* UPDATED: Implements "Initial Write" logic to wipe stale data/shards on a fresh run.
 */
 const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
 const { updateComputationStatus } = require('./StatusRepository');
@@ -30,6 +31,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
 // Options defaults
 const flushMode = options.flushMode || 'STANDARD';
+const isInitialWrite = options.isInitialWrite === true; // [NEW] Flag for clean run
 const shardIndexes = options.shardIndexes || {};
 const nextShardIndexes = {};
 
@@ -50,29 +52,22 @@
 const result = await calc.getResult();
 const configOverrides = validationOverrides[calc.manifest.name] || {};
 
-// --- [NEW] AUTO-ENFORCE WEEKEND MODE FOR PRICE-ONLY CALCS ---
-// If a calculation depends SOLELY on 'price', we assume market closures
-// will cause 0s/Flatlines on weekends, so we enforce lenient validation.
 const dataDeps = calc.manifest.rootDataDependencies || [];
 const isPriceOnly = (dataDeps.length === 1 && dataDeps[0] === 'price');
 
 let effectiveOverrides = { ...configOverrides };
 
 if (isPriceOnly && !effectiveOverrides.weekend) {
-// Apply strict leniency for weekend/holiday price actions
 effectiveOverrides.weekend = {
 maxZeroPct: 100,
 maxFlatlinePct: 100,
 maxNullPct: 100
 };
 }
-// -----------------------------------------------------------
 
 // Validation
 if (result && Object.keys(result).length > 0) {
-// [FIX] Added 'dStr' as 3rd argument to match HeuristicValidator signature
 const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, dStr, effectiveOverrides);
-
 if (!healthCheck.valid) {
 runMetrics.validation.isValid = false;
 runMetrics.validation.anomalies.push(healthCheck.reason);
@@ -83,11 +78,8 @@
 }
 
 const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
-
-// Calculate Result Hash (Content-Based)
 const resultHash = isEmpty ? 'empty' : generateDataHash(result);
 
-// Handle Empty Results
 if (isEmpty) {
 if (flushMode === 'INTERMEDIATE') {
 nextShardIndexes[name] = currentShardIndex;
@@ -123,7 +115,9 @@
 .collection(config.computationsSubcollection)
 .doc(name);
 
-
+// Note: Multi-date fan-out rarely hits sharding, and tracking isInitialWrite per-date is complex.
+// We assume standard merging here.
+await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false);
 }));
 await Promise.all(datePromises);
 
@@ -146,7 +140,7 @@
 .collection(config.computationsSubcollection)
 .doc(name);
 
-const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode);
+const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite);
 
 runMetrics.storage.sizeBytes = writeStats.totalSize;
 runMetrics.storage.isSharded = writeStats.isSharded;
@@ -192,8 +186,32 @@
 return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
 }
 
-async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
+async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false) {
 
+// [NEW] Transition & Cleanup Logic
+// If this is the initial write of a run, we verify the existing state to prevent "Ghost Data".
+let wasSharded = false;
+let hadRootData = false;
+let shouldWipeShards = false;
+
+// Default: Merge updates. But if Initial Write, overwrite (merge: false) to clear stale fields.
+let rootMergeOption = !isInitialWrite;
+
+if (isInitialWrite) {
+try {
+const currentSnap = await docRef.get();
+if (currentSnap.exists) {
+const d = currentSnap.data();
+wasSharded = (d._sharded === true);
+// If it was sharded, we MUST wipe the old shards because we are re-writing from scratch.
+// Even if we write new shards, we want to ensure shard_10 doesn't persist if we only write up to shard_5.
+if (wasSharded) shouldWipeShards = true;
+
+// If it wasn't sharded, it had root data. overwriting (merge: false) handles that automatically.
+}
+} catch (e) { /* ignore read error */ }
+}
+
 // --- COMPRESSION STRATEGY ---
 try {
 const jsonString = JSON.stringify(result);
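Note: The pre-write check added above reduces to a small decision: on the initial write of a run, overwrite the root document (merge: false), and if the previous run left a sharded result behind, schedule the old shard documents for deletion. A condensed sketch of that decision, with the Firestore snapshot replaced by a plain stub:

    // previousSnap mimics the result of docRef.get(): { exists, data() }.
    function planInitialWrite(isInitialWrite, previousSnap) {
      const plan = { rootMergeOption: !isInitialWrite, shouldWipeShards: false };
      if (isInitialWrite && previousSnap.exists) {
        // A previously sharded result may have left shard_0..shard_N behind; wipe them so a
        // smaller re-write cannot leave "ghost" shards.
        if (previousSnap.data()._sharded === true) plan.shouldWipeShards = true;
      }
      return plan;
    }

    console.log(planInitialWrite(true, { exists: true, data: () => ({ _sharded: true }) }));
    // { rootMergeOption: false, shouldWipeShards: true }
    console.log(planInitialWrite(false, { exists: true, data: () => ({ _sharded: true }) }));
    // { rootMergeOption: true, shouldWipeShards: false }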
@@ -212,7 +230,22 @@
 payload: compressedBuffer
 };
 
-
+// Cleanup: If it was sharded, or if we are wiping shards on initial write
+if (shouldWipeShards) {
+logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Compressed Write.`);
+const updates = [];
+const shardCol = docRef.collection('_shards');
+const shardDocs = await shardCol.listDocuments();
+shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
+
+// Root update with merge: false (overwrites everything)
+updates.push({ ref: docRef, data: compressedPayload, options: { merge: false } });
+
+await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+} else {
+// Standard update (respecting calculated rootMergeOption)
+await docRef.set(compressedPayload, { merge: rootMergeOption });
+}
 
 return {
 totalSize: compressedBuffer.length,
@@ -242,12 +275,29 @@
 const constraints = strategies[attempt];
 try {
 const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
-const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
 
+// [NEW] Inject Cleanup Ops
+if (shouldWipeShards) {
+logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Write (Initial).`);
+const shardCol = docRef.collection('_shards');
+const shardDocs = await shardCol.listDocuments();
+// Prepend DELETEs
+shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
+shouldWipeShards = false; // Done for this loop
+}
+
+// Ensure the root document write respects our merge option
+const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
+if (rootUpdate) {
+rootUpdate.options = { merge: rootMergeOption };
+}
+
+const pointer = updates.find(u => u.data && (u.data._completed !== undefined || u.data._sharded !== undefined));
 finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
 
 let maxIndex = startShardIndex;
 updates.forEach(u => {
+if (u.type === 'DELETE') return;
 const segs = u.ref.path.split('/');
 const last = segs[segs.length - 1];
 if (last.startsWith('shard_')) {
@@ -360,4 +410,4 @@
 if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
 }
 
-module.exports = { commitResults };
+module.exports = { commitResults };
package/functions/computation-system/persistence/RunRecorder.js

@@ -1,6 +1,7 @@
 /**
 * @fileoverview Utility for recording computation run attempts (The Audit Logger).
 * UPDATED: Stores 'trigger' reason and 'execution' stats.
+* UPDATED (IDEA 2): Stores granular timing profiles.
 */
 
 const { FieldValue } = require('../utils/utils');
@@ -37,6 +38,10 @@ async function recordRunAttempt(db, context, status, error = null, detailedMetri
 const anomalies = detailedMetrics.validation?.anomalies || [];
 if (error && error.message && error.message.includes('Data Integrity')) { anomalies.push(error.message); }
 
+// [IDEA 2] Prepare Execution Stats & Timings
+const rawExecStats = detailedMetrics.execution || {};
+const timings = rawExecStats.timings || {};
+
 const runEntry = {
 runId: runId,
 computationName: computation,
@@ -53,8 +58,17 @@
 type: (triggerReason && triggerReason.includes('Layer')) ? 'CASCADE' : ((triggerReason && triggerReason.includes('New')) ? 'INIT' : 'UPDATE')
 },
 
-// [
-executionStats:
+// [IDEA 2] Enhanced Execution Stats
+executionStats: {
+processedUsers: rawExecStats.processedUsers || 0,
+skippedUsers: rawExecStats.skippedUsers || 0,
+// Explicitly break out timings for BigQuery/Analysis
+timings: {
+setupMs: Math.round(timings.setup || 0),
+streamMs: Math.round(timings.stream || 0),
+processingMs: Math.round(timings.processing || 0)
+}
+},
 
 outputStats: {
 sizeMB: sizeMB,
@@ -64,7 +78,7 @@
 },
 
 anomalies: anomalies,
-_schemaVersion: '2.
+_schemaVersion: '2.2' // Bumped for profiler
 };
 
 if (error) {
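Note: The run entry above consumes the executor stats collected in StandardExecutor via detailedMetrics.execution, rounding the phase timings to whole milliseconds. A small sketch of that mapping (field names come from the hunk; the input values are made up):

    function buildExecutionStats(detailedMetrics = {}) {
      const rawExecStats = detailedMetrics.execution || {};
      const timings = rawExecStats.timings || {};
      return {
        processedUsers: rawExecStats.processedUsers || 0,
        skippedUsers: rawExecStats.skippedUsers || 0,
        timings: {
          setupMs: Math.round(timings.setup || 0),
          streamMs: Math.round(timings.stream || 0),
          processingMs: Math.round(timings.processing || 0)
        }
      };
    }

    console.log(buildExecutionStats({
      execution: { processedUsers: 5000, skippedUsers: 12, timings: { setup: 103.4, stream: 2210.9, processing: 8714.2 } }
    }));
    // { processedUsers: 5000, skippedUsers: 12, timings: { setupMs: 103, streamMs: 2211, processingMs: 8714 } }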
package/functions/computation-system/tools/BuildReporter.js

@@ -4,6 +4,7 @@
 * REFACTORED: Strict 5-category reporting with date-based exclusion logic.
 * UPDATED: Replaced Batch Writes with Parallel Writes to prevent DEADLINE_EXCEEDED timeouts.
 * FIXED: Ensures 'latest' pointer updates even if detail writes fail.
+* UPDATED (IDEA 1): Added Dependency Impact Analysis ("Blast Radius").
 */
 
 const { analyzeDateExecution } = require('../WorkflowOrchestrator');
@@ -41,6 +42,34 @@
 return false;
 }
 
+/**
+* Helper: Calculates the transitive closure of dependents (Blast Radius).
+* Returns the count of direct and total cascading dependents.
+*/
+function calculateBlastRadius(targetCalcName, reverseGraph) {
+const impactSet = new Set();
+const queue = [targetCalcName];
+
+// BFS Traversal
+while(queue.length > 0) {
+const current = queue.shift();
+const dependents = reverseGraph.get(current) || [];
+
+dependents.forEach(child => {
+if (!impactSet.has(child)) {
+impactSet.add(child);
+queue.push(child);
+}
+});
+}
+
+return {
+directDependents: (reverseGraph.get(targetCalcName) || []).length,
+totalCascadingDependents: impactSet.size,
+affectedCalculations: Array.from(impactSet).slice(0, 50) // Cap list size for storage safety
+};
+}
+
 /**
 * AUTO-RUN ENTRY POINT
 * Uses transactional locking to prevent race conditions.
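Note: A usage sketch for the calculateBlastRadius helper added above, with a tiny hand-built reverse graph (the calculation names are invented; the function body is repeated verbatim so the snippet runs on its own):

    // Parent -> children: each key maps to the calculations that depend on it.
    const reverseGraph = new Map([
      ['daily_pnl', ['risk_score', 'leaderboard']],
      ['risk_score', ['alerts']]
    ]);

    function calculateBlastRadius(targetCalcName, reverseGraph) {
      const impactSet = new Set();
      const queue = [targetCalcName];
      while (queue.length > 0) {
        const current = queue.shift();
        for (const child of reverseGraph.get(current) || []) {
          if (!impactSet.has(child)) { impactSet.add(child); queue.push(child); }
        }
      }
      return {
        directDependents: (reverseGraph.get(targetCalcName) || []).length,
        totalCascadingDependents: impactSet.size,
        affectedCalculations: Array.from(impactSet).slice(0, 50)
      };
    }

    console.log(calculateBlastRadius('daily_pnl', reverseGraph));
    // { directDependents: 2, totalCascadingDependents: 3, affectedCalculations: ['risk_score', 'leaderboard', 'alerts'] }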
@@ -100,6 +129,19 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 const datesToCheck = getExpectedDateStrings(startDate, today);
 const manifestMap = new Map(manifest.map(c => [normalizeName(c.name), c]));
 
+// [IDEA 1] Build Reverse Dependency Graph (Parent -> Children)
+const reverseGraph = new Map();
+manifest.forEach(c => {
+const parentName = normalizeName(c.name);
+if (c.dependencies) {
+c.dependencies.forEach(dep => {
+const depName = normalizeName(dep);
+if (!reverseGraph.has(depName)) reverseGraph.set(depName, []);
+reverseGraph.get(depName).push(parentName);
+});
+}
+});
+
 // Main Report Header
 const reportHeader = {
 buildId,
@@ -169,11 +211,18 @@
 return; // EXCLUDED: Date is before data exists
 }
 
-
+const entry = {
 name: item.name,
 reason: item.reason || extraReason,
 pass: calcManifest ? calcManifest.pass : '?'
-}
+};
+
+// [IDEA 1] If this is a Re-Run, calculate Blast Radius
+if (targetArray === dateSummary.rerun) {
+entry.impact = calculateBlastRadius(item.name, reverseGraph);
+}
+
+targetArray.push(entry);
 };
 
 // 1. RUN (New)
package/functions/computation-system/utils/utils.js

@@ -78,7 +78,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
 }
 
 /** Stage 2: Commit a batch of writes in chunks
-*
+* UPDATED: Now supports { type: 'DELETE' } in the write object.
 */
 async function commitBatchInChunks(config, deps, writes, operationName) {
 const { db, logger } = deps;
@@ -118,6 +118,17 @@ async function commitBatchInChunks(config, deps, writes, operationName) {
 };
 
 for (const write of writes) {
+// [NEW] Handle DELETE operations
+if (write.type === 'DELETE') {
+if ((currentOpsCount + 1 > MAX_BATCH_OPS)) {
+await commitAndReset();
+}
+currentBatch.delete(write.ref);
+currentOpsCount++;
+continue;
+}
+
+// Standard SET/UPDATE operations
 let docSize = 100;
 try { if (write.data) docSize = JSON.stringify(write.data).length; } catch (e) { }
 
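Note: With the change above, the writes array given to commitBatchInChunks may mix set-style entries with { type: 'DELETE', ref } entries, which is exactly what ResultCommitter's shard cleanup produces. A minimal illustration of the dispatch, with the Firestore WriteBatch replaced by a stub and illustrative document paths:

    // DELETE entries call batch.delete(); everything else falls through to batch.set().
    function applyWrites(batch, writes) {
      for (const write of writes) {
        if (write.type === 'DELETE') {
          batch.delete(write.ref);
          continue;
        }
        batch.set(write.ref, write.data, write.options || {});
      }
    }

    const stubBatch = {
      delete: ref => console.log('delete', ref),
      set: (ref, data, options) => console.log('set', ref, JSON.stringify(options))
    };

    applyWrites(stubBatch, [
      { type: 'DELETE', ref: 'results/calcA/_shards/shard_3' },                               // stale shard
      { ref: 'results/calcA', data: { payload: '<gzip bytes>' }, options: { merge: false } }  // fresh root write
    ]);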
@@ -251,4 +262,4 @@
 generateDataHash, // Exported
 withRetry,
 DEFINITIVE_EARLIEST_DATES
-};
+};