bulltrackers-module 1.0.280 → 1.0.282
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/executors/MetaExecutor.js +6 -8
- package/functions/computation-system/executors/StandardExecutor.js +20 -43
- package/functions/computation-system/onboarding.md +154 -869
- package/functions/computation-system/persistence/ResultCommitter.js +83 -16
- package/functions/computation-system/simulation/Fabricator.js +285 -0
- package/functions/computation-system/simulation/SeededRandom.js +41 -0
- package/functions/computation-system/simulation/SimRunner.js +51 -0
- package/functions/computation-system/tools/BuildReporter.js +147 -161
- package/functions/computation-system/utils/utils.js +13 -2
- package/package.json +1 -1
package/functions/computation-system/executors/MetaExecutor.js

@@ -2,7 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
- * UPDATED:
+ * UPDATED: Sends 'isInitialWrite: true' for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -27,7 +27,9 @@ class MetaExecutor {
         deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`);
       }
     }
-
+
+    // [UPDATED] Meta Calcs run once per day, so isInitialWrite is always true
+    return await commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
   }
 
   static async executeOncePerDay(calcInstance, metadata, dateStr, computedDeps, prevDeps, config, deps, loader) {
@@ -35,7 +37,6 @@ class MetaExecutor {
     const { logger } = deps;
     const stats = { processedShards: 0, processedItems: 0 };
 
-    // Lazy fetch insights/social using the loader
     const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
     const social = metadata.rootDataDependencies?.includes('social') ? { today: await loader.loadSocial(dateStr) } : null;
 
@@ -59,12 +60,9 @@
 
         stats.processedShards++;
         stats.processedItems += Object.keys(shardData).length;
-
-        // Removed global.gc()
       }
       logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);
 
-      // Attach stats
      calcInstance._executionStats = stats;
      return calcInstance.getResult ? await calcInstance.getResult() : {};
    } else {
@@ -75,7 +73,7 @@ class MetaExecutor {
      });
      const res = await calcInstance.process(context);
 
-      stats.processedItems = 1;
+      stats.processedItems = 1;
      calcInstance._executionStats = stats;
 
      return res;
@@ -83,4 +81,4 @@ class MetaExecutor {
  }
 }
 
-module.exports = { MetaExecutor };
+module.exports = { MetaExecutor };
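The `isInitialWrite: true` option lets the committer distinguish the first write of a day from later incremental flushes. ResultCommitter.js also changed in this release (+83 -16), but its diff is not shown in this section, so the sketch below only illustrates the cleanup idea; `deps.storage`, `cleanupStaleShards`, and `writeShards` are assumed names, not the package's actual API.

```js
// Illustrative sketch only: the real ResultCommitter.js changes are not shown above.
// deps.storage, cleanupStaleShards and writeShards are assumed helper names.
async function commitResults(state, dateStr, passName, config, deps, skipStatusWrite, options = {}) {
  const { isInitialWrite = false, flushMode = 'FINAL', shardIndexes = {} } = options;
  const successUpdates = {};

  for (const [name, inst] of Object.entries(state)) {
    const data = inst.getResult ? await inst.getResult() : {};

    // On the first write of the day, shards left over from a previous (larger) run
    // can be deleted safely; later INTERMEDIATE flushes must append, not wipe.
    if (isInitialWrite) {
      await deps.storage.cleanupStaleShards(name, dateStr); // assumed helper
    }

    const written = await deps.storage.writeShards(name, dateStr, data, { // assumed helper
      startIndex: shardIndexes[name] || 0,
      final: flushMode === 'FINAL',
    });

    successUpdates[name] = { metrics: { storage: written } };
  }

  return { successUpdates, failureReport: [], shardIndexes };
}
```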
package/functions/computation-system/executors/StandardExecutor.js

@@ -3,7 +3,7 @@
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
  * UPDATED: Removes manual global.gc() calls.
  * UPDATED: Manages incremental sharding states.
- * UPDATED
+ * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -42,7 +42,6 @@ class StandardExecutor {
     }
 
     // 3. Stream, Process & Batch Flush
-    // The return value contains the aggregated success/failure reports from all flushes
     return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
   }
 
@@ -55,29 +54,28 @@
 
     logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
 
-    // [IDEA 2] Metrics & State Tracking
     const executionStats = {};
-    const shardIndexMap = {};
+    const shardIndexMap = {};
     const aggregatedSuccess = {};
     const aggregatedFailures = [];
 
-    // Initialize Timing Stats per calculation
     Object.keys(state).forEach(name => {
       executionStats[name] = {
         processedUsers: 0,
         skippedUsers: 0,
-        timings: { setup: 0, stream: 0, processing: 0 }
+        timings: { setup: 0, stream: 0, processing: 0 }
       };
       shardIndexMap[name] = 0;
     });
 
-    //
+    // Track if we have performed a flush yet (for cleanup logic)
+    let hasFlushed = false;
+
     const startSetup = performance.now();
     const cachedLoader = new CachedDataLoader(config, deps);
     await cachedLoader.loadMappings();
     const setupDuration = performance.now() - startSetup;
 
-    // Distribute setup time
     Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);
 
     const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
@@ -91,14 +89,11 @@
 
     let yP_chunk = {}, tH_chunk = {};
 
-
-    const BATCH_SIZE = 5000; // Flush every 5000 users
+    const BATCH_SIZE = 5000;
     let usersSinceLastFlush = 0;
 
     try {
-      // [IDEA 2] Loop wrapper for profiling
       for await (const tP_chunk of tP_iter) {
-        // [IDEA 2] Measure Streaming Time (Gap between processing chunks)
         const startStream = performance.now();
         if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
         if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
@@ -107,7 +102,6 @@
 
         const chunkSize = Object.keys(tP_chunk).length;
 
-        // [IDEA 2] Measure Processing Time
         const startProcessing = performance.now();
         const promises = streamingCalcs.map(calc =>
           StandardExecutor.executePerUser(
@@ -119,20 +113,18 @@
         await Promise.all(promises);
         const procDuration = performance.now() - startProcessing;
 
-        // Assign processing time (Note: Parallel execution means total wall time is shared)
         Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
 
         usersSinceLastFlush += chunkSize;
 
-        // --- BATCH FLUSH CHECK ---
         if (usersSinceLastFlush >= BATCH_SIZE) {
           logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
 
-
+          // [UPDATED] Pass isInitialWrite: true only on the first flush
+          const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
 
-
+          hasFlushed = true;
           StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
-
           usersSinceLastFlush = 0;
         }
       }
@@ -141,34 +133,27 @@ class StandardExecutor {
       if (tH_iter && tH_iter.return) await tH_iter.return();
     }
 
-    // --- FINAL FLUSH ---
     logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
-
+    // [UPDATED] If we never flushed in the loop, this is the initial write
+    const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite, !hasFlushed);
 
     StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
 
     return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
   }
 
-  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
     const transformedState = {};
-
-
-    // 1. Prepare and Clear Instances
+
     for (const [name, inst] of Object.entries(state)) {
-      // Get data from the standard storage location
       const rawResult = inst.results || {};
 
-      // Handle Multi-Date Fan-Out (Transposition)
-      // Logic: Checks if result is { userId: { date: data } }
       const firstUser = Object.keys(rawResult)[0];
       let dataToCommit = rawResult;
-      let isMultiDate = false;
 
       if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
         const innerKeys = Object.keys(rawResult[firstUser]);
         if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
-          isMultiDate = true;
           const transposed = {};
           for (const [userId, dateMap] of Object.entries(rawResult)) {
             for (const [dateKey, dailyData] of Object.entries(dateMap)) {
@@ -180,24 +165,22 @@ class StandardExecutor {
         }
       }
 
-      // Create a mock instance for the committer that returns just this batch
       transformedState[name] = {
         manifest: inst.manifest,
         getResult: async () => dataToCommit,
-        _executionStats: executionStats[name]
+        _executionStats: executionStats[name]
       };
 
-      // ⚠️ CRITICAL: CLEAR MEMORY
       inst.results = {};
     }
 
-    //
+    // [UPDATED] Pass isInitialWrite to ResultCommitter
     const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
-      flushMode: mode,
-      shardIndexes: shardIndexMap
+      flushMode: mode,
+      shardIndexes: shardIndexMap,
+      isInitialWrite: isInitialWrite
     });
 
-    // 3. Update Shard Indexes from result
     if (result.shardIndexes) {
      Object.assign(shardIndexMap, result.shardIndexes);
     }
@@ -208,19 +191,16 @@ class StandardExecutor {
   static mergeReports(successAcc, failureAcc, newResult) {
     if (!newResult) return;
 
-    // Merge Success Updates (Sums metrics)
     for (const [name, update] of Object.entries(newResult.successUpdates)) {
       if (!successAcc[name]) {
         successAcc[name] = update;
       } else {
-        // Sum storage metrics
         if (update.metrics?.storage) {
           successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
           successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
           successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
         }
 
-        // [IDEA 2] Sum timing metrics
         if (update.metrics?.execution?.timings) {
           if (!successAcc[name].metrics.execution) successAcc[name].metrics.execution = { timings: { setup:0, stream:0, processing:0 }};
           const tDest = successAcc[name].metrics.execution.timings;
@@ -230,13 +210,10 @@ class StandardExecutor {
           tDest.stream += (tSrc.stream || 0);
           tDest.processing += (tSrc.processing || 0);
         }
-
-        // Keep the latest hash/composition info
         successAcc[name].hash = update.hash;
       }
     }
 
-    // Merge Failures
     if (newResult.failureReport) {
       failureAcc.push(...newResult.failureReport);
     }
@@ -278,4 +255,4 @@ class StandardExecutor {
   }
 }
 
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };
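For reference, the multi-date fan-out ("transposition") that flushBuffer applies before committing can be seen in isolation below. The shape check and date regex match the code shown in the hunks above; the reassignment of `dataToCommit` falls in lines elided from this diff, so that tail of the loop is an assumption.

```js
// Standalone sketch of the multi-date fan-out in flushBuffer.
// Results shaped { userId: { 'YYYY-MM-DD': data } } are flipped to { 'YYYY-MM-DD': { userId: data } }.
const rawResult = {
  user1: { '2024-01-01': { pnl: 5 }, '2024-01-02': { pnl: -2 } },
  user2: { '2024-01-01': { pnl: 1 } },
};

const firstUser = Object.keys(rawResult)[0];
let dataToCommit = rawResult;

if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
  const innerKeys = Object.keys(rawResult[firstUser]);
  if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
    const transposed = {};
    for (const [userId, dateMap] of Object.entries(rawResult)) {
      for (const [dateKey, dailyData] of Object.entries(dateMap)) {
        if (!transposed[dateKey]) transposed[dateKey] = {};
        transposed[dateKey][userId] = dailyData;
      }
    }
    dataToCommit = transposed; // assumed: this assignment sits in the lines elided from the hunk
  }
}

console.log(dataToCommit);
// { '2024-01-01': { user1: { pnl: 5 }, user2: { pnl: 1 } },
//   '2024-01-02': { user1: { pnl: -2 } } }
```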