bulltrackers-module 1.0.280 → 1.0.282

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
- * UPDATED: Removed global.gc() calls.
+ * UPDATED: Sends 'isInitialWrite: true' for robust cleanup.
  */
  const { normalizeName } = require('../utils/utils');
  const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -27,7 +27,9 @@ class MetaExecutor {
  deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`);
  }
  }
- return await commitResults(state, dStr, passName, config, deps, skipStatusWrite);
+
+ // [UPDATED] Meta Calcs run once per day, so isInitialWrite is always true
+ return await commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
  }

  static async executeOncePerDay(calcInstance, metadata, dateStr, computedDeps, prevDeps, config, deps, loader) {
@@ -35,7 +37,6 @@ class MetaExecutor {
  const { logger } = deps;
  const stats = { processedShards: 0, processedItems: 0 };

- // Lazy fetch insights/social using the loader
  const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
  const social = metadata.rootDataDependencies?.includes('social') ? { today: await loader.loadSocial(dateStr) } : null;

@@ -59,12 +60,9 @@ class MetaExecutor {

  stats.processedShards++;
  stats.processedItems += Object.keys(shardData).length;
-
- // Removed global.gc()
  }
  logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);

- // Attach stats
  calcInstance._executionStats = stats;
  return calcInstance.getResult ? await calcInstance.getResult() : {};
  } else {
@@ -75,7 +73,7 @@ class MetaExecutor {
  });
  const res = await calcInstance.process(context);

- stats.processedItems = 1; // "Global" item
+ stats.processedItems = 1;
  calcInstance._executionStats = stats;

  return res;
@@ -83,4 +81,4 @@ class MetaExecutor {
  }
  }

- module.exports = { MetaExecutor };
+ module.exports = { MetaExecutor };
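The MetaExecutor change above only shows the call site: commitResults now receives an options object with isInitialWrite: true because meta calculations run once per day. The committer itself is not part of this diff, so the following is a minimal, self-contained sketch of how a committer could honor that flag for robust cleanup; the in-memory store and every helper name here are invented for illustration and are not the package's ResultCommitter.

// Hypothetical sketch only: shows the assumed `isInitialWrite` semantics, not the real committer.
const store = new Map(); // stand-in shard storage: key = `${name}/${dateStr}/${index}`

async function commitResultsSketch(state, dateStr, passName, config, deps, skipStatusWrite, options = {}) {
  const { isInitialWrite = false, shardIndexes = {} } = options;

  for (const [name, inst] of Object.entries(state)) {
    const data = inst.getResult ? await inst.getResult() : {};

    if (isInitialWrite) {
      // Robust cleanup: before the first write of the day, drop shards left behind
      // by an earlier partial run so stale data cannot survive a re-run.
      for (const key of [...store.keys()]) {
        if (key.startsWith(`${name}/${dateStr}/`)) store.delete(key);
      }
    }

    // Append this batch at the tracked shard offset, then advance the offset.
    const index = shardIndexes[name] || 0;
    store.set(`${name}/${dateStr}/${index}`, data);
    shardIndexes[name] = index + 1;
  }

  return { successUpdates: {}, failureReport: [], shardIndexes };
}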
@@ -3,7 +3,7 @@
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
  * UPDATED: Removes manual global.gc() calls.
  * UPDATED: Manages incremental sharding states.
- * UPDATED (IDEA 2): Implemented Computation Profiler (timings).
+ * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */
  const { normalizeName } = require('../utils/utils');
  const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -42,7 +42,6 @@ class StandardExecutor {
  }

  // 3. Stream, Process & Batch Flush
- // The return value contains the aggregated success/failure reports from all flushes
  return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
  }

@@ -55,29 +54,28 @@ class StandardExecutor {

  logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);

- // [IDEA 2] Metrics & State Tracking
  const executionStats = {};
- const shardIndexMap = {}; // Tracks sharding offsets per calculation
+ const shardIndexMap = {};
  const aggregatedSuccess = {};
  const aggregatedFailures = [];

- // Initialize Timing Stats per calculation
  Object.keys(state).forEach(name => {
  executionStats[name] = {
  processedUsers: 0,
  skippedUsers: 0,
- timings: { setup: 0, stream: 0, processing: 0 } // New
+ timings: { setup: 0, stream: 0, processing: 0 }
  };
  shardIndexMap[name] = 0;
  });

- // [IDEA 2] Measure Setup Time
+ // Track if we have performed a flush yet (for cleanup logic)
+ let hasFlushed = false;
+
  const startSetup = performance.now();
  const cachedLoader = new CachedDataLoader(config, deps);
  await cachedLoader.loadMappings();
  const setupDuration = performance.now() - startSetup;

- // Distribute setup time
  Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);

  const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
@@ -91,14 +89,11 @@ class StandardExecutor {

  let yP_chunk = {}, tH_chunk = {};

- // OOM Protection: Batch Flushing Configuration
- const BATCH_SIZE = 5000; // Flush every 5000 users
+ const BATCH_SIZE = 5000;
  let usersSinceLastFlush = 0;

  try {
- // [IDEA 2] Loop wrapper for profiling
  for await (const tP_chunk of tP_iter) {
- // [IDEA 2] Measure Streaming Time (Gap between processing chunks)
  const startStream = performance.now();
  if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
  if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
@@ -107,7 +102,6 @@ class StandardExecutor {

  const chunkSize = Object.keys(tP_chunk).length;

- // [IDEA 2] Measure Processing Time
  const startProcessing = performance.now();
  const promises = streamingCalcs.map(calc =>
  StandardExecutor.executePerUser(
@@ -119,20 +113,18 @@ class StandardExecutor {
  await Promise.all(promises);
  const procDuration = performance.now() - startProcessing;

- // Assign processing time (Note: Parallel execution means total wall time is shared)
  Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);

  usersSinceLastFlush += chunkSize;

- // --- BATCH FLUSH CHECK ---
  if (usersSinceLastFlush >= BATCH_SIZE) {
  logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);

- const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true);
+ // [UPDATED] Pass isInitialWrite: true only on the first flush
+ const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);

- // Aggregate metrics
+ hasFlushed = true;
  StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
-
  usersSinceLastFlush = 0;
  }
  }
@@ -141,34 +133,27 @@ class StandardExecutor {
  if (tH_iter && tH_iter.return) await tH_iter.return();
  }

- // --- FINAL FLUSH ---
  logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
- const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite);
+ // [UPDATED] If we never flushed in the loop, this is the initial write
+ const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite, !hasFlushed);

  StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);

  return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
  }

- static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+ static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
  const transformedState = {};
- const { logger } = deps;
-
- // 1. Prepare and Clear Instances
+
  for (const [name, inst] of Object.entries(state)) {
- // Get data from the standard storage location
  const rawResult = inst.results || {};

- // Handle Multi-Date Fan-Out (Transposition)
- // Logic: Checks if result is { userId: { date: data } }
  const firstUser = Object.keys(rawResult)[0];
  let dataToCommit = rawResult;
- let isMultiDate = false;

  if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
  const innerKeys = Object.keys(rawResult[firstUser]);
  if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
- isMultiDate = true;
  const transposed = {};
  for (const [userId, dateMap] of Object.entries(rawResult)) {
  for (const [dateKey, dailyData] of Object.entries(dateMap)) {
@@ -180,24 +165,22 @@ class StandardExecutor {
  }
  }

- // Create a mock instance for the committer that returns just this batch
  transformedState[name] = {
  manifest: inst.manifest,
  getResult: async () => dataToCommit,
- _executionStats: executionStats[name] // Attach current stats including timings
+ _executionStats: executionStats[name]
  };

- // ⚠️ CRITICAL: CLEAR MEMORY
  inst.results = {};
  }

- // 2. Commit Batch
+ // [UPDATED] Pass isInitialWrite to ResultCommitter
  const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
- flushMode: mode, // 'INTERMEDIATE' or 'FINAL'
- shardIndexes: shardIndexMap // Pass the tracking map
+ flushMode: mode,
+ shardIndexes: shardIndexMap,
+ isInitialWrite: isInitialWrite
  });

- // 3. Update Shard Indexes from result
  if (result.shardIndexes) {
  Object.assign(shardIndexMap, result.shardIndexes);
  }
@@ -208,19 +191,16 @@ class StandardExecutor {
  static mergeReports(successAcc, failureAcc, newResult) {
  if (!newResult) return;

- // Merge Success Updates (Sums metrics)
  for (const [name, update] of Object.entries(newResult.successUpdates)) {
  if (!successAcc[name]) {
  successAcc[name] = update;
  } else {
- // Sum storage metrics
  if (update.metrics?.storage) {
  successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
  successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
  successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
  }

- // [IDEA 2] Sum timing metrics
  if (update.metrics?.execution?.timings) {
  if (!successAcc[name].metrics.execution) successAcc[name].metrics.execution = { timings: { setup:0, stream:0, processing:0 }};
  const tDest = successAcc[name].metrics.execution.timings;
@@ -230,13 +210,10 @@ class StandardExecutor {
  tDest.stream += (tSrc.stream || 0);
  tDest.processing += (tSrc.processing || 0);
  }
-
- // Keep the latest hash/composition info
  successAcc[name].hash = update.hash;
  }
  }

- // Merge Failures
  if (newResult.failureReport) {
  failureAcc.push(...newResult.failureReport);
  }
@@ -278,4 +255,4 @@ class StandardExecutor {
  }
  }

- module.exports = { StandardExecutor };
+ module.exports = { StandardExecutor };
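Taken together, the StandardExecutor changes thread isInitialWrite through the batch-flush path so that exactly one flush per run is marked as the initial write: the first intermediate flush if the 5000-user threshold is reached, otherwise the final flush. The sketch below isolates that pattern with invented names (runWithBatchFlush, flush); it illustrates the assumed semantics and is not code from the package.

// Minimal sketch of the first-flush signalling: isInitialWrite is true exactly once per run.
async function runWithBatchFlush(chunks, flush, batchSize = 5000) {
  let hasFlushed = false;
  let pending = 0;

  for (const chunk of chunks) {
    pending += chunk.length;
    if (pending >= batchSize) {
      await flush('INTERMEDIATE', /* isInitialWrite */ !hasFlushed);
      hasFlushed = true;
      pending = 0;
    }
  }

  // If no intermediate flush ever ran, the final flush is the initial write.
  await flush('FINAL', !hasFlushed);
}

// Usage: logs "INTERMEDIATE true" then "FINAL false" for this input.
runWithBatchFlush([[1, 2], [3, 4, 5]], async (mode, isInitialWrite) => {
  console.log(mode, isInitialWrite);
}, 3);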