bulltrackers-module 1.0.280 → 1.0.281
This diff shows the publicly released content of these package versions as they appear in their public registry; it is provided for informational purposes only.
- package/functions/computation-system/executors/MetaExecutor.js +6 -8
- package/functions/computation-system/executors/StandardExecutor.js +20 -43
- package/functions/computation-system/persistence/ResultCommitter.js +67 -17
- package/functions/computation-system/utils/utils.js +13 -2
- package/package.json +1 -1
--- package/functions/computation-system/executors/MetaExecutor.js
+++ package/functions/computation-system/executors/MetaExecutor.js
@@ -2,7 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
- * UPDATED:
+ * UPDATED: Sends 'isInitialWrite: true' for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -27,7 +27,9 @@ class MetaExecutor {
         deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`);
       }
     }
-
+
+    // [UPDATED] Meta Calcs run once per day, so isInitialWrite is always true
+    return await commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
   }
 
   static async executeOncePerDay(calcInstance, metadata, dateStr, computedDeps, prevDeps, config, deps, loader) {
@@ -35,7 +37,6 @@ class MetaExecutor {
     const { logger } = deps;
     const stats = { processedShards: 0, processedItems: 0 };
 
-    // Lazy fetch insights/social using the loader
     const insights = metadata.rootDataDependencies?.includes('insights') ? { today: await loader.loadInsights(dateStr) } : null;
     const social = metadata.rootDataDependencies?.includes('social') ? { today: await loader.loadSocial(dateStr) } : null;
 
@@ -59,12 +60,9 @@ class MetaExecutor {
 
         stats.processedShards++;
         stats.processedItems += Object.keys(shardData).length;
-
-        // Removed global.gc()
       }
       logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);
 
-      // Attach stats
       calcInstance._executionStats = stats;
       return calcInstance.getResult ? await calcInstance.getResult() : {};
     } else {
@@ -75,7 +73,7 @@ class MetaExecutor {
       });
       const res = await calcInstance.process(context);
 
-      stats.processedItems = 1;
+      stats.processedItems = 1;
       calcInstance._executionStats = stats;
 
       return res;
@@ -83,4 +81,4 @@ class MetaExecutor {
   }
 }
 
-module.exports = { MetaExecutor };
+module.exports = { MetaExecutor };
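Note: the change above means every meta-pass commit now carries the isInitialWrite flag. A minimal usage sketch follows; it is illustrative only. The commitResults function and its parameter list come from ResultCommitter.js in this diff, while the wrapper function itself is hypothetical.

// Illustrative wrapper (not part of the package): a meta pass always signals a fresh run.
async function runMetaPass(state, dStr, passName, config, deps, skipStatusWrite, commitResults) {
  // Meta calculations execute once per day, so every commit is the first write for that date,
  // which is why isInitialWrite is hard-coded to true and lets the committer wipe stale shards.
  return commitResults(state, dStr, passName, config, deps, skipStatusWrite, { isInitialWrite: true });
}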
--- package/functions/computation-system/executors/StandardExecutor.js
+++ package/functions/computation-system/executors/StandardExecutor.js
@@ -3,7 +3,7 @@
  * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
  * UPDATED: Removes manual global.gc() calls.
  * UPDATED: Manages incremental sharding states.
- * UPDATED
+ * UPDATED: Implements 'isInitialWrite' flag for robust cleanup.
  */
 const { normalizeName } = require('../utils/utils');
 const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -42,7 +42,6 @@ class StandardExecutor {
     }
 
     // 3. Stream, Process & Batch Flush
-    // The return value contains the aggregated success/failure reports from all flushes
     return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
   }
 
@@ -55,29 +54,28 @@
 
     logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
 
-    // [IDEA 2] Metrics & State Tracking
     const executionStats = {};
-    const shardIndexMap = {};
+    const shardIndexMap = {};
     const aggregatedSuccess = {};
     const aggregatedFailures = [];
 
-    // Initialize Timing Stats per calculation
     Object.keys(state).forEach(name => {
       executionStats[name] = {
         processedUsers: 0,
         skippedUsers: 0,
-        timings: { setup: 0, stream: 0, processing: 0 }
+        timings: { setup: 0, stream: 0, processing: 0 }
       };
       shardIndexMap[name] = 0;
     });
 
-    //
+    // Track if we have performed a flush yet (for cleanup logic)
+    let hasFlushed = false;
+
     const startSetup = performance.now();
     const cachedLoader = new CachedDataLoader(config, deps);
     await cachedLoader.loadMappings();
     const setupDuration = performance.now() - startSetup;
 
-    // Distribute setup time
     Object.keys(executionStats).forEach(name => executionStats[name].timings.setup += setupDuration);
 
     const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
@@ -91,14 +89,11 @@
 
     let yP_chunk = {}, tH_chunk = {};
 
-
-    const BATCH_SIZE = 5000; // Flush every 5000 users
+    const BATCH_SIZE = 5000;
     let usersSinceLastFlush = 0;
 
     try {
-      // [IDEA 2] Loop wrapper for profiling
       for await (const tP_chunk of tP_iter) {
-        // [IDEA 2] Measure Streaming Time (Gap between processing chunks)
         const startStream = performance.now();
         if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
         if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
@@ -107,7 +102,6 @@
 
         const chunkSize = Object.keys(tP_chunk).length;
 
-        // [IDEA 2] Measure Processing Time
         const startProcessing = performance.now();
         const promises = streamingCalcs.map(calc =>
           StandardExecutor.executePerUser(
@@ -119,20 +113,18 @@
         await Promise.all(promises);
         const procDuration = performance.now() - startProcessing;
 
-        // Assign processing time (Note: Parallel execution means total wall time is shared)
         Object.keys(executionStats).forEach(name => executionStats[name].timings.processing += procDuration);
 
         usersSinceLastFlush += chunkSize;
 
-        // --- BATCH FLUSH CHECK ---
         if (usersSinceLastFlush >= BATCH_SIZE) {
           logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
 
-
+          // [UPDATED] Pass isInitialWrite: true only on the first flush
+          const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true, !hasFlushed);
 
-
+          hasFlushed = true;
           StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
-
           usersSinceLastFlush = 0;
         }
       }
@@ -141,34 +133,27 @@
       if (tH_iter && tH_iter.return) await tH_iter.return();
     }
 
-    // --- FINAL FLUSH ---
     logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
-
+    // [UPDATED] If we never flushed in the loop, this is the initial write
+    const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite, !hasFlushed);
 
     StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
 
     return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
   }
 
-  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+  static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite, isInitialWrite = false) {
     const transformedState = {};
-
-
-    // 1. Prepare and Clear Instances
+
     for (const [name, inst] of Object.entries(state)) {
-      // Get data from the standard storage location
      const rawResult = inst.results || {};
 
-      // Handle Multi-Date Fan-Out (Transposition)
-      // Logic: Checks if result is { userId: { date: data } }
      const firstUser = Object.keys(rawResult)[0];
      let dataToCommit = rawResult;
-      let isMultiDate = false;
 
      if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
        const innerKeys = Object.keys(rawResult[firstUser]);
        if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
-          isMultiDate = true;
          const transposed = {};
          for (const [userId, dateMap] of Object.entries(rawResult)) {
            for (const [dateKey, dailyData] of Object.entries(dateMap)) {
@@ -180,24 +165,22 @@
          }
        }
 
-      // Create a mock instance for the committer that returns just this batch
      transformedState[name] = {
        manifest: inst.manifest,
        getResult: async () => dataToCommit,
-        _executionStats: executionStats[name]
+        _executionStats: executionStats[name]
      };
 
-      // ⚠️ CRITICAL: CLEAR MEMORY
      inst.results = {};
    }
 
-    //
+    // [UPDATED] Pass isInitialWrite to ResultCommitter
    const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
-      flushMode: mode,
-      shardIndexes: shardIndexMap
+      flushMode: mode,
+      shardIndexes: shardIndexMap,
+      isInitialWrite: isInitialWrite
    });
 
-    // 3. Update Shard Indexes from result
    if (result.shardIndexes) {
      Object.assign(shardIndexMap, result.shardIndexes);
    }
@@ -208,19 +191,16 @@
  static mergeReports(successAcc, failureAcc, newResult) {
    if (!newResult) return;
 
-    // Merge Success Updates (Sums metrics)
    for (const [name, update] of Object.entries(newResult.successUpdates)) {
      if (!successAcc[name]) {
        successAcc[name] = update;
      } else {
-        // Sum storage metrics
        if (update.metrics?.storage) {
          successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
          successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
          successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
        }
 
-        // [IDEA 2] Sum timing metrics
        if (update.metrics?.execution?.timings) {
          if (!successAcc[name].metrics.execution) successAcc[name].metrics.execution = { timings: { setup:0, stream:0, processing:0 }};
          const tDest = successAcc[name].metrics.execution.timings;
@@ -230,13 +210,10 @@
          tDest.stream += (tSrc.stream || 0);
          tDest.processing += (tSrc.processing || 0);
        }
-
-        // Keep the latest hash/composition info
        successAcc[name].hash = update.hash;
      }
    }
 
-    // Merge Failures
    if (newResult.failureReport) {
      failureAcc.push(...newResult.failureReport);
    }
@@ -278,4 +255,4 @@
  }
}
 
-module.exports = { StandardExecutor };
+module.exports = { StandardExecutor };
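Note: the executor change above boils down to a "first flush wins" rule. Below is a minimal sketch of that pattern; only the BATCH_SIZE value, the 'INTERMEDIATE'/'FINAL' modes, and the trailing isInitialWrite argument come from the diff, and the two-argument flushBuffer here is a simplified stand-in for StandardExecutor.flushBuffer.

// Illustrative pattern only: the real loop lives in StandardExecutor.streamAndProcess.
async function streamWithBatchedFlushes(chunks, flushBuffer) {
  const BATCH_SIZE = 5000;   // flush threshold used in the diff above
  let usersSinceLastFlush = 0;
  let hasFlushed = false;    // has any flush happened yet in this run?

  for (const chunk of chunks) {
    usersSinceLastFlush += chunk.length;
    if (usersSinceLastFlush >= BATCH_SIZE) {
      // Only the very first flush of the run is allowed to trigger initial-write cleanup.
      await flushBuffer('INTERMEDIATE', /* isInitialWrite */ !hasFlushed);
      hasFlushed = true;
      usersSinceLastFlush = 0;
    }
  }

  // Final commit: if the loop never flushed, this is still the initial write.
  await flushBuffer('FINAL', !hasFlushed);
}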
--- package/functions/computation-system/persistence/ResultCommitter.js
+++ package/functions/computation-system/persistence/ResultCommitter.js
@@ -2,7 +2,8 @@
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
  * UPDATED: Implements GZIP Compression for efficient storage.
  * UPDATED: Implements Content-Based Hashing (ResultHash) for dependency short-circuiting.
- * UPDATED: Auto-enforces Weekend Mode validation
+ * UPDATED: Auto-enforces Weekend Mode validation.
+ * UPDATED: Implements "Initial Write" logic to wipe stale data/shards on a fresh run.
  */
 const { commitBatchInChunks, generateDataHash } = require('../utils/utils');
 const { updateComputationStatus } = require('./StatusRepository');
@@ -30,6 +31,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 
   // Options defaults
   const flushMode = options.flushMode || 'STANDARD';
+  const isInitialWrite = options.isInitialWrite === true; // [NEW] Flag for clean run
   const shardIndexes = options.shardIndexes || {};
   const nextShardIndexes = {};
 
@@ -50,29 +52,22 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     const result = await calc.getResult();
     const configOverrides = validationOverrides[calc.manifest.name] || {};
 
-    // --- [NEW] AUTO-ENFORCE WEEKEND MODE FOR PRICE-ONLY CALCS ---
-    // If a calculation depends SOLELY on 'price', we assume market closures
-    // will cause 0s/Flatlines on weekends, so we enforce lenient validation.
     const dataDeps = calc.manifest.rootDataDependencies || [];
     const isPriceOnly = (dataDeps.length === 1 && dataDeps[0] === 'price');
 
     let effectiveOverrides = { ...configOverrides };
 
     if (isPriceOnly && !effectiveOverrides.weekend) {
-      // Apply strict leniency for weekend/holiday price actions
       effectiveOverrides.weekend = {
         maxZeroPct: 100,
         maxFlatlinePct: 100,
         maxNullPct: 100
       };
     }
-    // -----------------------------------------------------------
 
     // Validation
     if (result && Object.keys(result).length > 0) {
-      // [FIX] Added 'dStr' as 3rd argument to match HeuristicValidator signature
       const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, dStr, effectiveOverrides);
-
       if (!healthCheck.valid) {
         runMetrics.validation.isValid = false;
         runMetrics.validation.anomalies.push(healthCheck.reason);
@@ -83,11 +78,8 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
     }
 
     const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
-
-    // Calculate Result Hash (Content-Based)
     const resultHash = isEmpty ? 'empty' : generateDataHash(result);
 
-    // Handle Empty Results
     if (isEmpty) {
       if (flushMode === 'INTERMEDIATE') {
         nextShardIndexes[name] = currentShardIndex;
@@ -123,7 +115,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
           .collection(config.computationsSubcollection)
           .doc(name);
 
-
+        // Note: Multi-date fan-out rarely hits sharding, and tracking isInitialWrite per-date is complex.
+        // We assume standard merging here.
+        await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD', false);
       }));
       await Promise.all(datePromises);
 
@@ -146,7 +140,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
       .collection(config.computationsSubcollection)
       .doc(name);
 
-    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode);
+    const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode, isInitialWrite);
 
     runMetrics.storage.sizeBytes = writeStats.totalSize;
     runMetrics.storage.isSharded = writeStats.isSharded;
@@ -192,8 +186,32 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
   return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
 }
 
-async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
+async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD', isInitialWrite = false) {
 
+  // [NEW] Transition & Cleanup Logic
+  // If this is the initial write of a run, we verify the existing state to prevent "Ghost Data".
+  let wasSharded = false;
+  let hadRootData = false;
+  let shouldWipeShards = false;
+
+  // Default: Merge updates. But if Initial Write, overwrite (merge: false) to clear stale fields.
+  let rootMergeOption = !isInitialWrite;
+
+  if (isInitialWrite) {
+    try {
+      const currentSnap = await docRef.get();
+      if (currentSnap.exists) {
+        const d = currentSnap.data();
+        wasSharded = (d._sharded === true);
+        // If it was sharded, we MUST wipe the old shards because we are re-writing from scratch.
+        // Even if we write new shards, we want to ensure shard_10 doesn't persist if we only write up to shard_5.
+        if (wasSharded) shouldWipeShards = true;
+
+        // If it wasn't sharded, it had root data. overwriting (merge: false) handles that automatically.
+      }
+    } catch (e) { /* ignore read error */ }
+  }
+
   // --- COMPRESSION STRATEGY ---
   try {
     const jsonString = JSON.stringify(result);
@@ -212,7 +230,22 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
       payload: compressedBuffer
     };
 
-
+    // Cleanup: If it was sharded, or if we are wiping shards on initial write
+    if (shouldWipeShards) {
+      logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Compressed Write.`);
+      const updates = [];
+      const shardCol = docRef.collection('_shards');
+      const shardDocs = await shardCol.listDocuments();
+      shardDocs.forEach(d => updates.push({ type: 'DELETE', ref: d }));
+
+      // Root update with merge: false (overwrites everything)
+      updates.push({ ref: docRef, data: compressedPayload, options: { merge: false } });
+
+      await commitBatchInChunks(config, deps, updates, `${name}::Cleanup+Compress`);
+    } else {
+      // Standard update (respecting calculated rootMergeOption)
+      await docRef.set(compressedPayload, { merge: rootMergeOption });
+    }
 
     return {
       totalSize: compressedBuffer.length,
@@ -242,12 +275,29 @@ async function writeSingleResult(result, docRef, name, dateContext, logger, conf
     const constraints = strategies[attempt];
     try {
       const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
-      const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined);
 
+      // [NEW] Inject Cleanup Ops
+      if (shouldWipeShards) {
+        logger.log('INFO', `[Cleanup] ${name}: Wiping old shards before Write (Initial).`);
+        const shardCol = docRef.collection('_shards');
+        const shardDocs = await shardCol.listDocuments();
+        // Prepend DELETEs
+        shardDocs.forEach(d => updates.unshift({ type: 'DELETE', ref: d }));
+        shouldWipeShards = false; // Done for this loop
+      }
+
+      // Ensure the root document write respects our merge option
+      const rootUpdate = updates.find(u => u.ref.path === docRef.path && u.type !== 'DELETE');
+      if (rootUpdate) {
+        rootUpdate.options = { merge: rootMergeOption };
+      }
+
+      const pointer = updates.find(u => u.data && (u.data._completed !== undefined || u.data._sharded !== undefined));
      finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
 
      let maxIndex = startShardIndex;
      updates.forEach(u => {
+        if (u.type === 'DELETE') return;
        const segs = u.ref.path.split('/');
        const last = segs[segs.length - 1];
        if (last.startsWith('shard_')) {
@@ -360,4 +410,4 @@ function calculateFirestoreBytes(value) {
   if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
 }
 
-module.exports = { commitResults };
+module.exports = { commitResults };
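Note: the committer change above decides, on the first write of a run, whether stale shards must be wiped before the new payload lands. A condensed sketch of that decision follows, assuming the Firestore Admin SDK (docRef.get(), collection('_shards').listDocuments()) and the write-object shape used by commitBatchInChunks; the helper name is hypothetical.

// Condensed sketch of the initial-write decision (not the package's exact code).
async function planInitialWrite(docRef, payload, isInitialWrite) {
  const updates = [];
  const merge = !isInitialWrite; // merge on incremental writes, overwrite on a fresh run

  if (isInitialWrite) {
    const snap = await docRef.get();
    // A previous sharded run leaves a `_shards` subcollection behind; queue DELETEs so
    // e.g. shard_10 cannot outlive a smaller re-write.
    if (snap.exists && snap.data()._sharded === true) {
      const shardDocs = await docRef.collection('_shards').listDocuments();
      shardDocs.forEach(ref => updates.push({ type: 'DELETE', ref }));
    }
  }

  // The root document write carries the computed merge option.
  updates.push({ ref: docRef, data: payload, options: { merge } });
  return updates; // consumable by commitBatchInChunks, which now understands DELETE ops
}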
--- package/functions/computation-system/utils/utils.js
+++ package/functions/computation-system/utils/utils.js
@@ -78,7 +78,7 @@ async function withRetry(fn, operationName, maxRetries = 3) {
 }
 
 /** Stage 2: Commit a batch of writes in chunks
- *
+ * UPDATED: Now supports { type: 'DELETE' } in the write object.
  */
 async function commitBatchInChunks(config, deps, writes, operationName) {
   const { db, logger } = deps;
@@ -118,6 +118,17 @@ async function commitBatchInChunks(config, deps, writes, operationName) {
   };
 
   for (const write of writes) {
+    // [NEW] Handle DELETE operations
+    if (write.type === 'DELETE') {
+      if ((currentOpsCount + 1 > MAX_BATCH_OPS)) {
+        await commitAndReset();
+      }
+      currentBatch.delete(write.ref);
+      currentOpsCount++;
+      continue;
+    }
+
+    // Standard SET/UPDATE operations
     let docSize = 100;
     try { if (write.data) docSize = JSON.stringify(write.data).length; } catch (e) { }
 
@@ -251,4 +262,4 @@ module.exports = {
   generateDataHash, // Exported
   withRetry,
   DEFINITIVE_EARLIEST_DATES
-};
+};
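Note: with DELETE support in commitBatchInChunks, a caller can mix shard deletions and a root overwrite in one chunked commit. An illustrative sketch follows; commitBatchInChunks, config, and deps are the real utils.js export and parameters, while the helper, docRef, and payload names are hypothetical.

// Hypothetical caller showing the mixed write shapes commitBatchInChunks now accepts.
async function wipeShardsAndRewrite(config, deps, commitBatchInChunks, docRef, payload) {
  const shardDocs = await docRef.collection('_shards').listDocuments();
  const writes = [
    // DELETE entries carry only { type, ref } and are applied with batch.delete(ref).
    ...shardDocs.map(ref => ({ type: 'DELETE', ref })),
    // Regular entries carry data (and optionally per-write options).
    { ref: docRef, data: payload, options: { merge: false } },
  ];
  return commitBatchInChunks(config, deps, writes, 'example::Cleanup+Rewrite');
}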