bulltrackers-module 1.0.270 → 1.0.272
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/WorkflowOrchestrator.js +5 -1
- package/functions/computation-system/config/validation_overrides.js +3 -0
- package/functions/computation-system/executors/MetaExecutor.js +2 -1
- package/functions/computation-system/executors/StandardExecutor.js +123 -56
- package/functions/computation-system/helpers/computation_dispatcher.js +15 -8
- package/functions/computation-system/persistence/ResultCommitter.js +167 -73
- package/functions/computation-system/persistence/ResultsValidator.js +35 -28
- package/functions/computation-system/tools/BuildReporter.js +38 -22
- package/package.json +1 -1
package/functions/computation-system/WorkflowOrchestrator.js
@@ -2,6 +2,7 @@
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
  * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
  * UPDATED: Includes 'Audit Upgrade' check.
+ * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
 const { normalizeName, DEFINITIVE_EARLIEST_DATES } = require('./utils/utils');
 const { checkRootDataAvailability, checkRootDependencies } = require('./data/AvailabilityChecker');
@@ -76,6 +77,7 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,

 // 2. Check Dependencies
 let dependencyIsImpossible = false;
+let impossibleDepCause = null;
 const missingDeps = [];
 if (calc.dependencies) {
 for (const dep of calc.dependencies) {
@@ -83,6 +85,7 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
 const depStored = simulationStatus[normDep];
 if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
 dependencyIsImpossible = true;
+impossibleDepCause = dep; // Capture the culprit
 break;
 }
 if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
@@ -90,7 +93,8 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
 }

 if (dependencyIsImpossible) {
-
+// [UPDATED] Include the name of the failing dependency in the reason string
+markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
 continue;
 }
 if (missingDeps.length > 0) { report.failedDependency.push({ name: cName, missing: missingDeps }); continue; }
package/functions/computation-system/config/validation_overrides.js
@@ -3,4 +3,7 @@ module.exports = {
 // EXAMPLES :
 // "bankruptcy-detector": { maxZeroPct: 100 }, // It's rare, so 100% 0s is fine
 // "sparse-signal-generator": { maxNullPct: 99 }
+
+"instrument-price-change-1d": { maxZeroPct: 100 }, // Because weekeends/holidays return 0 change, technically crypto means this can't hit 100% but it's usually quite close, so we override
+"instrument-price-momentum-20d ": { maxZeroPct: 100 }, // Some assets can be very stagnant over a month, especially bonds or stablecoins
 };
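The updated ResultsValidator.js further below understands an optional nested `weekend` block in whatever overrides object it receives, so an entry in this file could also carry weekend-only thresholds. The entry name and values here are purely illustrative and are not part of the package:

// Hypothetical override entry (illustration only): weekday limits stay strict,
// while the nested `weekend` block is merged over them on Saturdays/Sundays.
// "instrument-volume-spike": {
//   maxZeroPct: 80,
//   weekend: { maxZeroPct: 100, maxNullPct: 95 }
// },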
package/functions/computation-system/executors/MetaExecutor.js
@@ -2,6 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
+ * UPDATED: Removed global.gc() calls.
  */
 const { normalizeName } = require('../utils/utils');
 const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -59,7 +60,7 @@ class MetaExecutor {
 stats.processedShards++;
 stats.processedItems += Object.keys(shardData).length;

-
+// Removed global.gc()
 }
 logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);

package/functions/computation-system/executors/StandardExecutor.js
@@ -1,8 +1,8 @@
 /**
  * @fileoverview Executor for "Standard" (per-user) calculations.
- * UPDATED:
- * UPDATED:
- * UPDATED:
+ * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
+ * UPDATED: Removes manual global.gc() calls.
+ * UPDATED: Manages incremental sharding states.
  */
 const { normalizeName } = require('../utils/utils');
 const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -19,9 +19,8 @@ class StandardExecutor {
 // 1. Prepare Yesterdays Data if needed
 const fullRoot = { ...rootData };
 if (calcs.some(c => c.isHistorical)) {
-const prev
-const prevStr
-// Explicitly fetch yesterday's refs as they aren't provided by the daily indexer
+const prev = new Date(date); prev.setUTCDate(prev.getUTCDate() - 1);
+const prevStr = prev.toISOString().slice(0, 10);
 fullRoot.yesterdayPortfolioRefs = await getPortfolioPartRefs(config, deps, prevStr);
 }

@@ -31,6 +30,8 @@ class StandardExecutor {
 try {
 const inst = new c.class();
 inst.manifest = c;
+// Ensure internal storage exists for flushing
+inst.results = {};
 state[normalizeName(c.name)] = inst;
 logger.log('INFO', `${c.name} calculation running for ${dStr}`);
 } catch (e) {
@@ -38,61 +39,29 @@ class StandardExecutor {
 }
 }

-// 3. Stream &
-
-
-// 4. Pre-Commit Transformation for Fan-Out
-const transformedState = {};
-for (const [name, inst] of Object.entries(state)) {
-const result = await inst.getResult(); // { userId: { date: data } } or { userId: data }
-const firstUser = Object.keys(result)[0];
-
-// Check if the inner value is a Date Map
-// Only checks the first user as heuristic; implies uniform return type
-if (firstUser && result[firstUser] && typeof result[firstUser] === 'object') {
-const innerKeys = Object.keys(result[firstUser]);
-// Check if keys look like YYYY-MM-DD
-const isDateMap = innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
-
-if (isDateMap) {
-const transposed = {};
-for (const [userId, dateMap] of Object.entries(result)) {
-for (const [dateKey, dailyData] of Object.entries(dateMap)) {
-if (!transposed[dateKey]) transposed[dateKey] = {};
-transposed[dateKey][userId] = dailyData;
-}
-}
-
-// Mock a "getResult" for the committer that returns the Transposed Map
-transformedState[name] = {
-manifest: inst.manifest,
-getResult: async () => transposed,
-_executionStats: inst._executionStats // Preserve stats
-};
-continue;
-}
-}
-// Normal behavior
-transformedState[name] = inst;
-}
-
-// 5. Commit
-return await commitResults(transformedState, dStr, passName, config, deps, skipStatusWrite);
+// 3. Stream, Process & Batch Flush
+// The return value contains the aggregated success/failure reports from all flushes
+return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
 }

-static async streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps) {
+static async streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite) {
 const { logger } = deps;
 const calcs = Object.values(state).filter(c => c && c.manifest);
 const streamingCalcs = calcs.filter(c => c.manifest.rootDataDependencies.includes('portfolio') || c.manifest.rootDataDependencies.includes('history'));

-if (streamingCalcs.length === 0) return;
+if (streamingCalcs.length === 0) return { successUpdates: {}, failureReport: [] };

 logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);

-//
+// Metrics & State Tracking
 const executionStats = {};
+const shardIndexMap = {}; // Tracks sharding offsets per calculation
+const aggregatedSuccess = {};
+const aggregatedFailures = [];
+
 Object.keys(state).forEach(name => {
 executionStats[name] = { processedUsers: 0, skippedUsers: 0 };
+shardIndexMap[name] = 0;
 });

 const cachedLoader = new CachedDataLoader(config, deps);
@@ -102,20 +71,24 @@ class StandardExecutor {
 const prevDateStr = prevDate.toISOString().slice(0, 10);

 const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
-
 const needsYesterdayPortfolio = streamingCalcs.some(c => c.manifest.isHistorical);
 const yP_iter = (needsYesterdayPortfolio && rootData.yesterdayPortfolioRefs) ? streamPortfolioData(config, deps, prevDateStr, rootData.yesterdayPortfolioRefs) : null;
-
 const needsTradingHistory = streamingCalcs.some(c => c.manifest.rootDataDependencies.includes('history'));
 const tH_iter = (needsTradingHistory) ? streamHistoryData(config, deps, dateStr, historyRefs) : null;

 let yP_chunk = {}, tH_chunk = {};

+// OOM Protection: Batch Flushing Configuration
+const BATCH_SIZE = 5000; // Flush every 5000 users
+let usersSinceLastFlush = 0;
+
 try {
 for await (const tP_chunk of tP_iter) {
 if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
 if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};

+const chunkSize = Object.keys(tP_chunk).length;
+
 // Execute chunk for all calcs
 const promises = streamingCalcs.map(calc =>
 StandardExecutor.executePerUser(
@@ -125,19 +98,113 @@ class StandardExecutor {
 )
 );
 await Promise.all(promises);
+
+usersSinceLastFlush += chunkSize;
+
+// --- BATCH FLUSH CHECK ---
+if (usersSinceLastFlush >= BATCH_SIZE) {
+logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
+
+const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true);
+
+// Aggregate metrics
+StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
+
+usersSinceLastFlush = 0;
+}
 }
 } finally {
-// Close manual iterators to release resources
 if (yP_iter && yP_iter.return) await yP_iter.return();
 if (tH_iter && tH_iter.return) await tH_iter.return();
 }

-//
-
-
+// --- FINAL FLUSH ---
+logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
+const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite);
+
+StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
+
+return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
+}
+
+static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+const transformedState = {};
+const { logger } = deps;
+
+// 1. Prepare and Clear Instances
+for (const [name, inst] of Object.entries(state)) {
+// Get data from the standard storage location
+const rawResult = inst.results || {};
+
+// Handle Multi-Date Fan-Out (Transposition)
+// Logic: Checks if result is { userId: { date: data } }
+const firstUser = Object.keys(rawResult)[0];
+let dataToCommit = rawResult;
+let isMultiDate = false;
+
+if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
+const innerKeys = Object.keys(rawResult[firstUser]);
+if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
+isMultiDate = true;
+const transposed = {};
+for (const [userId, dateMap] of Object.entries(rawResult)) {
+for (const [dateKey, dailyData] of Object.entries(dateMap)) {
+if (!transposed[dateKey]) transposed[dateKey] = {};
+transposed[dateKey][userId] = dailyData;
+}
+}
+dataToCommit = transposed;
+}
+}
+
+// Create a mock instance for the committer that returns just this batch
+transformedState[name] = {
+manifest: inst.manifest,
+getResult: async () => dataToCommit,
+_executionStats: executionStats[name] // Attach current stats
+};
+
+// ⚠️ CRITICAL: CLEAR MEMORY
+inst.results = {};
 }
+
+// 2. Commit Batch
+const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
+flushMode: mode, // 'INTERMEDIATE' or 'FINAL'
+shardIndexes: shardIndexMap // Pass the tracking map
+});
+
+// 3. Update Shard Indexes from result
+if (result.shardIndexes) {
+Object.assign(shardIndexMap, result.shardIndexes);
+}
+
+return result;
+}
+
+static mergeReports(successAcc, failureAcc, newResult) {
+if (!newResult) return;

-
+// Merge Success Updates (Sums metrics)
+for (const [name, update] of Object.entries(newResult.successUpdates)) {
+if (!successAcc[name]) {
+successAcc[name] = update;
+} else {
+// Sum storage metrics
+if (update.metrics?.storage) {
+successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
+successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
+successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
+}
+// Keep the latest hash/composition info
+successAcc[name].hash = update.hash;
+}
+}
+
+// Merge Failures
+if (newResult.failureReport) {
+failureAcc.push(...newResult.failureReport);
+}
 }

 static async executePerUser(calcInstance, metadata, dateStr, portfolioData, yesterdayPortfolioData, historyData, computedDeps, prevDeps, config, deps, loader, stats) {
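For orientation, a rough sketch of how the new flush cycle threads state between commits; the calculation name, shard counts, and dates below are invented for illustration and are not taken from the package:

// Hypothetical walkthrough of one streaming run for a single calc 'my-calc':
// 1st INTERMEDIATE flush (after ~5000 users):
//   commitResults(..., true, { flushMode: 'INTERMEDIATE', shardIndexes: { 'my-calc': 0 } })
//   -> writes _shards/shard_0..shard_2, returns shardIndexes { 'my-calc': 3 }, no status write
// 2nd INTERMEDIATE flush: starts at index 3, appends shard_3.., returns { 'my-calc': 5 }
// FINAL flush:
//   commitResults(..., skipStatusWrite, { flushMode: 'FINAL', shardIndexes: { 'my-calc': 5 } })
//   -> appends the remaining shards, merges the pointer doc (_shardCount) and updates computation status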
package/functions/computation-system/helpers/computation_dispatcher.js
@@ -1,10 +1,7 @@
 /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED:
- * UPDATED: Added Preemptive Hash Check.
- * UPDATED: Added Parallel Status Fetching.
- * UPDATED: Include triggerReason in Pub/Sub payload.
+ * UPDATED: Fixed "undefined" reason crash for failed dependencies.
  */

 const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -99,9 +96,19 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 }
 });

-// Mark Blocked
-
-
+// Mark Blocked (Explicit Block)
+report.blocked.forEach(item => {
+statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
+});
+
+// [FIX] Mark Failed Dependencies (Implicit Block) - Safely generate reason string
+report.failedDependency.forEach(item => {
+const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
+statusUpdates[item.name] = {
+hash: false,
+category: 'unknown',
+reason: `Dependency Missing: ${missingStr}`
+};
 });

 if (Object.keys(statusUpdates).length > 0) {
@@ -118,7 +125,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
 computation: normalizeName(item.name),
 hash: item.hash || item.newHash,
 previousCategory: item.previousCategory || null,
-triggerReason: item.reason || "Unknown",
+triggerReason: item.reason || "Unknown",
 timestamp: Date.now()
 });
 });
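For illustration, a computation whose upstream data is missing would now end up with a status record along these lines; the names and values are hypothetical and not taken from the package:

// Hypothetical example of the record written for a failed dependency:
// statusUpdates['portfolio-concentration'] = {
//   hash: false,
//   category: 'unknown',
//   reason: 'Dependency Missing: instrument-price-change-1d, portfolio-value'
// };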
package/functions/computation-system/persistence/ResultCommitter.js
@@ -1,7 +1,8 @@
 /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED:
- * UPDATED:
+ * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
+ * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
+ * FIX: Throws proper Error objects.
  */
 const { commitBatchInChunks } = require('./FirestoreUtils');
 const { updateComputationStatus } = require('./StatusRepository');
@@ -12,10 +13,16 @@ const validationOverrides = require('../config/validation_overr
 const pLimit = require('p-limit');

 const NON_RETRYABLE_ERRORS = [
-'
+'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
+// removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
 ];

-
+/**
+ * Commits results to Firestore.
+ * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
+ * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
+ */
+async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
 const successUpdates = {};
 const failureReport = [];
 const schemas = [];
@@ -23,33 +30,49 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 const { logger, db } = deps;
 const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);

+// Options defaults
+const flushMode = options.flushMode || 'STANDARD';
+const shardIndexes = options.shardIndexes || {};
+const nextShardIndexes = {};
+
 const fanOutLimit = pLimit(10);

 for (const name in stateObj) {
 const calc = stateObj[name];
-
-// [NEW] Check for execution stats attached by Executor
 const execStats = calc._executionStats || { processedUsers: 0, skippedUsers: 0 };
+const currentShardIndex = shardIndexes[name] || 0;

 const runMetrics = {
 storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
 validation: { isValid: true, anomalies: [] },
-execution: execStats
+execution: execStats
 };

 try {
 const result = await calc.getResult();
 const overrides = validationOverrides[calc.manifest.name] || {};
-
-
-if (
-
-
-
+
+// Only validate if we have data or if it's the final flush
+if (result && Object.keys(result).length > 0) {
+const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
+if (!healthCheck.valid) {
+runMetrics.validation.isValid = false;
+runMetrics.validation.anomalies.push(healthCheck.reason);
+const validationError = new Error(healthCheck.reason);
+validationError.stage = 'QUALITY_CIRCUIT_BREAKER';
+throw validationError;
+}
 }

-const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0)
+const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
+
+// If empty and standard mode, record 0-byte success.
+// If empty and INTERMEDIATE flush, just skip this calc for this flush.
 if (isEmpty) {
+if (flushMode === 'INTERMEDIATE') {
+nextShardIndexes[name] = currentShardIndex; // No change
+continue;
+}
 if (calc.manifest.hash) {
 successUpdates[name] = {
 hash: calc.manifest.hash,
@@ -63,13 +86,11 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW

 if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;

-// ... (Fan-out logic remains same) ...
 const resultKeys = Object.keys(result || {});
 const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));

 if (isMultiDate) {
-
-
+// Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
 const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
 const dailyData = result[historicalDate];
 if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -81,9 +102,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 .collection(config.computationsSubcollection)
 .doc(name);

-
+// For historical Fan-Out, we assume standard flush mode (not incremental) for now
+await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
 }));
-
 await Promise.all(datePromises);

 if (calc.manifest.hash) {
@@ -96,7 +117,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }

 } else {
-// --- STANDARD MODE ---
+// --- STANDARD / INCREMENTAL MODE ---
 const mainDocRef = db.collection(config.resultsCollection)
 .doc(dStr)
 .collection(config.resultsSubcollection)
@@ -104,11 +125,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 .collection(config.computationsSubcollection)
 .doc(name);

-const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps);
+const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode);

 runMetrics.storage.sizeBytes = writeStats.totalSize;
 runMetrics.storage.isSharded = writeStats.isSharded;
 runMetrics.storage.shardCount = writeStats.shardCount;
+
+// Track next index for subsequent flushes
+nextShardIndexes[name] = writeStats.nextShardIndex;

 if (calc.manifest.hash) {
 successUpdates[name] = {
@@ -120,112 +144,182 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }
 }

-if (calc.manifest.class.getSchema) {
+if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
 const { class: _cls, ...safeMetadata } = calc.manifest;
 schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
 }

-if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category) {
+if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
 cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
 }

 } catch (e) {
 const stage = e.stage || 'EXECUTION';
 const msg = e.message || 'Unknown error';
-
-
-
-failureReport.push({
-name,
-error: { message: msg, stack: e.stack, stage },
-metrics: runMetrics
-});
+if (logger && logger.log) { logger.log('ERROR', `Commit failed for ${name} [${stage}]`, { processId: pid, error: e }); }
+failureReport.push({ name, error: { message: msg, stack: e.stack, stage }, metrics: runMetrics });
 }
 }

 if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => {});
 if (cleanupTasks.length > 0) { await Promise.allSettled(cleanupTasks); }
-if (!skipStatusWrite && Object.keys(successUpdates).length > 0
+if (!skipStatusWrite && Object.keys(successUpdates).length > 0 && flushMode !== 'INTERMEDIATE') {
+await updateComputationStatus(dStr, successUpdates, config, deps);
+}

-return { successUpdates, failureReport };
+return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
 }

-
-
-
-
+async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
+// Strategy 1: Standard (900KB, no key limit)
+// Strategy 2: Aggressive Bytes (450KB, 10k keys)
+// Strategy 3: Very Aggressive (200KB, 2k keys)
+// Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
+const strategies = [
+{ bytes: 900 * 1024, keys: null },
+{ bytes: 450 * 1024, keys: 10000 },
+{ bytes: 200 * 1024, keys: 2000 },
+{ bytes: 100 * 1024, keys: 50 }
+];
+
+let committed = false; let lastError = null;
+let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex };

 for (let attempt = 0; attempt < strategies.length; attempt++) {
 if (committed) break;
 const constraints = strategies[attempt];
 try {
-const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys);
-
-
-
+const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+
+// Analyze the update batch
+const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
+
+// Calculate stats
 finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
+
+// Logic to determine next shard index
+let maxIndex = startShardIndex;
+updates.forEach(u => {
+const segs = u.ref.path.split('/');
+const last = segs[segs.length - 1];
+if (last.startsWith('shard_')) {
+const idx = parseInt(last.split('_')[1]);
+if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
+}
+});
+
+if (pointer && pointer.data._shardCount) {
+finalStats.shardCount = pointer.data._shardCount;
+finalStats.isSharded = true;
+finalStats.nextShardIndex = finalStats.shardCount;
+} else if (updates.length > 0) {
+finalStats.nextShardIndex = maxIndex + 1;
+finalStats.isSharded = true;
+}
+
 await commitBatchInChunks(config, deps, updates, `${name}::${dateContext} (Att ${attempt+1})`);
 if (logger && logger.logStorage) { logger.logStorage(null, name, dateContext, docRef.path, finalStats.totalSize, finalStats.isSharded); }
 committed = true;
 } catch (commitErr) {
 lastError = commitErr;
 const msg = commitErr.message || '';
-
-
-
+const code = commitErr.code || '';
+
+// Check for explicit "too many index entries" or transaction size issues
+const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
+const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
+
+if (NON_RETRYABLE_ERRORS.includes(code)) {
+logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
+throw commitErr;
+}
+
+if (isIndexError || isSizeError) {
+logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
+continue;
+}
+else {
+logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
+// We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
+continue;
+}
 }
 }
-if (!committed) {
+if (!committed) {
+const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
+shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
+if (lastError && lastError.stack) { shardingError.stack = lastError.stack; }
+throw shardingError;
+}
 return finalStats;
 }

-async function
-const { db, logger, calculationUtils } = deps;
-const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
-try {
-const oldDocRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(oldCategory).collection(config.computationsSubcollection).doc(calcName);
-const shardsCol = oldDocRef.collection('_shards');
-const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
-const batch = db.batch(); let ops = 0;
-for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
-batch.delete(oldDocRef); ops++;
-await withRetry(() => batch.commit(), 'CleanupOldCategory');
-logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
-} catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
-}
-
-function calculateFirestoreBytes(value) {
-if (value === null) return 1; if (value === undefined) return 0; if (typeof value === 'boolean') return 1; if (typeof value === 'number') return 8; if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1; if (value instanceof Date) return 8; if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
-if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
-if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
-}
-
-async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null) {
+async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD') {
 const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
 const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
 const writes = []; const shardCollection = docRef.collection('_shards');
-let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
+let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
+let shardIndex = startShardIndex;

-
+// Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
+if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
 const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
 return [{ ref: docRef, data, options: { merge: true } }];
 }

+// Sharding Logic
 for (const [key, value] of Object.entries(result)) {
 if (key.startsWith('_')) continue;
 const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
 const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
+
 if (byteLimitReached || keyLimitReached) {
-writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
+writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
 shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
 }
 currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
 }
-if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }

-
-
+// Push remaining chunk
+if (Object.keys(currentChunk).length > 0) {
+writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
+shardIndex++; // Increment so count is correct (0-based index means count is index+1)
+}
+
+// Pointer Logic
+if (flushMode !== 'INTERMEDIATE') {
+const pointerData = {
+_completed: true,
+_sharded: true,
+_shardCount: shardIndex,
+_lastUpdated: new Date().toISOString()
+};
+writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
+}
+
 return writes;
 }

+// ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
+async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
+const { db, logger, calculationUtils } = deps;
+const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
+try {
+const oldDocRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(oldCategory).collection(config.computationsSubcollection).doc(calcName);
+const shardsCol = oldDocRef.collection('_shards');
+const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
+const batch = db.batch(); let ops = 0;
+for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
+batch.delete(oldDocRef); ops++;
+await withRetry(() => batch.commit(), 'CleanupOldCategory');
+logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
+} catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
+}
+
+function calculateFirestoreBytes(value) {
+if (value === null) return 1; if (value === undefined) return 0; if (typeof value === 'boolean') return 1; if (typeof value === 'number') return 8; if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1; if (value instanceof Date) return 8; if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
+if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
+if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
+}
+
 module.exports = { commitResults };
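To make the sharding behaviour concrete: when a result exceeds the active strategy's limits, prepareAutoShardedWrites splits it into shard_N documents under a `_shards` subcollection and, outside INTERMEDIATE flushes, merges a pointer onto the main document. The layout below is a sketch only; the paths, collection names, and counts are illustrative, not taken from the package config:

// Illustrative Firestore layout after a sharded commit (example values only):
// <resultsCollection>/2024-01-01/<resultsSubcollection>/<category>/<computationsSubcollection>/my-calc
//   { _completed: true, _sharded: true, _shardCount: 3, _lastUpdated: '2024-01-01T00:05:00.000Z' }
// .../my-calc/_shards/shard_0   { userA: {...}, userB: {...} }
// .../my-calc/_shards/shard_1   { userC: {...} }
// .../my-calc/_shards/shard_2   { userD: {...} }
// A later INTERMEDIATE flush starting at shardIndex 3 appends shard_3, shard_4, ...
// and only a non-INTERMEDIATE flush rewrites the pointer with the updated _shardCount.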
package/functions/computation-system/persistence/ResultsValidator.js
@@ -1,23 +1,23 @@
 /**
  * @fileoverview HeuristicValidator.js
  * "Grey Box" validation that infers health using statistical analysis and structural sanity checks.
- * UPDATED: Added
+ * UPDATED: Added "Weekend Mode" to allow higher zero/null tolerance on Saturdays/Sundays.
  */

 class HeuristicValidator {
 /**
  * @param {string} calcName - Name for logging
  * @param {Object} data - The result data to inspect
+ * @param {string} dateStr - The computation date (YYYY-MM-DD)
  * @param {Object} [overrides] - Optional central config overrides
  */
-static analyze(calcName, data, overrides = {}) {
+static analyze(calcName, data, dateStr, overrides = {}) {
 // 1. Structure Check
-if (!data || typeof data !== 'object') return { valid: true };
+if (!data || typeof data !== 'object') return { valid: true };

 const keys = Object.keys(data);
 const totalItems = keys.length;

-// Skip tiny datasets (statistically insignificant)
 if (totalItems < 5) return { valid: true };

 // 2. Sampling Configuration
@@ -26,17 +26,16 @@ class HeuristicValidator {

 let zeroCount = 0;
 let nullCount = 0;
-let nanCount = 0;
-let emptyVectorCount = 0;
+let nanCount = 0;
+let emptyVectorCount = 0;
 let analyzedCount = 0;

-// For Variance/Flatline Check
 const numericValues = [];

 for (let i = 0; i < totalItems; i += step) {
 const key = keys[i];
 const val = data[key];
-if (!val) {
+if (!val) {
 nullCount++;
 analyzedCount++;
 continue;
@@ -44,71 +43,79 @@ class HeuristicValidator {
 analyzedCount++;

 // --- TYPE A: Object / Complex Result ---
-// Example: { "profile": [...], "current_price": 100 } or { "signal": "Buy", "score": 0.5 }
 if (typeof val === 'object') {
 const subValues = Object.values(val);

-// Dead Object Check: All props are null/0/undefined
 const isDeadObject = subValues.every(v => v === 0 || v === null || v === undefined);
 if (isDeadObject) nullCount++;

-// NaN Check in Properties
 const hasNan = subValues.some(v => typeof v === 'number' && (isNaN(v) || !isFinite(v)));
 if (hasNan) nanCount++;

-// Vector/Profile Empty Check (Specific to your System)
-// If result contains 'profile', 'history', 'sparkline', or 'buckets' arrays
 const arrayProps = ['profile', 'history', 'sparkline', 'buckets', 'prices'];
 for (const prop of arrayProps) { if (Array.isArray(val[prop]) && val[prop].length === 0) { emptyVectorCount++; } }

-// Extract primary numeric score for Flatline check (heuristically guessing the 'main' metric)
 const numericProp = subValues.find(v => typeof v === 'number' && v !== 0);
 if (numericProp !== undefined) numericValues.push(numericProp);
 }
 // --- TYPE B: Scalar / Primitive Result ---
 if (typeof val === 'number') {
-if (isNaN(val) || !isFinite(val))
-
-
-numericValues.push(val); // Include zeros
+if (isNaN(val) || !isFinite(val)) { nanCount++; }
+else {
+numericValues.push(val);
 if (val === 0) zeroCount++;
 }
 }
 }

-// 3.
-
+// 3. Weekend Detection & Threshold Resolution
+let isWeekend = false;
+if (dateStr) {
+try {
+// Force UTC interpretation to align with system dates
+const safeDate = dateStr.includes('T') ? dateStr : `${dateStr}T00:00:00Z`;
+const day = new Date(safeDate).getUTCDay();
+// 0 = Sunday, 6 = Saturday
+isWeekend = (day === 0 || day === 6);
+} catch (e) { /* Fallback to standard validation if date is invalid */ }
+}
+
+// Default Thresholds
+let thresholds = {
 maxZeroPct: overrides.maxZeroPct ?? 99,
 maxNullPct: overrides.maxNullPct ?? 90,
-maxNanPct: overrides.maxNanPct ?? 0,
-maxFlatlinePct:
+maxNanPct: overrides.maxNanPct ?? 0,
+maxFlatlinePct: overrides.maxFlatlinePct ?? 95
 };

+// Apply Weekend Overrides if applicable
+if (isWeekend && overrides.weekend) {
+thresholds = { ...thresholds, ...overrides.weekend };
+}
+
 // 4. Calculate Stats
 const zeroPct = (zeroCount / analyzedCount) * 100;
 const nullPct = (nullCount / analyzedCount) * 100;
 const nanPct = (nanCount / analyzedCount) * 100;

 // 5. Variance / Flatline Analysis
-// If we found numeric values, check if they are all the same
 let isFlatline = false;
 if (numericValues.length > 5) {
 const first = numericValues[0];
 const identicalCount = numericValues.filter(v => Math.abs(v - first) < 0.000001).length;
 const flatlinePct = (identicalCount / numericValues.length) * 100;

-// Only flag flatline if the value isn't 0 (0 is handled by maxZeroPct)
 if (flatlinePct > thresholds.maxFlatlinePct && Math.abs(first) > 0.0001) { isFlatline = true; }
 }

 // 6. Evaluations
-
-if (
-if (
+// Note: We include the applied thresholds in the error message for clarity
+if (nanPct > thresholds.maxNanPct) { return { valid: false, reason: `Mathematical Error: ${nanPct.toFixed(1)}% of sampled results contain NaN (Limit: ${thresholds.maxNanPct}%).` }; }
+if (zeroPct > thresholds.maxZeroPct) { return { valid: false, reason: `Data Integrity: ${zeroPct.toFixed(1)}% of results are 0 (Limit: ${thresholds.maxZeroPct}%${isWeekend ? ' [Weekend Mode]' : ''}).` }; }
+if (nullPct > thresholds.maxNullPct) { return { valid: false, reason: `Data Integrity: ${nullPct.toFixed(1)}% of results are Empty/Null (Limit: ${thresholds.maxNullPct}%${isWeekend ? ' [Weekend Mode]' : ''}).` }; }

 if (isFlatline) { return { valid: false, reason: `Anomaly: Detected Result Flatline. >${thresholds.maxFlatlinePct}% of outputs are identical (non-zero).` }; }

-// Special check for Distribution/Profile calculations
 if (calcName.includes('profile') || calcName.includes('distribution')) {
 const vectorEmptyPct = (emptyVectorCount / analyzedCount) * 100;
 if (vectorEmptyPct > 90) {
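A quick usage sketch of the new signature; the calculation name, data placeholder, and threshold numbers are made up for illustration and are not package values:

// Illustration only, not package code. On a Saturday the weekend block relaxes the limits:
const overrides = { maxZeroPct: 60, weekend: { maxZeroPct: 90, maxNullPct: 95 } };
// '2024-01-06' is a Saturday -> effective thresholds become { maxZeroPct: 90, maxNullPct: 95, ... }
HeuristicValidator.analyze('example-calc', someResultMap, '2024-01-06', overrides);
// '2024-01-08' is a Monday -> only the plain overrides apply (maxZeroPct: 60)
HeuristicValidator.analyze('example-calc', someResultMap, '2024-01-08', overrides);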
package/functions/computation-system/tools/BuildReporter.js
@@ -2,7 +2,7 @@
  * @fileoverview Build Reporter & Auto-Runner.
  * Generates a "Pre-Flight" report of what the computation system WILL do.
  * REFACTORED: Strict 5-category reporting with date-based exclusion logic.
- * UPDATED: Added
+ * UPDATED: Added transactional locking to prevent duplicate reports on concurrent cold starts.
  */

 const { analyzeDateExecution } = require('../WorkflowOrchestrator');
@@ -29,40 +29,56 @@ function isDateBeforeAvailability(dateStr, calcManifest) {
 for (const dep of deps) {
 // Map dependency name to start date
 let startDate = null;
-if (dep === 'portfolio')
+if (dep === 'portfolio') startDate = DEFINITIVE_EARLIEST_DATES.portfolio;
 else if (dep === 'history') startDate = DEFINITIVE_EARLIEST_DATES.history;
 else if (dep === 'social') startDate = DEFINITIVE_EARLIEST_DATES.social;
 else if (dep === 'insights') startDate = DEFINITIVE_EARLIEST_DATES.insights;
 else if (dep === 'price') startDate = DEFINITIVE_EARLIEST_DATES.price;

 // If we have a start date and the target is BEFORE it, exclude this calc.
-if (startDate && targetDate < startDate) {
-return true;
-}
+if (startDate && targetDate < startDate) { return true; }
 }
 return false;
 }

 /**
  * AUTO-RUN ENTRY POINT
+ * UPDATED: Uses transactional locking to prevent race conditions.
+ * If we deploy multiple computation pass nodes simultaneously, only one should run the report.
  */
 async function ensureBuildReport(config, dependencies, manifest) {
 const { db, logger } = dependencies;
 const now = new Date();
 const buildId = `v${packageVersion}_${now.getFullYear()}-${String(now.getMonth()+1).padStart(2,'0')}-${String(now.getDate()).padStart(2,'0')}_${String(now.getHours()).padStart(2,'0')}-${String(now.getMinutes()).padStart(2,'0')}-${String(now.getSeconds()).padStart(2,'0')}`;
-
+
+// Lock document specific to this version
+const lockRef = db.collection('computation_build_records').doc(`init_lock_v${packageVersion}`);

 try {
-
-const
+// Transaction: "Hey I am deploying" check
+const shouldRun = await db.runTransaction(async (t) => {
+const doc = await t.get(lockRef);
+
+if (doc.exists) { return false; } // Someone else beat us to it
+
+// Claim the lock
+t.set(lockRef, {
+status: 'IN_PROGRESS',
+startedAt: new Date(),
+workerId: process.env.K_REVISION || 'unknown',
+buildId: buildId
+});
+return true;
+});

-if (
-logger.log('INFO', `[BuildReporter] ✅ Version ${packageVersion} already has a report. Skipping.`);
-return;
-}
+if (!shouldRun) { logger.log('INFO', `[BuildReporter] 🔒 Report for v${packageVersion} is already being generated (Locked). Skipping.`); return; }

-logger.log('INFO', `[BuildReporter] 🚀
+logger.log('INFO', `[BuildReporter] 🚀 Lock Acquired. Running Pre-flight Report for v${packageVersion}...`);
+
 await generateBuildReport(config, dependencies, manifest, 90, buildId);
+
+// Optional: Update lock to completed (fire-and-forget update)
+lockRef.update({ status: 'COMPLETED', completedAt: new Date() }).catch(() => {});

 } catch (e) {
 logger.log('ERROR', `[BuildReporter] Auto-run check failed: ${e.message}`);
@@ -117,11 +133,11 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 }
 }

-const results
-const dailyStatus
-const availability
+const results = await Promise.all(fetchPromises);
+const dailyStatus = results[0];
+const availability = results[1];
 const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
-const rootDataStatus
+const rootDataStatus = availability ? availability.status : { hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false };

 const analysis = analyzeDateExecution(dateStr, manifest, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);

@@ -129,11 +145,11 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 // STRICT 5-CATEGORY MAPPING
 // ---------------------------------------------------------
 const dateSummary = {
-run:
-rerun:
-blocked:
+run: [], // New / No Hash / "Runnable"
+rerun: [], // Hash Mismatch / Category Migration
+blocked: [], // Missing Data (Today) / Dependency Missing
 impossible: [], // Missing Data (Historical) / Impossible Dependency
-uptodate:
+uptodate: [], // Hash Match (Previously "Skipped")

 // [NEW] Metadata for Verification
 meta: {
@@ -181,7 +197,7 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
 dateSummary.uptodate.length;

 dateSummary.meta.totalIncluded = includedCount;
-dateSummary.meta.match
+dateSummary.meta.match = (includedCount === expectedCount);

 if (!dateSummary.meta.match) {
 logger.log('WARN', `[BuildReporter] ⚠️ Mismatch on ${dateStr}: Expected ${expectedCount} but got ${includedCount}.`);