bulltrackers-module 1.0.270 → 1.0.272

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
  * @fileoverview Main Orchestrator. Coordinates the topological execution.
  * UPDATED: Removed 'Permanently Impossible' optimization to ensure full visibility/recovery.
  * UPDATED: Includes 'Audit Upgrade' check.
+ * UPDATED: Detailed Dependency Reporting for Impossible Chains.
  */
  const { normalizeName, DEFINITIVE_EARLIEST_DATES } = require('./utils/utils');
  const { checkRootDataAvailability, checkRootDependencies } = require('./data/AvailabilityChecker');
@@ -76,6 +77,7 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,

  // 2. Check Dependencies
  let dependencyIsImpossible = false;
+ let impossibleDepCause = null;
  const missingDeps = [];
  if (calc.dependencies) {
  for (const dep of calc.dependencies) {
@@ -83,6 +85,7 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  const depStored = simulationStatus[normDep];
  if (depStored && typeof depStored.hash === 'string' && depStored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
  dependencyIsImpossible = true;
+ impossibleDepCause = dep; // Capture the culprit
  break;
  }
  if (!isDepSatisfied(dep, simulationStatus, manifestMap)) { missingDeps.push(dep); }
@@ -90,7 +93,8 @@ function analyzeDateExecution(dateStr, calcsInPass, rootDataStatus, dailyStatus,
  }

  if (dependencyIsImpossible) {
- markImpossible('Dependency is Impossible', 'UPSTREAM');
+ // [UPDATED] Include the name of the failing dependency in the reason string
+ markImpossible(`Dependency is Impossible (${impossibleDepCause})`, 'UPSTREAM');
  continue;
  }
  if (missingDeps.length > 0) { report.failedDependency.push({ name: cName, missing: missingDeps }); continue; }
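
With the culprit captured, downstream status records now name the upstream computation that made a chain impossible. A minimal sketch of the check in isolation; the prefix value and the simulationStatus entries below are invented for illustration (only the startsWith(STATUS_IMPOSSIBLE_PREFIX) convention comes from the hunk above):

    // Hypothetical status snapshot; shapes assumed for illustration only
    const STATUS_IMPOSSIBLE_PREFIX = 'IMPOSSIBLE';
    const simulationStatus = {
      'price-root': { hash: 'IMPOSSIBLE::no-price-data-before-earliest-date' },
      'portfolio-root': { hash: 'a1b2c3d4' },
    };

    function findImpossibleDep(dependencies) {
      for (const dep of dependencies) {
        const stored = simulationStatus[dep];
        if (stored && typeof stored.hash === 'string' && stored.hash.startsWith(STATUS_IMPOSSIBLE_PREFIX)) {
          return dep; // the culprit that ends up in the reason string
        }
      }
      return null;
    }

    const culprit = findImpossibleDep(['portfolio-root', 'price-root']);
    // culprit === 'price-root', so the reason becomes "Dependency is Impossible (price-root)"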
@@ -3,4 +3,7 @@ module.exports = {
  // EXAMPLES :
  // "bankruptcy-detector": { maxZeroPct: 100 }, // It's rare, so 100% 0s is fine
  // "sparse-signal-generator": { maxNullPct: 99 }
+
+ "instrument-price-change-1d": { maxZeroPct: 100 }, // Weekends/holidays return 0 change; crypto trading means it technically can't hit 100%, but it usually gets close, so we override
+ "instrument-price-momentum-20d ": { maxZeroPct: 100 }, // Some assets can be very stagnant over a month, especially bonds or stablecoins
  };
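
These overrides are looked up by calculation name; the committer shown further down does validationOverrides[calc.manifest.name] || {} and HeuristicValidator falls back to its built-in limits for anything not listed. A minimal sketch of that resolution, reusing one of the names above (the helper itself is illustrative, not part of the package):

    // Illustrative threshold resolution; defaults mirror HeuristicValidator's fallbacks
    const validationOverrides = {
      "instrument-price-change-1d": { maxZeroPct: 100 },
    };

    function resolveThresholds(calcName) {
      const overrides = validationOverrides[calcName] || {};
      return {
        maxZeroPct: overrides.maxZeroPct ?? 99,
        maxNullPct: overrides.maxNullPct ?? 90,
        maxNanPct: overrides.maxNanPct ?? 0,
      };
    }

    console.log(resolveThresholds("instrument-price-change-1d").maxZeroPct); // 100 (override)
    console.log(resolveThresholds("some-other-calc").maxZeroPct);            // 99 (default)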
@@ -2,6 +2,7 @@
  * @fileoverview Executor for "Meta" (global) calculations.
  * UPDATED: Uses CachedDataLoader for all data access.
  * UPDATED: Tracks processed shard/item counts.
+ * UPDATED: Removed global.gc() calls.
  */
  const { normalizeName } = require('../utils/utils');
  const { CachedDataLoader } = require('../data/CachedDataLoader');
@@ -59,7 +60,7 @@ class MetaExecutor {
  stats.processedShards++;
  stats.processedItems += Object.keys(shardData).length;

- if (processedCount % 10 === 0 && global.gc) { global.gc(); }
+ // Removed global.gc()
  }
  logger.log('INFO', `[Executor] Finished Batched Execution for ${metadata.name} (${processedCount} shards).`);
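
For reference, the removed guard only ever did anything when Node was started with the --expose-gc flag; without it global.gc is undefined and V8 schedules collection on its own. A minimal illustration of the dropped pattern:

    // global.gc exists only when the process runs with: node --expose-gc app.js
    if (typeof global.gc === 'function') {
      global.gc(); // forces a full collection; normally unnecessary since V8 manages GC itself
    }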
@@ -1,8 +1,8 @@
  /**
  * @fileoverview Executor for "Standard" (per-user) calculations.
- * UPDATED: Handles lazy loading of data references.
- * UPDATED: Supports Multi-Date Fan-Out Aggregation (Time Machine Mode).
- * UPDATED: Tracks processed vs skipped users for telemetry.
+ * UPDATED: Implements Batch Flushing to prevent OOM on large datasets.
+ * UPDATED: Removes manual global.gc() calls.
+ * UPDATED: Manages incremental sharding states.
  */
  const { normalizeName } = require('../utils/utils');
  const { streamPortfolioData, streamHistoryData, getPortfolioPartRefs } = require('../utils/data_loader');
@@ -19,9 +19,8 @@ class StandardExecutor {
  // 1. Prepare Yesterdays Data if needed
  const fullRoot = { ...rootData };
  if (calcs.some(c => c.isHistorical)) {
- const prev = new Date(date); prev.setUTCDate(prev.getUTCDate() - 1);
- const prevStr = prev.toISOString().slice(0, 10);
- // Explicitly fetch yesterday's refs as they aren't provided by the daily indexer
+ const prev = new Date(date); prev.setUTCDate(prev.getUTCDate() - 1);
+ const prevStr = prev.toISOString().slice(0, 10);
  fullRoot.yesterdayPortfolioRefs = await getPortfolioPartRefs(config, deps, prevStr);
  }

@@ -31,6 +30,8 @@ class StandardExecutor {
  try {
  const inst = new c.class();
  inst.manifest = c;
+ // Ensure internal storage exists for flushing
+ inst.results = {};
  state[normalizeName(c.name)] = inst;
  logger.log('INFO', `${c.name} calculation running for ${dStr}`);
  } catch (e) {
@@ -38,61 +39,29 @@ class StandardExecutor {
  }
  }

- // 3. Stream & Process
- await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps);
-
- // 4. Pre-Commit Transformation for Fan-Out
- const transformedState = {};
- for (const [name, inst] of Object.entries(state)) {
- const result = await inst.getResult(); // { userId: { date: data } } or { userId: data }
- const firstUser = Object.keys(result)[0];
-
- // Check if the inner value is a Date Map
- // Only checks the first user as heuristic; implies uniform return type
- if (firstUser && result[firstUser] && typeof result[firstUser] === 'object') {
- const innerKeys = Object.keys(result[firstUser]);
- // Check if keys look like YYYY-MM-DD
- const isDateMap = innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));
-
- if (isDateMap) {
- const transposed = {};
- for (const [userId, dateMap] of Object.entries(result)) {
- for (const [dateKey, dailyData] of Object.entries(dateMap)) {
- if (!transposed[dateKey]) transposed[dateKey] = {};
- transposed[dateKey][userId] = dailyData;
- }
- }
-
- // Mock a "getResult" for the committer that returns the Transposed Map
- transformedState[name] = {
- manifest: inst.manifest,
- getResult: async () => transposed,
- _executionStats: inst._executionStats // Preserve stats
- };
- continue;
- }
- }
- // Normal behavior
- transformedState[name] = inst;
- }
-
- // 5. Commit
- return await commitResults(transformedState, dStr, passName, config, deps, skipStatusWrite);
+ // 3. Stream, Process & Batch Flush
+ // The return value contains the aggregated success/failure reports from all flushes
+ return await StandardExecutor.streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite);
  }

- static async streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps) {
+ static async streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps, skipStatusWrite) {
  const { logger } = deps;
  const calcs = Object.values(state).filter(c => c && c.manifest);
  const streamingCalcs = calcs.filter(c => c.manifest.rootDataDependencies.includes('portfolio') || c.manifest.rootDataDependencies.includes('history'));

- if (streamingCalcs.length === 0) return;
+ if (streamingCalcs.length === 0) return { successUpdates: {}, failureReport: [] };

  logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);

- // [NEW] Execution Metrics Container
+ // Metrics & State Tracking
  const executionStats = {};
+ const shardIndexMap = {}; // Tracks sharding offsets per calculation
+ const aggregatedSuccess = {};
+ const aggregatedFailures = [];
+
  Object.keys(state).forEach(name => {
  executionStats[name] = { processedUsers: 0, skippedUsers: 0 };
+ shardIndexMap[name] = 0;
  });

  const cachedLoader = new CachedDataLoader(config, deps);
@@ -102,20 +71,24 @@ class StandardExecutor {
  const prevDateStr = prevDate.toISOString().slice(0, 10);

  const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
-
  const needsYesterdayPortfolio = streamingCalcs.some(c => c.manifest.isHistorical);
  const yP_iter = (needsYesterdayPortfolio && rootData.yesterdayPortfolioRefs) ? streamPortfolioData(config, deps, prevDateStr, rootData.yesterdayPortfolioRefs) : null;
-
  const needsTradingHistory = streamingCalcs.some(c => c.manifest.rootDataDependencies.includes('history'));
  const tH_iter = (needsTradingHistory) ? streamHistoryData(config, deps, dateStr, historyRefs) : null;

  let yP_chunk = {}, tH_chunk = {};

+ // OOM Protection: Batch Flushing Configuration
+ const BATCH_SIZE = 5000; // Flush every 5000 users
+ let usersSinceLastFlush = 0;
+
  try {
  for await (const tP_chunk of tP_iter) {
  if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
  if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};

+ const chunkSize = Object.keys(tP_chunk).length;
+
  // Execute chunk for all calcs
  const promises = streamingCalcs.map(calc =>
  StandardExecutor.executePerUser(
@@ -125,19 +98,113 @@ class StandardExecutor {
  )
  );
  await Promise.all(promises);
+
+ usersSinceLastFlush += chunkSize;
+
+ // --- BATCH FLUSH CHECK ---
+ if (usersSinceLastFlush >= BATCH_SIZE) {
+ logger.log('INFO', `[${passName}] 🛁 Flushing buffer after ${usersSinceLastFlush} users...`);
+
+ const flushResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'INTERMEDIATE', true);
+
+ // Aggregate metrics
+ StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, flushResult);
+
+ usersSinceLastFlush = 0;
+ }
  }
  } finally {
- // Close manual iterators to release resources
  if (yP_iter && yP_iter.return) await yP_iter.return();
  if (tH_iter && tH_iter.return) await tH_iter.return();
  }

- // Attach stats to the instances so ResultCommitter can find them
- for(const name in state) {
- if(state[name]) state[name]._executionStats = executionStats[name];
+ // --- FINAL FLUSH ---
+ logger.log('INFO', `[${passName}] Streaming complete. Performing final commit.`);
+ const finalResult = await StandardExecutor.flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, 'FINAL', skipStatusWrite);
+
+ StandardExecutor.mergeReports(aggregatedSuccess, aggregatedFailures, finalResult);
+
+ return { successUpdates: aggregatedSuccess, failureReport: aggregatedFailures };
+ }
+
+ static async flushBuffer(state, dateStr, passName, config, deps, shardIndexMap, executionStats, mode, skipStatusWrite) {
+ const transformedState = {};
+ const { logger } = deps;
+
+ // 1. Prepare and Clear Instances
+ for (const [name, inst] of Object.entries(state)) {
+ // Get data from the standard storage location
+ const rawResult = inst.results || {};
+
+ // Handle Multi-Date Fan-Out (Transposition)
+ // Logic: Checks if result is { userId: { date: data } }
+ const firstUser = Object.keys(rawResult)[0];
+ let dataToCommit = rawResult;
+ let isMultiDate = false;
+
+ if (firstUser && rawResult[firstUser] && typeof rawResult[firstUser] === 'object') {
+ const innerKeys = Object.keys(rawResult[firstUser]);
+ if (innerKeys.length > 0 && innerKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k))) {
+ isMultiDate = true;
+ const transposed = {};
+ for (const [userId, dateMap] of Object.entries(rawResult)) {
+ for (const [dateKey, dailyData] of Object.entries(dateMap)) {
+ if (!transposed[dateKey]) transposed[dateKey] = {};
+ transposed[dateKey][userId] = dailyData;
+ }
+ }
+ dataToCommit = transposed;
+ }
+ }
+
+ // Create a mock instance for the committer that returns just this batch
+ transformedState[name] = {
+ manifest: inst.manifest,
+ getResult: async () => dataToCommit,
+ _executionStats: executionStats[name] // Attach current stats
+ };
+
+ // ⚠️ CRITICAL: CLEAR MEMORY
+ inst.results = {};
  }
+
+ // 2. Commit Batch
+ const result = await commitResults(transformedState, dateStr, passName, config, deps, skipStatusWrite, {
+ flushMode: mode, // 'INTERMEDIATE' or 'FINAL'
+ shardIndexes: shardIndexMap // Pass the tracking map
+ });
+
+ // 3. Update Shard Indexes from result
+ if (result.shardIndexes) {
+ Object.assign(shardIndexMap, result.shardIndexes);
+ }
+
+ return result;
+ }
+
+ static mergeReports(successAcc, failureAcc, newResult) {
+ if (!newResult) return;

- logger.log('INFO', `[${passName}] Streaming complete.`);
+ // Merge Success Updates (Sums metrics)
+ for (const [name, update] of Object.entries(newResult.successUpdates)) {
+ if (!successAcc[name]) {
+ successAcc[name] = update;
+ } else {
+ // Sum storage metrics
+ if (update.metrics?.storage) {
+ successAcc[name].metrics.storage.sizeBytes += (update.metrics.storage.sizeBytes || 0);
+ successAcc[name].metrics.storage.keys += (update.metrics.storage.keys || 0);
+ successAcc[name].metrics.storage.shardCount = Math.max(successAcc[name].metrics.storage.shardCount, update.metrics.storage.shardCount || 1);
+ }
+ // Keep the latest hash/composition info
+ successAcc[name].hash = update.hash;
+ }
+ }
+
+ // Merge Failures
+ if (newResult.failureReport) {
+ failureAcc.push(...newResult.failureReport);
+ }
  }

  static async executePerUser(calcInstance, metadata, dateStr, portfolioData, yesterdayPortfolioData, historyData, computedDeps, prevDeps, config, deps, loader, stats) {
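
Net effect: streamAndProcess is now a bounded-memory accumulate/flush loop rather than a single end-of-stream commit. A condensed sketch of that pattern with the Firestore plumbing abstracted away (processChunk, flush, and mergeInto are stand-ins for executePerUser, flushBuffer, and mergeReports, and the merge here is deliberately simplified):

    // Minimal batch-flushing loop over an async iterable of user chunks
    async function runWithBatchFlush(chunks, processChunk, flush, batchSize = 5000) {
      const report = { successUpdates: {}, failureReport: [] };
      let buffered = 0;

      for await (const chunk of chunks) {
        await processChunk(chunk);              // accumulate per-user results in memory
        buffered += Object.keys(chunk).length;

        if (buffered >= batchSize) {
          mergeInto(report, await flush('INTERMEDIATE')); // write shards, then clear in-memory buffers
          buffered = 0;
        }
      }

      mergeInto(report, await flush('FINAL'));  // final flush also writes pointer/status documents
      return report;
    }

    function mergeInto(acc, partial = {}) {
      Object.assign(acc.successUpdates, partial.successUpdates || {});
      acc.failureReport.push(...(partial.failureReport || []));
    }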
@@ -1,10 +1,7 @@
  /**
  * FILENAME: computation-system/helpers/computation_dispatcher.js
  * PURPOSE: "Smart Dispatcher" - Analyzes state and only dispatches valid, runnable tasks.
- * UPDATED: Implements Audit Ledger creation with Transactions to prevent Race Conditions.
- * UPDATED: Added Preemptive Hash Check.
- * UPDATED: Added Parallel Status Fetching.
- * UPDATED: Include triggerReason in Pub/Sub payload.
+ * UPDATED: Fixed "undefined" reason crash for failed dependencies.
  */

  const { getExpectedDateStrings, normalizeName, DEFINITIVE_EARLIEST_DATES } = require('../utils/utils.js');
@@ -99,9 +96,19 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  }
  });

- // Mark Blocked/Failed Deps (Temporary Failure)
- [...report.blocked, ...report.failedDependency].forEach(item => {
- statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
+ // Mark Blocked (Explicit Block)
+ report.blocked.forEach(item => {
+ statusUpdates[item.name] = { hash: false, category: 'unknown', reason: item.reason };
+ });
+
+ // [FIX] Mark Failed Dependencies (Implicit Block) - Safely generate reason string
+ report.failedDependency.forEach(item => {
+ const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
+ statusUpdates[item.name] = {
+ hash: false,
+ category: 'unknown',
+ reason: `Dependency Missing: ${missingStr}`
+ };
  });

  if (Object.keys(statusUpdates).length > 0) {
@@ -118,7 +125,7 @@ async function dispatchComputationPass(config, dependencies, computationManifest
  computation: normalizeName(item.name),
  hash: item.hash || item.newHash,
  previousCategory: item.previousCategory || null,
- triggerReason: item.reason || "Unknown", // <--- THE KEY ADDITION
+ triggerReason: item.reason || "Unknown",
  timestamp: Date.now()
  });
  });
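
The crash this fixes comes from the report shape: analyzeDateExecution (first file above) pushes failed-dependency entries as { name, missing: [...] } with no reason field, so the old combined forEach stored reason: undefined. The new branch derives the reason from the missing list; for example (calculation names invented for illustration):

    // Illustrative input/output for the new failedDependency branch
    const item = { name: 'portfolio-momentum-20d', missing: ['instrument-price-change-1d', 'portfolio-positions'] };

    const missingStr = item.missing ? item.missing.join(', ') : 'unknown';
    const statusUpdate = { hash: false, category: 'unknown', reason: `Dependency Missing: ${missingStr}` };
    // reason === "Dependency Missing: instrument-price-change-1d, portfolio-positions"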
@@ -1,7 +1,8 @@
  /**
  * @fileoverview Handles saving computation results with observability and Smart Cleanup.
- * UPDATED: Stores Hash Composition in status for audit trail.
- * UPDATED: Captures execution metrics.
+ * UPDATED: Added "Strategy 4" (50 keys) to handle 'too many index entries' errors.
+ * UPDATED: Supports Incremental (Flush) Commits to prevent OOM.
+ * FIX: Throws proper Error objects.
  */
  const { commitBatchInChunks } = require('./FirestoreUtils');
  const { updateComputationStatus } = require('./StatusRepository');
@@ -12,10 +13,16 @@ const validationOverrides = require('../config/validation_overr
  const pLimit = require('p-limit');

  const NON_RETRYABLE_ERRORS = [
- 'INVALID_ARGUMENT', 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
+ 'PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'
+ // removed INVALID_ARGUMENT from here as it covers 'too many index entries' which IS retryable via sharding
  ];

- async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
+ /**
+ * Commits results to Firestore.
+ * @param {Object} options.flushMode - 'STANDARD', 'INTERMEDIATE', 'FINAL'
+ * @param {Object} options.shardIndexes - Map of { calcName: currentShardIndex }
+ */
+ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false, options = {}) {
  const successUpdates = {};
  const failureReport = [];
  const schemas = [];
@@ -23,33 +30,49 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  const { logger, db } = deps;
  const pid = generateProcessId(PROCESS_TYPES.STORAGE, passName, dStr);

+ // Options defaults
+ const flushMode = options.flushMode || 'STANDARD';
+ const shardIndexes = options.shardIndexes || {};
+ const nextShardIndexes = {};
+
  const fanOutLimit = pLimit(10);

  for (const name in stateObj) {
  const calc = stateObj[name];
-
- // [NEW] Check for execution stats attached by Executor
  const execStats = calc._executionStats || { processedUsers: 0, skippedUsers: 0 };
+ const currentShardIndex = shardIndexes[name] || 0;

  const runMetrics = {
  storage: { sizeBytes: 0, isSharded: false, shardCount: 1, keys: 0 },
  validation: { isValid: true, anomalies: [] },
- execution: execStats // <--- Pass this to RunRecorder
+ execution: execStats
  };

  try {
  const result = await calc.getResult();
  const overrides = validationOverrides[calc.manifest.name] || {};
- const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
-
- if (!healthCheck.valid) {
- runMetrics.validation.isValid = false;
- runMetrics.validation.anomalies.push(healthCheck.reason);
- throw { message: healthCheck.reason, stage: 'QUALITY_CIRCUIT_BREAKER' };
+
+ // Only validate if we have data or if it's the final flush
+ if (result && Object.keys(result).length > 0) {
+ const healthCheck = HeuristicValidator.analyze(calc.manifest.name, result, overrides);
+ if (!healthCheck.valid) {
+ runMetrics.validation.isValid = false;
+ runMetrics.validation.anomalies.push(healthCheck.reason);
+ const validationError = new Error(healthCheck.reason);
+ validationError.stage = 'QUALITY_CIRCUIT_BREAKER';
+ throw validationError;
+ }
  }

- const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0) || (typeof result === 'number' && result === 0);
+ const isEmpty = !result || (typeof result === 'object' && Object.keys(result).length === 0);
+
+ // If empty and standard mode, record 0-byte success.
+ // If empty and INTERMEDIATE flush, just skip this calc for this flush.
  if (isEmpty) {
+ if (flushMode === 'INTERMEDIATE') {
+ nextShardIndexes[name] = currentShardIndex; // No change
+ continue;
+ }
  if (calc.manifest.hash) {
  successUpdates[name] = {
  hash: calc.manifest.hash,
@@ -63,13 +86,11 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW

  if (typeof result === 'object') runMetrics.storage.keys = Object.keys(result).length;

- // ... (Fan-out logic remains same) ...
  const resultKeys = Object.keys(result || {});
  const isMultiDate = resultKeys.length > 0 && resultKeys.every(k => /^\d{4}-\d{2}-\d{2}$/.test(k));

  if (isMultiDate) {
- logger.log('INFO', `[ResultCommitter] 🕰️ Multi-Date Output detected for ${name} (${resultKeys.length} days). Throttled Fan-Out...`);
-
+ // Multi-Date Fan-Out Logic (Not optimized for incremental yet, falls back to standard per-date write)
  const datePromises = resultKeys.map((historicalDate) => fanOutLimit(async () => {
  const dailyData = result[historicalDate];
  if (!dailyData || Object.keys(dailyData).length === 0) return;
@@ -81,9 +102,9 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  .collection(config.computationsSubcollection)
  .doc(name);

- await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps);
+ // For historical Fan-Out, we assume standard flush mode (not incremental) for now
+ await writeSingleResult(dailyData, historicalDocRef, name, historicalDate, logger, config, deps, 0, 'STANDARD');
  }));
-
  await Promise.all(datePromises);

  if (calc.manifest.hash) {
@@ -96,7 +117,7 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }

  } else {
- // --- STANDARD MODE ---
+ // --- STANDARD / INCREMENTAL MODE ---
  const mainDocRef = db.collection(config.resultsCollection)
  .doc(dStr)
  .collection(config.resultsSubcollection)
@@ -104,11 +125,14 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  .collection(config.computationsSubcollection)
  .doc(name);

- const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps);
+ const writeStats = await writeSingleResult(result, mainDocRef, name, dStr, logger, config, deps, currentShardIndex, flushMode);

  runMetrics.storage.sizeBytes = writeStats.totalSize;
  runMetrics.storage.isSharded = writeStats.isSharded;
  runMetrics.storage.shardCount = writeStats.shardCount;
+
+ // Track next index for subsequent flushes
+ nextShardIndexes[name] = writeStats.nextShardIndex;

  if (calc.manifest.hash) {
  successUpdates[name] = {
@@ -120,112 +144,182 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
  }
  }

- if (calc.manifest.class.getSchema) {
+ if (calc.manifest.class.getSchema && flushMode !== 'INTERMEDIATE') {
  const { class: _cls, ...safeMetadata } = calc.manifest;
  schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
  }

- if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category) {
+ if (calc.manifest.previousCategory && calc.manifest.previousCategory !== calc.manifest.category && flushMode !== 'INTERMEDIATE') {
  cleanupTasks.push(deleteOldCalculationData(dStr, calc.manifest.previousCategory, name, config, deps));
  }

  } catch (e) {
  const stage = e.stage || 'EXECUTION';
  const msg = e.message || 'Unknown error';
-
- if (logger && logger.log) { logger.log('ERROR', `Commit failed for ${name} [${stage}]`, { processId: pid, error: msg }); }
-
- failureReport.push({
- name,
- error: { message: msg, stack: e.stack, stage },
- metrics: runMetrics
- });
+ if (logger && logger.log) { logger.log('ERROR', `Commit failed for ${name} [${stage}]`, { processId: pid, error: e }); }
+ failureReport.push({ name, error: { message: msg, stack: e.stack, stage }, metrics: runMetrics });
  }
  }

  if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => {});
  if (cleanupTasks.length > 0) { await Promise.allSettled(cleanupTasks); }
- if (!skipStatusWrite && Object.keys(successUpdates).length > 0) { await updateComputationStatus(dStr, successUpdates, config, deps); }
+ if (!skipStatusWrite && Object.keys(successUpdates).length > 0 && flushMode !== 'INTERMEDIATE') {
+ await updateComputationStatus(dStr, successUpdates, config, deps);
+ }

- return { successUpdates, failureReport };
+ return { successUpdates, failureReport, shardIndexes: nextShardIndexes };
  }

- // ... (Helper functions remain unchanged from context) ...
- async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps) {
- const strategies = [ { bytes: 900 * 1024, keys: null }, { bytes: 450 * 1024, keys: 10000 }, { bytes: 200 * 1024, keys: 2000 } ];
- let committed = false; let lastError = null; let finalStats = { totalSize: 0, isSharded: false, shardCount: 1 };
+ async function writeSingleResult(result, docRef, name, dateContext, logger, config, deps, startShardIndex = 0, flushMode = 'STANDARD') {
+ // Strategy 1: Standard (900KB, no key limit)
+ // Strategy 2: Aggressive Bytes (450KB, 10k keys)
+ // Strategy 3: Very Aggressive (200KB, 2k keys)
+ // Strategy 4: [NEW] Index Explosion Protection (100KB, 50 keys) - Handles "too many index entries"
+ const strategies = [
+ { bytes: 900 * 1024, keys: null },
+ { bytes: 450 * 1024, keys: 10000 },
+ { bytes: 200 * 1024, keys: 2000 },
+ { bytes: 100 * 1024, keys: 50 }
+ ];
+
+ let committed = false; let lastError = null;
+ let finalStats = { totalSize: 0, isSharded: false, shardCount: 1, nextShardIndex: startShardIndex };

  for (let attempt = 0; attempt < strategies.length; attempt++) {
  if (committed) break;
  const constraints = strategies[attempt];
  try {
- const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys);
- const pointer = updates.find(u => u.data._completed === true);
- finalStats.isSharded = pointer && pointer.data._sharded === true;
- finalStats.shardCount = finalStats.isSharded ? (pointer.data._shardCount || 1) : 1;
+ const updates = await prepareAutoShardedWrites(result, docRef, logger, constraints.bytes, constraints.keys, startShardIndex, flushMode);
+
+ // Analyze the update batch
+ const pointer = updates.find(u => u.data._completed !== undefined || u.data._sharded !== undefined); // Pointer is on the main doc
+
+ // Calculate stats
  finalStats.totalSize = updates.reduce((acc, u) => acc + (u.data ? JSON.stringify(u.data).length : 0), 0);
+
+ // Logic to determine next shard index
+ let maxIndex = startShardIndex;
+ updates.forEach(u => {
+ const segs = u.ref.path.split('/');
+ const last = segs[segs.length - 1];
+ if (last.startsWith('shard_')) {
+ const idx = parseInt(last.split('_')[1]);
+ if (!isNaN(idx) && idx > maxIndex) maxIndex = idx;
+ }
+ });
+
+ if (pointer && pointer.data._shardCount) {
+ finalStats.shardCount = pointer.data._shardCount;
+ finalStats.isSharded = true;
+ finalStats.nextShardIndex = finalStats.shardCount;
+ } else if (updates.length > 0) {
+ finalStats.nextShardIndex = maxIndex + 1;
+ finalStats.isSharded = true;
+ }
+
  await commitBatchInChunks(config, deps, updates, `${name}::${dateContext} (Att ${attempt+1})`);
  if (logger && logger.logStorage) { logger.logStorage(null, name, dateContext, docRef.path, finalStats.totalSize, finalStats.isSharded); }
  committed = true;
  } catch (commitErr) {
  lastError = commitErr;
  const msg = commitErr.message || '';
- if (NON_RETRYABLE_ERRORS.includes(commitErr.code)) { logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`); throw commitErr; }
- if (msg.includes('Transaction too big') || msg.includes('payload is too large') || msg.includes('too many index entries')) { logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}. Retrying...`, { error: msg }); continue; }
- else { logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg }); }
+ const code = commitErr.code || '';
+
+ // Check for explicit "too many index entries" or transaction size issues
+ const isIndexError = msg.includes('too many index entries') || msg.includes('INVALID_ARGUMENT');
+ const isSizeError = msg.includes('Transaction too big') || msg.includes('payload is too large');
+
+ if (NON_RETRYABLE_ERRORS.includes(code)) {
+ logger.log('ERROR', `[SelfHealing] ${name} FATAL error: ${msg}.`);
+ throw commitErr;
+ }
+
+ if (isIndexError || isSizeError) {
+ logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} failed attempt ${attempt+1}/${strategies.length}. Strategy: ${JSON.stringify(constraints)}. Error: ${msg}. Retrying with stricter limits...`);
+ continue;
+ }
+ else {
+ logger.log('WARN', `[SelfHealing] ${name} on ${dateContext} unknown error. Retrying...`, { error: msg });
+ // We typically retry same strategy for unknown transient errors, but here we iterate to be safe.
+ continue;
+ }
  }
  }
- if (!committed) { throw { message: `Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`, stack: lastError?.stack, stage: 'SHARDING_LIMIT_EXCEEDED' }; }
+ if (!committed) {
+ const shardingError = new Error(`Exhausted sharding strategies for ${name}. Last error: ${lastError?.message}`);
+ shardingError.stage = 'SHARDING_LIMIT_EXCEEDED';
+ if (lastError && lastError.stack) { shardingError.stack = lastError.stack; }
+ throw shardingError;
+ }
  return finalStats;
  }

- async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
- const { db, logger, calculationUtils } = deps;
- const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
- try {
- const oldDocRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(oldCategory).collection(config.computationsSubcollection).doc(calcName);
- const shardsCol = oldDocRef.collection('_shards');
- const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
- const batch = db.batch(); let ops = 0;
- for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
- batch.delete(oldDocRef); ops++;
- await withRetry(() => batch.commit(), 'CleanupOldCategory');
- logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
- } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
- }
-
- function calculateFirestoreBytes(value) {
- if (value === null) return 1; if (value === undefined) return 0; if (typeof value === 'boolean') return 1; if (typeof value === 'number') return 8; if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1; if (value instanceof Date) return 8; if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
- if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
- if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
- }
-
- async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null) {
+ async function prepareAutoShardedWrites(result, docRef, logger, maxBytes = 900 * 1024, maxKeys = null, startShardIndex = 0, flushMode = 'STANDARD') {
  const OVERHEAD_ALLOWANCE = 20 * 1024; const CHUNK_LIMIT = maxBytes - OVERHEAD_ALLOWANCE;
  const totalSize = calculateFirestoreBytes(result); const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16;
  const writes = []; const shardCollection = docRef.collection('_shards');
- let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0; let shardIndex = 0;
+ let currentChunk = {}; let currentChunkSize = 0; let currentKeyCount = 0;
+ let shardIndex = startShardIndex;

- if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT) {
+ // Small Data Optimization (Only valid if we are not in an incremental flow or if it's the first standard run)
+ if (!maxKeys && (totalSize + docPathSize) < CHUNK_LIMIT && flushMode === 'STANDARD' && startShardIndex === 0) {
  const data = { ...result, _completed: true, _sharded: false, _lastUpdated: new Date().toISOString() };
  return [{ ref: docRef, data, options: { merge: true } }];
  }

+ // Sharding Logic
  for (const [key, value] of Object.entries(result)) {
  if (key.startsWith('_')) continue;
  const keySize = Buffer.byteLength(key, 'utf8') + 1; const valueSize = calculateFirestoreBytes(value); const itemSize = keySize + valueSize;
  const byteLimitReached = (currentChunkSize + itemSize > CHUNK_LIMIT); const keyLimitReached = (maxKeys && currentKeyCount + 1 >= maxKeys);
+
  if (byteLimitReached || keyLimitReached) {
- writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite shard doc
  shardIndex++; currentChunk = {}; currentChunkSize = 0; currentKeyCount = 0;
  }
  currentChunk[key] = value; currentChunkSize += itemSize; currentKeyCount++;
  }
- if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }

- const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() };
- writes.push({ ref: docRef, data: pointerData, options: { merge: false } });
+ // Push remaining chunk
+ if (Object.keys(currentChunk).length > 0) {
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } });
+ shardIndex++; // Increment so count is correct (0-based index means count is index+1)
+ }
+
+ // Pointer Logic
+ if (flushMode !== 'INTERMEDIATE') {
+ const pointerData = {
+ _completed: true,
+ _sharded: true,
+ _shardCount: shardIndex,
+ _lastUpdated: new Date().toISOString()
+ };
+ writes.push({ ref: docRef, data: pointerData, options: { merge: true } }); // Merge pointer
+ }
+
  return writes;
  }

+ // ... (Rest of file: deleteOldCalculationData, calculateFirestoreBytes remains unchanged) ...
+ async function deleteOldCalculationData(dateStr, oldCategory, calcName, config, deps) {
+ const { db, logger, calculationUtils } = deps;
+ const { withRetry } = calculationUtils || { withRetry: (fn) => fn() };
+ try {
+ const oldDocRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(oldCategory).collection(config.computationsSubcollection).doc(calcName);
+ const shardsCol = oldDocRef.collection('_shards');
+ const shardsSnap = await withRetry(() => shardsCol.listDocuments(), 'ListOldShards');
+ const batch = db.batch(); let ops = 0;
+ for (const shardDoc of shardsSnap) { batch.delete(shardDoc); ops++; }
+ batch.delete(oldDocRef); ops++;
+ await withRetry(() => batch.commit(), 'CleanupOldCategory');
+ logger.log('INFO', `[Migration] Cleaned up ${ops} docs for ${calcName} in '${oldCategory}'`);
+ } catch (e) { logger.log('WARN', `[Migration] Failed to clean up ${calcName}: ${e.message}`); }
+ }
+
+ function calculateFirestoreBytes(value) {
+ if (value === null) return 1; if (value === undefined) return 0; if (typeof value === 'boolean') return 1; if (typeof value === 'number') return 8; if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1; if (value instanceof Date) return 8; if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
+ if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
+ if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; } return 0;
+ }
+
  module.exports = { commitResults };
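
The self-healing path in writeSingleResult is an escalation loop: try each strategy in order, treat size and index errors as a cue to move to the next (stricter) strategy, rethrow the non-retryable codes immediately, and only give up after the 50-key strategy also fails. The same control flow in isolation (attemptWrite is a stand-in for the prepareAutoShardedWrites + commitBatchInChunks pair, so this is a sketch rather than the package's implementation):

    // Escalation over progressively stricter sharding strategies
    const STRATEGIES = [
      { bytes: 900 * 1024, keys: null },
      { bytes: 450 * 1024, keys: 10000 },
      { bytes: 200 * 1024, keys: 2000 },
      { bytes: 100 * 1024, keys: 50 },   // "Strategy 4": index-explosion protection
    ];
    const NON_RETRYABLE = ['PERMISSION_DENIED', 'DATA_LOSS', 'FAILED_PRECONDITION'];

    async function writeWithEscalation(attemptWrite) {
      let lastError = null;
      for (const strategy of STRATEGIES) {
        try {
          return await attemptWrite(strategy);             // resolves with write stats on success
        } catch (err) {
          if (NON_RETRYABLE.includes(err.code)) throw err; // fatal: stop immediately
          lastError = err;                                  // size/index errors: escalate to a stricter strategy
        }
      }
      const exhausted = new Error(`Exhausted sharding strategies. Last error: ${lastError?.message}`);
      exhausted.stage = 'SHARDING_LIMIT_EXCEEDED';
      throw exhausted;
    }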
@@ -1,23 +1,23 @@
  /**
  * @fileoverview HeuristicValidator.js
  * "Grey Box" validation that infers health using statistical analysis and structural sanity checks.
- * UPDATED: Added NaN detection, Flatline (Variance) checks, and Vector/Array depth checks.
+ * UPDATED: Added "Weekend Mode" to allow higher zero/null tolerance on Saturdays/Sundays.
  */

  class HeuristicValidator {
  /**
  * @param {string} calcName - Name for logging
  * @param {Object} data - The result data to inspect
+ * @param {string} dateStr - The computation date (YYYY-MM-DD)
  * @param {Object} [overrides] - Optional central config overrides
  */
- static analyze(calcName, data, overrides = {}) {
+ static analyze(calcName, data, dateStr, overrides = {}) {
  // 1. Structure Check
- if (!data || typeof data !== 'object') return { valid: true }; // Let scalar types pass
+ if (!data || typeof data !== 'object') return { valid: true };

  const keys = Object.keys(data);
  const totalItems = keys.length;

- // Skip tiny datasets (statistically insignificant)
  if (totalItems < 5) return { valid: true };

  // 2. Sampling Configuration
@@ -26,17 +26,16 @@ class HeuristicValidator {

  let zeroCount = 0;
  let nullCount = 0;
- let nanCount = 0; // NEW: Track NaNs
- let emptyVectorCount = 0; // NEW: Track empty arrays in complex objects
+ let nanCount = 0;
+ let emptyVectorCount = 0;
  let analyzedCount = 0;

- // For Variance/Flatline Check
  const numericValues = [];

  for (let i = 0; i < totalItems; i += step) {
  const key = keys[i];
  const val = data[key];
- if (!val) { // Catch null/undefined immediately
+ if (!val) {
  nullCount++;
  analyzedCount++;
  continue;
@@ -44,71 +43,79 @@ class HeuristicValidator {
  analyzedCount++;

  // --- TYPE A: Object / Complex Result ---
- // Example: { "profile": [...], "current_price": 100 } or { "signal": "Buy", "score": 0.5 }
  if (typeof val === 'object') {
  const subValues = Object.values(val);

- // Dead Object Check: All props are null/0/undefined
  const isDeadObject = subValues.every(v => v === 0 || v === null || v === undefined);
  if (isDeadObject) nullCount++;

- // NaN Check in Properties
  const hasNan = subValues.some(v => typeof v === 'number' && (isNaN(v) || !isFinite(v)));
  if (hasNan) nanCount++;

- // Vector/Profile Empty Check (Specific to your System)
- // If result contains 'profile', 'history', 'sparkline', or 'buckets' arrays
  const arrayProps = ['profile', 'history', 'sparkline', 'buckets', 'prices'];
  for (const prop of arrayProps) { if (Array.isArray(val[prop]) && val[prop].length === 0) { emptyVectorCount++; } }

- // Extract primary numeric score for Flatline check (heuristically guessing the 'main' metric)
  const numericProp = subValues.find(v => typeof v === 'number' && v !== 0);
  if (numericProp !== undefined) numericValues.push(numericProp);
  }
  // --- TYPE B: Scalar / Primitive Result ---
  if (typeof val === 'number') {
- if (isNaN(val) || !isFinite(val))
- { nanCount++;
- } else {
- numericValues.push(val); // Include zeros
+ if (isNaN(val) || !isFinite(val)) { nanCount++; }
+ else {
+ numericValues.push(val);
  if (val === 0) zeroCount++;
  }
  }
  }

- // 3. Thresholds
- const thresholds = {
+ // 3. Weekend Detection & Threshold Resolution
+ let isWeekend = false;
+ if (dateStr) {
+ try {
+ // Force UTC interpretation to align with system dates
+ const safeDate = dateStr.includes('T') ? dateStr : `${dateStr}T00:00:00Z`;
+ const day = new Date(safeDate).getUTCDay();
+ // 0 = Sunday, 6 = Saturday
+ isWeekend = (day === 0 || day === 6);
+ } catch (e) { /* Fallback to standard validation if date is invalid */ }
+ }
+
+ // Default Thresholds
+ let thresholds = {
  maxZeroPct: overrides.maxZeroPct ?? 99,
  maxNullPct: overrides.maxNullPct ?? 90,
- maxNanPct: overrides.maxNanPct ?? 0, // Strict: NaNs are usually bad bugs
- maxFlatlinePct: 95 // If >95% of data is identical, it's suspicious
+ maxNanPct: overrides.maxNanPct ?? 0,
+ maxFlatlinePct: overrides.maxFlatlinePct ?? 95
  };

+ // Apply Weekend Overrides if applicable
+ if (isWeekend && overrides.weekend) {
+ thresholds = { ...thresholds, ...overrides.weekend };
+ }
+
  // 4. Calculate Stats
  const zeroPct = (zeroCount / analyzedCount) * 100;
  const nullPct = (nullCount / analyzedCount) * 100;
  const nanPct = (nanCount / analyzedCount) * 100;

  // 5. Variance / Flatline Analysis
- // If we found numeric values, check if they are all the same
  let isFlatline = false;
  if (numericValues.length > 5) {
  const first = numericValues[0];
  const identicalCount = numericValues.filter(v => Math.abs(v - first) < 0.000001).length;
  const flatlinePct = (identicalCount / numericValues.length) * 100;

- // Only flag flatline if the value isn't 0 (0 is handled by maxZeroPct)
  if (flatlinePct > thresholds.maxFlatlinePct && Math.abs(first) > 0.0001) { isFlatline = true; }
  }

  // 6. Evaluations
- if (nanPct > thresholds.maxNanPct) { return { valid: false, reason: `Mathematical Error: ${nanPct.toFixed(1)}% of sampled results contain NaN or Infinity.` }; }
- if (zeroPct > thresholds.maxZeroPct) { return { valid: false, reason: `Data Integrity: ${zeroPct.toFixed(1)}% of sampled results are 0. (Suspected Logic Failure)` }; }
- if (nullPct > thresholds.maxNullPct) { return { valid: false, reason: `Data Integrity: ${nullPct.toFixed(1)}% of sampled results are Empty/Null.` }; }
+ // Note: We include the applied thresholds in the error message for clarity
+ if (nanPct > thresholds.maxNanPct) { return { valid: false, reason: `Mathematical Error: ${nanPct.toFixed(1)}% of sampled results contain NaN (Limit: ${thresholds.maxNanPct}%).` }; }
+ if (zeroPct > thresholds.maxZeroPct) { return { valid: false, reason: `Data Integrity: ${zeroPct.toFixed(1)}% of results are 0 (Limit: ${thresholds.maxZeroPct}%${isWeekend ? ' [Weekend Mode]' : ''}).` }; }
+ if (nullPct > thresholds.maxNullPct) { return { valid: false, reason: `Data Integrity: ${nullPct.toFixed(1)}% of results are Empty/Null (Limit: ${thresholds.maxNullPct}%${isWeekend ? ' [Weekend Mode]' : ''}).` }; }

  if (isFlatline) { return { valid: false, reason: `Anomaly: Detected Result Flatline. >${thresholds.maxFlatlinePct}% of outputs are identical (non-zero).` }; }

- // Special check for Distribution/Profile calculations
  if (calcName.includes('profile') || calcName.includes('distribution')) {
  const vectorEmptyPct = (emptyVectorCount / analyzedCount) * 100;
  if (vectorEmptyPct > 90) {
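
With the new dateStr argument, an override entry can carry a weekend object whose keys replace the weekday thresholds on Saturdays and Sundays (UTC). A usage sketch with invented data; note that the final { valid: true } fall-through is outside this hunk, so the weekend result below assumes the analyzer ends by returning valid:

    // Hypothetical override: tolerate fully-null results on weekends only
    const overrides = { maxNullPct: 90, weekend: { maxNullPct: 100 } };

    // 20 users, all null (2024-06-01 is a Saturday, 2024-06-03 a Monday, both UTC)
    const data = Object.fromEntries(Array.from({ length: 20 }, (_, i) => [`user_${i}`, null]));

    const weekday = HeuristicValidator.analyze('sparse-signal-generator', data, '2024-06-03', overrides);
    // weekday.valid === false -> "Data Integrity: 100.0% of results are Empty/Null (Limit: 90%)."

    const weekend = HeuristicValidator.analyze('sparse-signal-generator', data, '2024-06-01', overrides);
    // weekend.valid === true -> the weekend override raises maxNullPct to 100 [Weekend Mode]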
@@ -2,7 +2,7 @@
  * @fileoverview Build Reporter & Auto-Runner.
  * Generates a "Pre-Flight" report of what the computation system WILL do.
  * REFACTORED: Strict 5-category reporting with date-based exclusion logic.
- * UPDATED: Added meta stats to compare Included vs Expected computation counts per date.
+ * UPDATED: Added transactional locking to prevent duplicate reports on concurrent cold starts.
  */

  const { analyzeDateExecution } = require('../WorkflowOrchestrator');
@@ -29,40 +29,56 @@ function isDateBeforeAvailability(dateStr, calcManifest) {
  for (const dep of deps) {
  // Map dependency name to start date
  let startDate = null;
- if (dep === 'portfolio') startDate = DEFINITIVE_EARLIEST_DATES.portfolio;
+ if (dep === 'portfolio') startDate = DEFINITIVE_EARLIEST_DATES.portfolio;
  else if (dep === 'history') startDate = DEFINITIVE_EARLIEST_DATES.history;
  else if (dep === 'social') startDate = DEFINITIVE_EARLIEST_DATES.social;
  else if (dep === 'insights') startDate = DEFINITIVE_EARLIEST_DATES.insights;
  else if (dep === 'price') startDate = DEFINITIVE_EARLIEST_DATES.price;

  // If we have a start date and the target is BEFORE it, exclude this calc.
- if (startDate && targetDate < startDate) {
- return true;
- }
+ if (startDate && targetDate < startDate) { return true; }
  }
  return false;
  }

  /**
  * AUTO-RUN ENTRY POINT
+ * UPDATED: Uses transactional locking to prevent race conditions.
+ * If we deploy multiple computation pass nodes simultaneously, only one should run the report.
  */
  async function ensureBuildReport(config, dependencies, manifest) {
  const { db, logger } = dependencies;
  const now = new Date();
  const buildId = `v${packageVersion}_${now.getFullYear()}-${String(now.getMonth()+1).padStart(2,'0')}-${String(now.getDate()).padStart(2,'0')}_${String(now.getHours()).padStart(2,'0')}-${String(now.getMinutes()).padStart(2,'0')}-${String(now.getSeconds()).padStart(2,'0')}`;
- const latestRef = db.collection('computation_build_records').doc('latest');
+
+ // Lock document specific to this version
+ const lockRef = db.collection('computation_build_records').doc(`init_lock_v${packageVersion}`);

  try {
- const latestDoc = await latestRef.get();
- const priorVersion = latestDoc.exists ? latestDoc.data().packageVersion : null;
+ // Transaction: "Hey I am deploying" check
+ const shouldRun = await db.runTransaction(async (t) => {
+ const doc = await t.get(lockRef);
+
+ if (doc.exists) { return false; } // Someone else beat us to it
+
+ // Claim the lock
+ t.set(lockRef, {
+ status: 'IN_PROGRESS',
+ startedAt: new Date(),
+ workerId: process.env.K_REVISION || 'unknown',
+ buildId: buildId
+ });
+ return true;
+ });

- if (priorVersion === packageVersion) {
- logger.log('INFO', `[BuildReporter] ✅ Version ${packageVersion} already has a report. Skipping.`);
- return;
- }
+ if (!shouldRun) { logger.log('INFO', `[BuildReporter] 🔒 Report for v${packageVersion} is already being generated (Locked). Skipping.`); return; }

- logger.log('INFO', `[BuildReporter] 🚀 New Version Detected (${packageVersion}). Auto-running Pre-flight Report...`);
+ logger.log('INFO', `[BuildReporter] 🚀 Lock Acquired. Running Pre-flight Report for v${packageVersion}...`);
+
  await generateBuildReport(config, dependencies, manifest, 90, buildId);
+
+ // Optional: Update lock to completed (fire-and-forget update)
+ lockRef.update({ status: 'COMPLETED', completedAt: new Date() }).catch(() => {});

  } catch (e) {
  logger.log('ERROR', `[BuildReporter] Auto-run check failed: ${e.message}`);
@@ -117,11 +133,11 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
  }
  }

- const results = await Promise.all(fetchPromises);
- const dailyStatus = results[0];
- const availability = results[1];
+ const results = await Promise.all(fetchPromises);
+ const dailyStatus = results[0];
+ const availability = results[1];
  const prevDailyStatus = (prevDateStr && results[2]) ? results[2] : (prevDateStr ? {} : null);
- const rootDataStatus = availability ? availability.status : { hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false };
+ const rootDataStatus = availability ? availability.status : { hasPortfolio: false, hasHistory: false, hasSocial: false, hasInsights: false, hasPrices: false };

  const analysis = analyzeDateExecution(dateStr, manifest, rootDataStatus, dailyStatus, manifestMap, prevDailyStatus);

@@ -129,11 +145,11 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
  // STRICT 5-CATEGORY MAPPING
  // ---------------------------------------------------------
  const dateSummary = {
- run: [], // New / No Hash / "Runnable"
- rerun: [], // Hash Mismatch / Category Migration
- blocked: [], // Missing Data (Today) / Dependency Missing
+ run: [], // New / No Hash / "Runnable"
+ rerun: [], // Hash Mismatch / Category Migration
+ blocked: [], // Missing Data (Today) / Dependency Missing
  impossible: [], // Missing Data (Historical) / Impossible Dependency
- uptodate: [], // Hash Match (Previously "Skipped")
+ uptodate: [], // Hash Match (Previously "Skipped")

  // [NEW] Metadata for Verification
  meta: {
@@ -181,7 +197,7 @@ async function generateBuildReport(config, dependencies, manifest, daysBack = 90
  dateSummary.uptodate.length;

  dateSummary.meta.totalIncluded = includedCount;
- dateSummary.meta.match = (includedCount === expectedCount);
+ dateSummary.meta.match = (includedCount === expectedCount);

  if (!dateSummary.meta.match) {
  logger.log('WARN', `[BuildReporter] ⚠️ Mismatch on ${dateStr}: Expected ${expectedCount} but got ${includedCount}.`);
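
The locking itself is just a version-scoped document claimed inside a transaction, so concurrently deploying nodes race on one read-then-set and all but the winner back off. The same pattern reduced to a helper (collection and field names follow the hunk above; db is a Firestore Admin SDK instance, and the caller still runs the report and flips the lock to COMPLETED):

    // Minimal "run once per version" guard using a Firestore transaction
    async function acquireVersionLock(db, packageVersion, workerId) {
      const lockRef = db.collection('computation_build_records').doc(`init_lock_v${packageVersion}`);
      return db.runTransaction(async (t) => {
        const doc = await t.get(lockRef);
        if (doc.exists) return false; // another worker already claimed this version
        t.set(lockRef, { status: 'IN_PROGRESS', startedAt: new Date(), workerId });
        return true;
      });
    }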
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "bulltrackers-module",
- "version": "1.0.270",
+ "version": "1.0.272",
  "description": "Helper Functions for Bulltrackers.",
  "main": "index.js",
  "files": [