bulltrackers-module 1.0.214 → 1.0.216

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,13 @@
  /**
- * FILENAME: bulltrackers-module/functions/computation-system/helpers/orchestration_helpers.js
- * FIXED: 'commitResults' now records the CODE HASH in the status document
- * instead of a boolean, enabling auto-invalidation on code changes.
+ * FILENAME: computation-system/helpers/orchestration_helpers.js
+ * FEATURE: Dynamic Auto-Sharding (Transparent 1MB Limit Handling)
+ * * DESCRIPTION:
+ * This module orchestrates the execution of computations. It handles:
+ * 1. Data Availability Checks
+ * 2. Dependency Injection (fetching results from previous passes)
+ * 3. Transparent Auto-Sharding:
+ * - Writes: Automatically detects if a result > 900KB. Splits it into a '_shards' subcollection.
+ * - Reads: Automatically detects sharded pointers and re-assembles the data.
  */

  const { ComputationController } = require('../controllers/computation_controller');
@@ -12,55 +18,27 @@ const {
  getHistoryPartRefs, streamPortfolioData, streamHistoryData,
  getRelevantShardRefs, loadDataByRefs
  } = require('../utils/data_loader');
-
- const {
- DataExtractor, HistoryExtractor, MathPrimitives, Aggregators,
- Validators, SCHEMAS, SignalPrimitives, DistributionAnalytics,
- TimeSeries, priceExtractor
- } = require('../layers/math_primitives.js');
-
+ const mathLayer = require('../layers/index.js');
  const pLimit = require('p-limit');

+ // Mappings for backward compatibility
+ const LEGACY_MAPPING = {
+ DataExtractor: 'extract', HistoryExtractor: 'history', MathPrimitives: 'compute', Aggregators: 'aggregate', Validators: 'validate', SignalPrimitives: 'signals', SCHEMAS: 'schemas', DistributionAnalytics: 'distribution', TimeSeries: 'TimeSeries', priceExtractor: 'priceExtractor', InsightsExtractor: 'insights', UserClassifier: 'classifier', CognitiveBiases: 'bias', SkillAttribution: 'skill', Psychometrics: 'psychometrics'
+ };
+
  function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[calc.pass] = acc[calc.pass] || []).push(calc); return acc; }, {}); }

- /**
- * --- PASSIVE DATA VALIDATION ---
- */
  function validateResultPatterns(logger, calcName, results, category) {
  if (category === 'speculator' || category === 'speculators') return;
-
- const tickers = Object.keys(results);
- const totalItems = tickers.length;
-
- if (totalItems < 5) return;
-
- const sampleTicker = tickers.find(t => results[t] && typeof results[t] === 'object');
- if (!sampleTicker) return;
-
- const keys = Object.keys(results[sampleTicker]);
-
- keys.forEach(key => {
+ const tickers = Object.keys(results); const totalItems = tickers.length; if (totalItems < 5) return;
+ const sampleTicker = tickers.find(t => results[t] && typeof results[t] === 'object'); if (!sampleTicker) return;
+ Object.keys(results[sampleTicker]).forEach(key => {
  if (key.startsWith('_')) return;
-
- let nullCount = 0;
- let nanCount = 0;
- let undefinedCount = 0;
-
- for (const t of tickers) {
- const val = results[t][key];
- if (val === null) nullCount++;
- if (val === undefined) undefinedCount++;
- if (typeof val === 'number' && isNaN(val)) nanCount++;
- }
-
- if (nanCount === totalItems) {
- logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is NaN for 100% of ${totalItems} items.`);
- } else if (undefinedCount === totalItems) {
- logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is UNDEFINED for 100% of ${totalItems} items.`);
- }
- else if (nullCount > (totalItems * 0.9)) {
- logger.log('WARN', `[DataQuality] Calc '${calcName}' field '${key}' is NULL for ${nullCount}/${totalItems} items.`);
- }
+ let nullCount = 0, nanCount = 0, undefinedCount = 0;
+ for (const t of tickers) { const val = results[t][key]; if (val === null) nullCount++; if (val === undefined) undefinedCount++; if (typeof val === 'number' && isNaN(val)) nanCount++; }
+ if (nanCount === totalItems) logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is NaN for 100% of items.`);
+ else if (undefinedCount === totalItems) logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is UNDEFINED for 100% of items.`);
+ else if (nullCount > (totalItems * 0.9)) logger.log('WARN', `[DataQuality] Calc '${calcName}' field '${key}' is NULL for ${nullCount}/${totalItems} items.`);
  });
  }

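The new `LEGACY_MAPPING` table lets older calculations keep addressing the math layer through its short aliases (`math.extract`, `math.compute`, ...) while new code uses the names exported from `layers/index.js`. A minimal sketch of how the mapping is applied when building a context (the `mathLayer` object below is a stand-in, not the real layer):

```js
// Stand-in for require('../layers/index.js'); the real module exports many groups.
const mathLayer = { DataExtractor: { pluck: (obj, k) => obj[k] } };
const LEGACY_MAPPING = { DataExtractor: 'extract' };

const math = {};
for (const [key, value] of Object.entries(mathLayer)) {
  math[key] = value;                                          // new-style name
  if (LEGACY_MAPPING[key]) math[LEGACY_MAPPING[key]] = value; // legacy alias
}

console.log(math.extract === math.DataExtractor); // true: old calcs keep working
```

This is exactly the loop that `runBatchPriceComputation` runs further down in this diff when it assembles `dynamicMathContext`.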
@@ -68,11 +46,11 @@ function checkRootDependencies(calcManifest, rootDataStatus) {
  const missing = [];
  if (!calcManifest.rootDataDependencies) return { canRun: true, missing };
  for (const dep of calcManifest.rootDataDependencies) {
- if (dep === 'portfolio' && !rootDataStatus.hasPortfolio) missing.push('portfolio');
- else if (dep === 'insights' && !rootDataStatus.hasInsights) missing.push('insights');
- else if (dep === 'social' && !rootDataStatus.hasSocial) missing.push('social');
- else if (dep === 'history' && !rootDataStatus.hasHistory) missing.push('history');
- else if (dep === 'price' && !rootDataStatus.hasPrices) missing.push('price');
+ if (dep === 'portfolio' && !rootDataStatus.hasPortfolio) missing.push('portfolio');
+ else if (dep === 'insights' && !rootDataStatus.hasInsights) missing.push('insights');
+ else if (dep === 'social' && !rootDataStatus.hasSocial) missing.push('social');
+ else if (dep === 'history' && !rootDataStatus.hasHistory) missing.push('history');
+ else if (dep === 'price' && !rootDataStatus.hasPrices) missing.push('price');
  }
  return { canRun: missing.length === 0, missing };
  }
@@ -81,166 +59,112 @@ async function checkRootDataAvailability(dateStr, config, dependencies, earliest
  const { logger } = dependencies;
  const dateToProcess = new Date(dateStr + 'T00:00:00Z');
  let portfolioRefs = [], historyRefs = [];
- let hasPortfolio = false, hasInsights = false, hasSocial = false, hasHistory = false, hasPrices = false;
- let insightsData = null, socialData = null;
-
+ let hasPortfolio = false, hasInsights = false, hasSocial = false, hasHistory = false, hasPrices = false, insightsData = null, socialData = null;
  try {
  const tasks = [];
- if (dateToProcess >= earliestDates.portfolio) tasks.push(getPortfolioPartRefs (config, dependencies, dateStr).then(r => { portfolioRefs = r; hasPortfolio = !!r.length; }));
- if (dateToProcess >= earliestDates.insights) tasks.push(loadDailyInsights (config, dependencies, dateStr).then(r => { insightsData = r; hasInsights = !!r; }));
- if (dateToProcess >= earliestDates.social) tasks.push(loadDailySocialPostInsights (config, dependencies, dateStr).then(r => { socialData = r; hasSocial = !!r; }));
- if (dateToProcess >= earliestDates.history) tasks.push(getHistoryPartRefs (config, dependencies, dateStr).then(r => { historyRefs = r; hasHistory = !!r.length; }));
-
- if (dateToProcess >= earliestDates.price) {
- tasks.push(checkPriceDataAvailability(config, dependencies).then(r => { hasPrices = r; }));
- }
-
+ if (dateToProcess >= earliestDates.portfolio) tasks.push(getPortfolioPartRefs(config, dependencies, dateStr).then(r => { portfolioRefs = r; hasPortfolio = !!r.length; }));
+ if (dateToProcess >= earliestDates.insights) tasks.push(loadDailyInsights(config, dependencies, dateStr).then(r => { insightsData = r; hasInsights = !!r; }));
+ if (dateToProcess >= earliestDates.social) tasks.push(loadDailySocialPostInsights(config, dependencies, dateStr).then(r => { socialData = r; hasSocial = !!r; }));
+ if (dateToProcess >= earliestDates.history) tasks.push(getHistoryPartRefs(config, dependencies, dateStr).then(r => { historyRefs = r; hasHistory = !!r.length; }));
+ if (dateToProcess >= earliestDates.price) { tasks.push(checkPriceDataAvailability(config, dependencies).then(r => { hasPrices = r; })); }
  await Promise.all(tasks);
-
  if (!(hasPortfolio || hasInsights || hasSocial || hasHistory || hasPrices)) return null;
-
- return {
- portfolioRefs,
- historyRefs,
- todayInsights: insightsData,
- todaySocialPostInsights: socialData,
- status: { hasPortfolio, hasInsights, hasSocial, hasHistory, hasPrices },
- yesterdayPortfolioRefs: null // Will be populated if needed
- };
-
- } catch (err) {
- logger.log('ERROR', `Error checking data: ${err.message}`);
- return null;
- }
+ return { portfolioRefs, historyRefs, todayInsights: insightsData, todaySocialPostInsights: socialData, status: { hasPortfolio, hasInsights, hasSocial, hasHistory, hasPrices }, yesterdayPortfolioRefs: null };
+ } catch (err) { logger.log('ERROR', `Error checking data: ${err.message}`); return null; }
  }

- async function checkPriceDataAvailability(config, dependencies) {
- const { db } = dependencies;
- const collection = config.priceCollection || 'asset_prices';
- try {
- const snapshot = await db.collection(collection).limit(1).get();
- if (snapshot.empty) return false;
- return true;
- } catch (e) {
- return false;
+ async function firestoreHelper(action, { key, updates, config, db }) {
+ const collections = { price: config.priceCollection || 'asset_prices', status: config.computationStatusCollection || 'computation_status', };
+ switch (action) {
+ case 'checkAvailability': try { const snapshot = await db.collection(collections.price).limit(1).get(); return !snapshot.empty; } catch (e) { return false; }
+ case 'fetchStatus': { if (!key) throw new Error('fetchStatus requires a key'); const docRef = db.collection(collections.status).doc(key); const snap = await docRef.get(); return snap.exists ? snap.data() : {}; }
+ case 'updateStatus': { if (!key) throw new Error('updateStatus requires a key'); if (!updates || Object.keys(updates).length === 0) return; const docRef = db.collection(collections.status).doc(key); await docRef.set(updates, { merge: true }); return true; }
+ default: throw new Error(`Unknown action: ${action}`);
  }
  }

- async function fetchComputationStatus(dateStr, config, { db }) {
- const collection = config.computationStatusCollection || 'computation_status';
- const docRef = db.collection(collection).doc(dateStr);
- const snap = await docRef.get();
- return snap.exists ? snap.data() : {};
- }
-
- async function fetchGlobalComputationStatus(config, { db }) {
- const collection = config.computationStatusCollection || 'computation_status';
- const docRef = db.collection(collection).doc('global_status');
- const snap = await docRef.get();
- return snap.exists ? snap.data() : {};
- }
-
- async function updateComputationStatus(dateStr, updates, config, { db }) {
- if (!updates || Object.keys(updates).length === 0) return;
- const collection = config.computationStatusCollection || 'computation_status';
- const docRef = db.collection(collection).doc(dateStr);
- await docRef.set(updates, { merge: true });
- }
-
- async function updateGlobalComputationStatus(updatesByDate, config, { db }) {
- if (!updatesByDate || Object.keys(updatesByDate).length === 0) return;
- const collection = config.computationStatusCollection || 'computation_status';
- const docRef = db.collection(collection).doc('global_status');
- const flattenUpdates = {};
- for (const [date, statuses] of Object.entries(updatesByDate)) {
- for (const [calc, status] of Object.entries(statuses)) {
- flattenUpdates[`${date}.${calc}`] = status;
- }
- }
- try {
- await docRef.update(flattenUpdates);
- } catch (err) {
- if (err.code === 5) {
- const deepObj = {};
- for (const [date, statuses] of Object.entries(updatesByDate)) {
- deepObj[date] = statuses;
- }
- await docRef.set(deepObj, { merge: true });
- } else {
- throw err;
- }
- }
- }
+ async function checkPriceDataAvailability(config, dependencies) { return firestoreHelper('checkAvailability', { config, db: dependencies.db }); }
+ async function fetchComputationStatus(dateStr, config, { db }) { return firestoreHelper('fetchStatus', { key: dateStr, config, db }); }
+ async function fetchGlobalComputationStatus(config, { db }) { return firestoreHelper('fetchStatus', { key: 'global_status', config, db }); }
+ async function updateComputationStatus(dateStr, updates, config, { db }) { return firestoreHelper('updateStatus', { key: dateStr, updates, config, db }); }

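Four separate status functions collapse here into one switch-driven `firestoreHelper`, with thin wrappers preserving the old signatures. A sketch of the call shape (assumes this file's exports are in scope and `db` is a Firestore instance from the orchestrator's dependencies):

```js
// const { fetchComputationStatus, fetchGlobalComputationStatus,
//         updateComputationStatus } = require('./orchestration_helpers');
const config = { computationStatusCollection: 'computation_status' };

async function statusDemo(db) {
  const today  = await fetchComputationStatus('2024-01-15', config, { db }); // per-date doc
  const global = await fetchGlobalComputationStatus(config, { db });         // fixed 'global_status' doc
  await updateComputationStatus('2024-01-15', { someCalc: 'hash123' }, config, { db }); // merge-write
  return { today, global };
}
```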
+ /**
+ * --- REFACTORED: fetchExistingResults ---
+ * Transparently handles both standard documents and auto-sharded documents.
+ * 1. Fetches the doc.
+ * 2. Checks for `_sharded: true` flag.
+ * 3. If sharded, fetches subcollection and merges data back into a single object.
+ */
  async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db }, includeSelf = false) {
- const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
+ const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
  const calcsToFetch = new Set();
- for (const calc of calcsInPass) {
- if (calc.dependencies) { calc.dependencies.forEach(d => calcsToFetch.add(normalizeName(d))); }
- if (includeSelf && calc.isHistorical) { calcsToFetch.add(normalizeName(calc.name)); }
- }
+ for (const calc of calcsInPass) { if (calc.dependencies) calc.dependencies.forEach(d => calcsToFetch.add(normalizeName(d))); if (includeSelf && calc.isHistorical) calcsToFetch.add(normalizeName(calc.name)); }
  if (!calcsToFetch.size) return {};
  const fetched = {};
  const docRefs = [];
  const names = [];
+
+ // 1. Prepare Reads
  for (const name of calcsToFetch) {
  const m = manifestMap.get(name);
- if (m) {
- docRefs.push(db.collection(config.resultsCollection).doc(dateStr)
- .collection(config.resultsSubcollection).doc(m.category || 'unknown')
- .collection(config.computationsSubcollection).doc(name));
- names.push(name);
- }
+ if (m) { docRefs.push(db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(m.category || 'unknown').collection(config.computationsSubcollection).doc(name)); names.push(name); }
  }
+
  if (docRefs.length) {
  const snaps = await db.getAll(...docRefs);
- snaps.forEach((doc, i) => { if (doc.exists && doc.data()._completed) { fetched[names[i]] = doc.data(); } });
+ const hydrationPromises = [];
+
+ // 2. Process Initial Snapshots
+ snaps.forEach((doc, i) => { const name = names[i]; if (!doc.exists) return; const data = doc.data(); if (data._sharded === true) { hydrationPromises.push(hydrateAutoShardedResult(doc.ref, name)); } else if (data._completed) { fetched[name] = data; } }); // CHECK FOR AUTO-SHARDING FLAG
+
+
+ // 3. Hydrate Sharded Data in Parallel
+ if (hydrationPromises.length > 0) { const hydratedResults = await Promise.all(hydrationPromises); hydratedResults.forEach(res => { fetched[res.name] = res.data; }); }
  }
  return fetched;
  }

+ /**
+ * Helper: Fetches all docs in the '_shards' subcollection and merges them.
+ */
+ async function hydrateAutoShardedResult(docRef, resultName) {
+ // Determine subcollection name (defaulting to '_shards')
+ const shardsCol = docRef.collection('_shards');
+ const snapshot = await shardsCol.get();
+
+ const assembledData = { _completed: true }; // Rebuild the object
+
+ snapshot.forEach(doc => { const chunk = doc.data(); Object.assign(assembledData, chunk); });
+
+ // Remove internal flags if they leaked into the shards
+ delete assembledData._sharded;
+ delete assembledData._completed;
+
+ return { name: resultName, data: assembledData };
+ }
+
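The storage layout this reads back is a pointer document whose payload lives in a `_shards` subcollection; reassembly is a plain merge of the chunk documents. A schematic (the path segments come from `config`; only `_shards` and `shard_N` are fixed):

```js
// Layout written by prepareAutoShardedWrites (defined later in this diff):
//   .../{calcName}                  -> { _completed: true, _sharded: true, _shardCount: 2, ... }
//   .../{calcName}/_shards/shard_0  -> { AAPL: {...}, MSFT: {...} }
//   .../{calcName}/_shards/shard_1  -> { TSLA: {...} }
// Merging is order-independent because each top-level key lands in exactly one shard:
const shard0 = { AAPL: { pnl: 1 }, MSFT: { pnl: 2 } };
const shard1 = { TSLA: { pnl: 3 } };
const assembled = Object.assign({}, shard0, shard1);
console.log(Object.keys(assembled)); // [ 'AAPL', 'MSFT', 'TSLA' ]
```

One quirk worth noting: the helper seeds `_completed: true` and then deletes it again before returning, so hydrated dependencies come back without the flag that the non-sharded read path checks; callers receive the raw data either way.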
  async function streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps) {
  const { logger } = deps;
- const controller = new ComputationController(config, deps);
- const calcs = Object.values(state).filter(c => c && c.manifest);
- const streamingCalcs = calcs.filter(c =>
- c.manifest.rootDataDependencies.includes('portfolio') ||
- c.manifest.rootDataDependencies.includes('history')
- );
-
+ const controller = new ComputationController(config, deps);
+ const calcs = Object.values(state).filter(c => c && c.manifest);
+ const streamingCalcs = calcs.filter(c => c.manifest.rootDataDependencies.includes('portfolio') || c.manifest.rootDataDependencies.includes('history'));
  if (streamingCalcs.length === 0) return;
-
+
  logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
-
  await controller.loader.loadMappings();
- const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
+ const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
  const prevDateStr = prevDate.toISOString().slice(0, 10);
-
- const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
+ const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
  const needsYesterdayPortfolio = streamingCalcs.some(c => c.manifest.isHistorical);
- const yP_iter = (needsYesterdayPortfolio && rootData.yesterdayPortfolioRefs) ? streamPortfolioData(config, deps, prevDateStr, rootData.yesterdayPortfolioRefs) : null;
- const needsTradingHistory = streamingCalcs.some(c => c.manifest.rootDataDependencies.includes('history'));
- const tH_iter = (needsTradingHistory && historyRefs) ? streamHistoryData(config, deps, dateStr, historyRefs) : null;
-
- let yP_chunk = {};
- let tH_chunk = {};
+ const yP_iter = (needsYesterdayPortfolio && rootData.yesterdayPortfolioRefs) ? streamPortfolioData(config, deps, prevDateStr, rootData.yesterdayPortfolioRefs) : null;
+ const needsTradingHistory = streamingCalcs.some(c => c.manifest.rootDataDependencies.includes('history'));
+ const tH_iter = (needsTradingHistory && historyRefs) ? streamHistoryData(config, deps, dateStr, historyRefs) : null;

+ let yP_chunk = {}, tH_chunk = {};
  for await (const tP_chunk of tP_iter) {
  if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
  if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
-
- const promises = streamingCalcs.map(calc =>
- controller.executor.executePerUser(
- calc,
- calc.manifest,
- dateStr,
- tP_chunk,
- yP_chunk,
- tH_chunk,
- fetchedDeps,
- previousFetchedDeps
- )
- );
+ const promises = streamingCalcs.map(calc => controller.executor.executePerUser(calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk, fetchedDeps, previousFetchedDeps));
  await Promise.all(promises);
  }
  logger.log('INFO', `[${passName}] Streaming complete.`);
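The streaming loop drives the primary portfolio iterator with `for await` and manually advances the optional yesterday/history iterators in lockstep. A minimal sketch of that zip pattern (the generators stand in for `streamPortfolioData`/`streamHistoryData`):

```js
async function* gen(label, n) { // stand-in for a Firestore chunk stream
  for (let i = 0; i < n; i++) yield { [`${label}_${i}`]: i };
}

async function zipDemo() {
  const primary = gen('today', 3);
  const secondary = gen('yesterday', 2); // may be shorter than primary
  for await (const chunk of primary) {
    // Manual .next() keeps the streams aligned; '|| {}' covers exhaustion,
    // exactly as the '(await yP_iter.next()).value || {}' lines above do.
    const side = (await secondary.next()).value || {};
    console.log(chunk, side);
  }
}
zipDemo();
```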
@@ -255,18 +179,8 @@ async function runStandardComputationPass(date, calcs, passName, config, deps, r
  const prevStr = prev.toISOString().slice(0, 10);
  fullRoot.yesterdayPortfolioRefs = await getPortfolioPartRefs(config, deps, prevStr);
  }
-
  const state = {};
- for (const c of calcs) {
- try {
- const inst = new c.class();
- inst.manifest = c;
- state[normalizeName(c.name)] = inst;
- logger.log('INFO', `${c.name} calculation running for ${dStr}`);
- }
- catch (e) { logger.log('WARN', `Failed to init ${c.name}`); }
- }
-
+ for (const c of calcs) { try { const inst = new c.class(); inst.manifest = c; state[normalizeName(c.name)] = inst; logger.log('INFO', `${c.name} calculation running for ${dStr}`); } catch (e) { logger.log('WARN', `Failed to init ${c.name}`); } }
  await streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps);
  return await commitResults(state, dStr, passName, config, deps, skipStatusWrite);
  }
@@ -275,12 +189,10 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch
  const controller = new ComputationController(config, deps);
  const dStr = date.toISOString().slice(0, 10);
  const state = {};
-
  for (const mCalc of calcs) {
  try {
  deps.logger.log('INFO', `${mCalc.name} calculation running for ${dStr}`);
- const inst = new mCalc.class();
- inst.manifest = mCalc;
+ const inst = new mCalc.class(); inst.manifest = mCalc;
  await controller.executor.executeOncePerDay(inst, mCalc, dStr, fetchedDeps, previousFetchedDeps);
  state[normalizeName(mCalc.name)] = inst;
  } catch (e) { deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`); }
@@ -290,101 +202,46 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch

  /**
  * --- REFACTORED: commitResults ---
- * Commits results individually per calculation.
- * If one calculation fails (e.g. size limit), others still succeed.
- * UPDATED: Writes the HASH to the status document.
+ * Automatically detects result size.
+ * If > 900KB, it splits the result into chunks and writes to a subcollection.
+ * If < 900KB, it writes normally.
  */
  async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
  const successUpdates = {};
  const schemas = [];

- // Iterate PER CALCULATION to isolate failures
  for (const name in stateObj) {
  const calc = stateObj[name];
  let hasData = false;

  try {
  const result = await calc.getResult();
- if (!result) {
- deps.logger.log('INFO', `${name} for ${dStr}: Skipped (Empty Result)`);
- continue;
- }
-
- const standardRes = {};
- const shardedWrites = [];
- const calcWrites = [];
-
- // 1. Separate Standard and Sharded Data
- for (const key in result) {
- if (key.startsWith('sharded_')) {
- const sData = result[key];
- for (const colName in sData) {
- const docsMap = sData[colName];
- for (const docId in docsMap) {
- const ref = docId.includes('/') ? deps.db.doc(docId) : deps.db.collection(colName).doc(docId);
- shardedWrites.push({
- ref,
- data: { ...docsMap[docId], _completed: true }
- });
- }
- }
- if (Object.keys(sData).length > 0) hasData = true;
- } else {
- standardRes[key] = result[key];
- }
- }
-
- // 2. Prepare Standard Result Write
- if (Object.keys(standardRes).length) {
- validateResultPatterns(deps.logger, name, standardRes, calc.manifest.category);
- standardRes._completed = true;
-
- const docRef = deps.db.collection(config.resultsCollection).doc(dStr)
- .collection(config.resultsSubcollection).doc(calc.manifest.category)
- .collection(config.computationsSubcollection).doc(name);
+ if (!result) { deps.logger.log('INFO', `${name} for ${dStr}: Skipped (Empty Result)`); continue; }

- calcWrites.push({
- ref: docRef,
- data: standardRes
- });
- hasData = true;
- }
+ const mainDocRef = deps.db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);

- // 3. Queue Schema (Safe to accumulate)
- if (calc.manifest.class.getSchema) {
- const { class: _cls, ...safeMetadata } = calc.manifest;
- schemas.push({
- name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata
- });
+ // AUTO-SHARDING LOGIC
+ const updates = await prepareAutoShardedWrites(result, mainDocRef, deps.logger);
+
+ // Collect Schemas if present
+ if (calc.manifest.class.getSchema) {
+ const { class: _cls, ...safeMetadata } = calc.manifest;
+ schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
  }

- // 4. ATTEMPT COMMIT FOR THIS CALCULATION ONLY
- if (hasData) {
- const allWritesForCalc = [...calcWrites, ...shardedWrites];
-
- if (allWritesForCalc.length > 0) {
- await commitBatchInChunks(config, deps, allWritesForCalc, `${name} Results`);
-
- // --- CRITICAL UPDATE: Store the Smart Hash ---
- successUpdates[name] = calc.manifest.hash || true;
-
- deps.logger.log('INFO', `${name} for ${dStr}: \u2714 Success (Written)`);
- } else {
- deps.logger.log('INFO', `${name} for ${dStr}: - No Data to Write`);
- }
- } else {
- deps.logger.log('INFO', `${name} for ${dStr}: - Empty`);
+ if (updates.length > 0) {
+ await commitBatchInChunks(config, deps, updates, `${name} Results`);
+ successUpdates[name] = calc.manifest.hash || true;
+ const isSharded = updates.some(u => u.data._sharded === true);
+ deps.logger.log('INFO', `${name} for ${dStr}: \u2714 Success (Written ${isSharded ? 'Sharded' : 'Standard'})`);
+ } else {
+ deps.logger.log('INFO', `${name} for ${dStr}: - Empty Data`);
  }

- } catch (e) {
- deps.logger.log('ERROR', `${name} for ${dStr}: \u2716 FAILED Commit: ${e.message}`);
- }
+ } catch (e) { deps.logger.log('ERROR', `${name} for ${dStr}: \u2716 FAILED Commit: ${e.message}`); }
  }

- // Save Schemas (Best effort, isolated)
  if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => { });
-
- // Update Status Document (Only for the ones that succeeded)
  if (!skipStatusWrite && Object.keys(successUpdates).length > 0) {
  await updateComputationStatus(dStr, successUpdates, config, deps);
  deps.logger.log('INFO', `[${passName}] Updated status document for ${Object.keys(successUpdates).length} successful computations.`);
393
250
  }
394
251
 
395
252
  /**
396
- * --- UPDATED: runBatchPriceComputation ---
253
+ * Accurately calculates the size of a value according to Firestore storage rules.
254
+ * Reference: https://firebase.google.com/docs/firestore/storage-size
397
255
  */
256
+ function calculateFirestoreBytes(value) {
257
+ if (value === null) return 1;
258
+ if (value === undefined) return 0; // Firestore drops undefined fields
259
+ if (typeof value === 'boolean') return 1;
260
+ if (typeof value === 'number') return 8; // All numbers are 64-bit doubles or integers
261
+ if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1;
262
+ if (value instanceof Date) return 8; // Timestamps are 8 bytes
263
+
264
+ // Handle References (approximate based on path length)
265
+ if (value.constructor && value.constructor.name === 'DocumentReference') {
266
+ // Path string + 16 bytes for the reference type overhead
267
+ return Buffer.byteLength(value.path, 'utf8') + 16;
268
+ }
269
+
270
+ // Handle Arrays: Sum of all values
271
+ if (Array.isArray(value)) {
272
+ let sum = 0;
273
+ for (const item of value) sum += calculateFirestoreBytes(item);
274
+ return sum;
275
+ }
276
+
277
+ // Handle Objects (Maps): Sum of (Key + 1 + Value)
278
+ if (typeof value === 'object') {
279
+ let sum = 0;
280
+ for (const k in value) {
281
+ if (Object.prototype.hasOwnProperty.call(value, k)) {
282
+ // Key size (utf8 + 1) + Value size
283
+ sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]);
284
+ }
285
+ }
286
+ return sum;
287
+ }
288
+
289
+ return 0; // Fallback
290
+ }
291
+
292
+
293
+ async function prepareAutoShardedWrites(result, docRef, logger) {
294
+ const SAFETY_THRESHOLD_BYTES = 1000 * 1024; // 1MB Limit (We target just under this)
295
+ const OVERHEAD_ALLOWANCE = 20 * 1024; // 20KB Safety margin for document path & metadata
296
+ const CHUNK_LIMIT = SAFETY_THRESHOLD_BYTES - OVERHEAD_ALLOWANCE;
297
+ const totalSize = calculateFirestoreBytes(result); // 1. Calculate Total Size Once (O(N))
298
+ const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16; // Add the size of the document path itself (Firestore counts this against the 1MB limit)
299
+
300
+ if ((totalSize + docPathSize) < CHUNK_LIMIT) { const data = { ...result, _completed: true, _sharded: false }; return [{ ref: docRef, data, options: { merge: true } }]; } // CASE A: Fits in one document
301
+
302
+ logger.log('INFO', `[AutoShard] Result size ~${Math.round(totalSize/1024)}KB exceeds limit. Sharding...`);
303
+
304
+ const writes = [];
305
+ const shardCollection = docRef.collection('_shards');
306
+
307
+ let currentChunk = {};
308
+ let currentChunkSize = 0;
309
+ let shardIndex = 0;
310
+
311
+
312
+ for (const [key, value] of Object.entries(result)) { // 2. Efficient O(N) Loop
313
+ if (key.startsWith('_')) continue;
314
+ const keySize = Buffer.byteLength(key, 'utf8') + 1; // Calculate size of just this item
315
+ const valueSize = calculateFirestoreBytes(value);
316
+ const itemSize = keySize + valueSize;
317
+
318
+ if (currentChunkSize + itemSize > CHUNK_LIMIT) { // Check if adding this item would overflow the current chunk
319
+ // Flush current chunk
320
+ writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite
321
+ shardIndex++;
322
+ currentChunk = {};
323
+ currentChunkSize = 0;
324
+ }
325
+
326
+ // Add to current chunk
327
+ currentChunk[key] = value;
328
+ currentChunkSize += itemSize;
329
+ }
330
+
331
+ // Flush final chunk
332
+ if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }
333
+
334
+ // Pointer Document
335
+ const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() };
336
+
337
+ // Use merge: false to ensure we overwrite any previous non-sharded blob
338
+ writes.push({ ref: docRef, data: pointerData, options: { merge: false } });
339
+
340
+ return writes;
341
+ }
342
+
398
343
  async function runBatchPriceComputation(config, deps, dateStrings, calcs, targetTickers = []) {
399
344
  const { logger, db, calculationUtils } = deps;
400
345
  const controller = new ComputationController(config, deps);
401
-
402
346
  const mappings = await controller.loader.loadMappings();
403
-
404
347
  let targetInstrumentIds = [];
405
348
  if (targetTickers && targetTickers.length > 0) {
406
349
  const tickerToInst = mappings.tickerToInstrument || {};
407
350
  targetInstrumentIds = targetTickers.map(t => tickerToInst[t]).filter(id => id);
408
- if (targetInstrumentIds.length === 0) {
409
- logger.log('WARN', '[BatchPrice] Target tickers provided but no IDs found. Aborting.');
410
- return;
411
- }
351
+ if (targetInstrumentIds.length === 0) { logger.log('WARN', '[BatchPrice] Target tickers provided but no IDs found. Aborting.'); return; }
412
352
  }
413
-
414
353
  const allShardRefs = await getRelevantShardRefs(config, deps, targetInstrumentIds);
415
-
416
- if (!allShardRefs.length) {
417
- logger.log('WARN', '[BatchPrice] No relevant price shards found. Exiting.');
418
- return;
419
- }
420
-
421
- const OUTER_CONCURRENCY_LIMIT = 2;
422
- const SHARD_BATCH_SIZE = 20;
423
- const WRITE_BATCH_LIMIT = 50;
424
-
354
+ if (!allShardRefs.length) { logger.log('WARN', '[BatchPrice] No relevant price shards found. Exiting.'); return; }
355
+ const OUTER_CONCURRENCY_LIMIT = 2, SHARD_BATCH_SIZE = 20, WRITE_BATCH_LIMIT = 50;
425
356
  logger.log('INFO', `[BatchPrice] Execution Plan: ${dateStrings.length} days, ${allShardRefs.length} shards. Concurrency: ${OUTER_CONCURRENCY_LIMIT}.`);
426
-
427
- const shardChunks = [];
428
- for (let i = 0; i < allShardRefs.length; i += SHARD_BATCH_SIZE) {
429
- shardChunks.push(allShardRefs.slice(i, i + SHARD_BATCH_SIZE));
430
- }
431
-
357
+ const shardChunks = []; for (let i = 0; i < allShardRefs.length; i += SHARD_BATCH_SIZE) { shardChunks.push(allShardRefs.slice(i, i + SHARD_BATCH_SIZE)); }
432
358
  const outerLimit = pLimit(OUTER_CONCURRENCY_LIMIT);
433
-
434
359
  const chunkPromises = [];
435
360
  for (let index = 0; index < shardChunks.length; index++) {
436
361
  const shardChunkRefs = shardChunks[index];
437
362
  chunkPromises.push(outerLimit(async () => {
438
363
  try {
439
364
  logger.log('INFO', `[BatchPrice] Processing chunk ${index + 1}/${shardChunks.length} (${shardChunkRefs.length} shards)...`);
440
-
441
365
  const pricesData = await loadDataByRefs(config, deps, shardChunkRefs);
442
-
443
- if (targetInstrumentIds.length > 0) {
444
- const requestedSet = new Set(targetInstrumentIds);
445
- for (const loadedInstrumentId in pricesData) {
446
- if (!requestedSet.has(loadedInstrumentId)) {
447
- delete pricesData[loadedInstrumentId];
448
- }
449
- }
450
- }
451
-
366
+ if (targetInstrumentIds.length > 0) { const requestedSet = new Set(targetInstrumentIds); for (const loadedInstrumentId in pricesData) { if (!requestedSet.has(loadedInstrumentId)) { delete pricesData[loadedInstrumentId]; } } }
452
367
  const writes = [];
453
-
454
368
  for (const dateStr of dateStrings) {
455
- const context = {
456
- mappings,
457
- prices: { history: pricesData },
458
- date: { today: dateStr },
459
- math: {
460
- extract: DataExtractor,
461
- history: HistoryExtractor,
462
- compute: MathPrimitives,
463
- aggregate: Aggregators,
464
- validate: Validators,
465
- signals: SignalPrimitives,
466
- schemas: SCHEMAS,
467
- distribution: DistributionAnalytics,
468
- TimeSeries: TimeSeries,
469
- priceExtractor: priceExtractor
470
- }
471
- };
472
-
369
+ const dynamicMathContext = {};
370
+ for (const [key, value] of Object.entries(mathLayer)) { dynamicMathContext[key] = value; if (LEGACY_MAPPING[key]) { dynamicMathContext[LEGACY_MAPPING[key]] = value;} }
371
+ const context = { mappings, prices: { history: pricesData }, date: { today: dateStr }, math: dynamicMathContext };
473
372
  for (const calcManifest of calcs) {
474
373
  try {
475
- const instance = new calcManifest.class();
476
- await instance.process(context);
477
- const result = await instance.getResult();
478
-
374
+ const instance = new calcManifest.class(); await instance.process(context); const result = await instance.getResult();
479
375
  if (result && Object.keys(result).length > 0) {
480
- let dataToWrite = result;
481
- if (result.by_instrument) dataToWrite = result.by_instrument;
482
-
376
+ let dataToWrite = result; if (result.by_instrument) dataToWrite = result.by_instrument;
483
377
  if (Object.keys(dataToWrite).length > 0) {
484
- const docRef = db.collection(config.resultsCollection).doc(dateStr)
485
- .collection(config.resultsSubcollection).doc(calcManifest.category)
486
- .collection(config.computationsSubcollection).doc(normalizeName(calcManifest.name));
487
-
488
- writes.push({
489
- ref: docRef,
490
- data: { ...dataToWrite, _completed: true },
491
- options: { merge: true }
492
- });
378
+ const docRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(calcManifest.category).collection(config.computationsSubcollection).doc(normalizeName(calcManifest.name));
379
+ writes.push({ ref: docRef, data: { ...dataToWrite, _completed: true }, options: { merge: true } });
493
380
  }
494
381
  }
495
- } catch (err) {
496
- logger.log('ERROR', `[BatchPrice] \u2716 Failed ${calcManifest.name} for ${dateStr}: ${err.message}`);
497
- }
382
+ } catch (err) { logger.log('ERROR', `[BatchPrice] \u2716 Failed ${calcManifest.name} for ${dateStr}: ${err.message}`); }
498
383
  }
499
384
  }
500
-
501
385
  if (writes.length > 0) {
502
- const commitBatches = [];
503
- for (let i = 0; i < writes.length; i += WRITE_BATCH_LIMIT) {
504
- commitBatches.push(writes.slice(i, i + WRITE_BATCH_LIMIT));
505
- }
506
-
386
+ const commitBatches = []; for (let i = 0; i < writes.length; i += WRITE_BATCH_LIMIT) { commitBatches.push(writes.slice(i, i + WRITE_BATCH_LIMIT)); }
507
387
  const commitLimit = pLimit(10);
508
-
509
388
  await Promise.all(commitBatches.map((batchWrites, bIndex) => commitLimit(async () => {
510
- const batch = db.batch();
511
- batchWrites.forEach(w => batch.set(w.ref, w.data, w.options));
512
-
513
- try {
514
- await calculationUtils.withRetry(() => batch.commit(), `BatchPrice-C${index}-B${bIndex}`);
515
- } catch (commitErr) {
516
- logger.log('ERROR', `[BatchPrice] Commit failed for Chunk ${index} Batch ${bIndex}.`, { error: commitErr.message });
517
- }
389
+ const batch = db.batch(); batchWrites.forEach(w => batch.set(w.ref, w.data, w.options));
390
+ try { await calculationUtils.withRetry(() => batch.commit(), `BatchPrice-C${index}-B${bIndex}`); } catch (commitErr) { logger.log('ERROR', `[BatchPrice] Commit failed for Chunk ${index} Batch ${bIndex}.`, { error: commitErr.message }); }
518
391
  })));
519
392
  }
520
-
521
- } catch (chunkErr) {
522
- logger.log('ERROR', `[BatchPrice] Fatal error processing Chunk ${index}.`, { error: chunkErr.message });
523
- }
393
+ } catch (chunkErr) { logger.log('ERROR', `[BatchPrice] Fatal error processing Chunk ${index}.`, { error: chunkErr.message }); }
524
394
  }));
525
395
  }
526
-
527
396
  await Promise.all(chunkPromises);
528
397
  logger.log('INFO', '[BatchPrice] Optimization pass complete.');
529
398
  }
530
399
 
531
- module.exports = {
532
- groupByPass,
533
- checkRootDependencies,
534
- checkRootDataAvailability,
535
- fetchExistingResults,
536
- fetchComputationStatus,
537
- fetchGlobalComputationStatus,
538
- updateComputationStatus,
539
- updateGlobalComputationStatus,
540
- runStandardComputationPass,
541
- runMetaComputationPass,
542
- runBatchPriceComputation
543
- };
400
+ module.exports = { groupByPass, checkRootDependencies, checkRootDataAvailability, fetchExistingResults, fetchComputationStatus, fetchGlobalComputationStatus, updateComputationStatus, runStandardComputationPass, runMetaComputationPass, runBatchPriceComputation };
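
Net API surface: every export survives the consolidation into `firestoreHelper` except `updateGlobalComputationStatus`, which is deleted outright along with its dotted-path `update()` and NOT_FOUND fallback. For orientation, a sketch of the round trip the new auto-sharding code is meant to guarantee (helper names are the ones added in this diff; the snippet itself is illustrative, not package code):

```js
// Write an arbitrarily large result, then read it back assembled.
async function roundTrip(result, docRef, logger) {
  const writes = await prepareAutoShardedWrites(result, docRef, logger);
  for (const w of writes) await w.ref.set(w.data, w.options); // commitBatchInChunks in the real path
  const snap = await docRef.get();
  if (snap.data()._sharded === true) {
    return (await hydrateAutoShardedResult(docRef, 'calc')).data; // reassembled from _shards
  }
  return snap.data(); // small results come back as a single document
}
```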