bulltrackers-module 1.0.215 → 1.0.217
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,13 @@
 /**
- * FILENAME:
- *
- *
- *
+ * FILENAME: computation-system/helpers/orchestration_helpers.js
+ * FEATURE: Dynamic Auto-Sharding (Transparent 1MB Limit Handling)
+ * * DESCRIPTION:
+ * This module orchestrates the execution of computations. It handles:
+ * 1. Data Availability Checks
+ * 2. Dependency Injection (fetching results from previous passes)
+ * 3. Transparent Auto-Sharding:
+ *    - Writes: Automatically detects if a result > 900KB. Splits it into a '_shards' subcollection.
+ *    - Reads: Automatically detects sharded pointers and re-assembles the data.
 */

 const { ComputationController } = require('../controllers/computation_controller');
@@ -13,60 +18,27 @@ const {
   getHistoryPartRefs, streamPortfolioData, streamHistoryData,
   getRelevantShardRefs, loadDataByRefs
 } = require('../utils/data_loader');
-
-// --- DYNAMIC LAYER LOADING ---
-// Replaces the old static import from 'math_primitives.js'
 const mathLayer = require('../layers/index.js');
-
 const pLimit = require('p-limit');

-// Mappings
-// (e.g. allowing 'math.compute' to resolve to 'MathPrimitives')
+// Mappings for backward compatibility
 const LEGACY_MAPPING = {
-  DataExtractor:
-  HistoryExtractor: 'history',
-  MathPrimitives: 'compute',
-  Aggregators: 'aggregate',
-  Validators: 'validate',
-  SignalPrimitives: 'signals',
-  SCHEMAS: 'schemas',
-  DistributionAnalytics: 'distribution',
-  TimeSeries: 'TimeSeries',
-  priceExtractor: 'priceExtractor',
-  InsightsExtractor: 'insights',
-  UserClassifier: 'classifier',
-  CognitiveBiases: 'bias',
-  SkillAttribution: 'skill',
-  Psychometrics: 'psychometrics'
+  DataExtractor: 'extract', HistoryExtractor: 'history', MathPrimitives: 'compute', Aggregators: 'aggregate', Validators: 'validate', SignalPrimitives: 'signals', SCHEMAS: 'schemas', DistributionAnalytics: 'distribution', TimeSeries: 'TimeSeries', priceExtractor: 'priceExtractor', InsightsExtractor: 'insights', UserClassifier: 'classifier', CognitiveBiases: 'bias', SkillAttribution: 'skill', Psychometrics: 'psychometrics'
 };

 function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[calc.pass] = acc[calc.pass] || []).push(calc); return acc; }, {}); }

-/**
- * --- PASSIVE DATA VALIDATION ---
- */
 function validateResultPatterns(logger, calcName, results, category) {
   if (category === 'speculator' || category === 'speculators') return;
-  const tickers
-  const
-
-  const sampleTicker = tickers.find(t => results[t] && typeof results[t] === 'object');
-  if (!sampleTicker) return;
-  const keys = Object.keys(results[sampleTicker]);
-  keys.forEach(key => {
+  const tickers = Object.keys(results); const totalItems = tickers.length; if (totalItems < 5) return;
+  const sampleTicker = tickers.find(t => results[t] && typeof results[t] === 'object'); if (!sampleTicker) return;
+  Object.keys(results[sampleTicker]).forEach(key => {
     if (key.startsWith('_')) return;
-    let nullCount
-
-
-
-
-    if (val === null) nullCount++;
-    if (val === undefined) undefinedCount++;
-    if (typeof val === 'number' && isNaN(val)) nanCount++;
-    }
-    if (nanCount === totalItems) { logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is NaN for 100% of ${totalItems} items.`);
-    } else if (undefinedCount === totalItems) { logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is UNDEFINED for 100% of ${totalItems} items.`); }
-    else if (nullCount > (totalItems * 0.9)) { logger.log('WARN', `[DataQuality] Calc '${calcName}' field '${key}' is NULL for ${nullCount}/${totalItems} items.`); }
+    let nullCount = 0, nanCount = 0, undefinedCount = 0;
+    for (const t of tickers) { const val = results[t][key]; if (val === null) nullCount++; if (val === undefined) undefinedCount++; if (typeof val === 'number' && isNaN(val)) nanCount++; }
+    if (nanCount === totalItems) logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is NaN for 100% of items.`);
+    else if (undefinedCount === totalItems) logger.log('ERROR', `[DataQuality] Calc '${calcName}' field '${key}' is UNDEFINED for 100% of items.`);
+    else if (nullCount > (totalItems * 0.9)) logger.log('WARN', `[DataQuality] Calc '${calcName}' field '${key}' is NULL for ${nullCount}/${totalItems} items.`);
   });
 }

@@ -74,11 +46,11 @@ function checkRootDependencies(calcManifest, rootDataStatus) {
   const missing = [];
   if (!calcManifest.rootDataDependencies) return { canRun: true, missing };
   for (const dep of calcManifest.rootDataDependencies) {
-    if (dep === 'portfolio'
-    else if (dep === 'insights' && !rootDataStatus.hasInsights)
-    else if (dep === 'social'
-    else if (dep === 'history'
-    else if (dep === 'price'
+    if (dep === 'portfolio' && !rootDataStatus.hasPortfolio) missing.push('portfolio');
+    else if (dep === 'insights' && !rootDataStatus.hasInsights) missing.push('insights');
+    else if (dep === 'social' && !rootDataStatus.hasSocial) missing.push('social');
+    else if (dep === 'history' && !rootDataStatus.hasHistory) missing.push('history');
+    else if (dep === 'price' && !rootDataStatus.hasPrices) missing.push('price');
   }
   return { canRun: missing.length === 0, missing };
 }
@@ -88,115 +60,107 @@ async function checkRootDataAvailability(dateStr, config, dependencies, earliest
   const dateToProcess = new Date(dateStr + 'T00:00:00Z');
   let portfolioRefs = [], historyRefs = [];
   let hasPortfolio = false, hasInsights = false, hasSocial = false, hasHistory = false, hasPrices = false, insightsData = null, socialData = null;
-
   try {
     const tasks = [];
-    if (dateToProcess >= earliestDates.portfolio) tasks.push(getPortfolioPartRefs
-    if (dateToProcess >= earliestDates.insights)
-    if (dateToProcess >= earliestDates.social)
-    if (dateToProcess >= earliestDates.history)
-
+    if (dateToProcess >= earliestDates.portfolio) tasks.push(getPortfolioPartRefs(config, dependencies, dateStr).then(r => { portfolioRefs = r; hasPortfolio = !!r.length; }));
+    if (dateToProcess >= earliestDates.insights) tasks.push(loadDailyInsights(config, dependencies, dateStr).then(r => { insightsData = r; hasInsights = !!r; }));
+    if (dateToProcess >= earliestDates.social) tasks.push(loadDailySocialPostInsights(config, dependencies, dateStr).then(r => { socialData = r; hasSocial = !!r; }));
+    if (dateToProcess >= earliestDates.history) tasks.push(getHistoryPartRefs(config, dependencies, dateStr).then(r => { historyRefs = r; hasHistory = !!r.length; }));
     if (dateToProcess >= earliestDates.price) { tasks.push(checkPriceDataAvailability(config, dependencies).then(r => { hasPrices = r; })); }
     await Promise.all(tasks);
     if (!(hasPortfolio || hasInsights || hasSocial || hasHistory || hasPrices)) return null;
-
-    return {
-      portfolioRefs,
-      historyRefs,
-      todayInsights: insightsData,
-      todaySocialPostInsights: socialData,
-      status: { hasPortfolio, hasInsights, hasSocial, hasHistory, hasPrices },
-      yesterdayPortfolioRefs: null
-    };
-
+    return { portfolioRefs, historyRefs, todayInsights: insightsData, todaySocialPostInsights: socialData, status: { hasPortfolio, hasInsights, hasSocial, hasHistory, hasPrices }, yesterdayPortfolioRefs: null };
   } catch (err) { logger.log('ERROR', `Error checking data: ${err.message}`); return null; }
 }

 async function firestoreHelper(action, { key, updates, config, db }) {
-
-
-
-
-
-
-    case 'fetchStatus': {
-      if (!key) throw new Error('fetchStatus requires a key');
-      const docRef = db.collection(collections.status).doc(key);
-      const snap = await docRef.get();
-      return snap.exists ? snap.data() : {};
-    }
-
-    case 'updateStatus': {
-      if (!key) throw new Error('updateStatus requires a key');
-      if (!updates || Object.keys(updates).length === 0) return;
-      const docRef = db.collection(collections.status).doc(key);
-      await docRef.set(updates, { merge: true });
-      return true;
+  const collections = { price: config.priceCollection || 'asset_prices', status: config.computationStatusCollection || 'computation_status', };
+  switch (action) {
+    case 'checkAvailability': try { const snapshot = await db.collection(collections.price).limit(1).get(); return !snapshot.empty; } catch (e) { return false; }
+    case 'fetchStatus': { if (!key) throw new Error('fetchStatus requires a key'); const docRef = db.collection(collections.status).doc(key); const snap = await docRef.get(); return snap.exists ? snap.data() : {}; }
+    case 'updateStatus': { if (!key) throw new Error('updateStatus requires a key'); if (!updates || Object.keys(updates).length === 0) return; const docRef = db.collection(collections.status).doc(key); await docRef.set(updates, { merge: true }); return true; }
+    default: throw new Error(`Unknown action: ${action}`);
   }
-
-    default: throw new Error(`Unknown action: ${action}`); }
 }

-async function checkPriceDataAvailability
-async function fetchComputationStatus
-async function fetchGlobalComputationStatus
-async function updateComputationStatus
-
+async function checkPriceDataAvailability(config, dependencies) { return firestoreHelper('checkAvailability', { config, db: dependencies.db }); }
+async function fetchComputationStatus(dateStr, config, { db }) { return firestoreHelper('fetchStatus', { key: dateStr, config, db }); }
+async function fetchGlobalComputationStatus(config, { db }) { return firestoreHelper('fetchStatus', { key: 'global_status', config, db }); }
+async function updateComputationStatus(dateStr, updates, config, { db }) { return firestoreHelper('updateStatus', { key: dateStr, updates, config, db }); }

+/**
+ * --- REFACTORED: fetchExistingResults ---
+ * Transparently handles both standard documents and auto-sharded documents.
+ * 1. Fetches the doc.
+ * 2. Checks for `_sharded: true` flag.
+ * 3. If sharded, fetches subcollection and merges data back into a single object.
+ */
 async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db }, includeSelf = false) {
-  const manifestMap
+  const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
   const calcsToFetch = new Set();
-  for (const calc of calcsInPass) { if (calc.dependencies)
+  for (const calc of calcsInPass) { if (calc.dependencies) calc.dependencies.forEach(d => calcsToFetch.add(normalizeName(d))); if (includeSelf && calc.isHistorical) calcsToFetch.add(normalizeName(calc.name)); }
   if (!calcsToFetch.size) return {};
   const fetched = {};
   const docRefs = [];
   const names = [];
+
+  // 1. Prepare Reads
   for (const name of calcsToFetch) {
     const m = manifestMap.get(name);
-    if (m) { docRefs.push(db.collection(config.resultsCollection).doc(dateStr)
-
+    if (m) { docRefs.push(db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(m.category || 'unknown').collection(config.computationsSubcollection).doc(name)); names.push(name); }
+  }
+
+  if (docRefs.length) {
+    const snaps = await db.getAll(...docRefs);
+    const hydrationPromises = [];
+
+    // 2. Process Initial Snapshots
+    snaps.forEach((doc, i) => { const name = names[i]; if (!doc.exists) return; const data = doc.data(); if (data._sharded === true) { hydrationPromises.push(hydrateAutoShardedResult(doc.ref, name)); } else if (data._completed) { fetched[name] = data; } }); // CHECK FOR AUTO-SHARDING FLAG
+
+
+    // 3. Hydrate Sharded Data in Parallel
+    if (hydrationPromises.length > 0) { const hydratedResults = await Promise.all(hydrationPromises); hydratedResults.forEach(res => { fetched[res.name] = res.data; }); }
+  }
   return fetched;
 }

+/**
+ * Helper: Fetches all docs in the '_shards' subcollection and merges them.
+ */
+async function hydrateAutoShardedResult(docRef, resultName) {
+  // Determine subcollection name (defaulting to '_shards')
+  const shardsCol = docRef.collection('_shards');
+  const snapshot = await shardsCol.get();
+  const assembledData = { _completed: true }; // Rebuild the object
+  snapshot.forEach(doc => { const chunk = doc.data(); Object.assign(assembledData, chunk); });
+  // Remove internal flags if they leaked into the shards
+  delete assembledData._sharded;
+  delete assembledData._completed;
+  return { name: resultName, data: assembledData };
+}
+
 async function streamAndProcess(dateStr, state, passName, config, deps, rootData, portfolioRefs, historyRefs, fetchedDeps, previousFetchedDeps) {
   const { logger } = deps;
-  const controller
-  const calcs
-  const streamingCalcs = calcs.filter(c => c.manifest.rootDataDependencies.includes('portfolio') || c.manifest.rootDataDependencies.includes('history')
-
+  const controller = new ComputationController(config, deps);
+  const calcs = Object.values(state).filter(c => c && c.manifest);
+  const streamingCalcs = calcs.filter(c => c.manifest.rootDataDependencies.includes('portfolio') || c.manifest.rootDataDependencies.includes('history'));
   if (streamingCalcs.length === 0) return;
-
+
   logger.log('INFO', `[${passName}] Streaming for ${streamingCalcs.length} computations...`);
-
   await controller.loader.loadMappings();
-  const prevDate
+  const prevDate = new Date(dateStr + 'T00:00:00Z'); prevDate.setUTCDate(prevDate.getUTCDate() - 1);
   const prevDateStr = prevDate.toISOString().slice(0, 10);
-
-  const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
+  const tP_iter = streamPortfolioData(config, deps, dateStr, portfolioRefs);
   const needsYesterdayPortfolio = streamingCalcs.some(c => c.manifest.isHistorical);
-  const yP_iter
-  const needsTradingHistory
-  const tH_iter
-
-  let yP_chunk = {};
-  let tH_chunk = {};
+  const yP_iter = (needsYesterdayPortfolio && rootData.yesterdayPortfolioRefs) ? streamPortfolioData(config, deps, prevDateStr, rootData.yesterdayPortfolioRefs) : null;
+  const needsTradingHistory = streamingCalcs.some(c => c.manifest.rootDataDependencies.includes('history'));
+  const tH_iter = (needsTradingHistory && historyRefs) ? streamHistoryData(config, deps, dateStr, historyRefs) : null;

+  let yP_chunk = {}, tH_chunk = {};
   for await (const tP_chunk of tP_iter) {
     if (yP_iter) yP_chunk = (await yP_iter.next()).value || {};
     if (tH_iter) tH_chunk = (await tH_iter.next()).value || {};
-
-    const promises = streamingCalcs.map(calc =>
-      controller.executor.executePerUser(
-        calc,
-        calc.manifest,
-        dateStr,
-        tP_chunk,
-        yP_chunk,
-        tH_chunk,
-        fetchedDeps,
-        previousFetchedDeps
-      )
-    );
+    const promises = streamingCalcs.map(calc => controller.executor.executePerUser(calc, calc.manifest, dateStr, tP_chunk, yP_chunk, tH_chunk, fetchedDeps, previousFetchedDeps));
     await Promise.all(promises);
   }
   logger.log('INFO', `[${passName}] Streaming complete.`);
@@ -211,13 +175,8 @@ async function runStandardComputationPass(date, calcs, passName, config, deps, r
     const prevStr = prev.toISOString().slice(0, 10);
     fullRoot.yesterdayPortfolioRefs = await getPortfolioPartRefs(config, deps, prevStr);
   }
-
   const state = {};
-  for (const c of calcs) {
-    try { const inst = new c.class(); inst.manifest = c; state[normalizeName(c.name)] = inst; logger.log('INFO', `${c.name} calculation running for ${dStr}`); }
-    catch (e) { logger.log('WARN', `Failed to init ${c.name}`); }
-  }
-
+  for (const c of calcs) { try { const inst = new c.class(); inst.manifest = c; state[normalizeName(c.name)] = inst; logger.log('INFO', `${c.name} calculation running for ${dStr}`); } catch (e) { logger.log('WARN', `Failed to init ${c.name}`); } }
   await streamAndProcess(dStr, state, passName, config, deps, fullRoot, rootData.portfolioRefs, rootData.historyRefs, fetchedDeps, previousFetchedDeps);
   return await commitResults(state, dStr, passName, config, deps, skipStatusWrite);
 }
@@ -226,12 +185,10 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch
   const controller = new ComputationController(config, deps);
   const dStr = date.toISOString().slice(0, 10);
   const state = {};
-
   for (const mCalc of calcs) {
     try {
       deps.logger.log('INFO', `${mCalc.name} calculation running for ${dStr}`);
-      const inst = new mCalc.class();
-      inst.manifest = mCalc;
+      const inst = new mCalc.class(); inst.manifest = mCalc;
       await controller.executor.executeOncePerDay(inst, mCalc, dStr, fetchedDeps, previousFetchedDeps);
       state[normalizeName(mCalc.name)] = inst;
     } catch (e) { deps.logger.log('ERROR', `Meta calc failed ${mCalc.name}: ${e.message}`); }
@@ -241,66 +198,38 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch

 /**
  * --- REFACTORED: commitResults ---
- *
- * If
- *
+ * Automatically detects result size.
+ * If > 900KB, it splits the result into chunks and writes to a subcollection.
+ * If < 900KB, it writes normally.
  */
 async function commitResults(stateObj, dStr, passName, config, deps, skipStatusWrite = false) {
   const successUpdates = {};
   const schemas = [];
-
-  // Iterate PER CALCULATION to isolate failures
   for (const name in stateObj) {
     const calc = stateObj[name];
     let hasData = false;
-
     try {
       const result = await calc.getResult();
       if (!result) { deps.logger.log('INFO', `${name} for ${dStr}: Skipped (Empty Result)`); continue; }
-
-
-      const
-
-
-
-
-      if (key.startsWith('sharded_')) {
-        const sData = result[key];
-        for (const colName in sData) {
-          const docsMap = sData[colName];
-          for (const docId in docsMap) { const ref = docId.includes('/') ? deps.db.doc(docId) : deps.db.collection(colName).doc(docId); shardedWrites.push({ ref, data: { ...docsMap[docId], _completed: true } }); } }
-        if (Object.keys(sData).length > 0) hasData = true;
-      } else { standardRes[key] = result[key]; }
+      const mainDocRef = deps.db.collection(config.resultsCollection).doc(dStr).collection(config.resultsSubcollection).doc(calc.manifest.category).collection(config.computationsSubcollection).doc(name);
+      // AUTO-SHARDING LOGIC
+      const updates = await prepareAutoShardedWrites(result, mainDocRef, deps.logger);
+      // Collect Schemas if present
+      if (calc.manifest.class.getSchema) {
+        const { class: _cls, ...safeMetadata } = calc.manifest;
+        schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata });
       }
-
-
-
-
-
-
-
-      hasData = true;
+      if (updates.length > 0) {
+        await commitBatchInChunks(config, deps, updates, `${name} Results`);
+        successUpdates[name] = calc.manifest.hash || true;
+        const isSharded = updates.some(u => u.data._sharded === true);
+        deps.logger.log('INFO', `${name} for ${dStr}: \u2714 Success (Written ${isSharded ? 'Sharded' : 'Standard'})`);
+      } else {
+        deps.logger.log('INFO', `${name} for ${dStr}: - Empty Data`);
       }
-
-      // 3. Queue Schema (Safe to accumulate)
-      if (calc.manifest.class.getSchema) { const { class: _cls, ...safeMetadata } = calc.manifest; schemas.push({ name, category: calc.manifest.category, schema: calc.manifest.class.getSchema(), metadata: safeMetadata }); }
-
-      // 4. ATTEMPT COMMIT FOR THIS CALCULATION ONLY
-      if (hasData) {
-        const allWritesForCalc = [...calcWrites, ...shardedWrites];
-        if (allWritesForCalc.length > 0) {
-          await commitBatchInChunks(config, deps, allWritesForCalc, `${name} Results`);
-          successUpdates[name] = calc.manifest.hash || true;
-          deps.logger.log('INFO', `${name} for ${dStr}: \u2714 Success (Written)`);
-        } else { deps.logger.log('INFO', `${name} for ${dStr}: - No Data to Write`); }
-      } else { deps.logger.log('INFO', `${name} for ${dStr}: - Empty`); }
     } catch (e) { deps.logger.log('ERROR', `${name} for ${dStr}: \u2716 FAILED Commit: ${e.message}`); }
   }
-
-  // Save Schemas (Best effort, isolated)
   if (schemas.length) batchStoreSchemas(deps, config, schemas).catch(() => { });
-
-  // Update Status Document (Only for the ones that succeeded)
   if (!skipStatusWrite && Object.keys(successUpdates).length > 0) {
     await updateComputationStatus(dStr, successUpdates, config, deps);
     deps.logger.log('INFO', `[${passName}] Updated status document for ${Object.keys(successUpdates).length} successful computations.`);
@@ -309,116 +238,115 @@ async function commitResults(stateObj, dStr, passName, config, deps, skipStatusW
 }

 /**
- *
+ * Accurately calculates the size of a value according to Firestore storage rules.
+ * Reference: https://firebase.google.com/docs/firestore/storage-size
  */
+function calculateFirestoreBytes(value) {
+  if (value === null) return 1;
+  if (value === undefined) return 0; // Firestore drops undefined fields
+  if (typeof value === 'boolean') return 1;
+  if (typeof value === 'number') return 8; // All numbers are 64-bit doubles or integers
+  if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1;
+  if (value instanceof Date) return 8; // Timestamps are 8 bytes
+  if (value.constructor && value.constructor.name === 'DocumentReference') { return Buffer.byteLength(value.path, 'utf8') + 16; }
+  if (Array.isArray(value)) { let sum = 0; for (const item of value) sum += calculateFirestoreBytes(item); return sum; }
+  // Handle Objects (Maps): Sum of (Key + 1 + Value)
+  if (typeof value === 'object') { let sum = 0; for (const k in value) { if (Object.prototype.hasOwnProperty.call(value, k)) { sum += (Buffer.byteLength(k, 'utf8') + 1) + calculateFirestoreBytes(value[k]); } } return sum; }
+  return 0; // Fallback
+}
+
+
+async function prepareAutoShardedWrites(result, docRef, logger) {
+  const SAFETY_THRESHOLD_BYTES = 1000 * 1024; // 1MB Limit (We target just under this)
+  const OVERHEAD_ALLOWANCE = 20 * 1024; // 20KB Safety margin for document path & metadata
+  const CHUNK_LIMIT = SAFETY_THRESHOLD_BYTES - OVERHEAD_ALLOWANCE;
+  const totalSize = calculateFirestoreBytes(result); // 1. Calculate Total Size Once (O(N))
+  const docPathSize = Buffer.byteLength(docRef.path, 'utf8') + 16; // Add the size of the document path itself (Firestore counts this against the 1MB limit)
+  if ((totalSize + docPathSize) < CHUNK_LIMIT) { const data = { ...result, _completed: true, _sharded: false }; return [{ ref: docRef, data, options: { merge: true } }]; } // CASE A: Fits in one document
+  logger.log('INFO', `[AutoShard] Result size ~${Math.round(totalSize/1024)}KB exceeds limit. Sharding...`);
+  const writes = [];
+  const shardCollection = docRef.collection('_shards');
+  let currentChunk = {};
+  let currentChunkSize = 0;
+  let shardIndex = 0;
+  for (const [key, value] of Object.entries(result)) { // 2. Efficient O(N) Loop
+    if (key.startsWith('_')) continue;
+    const keySize = Buffer.byteLength(key, 'utf8') + 1; // Calculate size of just this item
+    const valueSize = calculateFirestoreBytes(value);
+    const itemSize = keySize + valueSize;
+    if (currentChunkSize + itemSize > CHUNK_LIMIT) { // Check if adding this item would overflow the current chunk
+      // Flush current chunk
+      writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); // Overwrite
+      shardIndex++;
+      currentChunk = {};
+      currentChunkSize = 0;
+    }
+    // Add to current chunk
+    currentChunk[key] = value;
+    currentChunkSize += itemSize;
+  }
+  // Flush final chunk
+  if (Object.keys(currentChunk).length > 0) { writes.push({ ref: shardCollection.doc(`shard_${shardIndex}`), data: currentChunk, options: { merge: false } }); }
+  const pointerData = { _completed: true, _sharded: true, _shardCount: shardIndex + 1, _lastUpdated: new Date().toISOString() }; // Pointer Document
+  writes.push({ ref: docRef, data: pointerData, options: { merge: false } }); // Use merge: false to ensure we overwrite any previous non-sharded blob
+  return writes;
+}
+
 async function runBatchPriceComputation(config, deps, dateStrings, calcs, targetTickers = []) {
   const { logger, db, calculationUtils } = deps;
   const controller = new ComputationController(config, deps);
-
   const mappings = await controller.loader.loadMappings();
-
   let targetInstrumentIds = [];
   if (targetTickers && targetTickers.length > 0) {
     const tickerToInst = mappings.tickerToInstrument || {};
     targetInstrumentIds = targetTickers.map(t => tickerToInst[t]).filter(id => id);
-    if (targetInstrumentIds.length === 0) { logger.log('WARN', '[BatchPrice] Target tickers provided but no IDs found. Aborting.'); return; }
-
+    if (targetInstrumentIds.length === 0) { logger.log('WARN', '[BatchPrice] Target tickers provided but no IDs found. Aborting.'); return; }
+  }
   const allShardRefs = await getRelevantShardRefs(config, deps, targetInstrumentIds);
-
   if (!allShardRefs.length) { logger.log('WARN', '[BatchPrice] No relevant price shards found. Exiting.'); return; }
-
-  const OUTER_CONCURRENCY_LIMIT = 2;
-  const SHARD_BATCH_SIZE = 20;
-  const WRITE_BATCH_LIMIT = 50;
-
+  const OUTER_CONCURRENCY_LIMIT = 2, SHARD_BATCH_SIZE = 20, WRITE_BATCH_LIMIT = 50;
   logger.log('INFO', `[BatchPrice] Execution Plan: ${dateStrings.length} days, ${allShardRefs.length} shards. Concurrency: ${OUTER_CONCURRENCY_LIMIT}.`);
-
-  const shardChunks = [];
-  for (let i = 0; i < allShardRefs.length; i += SHARD_BATCH_SIZE) { shardChunks.push(allShardRefs.slice(i, i + SHARD_BATCH_SIZE)); }
-
+  const shardChunks = []; for (let i = 0; i < allShardRefs.length; i += SHARD_BATCH_SIZE) { shardChunks.push(allShardRefs.slice(i, i + SHARD_BATCH_SIZE)); }
   const outerLimit = pLimit(OUTER_CONCURRENCY_LIMIT);
-
   const chunkPromises = [];
   for (let index = 0; index < shardChunks.length; index++) {
     const shardChunkRefs = shardChunks[index];
     chunkPromises.push(outerLimit(async () => {
       try {
         logger.log('INFO', `[BatchPrice] Processing chunk ${index + 1}/${shardChunks.length} (${shardChunkRefs.length} shards)...`);
-
         const pricesData = await loadDataByRefs(config, deps, shardChunkRefs);
-
-        if (targetInstrumentIds.length > 0) {
-          const requestedSet = new Set(targetInstrumentIds);
-          for (const loadedInstrumentId in pricesData) { if (!requestedSet.has(loadedInstrumentId)) { delete pricesData[loadedInstrumentId]; } }
-        }
-
+        if (targetInstrumentIds.length > 0) { const requestedSet = new Set(targetInstrumentIds); for (const loadedInstrumentId in pricesData) { if (!requestedSet.has(loadedInstrumentId)) { delete pricesData[loadedInstrumentId]; } } }
         const writes = [];
-
         for (const dateStr of dateStrings) {
-
-          // --- DYNAMIC MATH CONTEXT CONSTRUCTION ---
           const dynamicMathContext = {};
-          for (const [key, value] of Object.entries(mathLayer)) { dynamicMathContext[key] = value;
-
-          const context = {
-            mappings,
-            prices: { history: pricesData },
-            date: { today: dateStr },
-            math: dynamicMathContext // Injected here
-          };
-
+          for (const [key, value] of Object.entries(mathLayer)) { dynamicMathContext[key] = value; if (LEGACY_MAPPING[key]) { dynamicMathContext[LEGACY_MAPPING[key]] = value;} }
+          const context = { mappings, prices: { history: pricesData }, date: { today: dateStr }, math: dynamicMathContext };
           for (const calcManifest of calcs) {
             try {
-              const instance = new calcManifest.class();
-              await instance.process(context);
-              const result = await instance.getResult();
-
+              const instance = new calcManifest.class(); await instance.process(context); const result = await instance.getResult();
               if (result && Object.keys(result).length > 0) {
-                let dataToWrite = result;
-                if (result.by_instrument) dataToWrite = result.by_instrument;
-
+                let dataToWrite = result; if (result.by_instrument) dataToWrite = result.by_instrument;
                 if (Object.keys(dataToWrite).length > 0) {
-                  const docRef = db.collection(config.resultsCollection).doc(dateStr)
-
+                  const docRef = db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(calcManifest.category).collection(config.computationsSubcollection).doc(normalizeName(calcManifest.name));
                   writes.push({ ref: docRef, data: { ...dataToWrite, _completed: true }, options: { merge: true } });
                 }
               }
             } catch (err) { logger.log('ERROR', `[BatchPrice] \u2716 Failed ${calcManifest.name} for ${dateStr}: ${err.message}`); }
           }
         }
-
         if (writes.length > 0) {
-          const commitBatches = [];
-          for (let i = 0; i < writes.length; i += WRITE_BATCH_LIMIT) { commitBatches.push(writes.slice(i, i + WRITE_BATCH_LIMIT)); }
-
+          const commitBatches = []; for (let i = 0; i < writes.length; i += WRITE_BATCH_LIMIT) { commitBatches.push(writes.slice(i, i + WRITE_BATCH_LIMIT)); }
           const commitLimit = pLimit(10);
-
           await Promise.all(commitBatches.map((batchWrites, bIndex) => commitLimit(async () => {
-            const batch = db.batch();
-
-
-            try { await calculationUtils.withRetry(() => batch.commit(), `BatchPrice-C${index}-B${bIndex}`);
-            } catch (commitErr) { logger.log('ERROR', `[BatchPrice] Commit failed for Chunk ${index} Batch ${bIndex}.`, { error: commitErr.message }); }
+            const batch = db.batch(); batchWrites.forEach(w => batch.set(w.ref, w.data, w.options));
+            try { await calculationUtils.withRetry(() => batch.commit(), `BatchPrice-C${index}-B${bIndex}`); } catch (commitErr) { logger.log('ERROR', `[BatchPrice] Commit failed for Chunk ${index} Batch ${bIndex}.`, { error: commitErr.message }); }
           })));
         }
-
       } catch (chunkErr) { logger.log('ERROR', `[BatchPrice] Fatal error processing Chunk ${index}.`, { error: chunkErr.message }); }
     }));
   }
-
   await Promise.all(chunkPromises);
   logger.log('INFO', '[BatchPrice] Optimization pass complete.');
 }

-module.exports = {
-  groupByPass,
-  checkRootDependencies,
-  checkRootDataAvailability,
-  fetchExistingResults,
-  fetchComputationStatus,
-  fetchGlobalComputationStatus,
-  updateComputationStatus,
-  runStandardComputationPass,
-  runMetaComputationPass,
-  runBatchPriceComputation
-};
+module.exports = { groupByPass, checkRootDependencies, checkRootDataAvailability, fetchExistingResults, fetchComputationStatus, fetchGlobalComputationStatus, updateComputationStatus, runStandardComputationPass, runMetaComputationPass, runBatchPriceComputation };
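
Note on the auto-sharding change introduced in this version: the new calculateFirestoreBytes / prepareAutoShardedWrites / hydrateAutoShardedResult helpers size a result using Firestore's storage rules, split it into sub-1MB shard documents under a '_shards' subcollection when it would not fit, and merge the shards back together on read. The sketch below re-implements that split-and-merge idea in plain Node, without Firestore, so the behaviour can be checked in isolation. It is illustrative only: the helper names (approxBytes, splitIntoShards, mergeShards), the ~980KB default chunk limit, and the sample data are our assumptions for the sketch, not part of the package's API.

// Standalone sketch (not from the package): mirrors the write-side chunking and
// read-side merge used by prepareAutoShardedWrites / hydrateAutoShardedResult.
// approxBytes follows Firestore's documented storage-size rules only loosely.
function approxBytes(value) {
  if (value === null) return 1;
  if (typeof value === 'boolean') return 1;
  if (typeof value === 'number') return 8;
  if (typeof value === 'string') return Buffer.byteLength(value, 'utf8') + 1;
  if (Array.isArray(value)) return value.reduce((sum, v) => sum + approxBytes(v), 0);
  if (value && typeof value === 'object') {
    return Object.entries(value).reduce(
      (sum, [k, v]) => sum + Buffer.byteLength(k, 'utf8') + 1 + approxBytes(v), 0);
  }
  return 0; // undefined and anything else contributes nothing
}

// Split a flat { key: value } result into chunks that each stay under chunkLimit,
// using the same greedy accumulate-and-flush loop as the module (the default here
// mirrors its 1000KB threshold minus a 20KB overhead allowance).
function splitIntoShards(result, chunkLimit = (1000 - 20) * 1024) {
  const shards = [];
  let current = {};
  let currentSize = 0;
  for (const [key, value] of Object.entries(result)) {
    if (key.startsWith('_')) continue; // internal flags live on the pointer doc
    const itemSize = Buffer.byteLength(key, 'utf8') + 1 + approxBytes(value);
    if (currentSize + itemSize > chunkLimit && Object.keys(current).length > 0) {
      shards.push(current); // flush the full chunk
      current = {};
      currentSize = 0;
    }
    current[key] = value;
    currentSize += itemSize;
  }
  if (Object.keys(current).length > 0) shards.push(current);
  return shards;
}

// Read side: reassembly is just a merge of every shard's fields, which is what
// hydrateAutoShardedResult does with the documents in the '_shards' subcollection.
function mergeShards(shards) {
  return Object.assign({}, ...shards);
}

// Round-trip check with ~2.5MB of fake per-ticker results: expect a few shards
// and an identical key count after merging.
const big = {};
for (let i = 0; i < 2500; i++) big[`TICKER_${i}`] = { score: i, note: 'x'.repeat(1000) };
const shards = splitIntoShards(big);
console.log(shards.length, Object.keys(mergeShards(shards)).length);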