bulltrackers-module 1.0.131 → 1.0.133
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/helpers/computation_pass_runner.js +55 -10
- package/functions/computation-system/helpers/orchestration_helpers.js +154 -34
- package/functions/computation-system/utils/data_loader.js +35 -0
- package/functions/computation-system/utils/utils.js +102 -23
- package/package.json +1 -1
|
@@ -6,9 +6,18 @@
|
|
|
6
6
|
* It reads its pass number from the config and executes only those calculations.
|
|
7
7
|
* This file contains the high-level "manual" of steps. The "how-to" logic
|
|
8
8
|
* is extracted into 'computation_system_utils.js'.
|
|
9
|
+
* --- MODIFIED: To use getEarliestDataDates and pass the date map to the orchestrator helpers. ---
|
|
10
|
+
* --- MODIFIED: To run date processing in parallel batches. ---
|
|
11
|
+
* --- MODIFIED: To fetch ALL existing results to enable incremental (skip) logic. ---
|
|
9
12
|
*/
|
|
10
|
-
|
|
11
|
-
|
|
13
|
+
|
|
14
|
+
// --- MODIFIED: Renamed fetchDependenciesForPass to fetchExistingResults ---
|
|
15
|
+
const { groupByPass, checkRootDataAvailability, fetchExistingResults, filterCalculations, runStandardComputationPass, runMetaComputationPass } = require('./orchestration_helpers.js');
|
|
16
|
+
// --- MODIFIED: Import getEarliestDataDates ---
|
|
17
|
+
const { getExpectedDateStrings, getEarliestDataDates } = require('../utils/utils.js');
|
|
18
|
+
|
|
19
|
+
// --- NEW: Parallel processing batch size ---
|
|
20
|
+
const PARALLEL_BATCH_SIZE = 7; // Process a week at a time
|
|
12
21
|
|
|
13
22
|
async function runComputationPass(config, dependencies, computationManifest) {
|
|
14
23
|
const { logger } = dependencies;
|
|
@@ -17,7 +26,12 @@ async function runComputationPass(config, dependencies, computationManifest) {
|
|
|
17
26
|
|
|
18
27
|
const yesterday = new Date(); yesterday.setUTCDate(yesterday.getUTCDate()-1);
|
|
19
28
|
const endDateUTC = new Date(Date.UTC(yesterday.getUTCFullYear(), yesterday.getUTCMonth(), yesterday.getUTCDate()));
|
|
20
|
-
|
|
29
|
+
|
|
30
|
+
// --- MODIFIED: Call new date function ---
|
|
31
|
+
const earliestDates = await getEarliestDataDates(config, dependencies);
|
|
32
|
+
const firstDate = earliestDates.absoluteEarliest; // Use the absolute earliest for the loop
|
|
33
|
+
// --- END MODIFICATION ---
|
|
34
|
+
|
|
21
35
|
const startDateUTC = firstDate ? new Date(Date.UTC(firstDate.getUTCFullYear(), firstDate.getUTCMonth(), firstDate.getUTCDate())) : new Date(config.earliestComputationDate+'T00:00:00Z');
|
|
22
36
|
const allExpectedDates = getExpectedDateStrings(startDateUTC, endDateUTC);
|
|
23
37
|
|
|
@@ -27,21 +41,52 @@ async function runComputationPass(config, dependencies, computationManifest) {
|
|
|
27
41
|
const standardCalcs = calcsInThisPass.filter(c => c.type==='standard');
|
|
28
42
|
const metaCalcs = calcsInThisPass.filter(c => c.type==='meta');
|
|
29
43
|
|
|
30
|
-
|
|
44
|
+
// --- NEW: Helper function to process a single date ---
|
|
45
|
+
const processDate = async (dateStr) => {
|
|
31
46
|
const dateToProcess = new Date(dateStr+'T00:00:00Z');
|
|
32
47
|
try {
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
|
|
48
|
+
// --- MODIFIED: Pass earliestDates map to checkRootDataAvailability ---
|
|
49
|
+
const rootData = await checkRootDataAvailability(dateStr, config, dependencies, earliestDates);
|
|
50
|
+
if (!rootData) {
|
|
51
|
+
logger.log('WARN', `[PassRunner] Skipping ${dateStr} for Pass ${passToRun}: No root data.`);
|
|
52
|
+
return; // Use 'return' instead of 'continue' for a mapped function
|
|
53
|
+
}
|
|
54
|
+
// --- END MODIFICATION ---
|
|
55
|
+
|
|
56
|
+
// --- MODIFIED: Fetch ALL existing results for this pass, not just dependencies ---
|
|
57
|
+
const existingResults = await fetchExistingResults(dateStr, calcsInThisPass, computationManifest, config, dependencies);
|
|
58
|
+
|
|
59
|
+
// --- MODIFIED: Pass existingResults to filterCalculations ---
|
|
60
|
+
const { standardCalcsToRun, metaCalcsToRun } = filterCalculations(standardCalcs, metaCalcs, rootData.status, existingResults, passToRun, dateStr, logger);
|
|
61
|
+
|
|
62
|
+
if (standardCalcsToRun.length === 0 && metaCalcsToRun.length === 0) {
|
|
63
|
+
logger.log('INFO', `[PassRunner] All calcs for ${dateStr} Pass ${passToRun} are already complete. Skipping.`);
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// --- MODIFIED: Pass existingResults (as fetchedDeps) to meta pass ---
|
|
36
68
|
if (standardCalcsToRun.length) await runStandardComputationPass(dateToProcess, standardCalcsToRun, `Pass ${passToRun} (Standard)`, config, dependencies, rootData);
|
|
37
|
-
if (metaCalcsToRun.length) await runMetaComputationPass(dateToProcess, metaCalcsToRun, `Pass ${passToRun} (Meta)`, config, dependencies,
|
|
38
|
-
|
|
69
|
+
if (metaCalcsToRun.length) await runMetaComputationPass(dateToProcess, metaCalcsToRun, `Pass ${passToRun} (Meta)`, config, dependencies, existingResults, rootData);
|
|
70
|
+
|
|
71
|
+
// Note: We no longer log "Completed" here, as the sub-functions do.
|
|
72
|
+
// logger.log('SUCCESS', `[PassRunner] Completed Pass ${passToRun} for ${dateStr}.`);
|
|
73
|
+
|
|
39
74
|
} catch (err) {
|
|
40
75
|
logger.log('ERROR', `[PassRunner] FAILED Pass ${passToRun} for ${dateStr}`, { errorMessage: err.message, stack: err.stack });
|
|
41
76
|
}
|
|
77
|
+
};
|
|
78
|
+
// --- END: Helper function ---
|
|
79
|
+
|
|
80
|
+
// --- NEW: Replace sequential loop with parallel batch loop ---
|
|
81
|
+
logger.log('INFO', `[PassRunner] Processing ${allExpectedDates.length} total dates in batches of ${PARALLEL_BATCH_SIZE}...`);
|
|
82
|
+
for (let i = 0; i < allExpectedDates.length; i += PARALLEL_BATCH_SIZE) {
|
|
83
|
+
const batch = allExpectedDates.slice(i, i + PARALLEL_BATCH_SIZE);
|
|
84
|
+
logger.log('INFO', `[PassRunner] Processing batch ${Math.floor(i / PARALLEL_BATCH_SIZE) + 1}/${Math.ceil(allExpectedDates.length / PARALLEL_BATCH_SIZE)} (Dates: ${batch[0]}...${batch[batch.length-1]})`);
|
|
85
|
+
await Promise.all(batch.map(dateStr => processDate(dateStr)));
|
|
42
86
|
}
|
|
87
|
+
// --- END: Parallel batch loop ---
|
|
43
88
|
|
|
44
89
|
logger.log('INFO', `[PassRunner] Pass ${passToRun} orchestration finished.`);
|
|
45
90
|
}
|
|
46
91
|
|
|
47
|
-
module.exports = { runComputationPass };
|
|
92
|
+
module.exports = { runComputationPass };
|
|
@@ -1,60 +1,133 @@
|
|
|
1
1
|
const { FieldPath } = require('@google-cloud/firestore');
|
|
2
|
-
|
|
2
|
+
// --- MODIFIED: Import streamPortfolioData ---
|
|
3
|
+
const { getPortfolioPartRefs, loadFullDayMap, loadDataByRefs, loadDailyInsights, loadDailySocialPostInsights, getHistoryPartRefs, streamPortfolioData } = require('../utils/data_loader.js');
|
|
3
4
|
const { normalizeName, commitBatchInChunks } = require('../utils/utils.js');
|
|
4
5
|
|
|
5
6
|
/** Stage 1: Group manifest by pass number */
|
|
6
7
|
function groupByPass(manifest) {
  // Buckets are keyed by each entry's `pass` value; entries keep manifest order within a bucket.
  const buckets = {};
  manifest.forEach((entry) => {
    if (!buckets[entry.pass]) {
      buckets[entry.pass] = [];
    }
    buckets[entry.pass].push(entry);
  });
  return buckets;
}
|
|
7
8
|
|
|
8
|
-
/**
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
/**
 * Stage 2: Check root data dependencies for a calc.
 *
 * Compares the calculation's declared `rootDataDependencies` against the
 * availability flags for the date being processed and reports exactly which
 * ones are missing, so callers can log why a calc was skipped.
 *
 * Only 'portfolio', 'insights', 'social' and 'history' are recognised; any
 * other dependency name is ignored (i.e. treated as satisfied).
 *
 * @param {object} calcManifest - Manifest entry; may carry a `rootDataDependencies` array.
 * @param {object} rootDataStatus - Flags `hasPortfolio`, `hasInsights`, `hasSocial`, `hasHistory`.
 *   A null/undefined status is treated as "nothing available" rather than throwing.
 * @returns {{canRun: boolean, missing: string[]}} `missing` lists unmet dependencies in declaration order.
 */
function checkRootDependencies(calcManifest, rootDataStatus) {
  // Dependency name -> status flag that must be truthy for it to count as available.
  const statusFlagFor = new Map([
    ['portfolio', 'hasPortfolio'],
    ['insights', 'hasInsights'],
    ['social', 'hasSocial'],
    ['history', 'hasHistory'],
  ]);

  const required = calcManifest.rootDataDependencies || [];
  const missing = required.filter(
    (dep) => statusFlagFor.has(dep) && !rootDataStatus?.[statusFlagFor.get(dep)],
  );

  return { canRun: missing.length === 0, missing };
}
|
|
13
24
|
|
|
14
|
-
/**
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
25
|
+
/**
 * Stage 3: Check root data availability for a date.
 *
 * Queries the four root data sources (portfolio part refs, daily insights,
 * daily social post insights, history part refs) in parallel, skipping any
 * source whose earliest known date is after `dateStr`.
 *
 * @param {string} dateStr - Date to check, `YYYY-MM-DD`.
 * @param {object} config - Computation system configuration, forwarded to the loaders.
 * @param {object} dependencies - Injected dependencies; `logger` is used here.
 * @param {object} [earliestDates] - Map with `portfolio`, `insights`, `social`, `history` Date entries.
 *   A missing map or missing entry means "no known lower bound", so that source is always queried.
 *   (Previously a missing map raised a TypeError that was swallowed, making every date look empty.)
 * @returns {Promise<object|null>} `{ portfolioRefs, insightsData, socialData, historyRefs, status }`,
 *   or null when no source has data for the date or when a loader fails.
 */
async function checkRootDataAvailability(dateStr, config, dependencies, earliestDates) {
  const { logger } = dependencies;
  logger.log('INFO', `[PassRunner] Checking root data for ${dateStr}...`);
  const dateToProcess = new Date(dateStr + 'T00:00:00Z');

  // A source is worth querying when we have no earliest date for it, or the date is not before it.
  const shouldQuery = (source) => {
    const earliest = earliestDates?.[source];
    return earliest == null || dateToProcess >= earliest;
  };

  try {
    // Skipped sources resolve to the same "empty" value the loaders would otherwise be compared against.
    const [portfolioRefs, insightsData, socialData, historyRefs] = await Promise.all([
      shouldQuery('portfolio') ? getPortfolioPartRefs(config, dependencies, dateStr) : [],
      shouldQuery('insights') ? loadDailyInsights(config, dependencies, dateStr) : null,
      shouldQuery('social') ? loadDailySocialPostInsights(config, dependencies, dateStr) : null,
      shouldQuery('history') ? getHistoryPartRefs(config, dependencies, dateStr) : [],
    ]);

    const status = {
      hasPortfolio: Boolean(portfolioRefs?.length),
      hasInsights: Boolean(insightsData),
      hasSocial: Boolean(socialData),
      hasHistory: Boolean(historyRefs?.length),
    };

    if (!Object.values(status).some(Boolean)) {
      logger.log('WARN', `[PassRunner] No root data for ${dateStr}.`);
      return null;
    }
    return { portfolioRefs, insightsData, socialData, historyRefs, status };
  } catch (err) {
    logger.log('ERROR', `[PassRunner] Error checking data for ${dateStr}`, { errorMessage: err.message });
    return null;
  }
}
|
|
31
49
|
|
|
32
|
-
|
|
33
|
-
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
/**
 * Stage 4: Fetch existing computed results needed to plan a pass for one date.
 *
 * Looks up the stored result document of every calculation in the pass (so
 * completed work can be skipped) AND of every calculation those calcs list in
 * `dependencies` (so meta calcs can verify and consume results produced by
 * earlier passes). Fetching only the in-pass calcs would leave every
 * cross-pass dependency looking "missing" to filterCalculations.
 *
 * @param {string} dateStr - Date document id, `YYYY-MM-DD`.
 * @param {object[]} calcsInPass - Manifest entries of the pass being run.
 * @param {object[]} fullManifest - Complete manifest, used to resolve each calc's `category`.
 * @param {object} config - Supplies `resultsCollection`, `resultsSubcollection`, `computationsSubcollection`.
 * @param {{db: object, logger: object}} deps - Firestore handle and logger.
 * @returns {Promise<Object<string, object|null>>} Map keyed by normalized calc name; the value is
 *   the document data, or null when the document does not exist. Names without a manifest entry
 *   are logged and omitted.
 */
async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db, logger }) {
  const manifestMap = new Map(fullManifest.map((c) => [normalizeName(c.name), c]));

  // Collect the pass's own calcs plus everything they depend on (deduplicated).
  const namesToFetch = new Set();
  for (const calc of calcsInPass) {
    namesToFetch.add(normalizeName(calc.name));
    for (const dep of calc.dependencies || []) {
      namesToFetch.add(normalizeName(dep));
    }
  }
  if (!namesToFetch.size) return {};

  logger.log('INFO', `[PassRunner] Checking for ${namesToFetch.size} existing results for ${dateStr}...`);

  // docRefs[i] and fetchedNames[i] stay aligned so results can be keyed after the batched read.
  const docRefs = [];
  const fetchedNames = [];
  for (const calcName of namesToFetch) {
    const calcManifest = manifestMap.get(calcName);
    if (!calcManifest) {
      logger.log('ERROR', `[PassRunner] Missing manifest for ${calcName}`);
      continue;
    }
    docRefs.push(
      db.collection(config.resultsCollection)
        .doc(dateStr)
        .collection(config.resultsSubcollection)
        .doc(calcManifest.category || 'unknown')
        .collection(config.computationsSubcollection)
        .doc(calcName),
    );
    fetchedNames.push(calcName);
  }

  const fetched = {};
  if (docRefs.length) {
    const snapshots = await db.getAll(...docRefs);
    snapshots.forEach((doc, i) => {
      fetched[fetchedNames[i]] = doc.exists ? doc.data() : null;
    });
  }
  return fetched;
}
|
|
49
77
|
|
|
50
|
-
/** Stage 5: Filter calculations
|
|
51
|
-
|
|
78
|
+
/**
 * Stage 5: Filter calculations down to the ones that still need to run.
 *
 * A calc is dropped when:
 *  - a result for it already exists (incremental skip),
 *  - its root data dependencies are not all available for the date, or
 *  - (meta calcs only) any computed dependency has no existing result.
 *
 * `existingResults` is keyed by normalized calc name, so the "already done"
 * lookup normalizes the manifest name first; the raw name is still accepted
 * as a fallback so that maps keyed by raw names keep working.
 *
 * @param {object[]} standardCalcs - Standard manifest entries for the pass.
 * @param {object[]} metaCalcs - Meta manifest entries for the pass.
 * @param {object} rootDataStatus - Availability flags passed through to checkRootDependencies.
 * @param {Object<string, object|null>} existingResults - Stored results by calc name (null = not stored).
 * @param {number|string} passToRun - Pass identifier, used in log messages only.
 * @param {string} dateStr - Date being processed, used in log messages only.
 * @param {object} logger - Logger exposing `log(level, message)`.
 * @returns {{standardCalcsToRun: object[], metaCalcsToRun: object[]}}
 */
function filterCalculations(standardCalcs, metaCalcs, rootDataStatus, existingResults, passToRun, dateStr, logger) {
  const results = existingResults || {};
  const hasResult = (name) => Boolean(results[normalizeName(name)] || results[name]);

  const standardCalcsToRun = standardCalcs.filter((c) => {
    if (hasResult(c.name)) {
      logger.log('TRACE', `[Pass ${passToRun}] Skipping ${c.name} for ${dateStr}. Result already exists.`);
      return false;
    }

    const { canRun, missing } = checkRootDependencies(c, rootDataStatus);
    if (!canRun) {
      logger.log('INFO', `[Pass ${passToRun}] Skipping ${c.name} for ${dateStr}. Missing root data: [${missing.join(', ')}]`);
    }
    return canRun;
  });

  const metaCalcsToRun = metaCalcs.filter((c) => {
    if (hasResult(c.name)) {
      logger.log('TRACE', `[Pass ${passToRun} Meta] Skipping ${c.name} for ${dateStr}. Result already exists.`);
      return false;
    }

    // 1. Root data must be present.
    const { canRun, missing: missingRoot } = checkRootDependencies(c, rootDataStatus);
    if (!canRun) {
      logger.log('INFO', `[Pass ${passToRun} Meta] Skipping ${c.name} for ${dateStr}. Missing root data: [${missingRoot.join(', ')}]`);
      return false;
    }

    // 2. Every computed dependency must already have a stored result.
    const missingDeps = (c.dependencies || [])
      .map((dep) => normalizeName(dep))
      .filter((dep) => !results[dep]);
    if (missingDeps.length > 0) {
      logger.log('WARN', `[Pass ${passToRun} Meta] Skipping ${c.name} for ${dateStr}. Missing computed deps: [${missingDeps.join(', ')}]`);
      return false;
    }

    return true;
  });

  return { standardCalcsToRun, metaCalcsToRun };
}
|
|
57
129
|
|
|
130
|
+
|
|
58
131
|
/** Stage 6: Initialize calculator instances */
|
|
59
132
|
function initializeCalculators(calcs, logger) {
  // Result map: normalized calc name -> calculator instance, or null when it could not be created.
  const instances = {};

  for (const manifestEntry of calcs) {
    const key = normalizeName(manifestEntry.name);
    const CalcClass = manifestEntry.class;

    // Guard: without a constructor there is nothing to instantiate.
    if (typeof CalcClass !== 'function') {
      logger.warn(`Class missing ${key}`);
      instances[key] = null;
      continue;
    }

    try {
      const instance = new CalcClass();
      // Each instance carries its manifest entry so later stages can read it from the instance.
      instance.manifest = manifestEntry;
      instances[key] = instance;
    } catch (initError) {
      logger.warn(`Init failed ${key}`, { errorMessage: initError.message });
      instances[key] = null;
    }
  }

  return instances;
}
|
|
60
133
|
|
|
@@ -66,21 +139,65 @@ async function loadHistoricalData(date, calcs, config, deps, rootData) { const u
|
|
|
66
139
|
await Promise.all(tasks); return updated;
|
|
67
140
|
}
|
|
68
141
|
|
|
69
|
-
/** Stage 8: Stream and process data for standard calculations
|
|
70
|
-
|
|
142
|
+
/**
 * Stage 8: Stream and process data for standard calculations.
 *
 * Consumes portfolio data chunk by chunk from the `streamPortfolioData` async
 * generator and feeds every user's portfolio to each initialized calculator's
 * `process(today, yesterday, uid, context, ...rootData)` method. One chunk is
 * referenced at a time.
 *
 * Per-calc routing, as implemented below:
 *  - 'socialPosts' / 'insights' calcs are only invoked for the first user seen;
 *  - historical calcs are skipped for users with no yesterday portfolio;
 *  - 'speculators' calcs are skipped for normal users, and every other calc except
 *    'users-processed' is skipped for speculators (users with `PublicPositions`).
 * Errors thrown by a calc's `process` are logged at WARN and do not stop the run.
 *
 * NOTE(review): because the user-type filter also applies to social/insights calcs,
 * they never run if the first streamed user is a speculator — confirm this is intended.
 *
 * @param {string} dateStr - Date being processed, `YYYY-MM-DD`.
 * @param {Object<string, object|null>} state - Calculator instances by name (null entries are skipped).
 * @param {string} passName - Pass label (currently unused here).
 * @param {object} config - Computation system configuration.
 * @param {object} deps - Injected dependencies; uses `logger` and `calculationUtils.loadInstrumentMappings`.
 * @param {object} rootData - Today/yesterday insights, social, history data and `yesterdayPortfolios`.
 * @returns {Promise<void>}
 */
async function streamAndProcess(dateStr, state, passName, config, deps, rootData) {
  const { logger, calculationUtils } = deps;
  const {
    todayInsights,
    yesterdayInsights,
    todaySocialPostInsights,
    yesterdaySocialPostInsights,
    todayHistoryData,
    yesterdayHistoryData,
    yesterdayPortfolios,
  } = rootData;

  // Load the instrument mappings once and share them across every user and chunk.
  const { instrumentToTicker, instrumentToSector } = await calculationUtils.loadInstrumentMappings();
  const context = {
    instrumentMappings: instrumentToTicker,
    sectorMapping: instrumentToSector,
    todayDateStr: dateStr,
    dependencies: deps,
    config,
  };

  let firstUser = true;

  for await (const chunk of streamPortfolioData(config, deps, dateStr)) {
    for (const [uid, portfolio] of Object.entries(chunk ?? {})) {
      if (!portfolio) continue;

      const userType = portfolio.PublicPositions ? 'speculator' : 'normal';
      context.userType = userType;

      for (const [name, calc] of Object.entries(state ?? {})) {
        if (!calc || typeof calc.process !== 'function') continue;

        const { category, isHistorical } = calc.manifest;
        const isSocialOrInsights = category === 'socialPosts' || category === 'insights';
        const isSpec = category === 'speculators';

        if (isSocialOrInsights && !firstUser) continue;

        // Historical calcs need yesterday's portfolio; a missing map is treated as "no data".
        let yesterdayPortfolio = null;
        if (isHistorical) {
          yesterdayPortfolio = yesterdayPortfolios?.[uid];
          if (!yesterdayPortfolio) continue;
        }

        const wrongAudience =
          (userType === 'normal' && isSpec) ||
          (userType === 'speculator' && !isSpec && name !== 'users-processed');
        if (wrongAudience) continue;

        try {
          await calc.process(
            portfolio,
            yesterdayPortfolio,
            uid,
            context,
            todayInsights,
            yesterdayInsights,
            todaySocialPostInsights,
            yesterdaySocialPostInsights,
            todayHistoryData,
            yesterdayHistoryData,
          );
        } catch (e) {
          logger.log('WARN', `Process error ${name} for ${uid}`, { err: e.message });
        }
      }

      firstUser = false;
    }
    // `chunk` goes out of scope here, before the next one is requested.
  }
}
|
|
186
|
+
// --- END MODIFICATION ---
|
|
76
187
|
|
|
77
188
|
/** Stage 9: Run standard computations */
|
|
78
189
|
async function runStandardComputationPass(date, calcs, passName, config, deps, rootData) {
|
|
79
190
|
const dStr = date.toISOString().slice(0, 10), logger = deps.logger;
|
|
191
|
+
// --- MODIFIED: Log only if there are calcs to run ---
|
|
192
|
+
if (calcs.length === 0) return;
|
|
80
193
|
logger.log('INFO', `[${passName}] Running ${dStr} with ${calcs.length} calcs.`);
|
|
194
|
+
|
|
195
|
+
// --- MODIFIED: We no longer need portfolioRefs in fullRoot, as streamAndProcess handles it.
|
|
81
196
|
const fullRoot = await loadHistoricalData(date, calcs, config, deps, rootData);
|
|
82
197
|
const state = initializeCalculators(calcs, logger);
|
|
83
|
-
|
|
198
|
+
|
|
199
|
+
// --- MODIFIED: Call to streamAndProcess no longer passes portfolioRefs ---
|
|
200
|
+
await streamAndProcess(dStr, state, passName, config, deps, fullRoot);
|
|
84
201
|
|
|
85
202
|
// --- START: FULL COMMIT LOGIC ---
|
|
86
203
|
let success = 0;
|
|
@@ -155,6 +272,8 @@ async function runStandardComputationPass(date, calcs, passName, config, deps, r
|
|
|
155
272
|
/** Stage 10: Run meta computations */
|
|
156
273
|
async function runMetaComputationPass(date, calcs, passName, config, deps, fetchedDeps, rootData) {
|
|
157
274
|
const dStr = date.toISOString().slice(0, 10), logger = deps.logger;
|
|
275
|
+
// --- MODIFIED: Log only if there are calcs to run ---
|
|
276
|
+
if (calcs.length === 0) return;
|
|
158
277
|
logger.log('INFO', `[${passName}] Running ${dStr} with ${calcs.length} calcs.`);
|
|
159
278
|
|
|
160
279
|
// --- START: FULL COMMIT LOGIC ---
|
|
@@ -231,4 +350,5 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch
|
|
|
231
350
|
}
|
|
232
351
|
|
|
233
352
|
|
|
234
|
-
|
|
353
|
+
// --- MODIFIED: Export new function name ---
|
|
354
|
+
module.exports = { groupByPass, checkRootDataAvailability, fetchExistingResults, filterCalculations, runStandardComputationPass, runMetaComputationPass };
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @fileoverview Data loader sub-pipes for the Computation System.
|
|
3
3
|
* REFACTORED: Now stateless and receive dependencies.
|
|
4
|
+
* --- NEW: Added streamPortfolioData async generator ---
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
// <<< FIX: REMOVED all top-level 'require' and 'dependencies' lines >>>
|
|
@@ -165,6 +166,39 @@ async function getHistoryPartRefs(config, deps, dateString) {
|
|
|
165
166
|
return allPartRefs;
|
|
166
167
|
}
|
|
167
168
|
|
|
169
|
+
// --- NEW: Stage 7: Stream portfolio data in chunks ---
|
|
170
|
+
/**
 * Streams portfolio data in chunks for a given date.
 *
 * This is an async generator: it resolves the portfolio part refs for the
 * date, then loads and yields them one batch at a time via `loadDataByRefs`,
 * so the consumer only ever needs to hold a single batch.
 *
 * @param {object} config - The computation system configuration object. `partRefBatchSize`
 *   sets the number of refs per chunk; anything that is not a positive integer falls back to 50
 *   (a zero/negative step would otherwise never advance the loop).
 * @param {object} deps - Contains db, logger, calculationUtils.
 * @param {string} dateString - The date in YYYY-MM-DD format.
 * @yields {object} Whatever `loadDataByRefs` returns for one batch of refs.
 */
async function* streamPortfolioData(config, deps, dateString) {
  const { logger } = deps;
  const refs = await getPortfolioPartRefs(config, deps, dateString);

  // Treat a nullish result the same as an empty list.
  if (!refs?.length) {
    logger.log('WARN', `[streamPortfolioData] No portfolio refs found for ${dateString}. Stream is empty.`);
    return;
  }

  // Use the same batch size setting as loadDataByRefs for consistency.
  const configuredSize = Number(config.partRefBatchSize);
  const batchSize = Number.isInteger(configuredSize) && configuredSize > 0 ? configuredSize : 50;

  logger.log('INFO', `[streamPortfolioData] Streaming ${refs.length} portfolio parts in chunks of ${batchSize}...`);

  for (let start = 0; start < refs.length; start += batchSize) {
    const batchRefs = refs.slice(start, start + batchSize);
    // Load one chunk and hand it to the consumer before loading the next.
    yield await loadDataByRefs(config, deps, batchRefs);
  }

  logger.log('INFO', `[streamPortfolioData] Finished streaming for ${dateString}.`);
}
|
|
199
|
+
// --- END: Stage 7 ---
|
|
200
|
+
|
|
201
|
+
|
|
168
202
|
module.exports = {
|
|
169
203
|
getPortfolioPartRefs,
|
|
170
204
|
loadDataByRefs,
|
|
@@ -172,4 +206,5 @@ module.exports = {
|
|
|
172
206
|
loadDailyInsights,
|
|
173
207
|
loadDailySocialPostInsights,
|
|
174
208
|
getHistoryPartRefs,
|
|
209
|
+
streamPortfolioData, // <-- EXPORT NEW FUNCTION
|
|
175
210
|
};
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
* @fileoverview Computation system sub-pipes and utils.
|
|
3
3
|
* REFACTORED: Now stateless and receive dependencies where needed.
|
|
4
4
|
* DYNAMIC: Categorization logic is removed, replaced by manifest.
|
|
5
|
+
* --- MODIFIED: getFirstDateFromSourceData is now getEarliestDataDates
|
|
6
|
+
* and queries all data sources to build an availability map. ---
|
|
5
7
|
*/
|
|
6
|
-
/** --- Computation System Sub-Pipes & Utils (Stateless) --- */
|
|
8
|
+
/** --- Computation System Sub-Pipes & Utils (Stateless, Dependency-Injection) --- */
|
|
7
9
|
|
|
8
10
|
const { FieldValue, FieldPath } = require('@google-cloud/firestore');
|
|
9
11
|
|
|
@@ -46,13 +48,46 @@ function getExpectedDateStrings(startDate, endDate) {
|
|
|
46
48
|
return dateStrings;
|
|
47
49
|
}
|
|
48
50
|
|
|
49
|
-
/**
|
|
51
|
+
/**
 * Stage 4: Get the earliest date in a *flat* collection where doc IDs are dates.
 *
 * Runs a single ordered query for the first document id >= '2000-01-01' and
 * interprets that id as a `YYYY-MM-DD` UTC date.
 *
 * @param {object} config - Computation system configuration (not read here).
 * @param {object} deps - Injected dependencies; uses `db`, `logger`, `calculationUtils.withRetry`.
 * @param {string} collectionName - Collection to inspect.
 * @returns {Promise<Date|null>} UTC-midnight Date of the earliest document, or null when the name is
 *   missing, the collection has no matching document, the first id is not a valid calendar date,
 *   or the query fails (failures are logged, not thrown).
 */
async function getFirstDateFromSimpleCollection(config, deps, collectionName) {
  const { db, logger, calculationUtils } = deps;
  const { withRetry } = calculationUtils;

  try {
    if (!collectionName) {
      logger.log('WARN', `[Core Utils] Collection name not provided for simple date query.`);
      return null;
    }

    const query = db.collection(collectionName)
      .where(FieldPath.documentId(), '>=', '2000-01-01')
      .orderBy(FieldPath.documentId(), 'asc')
      .limit(1);

    const snapshot = await withRetry(() => query.get(), `GetEarliestDoc(${collectionName})`);
    if (snapshot.empty) return null;

    const firstId = snapshot.docs[0].id;
    if (!/^\d{4}-\d{2}-\d{2}$/.test(firstId)) return null;

    // The pattern alone admits ids such as '2024-13-45'; an Invalid Date must not leak to callers,
    // where Math.min / toISOString would turn it into NaN or a RangeError.
    const parsed = new Date(firstId + 'T00:00:00Z');
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  } catch (e) {
    logger.log('ERROR', `GetFirstDate failed for ${collectionName}`, { errorMessage: e.message });
  }
  return null;
}
|
|
79
|
+
|
|
80
|
+
/** Stage 4: Get the earliest date in a sharded collection */
|
|
50
81
|
async function getFirstDateFromCollection(config, deps, collectionName) {
|
|
51
82
|
const { db, logger, calculationUtils } = deps;
|
|
52
83
|
const { withRetry } = calculationUtils;
|
|
53
84
|
|
|
54
85
|
let earliestDate = null;
|
|
55
86
|
try {
|
|
87
|
+
if (!collectionName) {
|
|
88
|
+
logger.log('WARN', `[Core Utils] Collection name not provided for sharded date query.`);
|
|
89
|
+
return null;
|
|
90
|
+
}
|
|
56
91
|
const blockDocRefs = await withRetry(() => db.collection(collectionName).listDocuments(), `GetBlocks(${collectionName})`);
|
|
57
92
|
if (!blockDocRefs.length) { logger.log('WARN', `No block documents in collection: ${collectionName}`); return null; }
|
|
58
93
|
|
|
@@ -75,26 +110,69 @@ async function getFirstDateFromCollection(config, deps, collectionName) {
|
|
|
75
110
|
return earliestDate;
|
|
76
111
|
}
|
|
77
112
|
|
|
78
|
-
/**
|
|
79
|
-
|
|
113
|
+
/**
 * Stage 5: Determine the earliest date from *all* source data.
 *
 * Queries the six source collections in parallel (normal/speculator portfolios,
 * normal/speculator history, insights, social insights) and builds a per-source
 * availability map.
 *
 * @param {object} config - Supplies the six collection names and `earliestComputationDate`
 *   (`YYYY-MM-DD`), used as the fallback start date when no source reports any data.
 *   If it is missing or unparseable, '2023-01-01' is used.
 * @param {object} deps - Injected dependencies, forwarded to the collection helpers; uses `logger`.
 * @returns {Promise<{portfolio: Date, history: Date, insights: Date, social: Date, absoluteEarliest: Date}>}
 *   Sources with no data get the far-future sentinel 2999-12-31 so that
 *   `date >= earliest` checks are false for every real date.
 */
async function getEarliestDataDates(config, deps) {
  const { logger } = deps;
  logger.log('INFO', 'Querying for earliest date from ALL source data collections...');

  // Each helper resolves to null on error or when its collection is empty.
  const [
    investorDate,
    speculatorDate,
    investorHistoryDate,
    speculatorHistoryDate,
    insightsDate,
    socialDate,
  ] = await Promise.all([
    getFirstDateFromCollection(config, deps, config.normalUserPortfolioCollection),
    getFirstDateFromCollection(config, deps, config.speculatorPortfolioCollection),
    getFirstDateFromCollection(config, deps, config.normalUserHistoryCollection),
    getFirstDateFromCollection(config, deps, config.speculatorHistoryCollection),
    getFirstDateFromSimpleCollection(config, deps, config.insightsCollectionName),
    getFirstDateFromSimpleCollection(config, deps, config.socialInsightsCollectionName),
  ]);

  const isValidDate = (value) => value instanceof Date && !Number.isNaN(value.getTime());

  // Earliest of the usable dates, or null when none is usable.
  const getMinDate = (...dates) => {
    const validDates = dates.filter(isValidDate);
    if (validDates.length === 0) return null;
    return new Date(Math.min(...validDates.map((d) => d.getTime())));
  };

  const earliestPortfolioDate = getMinDate(investorDate, speculatorDate);
  const earliestHistoryDate = getMinDate(investorHistoryDate, speculatorHistoryDate);
  const earliestInsightsDate = getMinDate(insightsDate);
  const earliestSocialDate = getMinDate(socialDate);

  const absoluteEarliest = getMinDate(
    earliestPortfolioDate,
    earliestHistoryDate,
    earliestInsightsDate,
    earliestSocialDate,
  );

  // Fallback start date. The default must be applied to the config value itself:
  // `value + 'T00:00:00Z' || default` is always truthy, so with an unset value it
  // produced an Invalid Date and the toISOString() call below threw a RangeError.
  const configuredStart = new Date(`${config.earliestComputationDate}T00:00:00Z`);
  const fallbackDate = isValidDate(configuredStart) ? configuredStart : new Date('2023-01-01T00:00:00Z');

  const farFuture = () => new Date('2999-12-31');
  const result = {
    portfolio: earliestPortfolioDate || farFuture(),
    history: earliestHistoryDate || farFuture(),
    insights: earliestInsightsDate || farFuture(),
    social: earliestSocialDate || farFuture(),
    absoluteEarliest: absoluteEarliest || fallbackDate, // drives the main date loop
  };

  logger.log('INFO', 'Earliest data availability map built:', {
    portfolio: result.portfolio.toISOString().slice(0, 10),
    history: result.history.toISOString().slice(0, 10),
    insights: result.insights.toISOString().slice(0, 10),
    social: result.social.toISOString().slice(0, 10),
    absoluteEarliest: result.absoluteEarliest.toISOString().slice(0, 10),
  });

  return result;
}
|
|
99
177
|
|
|
100
178
|
module.exports = {
|
|
@@ -103,5 +181,6 @@ module.exports = {
|
|
|
103
181
|
normalizeName,
|
|
104
182
|
commitBatchInChunks,
|
|
105
183
|
getExpectedDateStrings,
|
|
106
|
-
getFirstDateFromSourceData,
|
|
107
|
-
|
|
184
|
+
// getFirstDateFromSourceData, // This is replaced
|
|
185
|
+
getEarliestDataDates, // <-- EXPORT NEW FUNCTION
|
|
186
|
+
};
|