bulltrackers-module 1.0.131 → 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,9 +6,18 @@
6
6
  * It reads its pass number from the config and executes only those calculations.
7
7
  * This file contains the high-level "manual" of steps. The "how-to" logic
8
8
  * is extracted into 'computation_system_utils.js'.
9
+ * --- MODIFIED: To use getEarliestDataDates and pass the date map to the orchestrator helpers. ---
10
+ * --- MODIFIED: To run date processing in parallel batches. ---
11
+ * --- MODIFIED: To fetch ALL existing results to enable incremental (skip) logic. ---
9
12
  */
10
- const { groupByPass, checkRootDataAvailability, fetchDependenciesForPass, filterCalculations, runStandardComputationPass, runMetaComputationPass } = require('./orchestration_helpers.js');
11
- const { getExpectedDateStrings, getFirstDateFromSourceData } = require('../utils/utils.js');
13
+
14
+ // --- MODIFIED: Renamed fetchDependenciesForPass to fetchExistingResults ---
15
+ const { groupByPass, checkRootDataAvailability, fetchExistingResults, filterCalculations, runStandardComputationPass, runMetaComputationPass } = require('./orchestration_helpers.js');
16
+ // --- MODIFIED: Import getEarliestDataDates ---
17
+ const { getExpectedDateStrings, getEarliestDataDates } = require('../utils/utils.js');
18
+
19
+ // --- NEW: Parallel processing batch size ---
20
+ const PARALLEL_BATCH_SIZE = 7; // Process a week at a time
12
21
 
13
22
  async function runComputationPass(config, dependencies, computationManifest) {
14
23
  const { logger } = dependencies;
@@ -17,7 +26,12 @@ async function runComputationPass(config, dependencies, computationManifest) {
17
26
 
18
27
  const yesterday = new Date(); yesterday.setUTCDate(yesterday.getUTCDate()-1);
19
28
  const endDateUTC = new Date(Date.UTC(yesterday.getUTCFullYear(), yesterday.getUTCMonth(), yesterday.getUTCDate()));
20
- const firstDate = await getFirstDateFromSourceData(config, dependencies);
29
+
30
+ // --- MODIFIED: Call new date function ---
31
+ const earliestDates = await getEarliestDataDates(config, dependencies);
32
+ const firstDate = earliestDates.absoluteEarliest; // Use the absolute earliest for the loop
33
+ // --- END MODIFICATION ---
34
+
21
35
  const startDateUTC = firstDate ? new Date(Date.UTC(firstDate.getUTCFullYear(), firstDate.getUTCMonth(), firstDate.getUTCDate())) : new Date(config.earliestComputationDate+'T00:00:00Z');
22
36
  const allExpectedDates = getExpectedDateStrings(startDateUTC, endDateUTC);
23
37
 
@@ -27,21 +41,52 @@ async function runComputationPass(config, dependencies, computationManifest) {
27
41
  const standardCalcs = calcsInThisPass.filter(c => c.type==='standard');
28
42
  const metaCalcs = calcsInThisPass.filter(c => c.type==='meta');
29
43
 
30
- for (const dateStr of allExpectedDates) {
44
+ // --- NEW: Helper function to process a single date ---
45
+ const processDate = async (dateStr) => {
31
46
  const dateToProcess = new Date(dateStr+'T00:00:00Z');
32
47
  try {
33
- const rootData = await checkRootDataAvailability(dateStr, config, dependencies); if (!rootData) continue;
34
- const fetchedDeps = await fetchDependenciesForPass(dateStr, calcsInThisPass, computationManifest, config, dependencies);
35
- const { standardCalcsToRun, metaCalcsToRun } = filterCalculations(standardCalcs, metaCalcs, rootData.status, fetchedDeps, passToRun, dateStr, logger);
48
+ // --- MODIFIED: Pass earliestDates map to checkRootDataAvailability ---
49
+ const rootData = await checkRootDataAvailability(dateStr, config, dependencies, earliestDates);
50
+ if (!rootData) {
51
+ logger.log('WARN', `[PassRunner] Skipping ${dateStr} for Pass ${passToRun}: No root data.`);
52
+ return; // Use 'return' instead of 'continue' for a mapped function
53
+ }
54
+ // --- END MODIFICATION ---
55
+
56
+ // --- MODIFIED: Fetch ALL existing results for this pass, not just dependencies ---
57
+ const existingResults = await fetchExistingResults(dateStr, calcsInThisPass, computationManifest, config, dependencies);
58
+
59
+ // --- MODIFIED: Pass existingResults to filterCalculations ---
60
+ const { standardCalcsToRun, metaCalcsToRun } = filterCalculations(standardCalcs, metaCalcs, rootData.status, existingResults, passToRun, dateStr, logger);
61
+
62
+ if (standardCalcsToRun.length === 0 && metaCalcsToRun.length === 0) {
63
+ logger.log('INFO', `[PassRunner] All calcs for ${dateStr} Pass ${passToRun} are already complete. Skipping.`);
64
+ return;
65
+ }
66
+
67
+ // --- MODIFIED: Pass existingResults (as fetchedDeps) to meta pass ---
36
68
  if (standardCalcsToRun.length) await runStandardComputationPass(dateToProcess, standardCalcsToRun, `Pass ${passToRun} (Standard)`, config, dependencies, rootData);
37
- if (metaCalcsToRun.length) await runMetaComputationPass(dateToProcess, metaCalcsToRun, `Pass ${passToRun} (Meta)`, config, dependencies, fetchedDeps, rootData);
38
- logger.log('SUCCESS', `[PassRunner] Completed Pass ${passToRun} for ${dateStr}.`);
69
+ if (metaCalcsToRun.length) await runMetaComputationPass(dateToProcess, metaCalcsToRun, `Pass ${passToRun} (Meta)`, config, dependencies, existingResults, rootData);
70
+
71
+ // Note: We no longer log "Completed" here, as the sub-functions do.
72
+ // logger.log('SUCCESS', `[PassRunner] Completed Pass ${passToRun} for ${dateStr}.`);
73
+
39
74
  } catch (err) {
40
75
  logger.log('ERROR', `[PassRunner] FAILED Pass ${passToRun} for ${dateStr}`, { errorMessage: err.message, stack: err.stack });
41
76
  }
77
+ };
78
+ // --- END: Helper function ---
79
+
80
+ // --- NEW: Replace sequential loop with parallel batch loop ---
81
+ logger.log('INFO', `[PassRunner] Processing ${allExpectedDates.length} total dates in batches of ${PARALLEL_BATCH_SIZE}...`);
82
+ for (let i = 0; i < allExpectedDates.length; i += PARALLEL_BATCH_SIZE) {
83
+ const batch = allExpectedDates.slice(i, i + PARALLEL_BATCH_SIZE);
84
+ logger.log('INFO', `[PassRunner] Processing batch ${Math.floor(i / PARALLEL_BATCH_SIZE) + 1}/${Math.ceil(allExpectedDates.length / PARALLEL_BATCH_SIZE)} (Dates: ${batch[0]}...${batch[batch.length-1]})`);
85
+ await Promise.all(batch.map(dateStr => processDate(dateStr)));
42
86
  }
87
+ // --- END: Parallel batch loop ---
43
88
 
44
89
  logger.log('INFO', `[PassRunner] Pass ${passToRun} orchestration finished.`);
45
90
  }
46
91
 
47
- module.exports = { runComputationPass };
92
+ module.exports = { runComputationPass };
@@ -1,60 +1,133 @@
1
1
  const { FieldPath } = require('@google-cloud/firestore');
2
- const { getPortfolioPartRefs, loadFullDayMap, loadDataByRefs, loadDailyInsights, loadDailySocialPostInsights, getHistoryPartRefs } = require('../utils/data_loader.js');
2
+ // --- MODIFIED: Import streamPortfolioData ---
3
+ const { getPortfolioPartRefs, loadFullDayMap, loadDataByRefs, loadDailyInsights, loadDailySocialPostInsights, getHistoryPartRefs, streamPortfolioData } = require('../utils/data_loader.js');
3
4
  const { normalizeName, commitBatchInChunks } = require('../utils/utils.js');
4
5
 
5
6
  /** Stage 1: Group manifest by pass number */
6
7
  function groupByPass(manifest) { return manifest.reduce((acc, calc) => { (acc[calc.pass] = acc[calc.pass] || []).push(calc); return acc; }, {}); }
7
8
 
8
- /** Stage 2: Check root data dependencies for a calc */
9
- function checkRootDependencies(calcManifest, rootDataStatus) { if (!calcManifest.rootDataDependencies || !calcManifest.rootDataDependencies.length) return true;
10
- for (const dep of calcManifest.rootDataDependencies) if ((dep==='portfolio'&&!rootDataStatus.hasPortfolio)||(dep==='insights'&&!rootDataStatus.hasInsights)||(dep==='social'&&!rootDataStatus.hasSocial)||(dep==='history'&&!rootDataStatus.hasHistory)) return false;
11
- return true;
9
+ /** * --- MODIFIED: Returns detailed missing dependencies for logging ---
10
+ * Stage 2: Check root data dependencies for a calc
11
+ */
12
+ function checkRootDependencies(calcManifest, rootDataStatus) {
13
+ const missing = [];
14
+ if (!calcManifest.rootDataDependencies || !calcManifest.rootDataDependencies.length) {
15
+ return { canRun: true, missing };}
16
+ for (const dep of calcManifest.rootDataDependencies) {
17
+ if (dep === 'portfolio' && !rootDataStatus.hasPortfolio) missing.push('portfolio');
18
+ else if (dep === 'insights' && !rootDataStatus.hasInsights) missing.push('insights');
19
+ else if (dep === 'social' && !rootDataStatus.hasSocial) missing.push('social');
20
+ else if (dep === 'history' && !rootDataStatus.hasHistory) missing.push('history');
21
+ }
22
+ return { canRun: missing.length === 0, missing };
12
23
  }
13
24
 
14
- /** Stage 3: Check root data availability for a date */
15
- // --- FIX: Passes the full 'dependencies' object down ---
16
- async function checkRootDataAvailability(dateStr, config, dependencies) {
17
- const { logger } = dependencies; // Destructure logger for local use
25
/**
 * Stage 3: Check which root data sources are available for a date.
 *
 * A source is only queried when the date is on or after that source's entry
 * in `earliestDates`. A source with no entry is always queried, so callers
 * that do not pass `earliestDates` still get every source checked.
 *
 * @param {string} dateStr - Date in YYYY-MM-DD format.
 * @param {object} config - Passed through to the data loaders.
 * @param {object} dependencies - Must provide `logger`; passed through to the data loaders.
 * @param {object} [earliestDates] - Optional map `{ portfolio, insights, social, history }`
 *   of the earliest date each source has data for.
 * @returns {Promise<object|null>} `{ portfolioRefs, insightsData, socialData, historyRefs, status }`,
 *   or `null` when no source has data for the date or a loader fails.
 */
async function checkRootDataAvailability(dateStr, config, dependencies, earliestDates) {
  const { logger } = dependencies;
  logger.log('INFO', `[PassRunner] Checking root data for ${dateStr}...`);

  const dateToProcess = new Date(`${dateStr}T00:00:00Z`);
  const earliest = earliestDates ?? {};
  // Unknown earliest date => do not skip the query.
  const isInRange = (firstAvailable) => firstAvailable == null || dateToProcess >= firstAvailable;

  try {
    // Skipped sources resolve straight to their "empty" value.
    const [portfolioRes, insightsRes, socialRes, historyRes] = await Promise.all([
      isInRange(earliest.portfolio) ? getPortfolioPartRefs(config, dependencies, dateStr) : [],
      isInRange(earliest.insights) ? loadDailyInsights(config, dependencies, dateStr) : null,
      isInRange(earliest.social) ? loadDailySocialPostInsights(config, dependencies, dateStr) : null,
      isInRange(earliest.history) ? getHistoryPartRefs(config, dependencies, dateStr) : [],
    ]);

    // Normalise nullish loader results so consumers can rely on arrays / null.
    const portfolioRefs = portfolioRes ?? [];
    const insightsData = insightsRes ?? null;
    const socialData = socialRes ?? null;
    const historyRefs = historyRes ?? [];

    const status = {
      hasPortfolio: Boolean(portfolioRefs.length),
      hasInsights: Boolean(insightsData),
      hasSocial: Boolean(socialData),
      hasHistory: Boolean(historyRefs.length),
    };

    if (!Object.values(status).some(Boolean)) {
      logger.log('WARN', `[PassRunner] No root data for ${dateStr}.`);
      return null;
    }
    return { portfolioRefs, insightsData, socialData, historyRefs, status };
  } catch (err) {
    logger.log('ERROR', `[PassRunner] Error checking data for ${dateStr}`, { errorMessage: err.message });
    return null;
  }
}
31
49
 
32
- /** Stage 4: Fetch computed dependencies from Firestore */
33
- async function fetchDependenciesForPass(dateStr, calcsInPass, fullManifest, config, { db, logger }) {
50
+
51
+
52
+ /** --- MODIFIED: Stage 4: Fetch ALL existing computed results for the pass ---
53
+ * This function now checks for *all* calcs in the pass, not just meta-dependencies,
54
+ * to enable skipping completed work.
55
+ */
56
+ async function fetchExistingResults(dateStr, calcsInPass, fullManifest, config, { db, logger }) {
34
57
  const manifestMap = new Map(fullManifest.map(c => [normalizeName(c.name), c]));
35
- const requiredDeps = new Set(calcsInPass.filter(c => c.type==='meta'&&c.dependencies).flatMap(c => c.dependencies.map(normalizeName)));
36
- if (!requiredDeps.size) return {};
37
- logger.log('INFO', `[PassRunner] Fetching ${requiredDeps.size} deps for ${dateStr}...`);
58
+
59
+ // --- MODIFIED: Check all calcs in this pass, not just meta-deps ---
60
+ const allCalcsInPass = new Set(calcsInPass.map(c => normalizeName(c.name)));
61
+ if (!allCalcsInPass.size) return {};
62
+
63
+ logger.log('INFO', `[PassRunner] Checking for ${allCalcsInPass.size} existing results for ${dateStr}...`);
38
64
  const docRefs = [], depNames = [];
39
- for (const calcName of requiredDeps) {
65
+
66
+ for (const calcName of allCalcsInPass) {
40
67
  const calcManifest = manifestMap.get(calcName);
41
68
  if (!calcManifest) { logger.log('ERROR', `[PassRunner] Missing manifest for ${calcName}`); continue; }
42
69
  docRefs.push(db.collection(config.resultsCollection).doc(dateStr).collection(config.resultsSubcollection).doc(calcManifest.category||'unknown').collection(config.computationsSubcollection).doc(calcName));
43
70
  depNames.push(calcName);
44
71
  }
72
+
45
73
  const fetched = {};
46
74
  if (docRefs.length) (await db.getAll(...docRefs)).forEach((doc,i)=>fetched[depNames[i]]=doc.exists?doc.data():null);
47
75
  return fetched;
48
76
  }
49
77
 
50
- /** Stage 5: Filter calculations based on available root data and dependencies */
51
- function filterCalculations(standardCalcs, metaCalcs, rootDataStatus, fetchedDeps, passToRun, dateStr, logger) {
78
/**
 * Stage 5: Decide which calculations still need to run for a date.
 *
 * A calculation is dropped when:
 *   - a result for it is already present in `existingResults`,
 *   - `checkRootDependencies` reports missing root data, or
 *   - (meta only) one of its computed dependencies has no existing result.
 *
 * `existingResults` is looked up by normalised name, which is how
 * `fetchExistingResults` keys it; the raw name is accepted as a fallback.
 *
 * @param {object[]} standardCalcs - Manifest entries of type 'standard'.
 * @param {object[]} metaCalcs - Manifest entries of type 'meta'.
 * @param {object} rootDataStatus - Availability flags passed to `checkRootDependencies`.
 * @param {object} existingResults - Map of calc name -> stored result (or null).
 * @param {number|string} passToRun - Pass identifier, used in log messages only.
 * @param {string} dateStr - Date in YYYY-MM-DD format, used in log messages only.
 * @param {object} logger - Object exposing `log(level, message)`.
 * @returns {{standardCalcsToRun: object[], metaCalcsToRun: object[]}}
 */
function filterCalculations(standardCalcs, metaCalcs, rootDataStatus, existingResults, passToRun, dateStr, logger) {
  const results = existingResults ?? {};
  const hasResult = (name) => Boolean(results[normalizeName(name)] || results[name]);

  // Shared gate for both calc types; `label` is the log prefix for the type.
  const passesCommonChecks = (calc, label) => {
    if (hasResult(calc.name)) {
      logger.log('TRACE', `[${label}] Skipping ${calc.name} for ${dateStr}. Result already exists.`);
      return false;
    }
    const { canRun, missing } = checkRootDependencies(calc, rootDataStatus);
    if (!canRun) {
      logger.log('INFO', `[${label}] Skipping ${calc.name} for ${dateStr}. Missing root data: [${missing.join(', ')}]`);
      return false;
    }
    return true;
  };

  const standardLabel = `Pass ${passToRun}`;
  const metaLabel = `Pass ${passToRun} Meta`;

  const standardCalcsToRun = standardCalcs.filter((calc) => passesCommonChecks(calc, standardLabel));

  const metaCalcsToRun = metaCalcs.filter((calc) => {
    if (!passesCommonChecks(calc, metaLabel)) return false;

    // Meta calcs additionally need every computed dependency to be stored already.
    const missingDeps = (calc.dependencies || []).map(normalizeName).filter((dep) => !results[dep]);
    if (missingDeps.length > 0) {
      logger.log('WARN', `[${metaLabel}] Skipping ${calc.name} for ${dateStr}. Missing computed deps: [${missingDeps.join(', ')}]`);
      return false;
    }
    return true;
  });

  return { standardCalcsToRun, metaCalcsToRun };
}
57
129
 
130
+
58
131
  /** Stage 6: Initialize calculator instances */
59
132
  function initializeCalculators(calcs, logger) { const state = {}; for (const c of calcs) { const name=normalizeName(c.name), Cl=c.class; if(typeof Cl==='function') try { const inst=new Cl(); inst.manifest=c; state[name]=inst; } catch(e){logger.warn(`Init failed ${name}`,{errorMessage:e.message}); state[name]=null;} else {logger.warn(`Class missing ${name}`); state[name]=null;} } return state; }
60
133
 
@@ -66,21 +139,65 @@ async function loadHistoricalData(date, calcs, config, deps, rootData) { const u
66
139
  await Promise.all(tasks); return updated;
67
140
  }
68
141
 
69
- /** Stage 8: Stream and process data for standard calculations */
70
- async function streamAndProcess(dateStr, todayRefs, state, passName, config, deps, rootData) { const { logger, calculationUtils } = deps;
142
/**
 * Stage 8: Stream portfolio data and feed each user to the standard calculators.
 *
 * Portfolio chunks come from the `streamPortfolioData` async generator, so only
 * one chunk is held by this loop at a time. Every calculator's `process` is
 * called with:
 *   (todayPortfolio, yesterdayPortfolio|null, uid, context,
 *    todayInsights, yesterdayInsights, todaySocialPostInsights,
 *    yesterdaySocialPostInsights, todayHistoryData, yesterdayHistoryData)
 *
 * Per-calculator rules:
 *   - categories 'socialPosts' / 'insights' are only considered for the first streamed user;
 *   - `isHistorical` calculators are skipped for users with no entry in `yesterdayPortfolios`;
 *   - 'speculators' calculators only see users with `PublicPositions`; all other
 *     calculators only see users without it, except 'users-processed', which sees everyone.
 *
 * @param {string} dateStr - Date in YYYY-MM-DD format.
 * @param {object} state - Map of calc name -> calculator instance (or null).
 * @param {string} passName - Label of the running pass (currently unused here).
 * @param {object} config - Computation system configuration.
 * @param {object} deps - Must provide `logger` and `calculationUtils.loadInstrumentMappings`.
 * @param {object} rootData - Today/yesterday insights, social, history and yesterday's portfolios.
 * @returns {Promise<void>}
 */
async function streamAndProcess(dateStr, state, passName, config, deps, rootData) {
  const { logger, calculationUtils } = deps;
  const {
    todayInsights,
    yesterdayInsights,
    todaySocialPostInsights,
    yesterdaySocialPostInsights,
    todayHistoryData,
    yesterdayHistoryData,
    yesterdayPortfolios,
  } = rootData;

  // Load the mappings a single time and reuse them for every user.
  const mappings = await calculationUtils.loadInstrumentMappings();
  const context = {
    instrumentMappings: mappings.instrumentToTicker,
    sectorMapping: mappings.instrumentToSector,
    todayDateStr: dateStr,
    dependencies: deps,
    config,
  };

  // Trailing arguments are identical for every process() call.
  const sharedArgs = [
    todayInsights,
    yesterdayInsights,
    todaySocialPostInsights,
    yesterdaySocialPostInsights,
    todayHistoryData,
    yesterdayHistoryData,
  ];

  let firstUser = true;

  for await (const chunk of streamPortfolioData(config, deps, dateStr)) {
    for (const [uid, portfolio] of Object.entries(chunk)) {
      if (!portfolio) continue;

      const userType = portfolio.PublicPositions ? 'speculator' : 'normal';
      context.userType = userType;

      for (const [name, calc] of Object.entries(state)) {
        if (!calc || typeof calc.process !== 'function') continue;

        const { category, isHistorical } = calc.manifest;
        const isSocialOrInsights = category === 'socialPosts' || category === 'insights';
        const isSpec = category === 'speculators';

        if (isSocialOrInsights && !firstUser) continue;

        let previousPortfolio = null;
        if (isHistorical) {
          previousPortfolio = yesterdayPortfolios[uid];
          if (!previousPortfolio) continue;
        }

        const wrongAudience =
          (userType === 'normal' && isSpec) ||
          (userType === 'speculator' && !isSpec && name !== 'users-processed');
        if (wrongAudience) continue;

        try {
          await Promise.resolve(calc.process(portfolio, previousPortfolio, uid, context, ...sharedArgs));
        } catch (e) {
          // One failing calculator must not abort the rest of the stream.
          logger.log('WARN', `Process error ${name} for ${uid}`, { err: e.message });
        }
      }
      firstUser = false;
    }
  }
}
186
+ // --- END MODIFICATION ---
76
187
 
77
188
  /** Stage 9: Run standard computations */
78
189
  async function runStandardComputationPass(date, calcs, passName, config, deps, rootData) {
79
190
  const dStr = date.toISOString().slice(0, 10), logger = deps.logger;
191
+ // --- MODIFIED: Log only if there are calcs to run ---
192
+ if (calcs.length === 0) return;
80
193
  logger.log('INFO', `[${passName}] Running ${dStr} with ${calcs.length} calcs.`);
194
+
195
+ // --- MODIFIED: We no longer need portfolioRefs in fullRoot, as streamAndProcess handles it.
81
196
  const fullRoot = await loadHistoricalData(date, calcs, config, deps, rootData);
82
197
  const state = initializeCalculators(calcs, logger);
83
- await streamAndProcess(dStr, fullRoot.portfolioRefs, state, passName, config, deps, fullRoot);
198
+
199
+ // --- MODIFIED: Call to streamAndProcess no longer passes portfolioRefs ---
200
+ await streamAndProcess(dStr, state, passName, config, deps, fullRoot);
84
201
 
85
202
  // --- START: FULL COMMIT LOGIC ---
86
203
  let success = 0;
@@ -155,6 +272,8 @@ async function runStandardComputationPass(date, calcs, passName, config, deps, r
155
272
  /** Stage 10: Run meta computations */
156
273
  async function runMetaComputationPass(date, calcs, passName, config, deps, fetchedDeps, rootData) {
157
274
  const dStr = date.toISOString().slice(0, 10), logger = deps.logger;
275
+ // --- MODIFIED: Log only if there are calcs to run ---
276
+ if (calcs.length === 0) return;
158
277
  logger.log('INFO', `[${passName}] Running ${dStr} with ${calcs.length} calcs.`);
159
278
 
160
279
  // --- START: FULL COMMIT LOGIC ---
@@ -231,4 +350,5 @@ async function runMetaComputationPass(date, calcs, passName, config, deps, fetch
231
350
  }
232
351
 
233
352
 
234
- module.exports = { groupByPass, checkRootDataAvailability, fetchDependenciesForPass, filterCalculations, runStandardComputationPass, runMetaComputationPass };
353
+ // --- MODIFIED: Export new function name ---
354
+ module.exports = { groupByPass, checkRootDataAvailability, fetchExistingResults, filterCalculations, runStandardComputationPass, runMetaComputationPass };
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * @fileoverview Data loader sub-pipes for the Computation System.
3
3
  * REFACTORED: Now stateless and receive dependencies.
4
+ * --- NEW: Added streamPortfolioData async generator ---
4
5
  */
5
6
 
6
7
  // <<< FIX: REMOVED all top-level 'require' and 'dependencies' lines >>>
@@ -165,6 +166,39 @@ async function getHistoryPartRefs(config, deps, dateString) {
165
166
  return allPartRefs;
166
167
  }
167
168
 
169
+ // --- NEW: Stage 7: Stream portfolio data in chunks ---
170
/**
 * Stage 7: Stream portfolio data for a date, one batch of part refs at a time.
 *
 * This is an async generator: each iteration loads one batch via
 * `loadDataByRefs` and yields whatever that call returns, so the consumer
 * only needs to hold one batch at a time.
 *
 * @param {object} config - Computation system configuration. `partRefBatchSize`
 *   sets how many part refs are loaded per batch; anything that is not a
 *   positive integer falls back to 50.
 * @param {object} deps - Must provide `logger`; passed through to the loaders.
 * @param {string} dateString - The date in YYYY-MM-DD format.
 * @yields {object} The result of `loadDataByRefs` for one batch of refs.
 */
async function* streamPortfolioData(config, deps, dateString) {
  const { logger } = deps;

  // Treat a nullish loader result like "no refs" instead of crashing on `.length`.
  const refs = (await getPortfolioPartRefs(config, deps, dateString)) ?? [];
  if (refs.length === 0) {
    logger.log('WARN', `[streamPortfolioData] No portfolio refs found for ${dateString}. Stream is empty.`);
    return;
  }

  // A zero, negative or non-numeric step would stall or corrupt the index loop below.
  const configuredSize = Number(config.partRefBatchSize);
  const batchSize = Number.isInteger(configuredSize) && configuredSize > 0 ? configuredSize : 50;

  logger.log('INFO', `[streamPortfolioData] Streaming ${refs.length} portfolio parts in chunks of ${batchSize}...`);

  for (let start = 0; start < refs.length; start += batchSize) {
    const batchRefs = refs.slice(start, start + batchSize);
    yield await loadDataByRefs(config, deps, batchRefs);
  }

  logger.log('INFO', `[streamPortfolioData] Finished streaming for ${dateString}.`);
}
199
+ // --- END: Stage 7 ---
200
+
201
+
168
202
  module.exports = {
169
203
  getPortfolioPartRefs,
170
204
  loadDataByRefs,
@@ -172,4 +206,5 @@ module.exports = {
172
206
  loadDailyInsights,
173
207
  loadDailySocialPostInsights,
174
208
  getHistoryPartRefs,
209
+ streamPortfolioData, // <-- EXPORT NEW FUNCTION
175
210
  };
@@ -2,8 +2,10 @@
2
2
  * @fileoverview Computation system sub-pipes and utils.
3
3
  * REFACTORED: Now stateless and receive dependencies where needed.
4
4
  * DYNAMIC: Categorization logic is removed, replaced by manifest.
5
+ * --- MODIFIED: getFirstDateFromSourceData is now getEarliestDataDates
6
+ * and queries all data sources to build an availability map. ---
5
7
  */
6
- /** --- Computation System Sub-Pipes & Utils (Stateless) --- */
8
+ /** --- Computation System Sub-Pipes & Utils (Stateless, Dependency-Injection) --- */
7
9
 
8
10
  const { FieldValue, FieldPath } = require('@google-cloud/firestore');
9
11
 
@@ -46,13 +48,46 @@ function getExpectedDateStrings(startDate, endDate) {
46
48
  return dateStrings;
47
49
  }
48
50
 
49
- /** Stage 4: Get the earliest date in a collection */
51
/**
 * Stage 4: Get the earliest date in a *flat* collection whose document IDs are
 * YYYY-MM-DD date strings.
 *
 * Queries for the first document ID >= '2000-01-01' in ascending order and
 * parses it as a UTC midnight date.
 *
 * @param {object} config - Unused here; kept for a uniform helper signature.
 * @param {object} deps - Must provide `db`, `logger` and `calculationUtils.withRetry`.
 * @param {string} collectionName - Name of the collection to inspect.
 * @returns {Promise<Date|null>} The earliest date, or `null` when the name is
 *   missing, the collection is empty, the first ID is not a valid date, or the
 *   query fails (the failure is logged, not thrown).
 */
async function getFirstDateFromSimpleCollection(config, deps, collectionName) {
  const { db, logger, calculationUtils } = deps;
  const { withRetry } = calculationUtils;

  try {
    if (!collectionName) {
      logger.log('WARN', `[Core Utils] Collection name not provided for simple date query.`);
      return null;
    }

    const query = db.collection(collectionName)
      .where(FieldPath.documentId(), '>=', '2000-01-01')
      .orderBy(FieldPath.documentId(), 'asc')
      .limit(1);

    const snapshot = await withRetry(() => query.get(), `GetEarliestDoc(${collectionName})`);
    if (snapshot.empty) return null;

    const firstId = snapshot.docs[0].id;
    if (!/^\d{4}-\d{2}-\d{2}$/.test(firstId)) return null;

    // The regex only checks the shape; '2024-13-45' matches but is not a real
    // date, and an Invalid Date would make later toISOString() calls throw.
    const parsed = new Date(`${firstId}T00:00:00Z`);
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  } catch (e) {
    logger.log('ERROR', `GetFirstDate failed for ${collectionName}`, { errorMessage: e.message });
  }
  return null;
}
79
+
80
+ /** Stage 4: Get the earliest date in a sharded collection */
50
81
  async function getFirstDateFromCollection(config, deps, collectionName) {
51
82
  const { db, logger, calculationUtils } = deps;
52
83
  const { withRetry } = calculationUtils;
53
84
 
54
85
  let earliestDate = null;
55
86
  try {
87
+ if (!collectionName) {
88
+ logger.log('WARN', `[Core Utils] Collection name not provided for sharded date query.`);
89
+ return null;
90
+ }
56
91
  const blockDocRefs = await withRetry(() => db.collection(collectionName).listDocuments(), `GetBlocks(${collectionName})`);
57
92
  if (!blockDocRefs.length) { logger.log('WARN', `No block documents in collection: ${collectionName}`); return null; }
58
93
 
@@ -75,26 +110,69 @@ async function getFirstDateFromCollection(config, deps, collectionName) {
75
110
  return earliestDate;
76
111
  }
77
112
 
78
- /** Stage 5: Determine the earliest date from source data across both user types */
79
- async function getFirstDateFromSourceData(config, deps) {
113
/**
 * Stage 5: Determine the earliest available date for every root data source.
 *
 * Portfolio and history collections are queried with
 * `getFirstDateFromCollection`, insights and social collections with
 * `getFirstDateFromSimpleCollection`; all six lookups run in parallel.
 *
 * @param {object} config - Supplies the six collection names and, optionally,
 *   `earliestComputationDate` (YYYY-MM-DD), used when no source has any data.
 * @param {object} deps - Must provide `logger`; passed through to the lookups.
 * @returns {Promise<{portfolio: Date, history: Date, insights: Date, social: Date, absoluteEarliest: Date}>}
 *   Per-source earliest dates (2999-12-31 when a source has no data) and the
 *   overall earliest date. When nothing is found, `absoluteEarliest` is
 *   `config.earliestComputationDate`, or 2023-01-01 if that is missing or unparsable.
 */
async function getEarliestDataDates(config, deps) {
  const { logger } = deps;
  logger.log('INFO', 'Querying for earliest date from ALL source data collections...');

  // Each lookup resolves to a Date, or null on error / empty collection.
  const [
    investorDate,
    speculatorDate,
    investorHistoryDate,
    speculatorHistoryDate,
    insightsDate,
    socialDate,
  ] = await Promise.all([
    getFirstDateFromCollection(config, deps, config.normalUserPortfolioCollection),
    getFirstDateFromCollection(config, deps, config.speculatorPortfolioCollection),
    getFirstDateFromCollection(config, deps, config.normalUserHistoryCollection),
    getFirstDateFromCollection(config, deps, config.speculatorHistoryCollection),
    getFirstDateFromSimpleCollection(config, deps, config.insightsCollectionName),
    getFirstDateFromSimpleCollection(config, deps, config.socialInsightsCollectionName),
  ]);

  const isValidDate = (value) => value instanceof Date && !Number.isNaN(value.getTime());

  // Earliest of the usable dates; null when none are usable.
  const getMinDate = (...dates) => {
    const timestamps = dates.filter(isValidDate).map((date) => date.getTime());
    return timestamps.length === 0 ? null : new Date(Math.min(...timestamps));
  };

  // Sentinel for "this source has no data": no processed date will ever reach it.
  const farFuture = () => new Date('2999-12-31');

  const earliestPortfolioDate = getMinDate(investorDate, speculatorDate);
  const earliestHistoryDate = getMinDate(investorHistoryDate, speculatorHistoryDate);
  const earliestInsightsDate = getMinDate(insightsDate);
  const earliestSocialDate = getMinDate(socialDate);
  const absoluteEarliest = getMinDate(
    earliestPortfolioDate,
    earliestHistoryDate,
    earliestInsightsDate,
    earliestSocialDate,
  );

  // The default must be chosen BEFORE appending the time suffix: `x + 'T…' || default`
  // is always truthy and would yield an Invalid Date for a missing config value.
  const configuredFallback = new Date(`${config.earliestComputationDate}T00:00:00Z`);
  const fallbackDate = isValidDate(configuredFallback)
    ? configuredFallback
    : new Date('2023-01-01T00:00:00Z');

  const result = {
    portfolio: earliestPortfolioDate || farFuture(),
    history: earliestHistoryDate || farFuture(),
    insights: earliestInsightsDate || farFuture(),
    social: earliestSocialDate || farFuture(),
    absoluteEarliest: absoluteEarliest || fallbackDate,
  };

  const toDay = (date) => date.toISOString().slice(0, 10);
  logger.log('INFO', 'Earliest data availability map built:', {
    portfolio: toDay(result.portfolio),
    history: toDay(result.history),
    insights: toDay(result.insights),
    social: toDay(result.social),
    absoluteEarliest: toDay(result.absoluteEarliest),
  });

  return result;
}
99
177
 
100
178
  module.exports = {
@@ -103,5 +181,6 @@ module.exports = {
103
181
  normalizeName,
104
182
  commitBatchInChunks,
105
183
  getExpectedDateStrings,
106
- getFirstDateFromSourceData,
107
- };
184
+ // getFirstDateFromSourceData, // This is replaced
185
+ getEarliestDataDates, // <-- EXPORT NEW FUNCTION
186
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bulltrackers-module",
3
- "version": "1.0.131",
3
+ "version": "1.0.133",
4
4
  "description": "Helper Functions for Bulltrackers.",
5
5
  "main": "index.js",
6
6
  "files": [