bulltrackers-module 1.0.657 → 1.0.659

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/functions/api-v2/routes/popular_investors.js +80 -0
  2. package/functions/computation-system/data/AvailabilityChecker.js +163 -317
  3. package/functions/computation-system/data/CachedDataLoader.js +158 -222
  4. package/functions/computation-system/data/DependencyFetcher.js +201 -406
  5. package/functions/computation-system/executors/MetaExecutor.js +176 -280
  6. package/functions/computation-system/executors/StandardExecutor.js +325 -383
  7. package/functions/computation-system/helpers/computation_dispatcher.js +294 -699
  8. package/functions/computation-system/helpers/computation_worker.js +3 -2
  9. package/functions/computation-system/legacy/AvailabilityCheckerOld.js +382 -0
  10. package/functions/computation-system/legacy/CachedDataLoaderOld.js +357 -0
  11. package/functions/computation-system/legacy/DependencyFetcherOld.js +478 -0
  12. package/functions/computation-system/legacy/MetaExecutorold.js +364 -0
  13. package/functions/computation-system/legacy/StandardExecutorold.js +476 -0
  14. package/functions/computation-system/legacy/computation_dispatcherold.js +944 -0
  15. package/functions/computation-system/persistence/ResultCommitter.js +137 -188
  16. package/functions/computation-system/services/SnapshotService.js +129 -0
  17. package/functions/computation-system/tools/BuildReporter.js +12 -7
  18. package/functions/computation-system/utils/data_loader.js +213 -238
  19. package/package.json +3 -2
  20. package/functions/computation-system/workflows/bulltrackers_pipeline.yaml +0 -163
  21. package/functions/computation-system/workflows/data_feeder_pipeline.yaml +0 -115
  22. package/functions/computation-system/workflows/datafeederpipelineinstructions.md +0 -30
  23. package/functions/computation-system/workflows/morning_prep_pipeline.yaml +0 -55
@@ -1,478 +1,273 @@
  /**
  * FILENAME: computation-system/data/DependencyFetcher.js
  * @fileoverview Fetches dependencies for computations.
- * UPDATED: Added 'fetchExistingResults' bridge for WorkflowOrchestrator compatibility.
- * UPDATED: Uses 'manifestLookup' to resolve the correct category (Core vs Non-Core).
- * UPDATED: Supports automatic reassembly of sharded results (_shards subcollection).
- * UPDATED: Supports decompression of zipped results.
+ * REFACTORED: Unified fetch logic, streamlined decompression/sharding/GCS.
  */
  const { normalizeName } = require('../utils/utils');
  const zlib = require('zlib');
+ const { Storage } = require('@google-cloud/storage');

- /**
- * Checks if data is effectively empty (no usable content).
- * @param {any} data - The data to check
- * @returns {boolean} True if data is empty/null/undefined or contains no meaningful content
- */
+ const storage = new Storage(); // Singleton Client
+
+ // =============================================================================
+ // HELPERS
+ // =============================================================================
+
+ /** Checks if data is effectively empty (null or only metadata keys) */
  function isDataEmpty(data) {
- if (!data || data === null || data === undefined) return true;
-
- // Check if it's an object with only metadata fields
- if (typeof data === 'object' && !Array.isArray(data)) {
- const keys = Object.keys(data);
- // If only metadata/internal fields, consider it empty
- const metadataFields = ['_completed', '_compressed', '_sharded', '_shardCount', '_isPageMode', '_pageCount', '_lastUpdated', '_expireAt'];
- const hasOnlyMetadata = keys.length > 0 && keys.every(k => metadataFields.includes(k) || k.startsWith('_'));
-
- if (hasOnlyMetadata) return true;
-
- // If object has no keys (after filtering metadata), it's empty
- const dataKeys = keys.filter(k => !k.startsWith('_'));
- if (dataKeys.length === 0) return true;
+ if (data == null) return true;
+ if (Array.isArray(data)) return data.length === 0;
+ if (typeof data === 'object') {
+ // Return true if NO keys exist that DON'T start with '_'
+ return !Object.keys(data).some(k => !k.startsWith('_'));
  }
-
- // Check if it's an empty array
- if (Array.isArray(data) && data.length === 0) return true;
-
  return false;
  }

+ /** Robust decompression helper (Buffer, Base64, or Firestore Binary) */
+ function tryDecompress(payload) {
+ if (!payload) return null;
+ try {
+ const buffer = (payload instanceof Buffer) ? payload :
+ (payload._byteString ? Buffer.from(payload._byteString, 'base64') :
+ (payload.toDate ? payload.toDate() : Buffer.from(payload)));
+
+ return JSON.parse(zlib.gunzipSync(buffer).toString('utf8'));
+ } catch (e) {
+ throw new Error(`Decompression failed: ${e.message}`);
+ }
+ }
+
+ // =============================================================================
+ // CORE FETCH LOGIC
+ // =============================================================================
+
  /**
- * BRIDGE FUNCTION: Matches WorkflowOrchestrator signature.
- * Adapts (dateStr, calcs, manifest, ...) -> (dateObj, calcs, ..., manifestLookup).
- * This fixes the 'fetchExistingResults is not a function' TypeError.
+ * Fetches, decompresses, and reassembles (if sharded or on GCS) a single result document.
  */
- async function fetchExistingResults(dateStr, calcs, fullManifest, config, deps, isHistoricalContext) {
- const { logger } = deps;
-
- // DEBUG: Log entry
- logger.log('INFO', `[DependencyFetcher] 📥 fetchExistingResults called for date: ${dateStr}, calcs: ${calcs.length}, isHistorical: ${isHistoricalContext}`);
- logger.log('INFO', `[DependencyFetcher] 📥 Calcs being processed: ${calcs.map(c => {
- const name = c.name || c.constructor?.name || 'unknown';
- const hasClass = !!c.class;
- const classType = c.class ? (typeof c.class === 'function' ? 'function' : typeof c.class) : 'none';
- return `${name} (has class: ${hasClass}, class type: ${classType})`;
- }).join(', ')}`);
-
- // 1. Build Manifest Lookup (Name -> Category)
- const manifestLookup = {};
- if (Array.isArray(fullManifest)) {
- fullManifest.forEach(c => {
- manifestLookup[normalizeName(c.name)] = c.category || 'analytics';
- });
- logger.log('INFO', `[DependencyFetcher] 📥 Built manifest lookup with ${Object.keys(manifestLookup).length} entries`);
- } else {
- logger.log('WARN', `[DependencyFetcher] ⚠️ fullManifest is not an array: ${typeof fullManifest}`);
+ async function fetchSingleResult(db, config, dateStr, name, category) {
+ const { resultsCollection = 'computation_results', resultsSubcollection = 'results', computationsSubcollection = 'computations' } = config;
+ const log = config.logger || console;
+
+ const docRef = db.collection(resultsCollection).doc(dateStr)
+ .collection(resultsSubcollection).doc(category)
+ .collection(computationsSubcollection).doc(name);
+
+ const snap = await docRef.get();
+ if (!snap.exists) return null;
+
+ let data = snap.data();
+
+ // -------------------------------------------------------------------------
+ // 1. GCS POINTER HANDLER (New)
+ // -------------------------------------------------------------------------
+ if (data.gcsUri || (data._gcs && data.gcsBucket && data.gcsPath)) {
+ try {
+ const bucketName = data.gcsBucket || data.gcsUri.split('/')[2];
+ const fileName = data.gcsPath || data.gcsUri.split('/').slice(3).join('/');
+
+ // Stream download is memory efficient for large files
+ const [fileContent] = await storage.bucket(bucketName).file(fileName).download();
+
+ // Assume Gzip (as writer does it), if fails try plain
+ try {
+ return JSON.parse(zlib.gunzipSync(fileContent).toString('utf8'));
+ } catch (gzipErr) {
+ // Fallback for uncompressed GCS files
+ return JSON.parse(fileContent.toString('utf8'));
+ }
+ } catch (e) {
+ log.error(`[DependencyFetcher] ❌ GCS Fetch Failed for ${name}: ${e.message}`);
+ // Depending on strictness, we might return null here or allow it to fail hard.
+ // Returning null allows 'isDataEmpty' to catch it as "MISSING"
+ return null;
+ }
  }

- // 2. Convert Date String to Date Object
- // We append T00:00:00Z to ensure it parses as UTC date if only YYYY-MM-DD is provided.
- const dateObj = new Date(dateStr + (dateStr.includes('T') ? '' : 'T00:00:00Z'));
+ // -------------------------------------------------------------------------
+ // 2. FIRESTORE COMPRESSED HANDLER
+ // -------------------------------------------------------------------------
+ if (data._compressed && data.payload) {
+ try {
+ const realData = tryDecompress(data.payload);
+ data = { ...data, ...realData }; // Merge payload into base
+ delete data.payload;
+ } catch (e) {
+ log.error(`[DependencyFetcher] ❌ ${e.message} at ${docRef.path}`);
+ return null;
+ }
+ }

- // 3. Delegate to fetchDependencies
- // CRITICAL: For historical context (yesterday's data), allow missing dependencies
- // Historical lookbacks are optional - gaps in historical data are permissible
- const result = await fetchDependencies(dateObj, calcs, config, deps, manifestLookup, isHistoricalContext);
-
- // DEBUG: Log result
- const resultKeys = Object.keys(result);
- logger.log('INFO', `[DependencyFetcher] 📤 fetchExistingResults returning ${resultKeys.length} dependencies: ${resultKeys.length > 0 ? resultKeys.join(', ') : 'NONE'}`);
-
- return result;
+ // -------------------------------------------------------------------------
+ // 3. FIRESTORE SHARDED HANDLER
+ // -------------------------------------------------------------------------
+ if (data._sharded) {
+ const shardSnaps = await docRef.collection('_shards').get();
+ if (shardSnaps.empty) {
+ log.error(`[DependencyFetcher] Sharded doc has no shards: ${docRef.path}`);
+ return null;
+ }
+
+ // Initialize merged data with any non-metadata fields from the pointer doc
+ const merged = {};
+ Object.entries(data).forEach(([k, v]) => { if (!k.startsWith('_')) merged[k] = v; });
+
+ for (const shard of shardSnaps.docs) {
+ let sData = shard.data();
+
+ // Decompress Shard if needed
+ if (sData._compressed && sData.payload) {
+ try {
+ const decomp = tryDecompress(sData.payload);
+ sData = (typeof decomp === 'string') ? JSON.parse(decomp) : decomp;
+ } catch (e) {
+ log.error(`[DependencyFetcher] ❌ Shard decompression failed (${shard.id}): ${e.message}`);
+ continue;
+ }
+ }
+
+ // Merge Shard Content
+ Object.entries(sData).forEach(([k, v]) => {
+ if (!k.startsWith('_')) merged[k] = v;
+ });
+ }
+
+ // Return null if result is empty after merging
+ if (Object.keys(merged).length === 0) return null;
+ return merged;
+ }
+
+ // -------------------------------------------------------------------------
+ // 4. STANDARD DOCUMENT HANDLER
+ // -------------------------------------------------------------------------
+ const clean = {};
+ let hasContent = false;
+ Object.entries(data).forEach(([k, v]) => {
+ if (!k.startsWith('_')) { clean[k] = v; hasContent = true; }
+ });
+
+ return hasContent ? clean : null;
  }

+ // =============================================================================
+ // PUBLIC METHODS
+ // =============================================================================
+
  /**
  * Fetches dependencies for a specific date (Standard pass).
- * @param {Date} date - The target date.
- * @param {Array} calcs - The computations requiring dependencies.
- * @param {Object} config - System config.
- * @param {Object} deps - System dependencies (db, logger).
- * @param {Object} manifestLookup - Map of { [calcName]: categoryString }.
- * @param {boolean} allowMissing - If true, missing/empty dependencies are allowed (for historical/lookback scenarios).
  */
  async function fetchDependencies(date, calcs, config, deps, manifestLookup = {}, allowMissing = false) {
  const { db, logger } = deps;
  const dStr = date.toISOString().slice(0, 10);

- // DEBUG: Log entry
- logger.log('INFO', `[DependencyFetcher] 🔍 fetchDependencies called with ${calcs.length} calc(s): ${calcs.map(c => c.name || c.constructor?.name || 'unknown').join(', ')}`);
-
- // 1. Identify unique dependencies needed
- // CHANGED: Use a Map to track { normalizedName: originalName }
- const needed = new Map();
-
+ // 1. Resolve Dependencies (Normalize Name -> Original Name)
+ const needed = new Map();
  calcs.forEach(c => {
- const calcName = c.name || c.constructor?.name || 'unknown';
-
- // DEBUG: Log what we're checking
- logger.log('INFO', `[DependencyFetcher] 🔍 Processing calc: ${calcName}`);
- logger.log('INFO', `[DependencyFetcher] - has class: ${!!c.class}`);
- logger.log('INFO', `[DependencyFetcher] - class type: ${c.class ? (typeof c.class === 'function' ? 'function' : typeof c.class) : 'none'}`);
- logger.log('INFO', `[DependencyFetcher] - class.getDependencies: ${c.class && typeof c.class.getDependencies === 'function' ? 'YES' : 'NO'}`);
- logger.log('INFO', `[DependencyFetcher] - has getDependencies: ${typeof c.getDependencies === 'function' ? 'YES' : 'NO'}`);
- logger.log('INFO', `[DependencyFetcher] - has dependencies array: ${Array.isArray(c.dependencies) ? `YES (${c.dependencies.length} items)` : 'NO'}`);
- if (Array.isArray(c.dependencies)) {
- logger.log('INFO', `[DependencyFetcher] - dependencies array: ${c.dependencies.join(', ')}`);
- }
+ // Priority: class.getDependencies() > instance.getDependencies() > manifest.dependencies
+ const getDeps = c.class?.getDependencies || c.getDependencies;
+ const reqs = (typeof getDeps === 'function') ? getDeps.call(c.class || c) : c.dependencies;

- // [FIX] Support both .getDependencies() method and .dependencies array
- // CRITICAL: Prefer class.getDependencies() over manifest.dependencies
- // because the class method returns original case-sensitive names,
- // while manifest.dependencies contains normalized names
- let reqs = [];
- if (c.class && typeof c.class.getDependencies === 'function') {
- // Use the class method - returns original case-sensitive names
- reqs = c.class.getDependencies();
- logger.log('INFO', `[DependencyFetcher] ✅ Using c.class.getDependencies() - returned: ${JSON.stringify(reqs)}`);
- } else if (typeof c.getDependencies === 'function') {
- // Fallback: direct method call (if c is the class itself)
- reqs = c.getDependencies();
- logger.log('INFO', `[DependencyFetcher] ✅ Using c.getDependencies() - returned: ${JSON.stringify(reqs)}`);
- } else if (c.dependencies && Array.isArray(c.dependencies)) {
- // Last resort: use manifest's dependencies array (normalized)
- // This is less ideal because names are normalized, but we'll use them as-is
- reqs = c.dependencies;
- logger.log('INFO', `[DependencyFetcher] ⚠️ Using c.dependencies array (normalized) - returned: ${JSON.stringify(reqs)}`);
- } else {
- logger.log('WARN', `[DependencyFetcher] ❌ No way to get dependencies for ${calcName} - all methods failed`);
- }
-
  if (Array.isArray(reqs)) {
- logger.log('INFO', `[DependencyFetcher] ✅ Found ${reqs.length} dependencies for ${calcName}: ${reqs.join(', ')}`);
- reqs.forEach(r => {
- // We map the normalized version to the original requested version
- // This ensures we fetch the right file (normalized) but return it
- // with the casing the user code expects (original).
- needed.set(normalizeName(r), r);
- });
- } else {
- logger.log('WARN', `[DependencyFetcher] ⚠️ reqs is not an array for ${calcName}: ${typeof reqs}`);
+ reqs.forEach(r => needed.set(normalizeName(r), r));
  }
  });
-
- if (needed.size === 0) {
- logger.log('WARN', `[DependencyFetcher] ⚠️ No dependencies needed - returning empty object`);
- return {};
- }
-
- const calcNames = calcs.map(c => c.name || c.constructor?.name || 'unknown').join(', ');
- logger.log('INFO', `[DependencyFetcher] Fetching ${needed.size} dependencies for computation(s): ${calcNames} (date: ${dStr})`);
-
- // DEBUG: Log what dependencies we're looking for
- const depList = Array.from(needed.entries()).map(([norm, orig]) => `${orig} (normalized: ${norm})`).join(', ');
- logger.log('INFO', `[DependencyFetcher] Dependencies requested: ${depList}`);
-
+
+ if (needed.size === 0) return {};
+
+ logger.log('INFO', `[DependencyFetcher] 🔍 Fetching ${needed.size} dependencies for ${dStr}`);
+
  const results = {};
- const missingDeps = [];
- const emptyDeps = [];
-
- // Helper to build path string
- const buildPath = (category, normName) => {
- return `${config.resultsCollection || 'computation_results'}/${dStr}/${config.resultsSubcollection || 'results'}/${category}/${config.computationsSubcollection || 'computations'}/${normName}`;
- };
-
- // CHANGED: Iterate over the entries to access both normalized and original names
- const promises = Array.from(needed.entries()).map(async ([normName, originalName]) => {
- // Resolve Category from Lookup, default to 'analytics' if unknown
- // Note: manifestLookup keys are expected to be normalized
- const category = manifestLookup[normName] || 'analytics';
- const path = buildPath(category, normName);
+ const errors = [];
+
+ // 2. Fetch All Dependencies in Parallel
+ await Promise.all(Array.from(needed.entries()).map(async ([norm, orig]) => {
+ const category = manifestLookup[norm] || 'analytics';
+ const path = `${config.resultsCollection || 'computation_results'}/${dStr}/.../${category}/${norm}`;

  try {
- // Pass logger in config for fetchSingleResult
- const fetchConfig = { ...config, logger };
+ const data = await fetchSingleResult(db, { ...config, logger }, dStr, norm, category);

- // Fetch using the normalized name (system standard)
- const data = await fetchSingleResult(db, fetchConfig, dStr, normName, category);
-
- // CRITICAL: Validate that dependency exists and has data
- if (!data) {
- missingDeps.push({ name: originalName, normalizedName: normName, path });
- // Log level depends on context - ERROR for current date, INFO for historical
- if (allowMissing) {
- logger.log('INFO', `[DependencyFetcher] ⚠️ Missing dependency '${originalName}' (${normName}) from: ${path} (Historical context - allowed)`);
- } else {
- logger.log('ERROR', `[DependencyFetcher] ❌ Missing required dependency '${originalName}' (${normName}) from: ${path}`);
- }
- } else if (isDataEmpty(data)) {
- emptyDeps.push({ name: originalName, normalizedName: normName, path });
- // Log level depends on context - ERROR for current date, INFO for historical
- if (allowMissing) {
- logger.log('INFO', `[DependencyFetcher] ⚠️ Empty dependency '${originalName}' (${normName}) from: ${path} (Historical context - allowed)`);
- } else {
- logger.log('ERROR', `[DependencyFetcher] ❌ Empty dependency '${originalName}' (${normName}) from: ${path} - Document exists but contains no usable data`);
- }
+ if (!data || isDataEmpty(data)) {
+ // Determine severity based on context
+ const status = data ? 'EMPTY' : 'MISSING';
+ errors.push({ name: orig, path, reason: status });
+
+ // Log immediately for visibility
+ const level = allowMissing ? 'INFO' : 'ERROR';
+ logger.log(level, `[DependencyFetcher] ⚠️ Dependency '${orig}' ${status} at ${path}`);
  } else {
- // CHANGED: Store result using the ORIGINAL name so context.computed['CaseSensitive'] works
- results[originalName] = data;
- // DEBUG: Log successful dependency load
- const dataKeys = Object.keys(data);
- logger.log('INFO', `[DependencyFetcher] ✅ Stored dependency '${originalName}' in results. Keys: ${dataKeys.length} (sample: ${dataKeys.slice(0, 5).join(', ')})`);
+ results[orig] = data; // Store using Original Name
  }
  } catch (e) {
- missingDeps.push({ name: originalName, normalizedName: normName, path, error: e.message });
- // Log level depends on context - ERROR for current date, INFO for historical
- if (allowMissing) {
- logger.log('INFO', `[DependencyFetcher] ⚠️ Failed to load dependency '${originalName}' (${normName}) from: ${path} - Error: ${e.message} (Historical context - allowed)`);
- } else {
- logger.log('ERROR', `[DependencyFetcher] ❌ Failed to load dependency '${originalName}' (${normName}) from: ${path} - Error: ${e.message}`);
- }
+ errors.push({ name: orig, path, reason: e.message });
+ logger.log('ERROR', `[DependencyFetcher] Error loading '${orig}': ${e.message}`);
  }
- });
-
- await Promise.all(promises);
-
- // DEBUG: Log what we're returning
- const resultKeys = Object.keys(results);
- logger.log('INFO', `[DependencyFetcher] Returning ${resultKeys.length} dependencies: ${resultKeys.join(', ')}`);
-
- // CRITICAL: Fail if any required dependencies are missing or empty
- // EXCEPTION: For historical/lookback scenarios, missing dependencies are permissible
- if ((missingDeps.length > 0 || emptyDeps.length > 0) && !allowMissing) {
- const missingList = missingDeps.map(d => `'${d.name}' (path: ${d.path}${d.error ? `, error: ${d.error}` : ''})`).join(', ');
- const emptyList = emptyDeps.map(d => `'${d.name}' (path: ${d.path})`).join(', ');
-
- const errorMsg = `[DependencyFetcher] ❌ CRITICAL: Cannot proceed - Required dependencies missing or empty for computation(s): ${calcNames}\n` +
- `Missing dependencies (${missingDeps.length}): ${missingList}\n` +
- (emptyDeps.length > 0 ? `Empty dependencies (${emptyDeps.length}): ${emptyList}\n` : '') +
- `Date: ${dStr}\n` +
- `This computation will FAIL and no results will be saved.`;
-
- logger.log('ERROR', errorMsg);
- throw new Error(errorMsg);
- } else if (missingDeps.length > 0 || emptyDeps.length > 0) {
- // Historical/lookback context - log but allow missing dependencies
- const missingList = missingDeps.map(d => `'${d.name}' (path: ${d.path})`).join(', ');
- const emptyList = emptyDeps.map(d => `'${d.name}' (path: ${d.path})`).join(', ');
- logger.log('INFO', `[DependencyFetcher] ⚠️ Historical/Lookback context: Missing/empty dependencies allowed for ${calcNames} on ${dStr}. Missing: ${missingList}${emptyDeps.length > 0 ? `, Empty: ${emptyList}` : ''}`);
+ }));
+
+ // 3. Final Validation
+ if (errors.length > 0 && !allowMissing) {
+ throw new Error(`[DependencyFetcher] CRITICAL: Missing required dependencies: ${errors.map(e => e.name).join(', ')}`);
+ } else if (errors.length > 0) {
+ logger.log('INFO', `[DependencyFetcher] ⚠️ Allowed missing/empty dependencies in Historical context: ${errors.map(e => e.name).join(', ')}`);
  }
-
+
  return results;
  }

  /**
  * Fetches result series (Historical data) for lookbacks.
- * @param {string} endDateStr - The most recent date.
- * @param {Array} calcNames - Names of computations to fetch.
- * @param {Object} manifestLookup - Map of { [calcName]: categoryString }.
  */
  async function fetchResultSeries(endDateStr, calcNames, manifestLookup, config, deps, lookbackDays) {
  const { db, logger } = deps;
- const results = {};
- const dates = [];
+ const results = {}; // normalizedName -> { date -> data }

- // Generate date list (starting from yesterday relative to endDateStr)
+ // Initialize results structure
+ calcNames.forEach(n => results[normalizeName(n)] = {});
+
+ // Generate Date List
+ const dates = [];
  const d = new Date(endDateStr);
  for (let i = 0; i < lookbackDays; i++) {
- d.setUTCDate(d.getUTCDate() - 1);
+ d.setUTCDate(d.getUTCDate() - 1);
  dates.push(d.toISOString().slice(0, 10));
  }
-
- // Initialize structure
- calcNames.forEach(name => { results[normalizeName(name)] = {}; });
-
- logger.log('INFO', `[DependencyFetcher] Loading series for ${calcNames.length} computation dependencies over ${lookbackDays} days: ${calcNames.join(', ')}`);

- const fetchOps = [];
-
+ // Build Fetch Operations
+ const ops = [];
  for (const dateStr of dates) {
  for (const rawName of calcNames) {
- const normName = normalizeName(rawName);
- const category = manifestLookup[normName] || 'analytics';
-
- fetchOps.push(async () => {
- const fetchConfig = { ...config, logger };
- const val = await fetchSingleResult(db, fetchConfig, dateStr, rawName, category);
- // CRITICAL: For series/lookback, we allow missing dates (historical lookback may have gaps)
- // This is expected behavior - not all historical dates will have data
- // But we still validate that the data isn't empty if it exists
+ const norm = normalizeName(rawName);
+ const category = manifestLookup[norm] || 'analytics';
+ ops.push(async () => {
+ const val = await fetchSingleResult(db, { ...config, logger }, dateStr, rawName, category);
  if (val && !isDataEmpty(val)) {
- if (!results[normName]) results[normName] = {};
- results[normName][dateStr] = val;
- } else if (val && isDataEmpty(val)) {
- // Log but don't fail - series can have gaps, empty data is treated as missing
- logger.log('INFO', `[DependencyFetcher] ⚠️ Empty dependency '${rawName}' found at ${dateStr} in series (allowing gap - historical lookback)`);
+ results[norm][dateStr] = val;
  }
- // If val is null, that's fine - missing dates in historical series are permissible
  });
  }
  }
-
- // Limited concurrency batch execution (Batch size 20)
+
+ // Execute in Batches (Limit Concurrency)
+ logger.log('INFO', `[DependencyFetcher] 📚 Loading series: ${calcNames.length} calcs x ${lookbackDays} days (${ops.length} ops)`);
  const BATCH_SIZE = 20;
- for (let i = 0; i < fetchOps.length; i += BATCH_SIZE) {
- await Promise.all(fetchOps.slice(i, i + BATCH_SIZE).map(fn => fn()));
+ for (let i = 0; i < ops.length; i += BATCH_SIZE) {
+ await Promise.all(ops.slice(i, i + BATCH_SIZE).map(fn => fn()));
  }
-
+
  return results;
  }

  /**
- * Core Helper: Fetches a single result, handles Sharding & Compression.
+ * Bridge function for WorkflowOrchestrator compatibility.
  */
- async function fetchSingleResult(db, config, dateStr, name, category) {
- const resultsCollection = config.resultsCollection || 'computation_results';
- const resultsSubcollection = config.resultsSubcollection || 'results';
- const computationsSubcollection = config.computationsSubcollection || 'computations';
-
- const path = `${resultsCollection}/${dateStr}/${resultsSubcollection}/${category}/${computationsSubcollection}/${name}`;
-
- // Log path - use console.log if logger not available (for backward compatibility)
- if (config.logger) {
- config.logger.log('INFO', `[DependencyFetcher] 📂 Loading Dependency '${name}' from: ${path}`);
- } else {
- console.log(`[DependencyFetcher] 📂 Loading Dependency '${name}' from: ${path}`);
- }
-
- const docRef = db.collection(resultsCollection)
- .doc(dateStr)
- .collection(resultsSubcollection)
- .doc(category)
- .collection(computationsSubcollection)
- .doc(name);
-
- const snap = await docRef.get();
- if (!snap.exists) {
- // Log the missing document path clearly
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ Document does not exist at: ${path}`);
- } else {
- console.error(`[DependencyFetcher] ❌ Document does not exist at: ${path}`);
- }
- return null;
- }
-
- let data = snap.data();
-
- // CRITICAL: Don't check if empty yet - we need to load shards/compressed data first
- // A sharded document will only have metadata in the pointer doc, but the actual data is in shards
- // A compressed document will only have metadata + payload, but the actual data is in the payload
-
- // 1. Handle Compression
- if (data._compressed && data.payload) {
- try {
- const buffer = (data.payload instanceof Buffer) ? data.payload : data.payload.toDate();
- const decompressed = zlib.gunzipSync(buffer);
- const jsonStr = decompressed.toString('utf8');
- const realData = JSON.parse(jsonStr);
- // Merge decompressed data
- data = { ...data, ...realData };
- delete data.payload;
- } catch (e) {
- const errorMsg = `Decompression failed for ${name}: ${e.message}`;
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ ${errorMsg} at: ${path}`);
- } else {
- console.error(`[DependencyFetcher] ❌ ${errorMsg} at: ${path}`);
- }
- return null;
- }
- }
-
- // 2. Handle Sharding (MUST happen before empty check)
- if (data._sharded) {
- const shardPath = `${path}/_shards`;
- if (config.logger) {
- config.logger.log('INFO', `[DependencyFetcher] 📂 Loading Shards for '${name}' from: ${shardPath}`);
- } else {
- console.log(`[DependencyFetcher] 📂 Loading Shards for '${name}' from: ${shardPath}`);
- }
-
- const shardCol = docRef.collection('_shards');
- const shardSnaps = await shardCol.get();
-
- if (shardSnaps.empty) {
- // No shards found - this is a problem
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ Document marked as sharded but no shards found at: ${shardPath}`);
- } else {
- console.error(`[DependencyFetcher] ❌ Document marked as sharded but no shards found at: ${shardPath}`);
- }
- return null; // Return null so it gets caught as missing
- }
-
- // Merge shard contents
- let hasData = false;
- shardSnaps.forEach(shard => {
- let shardData = shard.data();
- const shardId = shard.id;
- if (config.logger) {
- config.logger.log('TRACE', `[DependencyFetcher] 📂 Loading Shard '${shardId}' for '${name}' from: ${shardPath}/${shardId}`);
- }
-
- // CRITICAL: Shards themselves can be compressed (common in big data)
- // Decompress the shard if needed before merging
- if (shardData._compressed && shardData.payload) {
- try {
- const buffer = (shardData.payload instanceof Buffer) ? shardData.payload :
- (shardData.payload._byteString ? Buffer.from(shardData.payload._byteString, 'base64') :
- Buffer.from(shardData.payload));
- const decompressed = zlib.gunzipSync(buffer);
- const jsonStr = decompressed.toString('utf8');
- const realData = JSON.parse(jsonStr);
- // If it's double-encoded, parse again
- const parsedData = (typeof realData === 'string') ? JSON.parse(realData) : realData;
- shardData = { ...shardData, ...parsedData };
- delete shardData.payload;
- } catch (e) {
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ Failed to decompress shard '${shardId}' for '${name}': ${e.message}`);
- } else {
- console.error(`[DependencyFetcher] ❌ Failed to decompress shard '${shardId}' for '${name}': ${e.message}`);
- }
- // Continue with uncompressed data if decompression fails
- }
- }
-
- // Merge shard contents, ignoring internal metadata if it clashes
- Object.entries(shardData).forEach(([k, v]) => {
- if (!k.startsWith('_')) {
- data[k] = v;
- hasData = true;
- }
- });
- });
-
- // If shards contained no actual data, treat as empty
- if (!hasData) {
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ Shards found but contain no data at: ${shardPath}`);
- } else {
- console.error(`[DependencyFetcher] ❌ Shards found but contain no data at: ${shardPath}`);
- }
- return null;
- }
-
- // After loading shards, remove shard metadata from data object for cleaner output
- // Keep only the actual data fields
- const cleanedData = {};
- const dataKeys = [];
- Object.entries(data).forEach(([k, v]) => {
- if (!k.startsWith('_')) {
- cleanedData[k] = v;
- dataKeys.push(k);
- }
- });
- data = cleanedData;
-
- // Log what we loaded for debugging
- if (config.logger) {
- config.logger.log('INFO', `[DependencyFetcher] ✅ Loaded ${shardSnaps.size} shard(s) for '${name}'. Data fields: ${dataKeys.length > 0 ? dataKeys.slice(0, 10).join(', ') + (dataKeys.length > 10 ? `... (+${dataKeys.length - 10} more)` : '') : 'none'}`);
- }
- }
-
- // Final validation: ensure we have usable data after all processing (decompression + sharding)
- // Only check if we haven't already determined it's empty
- if (isDataEmpty(data)) {
- if (config.logger) {
- config.logger.log('ERROR', `[DependencyFetcher] ❌ Dependency '${name}' loaded but is empty (no usable data) at: ${path}`);
- } else {
- console.error(`[DependencyFetcher] ❌ Dependency '${name}' loaded but is empty (no usable data) at: ${path}`);
- }
- return null;
+ async function fetchExistingResults(dateStr, calcs, fullManifest, config, deps, isHistoricalContext) {
+ const lookup = {};
+ if (Array.isArray(fullManifest)) {
+ fullManifest.forEach(c => lookup[normalizeName(c.name)] = c.category || 'analytics');
  }
-
- return data;
+ // Ensure Date Object
+ const dateObj = new Date(dateStr.includes('T') ? dateStr : dateStr + 'T00:00:00Z');
+ return fetchDependencies(dateObj, calcs, config, deps, lookup, isHistoricalContext);
  }

  module.exports = { fetchDependencies, fetchResultSeries, fetchExistingResults };
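
For orientation, the sketch below shows how a consumer might call the module's exports after this refactor. It is illustrative only and not part of the published package: the Firestore handle `db`, the `logger` object, the manifest array, the computation descriptors in `calcs`, the date value, and the require path are assumptions about the surrounding system, and the collection names simply mirror the defaults visible in the diff.

// Hypothetical caller — not shipped in bulltrackers-module. Assumes the Firestore
// Admin SDK is initialized elsewhere and that logger.log(level, message) exists.
const { fetchExistingResults, fetchResultSeries } =
  require('./functions/computation-system/data/DependencyFetcher');

async function loadInputsForDate(db, logger, fullManifest, calcs, dateStr) {
  const config = {
    resultsCollection: 'computation_results',       // defaults shown in fetchSingleResult
    resultsSubcollection: 'results',
    computationsSubcollection: 'computations',
  };
  const deps = { db, logger };

  // Current-day pass: missing or empty dependencies throw unless the last
  // argument (isHistoricalContext) is true.
  const computed = await fetchExistingResults(dateStr, calcs, fullManifest, config, deps, false);

  // 7-day lookback: gaps are tolerated; the result is keyed by normalized
  // calc name, then by YYYY-MM-DD date string.
  const lookup = {};
  // Keys are expected to be normalized names; the module itself uses its
  // normalizeName util when it builds this lookup in fetchExistingResults.
  fullManifest.forEach(c => { lookup[c.name] = c.category || 'analytics'; });
  const series = await fetchResultSeries(dateStr, calcs.map(c => c.name), lookup, config, deps, 7);

  return { computed, series };
}

Per the diff, fetchExistingResults resolves its return object by the original dependency names, while fetchResultSeries returns an object shaped as { normalizedName: { date: data } }.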