bulltrackers-module 1.0.721 → 1.0.723

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -100,166 +100,94 @@ async function ensureTableExists(datasetId, tableId, schema, options = {}, logge
     throw error;
   }
 }
-
 /**
  * Insert rows using BigQuery MERGE statement (handles duplicates natively via SQL)
- * More efficient than checking then inserting - BigQuery handles deduplication in SQL
- * Uses a temporary table and MERGE statement for atomic deduplication
- * @param {string} datasetId - Dataset ID
- * @param {string} tableId - Table ID
- * @param {Array} rows - Array of row objects
- * @param {Array} keyFields - Fields that form unique key (e.g., ['date', 'user_id', 'user_type'])
- * @param {object} logger - Logger instance
- * @returns {Promise<number>} Number of rows actually inserted (not duplicates)
+ * FIXED: Ensures temporary tables are always deleted using try/finally block.
  */
 async function insertRowsWithMerge(datasetId, tableId, rows, keyFields, logger = null) {
-  if (!rows || rows.length === 0) {
-    if (logger) logger.log('WARN', `[BigQuery] No rows to merge into ${datasetId}.${tableId}`);
-    return 0;
-  }
+  if (!rows || rows.length === 0) return 0;

-  const MAX_ROW_SIZE = 9 * 1024 * 1024; // 9MB safety limit
-  const validRows = rows.filter(row => {
-    const rowSize = JSON.stringify(row).length;
-    return rowSize <= MAX_ROW_SIZE;
-  });
+  const MAX_ROW_SIZE = 9 * 1024 * 1024;
+  const validRows = rows.filter(row => JSON.stringify(row).length <= MAX_ROW_SIZE);

   if (validRows.length === 0) {
     if (logger) logger.log('WARN', `[BigQuery] All rows too large for MERGE into ${datasetId}.${tableId}`);
     return 0;
   }
+
+  // 1. Define ID once, outside the try block, so it's consistent in finally/catch
+  const tempTableId = `${tableId}_temp_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+  const tablePath = `${datasetId}.${tableId}`;
+  const tempTablePath = `${datasetId}.${tempTableId}`;

   try {
-    const tablePath = `${datasetId}.${tableId}`;
-    const keyFieldsStr = keyFields.join(', ');
-    const tempTableId = `${tableId}_temp_${Date.now()}`;
-    const tempTablePath = `${datasetId}.${tempTableId}`;
-
-    // Get table schema
     const dataset = await getOrCreateDataset(datasetId, logger);
     const table = dataset.table(tableId);
     const [tableMetadata] = await table.getMetadata();
     const schema = tableMetadata.schema.fields;

-    // Create temp table with same schema
+    // 2. Create Temp Table
     const tempTable = dataset.table(tempTableId);
-    await tempTable.create({
-      schema: schema,
-      description: 'Temporary table for merge operation'
+    await tempTable.create({
+      schema: schema,
+      expirationTime: Date.now() + (1000 * 60 * 60 * 1), // Auto-delete after 1 hour (safety net)
+      description: 'Temporary table for merge operation'
     });

-    if (logger) {
-      logger.log('INFO', `[BigQuery] Created temp table ${tempTableId} for MERGE operation`);
-    }
-
-    // Insert all rows into temp table using LOAD JOB (free, not streaming)
-    // Write to temporary file (load jobs require a file, not a stream)
-    const tempFile = path.join(os.tmpdir(), `bigquery_merge_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
+    // 3. Load Data into Temp Table
+    const tempFile = path.join(os.tmpdir(), `bq_merge_${tempTableId}.ndjson`);
     const ndjson = validRows.map(r => JSON.stringify(r)).join('\n');

     try {
       fs.writeFileSync(tempFile, ndjson, 'utf8');
-
-      // Load into temp table using load job (FREE) from temp file
-      // Use createLoadJob to get a Job object we can wait on
       const [loadJob] = await tempTable.createLoadJob(tempFile, {
         sourceFormat: 'NEWLINE_DELIMITED_JSON',
         writeDisposition: 'WRITE_APPEND',
-        autodetect: false // Use existing table schema
+        autodetect: false
       });
-
-      // [FIX] Use native job.promise() instead of custom polling
-      // This automatically polls and waits for completion
       await loadJob.promise();
-
-      // Get job metadata to check for errors
-      const [jobMetadata] = await loadJob.getMetadata();
-
-      // Check for errors
-      if (jobMetadata.status?.errorResult) {
-        throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
-      }
     } finally {
-      // Clean up temp file
-      try {
-        if (fs.existsSync(tempFile)) {
-          fs.unlinkSync(tempFile);
-        }
-      } catch (cleanupError) {
-        if (logger) {
-          logger.log('WARN', `[BigQuery] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
-        }
-      }
-    }
-
-    if (logger) {
-      logger.log('INFO', `[BigQuery] Loaded ${validRows.length} rows into temp table ${tempTableId} using LOAD JOB (free)`);
+      if (fs.existsSync(tempFile)) try { fs.unlinkSync(tempFile); } catch (e) {}
     }

-    // Use MERGE to insert new rows or update existing rows (SQL-native deduplication/overwrite)
-    // This is more efficient than checking in JavaScript
+    // 4. Perform MERGE
     const mergeConditions = keyFields.map(f => `target.${f} = source.${f}`).join(' AND ');
-
-    // Build UPDATE clause - update all non-key fields
     const allFields = schema.map(f => f.name);
     const nonKeyFields = allFields.filter(f => !keyFields.includes(f));
     const updateClause = nonKeyFields.map(f => `${f} = source.${f}`).join(', ');

-    // Count rows that will be inserted (don't exist in target) vs updated (already exist)
-    // Query BEFORE the MERGE to get accurate counts
-    const [insertedCountResult] = await query(`
+    // Query to count inserts vs updates
+    const [countRes] = await query(`
       SELECT COUNT(*) as inserted
       FROM \`${tempTablePath}\` AS source
-      WHERE NOT EXISTS (
-        SELECT 1 FROM \`${tablePath}\` AS target
-        WHERE ${mergeConditions}
-      )
+      WHERE NOT EXISTS (SELECT 1 FROM \`${tablePath}\` AS target WHERE ${mergeConditions})
     `, {}, logger);
-
-    const rowsInserted = insertedCountResult[0]?.inserted || 0;
-    const rowsUpdated = validRows.length - rowsInserted;
-
-    // Now perform the MERGE (inserts new rows, updates existing rows)
+    const rowsInserted = countRes[0]?.inserted || 0;
+
     const mergeQuery = `
       MERGE \`${tablePath}\` AS target
       USING \`${tempTablePath}\` AS source
       ON ${mergeConditions}
-      WHEN MATCHED THEN
-        UPDATE SET ${updateClause}
-      WHEN NOT MATCHED THEN
-        INSERT ROW
+      WHEN MATCHED THEN UPDATE SET ${updateClause}
+      WHEN NOT MATCHED THEN INSERT ROW
     `;
-
     await query(mergeQuery, {}, logger);

-    // Drop temp table
-    await tempTable.delete();
-
-    if (logger) {
-      if (rowsUpdated > 0) {
-        logger.log('INFO', `[BigQuery] MERGE completed: ${rowsInserted} new rows inserted, ${rowsUpdated} existing rows updated in ${tablePath}`);
-      } else {
-        logger.log('INFO', `[BigQuery] MERGE completed: ${rowsInserted} new rows inserted into ${tablePath} (${validRows.length - rowsInserted} duplicates skipped via SQL)`);
-      }
-    }
+    if (logger) logger.log('INFO', `[BigQuery] MERGE Success: ${rowsInserted} new, ${validRows.length - rowsInserted} updated.`);

     return rowsInserted;
+
   } catch (error) {
-    const errorDetails = {
-      message: error.message,
-      code: error.code,
-      errors: error.errors
-    };
-    if (logger) {
-      logger.log('ERROR', `[BigQuery] MERGE failed for ${datasetId}.${tableId}:`, JSON.stringify(errorDetails, null, 2));
-    }
-    // Try to clean up temp table if it exists
+    if (logger) logger.log('ERROR', `[BigQuery] MERGE failed for ${datasetId}.${tableId}`, error);
+    throw error;
+  } finally {
+    // 5. CLEANUP: Always delete the temp table (the previous bug was here)
     try {
       const dataset = await getOrCreateDataset(datasetId, logger);
-      const tempTableId = `${tableId}_temp_${Date.now()}`;
-      await dataset.table(tempTableId).delete().catch(() => {}); // Ignore cleanup errors
-    } catch {}
-    throw error;
+      await dataset.table(tempTableId).delete({ ignoreNotFound: true }).catch(() => {});
+    } catch (cleanupError) {
+      if (logger) logger.log('WARN', `[BigQuery] Cleanup failed for ${tempTableId}: ${cleanupError.message}`);
+    }
   }
 }

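Editor's note (illustrative, not part of the diff): a minimal sketch of how the reworked insertRowsWithMerge above might be called; the dataset, table name, row shape, and key fields here are hypothetical.

// Hypothetical caller: upsert daily snapshots, deduplicated on (date, user_id, user_type).
const rows = [
  { date: '2025-01-01', user_id: 42, user_type: 'retail', balance: 100.5 },
  { date: '2025-01-01', user_id: 43, user_type: 'retail', balance: 87.2 }
];

// keyFields drive the MERGE ON condition; every non-key column is overwritten on match.
const inserted = await insertRowsWithMerge(
  'bulltrackers_data',      // datasetId
  'daily_user_snapshots',   // tableId (hypothetical)
  rows,
  ['date', 'user_id', 'user_type'],
  logger
);
if (logger) logger.log('INFO', `${inserted} new rows, ${rows.length - inserted} updated`);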
@@ -1700,12 +1628,31 @@ async function queryPIRankings(dateStr, logger = null) {
  * @param {object} logger - Logger instance
  * @returns {Promise<object>} Price data map in format { instrumentId: { "YYYY-MM-DD": price } }
  */
+/**
+ * Query asset prices from BigQuery with smart routing.
+ * This function intelligently routes to more efficient queries when possible:
+ * - If querying a single date with no instrument filter -> uses queryAllPricesForDate
+ * - Otherwise uses the general date range query
+ *
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+ * @param {string|null} startDateStr - Start date in YYYY-MM-DD format (optional)
+ * @param {string|null} endDateStr - End date in YYYY-MM-DD format (optional)
+ * @param {number[]|null} instrumentIds - Array of instrument IDs to filter (optional)
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
 async function queryAssetPrices(startDateStr = null, endDateStr = null, instrumentIds = null, logger = null) {
   if (process.env.BIGQUERY_ENABLED === 'false') {
     if (logger) logger.log('DEBUG', '[BigQuery] Asset prices query skipped (BIGQUERY_ENABLED=false)');
     return null;
   }

+  // SMART ROUTING: If querying single date with no instrument filter, use optimized function
+  if (startDateStr && startDateStr === endDateStr && (!instrumentIds || instrumentIds.length === 0)) {
+    if (logger) logger.log('INFO', `[BigQuery] 🎯 Routing to optimized single-date query for ${startDateStr}`);
+    return queryAllPricesForDate(startDateStr, logger);
+  }
+
   const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
   const tablePath = `${datasetId}.asset_prices`;

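Editor's note (illustrative, not part of the diff): the routing guard added above only short-circuits when the requested range collapses to a single day and no instrument filter is supplied; a sketch of the three paths, with hypothetical arguments.

// Single date, no instrument filter -> delegated to queryAllPricesForDate('2025-06-02')
await queryAssetPrices('2025-06-02', '2025-06-02', null, logger);

// Same date but with an instrument filter -> falls through to the general range query
await queryAssetPrices('2025-06-02', '2025-06-02', [12345], logger);

// Multi-day range -> general query with the whereClause built from start/end dates
await queryAssetPrices('2025-06-01', '2025-06-07', null, logger);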
@@ -1733,7 +1680,12 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
         instrument_id,
         date,
         price,
-        ticker
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
       FROM \`${tablePath}\`
       ${whereClause}
       ORDER BY instrument_id, date DESC
@@ -1757,7 +1709,7 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
       return {};
     }

-    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": price } }
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
     const priceMap = {};
     for (const row of rows) {
       const instrumentId = String(row.instrument_id);
@@ -1767,8 +1719,16 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
         priceMap[instrumentId] = {};
       }

-      // Use close price if available, otherwise use price
-      priceMap[instrumentId][dateStr] = row.price || null;
+      // Return full price data structure for consistency
+      priceMap[instrumentId][dateStr] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
     }

     if (logger) {
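Editor's note (illustrative, not part of the diff): the per-date value in the returned map is now an object rather than a bare number, so callers that previously read a price directly need one more property access; a sketch under that assumption, with a hypothetical instrument ID.

const priceMap = await queryAssetPrices('2025-06-01', '2025-06-07', [12345], logger);

// Before this change: priceMap['12345']['2025-06-01'] was a number (or null).
// After: it is an object { price, ticker, open, high, low, close, volume }.
const day = priceMap?.['12345']?.['2025-06-01'];
if (day) {
  const effectivePrice = day.close ?? day.price; // hypothetical policy: prefer close when present
  console.log(`${day.ticker}: ${effectivePrice} (volume ${day.volume})`);
}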
@@ -1784,6 +1744,180 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
     }
   }

+/**
+ * Query all prices for a specific date (all tickers).
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": price } }
+ * @param {string} dateStr - Date in YYYY-MM-DD format
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
+async function queryAllPricesForDate(dateStr, logger = null) {
+  if (process.env.BIGQUERY_ENABLED === 'false') {
+    if (logger) logger.log('DEBUG', '[BigQuery] All prices query skipped (BIGQUERY_ENABLED=false)');
+    return null;
+  }
+
+  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+  const tablePath = `${datasetId}.asset_prices`;
+
+  try {
+    const sqlQuery = `
+      SELECT
+        instrument_id,
+        date,
+        price,
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
+      FROM \`${tablePath}\`
+      WHERE date = @date
+      ORDER BY instrument_id ASC
+    `;
+
+    const params = { date: dateStr };
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] 🔍 Querying all prices for date ${dateStr} from ${tablePath}`);
+    }
+
+    const rows = await query(sqlQuery, { params }, logger);
+
+    if (!rows || rows.length === 0) {
+      if (logger) logger.log('INFO', `[BigQuery] No prices found for date ${dateStr} in ${tablePath}`);
+      return {};
+    }
+
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+    const priceMap = {};
+    for (const row of rows) {
+      const instrumentId = String(row.instrument_id);
+      const dateKey = row.date; // Already in YYYY-MM-DD format
+
+      if (!priceMap[instrumentId]) {
+        priceMap[instrumentId] = {};
+      }
+
+      priceMap[instrumentId][dateKey] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
+    }
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] ✅ Retrieved prices for ${Object.keys(priceMap).length} instruments for date ${dateStr}`);
+    }
+
+    return priceMap;
+  } catch (error) {
+    if (logger) {
+      logger.log('WARN', `[BigQuery] All prices query failed for date ${dateStr}: ${error.message}`);
+    }
+    return null;
+  }
+}
+
+/**
+ * Query prices for specific ticker(s) over a date range.
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+ * @param {string|string[]} tickers - Single ticker string or array of ticker strings (e.g., "AAPL" or ["AAPL", "MSFT"])
+ * @param {string} startDateStr - Start date in YYYY-MM-DD format
+ * @param {string} endDateStr - End date in YYYY-MM-DD format (inclusive)
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
+async function queryPricesForTickers(tickers, startDateStr, endDateStr, logger = null) {
+  if (process.env.BIGQUERY_ENABLED === 'false') {
+    if (logger) logger.log('DEBUG', '[BigQuery] Ticker prices query skipped (BIGQUERY_ENABLED=false)');
+    return null;
+  }
+
+  if (!tickers || (Array.isArray(tickers) && tickers.length === 0)) {
+    if (logger) logger.log('WARN', '[BigQuery] No tickers provided to queryPricesForTickers');
+    return {};
+  }
+
+  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+  const tablePath = `${datasetId}.asset_prices`;
+
+  try {
+    const tickerArray = Array.isArray(tickers) ? tickers : [tickers];
+
+    const sqlQuery = `
+      SELECT
+        instrument_id,
+        date,
+        price,
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
+      FROM \`${tablePath}\`
+      WHERE ticker IN UNNEST(@tickers)
+        AND date BETWEEN @startDate AND @endDate
+      ORDER BY instrument_id ASC, date DESC
+    `;
+
+    const params = {
+      tickers: tickerArray,
+      startDate: startDateStr,
+      endDate: endDateStr
+    };
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] 🔍 Querying prices for tickers [${tickerArray.join(', ')}] from ${startDateStr} to ${endDateStr} from ${tablePath}`);
+    }
+
+    const rows = await query(sqlQuery, { params }, logger);
+
+    if (!rows || rows.length === 0) {
+      if (logger) logger.log('INFO', `[BigQuery] No prices found for tickers [${tickerArray.join(', ')}] in date range`);
+      return {};
+    }
+
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+    const priceMap = {};
+    for (const row of rows) {
+      const instrumentId = String(row.instrument_id);
+      const dateKey = row.date;
+
+      if (!priceMap[instrumentId]) {
+        priceMap[instrumentId] = {};
+      }
+
+      priceMap[instrumentId][dateKey] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
+    }
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] ✅ Retrieved prices for ${Object.keys(priceMap).length} instruments across ${tickerArray.length} ticker(s)`);
+    }
+
+    return priceMap;
+  } catch (error) {
+    if (logger) {
+      logger.log('WARN', `[BigQuery] Ticker prices query failed: ${error.message}`);
+    }
+    return null;
+  }
+}
+
 /**
  * Query ticker mappings from BigQuery
  * Returns data in format: { instrumentId: "TICKER", ... }
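Editor's note (illustrative, not part of the diff): a usage sketch for the new queryPricesForTickers helper; the ticker list is bound as a query parameter and expanded server-side via WHERE ticker IN UNNEST(@tickers), so no user input is interpolated into the SQL string. Tickers and dates below are hypothetical.

const prices = await queryPricesForTickers(['AAPL', 'MSFT'], '2025-06-01', '2025-06-07', logger);

for (const [instrumentId, byDate] of Object.entries(prices ?? {})) {
  for (const [date, row] of Object.entries(byDate)) {
    console.log(instrumentId, date, row.ticker, row.close);
  }
}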
@@ -2142,6 +2276,8 @@ module.exports = {
   queryHistoryData,
   querySocialData,
   queryAssetPrices,
+  queryAllPricesForDate,
+  queryPricesForTickers,
   queryPIMasterList,
   queryPIRankings,
   queryInstrumentInsights,
@@ -215,7 +215,10 @@ exports.fetchAndStorePrices = async (config, dependencies) => {
       // Continue - price data is stored, indexer failure is non-critical
     }

-    const successMessage = `Successfully processed and saved daily prices for ${results.length} instruments to ${batchPromises.length} shards.`;
+    const firestoreShardCount = Object.keys(shardUpdates).length;
+    const successMessage = process.env.FIRESTORE_PRICE_FETCH !== 'false'
+      ? `Successfully processed and saved daily prices for ${results.length} instruments to ${firestoreShardCount} Firestore shards.`
+      : `Successfully processed and saved daily prices for ${results.length} instruments (Firestore writes disabled).`;
     logger.log('SUCCESS', `[PriceFetcherHelpers] ${successMessage}`);
     return { success: true, message: successMessage, instrumentsProcessed: results.length };
   } catch (error) { logger.log('ERROR', '[PriceFetcherHelpers] Fatal error during closing price update', { errorMessage: error.message, errorStack: error.stack, headerId: selectedHeader ? selectedHeader.id : 'not-selected' }); throw error;
@@ -1,10 +1,10 @@
 /**
  * @fileoverview Main pipe: pipe.maintenance.runFetchInsights
- * REFACTORED: Now stateless and receives dependencies.
- * UPDATED: Implements Gzip compression for payloads >900KB.
+ * REFACTORED: Prioritizes BigQuery storage.
+ * UPDATED: Firestore (and compression) is now a fallback only.
  */
 const { FieldValue } = require('@google-cloud/firestore');
-const zlib = require('zlib'); // [NEW] Required for compression
+const zlib = require('zlib');

 /**
  * Main pipe: pipe.maintenance.runFetchInsights
@@ -25,7 +25,6 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
   try {
     // Extract collection name from registry path: daily_instrument_insights/{date}
     const basePath = getCollectionPath('rootData', 'instrumentInsights', { date: '2025-01-01' });
-    // Path is like "daily_instrument_insights/2025-01-01", extract collection name
     insightsCollectionName = basePath.split('/')[0];
   } catch (e) {
     logger.log('WARN', `[FetchInsightsHelpers] Failed to get collection from registry, using config: ${e.message}`);
@@ -86,54 +85,12 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
   }

   const today = new Date().toISOString().slice(0, 10);
-  const docRef = db.collection(insightsCollectionName).doc(today);
-
-  // [FIX] --- COMPRESSION LOGIC START ---
-
-  // 1. Prepare a standard object to verify size
-  // We use a static date string here so it can be stringified safely
-  const dataObject = {
-    fetchedAt: new Date().toISOString(),
-    instrumentCount: insightsData.length,
-    insights: insightsData
-  };
-
-  const jsonString = JSON.stringify(dataObject);
-  const byteSize = Buffer.byteLength(jsonString);
-
-  // Firestore limit is ~1MB. We use 900KB as a safety threshold.
-  const SIZE_THRESHOLD = 900 * 1024;

-  let firestorePayload;
-
-  if (byteSize > SIZE_THRESHOLD) {
-    logger.log('INFO', `[FetchInsightsHelpers] Payload size ${(byteSize / 1024).toFixed(2)}KB exceeds threshold. Compressing...`);
-
-    // Gzip the JSON string
-    const compressedBuffer = zlib.gzipSync(Buffer.from(jsonString));
-
-    // Create the compressed wrapper structure
-    firestorePayload = {
-      fetchedAt: FieldValue.serverTimestamp(), // Keep outer timestamp for indexing
-      instrumentCount: insightsData.length, // Keep outer metadata for indexing
-      _compressed: true,
-      payload: compressedBuffer
-    };
-
-    logger.log('INFO', `[FetchInsightsHelpers] Compressed size: ${(compressedBuffer.length / 1024).toFixed(2)}KB.`);
-  } else {
-    // Standard uncompressed write (Legacy behavior)
-    firestorePayload = {
-      fetchedAt: FieldValue.serverTimestamp(),
-      instrumentCount: insightsData.length,
-      insights: insightsData
-    };
-  }
-  // [FIX] --- COMPRESSION LOGIC END ---
+  // --- STORAGE STRATEGY: BigQuery First, Firestore Fallback ---
+  let storageSuccess = false;
+  let storageMethod = 'NONE';

-  await docRef.set(firestorePayload);
-
-  // Write insights to BigQuery (one row per instrument)
+  // 1. Attempt BigQuery Write
   if (process.env.BIGQUERY_ENABLED !== 'false') {
     try {
       const { insertRows, ensureInstrumentInsightsTable } = require('../../core/utils/bigquery_utils');
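Editor's note (illustrative, not part of the diff): the new storage strategy attempts BigQuery first and only falls back to Firestore when that write fails or BigQuery is disabled; a minimal sketch of the control flow, with hypothetical writeToBigQuery / writeToFirestore helpers standing in for the real insertRows and docRef.set calls.

// Hypothetical helpers standing in for insertRows(...) and docRef.set(...).
async function storeInsights(insightsData, logger) {
  let storageMethod = 'NONE';

  if (process.env.BIGQUERY_ENABLED !== 'false') {
    try {
      await writeToBigQuery(insightsData);   // hypothetical
      storageMethod = 'BIGQUERY';
    } catch (bqError) {
      logger.log('WARN', `BigQuery write failed, falling back to Firestore: ${bqError.message}`);
    }
  }

  if (storageMethod === 'NONE') {
    await writeToFirestore(insightsData);    // hypothetical fallback (gzipped when large)
    storageMethod = 'FIRESTORE';
  }

  return storageMethod;
}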
@@ -152,46 +109,87 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
       await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);

-      logger.log('INFO', `[FetchInsightsHelpers] Successfully stored ${bigqueryRows.length} insight records to BigQuery`);
+      logger.log('INFO', `[FetchInsightsHelpers] Successfully stored ${bigqueryRows.length} insight records to BigQuery.`);
+      storageSuccess = true;
+      storageMethod = 'BIGQUERY';
     } catch (bqError) {
-      logger.log('WARN', `[FetchInsightsHelpers] BigQuery insights write failed: ${bqError.message}`);
-      // Continue - Firestore write succeeded
+      logger.log('WARN', `[FetchInsightsHelpers] ⚠️ BigQuery write failed. Proceeding to Firestore fallback. Error: ${bqError.message}`);
+      // Proceed to Firestore logic below
     }
   }
-
-  // Update root data indexer for today's date after insights data is stored
+
+  // 2. Firestore Fallback (If BigQuery failed or is disabled)
+  if (!storageSuccess) {
+    logger.log('INFO', `[FetchInsightsHelpers] 📂 Writing to Firestore (Fallback)...`);
+    const docRef = db.collection(insightsCollectionName).doc(today);
+
+    // [COMPRESSION LOGIC] - Only runs for Firestore
+    const dataObject = {
+      fetchedAt: new Date().toISOString(),
+      instrumentCount: insightsData.length,
+      insights: insightsData
+    };
+
+    const jsonString = JSON.stringify(dataObject);
+    const byteSize = Buffer.byteLength(jsonString);
+    const SIZE_THRESHOLD = 900 * 1024; // 900KB
+
+    let firestorePayload;
+
+    if (byteSize > SIZE_THRESHOLD) {
+      logger.log('INFO', `[FetchInsightsHelpers] Payload size ${(byteSize / 1024).toFixed(2)}KB exceeds threshold. Compressing for Firestore...`);
+      const compressedBuffer = zlib.gzipSync(Buffer.from(jsonString));
+      firestorePayload = {
+        fetchedAt: FieldValue.serverTimestamp(),
+        instrumentCount: insightsData.length,
+        _compressed: true,
+        payload: compressedBuffer
+      };
+    } else {
+      firestorePayload = {
+        fetchedAt: FieldValue.serverTimestamp(),
+        instrumentCount: insightsData.length,
+        insights: insightsData
+      };
+    }
+
+    await docRef.set(firestorePayload);
+    logger.log('INFO', `[FetchInsightsHelpers] ✅ Successfully stored insights to Firestore (Fallback).`);
+    storageSuccess = true;
+    storageMethod = 'FIRESTORE';
+  } else {
+    logger.log('INFO', `[FetchInsightsHelpers] ⏭️ Skipping Firestore write (BigQuery write successful).`);
+  }
+
+  // Update root data indexer
   try {
     const { runRootDataIndexer } = require('../../root-data-indexer/index');

     if (!config.rootDataIndexer) {
       logger.log('WARN', `[FetchInsightsHelpers] Root data indexer config not provided. Skipping index update.`);
     } else {
-      // Merge insights collection name into the full config
       const indexerConfig = {
         ...config.rootDataIndexer,
         collections: {
           ...config.rootDataIndexer.collections,
-          insights: insightsCollectionName // Override with actual collection name used
+          insights: insightsCollectionName
         },
-        targetDate: today // Index only today's date for speed
+        targetDate: today
       };

-      logger.log('INFO', `[FetchInsightsHelpers] Triggering root data indexer for date ${today} after insights data storage...`);
+      logger.log('INFO', `[FetchInsightsHelpers] Triggering root data indexer for date ${today}...`);
       const result = await runRootDataIndexer(indexerConfig, dependencies);

       if (result.success && result.count > 0) {
-        logger.log('INFO', `[FetchInsightsHelpers] Root data indexer completed successfully for date ${today} (updated ${result.count} dates)`);
-      } else {
-        logger.log('WARN', `[FetchInsightsHelpers] Root data indexer completed but no dates were updated for ${today}`);
+        logger.log('INFO', `[FetchInsightsHelpers] Root data indexer completed successfully.`);
       }
     }
   } catch (indexerError) {
     logger.log('ERROR', `[FetchInsightsHelpers] Failed to run root data indexer for ${today}`, indexerError);
-    // Continue - insights data is stored, indexer failure is non-critical
   }

-  const successMsg = `Successfully fetched and stored ${insightsData.length} instrument insights for ${today}.`;
-  logger.log('SUCCESS', `[FetchInsightsHelpers] ${successMsg}`, { documentId: today, instrumentCount: insightsData.length });
+  const successMsg = `Successfully fetched and stored ${insightsData.length} instrument insights for ${today} via ${storageMethod}.`;
+  logger.log('SUCCESS', `[FetchInsightsHelpers] ${successMsg}`, { documentId: today, instrumentCount: insightsData.length, storageMethod });
   return { success: true, message: successMsg, instrumentCount: insightsData.length };

 } catch (error) {
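Editor's note (illustrative, not part of the diff): the Firestore fallback keeps the 900 KB gzip threshold under Firestore's roughly 1 MB document limit; a self-contained sketch of that compress-when-large pattern. The buildFirestorePayload name is hypothetical, and a plain Date stands in for FieldValue.serverTimestamp().

const zlib = require('zlib');

// Compress the payload only when the serialized document would approach the Firestore limit.
function buildFirestorePayload(insights) {
  const dataObject = {
    fetchedAt: new Date().toISOString(),
    instrumentCount: insights.length,
    insights
  };
  const jsonString = JSON.stringify(dataObject);
  const SIZE_THRESHOLD = 900 * 1024; // 900KB safety margin

  if (Buffer.byteLength(jsonString) > SIZE_THRESHOLD) {
    return {
      fetchedAt: new Date(),            // stand-in for FieldValue.serverTimestamp()
      instrumentCount: insights.length,
      _compressed: true,
      payload: zlib.gzipSync(Buffer.from(jsonString))
    };
  }
  return { fetchedAt: new Date(), instrumentCount: insights.length, insights };
}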