bulltrackers-module 1.0.721 → 1.0.723
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/functions/computation-system/data/CachedDataLoader.js +101 -102
- package/functions/computation-system/data/DependencyFetcher.js +48 -8
- package/functions/computation-system/persistence/ResultCommitter.js +158 -573
- package/functions/computation-system/utils/data_loader.js +253 -1088
- package/functions/core/utils/bigquery_utils.js +248 -112
- package/functions/etoro-price-fetcher/helpers/handler_helpers.js +4 -1
- package/functions/fetch-insights/helpers/handler_helpers.js +63 -65
- package/functions/fetch-popular-investors/helpers/fetch_helpers.js +143 -458
- package/functions/orchestrator/index.js +108 -141
- package/functions/root-data-indexer/index.js +130 -437
- package/index.js +0 -2
- package/package.json +3 -4
- package/functions/invalid-speculator-handler/helpers/handler_helpers.js +0 -38
- package/functions/speculator-cleanup-orchestrator/helpers/cleanup_helpers.js +0 -101
package/functions/core/utils/bigquery_utils.js
@@ -100,166 +100,94 @@ async function ensureTableExists(datasetId, tableId, schema, options = {}, logge
     throw error;
   }
 }
-
 /**
  * Insert rows using BigQuery MERGE statement (handles duplicates natively via SQL)
- *
- * Uses a temporary table and MERGE statement for atomic deduplication
- * @param {string} datasetId - Dataset ID
- * @param {string} tableId - Table ID
- * @param {Array} rows - Array of row objects
- * @param {Array} keyFields - Fields that form unique key (e.g., ['date', 'user_id', 'user_type'])
- * @param {object} logger - Logger instance
- * @returns {Promise<number>} Number of rows actually inserted (not duplicates)
+ * FIXED: Ensures temporary tables are always deleted using try/finally block.
  */
 async function insertRowsWithMerge(datasetId, tableId, rows, keyFields, logger = null) {
-  if (!rows || rows.length === 0)
-    if (logger) logger.log('WARN', `[BigQuery] No rows to merge into ${datasetId}.${tableId}`);
-    return 0;
-  }
+  if (!rows || rows.length === 0) return 0;
 
-  const MAX_ROW_SIZE = 9 * 1024 * 1024;
-  const validRows = rows.filter(row =>
-    const rowSize = JSON.stringify(row).length;
-    return rowSize <= MAX_ROW_SIZE;
-  });
+  const MAX_ROW_SIZE = 9 * 1024 * 1024;
+  const validRows = rows.filter(row => JSON.stringify(row).length <= MAX_ROW_SIZE);
 
   if (validRows.length === 0) {
     if (logger) logger.log('WARN', `[BigQuery] All rows too large for MERGE into ${datasetId}.${tableId}`);
     return 0;
   }
+
+  // 1. Define ID once, outside the try block, so it's consistent in finally/catch
+  const tempTableId = `${tableId}_temp_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+  const tablePath = `${datasetId}.${tableId}`;
+  const tempTablePath = `${datasetId}.${tempTableId}`;
 
   try {
-    const tablePath = `${datasetId}.${tableId}`;
-    const keyFieldsStr = keyFields.join(', ');
-    const tempTableId = `${tableId}_temp_${Date.now()}`;
-    const tempTablePath = `${datasetId}.${tempTableId}`;
-
-    // Get table schema
     const dataset = await getOrCreateDataset(datasetId, logger);
     const table = dataset.table(tableId);
     const [tableMetadata] = await table.getMetadata();
     const schema = tableMetadata.schema.fields;
 
-    // Create
+    // 2. Create Temp Table
     const tempTable = dataset.table(tempTableId);
-    await tempTable.create({
-      schema: schema,
-
+    await tempTable.create({
+      schema: schema,
+      expirationTime: Date.now() + (1000 * 60 * 60 * 1), // Auto-delete after 1 hour (safety net)
+      description: 'Temporary table for merge operation'
     });
 
-
-
-    }
-
-    // Insert all rows into temp table using LOAD JOB (free, not streaming)
-    // Write to temporary file (load jobs require a file, not a stream)
-    const tempFile = path.join(os.tmpdir(), `bigquery_merge_${Date.now()}_${Math.random().toString(36).substring(7)}.ndjson`);
+    // 3. Load Data into Temp Table
+    const tempFile = path.join(os.tmpdir(), `bq_merge_${tempTableId}.ndjson`);
     const ndjson = validRows.map(r => JSON.stringify(r)).join('\n');
 
     try {
       fs.writeFileSync(tempFile, ndjson, 'utf8');
-
-      // Load into temp table using load job (FREE) from temp file
-      // Use createLoadJob to get a Job object we can wait on
       const [loadJob] = await tempTable.createLoadJob(tempFile, {
         sourceFormat: 'NEWLINE_DELIMITED_JSON',
         writeDisposition: 'WRITE_APPEND',
-        autodetect: false
+        autodetect: false
       });
-
-      // [FIX] Use native job.promise() instead of custom polling
-      // This automatically polls and waits for completion
       await loadJob.promise();
-
-      // Get job metadata to check for errors
-      const [jobMetadata] = await loadJob.getMetadata();
-
-      // Check for errors
-      if (jobMetadata.status?.errorResult) {
-        throw new Error(`Load job failed: ${jobMetadata.status.errorResult.message}`);
-      }
     } finally {
-
-      try {
-        if (fs.existsSync(tempFile)) {
-          fs.unlinkSync(tempFile);
-        }
-      } catch (cleanupError) {
-        if (logger) {
-          logger.log('WARN', `[BigQuery] Failed to delete temp file ${tempFile}: ${cleanupError.message}`);
-        }
-      }
-    }
-
-    if (logger) {
-      logger.log('INFO', `[BigQuery] Loaded ${validRows.length} rows into temp table ${tempTableId} using LOAD JOB (free)`);
+      if (fs.existsSync(tempFile)) try { fs.unlinkSync(tempFile); } catch (e) {}
     }
 
-    //
-    // This is more efficient than checking in JavaScript
+    // 4. Perform MERGE
     const mergeConditions = keyFields.map(f => `target.${f} = source.${f}`).join(' AND ');
-
-    // Build UPDATE clause - update all non-key fields
     const allFields = schema.map(f => f.name);
     const nonKeyFields = allFields.filter(f => !keyFields.includes(f));
     const updateClause = nonKeyFields.map(f => `${f} = source.${f}`).join(', ');
 
-    //
-
-    const [insertedCountResult] = await query(`
+    // Query to count inserts vs updates
+    const [countRes] = await query(`
       SELECT COUNT(*) as inserted
       FROM \`${tempTablePath}\` AS source
-      WHERE NOT EXISTS (
-        SELECT 1 FROM \`${tablePath}\` AS target
-        WHERE ${mergeConditions}
-      )
+      WHERE NOT EXISTS (SELECT 1 FROM \`${tablePath}\` AS target WHERE ${mergeConditions})
     `, {}, logger);
-
-
-    const rowsUpdated = validRows.length - rowsInserted;
-
-    // Now perform the MERGE (inserts new rows, updates existing rows)
+    const rowsInserted = countRes[0]?.inserted || 0;
+
     const mergeQuery = `
       MERGE \`${tablePath}\` AS target
       USING \`${tempTablePath}\` AS source
       ON ${mergeConditions}
-      WHEN MATCHED THEN
-
-      WHEN NOT MATCHED THEN
-        INSERT ROW
+      WHEN MATCHED THEN UPDATE SET ${updateClause}
+      WHEN NOT MATCHED THEN INSERT ROW
     `;
-
     await query(mergeQuery, {}, logger);
 
-
-    await tempTable.delete();
-
-    if (logger) {
-      if (rowsUpdated > 0) {
-        logger.log('INFO', `[BigQuery] MERGE completed: ${rowsInserted} new rows inserted, ${rowsUpdated} existing rows updated in ${tablePath}`);
-      } else {
-        logger.log('INFO', `[BigQuery] MERGE completed: ${rowsInserted} new rows inserted into ${tablePath} (${validRows.length - rowsInserted} duplicates skipped via SQL)`);
-      }
-    }
+    if (logger) logger.log('INFO', `[BigQuery] MERGE Success: ${rowsInserted} new, ${validRows.length - rowsInserted} updated.`);
 
     return rowsInserted;
+
   } catch (error) {
-
-
-
-
-    };
-    if (logger) {
-      logger.log('ERROR', `[BigQuery] MERGE failed for ${datasetId}.${tableId}:`, JSON.stringify(errorDetails, null, 2));
-    }
-    // Try to clean up temp table if it exists
+    if (logger) logger.log('ERROR', `[BigQuery] MERGE failed for ${datasetId}.${tableId}`, error);
+    throw error;
+  } finally {
+    // 5. CLEANUP: Always delete the temp table (the previous bug was here)
     try {
       const dataset = await getOrCreateDataset(datasetId, logger);
-
-
-
-
+      await dataset.table(tempTableId).delete({ ignoreNotFound: true }).catch(() => {});
+    } catch (cleanupError) {
+      if (logger) logger.log('WARN', `[BigQuery] Cleanup failed for ${tempTableId}: ${cleanupError.message}`);
+    }
   }
 }
 
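The rewritten insertRowsWithMerge above moves the temp-table ID and paths outside the try block and deletes the temporary table in a finally clause, so a failed load or MERGE can no longer leak tables. A minimal, self-contained sketch of that pattern, using an in-memory stand-in for the dataset client (the names below are illustrative, not the package's real helpers):

// try/finally cleanup sketch; the "dataset" here is a fake, in-memory object.
const crypto = require('crypto');

function fakeDataset() {
  const tables = new Set();
  return {
    async create(id) { tables.add(id); },          // stand-in for tempTable.create()
    async drop(id) { tables.delete(id); },         // stand-in for table.delete()
    list() { return [...tables]; },
  };
}

async function mergeViaTempTable(dataset, tableId, rows) {
  // Define the ID once, outside try, so catch/finally always see the same value.
  const tempId = `${tableId}_temp_${Date.now()}_${crypto.randomBytes(3).toString('hex')}`;
  try {
    await dataset.create(tempId);
    if (rows.length === 0) throw new Error('nothing to merge'); // simulate a failure path
    // ...load rows into tempId and run the MERGE against tableId here...
    return rows.length;
  } finally {
    // Runs on success and on error: the temp table cannot leak.
    await dataset.drop(tempId).catch(() => {});
  }
}

(async () => {
  const ds = fakeDataset();
  await mergeViaTempTable(ds, 'asset_prices', []).catch(err => console.log('merge failed:', err.message));
  console.log('temp tables left behind:', ds.list()); // []
})();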
@@ -1700,12 +1628,31 @@ async function queryPIRankings(dateStr, logger = null) {
  * @param {object} logger - Logger instance
  * @returns {Promise<object>} Price data map in format { instrumentId: { "YYYY-MM-DD": price } }
  */
+/**
+ * Query asset prices from BigQuery with smart routing.
+ * This function intelligently routes to more efficient queries when possible:
+ * - If querying a single date with no instrument filter -> uses queryAllPricesForDate
+ * - Otherwise uses the general date range query
+ *
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+ * @param {string|null} startDateStr - Start date in YYYY-MM-DD format (optional)
+ * @param {string|null} endDateStr - End date in YYYY-MM-DD format (optional)
+ * @param {number[]|null} instrumentIds - Array of instrument IDs to filter (optional)
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
 async function queryAssetPrices(startDateStr = null, endDateStr = null, instrumentIds = null, logger = null) {
   if (process.env.BIGQUERY_ENABLED === 'false') {
     if (logger) logger.log('DEBUG', '[BigQuery] Asset prices query skipped (BIGQUERY_ENABLED=false)');
     return null;
   }
 
+  // SMART ROUTING: If querying single date with no instrument filter, use optimized function
+  if (startDateStr && startDateStr === endDateStr && (!instrumentIds || instrumentIds.length === 0)) {
+    if (logger) logger.log('INFO', `[BigQuery] 🎯 Routing to optimized single-date query for ${startDateStr}`);
+    return queryAllPricesForDate(startDateStr, logger);
+  }
+
   const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
   const tablePath = `${datasetId}.asset_prices`;
 
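The smart-routing guard added above only fires for a single-date query with no instrument filter. A tiny standalone sketch of the same decision, reduced to returning a label so the branch logic can be checked in isolation (the helper names mirror the diff, the bodies are stubs):

function chooseRoute(startDateStr, endDateStr, instrumentIds) {
  const singleDate = Boolean(startDateStr) && startDateStr === endDateStr;
  const noFilter = !instrumentIds || instrumentIds.length === 0;
  return singleDate && noFilter ? 'queryAllPricesForDate' : 'dateRangeQuery';
}

console.log(chooseRoute('2025-01-01', '2025-01-01', null));        // queryAllPricesForDate
console.log(chooseRoute('2025-01-01', '2025-01-05', null));        // dateRangeQuery
console.log(chooseRoute('2025-01-01', '2025-01-01', [100, 101]));  // dateRangeQuery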
@@ -1733,7 +1680,12 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
         instrument_id,
         date,
         price,
-        ticker
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
       FROM \`${tablePath}\`
       ${whereClause}
       ORDER BY instrument_id, date DESC
@@ -1757,7 +1709,7 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
       return {};
     }
 
-    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": price } }
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
     const priceMap = {};
     for (const row of rows) {
       const instrumentId = String(row.instrument_id);
@@ -1767,8 +1719,16 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
         priceMap[instrumentId] = {};
       }
 
-      //
-      priceMap[instrumentId][dateStr] =
+      // Return full price data structure for consistency
+      priceMap[instrumentId][dateStr] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
     }
 
     if (logger) {
@@ -1784,6 +1744,180 @@ async function queryAssetPrices(startDateStr = null, endDateStr = null, instrume
   }
 }
 
+/**
+ * Query all prices for a specific date (all tickers).
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": price } }
+ * @param {string} dateStr - Date in YYYY-MM-DD format
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
+async function queryAllPricesForDate(dateStr, logger = null) {
+  if (process.env.BIGQUERY_ENABLED === 'false') {
+    if (logger) logger.log('DEBUG', '[BigQuery] All prices query skipped (BIGQUERY_ENABLED=false)');
+    return null;
+  }
+
+  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+  const tablePath = `${datasetId}.asset_prices`;
+
+  try {
+    const sqlQuery = `
+      SELECT
+        instrument_id,
+        date,
+        price,
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
+      FROM \`${tablePath}\`
+      WHERE date = @date
+      ORDER BY instrument_id ASC
+    `;
+
+    const params = { date: dateStr };
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] 🔍 Querying all prices for date ${dateStr} from ${tablePath}`);
+    }
+
+    const rows = await query(sqlQuery, { params }, logger);
+
+    if (!rows || rows.length === 0) {
+      if (logger) logger.log('INFO', `[BigQuery] No prices found for date ${dateStr} in ${tablePath}`);
+      return {};
+    }
+
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+    const priceMap = {};
+    for (const row of rows) {
+      const instrumentId = String(row.instrument_id);
+      const dateKey = row.date; // Already in YYYY-MM-DD format
+
+      if (!priceMap[instrumentId]) {
+        priceMap[instrumentId] = {};
+      }
+
+      priceMap[instrumentId][dateKey] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
+    }
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] ✅ Retrieved prices for ${Object.keys(priceMap).length} instruments for date ${dateStr}`);
+    }
+
+    return priceMap;
+  } catch (error) {
+    if (logger) {
+      logger.log('WARN', `[BigQuery] All prices query failed for date ${dateStr}: ${error.message}`);
+    }
+    return null;
+  }
+}
+
+/**
+ * Query prices for specific ticker(s) over a date range.
+ * Returns data in format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+ * @param {string|string[]} tickers - Single ticker string or array of ticker strings (e.g., "AAPL" or ["AAPL", "MSFT"])
+ * @param {string} startDateStr - Start date in YYYY-MM-DD format
+ * @param {string} endDateStr - End date in YYYY-MM-DD format (inclusive)
+ * @param {object} logger - Logger instance
+ * @returns {Promise<object|null>} Price map, or null if not found/error
+ */
+async function queryPricesForTickers(tickers, startDateStr, endDateStr, logger = null) {
+  if (process.env.BIGQUERY_ENABLED === 'false') {
+    if (logger) logger.log('DEBUG', '[BigQuery] Ticker prices query skipped (BIGQUERY_ENABLED=false)');
+    return null;
+  }
+
+  if (!tickers || (Array.isArray(tickers) && tickers.length === 0)) {
+    if (logger) logger.log('WARN', '[BigQuery] No tickers provided to queryPricesForTickers');
+    return {};
+  }
+
+  const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
+  const tablePath = `${datasetId}.asset_prices`;
+
+  try {
+    const tickerArray = Array.isArray(tickers) ? tickers : [tickers];
+
+    const sqlQuery = `
+      SELECT
+        instrument_id,
+        date,
+        price,
+        ticker,
+        open,
+        high,
+        low,
+        close,
+        volume
+      FROM \`${tablePath}\`
+      WHERE ticker IN UNNEST(@tickers)
+        AND date BETWEEN @startDate AND @endDate
+      ORDER BY instrument_id ASC, date DESC
+    `;
+
+    const params = {
+      tickers: tickerArray,
+      startDate: startDateStr,
+      endDate: endDateStr
+    };
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] 🔍 Querying prices for tickers [${tickerArray.join(', ')}] from ${startDateStr} to ${endDateStr} from ${tablePath}`);
+    }
+
+    const rows = await query(sqlQuery, { params }, logger);
+
+    if (!rows || rows.length === 0) {
+      if (logger) logger.log('INFO', `[BigQuery] No prices found for tickers [${tickerArray.join(', ')}] in date range`);
+      return {};
+    }
+
+    // Transform to expected format: { instrumentId: { "YYYY-MM-DD": { price, ticker, ... } } }
+    const priceMap = {};
+    for (const row of rows) {
+      const instrumentId = String(row.instrument_id);
+      const dateKey = row.date;
+
+      if (!priceMap[instrumentId]) {
+        priceMap[instrumentId] = {};
+      }
+
+      priceMap[instrumentId][dateKey] = {
+        price: row.price || null,
+        ticker: row.ticker || null,
+        open: row.open || null,
+        high: row.high || null,
+        low: row.low || null,
+        close: row.close || null,
+        volume: row.volume || null
+      };
+    }
+
+    if (logger) {
+      logger.log('INFO', `[BigQuery] ✅ Retrieved prices for ${Object.keys(priceMap).length} instruments across ${tickerArray.length} ticker(s)`);
+    }
+
+    return priceMap;
+  } catch (error) {
+    if (logger) {
+      logger.log('WARN', `[BigQuery] Ticker prices query failed: ${error.message}`);
+    }
+    return null;
+  }
+}
+
 /**
  * Query ticker mappings from BigQuery
  * Returns data in format: { instrumentId: "TICKER", ... }
@@ -2142,6 +2276,8 @@ module.exports = {
   queryHistoryData,
   querySocialData,
   queryAssetPrices,
+  queryAllPricesForDate,
+  queryPricesForTickers,
   queryPIMasterList,
   queryPIRankings,
   queryInstrumentInsights,
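Both newly exported helpers resolve to the same nested shape: instrument ID, then date, then an object with price, ticker, open, high, low, close and volume. A small sketch of consuming that shape (the sample object below is illustrative data, not real query output):

// Read the latest close for one instrument out of the nested price map.
const priceMap = {
  '1001': {
    '2025-01-02': { price: 101.2, ticker: 'AAPL', open: 100.5, high: 102.0, low: 100.1, close: 101.2, volume: 123456 },
  },
};

function latestCloseFor(instrumentId, map) {
  const byDate = map[String(instrumentId)] || {};
  const dates = Object.keys(byDate).sort(); // YYYY-MM-DD sorts lexicographically
  const last = dates[dates.length - 1];
  return last ? byDate[last].close : null;
}

console.log(latestCloseFor(1001, priceMap)); // 101.2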
package/functions/etoro-price-fetcher/helpers/handler_helpers.js
@@ -215,7 +215,10 @@ exports.fetchAndStorePrices = async (config, dependencies) => {
       // Continue - price data is stored, indexer failure is non-critical
     }
 
-    const
+    const firestoreShardCount = Object.keys(shardUpdates).length;
+    const successMessage = process.env.FIRESTORE_PRICE_FETCH !== 'false'
+      ? `Successfully processed and saved daily prices for ${results.length} instruments to ${firestoreShardCount} Firestore shards.`
+      : `Successfully processed and saved daily prices for ${results.length} instruments (Firestore writes disabled).`;
     logger.log('SUCCESS', `[PriceFetcherHelpers] ${successMessage}`);
     return { success: true, message: successMessage, instrumentsProcessed: results.length };
   } catch (error) { logger.log('ERROR', '[PriceFetcherHelpers] Fatal error during closing price update', { errorMessage: error.message, errorStack: error.stack, headerId: selectedHeader ? selectedHeader.id : 'not-selected' }); throw error;
package/functions/fetch-insights/helpers/handler_helpers.js
@@ -1,10 +1,10 @@
 /**
  * @fileoverview Main pipe: pipe.maintenance.runFetchInsights
- * REFACTORED:
- * UPDATED:
+ * REFACTORED: Prioritizes BigQuery storage.
+ * UPDATED: Firestore (and compression) is now a fallback only.
  */
 const { FieldValue } = require('@google-cloud/firestore');
-const zlib = require('zlib');
+const zlib = require('zlib');
 
 /**
  * Main pipe: pipe.maintenance.runFetchInsights
@@ -25,7 +25,6 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
   try {
     // Extract collection name from registry path: daily_instrument_insights/{date}
     const basePath = getCollectionPath('rootData', 'instrumentInsights', { date: '2025-01-01' });
-    // Path is like "daily_instrument_insights/2025-01-01", extract collection name
     insightsCollectionName = basePath.split('/')[0];
   } catch (e) {
     logger.log('WARN', `[FetchInsightsHelpers] Failed to get collection from registry, using config: ${e.message}`);
@@ -86,54 +85,12 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
   }
 
   const today = new Date().toISOString().slice(0, 10);
-  const docRef = db.collection(insightsCollectionName).doc(today);
-
-  // [FIX] --- COMPRESSION LOGIC START ---
-
-  // 1. Prepare a standard object to verify size
-  // We use a static date string here so it can be stringified safely
-  const dataObject = {
-    fetchedAt: new Date().toISOString(),
-    instrumentCount: insightsData.length,
-    insights: insightsData
-  };
-
-  const jsonString = JSON.stringify(dataObject);
-  const byteSize = Buffer.byteLength(jsonString);
-
-  // Firestore limit is ~1MB. We use 900KB as a safety threshold.
-  const SIZE_THRESHOLD = 900 * 1024;
 
-
-
-
-    logger.log('INFO', `[FetchInsightsHelpers] Payload size ${(byteSize / 1024).toFixed(2)}KB exceeds threshold. Compressing...`);
-
-    // Gzip the JSON string
-    const compressedBuffer = zlib.gzipSync(Buffer.from(jsonString));
-
-    // Create the compressed wrapper structure
-    firestorePayload = {
-      fetchedAt: FieldValue.serverTimestamp(), // Keep outer timestamp for indexing
-      instrumentCount: insightsData.length, // Keep outer metadata for indexing
-      _compressed: true,
-      payload: compressedBuffer
-    };
-
-    logger.log('INFO', `[FetchInsightsHelpers] Compressed size: ${(compressedBuffer.length / 1024).toFixed(2)}KB.`);
-  } else {
-    // Standard uncompressed write (Legacy behavior)
-    firestorePayload = {
-      fetchedAt: FieldValue.serverTimestamp(),
-      instrumentCount: insightsData.length,
-      insights: insightsData
-    };
-  }
-  // [FIX] --- COMPRESSION LOGIC END ---
+  // --- STORAGE STRATEGY: BigQuery First, Firestore Fallback ---
+  let storageSuccess = false;
+  let storageMethod = 'NONE';
 
-
-
-  // Write insights to BigQuery (one row per instrument)
+  // 1. Attempt BigQuery Write
   if (process.env.BIGQUERY_ENABLED !== 'false') {
     try {
       const { insertRows, ensureInstrumentInsightsTable } = require('../../core/utils/bigquery_utils');
@@ -152,46 +109,87 @@ exports.fetchAndStoreInsights = async (config, dependencies) => {
       const datasetId = process.env.BIGQUERY_DATASET_ID || 'bulltrackers_data';
       await insertRows(datasetId, 'instrument_insights', bigqueryRows, logger);
 
-      logger.log('INFO', `[FetchInsightsHelpers] Successfully stored ${bigqueryRows.length} insight records to BigQuery
+      logger.log('INFO', `[FetchInsightsHelpers] ✅ Successfully stored ${bigqueryRows.length} insight records to BigQuery.`);
+      storageSuccess = true;
+      storageMethod = 'BIGQUERY';
     } catch (bqError) {
-      logger.log('WARN', `[FetchInsightsHelpers] BigQuery
-      //
+      logger.log('WARN', `[FetchInsightsHelpers] ⚠️ BigQuery write failed. Proceeding to Firestore fallback. Error: ${bqError.message}`);
+      // Proceed to Firestore logic below
     }
   }
-
-  //
+
+  // 2. Firestore Fallback (If BigQuery failed or is disabled)
+  if (!storageSuccess) {
+    logger.log('INFO', `[FetchInsightsHelpers] 📂 Writing to Firestore (Fallback)...`);
+    const docRef = db.collection(insightsCollectionName).doc(today);
+
+    // [COMPRESSION LOGIC] - Only runs for Firestore
+    const dataObject = {
+      fetchedAt: new Date().toISOString(),
+      instrumentCount: insightsData.length,
+      insights: insightsData
+    };
+
+    const jsonString = JSON.stringify(dataObject);
+    const byteSize = Buffer.byteLength(jsonString);
+    const SIZE_THRESHOLD = 900 * 1024; // 900KB
+
+    let firestorePayload;
+
+    if (byteSize > SIZE_THRESHOLD) {
+      logger.log('INFO', `[FetchInsightsHelpers] Payload size ${(byteSize / 1024).toFixed(2)}KB exceeds threshold. Compressing for Firestore...`);
+      const compressedBuffer = zlib.gzipSync(Buffer.from(jsonString));
+      firestorePayload = {
+        fetchedAt: FieldValue.serverTimestamp(),
+        instrumentCount: insightsData.length,
+        _compressed: true,
+        payload: compressedBuffer
+      };
+    } else {
+      firestorePayload = {
+        fetchedAt: FieldValue.serverTimestamp(),
+        instrumentCount: insightsData.length,
+        insights: insightsData
+      };
+    }
+
+    await docRef.set(firestorePayload);
+    logger.log('INFO', `[FetchInsightsHelpers] ✅ Successfully stored insights to Firestore (Fallback).`);
+    storageSuccess = true;
+    storageMethod = 'FIRESTORE';
+  } else {
+    logger.log('INFO', `[FetchInsightsHelpers] ⏭️ Skipping Firestore write (BigQuery write successful).`);
+  }
+
+  // Update root data indexer
   try {
     const { runRootDataIndexer } = require('../../root-data-indexer/index');
 
     if (!config.rootDataIndexer) {
       logger.log('WARN', `[FetchInsightsHelpers] Root data indexer config not provided. Skipping index update.`);
     } else {
-      // Merge insights collection name into the full config
       const indexerConfig = {
         ...config.rootDataIndexer,
         collections: {
           ...config.rootDataIndexer.collections,
-          insights: insightsCollectionName
+          insights: insightsCollectionName
         },
-        targetDate: today
+        targetDate: today
      };
 
-      logger.log('INFO', `[FetchInsightsHelpers] Triggering root data indexer for date ${today}
+      logger.log('INFO', `[FetchInsightsHelpers] Triggering root data indexer for date ${today}...`);
      const result = await runRootDataIndexer(indexerConfig, dependencies);
 
      if (result.success && result.count > 0) {
-        logger.log('INFO', `[FetchInsightsHelpers] Root data indexer completed successfully
-      } else {
-        logger.log('WARN', `[FetchInsightsHelpers] Root data indexer completed but no dates were updated for ${today}`);
+        logger.log('INFO', `[FetchInsightsHelpers] Root data indexer completed successfully.`);
      }
    }
  } catch (indexerError) {
    logger.log('ERROR', `[FetchInsightsHelpers] Failed to run root data indexer for ${today}`, indexerError);
-    // Continue - insights data is stored, indexer failure is non-critical
  }
 
-  const successMsg = `Successfully fetched and stored ${insightsData.length} instrument insights for ${today}.`;
-  logger.log('SUCCESS', `[FetchInsightsHelpers] ${successMsg}`, { documentId: today, instrumentCount: insightsData.length });
+  const successMsg = `Successfully fetched and stored ${insightsData.length} instrument insights for ${today} via ${storageMethod}.`;
+  logger.log('SUCCESS', `[FetchInsightsHelpers] ${successMsg}`, { documentId: today, instrumentCount: insightsData.length, storageMethod });
   return { success: true, message: successMsg, instrumentCount: insightsData.length };
 
 } catch (error) {